update hadoop

This commit is contained in:
Newnius 2018-08-06 16:42:15 +08:00
parent d6f9225c47
commit 98a30aab91
3 changed files with 92 additions and 56 deletions

View File

@ -23,10 +23,12 @@ RUN chown root:root /root/.ssh/config
RUN echo "Port 2122" >> /etc/ssh/sshd_config
# Install Hadoop
RUN wget -O hadoop.tar.gz https://archive.apache.org/dist/hadoop/common/hadoop-2.7.4/hadoop-2.7.4.tar.gz && \
tar -xzf hadoop.tar.gz -C /usr/local/ && rm hadoop.tar.gz
ENV HADOOP_VER 2.7.4
RUN ln -s /usr/local/hadoop-2.7.4 /usr/local/hadoop
RUN wget -O hadoop.tar.gz https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VER/hadoop-$HADOOP_VER.tar.gz && \
tar -xzf hadoop.tar.gz -C /usr/local/ && rm hadoop.tar.gz
RUN ln -s /usr/local/hadoop-$HADOOP_VER /usr/local/hadoop
ENV HADOOP_HOME /usr/local/hadoop
ENV PATH $PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

View File

@ -1,85 +1,115 @@
# based on alpine
# Deploy one Hadoop Cluster with docker
## Create a hadoop cluster in swarm mode
`--hostname` needs docker 1.13 or higher
## Start Master
```bash
docker service create \
--name hadoop-master \
--hostname hadoop-master \
--network swarm-net \
--replicas 1 \
--endpoint-mode dnsrr \
newnius/hadoop
--name hadoop-master \
--hostname hadoop-master \
--network swarm-net \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
newnius/hadoop:2.7.4
```
## Start slaves
```bash
docker service create \
--name hadoop-slave1 \
--hostname hadoop-slave1 \
--network swarm-net \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
newnius/hadoop:2.7.4
```
```bash
docker service create \
--name hadoop-slave1 \
--hostname hadoop-slave1 \
--network swarm-net \
--replicas 1 \
--endpoint-mode dnsrr \
newnius/hadoop
--name hadoop-slave2 \
--network swarm-net \
--hostname hadoop-slave2 \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
newnius/hadoop:2.7.4
```
```bash
docker service create \
--name hadoop-slave2 \
--hostname hadoop-slave2 \
--network swarm-net \
--replicas 1 \
--endpoint-mode dnsrr \
newnius/hadoop
--name hadoop-slave3 \
--hostname hadoop-slave3 \
--network swarm-net \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
newnius/hadoop:2.7.4
```
## Init for the first time
#### format dfs first
Run these commands on the master node.
```bash
docker service create \
--name hadoop-slave3 \
--hostname hadoop-slave3 \
--network swarm-net \
--replicas 1 \
--endpoint-mode dnsrr \
newnius/hadoop
```
## Init && Test
In the first deploy, format dfs first
### stop cluster (in master)
```bash
# stop HDFS services
sbin/stop-dfs.sh
```
### format hdfs (in master)
```bash
# format HDFS meta data
bin/hadoop namenode -format
```
### start cluster (in master)
```bash
# restart HDFS services
sbin/start-dfs.sh
```
### Run a test job
## Run a test job
To make sure you have successfully set up the Hadoop cluster, run the following commands and check that they complete successfully.
```bash
# prepare input data
bin/hadoop dfs -mkdir -p /user/root/input
# copy files to input path
bin/hadoop dfs -put etc/hadoop/* /user/root/input
```
```bash
# submit the job
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.4.jar grep input output 'dfs[a-z.]+'
```
### monitor cluster in browser
## Browse the web UI
You can expose the ports in the script, but I'd rather not, since the slaves should occupy the same ports.
YARN: hadoop-master:8088
To access the web UI, deploy another (socks5) proxy to route the traffic.
HDFS: hadoop-master:50070
If you don't have one, try [newnius/docker-proxy](https://hub.docker.com/r/newnius/docker-proxy/); it is rather easy to use.
_Proxy needed: newnius/docker-proxy_
Visit [hadoop-master:8088](http://hadoop-master:8088) for YARN pages.
Visit [hadoop-master:50070](http://hadoop-master:50070) for HDFS pages.
## Custom configuration
To persist data or modify the conf files, refer to the following script.
The `/config/hadoop` path is where replacement conf files are placed; you don't have to provide every file, only the ones you want to override.
```bash
docker service create \
--name hadoop-master \
--hostname hadoop-master \
--network swarm-net \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
--mount type=bind,source=/data/hadoop/config,target=/config/hadoop \
--mount type=bind,source=/data/hadoop/hdfs/master,target=/tmp/hadoop-root \
--mount type=bind,source=/data/hadoop/logs/master,target=/usr/local/hadoop/logs \
newnius/hadoop:2.7.4
```

View File

@ -9,8 +9,12 @@ rm /tmp/*.pid
# installing libraries if any - (resource urls added comma separated to the ACP system variable)
cd $HADOOP_PREFIX/share/hadoop/common ; for cp in ${ACP//,/ }; do echo == $cp; curl -LO $cp ; done; cd -
# replace config
cp /mnt/hadoop-config/* $HADOOP_PREFIX/etc/hadoop/
## replace config
: ${EXTRA_CONF_DIR:=/config/hadoop}
if [ -d "$EXTRA_CONF_DIR" ]; then
cp $EXTRA_CONF_DIR/* $HADOOP_PREFIX/etc/hadoop/
fi
/usr/sbin/sshd