add more hadoop versions

This commit is contained in:
Newnius 2018-08-06 17:14:20 +08:00
parent e238f29b04
commit 8e119766ee
21 changed files with 655 additions and 186 deletions

hadoop/2.7.1/Dockerfile
View File

@@ -1,17 +1,59 @@
-FROM sequenceiq/hadoop-docker:2.7.1
-MAINTAINER Newnius <docker@newnius.com>
+FROM alpine:3.8
+MAINTAINER Newnius <newnius.cn@gmail.com>
+USER root
+# Prerequisites
+RUN apk add --no-cache openssh openssl openjdk8-jre rsync bash procps
+ENV JAVA_HOME /usr/lib/jvm/java-1.8-openjdk
+ENV PATH $PATH:$JAVA_HOME/bin
+# Passwordless SSH
+RUN ssh-keygen -q -N "" -t dsa -f /etc/ssh/ssh_host_dsa_key
+RUN ssh-keygen -q -N "" -t rsa -f /etc/ssh/ssh_host_rsa_key
+RUN ssh-keygen -q -N "" -t rsa -f /root/.ssh/id_rsa
+RUN cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys
+ADD ssh_config /root/.ssh/config
+RUN chmod 600 /root/.ssh/config
+RUN chown root:root /root/.ssh/config
+RUN echo "Port 2122" >> /etc/ssh/sshd_config
+# Install Hadoop
+ENV HADOOP_VER 2.7.1
+RUN wget -O hadoop.tar.gz https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VER/hadoop-$HADOOP_VER.tar.gz && \
+	tar -xzf hadoop.tar.gz -C /usr/local/ && rm hadoop.tar.gz
+RUN ln -s /usr/local/hadoop-$HADOOP_VER /usr/local/hadoop
+ENV HADOOP_HOME /usr/local/hadoop
+ENV PATH $PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
+ENV HADOOP_PREFIX $HADOOP_HOME
+ENV HADOOP_COMMON_HOME $HADOOP_HOME
+ENV HADOOP_HDFS_HOME $HADOOP_HOME
+ENV HADOOP_MAPRED_HOME $HADOOP_HOME
+ENV HADOOP_YARN_HOME $HADOOP_HOME
+ENV HADOOP_CONF_DIR $HADOOP_HOME/etc/hadoop
+ENV YARN_CONF_DIR $HADOOP_PREFIX/etc/hadoop
+# Default Conf Files
+ADD core-site.xml $HADOOP_HOME/etc/hadoop/core-site.xml
+ADD hdfs-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml
+ADD mapred-site.xml $HADOOP_HOME/etc/hadoop/mapred-site.xml
+ADD yarn-site.xml $HADOOP_HOME/etc/hadoop/yarn-site.xml
+ADD slaves $HADOOP_HOME/etc/hadoop/slaves
+RUN sed -i "/^export JAVA_HOME/ s:.*:export JAVA_HOME=${JAVA_HOME}\nexport HADOOP_HOME=${HADOOP_HOME}\nexport HADOOP_PREFIX=${HADOOP_PREFIX}:" ${HADOOP_HOME}/etc/hadoop/hadoop-env.sh
+RUN sed -i '/^export HADOOP_CONF_DIR/ s:.*:export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop/:' $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh
+WORKDIR $HADOOP_HOME
 ADD bootstrap.sh /etc/bootstrap.sh
-RUN mkdir -p /mnt/hadoop-config
-ADD core-site.xml /mnt/hadoop-config
-ADD yarn-site.xml /mnt/hadoop-config
-ADD mapred-site.xml /mnt/hadoop-config
-ADD hdfs-site.xml /mnt/hadoop-config
-ADD slaves /mnt/hadoop-config
-WORKDIR /usr/local/hadoop
 CMD ["/etc/bootstrap.sh", "-d"]

hadoop/2.7.1/README.md
View File

@@ -1,87 +1,115 @@
# Deploy a Hadoop Cluster with Docker
## Start Master
```bash
docker service create \
--name hadoop-master \
--hostname hadoop-master \
--network swarm-net \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
newnius/hadoop:2.7.1
```
## Start slaves
```bash
docker service create \
--name hadoop-slave1 \
--hostname hadoop-slave1 \
--network swarm-net \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
newnius/hadoop:2.7.1
```
```bash
docker service create \
--name hadoop-slave2 \
--network swarm-net \
--hostname hadoop-slave2 \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
newnius/hadoop:2.7.1
```
```bash
docker service create \
--name hadoop-slave3 \
--hostname hadoop-slave3 \
--network swarm-net \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
newnius/hadoop:2.7.1
```
## Init for the first time
#### format dfs first
Run these commands on the master node.
```bash
# stop HDFS services
sbin/stop-dfs.sh
# format HDFS metadata
bin/hadoop namenode -format
# restart HDFS services
sbin/start-dfs.sh
```
## Run a test job
To make sure you have successfully set up the Hadoop cluster, run the following commands and check that the job executes well.
```bash
# prepare input data
bin/hadoop dfs -mkdir -p /user/root/input
# copy files to input path
bin/hadoop dfs -put etc/hadoop/* /user/root/input
# submit the job
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar grep input output 'dfs[a-z.]+'
```
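If the job runs well, the matched terms land in the `output` directory on HDFS. A quick way to inspect the result (same `bin/hadoop` entry point as above):
```bash
# a _SUCCESS marker in the listing means the job completed
bin/hadoop fs -ls output
# print the matched terms with their counts
bin/hadoop fs -cat 'output/part-*'
```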
## Browse the web UI
You can expose the ports in the script, but I'd rather not, since the slaves would occupy the same ports.
To access the web UI, deploy another (socks5) proxy to route the traffic.
If you don't have one, try [newnius/docker-proxy](https://hub.docker.com/r/newnius/docker-proxy/); it is rather easy to use.
Visit [hadoop-master:8088](http://hadoop-master:8088) for the YARN pages.
Visit [hadoop-master:50070](http://hadoop-master:50070) for the HDFS pages.
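Before wiring up a proxy, you can sanity-check that the UIs answer from inside the overlay network. A minimal sketch, assuming it is run on the swarm node that hosts the master task (BusyBox `wget` ships with the Alpine base image):
```bash
# fetch the YARN ResourceManager front page through the overlay network
docker exec $(docker ps -q -f name=hadoop-master) wget -qO- http://hadoop-master:8088/cluster | head -n 5
```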
## Custom configuration
To persist data or modify the conf files, refer to the following script.
The `/config/hadoop` path is where replacement conf files go; you don't have to put all the files there.
```bash
docker service create \
--name hadoop-master \
--hostname hadoop-master \
--network swarm-net \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
--mount type=bind,source=/data/hadoop/config,target=/config/hadoop \
--mount type=bind,source=/data/hadoop/hdfs/master,target=/tmp/hadoop-root \
--mount type=bind,source=/data/hadoop/logs/master,target=/usr/local/hadoop/logs \
newnius/hadoop:2.7.1
```
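For example, to change the HDFS replication factor, drop an edited `hdfs-site.xml` into the host directory mounted at `/config/hadoop`; on startup, bootstrap.sh copies every file in that directory over the defaults in `$HADOOP_HOME/etc/hadoop`. Here `my-hdfs-site.xml` is a placeholder for your edited copy:
```bash
# files placed here replace the same-named defaults at container startup
mkdir -p /data/hadoop/config
cp my-hdfs-site.xml /data/hadoop/config/hdfs-site.xml
```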

hadoop/2.7.1/bootstrap.sh
View File

@@ -9,10 +9,14 @@ rm /tmp/*.pid
 # installing libraries if any - (resource urls added comma separated to the ACP system variable)
 cd $HADOOP_PREFIX/share/hadoop/common ; for cp in ${ACP//,/ }; do echo == $cp; curl -LO $cp ; done; cd -
-# replace config
-cp /mnt/hadoop-config/* $HADOOP_PREFIX/etc/hadoop/
-service sshd start
+## replace config
+: ${EXTRA_CONF_DIR:=/config/hadoop}
+if [ -d "$EXTRA_CONF_DIR" ]; then
+	cp $EXTRA_CONF_DIR/* $HADOOP_PREFIX/etc/hadoop/
+fi
+/usr/sbin/sshd
 ## stop all in case master starts far behind
 $HADOOP_PREFIX/sbin/stop-yarn.sh

hadoop/2.7.1/core-site.xml
View File

@@ -17,7 +17,6 @@
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop-master:8020</value>

hadoop/2.7.1/hdfs-site.xml
View File

@@ -17,12 +17,10 @@
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop-slave1:50090</value>
@@ -31,16 +29,12 @@
<name>dfs.namenode.http-address</name>
<value>hadoop-master:50070</value>
</property>
<property>
<name>dfs.datanode.max.transfer.threads</name>
<value>8192</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
</configuration>

hadoop/2.7.1/mapred-site.xml
View File

@@ -17,10 +17,16 @@
 <!-- Put site-specific property overrides in this file. -->
 <configuration>
 <property>
 <name>mapreduce.framework.name</name>
 <value>yarn</value>
 </property>
+<property>
+<name>mapreduce.jobhistory.address</name>
+<value>hadoop-master:10020</value>
+</property>
+<property>
+<name>mapreduce.jobhistory.webapp.address</name>
+<value>hadoop-master:19888</value>
+</property>
 </configuration>

5
hadoop/2.7.1/ssh_config Normal file
View File

@@ -0,0 +1,5 @@
Host *
UserKnownHostsFile /dev/null
StrictHostKeyChecking no
LogLevel quiet
Port 2122

hadoop/2.7.1/yarn-site.xml
View File

@@ -14,34 +14,26 @@
-->
<!-- Site specific YARN configuration properties -->
<configuration>
<property>
<name>yarn.application.classpath</name>
<value>/usr/local/hadoop/etc/hadoop, /usr/local/hadoop/share/hadoop/common/*, /usr/local/hadoop/share/hadoop/common/lib/*, /usr/local/hadoop/share/hadoop/hdfs/*, /usr/local/hadoop/share/hadoop/hdfs/lib/*, /usr/local/hadoop/share/hadoop/mapreduce/*, /usr/local/hadoop/share/hadoop/mapreduce/lib/*, /usr/local/hadoop/share/hadoop/yarn/*, /usr/local/hadoop/share/hadoop/yarn/lib/*</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop-master</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>2048</value>

hadoop/2.8.1/Dockerfile
View File

@@ -1,4 +1,4 @@
-FROM alpine:3.6
+FROM alpine:3.8
 MAINTAINER Newnius <newnius.cn@gmail.com>
@@ -23,10 +23,12 @@ RUN chown root:root /root/.ssh/config
 RUN echo "Port 2122" >> /etc/ssh/sshd_config
 # Install Hadoop
-RUN wget -O hadoop.tar.gz https://archive.apache.org/dist/hadoop/common/hadoop-2.8.1/hadoop-2.8.1.tar.gz && \
-	tar -xzf hadoop.tar.gz -C /usr/local/ && rm hadoop.tar.gz
-RUN ln -s /usr/local/hadoop-2.8.1 /usr/local/hadoop
+ENV HADOOP_VER 2.8.1
+RUN wget -O hadoop.tar.gz https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VER/hadoop-$HADOOP_VER.tar.gz && \
+	tar -xzf hadoop.tar.gz -C /usr/local/ && rm hadoop.tar.gz
+RUN ln -s /usr/local/hadoop-$HADOOP_VER /usr/local/hadoop
 ENV HADOOP_HOME /usr/local/hadoop
 ENV PATH $PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
@@ -46,10 +48,9 @@ ADD mapred-site.xml $HADOOP_HOME/etc/hadoop/mapred-site.xml
 ADD yarn-site.xml $HADOOP_HOME/etc/hadoop/yarn-site.xml
 ADD slaves $HADOOP_HOME/etc/hadoop/slaves
-RUN sed -i "s|export JAVA_HOME=.*|export JAVA_HOME=${JAVA_HOME}\nexport HADOOP_HOME=${HADOOP_HOME}\nexport HADOOP_PREFIX=${HADOOP_PREFIX}|g" ${HADOOP_PREFIX}/etc/hadoop/hadoop-env.sh
-RUN sed -i "s|export HADOOP_CONF_DIR=.*|export HADOOP_CONF_DIR=${HADOOP_PREFIX}/etc/hadoop/|g" ${HADOOP_PREFIX}/etc/hadoop/hadoop-env.sh
+RUN sed -i "/^export JAVA_HOME/ s:.*:export JAVA_HOME=${JAVA_HOME}\nexport HADOOP_HOME=${HADOOP_HOME}\nexport HADOOP_PREFIX=${HADOOP_PREFIX}:" ${HADOOP_HOME}/etc/hadoop/hadoop-env.sh
+RUN sed -i '/^export HADOOP_CONF_DIR/ s:.*:export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop/:' $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh
 WORKDIR $HADOOP_HOME

hadoop/2.8.1/README.md
View File

@@ -1,87 +1,115 @@
# Deploy a Hadoop Cluster with Docker
## Start Master
```bash
docker service create \
--name hadoop-master \
--hostname hadoop-master \
--network swarm-net \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
newnius/hadoop:2.8.1
```
## Start slaves
```bash
docker service create \
--name hadoop-slave1 \
--hostname hadoop-slave1 \
--network swarm-net \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
newnius/hadoop:2.8.1
```
```bash
docker service create \
--name hadoop-slave2 \
--network swarm-net \
--hostname hadoop-slave2 \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
newnius/hadoop:2.8.1
```
```bash
docker service create \
--name hadoop-slave3 \
--hostname hadoop-slave3 \
--network swarm-net \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
newnius/hadoop:2.8.1
```
## Init for the first time
#### format dfs first
Run these commands on the master node.
```bash
# stop HDFS services
sbin/stop-dfs.sh
# format HDFS metadata
bin/hadoop namenode -format
# restart HDFS services
sbin/start-dfs.sh
```
## Run a test job
To make sure you have successfully set up the Hadoop cluster, run the following commands and check that the job executes well.
```bash
# prepare input data
bin/hadoop dfs -mkdir -p /user/root/input
# copy files to input path
bin/hadoop dfs -put etc/hadoop/* /user/root/input
# submit the job
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.8.1.jar grep input output 'dfs[a-z.]+'
```
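If the job runs well, the matched terms land in the `output` directory on HDFS. A quick way to inspect the result (same `bin/hadoop` entry point as above):
```bash
# a _SUCCESS marker in the listing means the job completed
bin/hadoop fs -ls output
# print the matched terms with their counts
bin/hadoop fs -cat 'output/part-*'
```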
## Browse the web UI
You can expose the ports in the script, but I'd rather not, since the slaves would occupy the same ports.
To access the web UI, deploy another (socks5) proxy to route the traffic.
If you don't have one, try [newnius/docker-proxy](https://hub.docker.com/r/newnius/docker-proxy/); it is rather easy to use.
Visit [hadoop-master:8088](http://hadoop-master:8088) for the YARN pages.
Visit [hadoop-master:50070](http://hadoop-master:50070) for the HDFS pages.
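Before wiring up a proxy, you can sanity-check that the UIs answer from inside the overlay network. A minimal sketch, assuming it is run on the swarm node that hosts the master task (BusyBox `wget` ships with the Alpine base image):
```bash
# fetch the YARN ResourceManager front page through the overlay network
docker exec $(docker ps -q -f name=hadoop-master) wget -qO- http://hadoop-master:8088/cluster | head -n 5
```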
## Custom configuration
To persist data or modify the conf files, refer to the following script.
The `/config/hadoop` path is where replacement conf files go; you don't have to put all the files there.
```bash
docker service create \
--name hadoop-master \
--hostname hadoop-master \
--network swarm-net \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
--mount type=bind,source=/data/hadoop/config,target=/config/hadoop \
--mount type=bind,source=/data/hadoop/hdfs/master,target=/tmp/hadoop-root \
--mount type=bind,source=/data/hadoop/logs/master,target=/usr/local/hadoop/logs \
newnius/hadoop:2.8.1
```
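For example, to change the HDFS replication factor, drop an edited `hdfs-site.xml` into the host directory mounted at `/config/hadoop`; on startup, bootstrap.sh copies every file in that directory over the defaults in `$HADOOP_HOME/etc/hadoop`. Here `my-hdfs-site.xml` is a placeholder for your edited copy:
```bash
# files placed here replace the same-named defaults at container startup
mkdir -p /data/hadoop/config
cp my-hdfs-site.xml /data/hadoop/config/hdfs-site.xml
```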

hadoop/2.8.1/bootstrap.sh
View File

@@ -9,8 +9,12 @@ rm /tmp/*.pid
 # installing libraries if any - (resource urls added comma separated to the ACP system variable)
 cd $HADOOP_PREFIX/share/hadoop/common ; for cp in ${ACP//,/ }; do echo == $cp; curl -LO $cp ; done; cd -
-# replace config
-cp /mnt/hadoop-config/* $HADOOP_PREFIX/etc/hadoop/
+## replace config
+: ${EXTRA_CONF_DIR:=/config/hadoop}
+if [ -d "$EXTRA_CONF_DIR" ]; then
+	cp $EXTRA_CONF_DIR/* $HADOOP_PREFIX/etc/hadoop/
+fi
 /usr/sbin/sshd

59
hadoop/2.8.4/Dockerfile Normal file
View File

@@ -0,0 +1,59 @@
FROM alpine:3.8
MAINTAINER Newnius <newnius.cn@gmail.com>
USER root
# Prerequisites
RUN apk add --no-cache openssh openssl openjdk8-jre rsync bash procps
ENV JAVA_HOME /usr/lib/jvm/java-1.8-openjdk
ENV PATH $PATH:$JAVA_HOME/bin
# Passwordless SSH
RUN ssh-keygen -q -N "" -t dsa -f /etc/ssh/ssh_host_dsa_key
RUN ssh-keygen -q -N "" -t rsa -f /etc/ssh/ssh_host_rsa_key
RUN ssh-keygen -q -N "" -t rsa -f /root/.ssh/id_rsa
RUN cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys
ADD ssh_config /root/.ssh/config
RUN chmod 600 /root/.ssh/config
RUN chown root:root /root/.ssh/config
RUN echo "Port 2122" >> /etc/ssh/sshd_config
# Install Hadoop
ENV HADOOP_VER 2.8.4
RUN wget -O hadoop.tar.gz https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VER/hadoop-$HADOOP_VER.tar.gz && \
tar -xzf hadoop.tar.gz -C /usr/local/ && rm hadoop.tar.gz
RUN ln -s /usr/local/hadoop-$HADOOP_VER /usr/local/hadoop
ENV HADOOP_HOME /usr/local/hadoop
ENV PATH $PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
ENV HADOOP_PREFIX $HADOOP_HOME
ENV HADOOP_COMMON_HOME $HADOOP_HOME
ENV HADOOP_HDFS_HOME $HADOOP_HOME
ENV HADOOP_MAPRED_HOME $HADOOP_HOME
ENV HADOOP_YARN_HOME $HADOOP_HOME
ENV HADOOP_CONF_DIR $HADOOP_HOME/etc/hadoop
ENV YARN_CONF_DIR $HADOOP_PREFIX/etc/hadoop
# Default Conf Files
ADD core-site.xml $HADOOP_HOME/etc/hadoop/core-site.xml
ADD hdfs-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml
ADD mapred-site.xml $HADOOP_HOME/etc/hadoop/mapred-site.xml
ADD yarn-site.xml $HADOOP_HOME/etc/hadoop/yarn-site.xml
ADD slaves $HADOOP_HOME/etc/hadoop/slaves
RUN sed -i "/^export JAVA_HOME/ s:.*:export JAVA_HOME=${JAVA_HOME}\nexport HADOOP_HOME=${HADOOP_HOME}\nexport HADOOP_PREFIX=${HADOOP_PREFIX}:" ${HADOOP_HOME}/etc/hadoop/hadoop-env.sh
RUN sed -i '/^export HADOOP_CONF_DIR/ s:.*:export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop/:' $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh
WORKDIR $HADOOP_HOME
ADD bootstrap.sh /etc/bootstrap.sh
CMD ["/etc/bootstrap.sh", "-d"]

115
hadoop/2.8.4/README.md Normal file
View File

@@ -0,0 +1,115 @@
# Deploy a Hadoop Cluster with Docker
## Start Master
```bash
docker service create \
--name hadoop-master \
--hostname hadoop-master \
--network swarm-net \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
newnius/hadoop:2.8.4
```
## Start slaves
```bash
docker service create \
--name hadoop-slave1 \
--hostname hadoop-slave1 \
--network swarm-net \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
newnius/hadoop:2.8.4
```
```bash
docker service create \
--name hadoop-slave2 \
--network swarm-net \
--hostname hadoop-slave2 \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
newnius/hadoop:2.8.4
```
```bash
docker service create \
--name hadoop-slave3 \
--hostname hadoop-slave3 \
--network swarm-net \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
newnius/hadoop:2.8.4
```
## Init for the first time
#### format dfs first
Run these commands on the master node.
```bash
# stop HDFS services
sbin/stop-dfs.sh
# format HDFS metadata
bin/hadoop namenode -format
# restart HDFS services
sbin/start-dfs.sh
```
## Run a test job
To make sure you have successfully set up the Hadoop cluster, run the following commands and check that the job executes well.
```bash
# prepare input data
bin/hadoop dfs -mkdir -p /user/root/input
# copy files to input path
bin/hadoop dfs -put etc/hadoop/* /user/root/input
# submit the job
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.8.4.jar grep input output 'dfs[a-z.]+'
```
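If the job runs well, the matched terms land in the `output` directory on HDFS. A quick way to inspect the result (same `bin/hadoop` entry point as above):
```bash
# a _SUCCESS marker in the listing means the job completed
bin/hadoop fs -ls output
# print the matched terms with their counts
bin/hadoop fs -cat 'output/part-*'
```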
## Browse the web UI
You can expose the ports in the script, but I'd rather not, since the slaves would occupy the same ports.
To access the web UI, deploy another (socks5) proxy to route the traffic.
If you don't have one, try [newnius/docker-proxy](https://hub.docker.com/r/newnius/docker-proxy/); it is rather easy to use.
Visit [hadoop-master:8088](http://hadoop-master:8088) for the YARN pages.
Visit [hadoop-master:50070](http://hadoop-master:50070) for the HDFS pages.
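Before wiring up a proxy, you can sanity-check that the UIs answer from inside the overlay network. A minimal sketch, assuming it is run on the swarm node that hosts the master task (BusyBox `wget` ships with the Alpine base image):
```bash
# fetch the YARN ResourceManager front page through the overlay network
docker exec $(docker ps -q -f name=hadoop-master) wget -qO- http://hadoop-master:8088/cluster | head -n 5
```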
## Custom configuration
To persist data or modify the conf files, refer to the following script.
The `/config/hadoop` path is where replacement conf files go; you don't have to put all the files there.
```bash
docker service create \
--name hadoop-master \
--hostname hadoop-master \
--network swarm-net \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
--mount type=bind,source=/data/hadoop/config,target=/config/hadoop \
--mount type=bind,source=/data/hadoop/hdfs/master,target=/tmp/hadoop-root \
--mount type=bind,source=/data/hadoop/logs/master,target=/usr/local/hadoop/logs \
newnius/hadoop:2.8.4
```
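For example, to change the HDFS replication factor, drop an edited `hdfs-site.xml` into the host directory mounted at `/config/hadoop`; on startup, bootstrap.sh copies every file in that directory over the defaults in `$HADOOP_HOME/etc/hadoop`. Here `my-hdfs-site.xml` is a placeholder for your edited copy:
```bash
# files placed here replace the same-named defaults at container startup
mkdir -p /data/hadoop/config
cp my-hdfs-site.xml /data/hadoop/config/hdfs-site.xml
```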

35
hadoop/2.8.4/bootstrap.sh Executable file
View File

@@ -0,0 +1,35 @@
#!/bin/bash
: ${HADOOP_PREFIX:=/usr/local/hadoop}
$HADOOP_PREFIX/etc/hadoop/hadoop-env.sh
rm /tmp/*.pid
# installing libraries if any - (resource urls added comma separated to the ACP system variable)
cd $HADOOP_PREFIX/share/hadoop/common ; for cp in ${ACP//,/ }; do echo == $cp; curl -LO $cp ; done; cd -
## replace config
: ${EXTRA_CONF_DIR:=/config/hadoop}
if [ -d "$EXTRA_CONF_DIR" ]; then
cp $EXTRA_CONF_DIR/* $HADOOP_PREFIX/etc/hadoop/
fi
/usr/sbin/sshd
## stop all in case master starts far behind
$HADOOP_PREFIX/sbin/stop-yarn.sh
$HADOOP_PREFIX/sbin/stop-dfs.sh
$HADOOP_PREFIX/sbin/start-dfs.sh
$HADOOP_PREFIX/sbin/start-yarn.sh
$HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh start historyserver
if [[ $1 == "-d" ]]; then
while true; do sleep 1000; done
fi
if [[ $1 == "-bash" ]]; then
/bin/bash
fi

28
hadoop/2.8.4/core-site.xml Normal file
View File

@@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop-master:8020</value>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://hadoop-master:8020</value>
</property>
</configuration>

40
hadoop/2.8.4/hdfs-site.xml Normal file
View File

@@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop-slave1:50090</value>
</property>
<property>
<name>dfs.namenode.http-address</name>
<value>hadoop-master:50070</value>
</property>
<property>
<name>dfs.datanode.max.transfer.threads</name>
<value>8192</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
</configuration>

32
hadoop/2.8.4/mapred-site.xml Normal file
View File

@@ -0,0 +1,32 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop-master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop-master:19888</value>
</property>
</configuration>

3
hadoop/2.8.4/slaves Normal file
View File

@@ -0,0 +1,3 @@
hadoop-slave1
hadoop-slave2
hadoop-slave3

5
hadoop/2.8.4/ssh_config Normal file
View File

@@ -0,0 +1,5 @@
Host *
UserKnownHostsFile /dev/null
StrictHostKeyChecking no
LogLevel quiet
Port 2122

49
hadoop/2.8.4/yarn-site.xml Normal file
View File

@@ -0,0 +1,49 @@
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Site specific YARN configuration properties -->
<configuration>
<property>
<name>yarn.application.classpath</name>
<value>/usr/local/hadoop/etc/hadoop, /usr/local/hadoop/share/hadoop/common/*, /usr/local/hadoop/share/hadoop/common/lib/*, /usr/local/hadoop/share/hadoop/hdfs/*, /usr/local/hadoop/share/hadoop/hdfs/lib/*, /usr/local/hadoop/share/hadoop/mapreduce/*, /usr/local/hadoop/share/hadoop/mapreduce/lib/*, /usr/local/hadoop/share/hadoop/yarn/*, /usr/local/hadoop/share/hadoop/yarn/lib/*</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop-master</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>2048</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>2</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>1024</value>
</property>
</configuration>

hadoop/2.9.1/Dockerfile
View File

@@ -1,4 +1,4 @@
-FROM alpine:3.6
+FROM alpine:3.8
 MAINTAINER Newnius <newnius.cn@gmail.com>
@@ -25,7 +25,7 @@ RUN echo "Port 2122" >> /etc/ssh/sshd_config
 # Install Hadoop
 ENV HADOOP_VER 2.9.1
-RUN wget -O hadoop.tar.gz http://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VER/hadoop-$HADOOP_VER.tar.gz && \
+RUN wget -O hadoop.tar.gz https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VER/hadoop-$HADOOP_VER.tar.gz && \
 	tar -xzf hadoop.tar.gz -C /usr/local/ && rm hadoop.tar.gz
 RUN ln -s /usr/local/hadoop-$HADOOP_VER /usr/local/hadoop