diff --git a/hadoop/2.7.1/Dockerfile b/hadoop/2.7.1/Dockerfile
new file mode 100644
index 0000000..e949a7b
--- /dev/null
+++ b/hadoop/2.7.1/Dockerfile
@@ -0,0 +1,16 @@
+FROM sequenceiq/hadoop-docker:2.7.1
+
+# MAINTAINER is deprecated; the maintainer is recorded as a label instead.
+LABEL maintainer="Newnius"
+
+# Entry point script; see CMD below.
+COPY bootstrap.sh /etc/bootstrap.sh
+
+# Default cluster configuration. bootstrap.sh copies everything found in
+# /mnt/hadoop-config into the Hadoop config directory at start-up. The trailing
+# slash makes COPY treat the destination as a directory and create it.
+COPY core-site.xml yarn-site.xml mapred-site.xml hdfs-site.xml slaves /mnt/hadoop-config/
+
+WORKDIR /usr/local/hadoop
+
+CMD ["/etc/bootstrap.sh", "-d"]
diff --git a/hadoop/2.7.1/README b/hadoop/2.7.1/README
new file mode 100644
index 0000000..3bad9de
--- /dev/null
+++ b/hadoop/2.7.1/README
@@ -0,0 +1,97 @@
+# based on sequenceiq/hadoop-docker
+
+## create a hadoop cluster in swarm mode
+
+`--hostname` requires Docker 1.13 or higher
+
+```
+docker service create \
+--name hadoop-master \
+--network swarm-net \
+--hostname hadoop-master \
+--replicas 1 \
+--mount type=bind,source=/mnt/data/hadoop/hdfs/master,target=/tmp/hadoop-root \
+--mount type=bind,source=/mnt/data/hadoop/logs/master,target=/usr/local/hadoop/logs \
+--mount type=bind,source=/mnt/data/hadoop/config,target=/mnt/hadoop-config \
+--endpoint-mode dnsrr \
+newnius/hadoop
+```
+
+```
+docker service create \
+--name hadoop-slave1 \
+--network swarm-net \
+--hostname hadoop-slave1 \
+--replicas 1 \
+--mount type=bind,source=/mnt/data/hadoop/hdfs/slave1,target=/tmp/hadoop-root \
+--mount type=bind,source=/mnt/data/hadoop/logs/slave1,target=/usr/local/hadoop/logs \
+--mount type=bind,source=/mnt/data/hadoop/config,target=/mnt/hadoop-config \
+--endpoint-mode dnsrr \
+newnius/hadoop
+```
+
+```
+docker service create \
+--name hadoop-slave2 \
+--network swarm-net \
+--hostname hadoop-slave2 \
+--replicas 1 \
+--mount type=bind,source=/mnt/data/hadoop/hdfs/slave2,target=/tmp/hadoop-root \
+--mount type=bind,source=/mnt/data/hadoop/logs/slave2,target=/usr/local/hadoop/logs \
+--mount type=bind,source=/mnt/data/hadoop/config,target=/mnt/hadoop-config \
+--endpoint-mode dnsrr \
+newnius/hadoop
+```
+
+```
+docker service create \
+--name hadoop-slave3 \
+--network swarm-net \
+--hostname hadoop-slave3 \
+--replicas 1 \
+--mount type=bind,source=/mnt/data/hadoop/hdfs/slave3,target=/tmp/hadoop-root \
+--mount type=bind,source=/mnt/data/hadoop/logs/slave3,target=/usr/local/hadoop/logs \
+--mount type=bind,source=/mnt/data/hadoop/config,target=/mnt/hadoop-config \
+--endpoint-mode dnsrr \
+newnius/hadoop
+```
+
+## Init && Test
+
+On the first deployment, format HDFS first
+
+### stop cluster (in master)
+`sbin/stop-yarn.sh`
+`sbin/stop-dfs.sh`
+
+### remove previous data (in all nodes)
+Clear all data in /tmp on all nodes
+
+### format hdfs (in master)
+```
+bin/hadoop namenode -format
+```
+
+### start cluster (in master)
+`sbin/start-dfs.sh`
+`sbin/start-yarn.sh`
+
+### Run a test job
+
+bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar grep input output 'dfs[a-z.]+'
+
+Note: the input directory does not exist by default; you can create it and add data with
+
+```
+bin/hadoop dfs -mkdir -p /user/root/input
+```
+and
+```
+bin/hadoop dfs -put etc/hadoop/* /user/root/input
+```
+
+### monitor cluster in browser
+
+YARN: hadoop-master:8088
+
+HDFS: hadoop-master:50070
diff --git a/hadoop/2.7.1/bootstrap.sh b/hadoop/2.7.1/bootstrap.sh
new file mode 100755
index 0000000..33a40e4
--- /dev/null
+++ b/hadoop/2.7.1/bootstrap.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Container entry point: copies the configuration from /mnt/hadoop-config into
+# the Hadoop config directory, starts sshd and the Hadoop daemons, then either
+# idles forever ("-d") or opens an interactive shell ("-bash").
+
+: "${HADOOP_PREFIX:=/usr/local/hadoop}"
+
+# NOTE(review): hadoop-env.sh is executed, not sourced, so anything it exports
+# stays in the child process. Left unchanged; confirm whether sourcing was intended.
+"$HADOOP_PREFIX/etc/hadoop/hadoop-env.sh"
+
+# Remove stale pid files from a previous run. -f keeps a first start, when no
+# pid file exists yet, from printing an error.
+rm -f /tmp/*.pid
+
+# installing libraries if any - (resource urls added comma separated to the ACP system variable)
+# Runs in a subshell so a failed cd can never download into the wrong directory.
+if [[ -n "${ACP:-}" ]]; then
+  (
+    cd "$HADOOP_PREFIX/share/hadoop/common" || exit 1
+    for cp in ${ACP//,/ }; do
+      echo "== $cp"
+      curl -LO "$cp"
+    done
+  )
+fi
+
+# replace config
+cp /mnt/hadoop-config/* "$HADOOP_PREFIX/etc/hadoop/"
+
+service sshd start
+"$HADOOP_PREFIX/sbin/start-dfs.sh"
+"$HADOOP_PREFIX/sbin/start-yarn.sh"
+"$HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh" start historyserver
+
+if [[ $1 == "-d" ]]; then
+  while true; do sleep 1000; done
+fi
+
+if [[ $1 == "-bash" ]]; then
+  /bin/bash
+fi
diff --git a/hadoop/2.7.1/core-site.xml b/hadoop/2.7.1/core-site.xml
new file mode 100644
index 0000000..da536c3
--- /dev/null
+++ b/hadoop/2.7.1/core-site.xml
@@ -0,0 +1,29 @@
+ + + + + + + + + + fs.defaultFS + hdfs://hadoop-master:8020 + + + fs.default.name + hdfs://hadoop-master:8020 + +
diff --git a/hadoop/2.7.1/hdfs-site.xml b/hadoop/2.7.1/hdfs-site.xml
new file mode 100644
index 0000000..af4e13d
--- /dev/null
+++ b/hadoop/2.7.1/hdfs-site.xml
@@ -0,0 +1,46 @@
+ + + + + + + + + + dfs.permissions + false + + + + dfs.namenode.secondary.http-address + hadoop-slave1:50090 + + + dfs.namenode.http-address + hadoop-master:50070 + + + + dfs.datanode.max.transfer.threads + 8192 + + + + dfs.replication + 3 + + + +
diff --git a/hadoop/2.7.1/mapred-site.xml b/hadoop/2.7.1/mapred-site.xml
new file mode 100644
index 0000000..ce18519
--- /dev/null
+++ b/hadoop/2.7.1/mapred-site.xml
@@ -0,0 +1,26 @@
+ + + + + + + + + + mapreduce.framework.name + yarn + + +
diff --git a/hadoop/2.7.1/slaves b/hadoop/2.7.1/slaves
new file mode 100644
index 0000000..3bb91be
--- /dev/null
+++ b/hadoop/2.7.1/slaves
@@ -0,0 +1,3 @@
+hadoop-slave1
+hadoop-slave2
+hadoop-slave3
diff --git a/hadoop/2.7.1/yarn-site.xml b/hadoop/2.7.1/yarn-site.xml
new file mode 100644
index 0000000..f3acd92
--- /dev/null
+++ b/hadoop/2.7.1/yarn-site.xml
@@ -0,0 +1,57 @@
+ + + + + + + yarn.application.classpath + /usr/local/hadoop/etc/hadoop, /usr/local/hadoop/share/hadoop/common/*, /usr/local/hadoop/share/hadoop/common/lib/*, /usr/local/hadoop/share/hadoop/hdfs/*, /usr/local/hadoop/share/hadoop/hdfs/lib/*, /usr/local/hadoop/share/hadoop/mapreduce/*, /usr/local/hadoop/share/hadoop/mapreduce/lib/*, /usr/local/hadoop/share/hadoop/yarn/*, /usr/local/hadoop/share/hadoop/yarn/lib/* + + + + + yarn.resourcemanager.hostname + hadoop-master + + + + yarn.nodemanager.aux-services + mapreduce_shuffle + + +
+ yarn.log-aggregation-enable + true + + + + yarn.log-aggregation.retain-seconds + 604800 + + + + + yarn.nodemanager.resource.memory-mb + 2048 + + + yarn.nodemanager.resource.cpu-vcores + 2 + + + yarn.scheduler.minimum-allocation-mb + 1024 + +