mirror of https://github.com/newnius/Dockerfiles.git synced 2025-06-07 08:31:55 +00:00

History

Newnius db2c73876f update hadoop		2018-08-03 13:07:18 +08:00
..
bootstrap.sh	add hadoop:2.7.4, add soft link to fix a bug	2017-09-01 17:24:37 +08:00
core-site.xml	reformat	2017-10-14 10:15:59 +08:00
Dockerfile	add hadoop:2.7.4, add soft link to fix a bug	2017-09-01 17:24:37 +08:00
hdfs-site.xml	bugfix	2018-02-01 20:41:08 +08:00
mapred-site.xml	reformat	2017-10-14 10:15:59 +08:00
README.md	update hadoop	2018-08-03 13:07:18 +08:00
slaves	add hadoop:2.7.4, add soft link to fix a bug	2017-09-01 17:24:37 +08:00
ssh_config	add hadoop:2.7.4, add soft link to fix a bug	2017-09-01 17:24:37 +08:00
yarn-site.xml	reformat	2017-10-14 10:15:59 +08:00

This image is based on Alpine Linux.

Create a Hadoop cluster in Docker swarm mode

Note: the `--hostname` option requires Docker 1.13 or higher.

docker service create \
--name hadoop-master \
--hostname hadoop-master \
--network swarm-net \
--replicas 1 \
--endpoint-mode dnsrr \
newnius/hadoop

docker service create \
--name hadoop-slave1 \
--hostname hadoop-slave1 \
--network swarm-net \
--replicas 1 \
--endpoint-mode dnsrr \
newnius/hadoop

docker service create \
--name hadoop-slave2 \
--hostname hadoop-slave2 \
--network swarm-net \
--replicas 1 \
--endpoint-mode dnsrr \
newnius/hadoop

docker service create \
--name hadoop-slave3 \
--hostname hadoop-slave3 \
--network swarm-net \
--replicas 1 \
--endpoint-mode dnsrr \
newnius/hadoop

On the first deployment, format HDFS before using the cluster:

sbin/stop-dfs.sh

bin/hadoop namenode -format

sbin/start-dfs.sh

# prepare input data
bin/hadoop dfs -mkdir -p /user/root/input

bin/hadoop dfs -put etc/hadoop/* /user/root/input

bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.4.jar grep input output 'dfs[a-z.]+'

YARN: hadoop-master:8088

HDFS: hadoop-master:50070

A proxy is needed to reach the web UIs above, e.g. newnius/docker-proxy