add hadoop

This commit is contained in:
newnius 2017-02-26 15:16:53 +08:00
parent dea8e39309
commit ca6ff3ecac
8 changed files with 301 additions and 0 deletions

17
hadoop/2.7.1/Dockerfile Normal file

@@ -0,0 +1,17 @@
FROM sequenceiq/hadoop-docker:2.7.1
MAINTAINER Newnius <newnius.cn@gmail.com>
ADD bootstrap.sh /etc/bootstrap.sh
RUN mkdir -p /mnt/hadoop-config
ADD core-site.xml /mnt/hadoop-config
ADD yarn-site.xml /mnt/hadoop-config
ADD mapred-site.xml /mnt/hadoop-config
ADD hdfs-site.xml /mnt/hadoop-config
ADD slaves /mnt/hadoop-config
WORKDIR /usr/local/hadoop
CMD ["/etc/bootstrap.sh", "-d"]

97
hadoop/2.7.1/README Normal file

@@ -0,0 +1,97 @@
# Based on sequenceiq/hadoop-docker
## Create a Hadoop cluster in swarm mode
`--hostname` requires Docker 1.13 or higher.
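The service commands below assume an overlay network named `swarm-net` already exists; a minimal sketch of creating it (made attachable so standalone containers can join it later, which the monitoring note at the end relies on):
```
# confirm the engine is 1.13 or newer (needed for --hostname on services)
docker version --format '{{.Server.Version}}'
# create the overlay network the services attach to
docker network create --driver overlay --attachable swarm-net
```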
```
docker service create \
--name hadoop-master \
--network swarm-net \
--hostname hadoop-master \
--replicas 1 \
--mount type=bind,source=/mnt/data/hadoop/hdfs/master,target=/tmp/hadoop-root \
--mount type=bind,source=/mnt/data/hadoop/logs/master,target=/usr/local/hadoop/logs \
--mount type=bind,source=/mnt/data/hadoop/config,target=/mnt/hadoop-config \
--endpoint-mode dnsrr \
newnius/hadoop
```
```
docker service create \
--name hadoop-slave1 \
--network swarm-net \
--hostname hadoop-slave1 \
--replicas 1 \
--mount type=bind,source=/mnt/data/hadoop/hdfs/slave1,target=/tmp/hadoop-root \
--mount type=bind,source=/mnt/data/hadoop/logs/slave1,target=/usr/local/hadoop/logs \
--mount type=bind,source=/mnt/data/hadoop/config,target=/mnt/hadoop-config \
--endpoint-mode dnsrr \
newnius/hadoop
```
```
docker service create \
--name hadoop-slave2 \
--network swarm-net \
--hostname hadoop-slave2 \
--replicas 1 \
--mount type=bind,source=/mnt/data/hadoop/hdfs/slave2,target=/tmp/hadoop-root \
--mount type=bind,source=/mnt/data/hadoop/logs/slave2,target=/usr/local/hadoop/logs \
--mount type=bind,source=/mnt/data/hadoop/config,target=/mnt/hadoop-config \
--endpoint-mode dnsrr \
newnius/hadoop
```
```
docker service create \
--name hadoop-slave3 \
--network swarm-net \
--hostname hadoop-slave3 \
--replicas 1 \
--mount type=bind,source=/mnt/data/hadoop/hdfs/slave3,target=/tmp/hadoop-root \
--mount type=bind,source=/mnt/data/hadoop/logs/slave3,target=/usr/local/hadoop/logs \
--mount type=bind,source=/mnt/data/hadoop/config,target=/mnt/hadoop-config \
--endpoint-mode dnsrr \
newnius/hadoop
```
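Once all four services are created, their state can be checked with the usual Swarm commands, for example:
```
docker service ls
docker service ps hadoop-master
```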
## Init && Test
On the first deployment, format HDFS first by following the steps below.
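The commands in the steps below are run inside the master container; one hedged way to get a shell there, assuming you are on the node where the `hadoop-master` task is scheduled:
```
docker exec -it $(docker ps -q -f name=hadoop-master) bash
```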
### Stop the cluster (on the master)
`sbin/stop-yarn.sh`
`sbin/stop-dfs.sh`
### Remove previous data (on all nodes)
Clear all data under `/tmp` on every node.
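Since the HDFS directories are bind-mounted from the hosts, the old data can also be removed host-side; a hedged sketch using the paths from the service commands above (run the matching line on each node's host):
```
# these source directories are bind-mounted to /tmp/hadoop-root inside the containers
rm -rf /mnt/data/hadoop/hdfs/master/*    # on the host running hadoop-master
rm -rf /mnt/data/hadoop/hdfs/slave1/*    # on the host running hadoop-slave1, likewise slave2/slave3
```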
### Format HDFS (on the master)
```
bin/hadoop namenode -format
```
### Start the cluster (on the master)
`sbin/start-dfs.sh`
`sbin/start-yarn.sh`
### Run a test job
`bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar grep input output 'dfs[a-z.]+'`
Note: the `input` directory does not exist by default; you can populate it with
```
bin/hadoop dfs -mkdir -p /user/root/input
```
and
```
bin/hadoop dfs -put etc/hadoop/* /user/root/input
```
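Once the example job finishes, the result can be read back from HDFS (the `output` path is the one passed to the job above):
```
bin/hadoop fs -cat output/*
```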
### Monitor the cluster in a browser
YARN: hadoop-master:8088
HDFS: hadoop-master:50070
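Because the services use `--endpoint-mode dnsrr` and publish no ports, these addresses resolve only inside the `swarm-net` overlay. One hedged way to reach them from a manager host is a throwaway container attached to that network (assumes the network was created as attachable; `busybox` is just an arbitrary image that ships `wget`):
```
docker run --rm --network swarm-net busybox wget -qO- http://hadoop-master:50070
```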

26
hadoop/2.7.1/bootstrap.sh Executable file

@@ -0,0 +1,26 @@
#!/bin/bash
# default install location, overridable via HADOOP_PREFIX
: ${HADOOP_PREFIX:=/usr/local/hadoop}
$HADOOP_PREFIX/etc/hadoop/hadoop-env.sh
# remove stale pid files left over from a previous run
rm -f /tmp/*.pid
# installing libraries if any - (resource urls added comma separated to the ACP system variable)
cd $HADOOP_PREFIX/share/hadoop/common ; for cp in ${ACP//,/ }; do echo == $cp; curl -LO $cp ; done; cd -
# replace config
cp /mnt/hadoop-config/* $HADOOP_PREFIX/etc/hadoop/
# start sshd, the HDFS and YARN daemons, and the MapReduce job history server
service sshd start
$HADOOP_PREFIX/sbin/start-dfs.sh
$HADOOP_PREFIX/sbin/start-yarn.sh
$HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh start historyserver
# -d keeps the container alive for daemon use, -bash drops into an interactive shell
if [[ $1 == "-d" ]]; then
  while true; do sleep 1000; done
fi
if [[ $1 == "-bash" ]]; then
  /bin/bash
fi
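For reference, `ACP` above is a comma-separated list of URLs that get downloaded into `share/hadoop/common` at startup. A hedged sketch of setting it when creating a service (the URL is a placeholder and the mounts are omitted for brevity):
```
docker service create \
  --name hadoop-master \
  --network swarm-net \
  --hostname hadoop-master \
  --env ACP=http://example.com/extra-lib.jar \
  --endpoint-mode dnsrr \
  newnius/hadoop
```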

29
hadoop/2.7.1/core-site.xml Normal file

@@ -0,0 +1,29 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hadoop-master:8020</value>
  </property>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://hadoop-master:8020</value>
  </property>
</configuration>
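With `fs.defaultFS` pointing at `hadoop-master:8020`, a quick sanity check is to list the filesystem root against that exact URI from `/usr/local/hadoop` inside any container on the network:
```
bin/hadoop fs -ls hdfs://hadoop-master:8020/
```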

46
hadoop/2.7.1/hdfs-site.xml Normal file

@@ -0,0 +1,46 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>hadoop-slave1:50090</value>
  </property>
  <property>
    <name>dfs.namenode.http-address</name>
    <value>hadoop-master:50070</value>
  </property>
  <property>
    <name>dfs.datanode.max.transfer.threads</name>
    <value>8192</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
</configuration>
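To confirm that all three DataNodes registered and the replication factor of 3 is satisfiable, the standard admin report can be run on the master (from `/usr/local/hadoop`):
```
bin/hdfs dfsadmin -report
```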

26
hadoop/2.7.1/mapred-site.xml Normal file

@@ -0,0 +1,26 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>

3
hadoop/2.7.1/slaves Normal file

@@ -0,0 +1,3 @@
hadoop-slave1
hadoop-slave2
hadoop-slave3

57
hadoop/2.7.1/yarn-site.xml Normal file

@@ -0,0 +1,57 @@
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Site specific YARN configuration properties -->
<configuration>
  <property>
    <name>yarn.application.classpath</name>
    <value>/usr/local/hadoop/etc/hadoop, /usr/local/hadoop/share/hadoop/common/*, /usr/local/hadoop/share/hadoop/common/lib/*, /usr/local/hadoop/share/hadoop/hdfs/*, /usr/local/hadoop/share/hadoop/hdfs/lib/*, /usr/local/hadoop/share/hadoop/mapreduce/*, /usr/local/hadoop/share/hadoop/mapreduce/lib/*, /usr/local/hadoop/share/hadoop/yarn/*, /usr/local/hadoop/share/hadoop/yarn/lib/*</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hadoop-master</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>604800</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>2048</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>2</value>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>1024</value>
  </property>
</configuration>
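To verify the NodeManagers came up with the configured 2048 MB / 2 vcores, the standard YARN CLI can be consulted on the master (from `/usr/local/hadoop`):
```
bin/yarn node -list
```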