mirror of
https://github.com/newnius/Dockerfiles.git
synced 2025-06-07 16:41:55 +00:00
add hadoop
This commit is contained in:
parent
dea8e39309
commit
ca6ff3ecac
17
hadoop/2.7.1/Dockerfile
Normal file
17
hadoop/2.7.1/Dockerfile
Normal file
@ -0,0 +1,17 @@
|
||||
# Hadoop 2.7.1 image that layers a custom startup script and a default
# cluster configuration on top of sequenceiq/hadoop-docker.
FROM sequenceiq/hadoop-docker:2.7.1

# MAINTAINER is deprecated; record the same contact information as a label.
LABEL maintainer="Newnius <newnius.cn@gmail.com>"

# Startup script used as the container command (see CMD below).
COPY bootstrap.sh /etc/bootstrap.sh

# Default cluster configuration. bootstrap.sh copies everything found in
# /mnt/hadoop-config into the Hadoop config directory at container start.
# The trailing slash makes COPY create the directory, so no separate
# `RUN mkdir -p` layer is needed.
COPY core-site.xml \
     yarn-site.xml \
     mapred-site.xml \
     hdfs-site.xml \
     slaves \
     /mnt/hadoop-config/

WORKDIR /usr/local/hadoop

# "-d" makes bootstrap.sh stay in the foreground after starting the daemons.
CMD ["/etc/bootstrap.sh", "-d"]
|
97
hadoop/2.7.1/README
Normal file
97
hadoop/2.7.1/README
Normal file
@ -0,0 +1,97 @@
|
||||
# based on sequenceiq/hadoop-docker
|
||||
|
||||
## create a hadoop cluster in swarm mode
|
||||
|
||||
`--hostname` requires Docker 1.13 or higher
|
||||
|
||||
```
|
||||
docker service create \
|
||||
--name hadoop-master \
|
||||
--network swarm-net \
|
||||
--hostname hadoop-master \
|
||||
--replicas 1 \
|
||||
--mount type=bind,source=/mnt/data/hadoop/hdfs/master,target=/tmp/hadoop-root \
|
||||
--mount type=bind,source=/mnt/data/hadoop/logs/master,target=/usr/local/hadoop/logs \
|
||||
--mount type=bind,source=/mnt/data/hadoop/config,target=/mnt/hadoop-config \
|
||||
--endpoint-mode dnsrr \
|
||||
newnius/hadoop
|
||||
```
|
||||
|
||||
```
|
||||
docker service create \
|
||||
--name hadoop-slave1 \
|
||||
--network swarm-net \
|
||||
--hostname hadoop-slave1 \
|
||||
--replicas 1 \
|
||||
--mount type=bind,source=/mnt/data/hadoop/hdfs/slave1,target=/tmp/hadoop-root \
|
||||
--mount type=bind,source=/mnt/data/hadoop/logs/slave1,target=/usr/local/hadoop/logs \
|
||||
--mount type=bind,source=/mnt/data/hadoop/config,target=/mnt/hadoop-config \
|
||||
--endpoint-mode dnsrr \
|
||||
newnius/hadoop
|
||||
```
|
||||
|
||||
```
|
||||
docker service create \
|
||||
--name hadoop-slave2 \
|
||||
--network swarm-net \
|
||||
--hostname hadoop-slave2 \
|
||||
--replicas 1 \
|
||||
--mount type=bind,source=/mnt/data/hadoop/hdfs/slave2,target=/tmp/hadoop-root \
|
||||
--mount type=bind,source=/mnt/data/hadoop/logs/slave2,target=/usr/local/hadoop/logs \
|
||||
--mount type=bind,source=/mnt/data/hadoop/config,target=/mnt/hadoop-config \
|
||||
--endpoint-mode dnsrr \
|
||||
newnius/hadoop
|
||||
```
|
||||
|
||||
```
|
||||
docker service create \
|
||||
--name hadoop-slave3 \
|
||||
--network swarm-net \
|
||||
--hostname hadoop-slave3 \
|
||||
--replicas 1 \
|
||||
--mount type=bind,source=/mnt/data/hadoop/hdfs/slave3,target=/tmp/hadoop-root \
|
||||
--mount type=bind,source=/mnt/data/hadoop/logs/slave3,target=/usr/local/hadoop/logs \
|
||||
--mount type=bind,source=/mnt/data/hadoop/config,target=/mnt/hadoop-config \
|
||||
--endpoint-mode dnsrr \
|
||||
newnius/hadoop
|
||||
```
|
||||
|
||||
## Init && Test
|
||||
|
||||
On the first deployment, format HDFS first
|
||||
|
||||
### stop cluster (in master)
|
||||
`sbin/stop-yarn.sh`
|
||||
`sbin/stop-dfs.sh`
|
||||
|
||||
### remove previous data (in all nodes)
|
||||
Clear all data in /tmp on all nodes
|
||||
|
||||
### format hdfs (in master)
|
||||
```
|
||||
bin/hadoop namenode -format
|
||||
```
|
||||
|
||||
### start cluster (in master)
|
||||
`sbin/start-dfs.sh`
|
||||
`sbin/start-yarn.sh`
|
||||
|
||||
### Run a test job
|
||||
|
||||
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar grep input output 'dfs[a-z.]+'
|
||||
|
||||
Note: the `input` directory does not exist by default; you can create it and add data with
|
||||
|
||||
```
|
||||
bin/hadoop dfs -mkdir -p /user/root/input
|
||||
```
|
||||
and
|
||||
```
|
||||
bin/hadoop dfs -put etc/hadoop/* /user/root/input
|
||||
```
|
||||
|
||||
### monitor cluster in browser
|
||||
|
||||
YARN: hadoop-master:8088
|
||||
|
||||
HDFS: hadoop-master:50070
|
26
hadoop/2.7.1/bootstrap.sh
Executable file
26
hadoop/2.7.1/bootstrap.sh
Executable file
@ -0,0 +1,26 @@
|
||||
#!/bin/bash
#
# Container startup script: applies the mounted Hadoop configuration, starts
# sshd and the Hadoop daemons, then either blocks forever or opens a shell.
#
# Usage: bootstrap.sh [-d | -bash]
#   -d     keep the container alive after the daemons have been started
#   -bash  drop into an interactive bash shell after start-up
#
# Environment:
#   HADOOP_PREFIX  Hadoop installation directory (default: /usr/local/hadoop)
#   ACP            optional comma-separated list of URLs to download into
#                  $HADOOP_PREFIX/share/hadoop/common

: "${HADOOP_PREFIX:=/usr/local/hadoop}"

# NOTE(review): hadoop-env.sh is executed as a child process, not sourced, so
# variables it sets do not carry over into this script. Kept as in the original.
"$HADOOP_PREFIX/etc/hadoop/hadoop-env.sh"

# Remove stale pid files from a previous run; -f keeps this quiet when no
# pid file exists (e.g. on the very first start).
rm -f /tmp/*.pid

# installing libraries if any - (resource urls added comma separated to the ACP system variable)
if [[ -n "${ACP:-}" ]]; then
  (
    # Subshell: the caller's working directory is left untouched, and nothing
    # is downloaded into the wrong place if the cd fails.
    cd "$HADOOP_PREFIX/share/hadoop/common" || exit 1
    for url in ${ACP//,/ }; do
      echo "== $url"
      curl -LO "$url"
    done
  )
fi

# replace config
cp /mnt/hadoop-config/* "$HADOOP_PREFIX/etc/hadoop/"

service sshd start
"$HADOOP_PREFIX/sbin/start-dfs.sh"
"$HADOOP_PREFIX/sbin/start-yarn.sh"
"$HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh" start historyserver

if [[ $1 == "-d" ]]; then
  # Block forever so the container does not exit once the daemons are started.
  while true; do sleep 1000; done
fi

if [[ $1 == "-bash" ]]; then
  /bin/bash
fi
|
29
hadoop/2.7.1/core-site.xml
Normal file
29
hadoop/2.7.1/core-site.xml
Normal file
@ -0,0 +1,29 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||
<!--
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. See accompanying LICENSE file.
|
||||
-->
|
||||
|
||||
<!-- Put site-specific property overrides in this file. -->
|
||||
|
||||
<configuration>

  <!--
    Default file system URI: the HDFS NameNode on hadoop-master, port 8020.
    The deprecated alias fs.default.name was dropped; it duplicated this
    value, and Hadoop 2.x maps the old key onto fs.defaultFS.
  -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hadoop-master:8020</value>
  </property>

</configuration>
|
46
hadoop/2.7.1/hdfs-site.xml
Normal file
46
hadoop/2.7.1/hdfs-site.xml
Normal file
@ -0,0 +1,46 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||
<!--
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. See accompanying LICENSE file.
|
||||
-->
|
||||
|
||||
<!-- Put site-specific property overrides in this file. -->
|
||||
|
||||
<configuration>

  <!--
    Disable HDFS permission checking.
    dfs.permissions is the deprecated name of this key in Hadoop 2.x;
    dfs.permissions.enabled is its replacement.
  -->
  <property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
  </property>

  <!-- HTTP address of the secondary NameNode (on hadoop-slave1). -->
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>hadoop-slave1:50090</value>
  </property>

  <!-- HTTP address of the NameNode web UI (on hadoop-master). -->
  <property>
    <name>dfs.namenode.http-address</name>
    <value>hadoop-master:50070</value>
  </property>

  <property>
    <name>dfs.datanode.max.transfer.threads</name>
    <value>8192</value>
  </property>

  <!-- Block replication factor; the slaves file lists three hosts. -->
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>

</configuration>
|
26
hadoop/2.7.1/mapred-site.xml
Normal file
26
hadoop/2.7.1/mapred-site.xml
Normal file
@ -0,0 +1,26 @@
|
||||
<?xml version="1.0"?>
|
||||
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||
<!--
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. See accompanying LICENSE file.
|
||||
-->
|
||||
|
||||
<!-- Put site-specific property overrides in this file. -->
|
||||
|
||||
<configuration>

  <!-- Run MapReduce jobs on YARN. -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>

</configuration>
|
3
hadoop/2.7.1/slaves
Normal file
3
hadoop/2.7.1/slaves
Normal file
@ -0,0 +1,3 @@
|
||||
hadoop-slave1
|
||||
hadoop-slave2
|
||||
hadoop-slave3
|
57
hadoop/2.7.1/yarn-site.xml
Normal file
57
hadoop/2.7.1/yarn-site.xml
Normal file
@ -0,0 +1,57 @@
|
||||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. See accompanying LICENSE file.
|
||||
-->
|
||||
<!-- Site specific YARN configuration properties -->
|
||||
<configuration>

  <!-- Classpath entries for YARN applications, all under /usr/local/hadoop. -->
  <property>
    <name>yarn.application.classpath</name>
    <value>/usr/local/hadoop/etc/hadoop, /usr/local/hadoop/share/hadoop/common/*, /usr/local/hadoop/share/hadoop/common/lib/*, /usr/local/hadoop/share/hadoop/hdfs/*, /usr/local/hadoop/share/hadoop/hdfs/lib/*, /usr/local/hadoop/share/hadoop/mapreduce/*, /usr/local/hadoop/share/hadoop/mapreduce/lib/*, /usr/local/hadoop/share/hadoop/yarn/*, /usr/local/hadoop/share/hadoop/yarn/lib/*</value>
  </property>

  <!-- The ResourceManager runs on hadoop-master. -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hadoop-master</value>
  </property>

  <!-- Auxiliary service enabled on every NodeManager. -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>

  <!-- Log aggregation: enabled, retained for 604800 seconds (7 days). -->
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>604800</value>
  </property>

  <!-- Per-NodeManager resources and the minimum container allocation. -->
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>2048</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>2</value>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>1024</value>
  </property>

</configuration>
|
Loading…
Reference in New Issue
Block a user