Mirror of https://github.com/newnius/Dockerfiles.git (synced 2025-06-07 16:41:55 +00:00)

Commit 8e119766ee (parent e238f29b04): add more hadoop versions
hadoop/2.7.1/Dockerfile
@@ -1,17 +1,59 @@
-FROM sequenceiq/hadoop-docker:2.7.1
+FROM alpine:3.8

-MAINTAINER Newnius <docker@newnius.com>
+MAINTAINER Newnius <newnius.cn@gmail.com>
+
+USER root
+
+# Prerequisites
+RUN apk add --no-cache openssh openssl openjdk8-jre rsync bash procps
+
+ENV JAVA_HOME /usr/lib/jvm/java-1.8-openjdk
+ENV PATH $PATH:$JAVA_HOME/bin
+
+# Passwordless SSH
+RUN ssh-keygen -q -N "" -t dsa -f /etc/ssh/ssh_host_dsa_key
+RUN ssh-keygen -q -N "" -t rsa -f /etc/ssh/ssh_host_rsa_key
+RUN ssh-keygen -q -N "" -t rsa -f /root/.ssh/id_rsa
+RUN cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys
+
+ADD ssh_config /root/.ssh/config
+RUN chmod 600 /root/.ssh/config
+RUN chown root:root /root/.ssh/config
+
+RUN echo "Port 2122" >> /etc/ssh/sshd_config
+
+# Install Hadoop
+ENV HADOOP_VER 2.7.1
+
+RUN wget -O hadoop.tar.gz https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VER/hadoop-$HADOOP_VER.tar.gz && \
+tar -xzf hadoop.tar.gz -C /usr/local/ && rm hadoop.tar.gz
+
+RUN ln -s /usr/local/hadoop-$HADOOP_VER /usr/local/hadoop
+
+ENV HADOOP_HOME /usr/local/hadoop
+ENV PATH $PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
+
+ENV HADOOP_PREFIX $HADOOP_HOME
+ENV HADOOP_COMMON_HOME $HADOOP_HOME
+ENV HADOOP_HDFS_HOME $HADOOP_HOME
+ENV HADOOP_MAPRED_HOME $HADOOP_HOME
+ENV HADOOP_YARN_HOME $HADOOP_HOME
+ENV HADOOP_CONF_DIR $HADOOP_HOME/etc/hadoop
+ENV YARN_CONF_DIR $HADOOP_PREFIX/etc/hadoop
+
+# Default Conf Files
+ADD core-site.xml $HADOOP_HOME/etc/hadoop/core-site.xml
+ADD hdfs-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml
+ADD mapred-site.xml $HADOOP_HOME/etc/hadoop/mapred-site.xml
+ADD yarn-site.xml $HADOOP_HOME/etc/hadoop/yarn-site.xml
+ADD slaves $HADOOP_HOME/etc/hadoop/slaves
+
+RUN sed -i "/^export JAVA_HOME/ s:.*:export JAVA_HOME=${JAVA_HOME}\nexport HADOOP_HOME=${HADOOP_HOME}\nexport HADOOP_PREFIX=${HADOOP_PREFIX}:" ${HADOOP_HOME}/etc/hadoop/hadoop-env.sh
+
+RUN sed -i '/^export HADOOP_CONF_DIR/ s:.*:export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop/:' $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh
+
+WORKDIR $HADOOP_HOME

 ADD bootstrap.sh /etc/bootstrap.sh

-RUN mkdir -p /mnt/hadoop-config
-
-ADD core-site.xml /mnt/hadoop-config
-ADD yarn-site.xml /mnt/hadoop-config
-ADD mapred-site.xml /mnt/hadoop-config
-ADD hdfs-site.xml /mnt/hadoop-config
-ADD slaves /mnt/hadoop-config
-
-WORKDIR /usr/local/hadoop

 CMD ["/etc/bootstrap.sh", "-d"]
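Not part of the commit, but a quick sanity check of the rewritten image; the `hadoop/2.7.1` build context is assumed from the repo layout shown elsewhere in this diff:

```bash
# build the image from a checkout of the repo (context path assumed)
docker build -t newnius/hadoop:2.7.1 hadoop/2.7.1

# WORKDIR is $HADOOP_HOME, so the hadoop binary is reachable as bin/hadoop
docker run --rm newnius/hadoop:2.7.1 bin/hadoop version
```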
hadoop/2.7.1/README.md
@@ -1,87 +1,115 @@
-# based on sequenceiq/hadoop-docker
+# Deploy one Hadoop Cluster with docker

-## Create a hadoop cluster in swarm mode
+## Start Master

-`--hostname` needs 1.13 or higher
-
-```
+```bash
 docker service create \
 --name hadoop-master \
---network swarm-net \
 --hostname hadoop-master \
+--network swarm-net \
 --replicas 1 \
+--detach=true \
 --endpoint-mode dnsrr \
-newnius/hadoop
+--mount type=bind,source=/etc/localtime,target=/etc/localtime \
+newnius/hadoop:2.7.1
 ```

-```
+## Start slaves
+
+```bash
 docker service create \
 --name hadoop-slave1 \
---network swarm-net \
 --hostname hadoop-slave1 \
+--network swarm-net \
 --replicas 1 \
+--detach=true \
 --endpoint-mode dnsrr \
-newnius/hadoop
+--mount type=bind,source=/etc/localtime,target=/etc/localtime \
+newnius/hadoop:2.7.1
 ```

-```
+```bash
 docker service create \
 --name hadoop-slave2 \
 --network swarm-net \
 --hostname hadoop-slave2 \
 --replicas 1 \
+--detach=true \
 --endpoint-mode dnsrr \
-newnius/hadoop
+--mount type=bind,source=/etc/localtime,target=/etc/localtime \
+newnius/hadoop:2.7.1
 ```

-```
+```bash
 docker service create \
 --name hadoop-slave3 \
---network swarm-net \
 --hostname hadoop-slave3 \
+--network swarm-net \
 --replicas 1 \
+--detach=true \
 --endpoint-mode dnsrr \
-newnius/hadoop
+--mount type=bind,source=/etc/localtime,target=/etc/localtime \
+newnius/hadoop:2.7.1
 ```

-## Init && Test
+## Init for the first time

-In the first deploy, format dfs first
+#### format dfs first
+Run these commands on the master node.

-### stop cluster (in master)
-`sbin/stop-yarn.sh`
-`sbin/stop-dfs.sh`
-
-### remove previous data (in all nodes)
-clear all data in /tmp in all nodes
-
-### format hdfs (in master)
-```
+```bash
+# stop HDFS services
+sbin/stop-dfs.sh
+
+# format HDFS meta data
 bin/hadoop namenode -format
+
+# restart HDFS services
+sbin/start-dfs.sh
 ```

-### start cluster (in master)
-`sbin/start-dfs.sh`
-`sbin/start-yarn.sh`
-
-### Run a test job
-bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar grep input output 'dfs[a-z.]+'
-
-Note: there is no such dir default, you can add data by
-
-```
+## Run a test job
+
+To make sure you have successfully set up the Hadoop cluster, just run the following commands and see whether they execute well.
+
+```bash
+# prepare input data
 bin/hadoop dfs -mkdir -p /user/root/input
-```
-
-and
-
-```
+
+# copy files to input path
 bin/hadoop dfs -put etc/hadoop/* /user/root/input
+
+# submit the job
+bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar grep input output 'dfs[a-z.]+'
 ```

-### monitor cluster in browser
-
-YARN: hadoop-master:8088
-
-HDFS: hadoop-master:50070
-
-_Proxy needed, e.g. newnius/docker-proxy_
+## Browse the web UI
+
+You can expose the ports in the script, but I'd rather not, since the slaves would occupy the same ports.
+
+To access the web UI, deploy another (socks5) proxy to route the traffic.
+
+If you don't have one, try [newnius/docker-proxy](https://hub.docker.com/r/newnius/docker-proxy/); it is rather easy to use.
+
+Visit [hadoop-master:8088](hadoop-master:8088) for the YARN pages.
+
+Visit [hadoop-master:50070](hadoop-master:50070) for the HDFS pages.
+
+## Custom configuration
+
+To persist data or modify the conf files, refer to the following script.
+
+The `/config/hadoop` path is where replacement conf files go; you don't have to put all the files there.
+
+```bash
+docker service create \
+--name hadoop-master \
+--hostname hadoop-master \
+--network swarm-net \
+--replicas 1 \
+--detach=true \
+--endpoint-mode dnsrr \
+--mount type=bind,source=/etc/localtime,target=/etc/localtime \
+--mount type=bind,source=/data/hadoop/config,target=/config/hadoop \
+--mount type=bind,source=/data/hadoop/hdfs/master,target=/tmp/hadoop-root \
+--mount type=bind,source=/data/hadoop/logs/master,target=/usr/local/hadoop/logs \
+newnius/hadoop:2.7.1
+```
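As a sketch of how the `/config/hadoop` override is meant to be used (host paths taken from the service definition above; `my-core-site.xml` is a hypothetical local file):

```bash
# place a replacement conf file in the bind-mounted host directory
mkdir -p /data/hadoop/config
cp my-core-site.xml /data/hadoop/config/core-site.xml

# force a restart so bootstrap.sh copies /config/hadoop/* over the defaults
docker service update --force hadoop-master
```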
hadoop/2.7.1/bootstrap.sh
@@ -9,10 +9,14 @@ rm /tmp/*.pid
 # installing libraries if any - (resource urls added comma separated to the ACP system variable)
 cd $HADOOP_PREFIX/share/hadoop/common ; for cp in ${ACP//,/ }; do echo == $cp; curl -LO $cp ; done; cd -

-# replace config
-cp /mnt/hadoop-config/* $HADOOP_PREFIX/etc/hadoop/
+## replace config
+: ${EXTRA_CONF_DIR:=/config/hadoop}

-service sshd start
+if [ -d "$EXTRA_CONF_DIR" ]; then
+	cp $EXTRA_CONF_DIR/* $HADOOP_PREFIX/etc/hadoop/
+fi
+
+/usr/sbin/sshd

 ## stop all in case master starts far behind
 $HADOOP_PREFIX/sbin/stop-yarn.sh
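For readers unfamiliar with the `: ${VAR:=default}` line added above, a minimal standalone illustration of the idiom:

```bash
#!/bin/bash
# ':' is the shell no-op; ${VAR:=default} assigns default only if VAR is unset or empty
: ${EXTRA_CONF_DIR:=/config/hadoop}
echo "$EXTRA_CONF_DIR"   # prints /config/hadoop unless the caller exported another value
```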
hadoop/2.7.1/core-site.xml
@@ -17,7 +17,6 @@
 <!-- Put site-specific property overrides in this file. -->

 <configuration>

 <property>
 <name>fs.defaultFS</name>
 <value>hdfs://hadoop-master:8020</value>
hadoop/2.7.1/hdfs-site.xml
@@ -17,12 +17,10 @@
 <!-- Put site-specific property overrides in this file. -->

 <configuration>

 <property>
 <name>dfs.permissions</name>
 <value>false</value>
 </property>

 <property>
 <name>dfs.namenode.secondary.http-address</name>
 <value>hadoop-slave1:50090</value>
@@ -31,16 +29,12 @@
 <name>dfs.namenode.http-address</name>
 <value>hadoop-master:50070</value>
 </property>

 <property>
 <name>dfs.datanode.max.transfer.threads</name>
 <value>8192</value>
 </property>

 <property>
 <name>dfs.replication</name>
 <value>3</value>
 </property>

 </configuration>
hadoop/2.7.1/mapred-site.xml
@@ -17,10 +17,16 @@
 <!-- Put site-specific property overrides in this file. -->

 <configuration>

 <property>
 <name>mapreduce.framework.name</name>
 <value>yarn</value>
 </property>

+<property>
+<name>mapreduce.jobhistory.address</name>
+<value>hadoop-master:10020</value>
+</property>
+<property>
+<name>mapreduce.jobhistory.webapp.address</name>
+<value>hadoop-master:19888</value>
+</property>
 </configuration>
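The two new jobhistory addresses pair with the history server the bootstrap script starts (`mr-jobhistory-daemon.sh start historyserver`, visible in the 2.8.4 bootstrap.sh later in this commit). A quick check that it is up, from any node on `swarm-net`, using the standard JobHistory REST endpoint:

```bash
# returns JSON with the history server's version and start time
curl http://hadoop-master:19888/ws/v1/history/info
```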
hadoop/2.7.1/ssh_config (new file, 5 lines)
Host *
  UserKnownHostsFile /dev/null
  StrictHostKeyChecking no
  LogLevel quiet
  Port 2122
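This client config is what lets the `start-dfs.sh`/`start-yarn.sh` SSH fan-out run non-interactively: every host is trusted on first contact and reached on port 2122, matching the `Port 2122` line appended to sshd_config in the Dockerfile. A quick check from inside the master container (service names assumed from the README):

```bash
# should print the slave's hostname without any password or host-key prompt
ssh hadoop-slave1 hostname
```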
hadoop/2.7.1/yarn-site.xml
@@ -14,34 +14,26 @@
 -->
 <!-- Site specific YARN configuration properties -->
 <configuration>

 <property>
 <name>yarn.application.classpath</name>
 <value>/usr/local/hadoop/etc/hadoop, /usr/local/hadoop/share/hadoop/common/*, /usr/local/hadoop/share/hadoop/common/lib/*, /usr/local/hadoop/share/hadoop/hdfs/*, /usr/local/hadoop/share/hadoop/hdfs/lib/*, /usr/local/hadoop/share/hadoop/mapreduce/*, /usr/local/hadoop/share/hadoop/mapreduce/lib/*, /usr/local/hadoop/share/hadoop/yarn/*, /usr/local/hadoop/share/hadoop/yarn/lib/*</value>
 </property>

 <property>
 <name>yarn.resourcemanager.hostname</name>
 <value>hadoop-master</value>
 </property>

 <property>
 <name>yarn.nodemanager.aux-services</name>
 <value>mapreduce_shuffle</value>
 </property>

 <property>
 <name>yarn.log-aggregation-enable</name>
 <value>true</value>
 </property>

 <property>
 <name>yarn.log-aggregation.retain-seconds</name>
 <value>604800</value>
 </property>

 <property>
 <name>yarn.nodemanager.resource.memory-mb</name>
 <value>2048</value>
hadoop/2.8.1/Dockerfile
@@ -1,4 +1,4 @@
-FROM alpine:3.6
+FROM alpine:3.8

 MAINTAINER Newnius <newnius.cn@gmail.com>

@@ -23,10 +23,12 @@ RUN chown root:root /root/.ssh/config
 RUN echo "Port 2122" >> /etc/ssh/sshd_config

 # Install Hadoop
-RUN wget -O hadoop.tar.gz https://archive.apache.org/dist/hadoop/common/hadoop-2.8.1/hadoop-2.8.1.tar.gz && \
+ENV HADOOP_VER 2.8.1
+
+RUN wget -O hadoop.tar.gz https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VER/hadoop-$HADOOP_VER.tar.gz && \
 tar -xzf hadoop.tar.gz -C /usr/local/ && rm hadoop.tar.gz

-RUN ln -s /usr/local/hadoop-2.8.1 /usr/local/hadoop
+RUN ln -s /usr/local/hadoop-$HADOOP_VER /usr/local/hadoop

 ENV HADOOP_HOME /usr/local/hadoop
 ENV PATH $PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
@@ -46,10 +48,9 @@ ADD mapred-site.xml $HADOOP_HOME/etc/hadoop/mapred-site.xml
 ADD yarn-site.xml $HADOOP_HOME/etc/hadoop/yarn-site.xml
 ADD slaves $HADOOP_HOME/etc/hadoop/slaves

-RUN sed -i "s|export JAVA_HOME=.*|export JAVA_HOME=${JAVA_HOME}\nexport HADOOP_HOME=${HADOOP_HOME}\nexport HADOOP_PREFIX=${HADOOP_PREFIX}|g" ${HADOOP_PREFIX}/etc/hadoop/hadoop-env.sh
-RUN sed -i "s|export HADOOP_CONF_DIR=.*|export HADOOP_CONF_DIR=${HADOOP_PREFIX}/etc/hadoop/|g" ${HADOOP_PREFIX}/etc/hadoop/hadoop-env.sh
+RUN sed -i "/^export JAVA_HOME/ s:.*:export JAVA_HOME=${JAVA_HOME}\nexport HADOOP_HOME=${HADOOP_HOME}\nexport HADOOP_PREFIX=${HADOOP_PREFIX}:" ${HADOOP_HOME}/etc/hadoop/hadoop-env.sh
+RUN sed -i '/^export HADOOP_CONF_DIR/ s:.*:export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop/:' $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh

 WORKDIR $HADOOP_HOME
hadoop/2.8.1/README.md
@@ -1,87 +1,115 @@
-# based on alpine
+# Deploy one Hadoop Cluster with docker

-## Create a hadoop cluster in swarm mode
+## Start Master

-`--hostname` needs docker 1.13 or higher
-
-```
+```bash
 docker service create \
 --name hadoop-master \
---network swarm-net \
 --hostname hadoop-master \
+--network swarm-net \
 --replicas 1 \
+--detach=true \
 --endpoint-mode dnsrr \
-newnius/hadoop
+--mount type=bind,source=/etc/localtime,target=/etc/localtime \
+newnius/hadoop:2.8.1
 ```

-```
+## Start slaves
+
+```bash
 docker service create \
 --name hadoop-slave1 \
---network swarm-net \
 --hostname hadoop-slave1 \
+--network swarm-net \
 --replicas 1 \
+--detach=true \
 --endpoint-mode dnsrr \
-newnius/hadoop
+--mount type=bind,source=/etc/localtime,target=/etc/localtime \
+newnius/hadoop:2.8.1
 ```

-```
+```bash
 docker service create \
 --name hadoop-slave2 \
 --network swarm-net \
 --hostname hadoop-slave2 \
 --replicas 1 \
+--detach=true \
 --endpoint-mode dnsrr \
-newnius/hadoop
+--mount type=bind,source=/etc/localtime,target=/etc/localtime \
+newnius/hadoop:2.8.1
 ```

-```
+```bash
 docker service create \
 --name hadoop-slave3 \
---network swarm-net \
 --hostname hadoop-slave3 \
+--network swarm-net \
 --replicas 1 \
+--detach=true \
 --endpoint-mode dnsrr \
-newnius/hadoop
+--mount type=bind,source=/etc/localtime,target=/etc/localtime \
+newnius/hadoop:2.8.1
 ```

-## Init && Test
+## Init for the first time

-In the first deploy, format dfs first
+#### format dfs first
+Run these commands on the master node.

-### stop cluster (in master)
-`sbin/stop-yarn.sh`
-`sbin/stop-dfs.sh`
-
-### remove previous data (in all nodes)
-clear all data in /tmp in all nodes
-
-### format hdfs (in master)
-```
+```bash
+# stop HDFS services
+sbin/stop-dfs.sh
+
+# format HDFS meta data
 bin/hadoop namenode -format
+
+# restart HDFS services
+sbin/start-dfs.sh
 ```

-### start cluster (in master)
-`sbin/start-dfs.sh`
-`sbin/start-yarn.sh`
-
-### Run a test job
-bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.4.jar grep input output 'dfs[a-z.]+'
-
-Note: there is no such dir default, you can add data by
-
-```
+## Run a test job
+
+To make sure you have successfully set up the Hadoop cluster, just run the following commands and see whether they execute well.
+
+```bash
+# prepare input data
 bin/hadoop dfs -mkdir -p /user/root/input
-```
-
-and
-
-```
+
+# copy files to input path
 bin/hadoop dfs -put etc/hadoop/* /user/root/input
+
+# submit the job
+bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.8.1.jar grep input output 'dfs[a-z.]+'
 ```

-### monitor cluster in browser
-
-YARN: hadoop-master:8088
-
-HDFS: hadoop-master:50070
-
-_Proxy needed: newnius/docker-proxy_
+## Browse the web UI
+
+You can expose the ports in the script, but I'd rather not, since the slaves would occupy the same ports.
+
+To access the web UI, deploy another (socks5) proxy to route the traffic.
+
+If you don't have one, try [newnius/docker-proxy](https://hub.docker.com/r/newnius/docker-proxy/); it is rather easy to use.
+
+Visit [hadoop-master:8088](hadoop-master:8088) for the YARN pages.
+
+Visit [hadoop-master:50070](hadoop-master:50070) for the HDFS pages.
+
+## Custom configuration
+
+To persist data or modify the conf files, refer to the following script.
+
+The `/config/hadoop` path is where replacement conf files go; you don't have to put all the files there.
+
+```bash
+docker service create \
+--name hadoop-master \
+--hostname hadoop-master \
+--network swarm-net \
+--replicas 1 \
+--detach=true \
+--endpoint-mode dnsrr \
+--mount type=bind,source=/etc/localtime,target=/etc/localtime \
+--mount type=bind,source=/data/hadoop/config,target=/config/hadoop \
+--mount type=bind,source=/data/hadoop/hdfs/master,target=/tmp/hadoop-root \
+--mount type=bind,source=/data/hadoop/logs/master,target=/usr/local/hadoop/logs \
+newnius/hadoop:2.8.1
+```
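Not in the README, but two stock commands that make a useful post-deploy health check (run in the master container, from `/usr/local/hadoop`):

```bash
# all three DataNodes should be listed as live
bin/hdfs dfsadmin -report

# all three NodeManagers should show up as RUNNING
bin/yarn node -list
```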
hadoop/2.8.1/bootstrap.sh
@@ -9,8 +9,12 @@ rm /tmp/*.pid
 # installing libraries if any - (resource urls added comma separated to the ACP system variable)
 cd $HADOOP_PREFIX/share/hadoop/common ; for cp in ${ACP//,/ }; do echo == $cp; curl -LO $cp ; done; cd -

-# replace config
-cp /mnt/hadoop-config/* $HADOOP_PREFIX/etc/hadoop/
+## replace config
+: ${EXTRA_CONF_DIR:=/config/hadoop}
+
+if [ -d "$EXTRA_CONF_DIR" ]; then
+	cp $EXTRA_CONF_DIR/* $HADOOP_PREFIX/etc/hadoop/
+fi

 /usr/sbin/sshd
hadoop/2.8.4/Dockerfile (new file, 59 lines)
FROM alpine:3.8

MAINTAINER Newnius <newnius.cn@gmail.com>

USER root

# Prerequisites
RUN apk add --no-cache openssh openssl openjdk8-jre rsync bash procps

ENV JAVA_HOME /usr/lib/jvm/java-1.8-openjdk
ENV PATH $PATH:$JAVA_HOME/bin

# Passwordless SSH
RUN ssh-keygen -q -N "" -t dsa -f /etc/ssh/ssh_host_dsa_key
RUN ssh-keygen -q -N "" -t rsa -f /etc/ssh/ssh_host_rsa_key
RUN ssh-keygen -q -N "" -t rsa -f /root/.ssh/id_rsa
RUN cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys

ADD ssh_config /root/.ssh/config
RUN chmod 600 /root/.ssh/config
RUN chown root:root /root/.ssh/config

RUN echo "Port 2122" >> /etc/ssh/sshd_config

# Install Hadoop
ENV HADOOP_VER 2.8.4

RUN wget -O hadoop.tar.gz https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VER/hadoop-$HADOOP_VER.tar.gz && \
tar -xzf hadoop.tar.gz -C /usr/local/ && rm hadoop.tar.gz

RUN ln -s /usr/local/hadoop-$HADOOP_VER /usr/local/hadoop

ENV HADOOP_HOME /usr/local/hadoop
ENV PATH $PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

ENV HADOOP_PREFIX $HADOOP_HOME
ENV HADOOP_COMMON_HOME $HADOOP_HOME
ENV HADOOP_HDFS_HOME $HADOOP_HOME
ENV HADOOP_MAPRED_HOME $HADOOP_HOME
ENV HADOOP_YARN_HOME $HADOOP_HOME
ENV HADOOP_CONF_DIR $HADOOP_HOME/etc/hadoop
ENV YARN_CONF_DIR $HADOOP_PREFIX/etc/hadoop

# Default Conf Files
ADD core-site.xml $HADOOP_HOME/etc/hadoop/core-site.xml
ADD hdfs-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml
ADD mapred-site.xml $HADOOP_HOME/etc/hadoop/mapred-site.xml
ADD yarn-site.xml $HADOOP_HOME/etc/hadoop/yarn-site.xml
ADD slaves $HADOOP_HOME/etc/hadoop/slaves

RUN sed -i "/^export JAVA_HOME/ s:.*:export JAVA_HOME=${JAVA_HOME}\nexport HADOOP_HOME=${HADOOP_HOME}\nexport HADOOP_PREFIX=${HADOOP_PREFIX}:" ${HADOOP_HOME}/etc/hadoop/hadoop-env.sh

RUN sed -i '/^export HADOOP_CONF_DIR/ s:.*:export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop/:' $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh

WORKDIR $HADOOP_HOME

ADD bootstrap.sh /etc/bootstrap.sh

CMD ["/etc/bootstrap.sh", "-d"]
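A note on the `sed` lines: they use `:` as the substitution delimiter so the `/`-heavy paths need no escaping, and they replace the whole matched line. A standalone illustration against a hypothetical file:

```bash
# a stub hadoop-env.sh with an empty JAVA_HOME export
printf 'export JAVA_HOME=\n' > /tmp/env.sh

# match the line starting with "export JAVA_HOME" and rewrite it entirely
sed -i "/^export JAVA_HOME/ s:.*:export JAVA_HOME=/usr/lib/jvm/java-1.8-openjdk:" /tmp/env.sh

cat /tmp/env.sh   # -> export JAVA_HOME=/usr/lib/jvm/java-1.8-openjdk
```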
hadoop/2.8.4/README.md (new file, 115 lines)
# Deploy one Hadoop Cluster with docker

## Start Master

```bash
docker service create \
--name hadoop-master \
--hostname hadoop-master \
--network swarm-net \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
newnius/hadoop:2.8.4
```

## Start slaves

```bash
docker service create \
--name hadoop-slave1 \
--hostname hadoop-slave1 \
--network swarm-net \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
newnius/hadoop:2.8.4
```

```bash
docker service create \
--name hadoop-slave2 \
--network swarm-net \
--hostname hadoop-slave2 \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
newnius/hadoop:2.8.4
```

```bash
docker service create \
--name hadoop-slave3 \
--hostname hadoop-slave3 \
--network swarm-net \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
newnius/hadoop:2.8.4
```

## Init for the first time

#### format dfs first
Run these commands on the master node.

```bash
# stop HDFS services
sbin/stop-dfs.sh

# format HDFS meta data
bin/hadoop namenode -format

# restart HDFS services
sbin/start-dfs.sh
```

## Run a test job

To make sure you have successfully set up the Hadoop cluster, just run the following commands and see whether they execute well.

```bash
# prepare input data
bin/hadoop dfs -mkdir -p /user/root/input

# copy files to input path
bin/hadoop dfs -put etc/hadoop/* /user/root/input

# submit the job
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.8.4.jar grep input output 'dfs[a-z.]+'
```

## Browse the web UI

You can expose the ports in the script, but I'd rather not, since the slaves would occupy the same ports.

To access the web UI, deploy another (socks5) proxy to route the traffic.

If you don't have one, try [newnius/docker-proxy](https://hub.docker.com/r/newnius/docker-proxy/); it is rather easy to use.

Visit [hadoop-master:8088](hadoop-master:8088) for the YARN pages.

Visit [hadoop-master:50070](hadoop-master:50070) for the HDFS pages.

## Custom configuration

To persist data or modify the conf files, refer to the following script.

The `/config/hadoop` path is where replacement conf files go; you don't have to put all the files there.

```bash
docker service create \
--name hadoop-master \
--hostname hadoop-master \
--network swarm-net \
--replicas 1 \
--detach=true \
--endpoint-mode dnsrr \
--mount type=bind,source=/etc/localtime,target=/etc/localtime \
--mount type=bind,source=/data/hadoop/config,target=/config/hadoop \
--mount type=bind,source=/data/hadoop/hdfs/master,target=/tmp/hadoop-root \
--mount type=bind,source=/data/hadoop/logs/master,target=/usr/local/hadoop/logs \
newnius/hadoop:2.8.4
```
hadoop/2.8.4/bootstrap.sh (new executable file, 35 lines)
#!/bin/bash

: ${HADOOP_PREFIX:=/usr/local/hadoop}

$HADOOP_PREFIX/etc/hadoop/hadoop-env.sh

rm /tmp/*.pid

# installing libraries if any - (resource urls added comma separated to the ACP system variable)
cd $HADOOP_PREFIX/share/hadoop/common ; for cp in ${ACP//,/ }; do echo == $cp; curl -LO $cp ; done; cd -

## replace config
: ${EXTRA_CONF_DIR:=/config/hadoop}

if [ -d "$EXTRA_CONF_DIR" ]; then
	cp $EXTRA_CONF_DIR/* $HADOOP_PREFIX/etc/hadoop/
fi

/usr/sbin/sshd

## stop all in case master starts far behind
$HADOOP_PREFIX/sbin/stop-yarn.sh
$HADOOP_PREFIX/sbin/stop-dfs.sh

$HADOOP_PREFIX/sbin/start-dfs.sh
$HADOOP_PREFIX/sbin/start-yarn.sh
$HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh start historyserver

if [[ $1 == "-d" ]]; then
	while true; do sleep 1000; done
fi

if [[ $1 == "-bash" ]]; then
	/bin/bash
fi
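The trailing blocks give the script three entry modes: daemonized (`-d`, the image's CMD, which parks the shell in a sleep loop so the container stays up), interactive (`-bash`), or one-shot if no flag is passed. A hedged usage sketch:

```bash
# default CMD: start the daemons, then keep the container alive
docker run -d newnius/hadoop:2.8.4

# interactive: start the daemons, then drop into a shell
docker run -it newnius/hadoop:2.8.4 /etc/bootstrap.sh -bash
```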
hadoop/2.8.4/core-site.xml (new file, 28 lines)
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop-master:8020</value>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://hadoop-master:8020</value>
</property>
</configuration>
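`fs.default.name` is the deprecated alias of `fs.defaultFS`; both are set here, presumably for older clients. To confirm the value actually in effect inside a container:

```bash
# prints hdfs://hadoop-master:8020 if the conf files were picked up
bin/hdfs getconf -confKey fs.defaultFS
```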
hadoop/2.8.4/hdfs-site.xml (new file, 40 lines)
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop-slave1:50090</value>
</property>
<property>
<name>dfs.namenode.http-address</name>
<value>hadoop-master:50070</value>
</property>
<property>
<name>dfs.datanode.max.transfer.threads</name>
<value>8192</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
</configuration>
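With `dfs.replication` set to 3 and exactly three slaves in the `slaves` file below, every block should end up with a replica on each DataNode. One way to verify, from the master container:

```bash
# reports per-file block locations plus any under-replicated or corrupt blocks
bin/hdfs fsck / -files -blocks
```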
hadoop/2.8.4/mapred-site.xml (new file, 32 lines)
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop-master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop-master:19888</value>
</property>
</configuration>
hadoop/2.8.4/slaves (new file, 3 lines)
hadoop-slave1
hadoop-slave2
hadoop-slave3
hadoop/2.8.4/ssh_config (new file, 5 lines)
Host *
  UserKnownHostsFile /dev/null
  StrictHostKeyChecking no
  LogLevel quiet
  Port 2122
hadoop/2.8.4/yarn-site.xml (new file, 49 lines)
<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- Site specific YARN configuration properties -->
<configuration>
<property>
<name>yarn.application.classpath</name>
<value>/usr/local/hadoop/etc/hadoop, /usr/local/hadoop/share/hadoop/common/*, /usr/local/hadoop/share/hadoop/common/lib/*, /usr/local/hadoop/share/hadoop/hdfs/*, /usr/local/hadoop/share/hadoop/hdfs/lib/*, /usr/local/hadoop/share/hadoop/mapreduce/*, /usr/local/hadoop/share/hadoop/mapreduce/lib/*, /usr/local/hadoop/share/hadoop/yarn/*, /usr/local/hadoop/share/hadoop/yarn/lib/*</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop-master</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>2048</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>2</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>1024</value>
</property>
</configuration>
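A quick capacity calculation implied by these values: each NodeManager offers 2048 MB and 2 vcores, and the scheduler hands out memory in 1024 MB increments, so a node can run at most two 1024 MB containers at a time. The ResourceManager's standard REST API gives the live numbers:

```bash
# totalMB should come to 3 nodes x 2048 MB = 6144 MB
curl http://hadoop-master:8088/ws/v1/cluster/metrics
```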
hadoop/2.9.1/Dockerfile
@@ -1,4 +1,4 @@
-FROM alpine:3.6
+FROM alpine:3.8

 MAINTAINER Newnius <newnius.cn@gmail.com>

@@ -25,7 +25,7 @@ RUN echo "Port 2122" >> /etc/ssh/sshd_config
 # Install Hadoop
 ENV HADOOP_VER 2.9.1

-RUN wget -O hadoop.tar.gz http://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VER/hadoop-$HADOOP_VER.tar.gz && \
+RUN wget -O hadoop.tar.gz https://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VER/hadoop-$HADOOP_VER.tar.gz && \
 tar -xzf hadoop.tar.gz -C /usr/local/ && rm hadoop.tar.gz

 RUN ln -s /usr/local/hadoop-$HADOOP_VER /usr/local/hadoop