mirror of
https://github.com/newnius/Dockerfiles.git
synced 2025-06-07 16:41:55 +00:00
add spark-2.2.1
This commit is contained in:
parent
b90b784047
commit
d6f2f5bfee
26
spark/2.2.1/Dockerfile
Normal file
26
spark/2.2.1/Dockerfile
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
# Spark is layered on top of the Hadoop 2.8.1 base image.
FROM newnius/hadoop:2.8.1

# MAINTAINER is deprecated; a label carries the same contact information.
LABEL maintainer="Newnius <newnius.cn@gmail.com>"
|
||||||
|
|
||||||
|
# Download the Spark 2.2.1 binary distribution (built against Hadoop 2.7),
# unpack it under /usr/local and expose it at the version-independent path
# /usr/local/spark.
#
# The archive is saved under its upstream file name so that the tar and rm
# steps operate on the file wget actually wrote, and the symlink targets the
# directory the archive unpacks to (spark-2.2.1-bin-hadoop2.7).
# Download, extraction, cleanup and linking share one layer so the tarball
# never persists in the image.
#
# NOTE(review): mirrors usually drop old releases; if this URL stops
# resolving, the same file is kept under archive.apache.org/dist/spark/.
RUN wget -O spark-2.2.1-bin-hadoop2.7.tgz http://mirrors.ocf.berkeley.edu/apache/spark/spark-2.2.1/spark-2.2.1-bin-hadoop2.7.tgz && \
    tar -xzf spark-2.2.1-bin-hadoop2.7.tgz -C /usr/local && \
    rm spark-2.2.1-bin-hadoop2.7.tgz && \
    ln -s /usr/local/spark-2.2.1-bin-hadoop2.7 /usr/local/spark
|
||||||
|
|
||||||
|
# Location of the Spark installation (the /usr/local/spark symlink).
ENV SPARK_HOME=/usr/local/spark

# Put the Spark command-line tools (spark-submit, spark-shell, ...) on PATH.
# Kept as a separate instruction: a variable set by ENV is only visible to
# instructions that follow it, not to other assignments on the same line.
ENV PATH=$PATH:$SPARK_HOME/bin
|
||||||
|
|
||||||
|
# Default Conf Files
# Plain local files need neither URL fetching nor archive extraction, so COPY
# is used instead of ADD. HADOOP_HOME is not set in this Dockerfile; it is
# presumably inherited from the base image -- verify there.
COPY core-site.xml $HADOOP_HOME/etc/hadoop/core-site.xml
COPY hdfs-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml
COPY mapred-site.xml $HADOOP_HOME/etc/hadoop/mapred-site.xml
COPY yarn-site.xml $HADOOP_HOME/etc/hadoop/yarn-site.xml
COPY slaves $HADOOP_HOME/etc/hadoop/slaves

# Spark-specific entrypoint. The file mode is taken from the build context, so
# bootstrap.sh must be committed with the executable bit set.
COPY bootstrap.sh /etc/bootstrap-spark.sh

WORKDIR /usr/local/spark

CMD ["/etc/bootstrap-spark.sh", "-d"]
|
92
spark/2.2.1/README.md
Normal file
92
spark/2.2.1/README.md
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
# Spark on yarn
|
||||||
|
|
||||||
|
## Create a spark cluster in swarm mode
|
||||||
|
|
||||||
|
The `--hostname` flag requires Docker 1.13 or higher.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker service create \
|
||||||
|
--name spark-master \
|
||||||
|
--hostname spark-master \
|
||||||
|
--detach true \
|
||||||
|
--network swarm-net \
|
||||||
|
--replicas 1 \
|
||||||
|
--endpoint-mode dnsrr \
|
||||||
|
newnius/spark:2.2.1
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker service create \
|
||||||
|
--name spark-slave1 \
|
||||||
|
--hostname spark-slave1 \
|
||||||
|
--detach true \
|
||||||
|
--network swarm-net \
|
||||||
|
--replicas 1 \
|
||||||
|
--endpoint-mode dnsrr \
|
||||||
|
newnius/spark:2.2.1
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker service create \
|
||||||
|
--name spark-slave2 \
|
||||||
|
--hostname spark-slave2 \
|
||||||
|
--detach true \
|
||||||
|
--network swarm-net \
|
||||||
|
--replicas 1 \
|
||||||
|
--endpoint-mode dnsrr \
|
||||||
|
newnius/spark:2.2.1
|
||||||
|
```
|
||||||
|
|
||||||
|
## Init && Test
|
||||||
|
|
||||||
|
On the first deployment, HDFS has to be formatted before use.
|
||||||
|
|
||||||
|
### Stop HDFS (in master)
|
||||||
|
```bash
|
||||||
|
$HADOOP_HOME/sbin/stop-dfs.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### Format HDFS (in master)
|
||||||
|
```
|
||||||
|
$HADOOP_HOME/bin/hdfs namenode -format
|
||||||
|
```
|
||||||
|
|
||||||
|
### Start HDFS (in master)
|
||||||
|
```bash
|
||||||
|
$HADOOP_HOME/sbin/start-dfs.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### Run Hello World
|
||||||
|
```bash
|
||||||
|
spark-submit \
|
||||||
|
--master yarn \
--deploy-mode cluster \
|
||||||
|
--class org.apache.spark.examples.JavaSparkPi \
|
||||||
|
./examples/jars/spark-examples_2.11-2.2.1.jar 100
|
||||||
|
```
|
||||||
|
|
||||||
|
### UI
|
||||||
|
|
||||||
|
YARN: spark-master:8088
|
||||||
|
|
||||||
|
HDFS: spark-master:50070
|
||||||
|
|
||||||
|
_Proxy needed, e.g. [newnius/docker-proxy](https://hub.docker.com/r/newnius/docker-proxy/)_
|
||||||
|
|
||||||
|
## Customized config
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker service create \
|
||||||
|
--name spark-master \
|
||||||
|
--hostname spark-master \
|
||||||
|
--detach true \
|
||||||
|
--network swarm-net \
|
||||||
|
--replicas 1 \
|
||||||
|
--mount type=bind,source=/mnt/data/spark/hdfs/master,target=/tmp/hadoop-root \
|
||||||
|
--mount type=bind,source=/mnt/data/spark/logs/master,target=/usr/local/hadoop/logs \
|
||||||
|
--mount type=bind,source=/mnt/data/spark/config/hadoop,target=/mnt/config/hadoop \
|
||||||
|
--mount type=bind,source=/mnt/data/spark/config/spark,target=/mnt/config/spark \
|
||||||
|
--endpoint-mode dnsrr \
|
||||||
|
newnius/spark:2.2.1
|
||||||
|
```
|
||||||
|
|
||||||
|
You don't need to put every config file in the mounted directories; only add the files you want to replace.
|
6
spark/2.2.1/bootstrap.sh
Normal file
6
spark/2.2.1/bootstrap.sh
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
#!/bin/bash
# Spark container entrypoint: overlay user-supplied Spark configuration files,
# then hand control to /etc/bootstrap.sh.

# Replace config.
# /mnt/config/spark is an optional mount. Expand the glob with nullglob so a
# missing or empty directory yields an empty list instead of passing the
# literal pattern to cp (which would fail with a spurious error on every
# start without the mount).
shopt -s nullglob
overrides=(/mnt/config/spark/*)
shopt -u nullglob

if [ "${#overrides[@]}" -gt 0 ]; then
	# Quoted so the copy still works if a path contains whitespace.
	cp "${overrides[@]}" "$SPARK_HOME/conf"
fi

# /etc/bootstrap.sh is not part of this directory; presumably it is shipped by
# the base image -- verify there. exec replaces this shell, so that script
# receives container signals directly and its exit status becomes the
# container's exit status.
exec /etc/bootstrap.sh -d
|
28
spark/2.2.1/core-site.xml
Normal file
28
spark/2.2.1/core-site.xml
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||||
|
<!--
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. See accompanying LICENSE file.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Put site-specific property overrides in this file. -->
|
||||||
|
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>fs.defaultFS</name>
|
||||||
|
<value>hdfs://spark-master:8020</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>fs.default.name</name>
|
||||||
|
<value>hdfs://spark-master:8020</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
40
spark/2.2.1/hdfs-site.xml
Normal file
40
spark/2.2.1/hdfs-site.xml
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||||
|
<!--
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. See accompanying LICENSE file.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Put site-specific property overrides in this file. -->
|
||||||
|
|
||||||
|
<configuration>
|
||||||
|
<property>
	<!-- Turn HDFS permission checking off. The closing tag of this element
	     must be spelled </property>; a misspelled tag makes the whole file
	     malformed XML and prevents it from being parsed. -->
	<name>dfs.permissions</name>
	<value>false</value>
</property>
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.secondary.http-address</name>
|
||||||
|
<value>spark-slave1:50090</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>dfs.namenode.http-address</name>
|
||||||
|
<value>spark-master:50070</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>dfs.datanode.max.transfer.threads</name>
|
||||||
|
<value>8192</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>dfs.replication</name>
|
||||||
|
<value>2</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
32
spark/2.2.1/mapred-site.xml
Normal file
32
spark/2.2.1/mapred-site.xml
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
<?xml version="1.0"?>
|
||||||
|
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||||
|
<!--
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. See accompanying LICENSE file.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Put site-specific property overrides in this file. -->
|
||||||
|
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>mapreduce.framework.name</name>
|
||||||
|
<value>yarn</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>mapreduce.jobhistory.address</name>
|
||||||
|
<value>spark-master:10020</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>mapreduce.jobhistory.webapp.address</name>
|
||||||
|
<value>spark-master:19888</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
3
spark/2.2.1/slaves
Normal file
3
spark/2.2.1/slaves
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
spark-slave1
|
||||||
|
spark-slave2
|
||||||
|
spark-slave3
|
49
spark/2.2.1/yarn-site.xml
Normal file
49
spark/2.2.1/yarn-site.xml
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
<?xml version="1.0"?>
|
||||||
|
<!--
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. See accompanying LICENSE file.
|
||||||
|
-->
|
||||||
|
<!-- Site specific YARN configuration properties -->
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>yarn.application.classpath</name>
|
||||||
|
<value>/usr/local/hadoop/etc/hadoop, /usr/local/hadoop/share/hadoop/common/*, /usr/local/hadoop/share/hadoop/common/lib/*, /usr/local/hadoop/share/hadoop/hdfs/*, /usr/local/hadoop/share/hadoop/hdfs/lib/*, /usr/local/hadoop/share/hadoop/mapreduce/*, /usr/local/hadoop/share/hadoop/mapreduce/lib/*, /usr/local/hadoop/share/hadoop/yarn/*, /usr/local/hadoop/share/hadoop/yarn/lib/*</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>yarn.resourcemanager.hostname</name>
|
||||||
|
<value>spark-master</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>yarn.nodemanager.aux-services</name>
|
||||||
|
<value>mapreduce_shuffle</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>yarn.log-aggregation-enable</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>yarn.log-aggregation.retain-seconds</name>
|
||||||
|
<value>604800</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>yarn.nodemanager.resource.memory-mb</name>
|
||||||
|
<value>2048</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>yarn.nodemanager.resource.cpu-vcores</name>
|
||||||
|
<value>2</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>yarn.scheduler.minimum-allocation-mb</name>
|
||||||
|
<value>1024</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
Loading…
Reference in New Issue
Block a user