update spark

This commit is contained in:
2018-08-07 20:04:47 +08:00
parent 0582327307
commit 94228e4944
13 changed files with 98 additions and 125 deletions

View File

@@ -0,0 +1,26 @@
FROM newnius/hadoop:2.8.1
MAINTAINER Newnius <newnius.cn@gmail.com>
RUN wget http://mirrors.ocf.berkeley.edu/apache/spark/spark-2.2.1/spark-2.2.1-bin-hadoop2.7.tgz && \
tar -xvf spark-2.2.1-bin-hadoop2.7.tgz -C /usr/local && \
rm spark-2.2.1-bin-hadoop2.7.tgz
RUN ln -s /usr/local/spark-2.2.1-bin-hadoop2.7 /usr/local/spark
ENV SPARK_HOME /usr/local/spark
ENV PATH $PATH:$SPARK_HOME/bin
# Default Conf Files
ADD core-site.xml $HADOOP_HOME/etc/hadoop/core-site.xml
ADD hdfs-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml
ADD mapred-site.xml $HADOOP_HOME/etc/hadoop/mapred-site.xml
ADD yarn-site.xml $HADOOP_HOME/etc/hadoop/yarn-site.xml
ADD slaves $HADOOP_HOME/etc/hadoop/slaves
ADD bootstrap.sh /etc/bootstrap-spark.sh
WORKDIR /usr/local/spark
CMD ["/etc/bootstrap-spark.sh", "-d"]

View File

@@ -0,0 +1,93 @@
# Spark on yarn
## Create a spark cluster in swarm mode
`--hostname` needs 1.13 or higher
```bash
docker service create \
--name spark-master \
--hostname spark-master \
--detach true \
--network swarm-net \
--replicas 1 \
--endpoint-mode dnsrr \
newnius/spark:2.2.1
```
```bash
docker service create \
--name spark-slave1 \
--hostname spark-slave1 \
--detach true \
--network swarm-net \
--replicas 1 \
--endpoint-mode dnsrr \
newnius/spark:2.2.1
```
```bash
docker service create \
--name spark-slave2 \
--hostname spark-slave2 \
--detach true \
--network swarm-net \
--replicas 1 \
--endpoint-mode dnsrr \
newnius/spark:2.2.1
```
## Init && Test
In the first deploy, format hdfs
### Stop HDFS (in master)
```bash
sbin/stop-dfs.sh
```
### Format HDFS (in master)
```
bin/hadoop namenode -format
```
### Start HDFS (in master)
```bash
sbin/start-dfs.sh
```
### Run Hello World
```bash
spark-submit \
--master yarn \
--deploy-mode cluster \
--class org.apache.spark.examples.JavaSparkPi \
./examples/jars/spark-examples_2.11-2.2.1.jar 100
```
### UI
YARN: spark-master:8088
HDFS: spark-master:50070
_Proxy needed, e.g. [newnius/docker-proxy](https://hub.docker.com/r/newnius/docker-proxy/)_
## customized config
```bash
docker service create \
--name spark-master \
--hostname spark-master \
--detach=true \
--network swarm-net \
--replicas 1 \
--mount type=bind,source=/mnt/data/spark/hdfs/master,target=/tmp/hadoop-root \
--mount type=bind,source=/mnt/data/spark/logs/master,target=/usr/local/hadoop/logs \
--mount type=bind,source=/mnt/data/spark/config/hadoop,target=/mnt/config/hadoop \
--mount type=bind,source=/mnt/data/spark/config/spark,target=/mnt/config/spark \
--endpoint-mode dnsrr \
newnius/spark:2.2.1
```
You dont't need to put all files in dir, only add files to be replaced.

6
spark/2.2.1-yarn/bootstrap.sh Executable file
View File

@@ -0,0 +1,6 @@
#! /bin/bash
# replace config
cp /mnt/config/spark/* $SPARK_HOME/conf
bash -c "/etc/bootstrap.sh -d"

View File

@@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://spark-master:8020</value>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://spark-master:8020</value>
</property>
</configuration>

View File

@@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>spark-slave1:50090</value>
</property>
<property>
<name>dfs.namenode.http-address</name>
<value>spark-master:50070</value>
</property>
<property>
<name>dfs.datanode.max.transfer.threads</name>
<value>8192</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
</configuration>

View File

@@ -0,0 +1,32 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>spark-master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>spark-master:19888</value>
</property>
</configuration>

3
spark/2.2.1-yarn/slaves Normal file
View File

@@ -0,0 +1,3 @@
spark-slave1
spark-slave2
spark-slave3

View File

@@ -0,0 +1,49 @@
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Site specific YARN configuration properties -->
<configuration>
<property>
<name>yarn.application.classpath</name>
<value>/usr/local/hadoop/etc/hadoop, /usr/local/hadoop/share/hadoop/common/*, /usr/local/hadoop/share/hadoop/common/lib/*, /usr/local/hadoop/share/hadoop/hdfs/*, /usr/local/hadoop/share/hadoop/hdfs/lib/*, /usr/local/hadoop/share/hadoop/mapreduce/*, /usr/local/hadoop/share/hadoop/mapreduce/lib/*, /usr/local/hadoop/share/hadoop/yarn/*, /usr/local/hadoop/share/hadoop/yarn/lib/*</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>spark-master</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>2048</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>2</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>1024</value>
</property>
</configuration>