mirror of
https://github.com/newnius/Dockerfiles.git
synced 2025-12-13 01:36:43 +00:00
update spark
This commit is contained in:
34
spark/2.3.1-yarn/Dockerfile
Normal file
34
spark/2.3.1-yarn/Dockerfile
Normal file
@@ -0,0 +1,34 @@
|
||||
# Spark-on-YARN client image: Alpine, the OpenJDK 8 JRE and a prebuilt Spark
# distribution, with Hadoop client configuration in /usr/local/hadoop/etc/hadoop.
FROM alpine:3.8

# MAINTAINER is deprecated; the author is recorded as an image label instead.
LABEL org.opencontainers.image.authors="Newnius <newnius.cn@gmail.com>"

USER root

# Prerequisites (one package per line, sorted, to keep diffs small)
RUN apk add --no-cache \
      bash \
      coreutils \
      openjdk8-jre \
      openssh \
      openssl \
      procps \
      rsync

ENV JAVA_HOME=/usr/lib/jvm/java-1.8-openjdk

# Kept as a separate instruction: $JAVA_HOME is only substituted once the
# preceding ENV has taken effect.
ENV PATH=$PATH:$JAVA_HOME/bin

ENV SPARK_VER=2.3.1

# Download, unpack and link Spark in one layer; the tarball is removed in the
# same layer that fetched it.
RUN wget https://archive.apache.org/dist/spark/spark-$SPARK_VER/spark-$SPARK_VER-bin-hadoop2.7.tgz && \
    tar -xzf spark-$SPARK_VER-bin-hadoop2.7.tgz -C /usr/local && \
    rm spark-$SPARK_VER-bin-hadoop2.7.tgz && \
    ln -s /usr/local/spark-$SPARK_VER-bin-hadoop2.7 /usr/local/spark

ENV SPARK_HOME=/usr/local/spark

# Plain local files need none of ADD's extra behaviour, so COPY is used.
COPY config/* /usr/local/hadoop/etc/hadoop/

ENV HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop

ENV PATH=$PATH:$SPARK_HOME/bin

COPY bootstrap.sh /etc/bootstrap.sh

WORKDIR /usr/local/spark

CMD ["/etc/bootstrap.sh"]
|
||||
45
spark/2.3.1-yarn/README.md
Normal file
45
spark/2.3.1-yarn/README.md
Normal file
@@ -0,0 +1,45 @@
|
||||
# Deploy Spark On Yarn
|
||||
|
||||
## Client
|
||||
|
||||
```bash
docker service create \
	--name spark-client \
	--hostname spark-client \
	--network swarm-net \
	--replicas 1 \
	--detach=true \
	newnius/spark:2.3.1-yarn
```
|
||||
|
||||
## Validate installation
|
||||
|
||||
#### spark-submit PI
|
||||
|
||||
```bash
spark-submit \
	--master yarn \
	--deploy-mode cluster \
	--class org.apache.spark.examples.JavaSparkPi \
	./examples/jars/spark-examples*.jar 100
```
|
||||
|
||||
#### spark-shell HDFS wordcount
|
||||
|
||||
Run `spark-shell --master yarn` to start the shell.
|
||||
|
||||
```scala
val lines = sc.textFile("hdfs://hadoop-master:8020/user/root/input")

val words = lines.flatMap(_.split("\\s+"))

val wc = words.map(word => (word, 1)).reduceByKey(_ + _)

wc.collect()

val cnt = words.map(word => 1).reduce(_ + _)
```
|
||||
|
||||
## Browse the web UI
|
||||
|
||||
In Spark on YARN mode, Spark jobs appear in the YARN web UI.
|
||||
10
spark/2.3.1-yarn/bootstrap.sh
Executable file
10
spark/2.3.1-yarn/bootstrap.sh
Executable file
@@ -0,0 +1,10 @@
|
||||
#! /bin/bash
# Container entrypoint: overlay optional extra Hadoop configuration files,
# then idle so the container keeps running.

## replace config
# EXTRA_CONF_DIR may be provided by the environment; it defaults to /config/hadoop.
: "${EXTRA_CONF_DIR:=/config/hadoop}"

if [ -d "$EXTRA_CONF_DIR" ]; then
	# Quoted so a directory path containing spaces is not word-split.
	cp "$EXTRA_CONF_DIR"/* /usr/local/hadoop/etc/hadoop/
fi

# Exit cleanly on stop signals. Without a handler the script, when it runs as
# PID 1 of the container, would not react to SIGTERM until it is killed.
trap 'exit 0' TERM INT

# Sleep in the background and wait on it: a trapped signal interrupts `wait`
# immediately, whereas a foreground sleep would delay the handler until it ends.
while true; do
	sleep 1000 &
	wait $!
done
|
||||
28
spark/2.3.1-yarn/config/core-site.xml
Normal file
28
spark/2.3.1-yarn/config/core-site.xml
Normal file
@@ -0,0 +1,28 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||
<!--
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. See accompanying LICENSE file.
|
||||
-->
|
||||
|
||||
<!-- Put site-specific property overrides in this file. -->
|
||||
|
||||
<configuration>
|
||||
<property>
	<!-- Default filesystem URI. Only the current key is set; the deprecated
	     alias fs.default.name carried the same value and is no longer listed. -->
	<name>fs.defaultFS</name>
	<value>hdfs://hadoop-master:8020</value>
</property>
|
||||
</configuration>
|
||||
40
spark/2.3.1-yarn/config/hdfs-site.xml
Normal file
40
spark/2.3.1-yarn/config/hdfs-site.xml
Normal file
@@ -0,0 +1,40 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||
<!--
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. See accompanying LICENSE file.
|
||||
-->
|
||||
|
||||
<!-- Put site-specific property overrides in this file. -->
|
||||
|
||||
<configuration>
|
||||
<property>
	<!-- dfs.permissions is the deprecated name of this key; the current name is used. -->
	<name>dfs.permissions.enabled</name>
	<value>false</value>
</property>
|
||||
<property>
|
||||
<name>dfs.namenode.secondary.http-address</name>
|
||||
<value>hadoop-slave1:50090</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.namenode.http-address</name>
|
||||
<value>hadoop-master:50070</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.datanode.max.transfer.threads</name>
|
||||
<value>8192</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>dfs.replication</name>
|
||||
<value>3</value>
|
||||
</property>
|
||||
</configuration>
|
||||
32
spark/2.3.1-yarn/config/mapred-site.xml
Normal file
32
spark/2.3.1-yarn/config/mapred-site.xml
Normal file
@@ -0,0 +1,32 @@
|
||||
<?xml version="1.0"?>
|
||||
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||
<!--
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. See accompanying LICENSE file.
|
||||
-->
|
||||
|
||||
<!-- Put site-specific property overrides in this file. -->
|
||||
|
||||
<configuration>
|
||||
<property>
|
||||
<name>mapreduce.framework.name</name>
|
||||
<value>yarn</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>mapreduce.jobhistory.address</name>
|
||||
<value>hadoop-master:10020</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>mapreduce.jobhistory.webapp.address</name>
|
||||
<value>hadoop-master:19888</value>
|
||||
</property>
|
||||
</configuration>
|
||||
3
spark/2.3.1-yarn/config/slaves
Normal file
3
spark/2.3.1-yarn/config/slaves
Normal file
@@ -0,0 +1,3 @@
|
||||
hadoop-slave1
|
||||
hadoop-slave2
|
||||
hadoop-slave3
|
||||
49
spark/2.3.1-yarn/config/yarn-site.xml
Normal file
49
spark/2.3.1-yarn/config/yarn-site.xml
Normal file
@@ -0,0 +1,49 @@
|
||||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. See accompanying LICENSE file.
|
||||
-->
|
||||
<!-- Site specific YARN configuration properties -->
|
||||
<configuration>
|
||||
<property>
|
||||
<name>yarn.application.classpath</name>
|
||||
<value>/usr/local/hadoop/etc/hadoop, /usr/local/hadoop/share/hadoop/common/*, /usr/local/hadoop/share/hadoop/common/lib/*, /usr/local/hadoop/share/hadoop/hdfs/*, /usr/local/hadoop/share/hadoop/hdfs/lib/*, /usr/local/hadoop/share/hadoop/mapreduce/*, /usr/local/hadoop/share/hadoop/mapreduce/lib/*, /usr/local/hadoop/share/hadoop/yarn/*, /usr/local/hadoop/share/hadoop/yarn/lib/*</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>yarn.resourcemanager.hostname</name>
|
||||
<value>hadoop-master</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>yarn.nodemanager.aux-services</name>
|
||||
<value>mapreduce_shuffle</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>yarn.log-aggregation-enable</name>
|
||||
<value>true</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>yarn.log-aggregation.retain-seconds</name>
|
||||
<value>604800</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>yarn.nodemanager.resource.memory-mb</name>
|
||||
<value>2048</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>yarn.nodemanager.resource.cpu-vcores</name>
|
||||
<value>2</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>yarn.scheduler.minimum-allocation-mb</name>
|
||||
<value>1024</value>
|
||||
</property>
|
||||
</configuration>
|
||||
Reference in New Issue
Block a user