update spark

commit 94228e4944 (parent 0582327307)
spark/2.2.1-yarn/Dockerfile (new file, 26 lines)
@@ -0,0 +1,26 @@
+FROM newnius/hadoop:2.8.1
+
+MAINTAINER Newnius <newnius.cn@gmail.com>
+
+RUN wget http://mirrors.ocf.berkeley.edu/apache/spark/spark-2.2.1/spark-2.2.1-bin-hadoop2.7.tgz && \
+    tar -xvf spark-2.2.1-bin-hadoop2.7.tgz -C /usr/local && \
+    rm spark-2.2.1-bin-hadoop2.7.tgz
+
+RUN ln -s /usr/local/spark-2.2.1-bin-hadoop2.7 /usr/local/spark
+
+ENV SPARK_HOME /usr/local/spark
+
+ENV PATH $PATH:$SPARK_HOME/bin
+
+# Default Conf Files
+ADD core-site.xml $HADOOP_HOME/etc/hadoop/core-site.xml
+ADD hdfs-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml
+ADD mapred-site.xml $HADOOP_HOME/etc/hadoop/mapred-site.xml
+ADD yarn-site.xml $HADOOP_HOME/etc/hadoop/yarn-site.xml
+ADD slaves $HADOOP_HOME/etc/hadoop/slaves
+
+ADD bootstrap.sh /etc/bootstrap-spark.sh
+
+WORKDIR /usr/local/spark
+
+CMD ["/etc/bootstrap-spark.sh", "-d"]
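As a quick smoke test of the image above, one might build and run it locally; a minimal sketch, where the tag, container name, and build-context path are illustrative assumptions rather than anything defined in this commit:

```bash
# Build from the directory added in this commit and start a container.
docker build -t newnius/spark:2.2.1-yarn spark/2.2.1-yarn
docker run -d --name spark-yarn newnius/spark:2.2.1-yarn
# Spark is on PATH via SPARK_HOME, so this should print version 2.2.1.
docker exec spark-yarn spark-submit --version
```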
@@ -2,6 +2,8 @@
 
 ## Create a spark cluster in swarm mode
 
+`--hostname` needs 1.13 or higher
+
 ```bash
 docker service create \
   --name spark-master \
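Since the note added above gates the `--hostname` flag on Docker 1.13, one can confirm the engine version before deploying; the `--format` template below is standard `docker version` syntax:

```bash
# Print the server version; --hostname on services requires 1.13 or newer.
docker version --format '{{.Server.Version}}'
```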
spark/2.2.1-yarn/bootstrap.sh (new executable file, 6 lines)
@@ -0,0 +1,6 @@
+#! /bin/bash
+# replace config
+
+cp /mnt/config/spark/* $SPARK_HOME/conf
+
+bash -c "/etc/bootstrap.sh -d"
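This script copies everything under `/mnt/config/spark` into `$SPARK_HOME/conf` before chaining to the Hadoop bootstrap, so custom conf files can be injected by bind-mounting that path; a sketch assuming a host layout like the one in the old README's customized-config example:

```bash
# Host dir /mnt/data/spark/config/spark holds e.g. spark-defaults.conf;
# bootstrap-spark.sh copies it into $SPARK_HOME/conf at startup.
docker run -d --name spark-yarn \
  -v /mnt/data/spark/config/spark:/mnt/config/spark \
  newnius/spark:2.2.1-yarn
```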
@@ -1,26 +1,30 @@
-FROM newnius/hadoop:2.8.1
+FROM alpine:3.8
 
 MAINTAINER Newnius <newnius.cn@gmail.com>
 
-RUN wget http://mirrors.ocf.berkeley.edu/apache/spark/spark-2.2.1/spark-2.2.1-bin-hadoop2.7.tgz && \
-    tar -xvf spark-2.2.1-bin-hadoop2.7.tgz -C /usr/local && \
-    rm spark-2.2.1-bin-hadoop2.7.tgz
+USER root
 
-RUN ln -s /usr/local/spark-2.2.1-bin-hadoop2.7 /usr/local/spark
+# Prerequisites
+RUN apk add --no-cache openssh openssl openjdk8-jre rsync bash procps coreutils
+
+ENV JAVA_HOME /usr/lib/jvm/java-1.8-openjdk
+
+ENV PATH $PATH:$JAVA_HOME/bin
+
+ENV SPARK_VER 2.2.1
+
+RUN wget https://archive.apache.org/dist/spark/spark-$SPARK_VER/spark-$SPARK_VER-bin-hadoop2.7.tgz && \
+    tar -xvf spark-$SPARK_VER-bin-hadoop2.7.tgz -C /usr/local && \
+    rm spark-$SPARK_VER-bin-hadoop2.7.tgz
+
+RUN ln -s /usr/local/spark-$SPARK_VER-bin-hadoop2.7 /usr/local/spark
 
 ENV SPARK_HOME /usr/local/spark
 
 ENV PATH $PATH:$SPARK_HOME/bin
 
-# Default Conf Files
-ADD core-site.xml $HADOOP_HOME/etc/hadoop/core-site.xml
-ADD hdfs-site.xml $HADOOP_HOME/etc/hadoop/hdfs-site.xml
-ADD mapred-site.xml $HADOOP_HOME/etc/hadoop/mapred-site.xml
-ADD yarn-site.xml $HADOOP_HOME/etc/hadoop/yarn-site.xml
-ADD slaves $HADOOP_HOME/etc/hadoop/slaves
-
-ADD bootstrap.sh /etc/bootstrap-spark.sh
+ADD bootstrap.sh /etc/bootstrap.sh
 
 WORKDIR /usr/local/spark
 
-CMD ["/etc/bootstrap-spark.sh", "-d"]
+ENTRYPOINT ["/etc/bootstrap.sh"]
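With the base image switched to plain Alpine, Java now comes from the `openjdk8-jre` package and `JAVA_HOME` is set by hand, so it is worth verifying the toolchain inside the rebuilt image; the tag and the `--entrypoint` overrides are illustrative:

```bash
# Bypass the bootstrap entrypoint and check that Spark and Java resolve.
docker run --rm --entrypoint spark-submit newnius/spark:2.2.1 --version
docker run --rm --entrypoint java newnius/spark:2.2.1 -version
```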
@@ -1,93 +1,64 @@
-# Spark on yarn
+# Deploy a Spark cluster in standalone mode
 
-## Create a spark cluster in swarm mode
-
-`--hostname` needs 1.13 or higher
+## Master
 
 ```bash
 docker service create \
   --name spark-master \
   --hostname spark-master \
-  --detach true \
   --network swarm-net \
   --replicas 1 \
+  --detach true \
   --endpoint-mode dnsrr \
-  newnius/spark:2.2.1
+  newnius/spark:2.2.1 master
 ```
 
+## Slaves
+
 ```bash
 docker service create \
-  --name spark-slave1 \
-  --hostname spark-slave1 \
-  --detach true \
-  --network swarm-net \
-  --replicas 1 \
+  --name spark-slave \
+  --network swarm-net \
+  --replicas 5 \
+  --detach true \
   --endpoint-mode dnsrr \
-  newnius/spark:2.2.1
+  newnius/spark:2.2.1 slave spark://spark-master:7077
 ```
 
-```bash
-docker service create \
-  --name spark-slave2 \
-  --hostname spark-slave2 \
-  --detach true \
-  --network swarm-net \
-  --replicas 1 \
-  --endpoint-mode dnsrr \
-  newnius/spark:2.2.1
-```
-
-## Init && Test
-
-In the first deploy, format hdfs
-
-### Stop HDFS (in master)
-```bash
-sbin/stop-dfs.sh
-```
-
-### Format HDFS (in master)
-```
-bin/hadoop namenode -format
-```
-
-### Start HDFS (in master)
-```bash
-sbin/start-dfs.sh
-```
-
-### Run Hello World
+## Validate installation
+
+#### spark-submit PI
+
 ```bash
 spark-submit \
-  --master yarn \
+  --master spark://spark-master:7077 \
   --deploy-mode cluster \
   --class org.apache.spark.examples.JavaSparkPi \
   ./examples/jars/spark-examples_2.11-2.2.1.jar 100
 ```
 
-### UI
-
-YARN: spark-master:8088
-
-HDFS: spark-master:50070
-
-_Proxy needed, e.g. [newnius/docker-proxy](https://hub.docker.com/r/newnius/docker-proxy/)_
-
-## customized config
-
-```bash
-docker service create \
-  --name spark-master \
-  --hostname spark-master \
-  --detach=true \
-  --network swarm-net \
-  --replicas 1 \
-  --mount type=bind,source=/mnt/data/spark/hdfs/master,target=/tmp/hadoop-root \
-  --mount type=bind,source=/mnt/data/spark/logs/master,target=/usr/local/hadoop/logs \
-  --mount type=bind,source=/mnt/data/spark/config/hadoop,target=/mnt/config/hadoop \
-  --mount type=bind,source=/mnt/data/spark/config/spark,target=/mnt/config/spark \
-  --endpoint-mode dnsrr \
-  newnius/spark:2.2.1
-```
-
-You dont't need to put all files in dir, only add files to be replaced.
+#### spark-shell HDFS wordcount
+
+Run `spark-shell --master spark://spark-master:7077` to open a shell.
+
+```shell
+val lines = sc.textFile("hdfs://hadoop-master:8020/user/root/input")
+
+val words = lines.flatMap(_.split("\\s+"))
+
+val wc = words.map(word => (word, 1)).reduceByKey(_ + _)
+
+wc.collect()
+
+val cnt = words.map(word => 1).reduce(_ + _)
+```
+
+## Browse the web UI
+
+You can expose the ports in the script, but I'd rather not, since the slaves should occupy the same ports.
+
+To access the web UI, deploy another (socks5) proxy to route the traffic.
+
+If you don't have one, try [newnius/docker-proxy](https://hub.docker.com/r/newnius/docker-proxy/); it is rather easy to use.
+
+Visit [spark-master:8080](http://spark-master:8080) to view the cluster.
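To ground the proxy note added above: once a socks5 proxy is reachable from the host, the overlay-network hostnames resolve through it; a sketch assuming the proxy publishes port 1080 locally:

```bash
# --socks5-hostname makes curl resolve spark-master inside the proxy,
# which matters because the name only exists on the overlay network.
curl --socks5-hostname localhost:1080 http://spark-master:8080
```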
|
@ -1,6 +1,11 @@
|
|||||||
#! /bin/bash
|
#! /bin/bash
|
||||||
# replace config
|
|
||||||
|
|
||||||
cp /mnt/config/spark/* $SPARK_HOME/conf
|
if [[ $1 == "master" ]]; then
|
||||||
|
/usr/local/spark/sbin/start-master.sh
|
||||||
|
fi
|
||||||
|
|
||||||
bash -c "/etc/bootstrap.sh -d"
|
if [[ $1 == "slave" ]]; then
|
||||||
|
/usr/local/spark/sbin/start-slave.sh $2
|
||||||
|
fi
|
||||||
|
|
||||||
|
while true; do sleep 1000; done
|
||||||
|
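The rewritten bootstrap dispatches on its first argument and then parks in a sleep loop so the container stays alive after the Spark daemons detach. The same `master` / `slave spark://...` arguments shown with `docker service create` in the README also work with plain `docker run` on a user-defined network; a sketch where the network and container names are illustrative:

```bash
docker network create spark-net
docker run -d --name spark-master --hostname spark-master \
  --network spark-net newnius/spark:2.2.1 master
docker run -d --name spark-slave1 --network spark-net \
  newnius/spark:2.2.1 slave spark://spark-master:7077
```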
@@ -1,30 +0,0 @@
-FROM alpine:3.8
-
-MAINTAINER Newnius <newnius.cn@gmail.com>
-
-USER root
-
-# Prerequisites
-RUN apk add --no-cache openssh openssl openjdk8-jre rsync bash procps coreutils
-
-ENV JAVA_HOME /usr/lib/jvm/java-1.8-openjdk
-
-ENV PATH $PATH:$JAVA_HOME/bin
-
-ENV SPARK_VER 2.2.1
-
-RUN wget https://archive.apache.org/dist/spark/spark-$SPARK_VER/spark-$SPARK_VER-bin-hadoop2.7.tgz && \
-    tar -xvf spark-$SPARK_VER-bin-hadoop2.7.tgz -C /usr/local && \
-    rm spark-$SPARK_VER-bin-hadoop2.7.tgz
-
-RUN ln -s /usr/local/spark-$SPARK_VER-bin-hadoop2.7 /usr/local/spark
-
-ENV SPARK_HOME /usr/local/spark
-
-ENV PATH $PATH:$SPARK_HOME/bin
-
-ADD bootstrap.sh /etc/bootstrap.sh
-
-WORKDIR /usr/local/spark
-
-ENTRYPOINT ["/etc/bootstrap.sh"]
@@ -1,11 +0,0 @@
-#! /bin/bash
-
-if [[ $1 == "master" ]]; then
-    /usr/local/spark/sbin/start_master.sh
-fi
-
-if [[ $1 == "slave" ]]; then
-    /usr/local/spark/sbin/start_slave.sh $2
-fi
-
-while true; do sleep 1000; done