diff --git a/spark/test/Dockerfile b/spark/test/Dockerfile
new file mode 100644
index 0000000..e4f122d
--- /dev/null
+++ b/spark/test/Dockerfile
@@ -0,0 +1,30 @@
+FROM alpine:3.8
+
+MAINTAINER Newnius
+
+USER root
+
+# Prerequisites
+RUN apk add --no-cache openssh openssl openjdk8-jre rsync bash procps
+
+ENV JAVA_HOME /usr/lib/jvm/java-1.8-openjdk
+
+ENV PATH $PATH:$JAVA_HOME/bin
+
+ENV SPARK_VER 2.2.1
+
+RUN wget https://archive.apache.org/dist/spark/spark-$SPARK_VER/spark-$SPARK_VER-bin-without-hadoop.tgz && \
+    tar -xvf spark-$SPARK_VER-bin-without-hadoop.tgz -C /usr/local && \
+    rm spark-$SPARK_VER-bin-without-hadoop.tgz
+
+RUN ln -s /usr/local/spark-$SPARK_VER-bin-without-hadoop /usr/local/spark
+
+ENV SPARK_HOME /usr/local/spark
+
+ENV PATH $PATH:$SPARK_HOME/bin
+
+ADD bootstrap.sh /etc/bootstrap.sh
+
+WORKDIR /usr/local/spark
+
+ENTRYPOINT ["/etc/bootstrap.sh"]
diff --git a/spark/test/README.md b/spark/test/README.md
new file mode 100644
index 0000000..d8c6eeb
--- /dev/null
+++ b/spark/test/README.md
@@ -0,0 +1,93 @@
+# Spark on YARN
+
+## Create a Spark cluster in swarm mode
+
+`--hostname` requires Docker 1.13 or higher
+
+```bash
+docker service create \
+--name spark-master \
+--hostname spark-master \
+--detach=true \
+--network swarm-net \
+--replicas 1 \
+--endpoint-mode dnsrr \
+newnius/spark:2.2.1
+```
+
+```bash
+docker service create \
+--name spark-slave1 \
+--hostname spark-slave1 \
+--detach=true \
+--network swarm-net \
+--replicas 1 \
+--endpoint-mode dnsrr \
+newnius/spark:2.2.1
+```
+
+```bash
+docker service create \
+--name spark-slave2 \
+--hostname spark-slave2 \
+--detach=true \
+--network swarm-net \
+--replicas 1 \
+--endpoint-mode dnsrr \
+newnius/spark:2.2.1
+```
+
+## Init && Test
+
+When deploying for the first time, format HDFS
+
+### Stop HDFS (in master)
+```bash
+sbin/stop-dfs.sh
+```
+
+### Format HDFS (in master)
+```bash
+bin/hadoop namenode -format
+```
+
+### Start HDFS (in master)
+```bash
+sbin/start-dfs.sh
+```
+
+### Run Hello World
+```bash
+spark-submit \
+  --master yarn \
+  --deploy-mode cluster \
+  --class org.apache.spark.examples.JavaSparkPi \
+  ./examples/jars/spark-examples_2.11-2.2.1.jar 100
+```
+
+### UI
+
+YARN: spark-master:8088
+
+HDFS: spark-master:50070
+
+_A proxy is needed, e.g. [newnius/docker-proxy](https://hub.docker.com/r/newnius/docker-proxy/)_
+
+## Customized config
+
+```bash
+docker service create \
+--name spark-master \
+--hostname spark-master \
+--detach=true \
+--network swarm-net \
+--replicas 1 \
+--mount type=bind,source=/mnt/data/spark/hdfs/master,target=/tmp/hadoop-root \
+--mount type=bind,source=/mnt/data/spark/logs/master,target=/usr/local/hadoop/logs \
+--mount type=bind,source=/mnt/data/spark/config/hadoop,target=/mnt/config/hadoop \
+--mount type=bind,source=/mnt/data/spark/config/spark,target=/mnt/config/spark \
+--endpoint-mode dnsrr \
+newnius/spark:2.2.1
+```
+
+You don't need to put every config file in these directories; only add the files you want to replace.
diff --git a/spark/test/bootstrap.sh b/spark/test/bootstrap.sh
new file mode 100755
index 0000000..3bd71eb
--- /dev/null
+++ b/spark/test/bootstrap.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+if [[ $1 == "master" ]]; then
+    ./sbin/start-master.sh
+fi
+
+if [[ $1 == "slave" ]]; then
+    ./sbin/start-slave.sh spark://$2:7077
+fi
+
+while true; do sleep 1000; done # keep the container running
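
`bootstrap.sh` switches on a role argument: `master`, or `slave` with the master's hostname as `$2`, yet the `docker service create` commands in the README pass no arguments to the image. A minimal sketch of how the role could be supplied, reusing the service names from the README; this is an assumption about the intended usage, since the README itself never shows it:

```bash
# Everything after the image name becomes arguments to the ENTRYPOINT,
# so bootstrap.sh receives $1="slave" and $2="spark-master" here and
# runs ./sbin/start-slave.sh spark://spark-master:7077.
docker service create \
--name spark-slave1 \
--hostname spark-slave1 \
--detach=true \
--network swarm-net \
--replicas 1 \
--endpoint-mode dnsrr \
newnius/spark:2.2.1 slave spark-master
```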
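The customized-config mounts place files under `/mnt/config/hadoop` and `/mnt/config/spark`, but the bootstrap script shown here contains no copy step, so the override mechanism is assumed to live elsewhere in the image. A sketch of preparing an override on the host; the file name is a standard Spark config file, while the property value is purely illustrative:

```bash
# Only files you want to replace need to exist in the mounted directory;
# anything absent falls back to the defaults baked into the image.
mkdir -p /mnt/data/spark/config/spark
cat > /mnt/data/spark/config/spark/spark-defaults.conf <<'EOF'
spark.eventLog.enabled  true
EOF
```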