diff --git a/spark/1.6.0/Dockerfile b/spark/1.6.0/Dockerfile
deleted file mode 100644
index f01b9bd..0000000
--- a/spark/1.6.0/Dockerfile
+++ /dev/null
@@ -1,30 +0,0 @@
-FROM alpine:3.8
-
-MAINTAINER Newnius
-
-USER root
-
-# Prerequisites
-RUN apk add --no-cache openssh openssl openjdk8-jre rsync bash procps coreutils
-
-ENV JAVA_HOME /usr/lib/jvm/java-1.8-openjdk
-
-ENV PATH $PATH:$JAVA_HOME/bin
-
-ENV SPARK_VER 1.6.0
-
-RUN wget https://archive.apache.org/dist/spark/spark-$SPARK_VER/spark-$SPARK_VER-bin-hadoop2.6.tgz && \
-    tar -xvf spark-$SPARK_VER-bin-hadoop2.6.tgz -C /usr/local && \
-    rm spark-$SPARK_VER-bin-hadoop2.6.tgz
-
-RUN ln -s /usr/local/spark-$SPARK_VER-bin-hadoop2.6 /usr/local/spark
-
-ENV SPARK_HOME /usr/local/spark
-
-ENV PATH $PATH:$SPARK_HOME/bin
-
-ADD bootstrap.sh /etc/bootstrap.sh
-
-WORKDIR /usr/local/spark
-
-ENTRYPOINT ["/etc/bootstrap.sh"]
diff --git a/spark/1.6.0/README.md b/spark/1.6.0/README.md
deleted file mode 100644
index 2dd9442..0000000
--- a/spark/1.6.0/README.md
+++ /dev/null
@@ -1,64 +0,0 @@
-# Deploy a Spark cluster in standalone mode
-
-## Master
-
-```bash
-docker service create \
-    --name spark-master \
-    --hostname spark-master \
-    --network swarm-net \
-    --replicas 1 \
-    --detach true \
-    --endpoint-mode dnsrr \
-    newnius/spark:1.6.0 master
-```
-
-## Slaves
-
-```bash
-docker service create \
-    --name spark-slave \
-    --network swarm-net \
-    --replicas 5 \
-    --detach true \
-    --endpoint-mode dnsrr \
-    newnius/spark:1.6.0 slave spark://spark-master:7077
-```
-
-## Validate installation
-
-#### spark-submit Pi
-
-```bash
-spark-submit \
-    --master spark://spark-master:7077 \
-    --deploy-mode cluster \
-    --class org.apache.spark.examples.JavaSparkPi \
-    ./lib/spark-examples-1.6.0-hadoop2.6.0.jar 100
-```
-
-#### spark-shell HDFS wordcount
-
-Run `spark-shell --master spark://spark-master:7077` to open an interactive shell.
-
-```shell
-val lines = sc.textFile("hdfs://hadoop-master:8020/user/root/input")
-
-val words = lines.flatMap(_.split("\\s+"))
-
-val wc = words.map(word => (word, 1)).reduceByKey(_ + _)
-
-wc.collect()
-
-val cnt = words.map(word => 1).reduce(_ + _)
-```
-
-## Browse the web UI
-
-You could expose the ports in the service definitions, but I'd rather not, since every slave would try to occupy the same host ports.
-
-To access the web UI, deploy a (SOCKS5) proxy inside the overlay network to route the traffic.
-
-If you don't have one, try [newnius/docker-proxy](https://hub.docker.com/r/newnius/docker-proxy/); it is rather easy to use.
-
-Visit [spark-master:8080](http://spark-master:8080) to view the cluster.
diff --git a/spark/1.6.0/bootstrap.sh b/spark/1.6.0/bootstrap.sh
deleted file mode 100755
index d1b8b0b..0000000
--- a/spark/1.6.0/bootstrap.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#! /bin/bash
-
-if [[ $1 == "master" ]]; then
-    /usr/local/spark/sbin/start-master.sh
-fi
-
-if [[ $1 == "slave" ]]; then
-    /usr/local/spark/sbin/start-slave.sh $2
-fi
-
-while true; do sleep 1000; done
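A few notes on the files removed above. The README's two `docker service create` calls are order-sensitive (slaves need the master's DNS name to resolve), so they are sometimes collected into one script. A minimal sketch, assuming a Swarm manager node; the network-creation step and the script itself are additions for illustration, not part of the deleted image:

```bash
#!/bin/bash
# deploy-spark.sh -- hypothetical helper bundling the deleted README's commands.
set -euo pipefail

# Create the overlay network the README assumes, if it does not exist yet.
docker network inspect swarm-net >/dev/null 2>&1 || \
    docker network create --driver overlay swarm-net

# One master, reachable as spark-master through Swarm DNS.
docker service create \
    --name spark-master \
    --hostname spark-master \
    --network swarm-net \
    --replicas 1 \
    --detach true \
    --endpoint-mode dnsrr \
    newnius/spark:1.6.0 master

# Five slaves, each registering against the master's RPC port 7077.
docker service create \
    --name spark-slave \
    --network swarm-net \
    --replicas 5 \
    --detach true \
    --endpoint-mode dnsrr \
    newnius/spark:1.6.0 slave spark://spark-master:7077
```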
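The `spark-submit` validation only works from a machine that can resolve `spark-master`, i.e. from inside `swarm-net`. One option, assuming you are on a node that runs the master task (the `docker ps` filter is illustrative), is to exec into that container; `/usr/local/spark` is already the working directory per the deleted Dockerfile:

```bash
# Locate the container backing the spark-master service on this node,
# then run the Pi example from inside it.
CID=$(docker ps -q -f name=spark-master)
docker exec -it "$CID" spark-submit \
    --master spark://spark-master:7077 \
    --deploy-mode cluster \
    --class org.apache.spark.examples.JavaSparkPi \
    ./lib/spark-examples-1.6.0-hadoop2.6.0.jar 100
```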
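For a scriptable check, the standalone master's web UI also serves (to my knowledge, in this Spark line) a JSON summary of the cluster state at `/json`. From any container attached to the overlay network, something like the following should list the registered workers, cores, and running applications:

```bash
# Poll the master UI's JSON endpoint from a container on swarm-net.
# Assumes the standalone master web UI (port 8080) exposes the /json route.
curl -s http://spark-master:8080/json
```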
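Finally, the deleted `bootstrap.sh` pins PID 1 with `while true; do sleep 1000; done`, which keeps the container alive but never reacts to SIGTERM, so `docker stop` stalls until the kill timeout. A sketch of a signal-aware variant (an assumption, not the published image's script; it presumes `stop-master.sh`/`stop-slave.sh` exist in this Spark release's `sbin`):

```bash
#!/bin/bash
# Hypothetical bootstrap variant: same master/slave dispatch as the deleted
# script, but shutdown requests are forwarded to the Spark daemon.

ROLE=$1

if [[ $ROLE == "master" ]]; then
    /usr/local/spark/sbin/start-master.sh
elif [[ $ROLE == "slave" ]]; then
    /usr/local/spark/sbin/start-slave.sh "$2"
fi

# On docker stop (SIGTERM), stop whichever daemon this container started.
shutdown() {
    if [[ $ROLE == "master" ]]; then
        /usr/local/spark/sbin/stop-master.sh
    else
        /usr/local/spark/sbin/stop-slave.sh
    fi
    exit 0
}
trap shutdown SIGTERM SIGINT

# Short, interruptible sleeps so the trap fires promptly.
while true; do
    sleep 1 & wait $!
done
```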