From 72da02749e897cb300874bbe8dd95fceff27ec6f Mon Sep 17 00:00:00 2001
From: Newnius
Date: Wed, 8 Aug 2018 11:25:11 +0800
Subject: [PATCH] update spark

---
 spark/test/Dockerfile   | 34 +++++++++++++++++++++++++++++++
 spark/test/README.md    | 45 +++++++++++++++++++++++++++++++++++++++++
 spark/test/bootstrap.sh | 16 +++++++++++++++
 3 files changed, 95 insertions(+)
 create mode 100644 spark/test/Dockerfile
 create mode 100644 spark/test/README.md
 create mode 100755 spark/test/bootstrap.sh

diff --git a/spark/test/Dockerfile b/spark/test/Dockerfile
new file mode 100644
index 0000000..ff74bd3
--- /dev/null
+++ b/spark/test/Dockerfile
@@ -0,0 +1,34 @@
+FROM alpine:3.8
+
+LABEL maintainer="Newnius"
+
+USER root
+
+# Prerequisites
+RUN apk add --no-cache bash coreutils openjdk8-jre openssh openssl procps rsync
+
+ENV JAVA_HOME=/usr/lib/jvm/java-1.8-openjdk
+
+ENV PATH=$PATH:$JAVA_HOME/bin
+
+ENV SPARK_VER=2.2.1
+
+RUN wget https://archive.apache.org/dist/spark/spark-$SPARK_VER/spark-$SPARK_VER-bin-hadoop2.7.tgz && \
+    tar -xf spark-$SPARK_VER-bin-hadoop2.7.tgz -C /usr/local && \
+    rm spark-$SPARK_VER-bin-hadoop2.7.tgz
+
+RUN ln -s /usr/local/spark-$SPARK_VER-bin-hadoop2.7 /usr/local/spark
+
+ENV SPARK_HOME=/usr/local/spark
+
+COPY config/ /usr/local/hadoop/etc/hadoop/
+
+ENV HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
+
+ENV PATH=$PATH:$SPARK_HOME/bin
+
+COPY bootstrap.sh /etc/bootstrap.sh
+
+WORKDIR /usr/local/spark
+
+CMD ["/etc/bootstrap.sh", "-d"]
diff --git a/spark/test/README.md b/spark/test/README.md
new file mode 100644
index 0000000..b207c1f
--- /dev/null
+++ b/spark/test/README.md
@@ -0,0 +1,45 @@
+# Deploy Spark On Yarn
+
+## Client
+
+```bash
+docker service create \
+  --name spark-client \
+  --hostname spark-client \
+  --network swarm-net \
+  --replicas 1 \
+  --detach true \
+  newnius/spark:2.2.1-yarn
+```
+
+## Validate installation
+
+#### spark-submit PI
+
+```bash
+spark-submit \
+  --master yarn \
+  --deploy-mode cluster \
+  --class org.apache.spark.examples.JavaSparkPi \
+  ./examples/jars/spark-examples*.jar 100
+```
+
+#### spark-shell HDFS wordcount
+
+Run `spark-shell --master yarn` to open the shell.
+
+```shell
+val lines = sc.textFile("hdfs://hadoop-master:8020/user/root/input")
+
+val words = lines.flatMap(_.split("\\s+"))
+
+val wc = words.map(word => (word, 1)).reduceByKey(_ + _)
+
+wc.collect()
+
+val cnt = words.map(word => 1).reduce(_ + _)
+```
+
+## Browse the web UI
+
+In Spark on YARN mode, the Spark jobs appear in the YARN web UI.
diff --git a/spark/test/bootstrap.sh b/spark/test/bootstrap.sh
new file mode 100755
index 0000000..aaaf7b2
--- /dev/null
+++ b/spark/test/bootstrap.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+## replace config
+: "${EXTRA_CONF_DIR:=/config/hadoop}"
+
+if [ -d "$EXTRA_CONF_DIR" ]; then
+  cp "$EXTRA_CONF_DIR"/* /usr/local/hadoop/etc/hadoop/
+fi
+
+if [[ $1 == "-d" ]]; then
+  while true; do sleep 1000; done
+fi
+
+if [[ $1 == "-bash" ]]; then
+  exec /bin/bash
+fi