update spark

This commit is contained in:
2018-08-08 11:25:11 +08:00
parent 8750cbd8c2
commit 72da02749e
3 changed files with 95 additions and 0 deletions

34
spark/test/Dockerfile Normal file
View File

@@ -0,0 +1,34 @@
# Spark client image: Alpine + OpenJDK 8 JRE + a Spark binary distribution
# built for Hadoop 2.7, with Hadoop client configuration copied in.
FROM alpine:3.8

# MAINTAINER is deprecated; a label carries the same contact information.
LABEL maintainer="Newnius <newnius.cn@gmail.com>"

# Stated explicitly: bootstrap.sh copies files into /usr/local/hadoop/etc/hadoop at start-up.
USER root

# Prerequisites
RUN apk add --no-cache \
        bash \
        coreutils \
        openjdk8-jre \
        openssh \
        openssl \
        procps \
        rsync

# JAVA_HOME and PATH are separate instructions on purpose: a variable set in an
# ENV instruction is only visible to the instructions that follow it.
ENV JAVA_HOME=/usr/lib/jvm/java-1.8-openjdk
ENV PATH=$PATH:$JAVA_HOME/bin

ENV SPARK_VER=2.2.1

# Fetch, unpack and symlink Spark in a single layer; the tarball is removed in
# the same RUN so it never lands in the image.
RUN wget https://archive.apache.org/dist/spark/spark-$SPARK_VER/spark-$SPARK_VER-bin-hadoop2.7.tgz && \
    tar -xf spark-$SPARK_VER-bin-hadoop2.7.tgz -C /usr/local && \
    rm spark-$SPARK_VER-bin-hadoop2.7.tgz && \
    ln -s /usr/local/spark-$SPARK_VER-bin-hadoop2.7 /usr/local/spark

ENV SPARK_HOME=/usr/local/spark

# The trailing slash is required: with a wildcard source the destination must be
# a directory, otherwise the build fails (several matches) or a single match is
# written as a file named "hadoop".
COPY config/* /usr/local/hadoop/etc/hadoop/
ENV HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop

ENV PATH=$PATH:$SPARK_HOME/bin

COPY bootstrap.sh /etc/bootstrap.sh

WORKDIR /usr/local/spark

# "-d" makes bootstrap.sh keep the container alive after applying the config.
CMD ["/etc/bootstrap.sh", "-d"]

45
spark/test/README.md Normal file
View File

@@ -0,0 +1,45 @@
# Deploy Spark On Yarn
## Client
```bash
docker service create \
--name spark-client \
--hostname spark-client \
--network swarm-net \
--replicas 1 \
--detach true \
newnius/spark:2.2.1-yarn
```
## Validate installation
#### spark-submit PI
```bash
spark-submit \
--master yarn \
--deploy-mode cluster \
--class org.apache.spark.examples.JavaSparkPi \
./examples/jars/spark-examples*.jar 100
```
#### spark-shell HDFS wordcount
Run `spark-shell --master yarn` to start the Spark shell, then enter:
```scala
val lines = sc.textFile("hdfs://hadoop-master:8020/user/root/input")
val words = lines.flatMap(_.split("\\s+"))
val wc = words.map(word => (word, 1)).reduceByKey(_ + _)
wc.collect()
val cnt = words.map(word => 1).reduce(_ + _)
```
## Browse the web UI
In Spark on YARN mode, Spark jobs are listed in the YARN web UI.

16
spark/test/bootstrap.sh Executable file
View File

@@ -0,0 +1,16 @@
#! /bin/bash
# Container entrypoint.
#   1. If $EXTRA_CONF_DIR (default /config/hadoop) is a directory, copy its
#      entries over the Hadoop configuration in /usr/local/hadoop/etc/hadoop/.
#   2. "-d"    : block forever so the container stays up.
#      "-bash" : start an interactive bash.

## replace config
# Quoted so a value containing spaces or glob characters is not re-split.
: "${EXTRA_CONF_DIR:=/config/hadoop}"
if [ -d "$EXTRA_CONF_DIR" ]; then
	# nullglob: an empty directory yields an empty list instead of a literal
	# "*" that cp would fail on.
	shopt -s nullglob
	extra_conf=("$EXTRA_CONF_DIR"/*)
	shopt -u nullglob
	if [ "${#extra_conf[@]}" -gt 0 ]; then
		cp -- "${extra_conf[@]}" /usr/local/hadoop/etc/hadoop/
	fi
fi

if [[ $1 == "-d" ]]; then
	# As PID 1 the shell gets no default signal action, so without a trap a
	# stop request is ignored until the runtime falls back to SIGKILL.
	trap 'exit 0' TERM INT
	while true; do
		# Sleep in the background and wait on it: bash runs traps only
		# between foreground commands, but `wait` is interruptible.
		sleep 1000 &
		wait $!
	done
fi

if [[ $1 == "-bash" ]]; then
	/bin/bash
fi