mirror of
https://github.com/newnius/Dockerfiles.git
synced 2025-12-13 01:36:43 +00:00
update spark
This commit is contained in:
34
spark/test/Dockerfile
Normal file
34
spark/test/Dockerfile
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
# Spark 2.2.1 client image for Spark-on-YARN (Alpine based).
FROM alpine:3.8

# MAINTAINER is deprecated (hadolint DL4000); use a LABEL instead.
LABEL maintainer="Newnius <newnius.cn@gmail.com>"

# Root is required: we install packages and write under /usr/local.
USER root

# Prerequisites: JRE for Spark, ssh/rsync for cluster tooling,
# bash/procps/coreutils for the bootstrap script and Spark launch scripts.
RUN apk add --no-cache openssh openssl openjdk8-jre rsync bash procps coreutils

ENV JAVA_HOME=/usr/lib/jvm/java-1.8-openjdk
ENV PATH=$PATH:$JAVA_HOME/bin

ENV SPARK_VER=2.2.1

# Download, unpack and remove the tarball in a single layer so the
# archive never persists in the image.
# NOTE(review): consider verifying the download against the published
# SHA-512 from archive.apache.org before extracting.
RUN wget https://archive.apache.org/dist/spark/spark-$SPARK_VER/spark-$SPARK_VER-bin-hadoop2.7.tgz && \
	tar -xvf spark-$SPARK_VER-bin-hadoop2.7.tgz -C /usr/local && \
	rm spark-$SPARK_VER-bin-hadoop2.7.tgz

# Stable, version-independent path for downstream config and scripts.
RUN ln -s /usr/local/spark-$SPARK_VER-bin-hadoop2.7 /usr/local/spark

ENV SPARK_HOME=/usr/local/spark

# COPY (not ADD) for plain local files; with a wildcard/multi-file source
# the destination MUST end with "/" or the build fails.
COPY config/ /usr/local/hadoop/etc/hadoop/

# Point Spark at the Hadoop client configuration baked in above.
ENV HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop

ENV PATH=$PATH:$SPARK_HOME/bin

COPY bootstrap.sh /etc/bootstrap.sh

WORKDIR /usr/local/spark

# "-d" keeps the container alive (see bootstrap.sh).
CMD ["/etc/bootstrap.sh", "-d"]
|
||||||
45
spark/test/README.md
Normal file
45
spark/test/README.md
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
# Deploy Spark On Yarn

## Client

```bash
docker service create \
	--name spark-client \
	--hostname spark-client \
	--network swarm-net \
	--replicas 1 \
	--detach true \
	newnius/spark:2.2.1-yarn
```

## Validate installation

#### spark-submit PI

```bash
spark-submit \
	--master yarn \
	--deploy-mode cluster \
	--class org.apache.spark.examples.JavaSparkPi \
	./examples/jars/spark-examples*.jar 100
```

#### spark-shell HDFS wordcount

Run `spark-shell --master yarn` to start an interactive shell, then execute:

```shell
val lines = sc.textFile("hdfs://hadoop-master:8020/user/root/input")

val words = lines.flatMap(_.split("\\s+"))

val wc = words.map(word => (word, 1)).reduceByKey(_ + _)

wc.collect()

val cnt = words.map(word => 1).reduce(_ + _)
```

## Browse the web UI

In Spark-on-YARN mode, the Spark jobs appear in the YARN web UI.
|
||||||
16
spark/test/bootstrap.sh
Executable file
16
spark/test/bootstrap.sh
Executable file
@@ -0,0 +1,16 @@
|
|||||||
|
#!/bin/bash
# Container entry point: overlay extra Hadoop config if mounted, then
# either keep the container alive (-d) or drop into a shell (-bash).

## replace config
# Default mount point for user-supplied Hadoop config; quote the
# expansion so a path with spaces cannot be word-split (SC2086).
: "${EXTRA_CONF_DIR:=/config/hadoop}"

if [ -d "$EXTRA_CONF_DIR" ]; then
	# Quote the directory part; the glob itself must stay unquoted to expand.
	cp "$EXTRA_CONF_DIR"/* /usr/local/hadoop/etc/hadoop/
fi

# "-d": park the process so the container stays running.
if [[ $1 == "-d" ]]; then
	while true; do sleep 1000; done
fi

# "-bash": replace this script with an interactive shell so bash becomes
# PID 1 and receives signals directly (no orphaned wrapper process).
if [[ $1 == "-bash" ]]; then
	exec /bin/bash
fi
|
||||||
Reference in New Issue
Block a user