mirror of
https://github.com/newnius/Dockerfiles.git
synced 2025-12-15 18:36:44 +00:00
add spark
This commit is contained in:
29
spark/1.6.0/config/hadoop/core-site.xml
Executable file
29
spark/1.6.0/config/hadoop/core-site.xml
Executable file
@@ -0,0 +1,29 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

  <!-- Default filesystem: the NameNode RPC endpoint on the master container. -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://spark-master:8020</value>
  </property>

  <!-- NOTE(review): fs.default.name is the deprecated alias of fs.defaultFS;
       presumably kept so that older Hadoop clients resolve the same NameNode. -->
  <property>
    <name>fs.default.name</name>
    <value>hdfs://spark-master:8020</value>
  </property>

</configuration>
46
spark/1.6.0/config/hadoop/hdfs-site.xml
Executable file
46
spark/1.6.0/config/hadoop/hdfs-site.xml
Executable file
@@ -0,0 +1,46 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

  <!-- Disable HDFS permission checking (single-user demo cluster). -->
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>

  <!-- Run the secondary NameNode on the first slave, web UIs on the
       classic Hadoop 2.x ports (50090 / 50070). -->
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>spark-slave1:50090</value>
  </property>
  <property>
    <name>dfs.namenode.http-address</name>
    <value>spark-master:50070</value>
  </property>

  <property>
    <name>dfs.datanode.max.transfer.threads</name>
    <value>8192</value>
  </property>

  <!-- FIX: the slaves file defines only two datanodes (spark-slave1 and
       spark-slave2); the previous value of 3 would leave every block
       permanently under-replicated. -->
  <property>
    <name>dfs.replication</name>
    <value>2</value>
  </property>

</configuration>
26
spark/1.6.0/config/hadoop/mapred-site.xml
Executable file
26
spark/1.6.0/config/hadoop/mapred-site.xml
Executable file
@@ -0,0 +1,26 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

  <!-- Run MapReduce jobs on YARN rather than the classic/local runner. -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>

</configuration>
2
spark/1.6.0/config/hadoop/slaves
Executable file
2
spark/1.6.0/config/hadoop/slaves
Executable file
@@ -0,0 +1,2 @@
|
||||
spark-slave1
|
||||
spark-slave2
|
||||
57
spark/1.6.0/config/hadoop/yarn-site.xml
Executable file
57
spark/1.6.0/config/hadoop/yarn-site.xml
Executable file
@@ -0,0 +1,57 @@
<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- Site specific YARN configuration properties -->
<configuration>

  <!-- Classpath handed to YARN containers (Hadoop installed under
       /usr/local/hadoop in the image). -->
  <property>
    <name>yarn.application.classpath</name>
    <value>/usr/local/hadoop/etc/hadoop, /usr/local/hadoop/share/hadoop/common/*, /usr/local/hadoop/share/hadoop/common/lib/*, /usr/local/hadoop/share/hadoop/hdfs/*, /usr/local/hadoop/share/hadoop/hdfs/lib/*, /usr/local/hadoop/share/hadoop/mapreduce/*, /usr/local/hadoop/share/hadoop/mapreduce/lib/*, /usr/local/hadoop/share/hadoop/yarn/*, /usr/local/hadoop/share/hadoop/yarn/lib/*</value>
  </property>

  <!-- ResourceManager runs on the master container. -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>spark-master</value>
  </property>

  <!-- Shuffle service required by MapReduce on YARN. -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>

  <!-- Aggregate container logs to HDFS and keep them for 7 days (604800 s). -->
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>604800</value>
  </property>

  <!-- Per-NodeManager resources: 2 GB RAM, 2 vcores; smallest container 1 GB. -->
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>2048</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>2</value>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>1024</value>
  </property>

</configuration>
12
spark/1.6.0/config/spark-yarn-remote-client/core-site.xml
Executable file
12
spark/1.6.0/config/spark-yarn-remote-client/core-site.xml
Executable file
@@ -0,0 +1,12 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>

  <!-- NOTE(review): fs.default.name is the deprecated alias of fs.defaultFS;
       value matches the NameNode endpoint in the cluster-side core-site.xml. -->
  <property>
    <name>fs.default.name</name>
    <value>hdfs://spark-master:8020</value>
  </property>

  <property>
    <name>dfs.client.use.legacy.blockreader</name>
    <value>true</value>
  </property>

</configuration>
26
spark/1.6.0/config/spark-yarn-remote-client/yarn-site.xml
Executable file
26
spark/1.6.0/config/spark-yarn-remote-client/yarn-site.xml
Executable file
@@ -0,0 +1,26 @@
<?xml version="1.0"?>
<!-- FIX: added the XML declaration; every other config file in this commit
     declares it, and well-formed Hadoop site files should start with one. -->

<!-- Remote-client view of the ResourceManager endpoints on spark-master. -->
<configuration>

  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>spark-master:8030</value>
  </property>

  <property>
    <name>yarn.resourcemanager.address</name>
    <value>spark-master:8032</value>
  </property>

  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>spark-master:8088</value>
  </property>

  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>spark-master:8031</value>
  </property>

  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>spark-master:8033</value>
  </property>

  <!-- Container classpath; also exposes /usr/local/hadoop/share/spark/* so
       Spark jars are visible to YARN-launched processes. -->
  <property>
    <name>yarn.application.classpath</name>
    <value>/usr/local/hadoop/etc/hadoop, /usr/local/hadoop/share/hadoop/common/*, /usr/local/hadoop/share/hadoop/common/lib/*, /usr/local/hadoop/share/hadoop/hdfs/*, /usr/local/hadoop/share/hadoop/hdfs/lib/*, /usr/local/hadoop/share/hadoop/mapreduce/*, /usr/local/hadoop/share/hadoop/mapreduce/lib/*, /usr/local/hadoop/share/hadoop/yarn/*, /usr/local/hadoop/share/hadoop/yarn/lib/*, /usr/local/hadoop/share/spark/*</value>
  </property>

</configuration>
3
spark/1.6.0/config/spark/slaves
Executable file
3
spark/1.6.0/config/spark/slaves
Executable file
@@ -0,0 +1,3 @@
|
||||
# A Spark Worker will be started on each of the machines listed below.
|
||||
spark-slave1
|
||||
spark-slave2
|
||||
10
spark/1.6.0/config/spark/spark-defaults.conf
Normal file
10
spark/1.6.0/config/spark/spark-defaults.conf
Normal file
@@ -0,0 +1,10 @@
# Default system properties included when running spark-submit.
# This is useful for setting default environmental settings.

# Example:
spark.master            spark://spark-master:7077
spark.eventLog.enabled  true
# FIX: was "hdfs://spark-master:/var/log/spark" — a colon with an empty port
# is rejected by Hadoop's URI parsing; use the NameNode port declared as
# fs.defaultFS in core-site.xml (8020).
spark.eventLog.dir      hdfs://spark-master:8020/var/log/spark
spark.serializer        org.apache.spark.serializer.KryoSerializer
# spark.driver.memory              5g
# spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
52
spark/1.6.0/config/spark/spark-env.sh
Executable file
52
spark/1.6.0/config/spark/spark-env.sh
Executable file
@@ -0,0 +1,52 @@
#!/usr/bin/env bash

# This file is sourced when running various Spark programs.
# Copy it as spark-env.sh and edit that to configure Spark for your site.

# Options read when launching programs locally with
# ./bin/run-example or ./bin/spark-submit
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program
# - SPARK_CLASSPATH, default classpath entries to append

# Options read by executors and drivers running inside the cluster
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
# - SPARK_CLASSPATH, default classpath entries to append
# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
# - MESOS_NATIVE_LIBRARY, to point to your libmesos.so if you use Mesos

# Options read in YARN client mode
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - SPARK_EXECUTOR_INSTANCES, Number of workers to start (Default: 2)
# - SPARK_EXECUTOR_CORES, Number of cores for the workers (Default: 1).
# - SPARK_EXECUTOR_MEMORY, Memory per Worker (e.g. 1000M, 2G) (Default: 1G)
# - SPARK_DRIVER_MEMORY, Memory for Master (e.g. 1000M, 2G) (Default: 512 Mb)
# - SPARK_YARN_APP_NAME, The name of your application (Default: Spark)
# - SPARK_YARN_QUEUE, The hadoop queue to use for allocation requests (Default: ‘default’)
# - SPARK_YARN_DIST_FILES, Comma separated list of files to be distributed with the job.
# - SPARK_YARN_DIST_ARCHIVES, Comma separated list of archives to be distributed with the job.

# Options for the daemons used in the standalone deploy mode:
# - SPARK_MASTER_IP, to bind the master to a different IP address or hostname
# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
# - SPARK_WORKER_INSTANCES, to set the number of worker processes per node
# - SPARK_WORKER_DIR, to set the working directory of worker processes
# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers

export JAVA_HOME=/usr/java/default
export SPARK_MASTER_IP=spark-master
# FIX: was "SPARK_MORKER_PORT=7077" (typo), which left SPARK_MASTER_PORT unset
# and made the MASTER URL below expand to "spark://spark-master:". 7077 is the
# master port used by spark.master in spark-defaults.conf.
export SPARK_MASTER_PORT=7077
export SPARK_WORKER_CORES=1
export SPARK_WORKER_INSTANCES=1
export SPARK_WORKER_MEMORY=2g
export MASTER=spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}
# FIX: the history-server log directory had an empty port ("spark-master:");
# use the NameNode port (8020) declared in core-site.xml.
export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=18080 -Dspark.history.retainedApplications=3 -Dspark.history.fs.logDirectory=hdfs://spark-master:8020/var/log/spark"
Reference in New Issue
Block a user