
Commit 0ee44c2

Spark standalone mode cluster scripts.
Heavily inspired by Hadoop cluster scripts ;-)
1 parent 545165e commit 0ee44c2

18 files changed: +825 -4 lines

.gitignore

+1
@@ -26,3 +26,4 @@ project/build/target/
 project/plugins/target/
 project/plugins/lib_managed/
 project/plugins/src_managed/
+logs/

bin/slaves.sh

+59
@@ -0,0 +1,59 @@
#!/usr/bin/env bash

# Run a shell command on all slave hosts.
#
# Environment Variables
#
#   SPARK_SLAVES       File naming remote hosts.
#                      Default is ${SPARK_CONF_DIR}/slaves.
#   SPARK_CONF_DIR     Alternate conf dir. Default is ${SPARK_HOME}/conf.
#   SPARK_SLAVE_SLEEP  Seconds to sleep between spawning remote commands.
#   SPARK_SSH_OPTS     Options passed to ssh when running remote commands.
##

usage="Usage: slaves.sh [--config confdir] command..."

# if no args specified, show usage
if [ $# -le 0 ]; then
  echo $usage
  exit 1
fi

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

. "$bin/spark-config.sh"

# If the slaves file is specified in the command line,
# then it takes precedence over the definition in
# spark-env.sh. Save it here.
HOSTLIST=$SPARK_SLAVES

if [ -f "${SPARK_CONF_DIR}/spark-env.sh" ]; then
  . "${SPARK_CONF_DIR}/spark-env.sh"
fi

if [ "$HOSTLIST" = "" ]; then
  if [ "$SPARK_SLAVES" = "" ]; then
    export HOSTLIST="${SPARK_CONF_DIR}/slaves"
  else
    export HOSTLIST="${SPARK_SLAVES}"
  fi
fi

echo $"${@// /\\ }"

# By default disable strict host key checking
if [ "$SPARK_SSH_OPTS" = "" ]; then
  SPARK_SSH_OPTS="-o StrictHostKeyChecking=no"
fi

for slave in `cat "$HOSTLIST"|sed "s/#.*$//;/^$/d"`; do
  ssh $SPARK_SSH_OPTS $slave $"${@// /\\ }" \
    2>&1 | sed "s/^/$slave: /" &
  if [ "$SPARK_SLAVE_SLEEP" != "" ]; then
    sleep $SPARK_SLAVE_SLEEP
  fi
done

wait
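For reference, a hedged sketch of invoking this script; the `uptime` command, the alternate slaves file path, and the sleep value are illustrative and not part of this commit:

  # run a command on every host listed in conf/slaves
  bin/slaves.sh uptime

  # point at a different slaves file and slow down the ssh fan-out (illustrative values)
  SPARK_SLAVES=/tmp/my-slaves SPARK_SLAVE_SLEEP=1 bin/slaves.sh mkdir -p /tmp/spark-logs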

bin/spark-config.sh

+19
@@ -0,0 +1,19 @@
# included in all the spark scripts with source command
# should not be executable directly
# also should not be passed any arguments, since we need original $*

# resolve links - $0 may be a softlink
this="${BASH_SOURCE-$0}"
common_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
script="$(basename -- "$this")"
this="$common_bin/$script"

# convert relative path to absolute path
config_bin=`dirname "$this"`
script=`basename "$this"`
config_bin=`cd "$config_bin"; pwd`
this="$config_bin/$script"

export SPARK_PREFIX=`dirname "$this"`/..
export SPARK_HOME=${SPARK_PREFIX}
export SPARK_CONF_DIR="$SPARK_HOME/conf"
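As a hedged illustration (the /opt/spark path is hypothetical): any script living in bin/ that sources this file ends up with SPARK_HOME pointing at the repository root and SPARK_CONF_DIR at its conf/ subdirectory.

  # inside a hypothetical /opt/spark/bin/some-script.sh
  . "$bin/spark-config.sh"
  echo "$SPARK_HOME"       # /opt/spark/bin/.. (i.e. the repository root)
  echo "$SPARK_CONF_DIR"   # /opt/spark/bin/../conf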

bin/spark-daemon.sh

+135
@@ -0,0 +1,135 @@
#!/usr/bin/env bash

# Runs a Spark command as a daemon.
#
# Environment Variables
#
#   SPARK_CONF_DIR      Alternate conf dir. Default is ${SPARK_PREFIX}/conf.
#   SPARK_LOG_DIR       Where log files are stored. ${SPARK_HOME}/logs by default.
#   SPARK_MASTER        host:path where spark code should be rsync'd from.
#   SPARK_PID_DIR       Where the pid files are stored. /tmp by default.
#   SPARK_IDENT_STRING  A string representing this instance of spark. $USER by default.
#   SPARK_NICENESS      The scheduling priority for daemons. Defaults to 0.
##

usage="Usage: spark-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] (start|stop) <spark-command> <args...>"

# if no args specified, show usage
if [ $# -le 1 ]; then
  echo $usage
  exit 1
fi

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

. "$bin/spark-config.sh"

# get arguments
startStop=$1
shift
command=$1
shift

spark_rotate_log ()
{
  log=$1;
  num=5;
  if [ -n "$2" ]; then
    num=$2
  fi
  if [ -f "$log" ]; then # rotate logs
    while [ $num -gt 1 ]; do
      prev=`expr $num - 1`
      [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
      num=$prev
    done
    mv "$log" "$log.$num";
  fi
}

if [ -f "${SPARK_CONF_DIR}/spark-env.sh" ]; then
  . "${SPARK_CONF_DIR}/spark-env.sh"
fi

if [ "$SPARK_IDENT_STRING" = "" ]; then
  export SPARK_IDENT_STRING="$USER"
fi

# get log directory
if [ "$SPARK_LOG_DIR" = "" ]; then
  export SPARK_LOG_DIR="$SPARK_HOME/logs"
fi
mkdir -p "$SPARK_LOG_DIR"
touch $SPARK_LOG_DIR/.spark_test > /dev/null 2>&1
TEST_LOG_DIR=$?
if [ "${TEST_LOG_DIR}" = "0" ]; then
  rm -f $SPARK_LOG_DIR/.spark_test
else
  chown $SPARK_IDENT_STRING $SPARK_LOG_DIR
fi

if [ "$SPARK_PID_DIR" = "" ]; then
  SPARK_PID_DIR=/tmp
fi

# some variables
export SPARK_LOGFILE=spark-$SPARK_IDENT_STRING-$command-$HOSTNAME.log
export SPARK_ROOT_LOGGER="INFO,DRFA"
log=$SPARK_LOG_DIR/spark-$SPARK_IDENT_STRING-$command-$HOSTNAME.out
pid=$SPARK_PID_DIR/spark-$SPARK_IDENT_STRING-$command.pid

# Set default scheduling priority
if [ "$SPARK_NICENESS" = "" ]; then
  export SPARK_NICENESS=0
fi

case $startStop in

  (start)

    mkdir -p "$SPARK_PID_DIR"

    if [ -f $pid ]; then
      if kill -0 `cat $pid` > /dev/null 2>&1; then
        echo $command running as process `cat $pid`. Stop it first.
        exit 1
      fi
    fi

    if [ "$SPARK_MASTER" != "" ]; then
      echo rsync from $SPARK_MASTER
      rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $SPARK_MASTER/ "$SPARK_HOME"
    fi

    spark_rotate_log $log
    echo starting $command, logging to $log
    cd "$SPARK_PREFIX"
    nohup nice -n $SPARK_NICENESS "$SPARK_PREFIX"/run $command "$@" > "$log" 2>&1 < /dev/null &
    echo $! > $pid
    sleep 1; head "$log"
    ;;

  (stop)

    if [ -f $pid ]; then
      if kill -0 `cat $pid` > /dev/null 2>&1; then
        echo stopping $command
        kill `cat $pid`
      else
        echo no $command to stop
      fi
    else
      echo no $command to stop
    fi
    ;;

  (*)
    echo $usage
    exit 1
    ;;

esac
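As the start/stop scripts below show, this is driven with a fully qualified class name. A short example, with the log and pid paths following the defaults above:

  bin/spark-daemon.sh start spark.deploy.master.Master
  # log: $SPARK_HOME/logs/spark-$USER-spark.deploy.master.Master-$HOSTNAME.out
  # pid: /tmp/spark-$USER-spark.deploy.master.Master.pid
  bin/spark-daemon.sh stop spark.deploy.master.Master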

bin/spark-daemons.sh

+18
@@ -0,0 +1,18 @@
#!/usr/bin/env bash

# Run a Spark command on all slave hosts.

usage="Usage: spark-daemons.sh [--config confdir] [--hosts hostlistfile] [start|stop] command args..."

# if no args specified, show usage
if [ $# -le 1 ]; then
  echo $usage
  exit 1
fi

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

. "$bin/spark-config.sh"

exec "$bin/slaves.sh" cd "$SPARK_HOME" \; "$bin/spark-daemon.sh" "$@"

bin/start-all.sh

+17
@@ -0,0 +1,17 @@
#!/usr/bin/env bash

# Start all spark daemons.
# Starts the master on this node.
# Starts a worker on each node specified in conf/slaves

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

# Load the Spark configuration
. "$bin/spark-config.sh"

# Start Master
"$bin"/start-master.sh --config $SPARK_CONF_DIR

# Start Workers
"$bin"/start-slaves.sh --config $SPARK_CONF_DIR

bin/start-master.sh

+10
@@ -0,0 +1,10 @@
#!/usr/bin/env bash

# Starts the master on the machine this script is executed on.

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

. "$bin/spark-config.sh"

"$bin"/spark-daemon.sh start spark.deploy.master.Master

bin/start-slaves.sh

+16
@@ -0,0 +1,16 @@
#!/usr/bin/env bash

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

. "$bin/spark-config.sh"

# Find the port number for the master
if [ "$SPARK_MASTER_PORT" = "" ]; then
  SPARK_MASTER_PORT=7077
fi

hostname=`hostname`
ip=`host "$hostname" | cut -d " " -f 4`

"$bin"/spark-daemons.sh start spark.deploy.worker.Worker spark://$ip:$SPARK_MASTER_PORT

bin/stop-all.sh

+14
@@ -0,0 +1,14 @@
#!/usr/bin/env bash

# Stop all spark daemons.
# Run this on the master node.

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

# Load the Spark configuration
. "$bin/spark-config.sh"

# Stop the slaves, then the master
"$bin"/stop-slaves.sh
"$bin"/stop-master.sh

bin/stop-master.sh

+10
@@ -0,0 +1,10 @@
#!/usr/bin/env bash

# Stops the master on the machine this script is executed on.

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

. "$bin/spark-config.sh"

"$bin"/spark-daemon.sh stop spark.deploy.master.Master

bin/stop-slaves.sh

+10
@@ -0,0 +1,10 @@
#!/usr/bin/env bash

# Stops a worker on each machine listed in conf/slaves.

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

. "$bin/spark-config.sh"

"$bin"/spark-daemons.sh stop spark.deploy.worker.Worker

conf/slaves

+2
@@ -0,0 +1,2 @@
# A Spark Worker will be started on each of the machines listed below.
localhost
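For a multi-node cluster, the file would instead list one worker host per line; the hostnames here are hypothetical:

  # comments and blank lines are ignored by slaves.sh
  worker1.example.com
  worker2.example.com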

conf/spark-env.sh.template

+1-1
@@ -9,5 +9,5 @@
 # - SPARK_MEM, to change the amount of memory used per node (this should
 #   be in the same format as the JVM's -Xmx option, e.g. 300m or 1g).
 # - SPARK_LIBRARY_PATH, to add extra search paths for native libraries.
-
+# - SPARK_MASTER_PORT, to start the spark master on a different port (standalone mode only)
 
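A hedged example of what conf/spark-env.sh might look like once this template is copied and filled in (both values are illustrative; 7077 merely matches the default used by start-slaves.sh):

  # conf/spark-env.sh
  SPARK_MASTER_PORT=7077
  SPARK_MEM=1g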

core/src/main/scala/spark/deploy/master/Master.scala

+1-1
@@ -42,7 +42,7 @@ class Master(ip: String, port: Int, webUiPort: Int) extends Actor with Logging {
   def startWebUi() {
     val webUi = new MasterWebUI(context.system, self)
     try {
-      AkkaUtils.startSprayServer(context.system, ip, webUiPort, webUi.handler)
+      AkkaUtils.startSprayServer(context.system, "0.0.0.0", webUiPort, webUi.handler)
     } catch {
       case e: Exception =>
         logError("Failed to create web UI", e)

core/src/main/scala/spark/deploy/worker/Worker.scala

+1-1
@@ -86,7 +86,7 @@ class Worker(ip: String, port: Int, webUiPort: Int, cores: Int, memory: Int, mas
   def startWebUi() {
     val webUi = new WorkerWebUI(context.system, self)
     try {
-      AkkaUtils.startSprayServer(context.system, ip, webUiPort, webUi.handler)
+      AkkaUtils.startSprayServer(context.system, "0.0.0.0", webUiPort, webUi.handler)
     } catch {
       case e: Exception =>
         logError("Failed to create web UI", e)

ec2/spark-ec2-standalone

+20
@@ -0,0 +1,20 @@
#!/bin/sh

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

cd "`dirname $0`"
PYTHONPATH="./third_party/boto-2.4.1.zip/boto-2.4.1:$PYTHONPATH" python ./spark_ec2_standalone.py $@
