Spark standalone mode cluster scripts.
Heavily inspired by Hadoop cluster scripts ;-)
dennybritz committed Aug 2, 2012
1 parent 545165e commit 0ee44c2
Showing 18 changed files with 825 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -26,3 +26,4 @@ project/build/target/
project/plugins/target/
project/plugins/lib_managed/
project/plugins/src_managed/
logs/
59 changes: 59 additions & 0 deletions bin/slaves.sh
@@ -0,0 +1,59 @@
#!/usr/bin/env bash

# Run a shell command on all slave hosts.
#
# Environment Variables
#
# SPARK_SLAVES File naming remote hosts.
# Default is ${SPARK_CONF_DIR}/slaves.
# SPARK_CONF_DIR Alternate conf dir. Default is ${SPARK_HOME}/conf.
# SPARK_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
# SPARK_SSH_OPTS Options passed to ssh when running remote commands.
##

usage="Usage: slaves.sh [--config confdir] command..."

# if no args specified, show usage
if [ $# -le 0 ]; then
echo "$usage"
exit 1
fi

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

. "$bin/spark-config.sh"

# If the slaves file is specified on the command line,
# it takes precedence over the definition in
# spark-env.sh. Save it here.
HOSTLIST=$SPARK_SLAVES

if [ -f "${SPARK_CONF_DIR}/spark-env.sh" ]; then
. "${SPARK_CONF_DIR}/spark-env.sh"
fi

if [ "$HOSTLIST" = "" ]; then
if [ "$SPARK_SLAVES" = "" ]; then
export HOSTLIST="${SPARK_CONF_DIR}/slaves"
else
export HOSTLIST="${SPARK_SLAVES}"
fi
fi

echo $"${@// /\\ }"

# By default disable strict host key checking
if [ "$SPARK_SSH_OPTS" = "" ]; then
SPARK_SSH_OPTS="-o StrictHostKeyChecking=no"
fi

# Iterate over the host list, stripping comments and blank lines
for slave in `cat "$HOSTLIST"|sed "s/#.*$//;/^$/d"`; do
ssh $SPARK_SSH_OPTS $slave $"${@// /\\ }" \
2>&1 | sed "s/^/$slave: /" &
if [ "$SPARK_SLAVE_SLEEP" != "" ]; then
sleep $SPARK_SLAVE_SLEEP
fi
done

wait
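
As a usage sketch (hypothetical invocations, assuming conf/slaves lists the remote hosts):

# Run `uptime` on every host listed in conf/slaves
bin/slaves.sh uptime

# Override the SSH options, e.g. to use a specific identity file
SPARK_SSH_OPTS="-i ~/.ssh/spark_key -o StrictHostKeyChecking=no" bin/slaves.sh uptime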
19 changes: 19 additions & 0 deletions bin/spark-config.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Included in all Spark scripts via the `source` command.
# Should not be executed directly, and should not be passed
# any arguments, since we need the original $*.

# resolve links - $0 may be a softlink
this="${BASH_SOURCE-$0}"
common_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
script="$(basename -- "$this")"
this="$common_bin/$script"

export SPARK_PREFIX=`dirname "$this"`/..
export SPARK_HOME=${SPARK_PREFIX}
# Respect a pre-set SPARK_CONF_DIR so callers can use an alternate conf dir
export SPARK_CONF_DIR="${SPARK_CONF_DIR:-$SPARK_HOME/conf}"
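
With the overridable default above, the whole script suite can be pointed at an alternate conf dir (a sketch; /etc/spark is a hypothetical path):

SPARK_CONF_DIR=/etc/spark bin/start-all.sh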
135 changes: 135 additions & 0 deletions bin/spark-daemon.sh
@@ -0,0 +1,135 @@
#!/usr/bin/env bash

# Runs a Spark command as a daemon.
#
# Environment Variables
#
# SPARK_CONF_DIR Alternate conf dir. Default is ${SPARK_PREFIX}/conf.
# SPARK_LOG_DIR Where log files are stored. Default is ${SPARK_HOME}/logs.
# SPARK_MASTER host:path where the Spark code should be rsync'd from on start.
# SPARK_PID_DIR Where the pid files are stored. Default is /tmp.
# SPARK_IDENT_STRING A string identifying this instance of Spark. Default is $USER.
# SPARK_NICENESS The scheduling priority for daemons. Default is 0.
##

usage="Usage: spark-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] (start|stop) <spark-command> <args...>"

# if no args specified, show usage
if [ $# -le 1 ]; then
echo "$usage"
exit 1
fi

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

. "$bin/spark-config.sh"

# get arguments
startStop=$1
shift
command=$1
shift

spark_rotate_log ()
{
log=$1;
num=5;
if [ -n "$2" ]; then
num=$2
fi
if [ -f "$log" ]; then # rotate logs
while [ $num -gt 1 ]; do
prev=`expr $num - 1`
[ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
num=$prev
done
mv "$log" "$log.$num";
fi
}
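# Example: with the default num=5, successive restarts shift the logs as
#   log -> log.1, log.1 -> log.2, ..., log.4 -> log.5 (the oldest copy is overwritten)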

if [ -f "${SPARK_CONF_DIR}/spark-env.sh" ]; then
. "${SPARK_CONF_DIR}/spark-env.sh"
fi

if [ "$SPARK_IDENT_STRING" = "" ]; then
export SPARK_IDENT_STRING="$USER"
fi

# get log directory
if [ "$SPARK_LOG_DIR" = "" ]; then
export SPARK_LOG_DIR="$SPARK_HOME/logs"
fi
mkdir -p "$SPARK_LOG_DIR"
touch $SPARK_LOG_DIR/.spark_test > /dev/null 2>&1
TEST_LOG_DIR=$?
if [ "${TEST_LOG_DIR}" = "0" ]; then
rm -f $SPARK_LOG_DIR/.spark_test
else
chown $SPARK_IDENT_STRING $SPARK_LOG_DIR
fi

if [ "$SPARK_PID_DIR" = "" ]; then
SPARK_PID_DIR=/tmp
fi

# some variables
export SPARK_LOGFILE=spark-$SPARK_IDENT_STRING-$command-$HOSTNAME.log
export SPARK_ROOT_LOGGER="INFO,DRFA"
log=$SPARK_LOG_DIR/spark-$SPARK_IDENT_STRING-$command-$HOSTNAME.out
pid=$SPARK_PID_DIR/spark-$SPARK_IDENT_STRING-$command.pid

# Set default scheduling priority
if [ "$SPARK_NICENESS" = "" ]; then
export SPARK_NICENESS=0
fi


case $startStop in

(start)

mkdir -p "$SPARK_PID_DIR"

if [ -f $pid ]; then
if kill -0 `cat $pid` > /dev/null 2>&1; then
echo $command running as process `cat $pid`. Stop it first.
exit 1
fi
fi

if [ "$SPARK_MASTER" != "" ]; then
echo rsync from $SPARK_MASTER
rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $SPARK_MASTER/ "$SPARK_HOME"
fi

spark_rotate_log $log
echo starting $command, logging to $log
cd "$SPARK_PREFIX"
nohup nice -n $SPARK_NICENESS "$SPARK_PREFIX"/run $command "$@" > "$log" 2>&1 < /dev/null &
echo $! > $pid
sleep 1; head "$log"
;;

(stop)

if [ -f $pid ]; then
if kill -0 `cat $pid` > /dev/null 2>&1; then
echo stopping $command
kill `cat $pid`
else
echo no $command to stop
fi
else
echo no $command to stop
fi
;;

(*)
echo "$usage"
exit 1
;;

esac
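
A usage sketch, mirroring the exact commands that start-master.sh and stop-master.sh below issue:

bin/spark-daemon.sh start spark.deploy.master.Master
bin/spark-daemon.sh stop spark.deploy.master.Master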


18 changes: 18 additions & 0 deletions bin/spark-daemons.sh
@@ -0,0 +1,18 @@
#!/usr/bin/env bash

# Run a Spark command on all slave hosts.

usage="Usage: spark-daemons.sh [--config confdir] [--hosts hostlistfile] [start|stop] command args..."

# if no args specified, show usage
if [ $# -le 1 ]; then
echo "$usage"
exit 1
fi

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

. "$bin/spark-config.sh"

exec "$bin/slaves.sh" cd "$SPARK_HOME" \; "$bin/spark-daemon.sh" "$@"
17 changes: 17 additions & 0 deletions bin/start-all.sh
@@ -0,0 +1,17 @@
#!/usr/bin/env bash

# Start all spark daemons.
# Starts the master on this node.
# Starts a worker on each node specified in conf/slaves

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

# Load the Spark configuration
. "$bin/spark-config.sh"

# Start Master
"$bin"/start-master.sh --config $SPARK_CONF_DIR

# Start Workers
"$bin"/start-slaves.sh --config $SPARK_CONF_DIR
10 changes: 10 additions & 0 deletions bin/start-master.sh
@@ -0,0 +1,10 @@
#!/usr/bin/env bash

# Starts the master on the machine this script is executed on.

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

. "$bin/spark-config.sh"

"$bin"/spark-daemon.sh start spark.deploy.master.Master
16 changes: 16 additions & 0 deletions bin/start-slaves.sh
@@ -0,0 +1,16 @@
#!/usr/bin/env bash

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

. "$bin/spark-config.sh"

# Find the port number for the master
if [ "$SPARK_MASTER_PORT" = "" ]; then
SPARK_MASTER_PORT=7077
fi

# Derive this machine's IP address: `host` prints "<name> has address <ip>",
# so the IP is the 4th space-separated field
hostname=`hostname`
ip=`host "$hostname" | cut -d " " -f 4`

"$bin"/spark-daemons.sh start spark.deploy.worker.Worker spark://$ip:$SPARK_MASTER_PORT
14 changes: 14 additions & 0 deletions bin/stop-all.sh
@@ -0,0 +1,14 @@
#!/usr/bin/env bash

# Stop all spark daemons.
# Run this on the master node.

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

# Load the Spark configuration
. "$bin/spark-config.sh"

# Stop the slaves, then the master
"$bin"/stop-slaves.sh
"$bin"/stop-master.sh
10 changes: 10 additions & 0 deletions bin/stop-master.sh
@@ -0,0 +1,10 @@
#!/usr/bin/env bash

# Stops the master on the machine this script is executed on.

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

. "$bin/spark-config.sh"

"$bin"/spark-daemon.sh stop spark.deploy.worker.Worker
10 changes: 10 additions & 0 deletions bin/stop-slaves.sh
@@ -0,0 +1,10 @@
#!/usr/bin/env bash

# Stops a worker on each machine listed in conf/slaves.

bin=`dirname "$0"`
bin=`cd "$bin"; pwd`

. "$bin/spark-config.sh"

"$bin"/spark-daemon.sh stop spark.deploy.master.Master
2 changes: 2 additions & 0 deletions conf/slaves
@@ -0,0 +1,2 @@
# A Spark Worker will be started on each of the machines listed below.
localhost
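
A multi-node deployment would list one host per line instead (hypothetical hostnames):

spark-worker-1.example.com
spark-worker-2.example.com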
2 changes: 1 addition & 1 deletion conf/spark-env.sh.template
@@ -9,5 +9,5 @@
# - SPARK_MEM, to change the amount of memory used per node (this should
# be in the same format as the JVM's -Xmx option, e.g. 300m or 1g).
# - SPARK_LIBRARY_PATH, to add extra search paths for native libraries.

# - SPARK_MASTER_PORT, to start the spark master on a different port (standalone mode only)
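
To use the template, copy it to conf/spark-env.sh and set values there (a sketch; the port value is arbitrary):

cp conf/spark-env.sh.template conf/spark-env.sh
echo 'SPARK_MASTER_PORT=7078' >> conf/spark-env.sh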

2 changes: 1 addition & 1 deletion core/src/main/scala/spark/deploy/master/Master.scala
@@ -42,7 +42,7 @@ class Master(ip: String, port: Int, webUiPort: Int) extends Actor with Logging {
def startWebUi() {
val webUi = new MasterWebUI(context.system, self)
try {
AkkaUtils.startSprayServer(context.system, ip, webUiPort, webUi.handler)
AkkaUtils.startSprayServer(context.system, "0.0.0.0", webUiPort, webUi.handler)
} catch {
case e: Exception =>
logError("Failed to create web UI", e)
2 changes: 1 addition & 1 deletion core/src/main/scala/spark/deploy/worker/Worker.scala
@@ -86,7 +86,7 @@ class Worker(ip: String, port: Int, webUiPort: Int, cores: Int, memory: Int, mas
def startWebUi() {
val webUi = new WorkerWebUI(context.system, self)
try {
AkkaUtils.startSprayServer(context.system, ip, webUiPort, webUi.handler)
AkkaUtils.startSprayServer(context.system, "0.0.0.0", webUiPort, webUi.handler)
} catch {
case e: Exception =>
logError("Failed to create web UI", e)
20 changes: 20 additions & 0 deletions ec2/spark-ec2-standalone
@@ -0,0 +1,20 @@
#!/bin/sh

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

cd "`dirname $0`"
PYTHONPATH="./third_party/boto-2.4.1.zip/boto-2.4.1:$PYTHONPATH" python ./spark_ec2_standalone.py "$@"
