[SPARK-2960][DEPLOY] Support executing Spark from symlinks (reopen)
This PR is based on the work of roji to support running Spark scripts from symlinks. Thanks for the great work, roji. Would you mind taking a look at this PR? Thanks a lot.

Distributions such as HDP normally expose the Spark executables as symlinks placed on `PATH`, but Spark's current scripts do not recursively resolve the real path behind a symlink, so Spark fails to execute when launched that way. This PR addresses the issue by resolving the symlink to an absolute path.
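For illustration only (the paths below are hypothetical), this is roughly how the old `dirname "$0"` logic goes wrong when the launcher is reached through a symlink:

```bash
# Hypothetical layout: Spark lives in /opt/spark, a package puts a symlink on PATH.
ln -s /opt/spark/bin/spark-submit /usr/local/bin/spark-submit

# When /usr/local/bin/spark-submit runs, $0 is the symlink path, so
#   FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
# evaluates to /usr/local rather than /opt/spark, and the script can no
# longer find bin/spark-class, conf/, and so on.
```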

Unlike apache#2386, which used `readlink -f`, this PR resolves the path manually in a loop, because the `-f` flag is not supported on Mac.
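To show the idea, here is a minimal sketch of resolving a symlink chain without `readlink -f`; the function name and details are illustrative, not the exact code in this PR:

```bash
#!/usr/bin/env bash
# Resolve the real location of a script that may have been invoked through
# one or more symlinks, using only portable tools (plain readlink, no -f).
resolve_link() {
  local source="$1"
  while [ -h "$source" ]; do
    # Directory that contains the current link, as an absolute physical path.
    local dir
    dir="$(cd -P "$(dirname "$source")" && pwd)"
    # Follow one level of the link; a relative target is relative to $dir.
    source="$(readlink "$source")"
    [[ "$source" != /* ]] && source="$dir/$source"
  done
  echo "$source"
}

script="$(resolve_link "${BASH_SOURCE:-$0}")"
SPARK_HOME="$(cd "$(dirname "$script")/.." && pwd)"
```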

I've tested on Mac and Linux (CentOS); it looks fine.

This PR does not fix the scripts under the `sbin` folder; should they be fixed as well?

Please help review; any comments are greatly appreciated.

Author: jerryshao <[email protected]>
Author: Shay Rojansky <[email protected]>

Closes apache#8669 from jerryshao/SPARK-2960.
jerryshao authored and srowen committed Nov 4, 2015
1 parent 2692bdb commit 8aff36e
Showing 31 changed files with 213 additions and 183 deletions.
8 changes: 5 additions & 3 deletions bin/beeline
@@ -23,8 +23,10 @@
# Enter posix mode for bash
set -o posix

-# Figure out where Spark is installed
-FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
+# Figure out if SPARK_HOME is set
+if [ -z "${SPARK_HOME}" ]; then
+export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi

CLASS="org.apache.hive.beeline.BeeLine"
-exec "$FWDIR/bin/spark-class" $CLASS "$@"
+exec "${SPARK_HOME}/bin/spark-class" $CLASS "$@"
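A note on the pattern introduced above: the `-z "${SPARK_HOME}"` guard means an already-exported `SPARK_HOME` always wins, and the `dirname "$0"` fallback is used only when nothing is set. A packaging wrapper that has already resolved the symlink can therefore do, for example (hypothetical path):

```bash
export SPARK_HOME=/usr/hdp/current/spark-client
exec /usr/bin/beeline "$@"   # beeline keeps the exported SPARK_HOME
```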
32 changes: 18 additions & 14 deletions bin/load-spark-env.sh
@@ -20,13 +20,17 @@
# This script loads spark-env.sh if it exists, and ensures it is only loaded once.
# spark-env.sh is loaded from SPARK_CONF_DIR if set, or within the current directory's
# conf/ subdirectory.
-FWDIR="$(cd "`dirname "$0"`"/..; pwd)"

+# Figure out where Spark is installed
+if [ -z "${SPARK_HOME}" ]; then
+export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi

if [ -z "$SPARK_ENV_LOADED" ]; then
export SPARK_ENV_LOADED=1

# Returns the parent of the directory this script lives in.
-parent_dir="$(cd "`dirname "$0"`"/..; pwd)"
+parent_dir="${SPARK_HOME}"

user_conf_dir="${SPARK_CONF_DIR:-"$parent_dir"/conf}"

@@ -42,18 +46,18 @@ fi

if [ -z "$SPARK_SCALA_VERSION" ]; then

-ASSEMBLY_DIR2="$FWDIR/assembly/target/scala-2.11"
-ASSEMBLY_DIR1="$FWDIR/assembly/target/scala-2.10"
+ASSEMBLY_DIR2="${SPARK_HOME}/assembly/target/scala-2.11"
+ASSEMBLY_DIR1="${SPARK_HOME}/assembly/target/scala-2.10"

-if [[ -d "$ASSEMBLY_DIR2" && -d "$ASSEMBLY_DIR1" ]]; then
-echo -e "Presence of build for both scala versions(SCALA 2.10 and SCALA 2.11) detected." 1>&2
-echo -e 'Either clean one of them or, export SPARK_SCALA_VERSION=2.11 in spark-env.sh.' 1>&2
-exit 1
-fi
+if [[ -d "$ASSEMBLY_DIR2" && -d "$ASSEMBLY_DIR1" ]]; then
+echo -e "Presence of build for both scala versions(SCALA 2.10 and SCALA 2.11) detected." 1>&2
+echo -e 'Either clean one of them or, export SPARK_SCALA_VERSION=2.11 in spark-env.sh.' 1>&2
+exit 1
+fi

-if [ -d "$ASSEMBLY_DIR2" ]; then
-export SPARK_SCALA_VERSION="2.11"
-else
-export SPARK_SCALA_VERSION="2.10"
-fi
+if [ -d "$ASSEMBLY_DIR2" ]; then
+export SPARK_SCALA_VERSION="2.11"
+else
+export SPARK_SCALA_VERSION="2.10"
+fi
fi
14 changes: 8 additions & 6 deletions bin/pyspark
@@ -17,9 +17,11 @@
# limitations under the License.
#

-export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+if [ -z "${SPARK_HOME}" ]; then
+export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi

-source "$SPARK_HOME"/bin/load-spark-env.sh
+source "${SPARK_HOME}"/bin/load-spark-env.sh
export _SPARK_CMD_USAGE="Usage: ./bin/pyspark [options]"

# In Spark <= 1.1, setting IPYTHON=1 would cause the driver to be launched using the `ipython`
@@ -64,12 +66,12 @@ fi
export PYSPARK_PYTHON

# Add the PySpark classes to the Python path:
-export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH"
-export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.9-src.zip:$PYTHONPATH"
+export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
+export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.9-src.zip:$PYTHONPATH"

# Load the PySpark shell.py script when ./pyspark is used interactively:
export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"
-export PYTHONSTARTUP="$SPARK_HOME/python/pyspark/shell.py"
+export PYTHONSTARTUP="${SPARK_HOME}/python/pyspark/shell.py"

# For pyspark tests
if [[ -n "$SPARK_TESTING" ]]; then
@@ -82,4 +84,4 @@ fi

export PYSPARK_DRIVER_PYTHON
export PYSPARK_DRIVER_PYTHON_OPTS
-exec "$SPARK_HOME"/bin/spark-submit pyspark-shell-main --name "PySparkShell" "$@"
+exec "${SPARK_HOME}"/bin/spark-submit pyspark-shell-main --name "PySparkShell" "$@"
18 changes: 10 additions & 8 deletions bin/run-example
@@ -17,11 +17,13 @@
# limitations under the License.
#

-FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
-export SPARK_HOME="$FWDIR"
-EXAMPLES_DIR="$FWDIR"/examples
+if [ -z "${SPARK_HOME}" ]; then
+export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi

+EXAMPLES_DIR="${SPARK_HOME}"/examples

-. "$FWDIR"/bin/load-spark-env.sh
+. "${SPARK_HOME}"/bin/load-spark-env.sh

if [ -n "$1" ]; then
EXAMPLE_CLASS="$1"
@@ -34,8 +36,8 @@ else
exit 1
fi

-if [ -f "$FWDIR/RELEASE" ]; then
-JAR_PATH="${FWDIR}/lib"
+if [ -f "${SPARK_HOME}/RELEASE" ]; then
+JAR_PATH="${SPARK_HOME}/lib"
else
JAR_PATH="${EXAMPLES_DIR}/target/scala-${SPARK_SCALA_VERSION}"
fi
@@ -44,7 +46,7 @@ JAR_COUNT=0

for f in "${JAR_PATH}"/spark-examples-*hadoop*.jar; do
if [[ ! -e "$f" ]]; then
-echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2
+echo "Failed to find Spark examples assembly in ${SPARK_HOME}/lib or ${SPARK_HOME}/examples/target" 1>&2
echo "You need to build Spark before running this program" 1>&2
exit 1
fi
@@ -67,7 +69,7 @@ if [[ ! $EXAMPLE_CLASS == org.apache.spark.examples* ]]; then
EXAMPLE_CLASS="org.apache.spark.examples.$EXAMPLE_CLASS"
fi

-exec "$FWDIR"/bin/spark-submit \
+exec "${SPARK_HOME}"/bin/spark-submit \
--master $EXAMPLE_MASTER \
--class $EXAMPLE_CLASS \
"$SPARK_EXAMPLES_JAR" \
15 changes: 8 additions & 7 deletions bin/spark-class
@@ -17,10 +17,11 @@
# limitations under the License.
#

-# Figure out where Spark is installed
-export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+if [ -z "${SPARK_HOME}" ]; then
+export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi

-. "$SPARK_HOME"/bin/load-spark-env.sh
+. "${SPARK_HOME}"/bin/load-spark-env.sh

# Find the java binary
if [ -n "${JAVA_HOME}" ]; then
@@ -36,10 +37,10 @@ fi

# Find assembly jar
SPARK_ASSEMBLY_JAR=
-if [ -f "$SPARK_HOME/RELEASE" ]; then
-ASSEMBLY_DIR="$SPARK_HOME/lib"
+if [ -f "${SPARK_HOME}/RELEASE" ]; then
+ASSEMBLY_DIR="${SPARK_HOME}/lib"
else
-ASSEMBLY_DIR="$SPARK_HOME/assembly/target/scala-$SPARK_SCALA_VERSION"
+ASSEMBLY_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION"
fi

GREP_OPTIONS=
@@ -65,7 +66,7 @@ LAUNCH_CLASSPATH="$SPARK_ASSEMBLY_JAR"

# Add the launcher build dir to the classpath if requested.
if [ -n "$SPARK_PREPEND_CLASSES" ]; then
-LAUNCH_CLASSPATH="$SPARK_HOME/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH"
+LAUNCH_CLASSPATH="${SPARK_HOME}/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH"
fi

export _SPARK_ASSEMBLY="$SPARK_ASSEMBLY_JAR"
9 changes: 6 additions & 3 deletions bin/spark-shell
@@ -28,7 +28,10 @@ esac
# Enter posix mode for bash
set -o posix

-export FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
+if [ -z "${SPARK_HOME}" ]; then
+export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi

export _SPARK_CMD_USAGE="Usage: ./bin/spark-shell [options]"

# SPARK-4161: scala does not assume use of the java classpath,
@@ -47,11 +50,11 @@ function main() {
# (see https://github.com/sbt/sbt/issues/562).
stty -icanon min 1 -echo > /dev/null 2>&1
export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
-"$FWDIR"/bin/spark-submit --class org.apache.spark.repl.Main --name "Spark shell" "$@"
+"${SPARK_HOME}"/bin/spark-submit --class org.apache.spark.repl.Main --name "Spark shell" "$@"
stty icanon echo > /dev/null 2>&1
else
export SPARK_SUBMIT_OPTS
-"$FWDIR"/bin/spark-submit --class org.apache.spark.repl.Main --name "Spark shell" "$@"
+"${SPARK_HOME}"/bin/spark-submit --class org.apache.spark.repl.Main --name "Spark shell" "$@"
fi
}

7 changes: 5 additions & 2 deletions bin/spark-sql
@@ -17,6 +17,9 @@
# limitations under the License.
#

-export FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
+if [ -z "${SPARK_HOME}" ]; then
+export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi

export _SPARK_CMD_USAGE="Usage: ./bin/spark-sql [options] [cli option]"
-exec "$FWDIR"/bin/spark-submit --class org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver "$@"
+exec "${SPARK_HOME}"/bin/spark-submit --class org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver "$@"
6 changes: 4 additions & 2 deletions bin/spark-submit
@@ -17,9 +17,11 @@
# limitations under the License.
#

-SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+if [ -z "${SPARK_HOME}" ]; then
+export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi

# disable randomized hash for string in Python 3.3+
export PYTHONHASHSEED=0

-exec "$SPARK_HOME"/bin/spark-class org.apache.spark.deploy.SparkSubmit "$@"
+exec "${SPARK_HOME}"/bin/spark-class org.apache.spark.deploy.SparkSubmit "$@"
9 changes: 6 additions & 3 deletions bin/sparkR
@@ -17,7 +17,10 @@
# limitations under the License.
#

-export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
-source "$SPARK_HOME"/bin/load-spark-env.sh
+if [ -z "${SPARK_HOME}" ]; then
+export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi

+source "${SPARK_HOME}"/bin/load-spark-env.sh
export _SPARK_CMD_USAGE="Usage: ./bin/sparkR [options]"
-exec "$SPARK_HOME"/bin/spark-submit sparkr-shell-main "$@"
+exec "${SPARK_HOME}"/bin/spark-submit sparkr-shell-main "$@"
9 changes: 5 additions & 4 deletions sbin/slaves.sh
@@ -36,10 +36,11 @@ if [ $# -le 0 ]; then
exit 1
fi

-sbin="`dirname "$0"`"
-sbin="`cd "$sbin"; pwd`"
+if [ -z "${SPARK_HOME}" ]; then
+export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi

-. "$sbin/spark-config.sh"
+. "${SPARK_HOME}/sbin/spark-config.sh"

# If the slaves file is specified in the command line,
# then it takes precedence over the definition in
@@ -65,7 +66,7 @@
shift
fi

-. "$SPARK_PREFIX/bin/load-spark-env.sh"
+. "${SPARK_HOME}/bin/load-spark-env.sh"

if [ "$HOSTLIST" = "" ]; then
if [ "$SPARK_SLAVES" = "" ]; then
Expand Down
23 changes: 7 additions & 16 deletions sbin/spark-config.sh
@@ -19,21 +19,12 @@
# should not be executable directly
# also should not be passed any arguments, since we need original $*

-# resolve links - $0 may be a softlink
-this="${BASH_SOURCE:-$0}"
-common_bin="$(cd -P -- "$(dirname -- "$this")" && pwd -P)"
-script="$(basename -- "$this")"
-this="$common_bin/$script"
+# symlink and absolute path should rely on SPARK_HOME to resolve
+if [ -z "${SPARK_HOME}" ]; then
+export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi

-# convert relative path to absolute path
-config_bin="`dirname "$this"`"
-script="`basename "$this"`"
-config_bin="`cd "$config_bin"; pwd`"
-this="$config_bin/$script"

-export SPARK_PREFIX="`dirname "$this"`"/..
-export SPARK_HOME="${SPARK_PREFIX}"
-export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"$SPARK_HOME/conf"}"
+export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}"
# Add the PySpark classes to the PYTHONPATH:
-export PYTHONPATH="$SPARK_HOME/python:$PYTHONPATH"
-export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.9-src.zip:$PYTHONPATH"
+export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}"
+export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.9-src.zip:${PYTHONPATH}"
23 changes: 12 additions & 11 deletions sbin/spark-daemon.sh
@@ -37,10 +37,11 @@ if [ $# -le 1 ]; then
exit 1
fi

-sbin="`dirname "$0"`"
-sbin="`cd "$sbin"; pwd`"
+if [ -z "${SPARK_HOME}" ]; then
+export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi

-. "$sbin/spark-config.sh"
+. "${SPARK_HOME}/sbin/spark-config.sh"

# get arguments

@@ -86,7 +87,7 @@ spark_rotate_log ()
fi
}

-. "$SPARK_PREFIX/bin/load-spark-env.sh"
+. "${SPARK_HOME}/bin/load-spark-env.sh"

if [ "$SPARK_IDENT_STRING" = "" ]; then
export SPARK_IDENT_STRING="$USER"
Expand All @@ -97,7 +98,7 @@ export SPARK_PRINT_LAUNCH_COMMAND="1"

# get log directory
if [ "$SPARK_LOG_DIR" = "" ]; then
-export SPARK_LOG_DIR="$SPARK_HOME/logs"
+export SPARK_LOG_DIR="${SPARK_HOME}/logs"
fi
mkdir -p "$SPARK_LOG_DIR"
touch "$SPARK_LOG_DIR"/.spark_test > /dev/null 2>&1
@@ -137,20 +138,20 @@ run_command() {

if [ "$SPARK_MASTER" != "" ]; then
echo rsync from "$SPARK_MASTER"
-rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' "$SPARK_MASTER/" "$SPARK_HOME"
+rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' "$SPARK_MASTER/" "${SPARK_HOME}"
fi

spark_rotate_log "$log"
echo "starting $command, logging to $log"

case "$mode" in
(class)
-nohup nice -n "$SPARK_NICENESS" "$SPARK_PREFIX"/bin/spark-class $command "$@" >> "$log" 2>&1 < /dev/null &
+nohup nice -n "$SPARK_NICENESS" "${SPARK_HOME}"/bin/spark-class $command "$@" >> "$log" 2>&1 < /dev/null &
newpid="$!"
;;

(submit)
-nohup nice -n "$SPARK_NICENESS" "$SPARK_PREFIX"/bin/spark-submit --class $command "$@" >> "$log" 2>&1 < /dev/null &
+nohup nice -n "$SPARK_NICENESS" "${SPARK_HOME}"/bin/spark-submit --class $command "$@" >> "$log" 2>&1 < /dev/null &
newpid="$!"
;;

@@ -205,13 +206,13 @@ case $option in
else
echo $pid file is present but $command not running
exit 1
-fi
+fi
else
echo $command not running.
exit 2
-fi
+fi
;;

(*)
echo $usage
exit 1
9 changes: 5 additions & 4 deletions sbin/spark-daemons.sh
@@ -27,9 +27,10 @@ if [ $# -le 1 ]; then
exit 1
fi

-sbin=`dirname "$0"`
-sbin=`cd "$sbin"; pwd`
+if [ -z "${SPARK_HOME}" ]; then
+export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi

-. "$sbin/spark-config.sh"
+. "${SPARK_HOME}/sbin/spark-config.sh"

-exec "$sbin/slaves.sh" cd "$SPARK_HOME" \; "$sbin/spark-daemon.sh" "$@"
+exec "${SPARK_HOME}/sbin/slaves.sh" cd "${SPARK_HOME}" \; "${SPARK_HOME}/sbin/spark-daemon.sh" "$@"