
Commit e16a8e7

ScrapCodes authored and andrewor14 committed

SPARK-3337 Paranoid quoting in shell to allow install dirs with spaces within.

Tested! To be honest, it isn't a great idea to have a directory with spaces in its name: Emacs doesn't like it, Hadoop doesn't like it, and so on...

Author: Prashant Sharma <[email protected]>

Closes apache#2229 from ScrapCodes/SPARK-3337/quoting-shell-scripts and squashes the following commits:

d4ad660 [Prashant Sharma] SPARK-3337 Paranoid quoting in shell to allow install dirs with spaces within.

1 parent 711356b, commit e16a8e7

30 files changed: +130 -128 lines changed
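
The pattern repeated throughout this commit is resolving the install directory from $0 with every expansion quoted, including inside the backticks. A minimal sketch of the failure mode, assuming a hypothetical install path containing a space:

# Suppose this script lives at "/opt/spark install/bin/demo".
# Unquoted, `dirname $0` word-splits into two arguments, so cd
# resolves the wrong directory or fails outright:
#   FWDIR="$(cd `dirname $0`/..; pwd)"

# The commit's form: backticks start a fresh quoting context, so "$0"
# is safely quoted inside them, and the substitution result is quoted
# again before cd sees it, keeping the path a single word:
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
echo "Resolved install dir: $FWDIR"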

bin/beeline (+1 -1)

@@ -24,7 +24,7 @@
 set -o posix
 
 # Figure out where Spark is installed
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 
 CLASS="org.apache.hive.beeline.BeeLine"
 exec "$FWDIR/bin/spark-class" $CLASS "$@"

bin/compute-classpath.sh (+6 -6)

@@ -23,9 +23,9 @@
 SCALA_VERSION=2.10
 
 # Figure out where Spark is installed
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 
-. $FWDIR/bin/load-spark-env.sh
+. "$FWDIR"/bin/load-spark-env.sh
 
 # Build up classpath
 CLASSPATH="$SPARK_CLASSPATH:$SPARK_SUBMIT_CLASSPATH:$FWDIR/conf"
@@ -63,7 +63,7 @@ else
   assembly_folder="$ASSEMBLY_DIR"
 fi
 
-num_jars=$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*\.jar" | wc -l)
+num_jars="$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*\.jar" | wc -l)"
 if [ "$num_jars" -eq "0" ]; then
   echo "Failed to find Spark assembly in $assembly_folder"
   echo "You need to build Spark before running this program."
@@ -77,7 +77,7 @@ if [ "$num_jars" -gt "1" ]; then
   exit 1
 fi
 
-ASSEMBLY_JAR=$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null)
+ASSEMBLY_JAR="$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null)"
 
 # Verify that versions of java used to build the jars and run Spark are compatible
 jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1)
@@ -103,8 +103,8 @@ else
   datanucleus_dir="$FWDIR"/lib_managed/jars
 fi
 
-datanucleus_jars=$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar")
-datanucleus_jars=$(echo "$datanucleus_jars" | tr "\n" : | sed s/:$//g)
+datanucleus_jars="$(find "$datanucleus_dir" 2>/dev/null | grep "datanucleus-.*\\.jar")"
+datanucleus_jars="$(echo "$datanucleus_jars" | tr "\n" : | sed s/:$//g)"
 
 if [ -n "$datanucleus_jars" ]; then
   hive_files=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" org/apache/hadoop/hive/ql/exec 2>/dev/null)
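
Worth noting: word splitting never applies to the right-hand side of a plain assignment, so num_jars=$(...) was already safe; the added quotes are the "paranoid" part of the title, keeping the line robust if it is later copied into a context that does split. A small sketch under that assumption:

# Both assignments capture the ls output identically, spaces and all:
dir="/tmp/a b"; mkdir -p "$dir"; touch "$dir/x.jar" "$dir/y.jar"
jars=$(ls "$dir")     # safe: assignments do not word-split
jars="$(ls "$dir")"   # the commit's belt-and-suspenders form
echo "$jars"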

bin/load-spark-env.sh (+2 -2)

@@ -25,9 +25,9 @@ if [ -z "$SPARK_ENV_LOADED" ]; then
   export SPARK_ENV_LOADED=1
 
   # Returns the parent of the directory this script lives in.
-  parent_dir="$(cd `dirname $0`/..; pwd)"
+  parent_dir="$(cd "`dirname "$0"`"/..; pwd)"
 
-  user_conf_dir=${SPARK_CONF_DIR:-"$parent_dir/conf"}
+  user_conf_dir="${SPARK_CONF_DIR:-"$parent_dir"/conf}"
 
   if [ -f "${user_conf_dir}/spark-env.sh" ]; then
     # Promote all variable declarations to environment (exported) variables
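
The user_conf_dir change shows how quoting interacts with default expansion: quotes inside ${VAR:-default} open their own context, and the outer quotes guarantee the whole expansion stays one word whether the variable or the default wins. A sketch with a hypothetical path:

parent_dir="/opt/my spark"      # hypothetical dir with a space
unset SPARK_CONF_DIR
user_conf_dir="${SPARK_CONF_DIR:-"$parent_dir"/conf}"
echo "conf dir: $user_conf_dir"                      # one word
printf '%s\n' ${SPARK_CONF_DIR:-$parent_dir/conf}    # unquoted: splits into two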

bin/pyspark (+10 -10)

@@ -18,18 +18,18 @@
 #
 
 # Figure out where Spark is installed
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
 
-source $FWDIR/bin/utils.sh
+source "$FWDIR/bin/utils.sh"
 
 SCALA_VERSION=2.10
 
 function usage() {
   echo "Usage: ./bin/pyspark [options]" 1>&2
-  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   exit 0
 }
 
@@ -48,7 +48,7 @@ if [ ! -f "$FWDIR/RELEASE" ]; then
   fi
 fi
 
-. $FWDIR/bin/load-spark-env.sh
+. "$FWDIR"/bin/load-spark-env.sh
 
 # Figure out which Python executable to use
 if [[ -z "$PYSPARK_PYTHON" ]]; then
@@ -57,12 +57,12 @@ fi
 export PYSPARK_PYTHON
 
 # Add the PySpark classes to the Python path:
-export PYTHONPATH=$SPARK_HOME/python/:$PYTHONPATH
-export PYTHONPATH=$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH
+export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH"
+export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH"
 
 # Load the PySpark shell.py script when ./pyspark is used interactively:
-export OLD_PYTHONSTARTUP=$PYTHONSTARTUP
-export PYTHONSTARTUP=$FWDIR/python/pyspark/shell.py
+export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"
+export PYTHONSTARTUP="$FWDIR/python/pyspark/shell.py"
 
 # If IPython options are specified, assume user wants to run IPython
 if [[ -n "$IPYTHON_OPTS" ]]; then
@@ -99,10 +99,10 @@ fi
 if [[ "$1" =~ \.py$ ]]; then
   echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
   echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
-  primary=$1
+  primary="$1"
   shift
   gatherSparkSubmitOpts "$@"
-  exec $FWDIR/bin/spark-submit "${SUBMISSION_OPTS[@]}" $primary "${APPLICATION_OPTS[@]}"
+  exec "$FWDIR"/bin/spark-submit "${SUBMISSION_OPTS[@]}" "$primary" "${APPLICATION_OPTS[@]}"
 else
   # PySpark shell requires special handling downstream
   export PYSPARK_SHELL=1
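
The exec line above also quotes the array expansions; "${arr[@]}" re-emits every element as its own word, so submit options or file names containing spaces survive the hand-off to spark-submit. A quick illustration in bash:

opts=("--name" "My App" "--master" "local[2]")
printf '<%s>\n' "${opts[@]}"   # four words; "My App" stays intact
printf '<%s>\n' ${opts[@]}     # unquoted: five words; "My App" splits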

bin/run-example (+4 -4)

@@ -19,7 +19,7 @@
 
 SCALA_VERSION=2.10
 
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 export SPARK_HOME="$FWDIR"
 EXAMPLES_DIR="$FWDIR"/examples
 
@@ -35,12 +35,12 @@ else
 fi
 
 if [ -f "$FWDIR/RELEASE" ]; then
-  export SPARK_EXAMPLES_JAR=`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`
+  export SPARK_EXAMPLES_JAR="`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`"
 elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar ]; then
-  export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar`
+  export SPARK_EXAMPLES_JAR="`ls "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar`"
 fi
 
-if [[ -z $SPARK_EXAMPLES_JAR ]]; then
+if [[ -z "$SPARK_EXAMPLES_JAR" ]]; then
   echo "Failed to find Spark examples assembly in $FWDIR/lib or $FWDIR/examples/target" 1>&2
   echo "You need to build Spark before running this program" 1>&2
   exit 1

bin/spark-class (+10 -10)

@@ -27,12 +27,12 @@ esac
 SCALA_VERSION=2.10
 
 # Figure out where Spark is installed
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"
 
-. $FWDIR/bin/load-spark-env.sh
+. "$FWDIR"/bin/load-spark-env.sh
 
 if [ -z "$1" ]; then
   echo "Usage: spark-class <class> [<args>]" 1>&2
@@ -105,7 +105,7 @@ else
     exit 1
   fi
 fi
-JAVA_VERSION=$($RUNNER -version 2>&1 | sed 's/java version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')
+JAVA_VERSION=$("$RUNNER" -version 2>&1 | sed 's/java version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')
 
 # Set JAVA_OPTS to be able to load native libraries and to set heap size
 if [ "$JAVA_VERSION" -ge 18 ]; then
@@ -117,7 +117,7 @@ JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"
 
 # Load extra JAVA_OPTS from conf/java-opts, if it exists
 if [ -e "$FWDIR/conf/java-opts" ] ; then
-  JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
+  JAVA_OPTS="$JAVA_OPTS `cat "$FWDIR"/conf/java-opts`"
 fi
 
 # Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!
@@ -126,21 +126,21 @@ TOOLS_DIR="$FWDIR"/tools
 SPARK_TOOLS_JAR=""
 if [ -e "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar ]; then
   # Use the JAR from the SBT build
-  export SPARK_TOOLS_JAR=`ls "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar`
+  export SPARK_TOOLS_JAR="`ls "$TOOLS_DIR"/target/scala-$SCALA_VERSION/spark-tools*[0-9Tg].jar`"
 fi
 if [ -e "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar ]; then
   # Use the JAR from the Maven build
   # TODO: this also needs to become an assembly!
-  export SPARK_TOOLS_JAR=`ls "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar`
+  export SPARK_TOOLS_JAR="`ls "$TOOLS_DIR"/target/spark-tools*[0-9Tg].jar`"
 fi
 
 # Compute classpath using external script
-classpath_output=$($FWDIR/bin/compute-classpath.sh)
+classpath_output=$("$FWDIR"/bin/compute-classpath.sh)
 if [[ "$?" != "0" ]]; then
   echo "$classpath_output"
   exit 1
 else
-  CLASSPATH=$classpath_output
+  CLASSPATH="$classpath_output"
 fi
 
 if [[ "$1" =~ org.apache.spark.tools.* ]]; then
@@ -153,9 +153,9 @@ if [[ "$1" =~ org.apache.spark.tools.* ]]; then
 fi
 
 if $cygwin; then
-  CLASSPATH=`cygpath -wp $CLASSPATH`
+  CLASSPATH="`cygpath -wp "$CLASSPATH"`"
   if [ "$1" == "org.apache.spark.tools.JavaAPICompletenessChecker" ]; then
-    export SPARK_TOOLS_JAR=`cygpath -w $SPARK_TOOLS_JAR`
+    export SPARK_TOOLS_JAR="`cygpath -w "$SPARK_TOOLS_JAR"`"
   fi
 fi
 export CLASSPATH
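
Lines like CLASSPATH="`cygpath -wp "$CLASSPATH"`" look unbalanced but are legal: a backquoted substitution starts a nested quoting context, so the inner double quotes belong to the embedded command, not the outer string. The $() form expresses the same nesting more readably:

msg="outer `echo "inner word"` outer"    # balanced, not a typo
echo "$msg"                              # -> outer inner word outer
msg="outer $(echo "inner word") outer"   # equivalent, easier to scan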

bin/spark-shell (+5 -5)

@@ -29,19 +29,19 @@ esac
 set -o posix
 
 ## Global script variables
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 
 function usage() {
   echo "Usage: ./bin/spark-shell [options]"
-  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   exit 0
 }
 
 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
   usage
 fi
 
-source $FWDIR/bin/utils.sh
+source "$FWDIR"/bin/utils.sh
 SUBMIT_USAGE_FUNCTION=usage
 gatherSparkSubmitOpts "$@"
 
@@ -54,11 +54,11 @@ function main() {
     # (see https://github.com/sbt/sbt/issues/562).
     stty -icanon min 1 -echo > /dev/null 2>&1
     export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
-    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
+    "$FWDIR"/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
     stty icanon echo > /dev/null 2>&1
   else
     export SPARK_SUBMIT_OPTS
-    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
+    "$FWDIR"/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
   fi
 }

bin/spark-sql (+4 -4)

@@ -27,7 +27,7 @@ CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
 CLASS_NOT_FOUND_EXIT_STATUS=1
 
 # Figure out where Spark is installed
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 
 function usage {
   echo "Usage: ./bin/spark-sql [options] [cli option]"
@@ -38,18 +38,18 @@ function usage {
   pattern+="\|--help"
   pattern+="\|======="
 
-  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  "$FWDIR"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   echo
   echo "CLI options:"
-  $FWDIR/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
+  "$FWDIR"/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
 }
 
 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
   usage
   exit 0
 fi
 
-source $FWDIR/bin/utils.sh
+source "$FWDIR"/bin/utils.sh
 SUBMIT_USAGE_FUNCTION=usage
 gatherSparkSubmitOpts "$@"

bin/spark-submit (+2 -2)

@@ -19,7 +19,7 @@
 
 # NOTE: Any changes in this file must be reflected in SparkSubmitDriverBootstrapper.scala!
 
-export SPARK_HOME="$(cd `dirname $0`/..; pwd)"
+export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
 ORIG_ARGS=("$@")
 
 while (($#)); do
@@ -59,5 +59,5 @@ if [[ "$SPARK_SUBMIT_DEPLOY_MODE" == "client" && -f "$SPARK_SUBMIT_PROPERTIES_FI
   fi
 fi
 
-exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}"
+exec "$SPARK_HOME"/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}"

dev/check-license (+8 -8)

@@ -23,16 +23,16 @@ acquire_rat_jar () {
   URL1="http://search.maven.org/remotecontent?filepath=org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar"
   URL2="http://repo1.maven.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar"
 
-  JAR=$rat_jar
+  JAR="$rat_jar"
 
   if [[ ! -f "$rat_jar" ]]; then
     # Download rat launch jar if it hasn't been downloaded yet
     if [ ! -f "$JAR" ]; then
       # Download
       printf "Attempting to fetch rat\n"
-      JAR_DL=${JAR}.part
+      JAR_DL="${JAR}.part"
       if hash curl 2>/dev/null; then
-        (curl --silent ${URL1} > "$JAR_DL" || curl --silent ${URL2} > "$JAR_DL") && mv "$JAR_DL" "$JAR"
+        (curl --silent "${URL1}" > "$JAR_DL" || curl --silent "${URL2}" > "$JAR_DL") && mv "$JAR_DL" "$JAR"
       elif hash wget 2>/dev/null; then
         (wget --quiet ${URL1} -O "$JAR_DL" || wget --quiet ${URL2} -O "$JAR_DL") && mv "$JAR_DL" "$JAR"
       else
@@ -50,7 +50,7 @@ acquire_rat_jar () {
 }
 
 # Go to the Spark project root directory
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 cd "$FWDIR"
 
 if test -x "$JAVA_HOME/bin/java"; then
@@ -60,17 +60,17 @@ else
 fi
 
 export RAT_VERSION=0.10
-export rat_jar=$FWDIR/lib/apache-rat-${RAT_VERSION}.jar
-mkdir -p $FWDIR/lib
+export rat_jar="$FWDIR"/lib/apache-rat-${RAT_VERSION}.jar
+mkdir -p "$FWDIR"/lib
 
 [[ -f "$rat_jar" ]] || acquire_rat_jar || {
   echo "Download failed. Obtain the rat jar manually and place it at $rat_jar"
   exit 1
 }
 
-$java_cmd -jar $rat_jar -E $FWDIR/.rat-excludes -d $FWDIR > rat-results.txt
+$java_cmd -jar "$rat_jar" -E "$FWDIR"/.rat-excludes -d "$FWDIR" > rat-results.txt
 
-ERRORS=$(cat rat-results.txt | grep -e "??")
+ERRORS="$(cat rat-results.txt | grep -e "??")"
 
 if test ! -z "$ERRORS"; then
   echo "Could not find Apache license headers in the following files:"

dev/lint-python (+3 -3)

@@ -18,10 +18,10 @@
 #
 
 SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
-SPARK_ROOT_DIR="$(dirname $SCRIPT_DIR)"
+SPARK_ROOT_DIR="$(dirname "$SCRIPT_DIR")"
 PEP8_REPORT_PATH="$SPARK_ROOT_DIR/dev/pep8-report.txt"
 
-cd $SPARK_ROOT_DIR
+cd "$SPARK_ROOT_DIR"
 
 # Get pep8 at runtime so that we don't rely on it being installed on the build server.
 #+ See: https://github.com/apache/spark/pull/1744#issuecomment-50982162
@@ -45,7 +45,7 @@ fi
 #+ first, but we do so so that the check status can
 #+ be output before the report, like with the
 #+ scalastyle and RAT checks.
-python $PEP8_SCRIPT_PATH $PEP8_PATHS_TO_CHECK > "$PEP8_REPORT_PATH"
+python "$PEP8_SCRIPT_PATH" $PEP8_PATHS_TO_CHECK > "$PEP8_REPORT_PATH"
 pep8_status=${PIPESTATUS[0]} #$?
 
 if [ $pep8_status -ne 0 ]; then

dev/mima (+2 -2)

@@ -21,12 +21,12 @@ set -o pipefail
 set -e
 
 # Go to the Spark project root directory
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
 cd "$FWDIR"
 
 echo -e "q\n" | sbt/sbt oldDeps/update
 
-export SPARK_CLASSPATH=`find lib_managed \( -name '*spark*jar' -a -type f \) | tr "\\n" ":"`
+export SPARK_CLASSPATH="`find lib_managed \( -name '*spark*jar' -a -type f \) | tr "\\n" ":"`"
 echo "SPARK_CLASSPATH=$SPARK_CLASSPATH"
 
 ./bin/spark-class org.apache.spark.tools.GenerateMIMAIgnore

dev/run-tests (+1 -1)

@@ -18,7 +18,7 @@
 #
 
 # Go to the Spark project root directory
-FWDIR="$(cd `dirname $0`/..; pwd)"
+FWDIR="$(cd "`dirname $0`"/..; pwd)"
 cd "$FWDIR"
 
 if [ -n "$AMPLAB_JENKINS_BUILD_PROFILE" ]; then
