discount-aws.sh
#!/bin/bash
#Script to submit Discount as a step to an Amazon AWS EMR cluster.
#The AWS CLI must be installed and configured for this script to work.
#The first argument is the EMR cluster ID. The remaining arguments are passed on to the Discount driver process.
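#Example invocation (the cluster ID below is a placeholder; supply your own Discount arguments):
#  ./discount-aws.sh j-XXXXXXXXXXXXX <Discount arguments...>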
CLUSTER=$1
shift
#Bucket to store discount jars and data files
BUCKET=s3://my-bucket/discount
#Location of the Discount distribution, resolved from the (possibly symlinked) path of this script
DISCOUNT_HOME="$(dirname -- "$(readlink -f "${BASH_SOURCE[0]}")")"
aws s3 cp "$DISCOUNT_HOME/target/scala-2.12/Discount-assembly-3.1.0.jar" $BUCKET/
#aws s3 sync "$DISCOUNT_HOME/resources/PASHA" $BUCKET/PASHA/
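#Optionally, verify the uploads before submitting the step:
#  aws s3 ls $BUCKET/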
#Max size of input splits in bytes. A smaller number reduces memory usage but increases the number of
#partitions for the first stage. If this variable is unset, Spark's default of 128 MB will be used.
#SPLIT="spark.hadoop.mapreduce.input.fileinputformat.split.maxsize=$((64 * 1024 * 1024))"
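#Other Spark properties can be passed the same way via additional variables and --conf flags,
#for example (illustrative value, tune for your cluster):
#EXECMEM="spark.executor.memory=8g"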
#To set SPLIT or other variables, uncomment the corresponding --conf line below.
COMMAND=( \
#  --conf $SPLIT \
  --class com.jnpersson.discount.spark.Discount $BUCKET/Discount-assembly-3.1.0.jar "$@")
#Turn off paging for output
export AWS_PAGER=""
#Build the comma-separated Args list expected by command-runner.jar, e.g.
#Args=[spark-submit,--class,com.jnpersson.discount.spark.Discount,s3://my-bucket/discount/Discount-assembly-3.1.0.jar,...]
RUNNER_ARGS="spark-submit"
for PARAM in "${COMMAND[@]}"
do
  RUNNER_ARGS="$RUNNER_ARGS,$PARAM"
done
aws emr add-steps --cluster-id "$CLUSTER" --steps "Type=CUSTOM_JAR,Name=Discount,ActionOnFailure=CONTINUE,Jar=command-runner.jar,Args=[$RUNNER_ARGS]"
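
#The add-steps call above prints the ID of the submitted step. To monitor it afterwards
#(the step ID below is a placeholder), one can use for example:
#  aws emr list-steps --cluster-id $CLUSTER --step-states PENDING RUNNING
#  aws emr describe-step --cluster-id $CLUSTER --step-id s-XXXXXXXXXXXXX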