Skip to content

Commit

Permalink
Adding more scripts, config examples and unit tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
avitorovic committed Dec 5, 2014
1 parent 67f7dc1 commit 72ffc72
Show file tree
Hide file tree
Showing 264 changed files with 33,639 additions and 409 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,15 @@ bin/extract_constants
bin/old
experiments
test/squall_plan_runner/confs/special_purpose
contrib/0.2.5
resources/storm-linux_*

test/data/tpch/small
test/data/tpch/0.1G
test/data/tpch/0.01G_z1
test/data/tpch/0.01G_z2
test/data/tpch/0.01G_z3
test/data/tpch/0.01G_z4
test/data/tpch/0.05G
test/data/tpch/1G
test/data/tpch/4G
Expand Down
7 changes: 7 additions & 0 deletions bin/Solaris/adjust_storm_yaml_locally.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# Removes every match of the FIND pattern from the local Storm configuration
# file (~/.storm/storm.yaml), editing the file in place.

FILENAME=~/.storm/storm.yaml
# FIND is interpolated into a Perl regular expression; REPLACE is the
# replacement text. It is empty, so every match is simply deleted.
FIND=-priv
REPLACE=

# Fail loudly instead of letting perl print a warning and report success.
if [ ! -f "$FILENAME" ]; then
  echo "Error: $FILENAME does not exist. Exiting..." >&2
  exit 1
fi

# The file is handed to perl directly; piping the name through xargs would
# split it on whitespace or quotes if the home directory contained any.
perl -pi -e "s/$FIND/$REPLACE/g" "$FILENAME"
40 changes: 40 additions & 0 deletions bin/Solaris/all_latency_estimator.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/bash

# Prints the invocation syntax to stderr and terminates the script.
# It is only called after an argument error, so the exit status is 1
# (a bare `exit` would propagate the status of the preceding echo, i.e. 0).
usage() {
  echo "Usage: ./all_latency_estimator.sh <FOLDER>" >&2
  exit 1
}

# Check correct number of command line arguments
if [ $# -ne 1 ]; then
  echo "Error: Illegal number of command line arguments. Required 1 argument and got $#. Exiting..." >&2
  usage
fi

# Check correctness of provided folder
FOLDER=$1
if [ ! -d "$FOLDER" ]; then
  echo "Provided argument $FOLDER is not a folder (or folder doesn't exist). Exiting..." >&2
  usage
fi

# Report files, both created directly inside FOLDER
ALL_LAT_FILE=all_latencies.txt
ALL_EXC_FILE=all_exceptions.txt

#cleaning up old content (each report starts with a single empty line)
echo "" > "$FOLDER/$ALL_LAT_FILE"
echo "" > "$FOLDER/$ALL_EXC_FILE"

# Visit every sub-directory of FOLDER except "cluster". A glob is used
# instead of parsing `ls`, so names containing whitespace stay intact.
for CUR_DIR in "$FOLDER"/*/; do
  # When FOLDER has no sub-directories the pattern is left unexpanded;
  # the -d test filters that literal out.
  [ -d "$CUR_DIR" ] || continue
  CUR_DIR=${CUR_DIR%/}
  SUBDIR=${CUR_DIR##*/}

  if [ "$SUBDIR" != "cluster" ]; then
    echo "NEW CONFIGURATION: Latencies for $SUBDIR :" >> "$FOLDER/$ALL_LAT_FILE"
    ./latency_estimator.sh "$CUR_DIR" >> "$FOLDER/$ALL_LAT_FILE"

    echo "NEW CONFIGURATION: Exceptions for $SUBDIR :" >> "$FOLDER/$ALL_EXC_FILE"
    ./exception_locator.sh "$CUR_DIR" >> "$FOLDER/$ALL_EXC_FILE"
  fi
done
4 changes: 4 additions & 0 deletions bin/Solaris/copy_to_git.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Copies the contents of ../src-public-git into ../../squall/src and then
# removes every .svn directory from the destination tree.

SRC_DIR=../src-public-git
DEST_DIR=../../squall/src

cp -r "$SRC_DIR"/* "$DEST_DIR"
cp_status=$?

# find deletes the directories itself, so paths containing whitespace are not
# word-split on their way to rm. -prune keeps find from descending into a
# directory that is about to be removed.
find "$DEST_DIR" -type d -name .svn -prune -exec rm -rf {} +

# Report a failed copy to the caller; the cleanup above still ran.
exit "$cp_status"
19 changes: 19 additions & 0 deletions bin/Solaris/create_dbs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
# Runs ./dbgen with scale factor <SIZE> on blade 1, moves the generated *.tbl
# files into data/tpchdb/<SIZE>G under the remote base directory, and then
# copies that directory from blade 1 to blades 2-10.
#
# Usage: ./create_dbs.sh <SIZE>
#
# NOTE(review): the previous header pointed at
# http://zookeeper.apache.org/doc/r3.3.3/zookeeperAdmin.html#Ongoing+Data+Directory+Cleanup
# which looks unrelated to what this script does - confirm before relying on it.

if [ -z "$1" ]; then
  # Without a size the target directory would silently become "G".
  echo "Usage: ./create_dbs.sh <SIZE>" >&2
  exit 1
fi

MACHINE=squalldata@icdatasrv
MACHINE_DBGEN=${MACHINE}1

SIZE=$1

# Deliberately not called HOME: assigning to HOME would overwrite the
# environment variable for this script and for every command it starts.
REMOTE_HOME=/data/squall_blade/
QUERY_PATH=data/tpchdb/
QUERY_NAME=${SIZE}G
FULL_PATH=${REMOTE_HOME}${QUERY_PATH}${QUERY_NAME}

# Generate the tables on the dbgen machine and collect them in FULL_PATH.
ssh "$MACHINE_DBGEN" "cd $REMOTE_HOME/$QUERY_PATH; mkdir -p $QUERY_NAME; cd $REMOTE_HOME; ./dbgen -vf -s $SIZE 2>&1; mv *.tbl $FULL_PATH"

# The copy is started on the dbgen machine, which pushes the directory to
# each of the remaining blades.
for blade in {2..10}; do
  ssh "$MACHINE_DBGEN" "scp -r $FULL_PATH ${MACHINE}${blade}:$REMOTE_HOME/$QUERY_PATH"
done
3 changes: 3 additions & 0 deletions bin/Solaris/create_more_dbs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash
# Creates the additional databases by delegating to create_dbs.sh,
# one invocation per size listed below.

for db_size in 80; do
  ./create_dbs.sh "$db_size"
done
30 changes: 30 additions & 0 deletions bin/Solaris/delete_logs_and_storage.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
# Empties nimbus.log on blade 5 and removes everything inside the logs
# directory of every zone (ssh ports 1001-1022) on blades 1-10.
# Deleting the BDB storage is currently disabled (see the TODO below).

HOST_PREFIX=squalldata@icdatasrv
NIMBUS_HOST=squalldata@icdatasrv5
STORAGE_DIR=/data/squall_zone/storage
LOGS_DIR=/data/squall_zone/logs

# Removes the content of the logs directory in one zone.
#   $1 - blade number
#   $2 - ssh port of the zone
clear_zone_logs() {
  ssh -p "$2" "${HOST_PREFIX}$1" 'rm -r ' "$LOGS_DIR"'/*'
}

# TODO Uncomment once you start dealing with BDB again
echo "NOT Deleting BDB storage..."
# deleting BDB storage
# for ((blade = 1; blade <= 10; blade++)); do
#   for ((port = 1001; port <= 1022; port++)); do
#     ssh -p "$port" "${HOST_PREFIX}${blade}" 'rm -r ' "$STORAGE_DIR"'/*'
#   done
# done

echo "Deleting Squall logs..."
# TODO, nimbus.log is emptied rather than deleted because Storm behaves strangely
ssh "$NIMBUS_HOST" 'echo "" > ' "$LOGS_DIR"'/nimbus.log'

for ((blade = 1; blade <= 10; blade++)); do
  for ((port = 1001; port <= 1022; port++)); do
    clear_zone_logs "$blade" "$port"
  done
done
20 changes: 20 additions & 0 deletions bin/Solaris/delete_outputs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
# Removes the Storm output on blades 1-10: storm_data, zookeeper_data and
# logs over the default ssh port, then storm_data and logs in every zone
# (ssh ports 1001-1022).

. ./storm_version.sh

HOST_PREFIX=squalldata@icdatasrv
BASE_DIR=/data/squall_zone

DATA_DIR=$BASE_DIR/storm_data
ZK_DIR=$BASE_DIR/zookeeper_data
LOG_DIR=$BASE_DIR/logs

blade=1
while [ "$blade" -le 10 ]; do
  target=${HOST_PREFIX}${blade}

  # default-port login: Storm data, ZooKeeper data and logs
  ssh "$target" 'rm -r ' "$DATA_DIR"'/*;rm -r ' "$ZK_DIR"'/*;rm -r ' "$LOG_DIR"'/*'

  # zones: Storm data and logs only
  port=1001
  while [ "$port" -le 1022 ]; do
    ssh -p "$port" "$target" 'rm -r ' "$DATA_DIR"'/*;rm -r ' "$LOG_DIR"'/*'
    port=$((port + 1))
  done

  blade=$((blade + 1))
done
19 changes: 19 additions & 0 deletions bin/Solaris/delete_snapshots.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
# Removes the content of the profiling output directory on blades 1-10, both
# over the default ssh port and in every zone (ssh ports 1001-1022), and
# clears the .yjp log directory on each blade.
. ./storm_version.sh

HOST_PREFIX=squalldata@icdatasrv

SNAPSHOT_DIR=/data/squall_zone/profiling/output
YJP_LOG_DIR=/export/home/squalldata/.yjp/log

# Removes everything inside a remote directory.
#   $1 - remote directory
#   $2 - host
#   $3 - optional ssh port; the ssh default is used when omitted
purge_remote_dir() {
  if [ -n "$3" ]; then
    ssh -p "$3" "$2" 'rm -r ' "$1"'/*'
  else
    ssh "$2" 'rm -r ' "$1"'/*'
  fi
}

for ((blade = 1; blade <= 10; blade++)); do
  host=${HOST_PREFIX}${blade}

  # snapshots reachable through the default port
  purge_remote_dir "$SNAPSHOT_DIR" "$host"
  # log of yjp
  purge_remote_dir "$YJP_LOG_DIR" "$host"

  # snapshots inside every zone
  for ((port = 1001; port <= 1022; port++)); do
    purge_remote_dir "$SNAPSHOT_DIR" "$host" "$port"
  done
done
59 changes: 59 additions & 0 deletions bin/Solaris/download_latest_snapshot.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/bin/bash
# Lists REMOTE_FOLDER on MACHINE and copies the file with the highest serial
# number into LOCAL_FOLDER. Nothing is copied when the folder is empty.
#
# Usage: ./download_latest_snapshot.sh <MACHINE> <PORT> <REMOTE_FOLDER> <LOCAL_FOLDER>
#
#For example,
#MACHINE=squalldata@icdatasrv2
#PORT=22
#REMOTE_FOLDER=/data/squall_zone/profiling/output
#LOCAL_FOLDER=snapshots

### Methods begin

# Prints the serial number embedded in a file name.
#   $1 - file name in the form worker-2013-03-21-332.snapshot
# The extension is dropped first, then the 5th '-'-separated part is taken.
# The number is written to stdout instead of being handed back via `return`:
# an exit status only carries 0-255, so serial 332 would arrive as 76.
get_file_serial() {
  local stem=${1%%.*}
  local serial
  serial=$(printf '%s\n' "$stem" | cut -d'-' -f5)
  case $serial in
    '' | *[!0-9]*)
      # No numeric serial in the name: rank the file lowest.
      echo 0
      ;;
    *)
      # 10# forces base 10, so a zero-padded serial is not read as octal.
      echo $((10#$serial))
      ;;
  esac
}

# Prints the name (directory part removed) of the argument with the highest
# serial number. On a tie the first one wins.
#   $@ - file names or paths
# Returns 1 when called without arguments.
find_latest_snapshot() {
  local best_name=''
  local best_serial=-1
  local entry name serial
  for entry in "$@"; do
    name=${entry##*/}
    serial=$(get_file_serial "$name")
    if [ "$best_serial" -lt "$serial" ]; then
      best_serial=$serial
      best_name=$name
    fi
  done
  [ -n "$best_name" ] || return 1
  printf '%s\n' "$best_name"
}

# Entry point; see the usage line at the top of the file.
# Returns 2 on wrong usage, 1 when the remote listing fails, otherwise the
# status of the final scp (0 when there was nothing to download).
main() {
  if [ $# -ne 4 ]; then
    echo "Usage: ./download_latest_snapshot.sh <MACHINE> <PORT> <REMOTE_FOLDER> <LOCAL_FOLDER>" >&2
    return 2
  fi
  local MACHINE=$1
  local PORT=$2
  local REMOTE_FOLDER=$3
  local LOCAL_FOLDER=$4

  # get a list
  local listing
  listing=$(ssh -p "$PORT" "$MACHINE" 'ls '"$REMOTE_FOLDER") || return 1

  # one entry per line; blank lines (e.g. from an empty listing) are dropped
  local -a files=()
  local line
  while IFS= read -r line; do
    if [ -n "$line" ]; then
      files+=("$line")
    fi
  done <<<"$listing"

  # If there is at least one file inside, then we have to download something
  if [ "${#files[@]}" -gt 0 ]; then
    local latest
    latest=$(find_latest_snapshot "${files[@]}")
    echo "File with max index is $latest"
    scp -P "$PORT" "$MACHINE:$REMOTE_FOLDER/$latest" "$LOCAL_FOLDER"
  fi
}
### Methods end

main "$@"
23 changes: 23 additions & 0 deletions bin/Solaris/ewh_loop_cluster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/python
"""Runs loop_squall_cluster.sh once for every sub-directory of RUN_PATH.

Invocation: ./ewh_loop_cluster.py RUN_PATH

The sub-directories are processed in sorted order; a sub-directory named
"exclude" is skipped.
"""
import subprocess
import sys
import os

# Name of the sub-directory that is never processed.
EXCLUDED_DIR = "exclude"


def list_run_dirs(run_path):
    """Return run_path + "/" + name for each direct sub-directory, sorted.

    Only the first level below run_path is considered, and the sub-directory
    called "exclude" is left out.
    """
    subdirs = next(os.walk(run_path))[1]
    return [run_path + "/" + name
            for name in sorted(subdirs)
            if name != EXCLUDED_DIR]


def main(argv):
    """Validate the arguments and run the cluster loop; returns the exit code."""
    if len(argv) != 2:
        print("ERROR! Missing argument!")
        print("Invocation: ./ewh_loop_cluster.py RUN_PATH")
        return 1

    run_path = argv[1]
    if not os.path.isdir(run_path):
        print("ERROR! %s is not a directory!" % run_path)
        return 1

    for full_squall_path in list_run_dirs(run_path):
        print("Running loop_squall_cluster on %s" % full_squall_path)
        # ./loop_squall_cluster.sh <MODE> <PROFILING> <RESTART_BEFORE> <RESTART_AFTER_EACH> <GET_KEY_REGIONS> <BASE_PATH>
        # The arguments are passed as a list, without a shell, so a path that
        # contains spaces or shell metacharacters reaches the script intact.
        subprocess.call(["./loop_squall_cluster.sh", "PLAN_RUNNER",
                         "NO", "NO", "NO", "YES", full_squall_path])
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
38 changes: 38 additions & 0 deletions bin/Solaris/exception_locator.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/bin/bash
# From STORM_DATA_DIR (which contains all the Storm log files), it extracts the Exceptions

# Prints the invocation syntax to stderr and terminates the script.
# It is only called after an argument error, so the exit status is 1
# (a bare `exit` would propagate the status of the preceding echo, i.e. 0).
usage() {
  echo "Usage: ./exception_locator.sh <FOLDER>" >&2
  exit 1
}

# Check correct number of command line arguments
if [ $# -ne 1 ]; then
  echo "Error: Illegal number of command line arguments. Required 1 argument and got $#. Exiting..." >&2
  usage
fi

# Check correctness of provided folder
FOLDER=$1
if [ ! -d "$FOLDER" ]; then
  echo "Provided argument $FOLDER is not a folder (or folder doesn't exist). Exiting..." >&2
  usage
fi

# Process the *.log files below FOLDER one at a time and print the lines
# that mention "Exception" or "WARNINGLAT".
echo ""
# Paths are read line by line, so names containing spaces stay in one piece.
# Plain newline-separated find output is used so no find extension is needed.
while IFS= read -r FILEPATH; do
  # EXCEPTIONS=`cat $FILEPATH | grep "Exception" | cut -d':' -f2 | sort | uniq`
  EXCEPTIONS=$(grep "Exception" "$FILEPATH")
  if [ -n "$EXCEPTIONS" ]; then
    echo "$FILEPATH reported the following exceptions:"
    # printf copies the log lines verbatim; `echo -e` would interpret
    # backslash sequences that happen to occur in the log text.
    printf '\t%s\n' "$EXCEPTIONS"
  fi

  WARNINGLAT=$(grep "WARNINGLAT" "$FILEPATH")
  if [ -n "$WARNINGLAT" ]; then
    # NOTE(review): the wording below says "exceptions" although these are
    # WARNINGLAT lines; kept as is in case the report is parsed elsewhere.
    echo "$FILEPATH reported the following exceptions:"
    printf '\t%s\n' "$WARNINGLAT"
  fi
done < <(find "$FOLDER" -name '*.log')
echo ""
62 changes: 62 additions & 0 deletions bin/Solaris/excl_measure.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/bin/bash
# Compiles the given Java source as the storm-starter ExclamationTopology,
# submits it as topology "Exclamation", stores the topology statistics,
# kills the topology and finally collects (and removes) the worker logs of
# every zone on blades 1-10.
#
# Usage: ./excl_measure.sh <EXCL_FILE> <OUTPUT_DIR>
#   EXCL_FILE  - Java source copied over ExclamationTopology.java
#   OUTPUT_DIR - existing directory; the results are stored in a
#                sub-directory named after EXCL_FILE without its extension

. ./storm_version.sh

# Seconds to wait after killing the topology.
KILL_TIME=35
# Seconds to let the topology run before its statistics are fetched.
WAIT_STAT=180

# Prints the directory in which the results for a source file are stored.
#   $1 - path of the topology source file
#   $2 - base output directory
# Only the file name loses its extension. Stripping at the first '.' of the
# whole path would wipe out a base directory such as ./results or ../out.
output_dir_for() {
  local source_name=${1##*/}
  printf '%s\n' "$2/${source_name%%.*}"
}

# Entry point; see the usage block at the top of the file.
# Returns 1 on invalid arguments or when the compilation step fails.
main() {
  if [ $# -ne 2 ]; then
    echo "Should provide an input file and output directory. Exiting..." >&2
    return 1
  fi

  if [ ! -f "$1" ]; then
    echo "$1 is not a file. Exiting..." >&2
    return 1
  fi

  if [ ! -d "$2" ]; then
    echo "$2 is not a directory. Exiting..." >&2
    return 1
  fi

  local EXCL_FILE=$1
  local OUTPUT_DIR
  OUTPUT_DIR=$(output_dir_for "$EXCL_FILE" "$2")
  # STORMNAME is not set in this file; presumably it comes from
  # storm_version.sh sourced above - TODO confirm.
  local STARTER_DIR=../$STORMNAME/storm-starter
  local STORM_BIN=../$STORMNAME/bin

  #compiling
  echo "Compiling $EXCL_FILE ..."
  cp "$EXCL_FILE" "$STARTER_DIR/src/jvm/storm/starter/ExclamationTopology.java" || return 1
  # The subshell keeps the working directory of this script unchanged and
  # prevents lein from running in the wrong directory when cd fails.
  (cd "$STARTER_DIR" && lein uberjar) || return 1

  #running new code
  echo "Running storm_starter Exclamation $EXCL_FILE topology ..."
  "$STORM_BIN/storm" jar "$STARTER_DIR/storm-starter-0.0.1-SNAPSHOT-standalone.jar" storm.starter.ExclamationTopology Exclamation

  #grasping statistics
  sleep "$WAIT_STAT"
  mkdir -p "$OUTPUT_DIR"
  echo "Grasping statistics for $EXCL_FILE ..."
  ./get_topology_stats.sh > "$OUTPUT_DIR/StormLike.statistics"

  #killing the topology
  echo "Killing the topology $EXCL_FILE ..."
  "$STORM_BIN/storm" kill Exclamation
  sleep "$KILL_TIME"
  echo "Topology killed $EXCL_FILE ."

  #grasping output of our MACE-like timestamp mechanism, and removing it for not spoiling further results
  local blade zone
  for blade in {1..10}; do
    for zone in {1001..1022}; do
      # The remote pattern is quoted so that it is expanded on the remote
      # side and never matched against local files.
      scp -P "$zone" "squalldata@icdatasrv${blade}:/data/squall_zone/logs/worker*" "$OUTPUT_DIR/worker-${blade}-${zone}"
      ssh -p "$zone" "squalldata@icdatasrv${blade}" 'cd /data/squall_zone/logs; rm -rf worker*'
    done
  done
}

main "$@"
Loading

0 comments on commit 72ffc72

Please sign in to comment.