Skip to content

Commit

Permalink
Merge "add weights for supporting SSD; add num_omp_threads for specifying NO of openmp threads; fix issues in script"
Browse files Browse the repository at this point in the history
  • Loading branch information
fzou1 authored and Gerrit Code Review committed Nov 23, 2017
2 parents db59f82 + 905f4e5 commit 260967f
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 15 deletions.
58 changes: 45 additions & 13 deletions scripts/run_intelcaffe.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ snapshot=""
# parameters for training
solver_file=""

# weights for finetuning
weight_file=""

# number of OpenMP threads
num_omp_threads=0

# specify engine for running caffe
engine="MKLDNN"

Expand All @@ -49,6 +55,7 @@ function usage
script_name=$0
echo "Usage:"
echo " $script_name [--hostfile host_file] [--solver solver_file]"
echo " [--weights weight_file] [--num_omp_threads num_omp_threads]"
echo " [--network opa/tcp] [--netmask tcp_netmask] [--debug on/off]"
echo " [--mode train/time/none] [--benchmark all/qperf/mpi/none]"
echo " [--iteration iter] [--model_file deploy.prototxt]"
Expand All @@ -59,8 +66,10 @@ function usage
echo " [--mpibench_param mpibench_param]"
echo ""
echo " Parameters:"
echo " host: host file includes list of nodes. Only used when you're running multinodes mode"
echo " hostfile: host file includes list of nodes. Only used if you're running with multinode"
echo " solver: need to be specified a solver file if mode is train"
echo " weight_file: weight file for finetuning"
echo " num_omp_threads: number of OpenMP threads"
echo " network: opa(default), tcp"
echo " netmask: only used if network is tcp"
echo " debug: off(default). MLSL debug information is outputed if it's on"
Expand Down Expand Up @@ -229,9 +238,11 @@ function run_mpi_bench
return
fi

mpi_iter=10
max_msglog=29
xeonbin="$mpibench_bin $mpibench_param"
if [ "$mpibench_bin" == "IMB-MPI1" ] || [ "$mpibench_bin" == "IMB-NBC" ]; then
xeonbin+=" -msglog 29 -iter 10 -iter_policy off"
xeonbin+=" -msglog $max_msglog -iter $mpi_iter -iter_policy off"
fi

mpibench_bin_bname=`basename $mpibench_bin`
Expand Down Expand Up @@ -302,6 +313,9 @@ function run_caffe
if [ "${snapshot}" != "" ]; then
xeonbin+=" --snapshot=${snapshot}"
fi
if [ "${weight_file}" != "" ]; then
xeonbin+=" --weights ${weight_file}"
fi
fi

execute_command "$xeonbin" $result_dir
Expand All @@ -315,7 +329,9 @@ function test_ssh_connection
for host in ${host_list[@]}
do
hostname=`ssh $host "hostname"`
ssh $hostname "ls"
# This first connection prompts the user for a password if one is required;
# the commands that follow should then run without asking for a password.
ssh $hostname "ls" >/dev/null
done
fi
}
Expand All @@ -330,7 +346,7 @@ function get_model_fname
function check_lmdb_files
{
model_file_=$1

lmdb_dirs=(ilsvrc12_train_lmdb ilsvrc12_val_lmdb)
is_missing_lmdb=0
for lmdb_dir in "${lmdb_dirs[@]}"
Expand Down Expand Up @@ -414,6 +430,14 @@ do
snapshot=$2
shift
;;
--weights)
weight_file=$2
shift
;;
--num_omp_threads)
num_omp_threads=$2
shift
;;
--engine)
engine=$2
shift
Expand Down Expand Up @@ -443,6 +467,8 @@ do
shift
done

detect_cpu

echo ""
echo "CPUs with optimal settings:"
for ((i=0; i<${#cpu_list[@]}; i++))
Expand All @@ -451,6 +477,7 @@ do
done
echo ""
echo "Settings:"
echo " CPU: $cpu_model"
echo " Host file: $host_file"
echo " Running mode: $mode"
echo " Benchmark: $benchmark_mode"
Expand Down Expand Up @@ -502,13 +529,13 @@ if [ "$mode" == "time" ]; then
fi

# check source data exists
grep "backend" $model_file | grep -i "LMDB" >/dev/null
if [ $? -eq 0 ]; then
check_lmdb_files $model_file
if [ "$model_file" != "" ]; then
grep "backend" $model_file | grep -i "LMDB" >/dev/null
if [ $? -eq 0 ]; then
check_lmdb_files $model_file
fi
fi

exit 0

echo " Network: $network"
if [ "$network" == "tcp" ]; then
if [ "$tcp_netmask" == "" ]; then
Expand All @@ -533,19 +560,24 @@ echo " Number of nodes: $numnodes"
# test connection between nodes via ssh
test_ssh_connection $host_file

detect_cpu

set_numa_node

if [ ! -d $result_dir ]; then
echo "Create result directory: $result_dir"
mkdir -p $result_dir
fi

env_params="--debug $debug --network $network --netmask $tcp_netmask --num_mlsl_servers $num_mlsl_servers"
env_params="--cpu $cpu_model --debug $debug --network $network --num_mlsl_servers $num_mlsl_servers"
if [ "$network" == "tcp" ]; then
env_params+=" --netmask $tcp_netmask"
fi
if [ "$host_file" != "" ]; then
env_params+=" --hostfile $host_file"
fi
fi
if [ ${num_omp_threads} -ne 0 ]; then
env_params+=" --num_omp_threads ${num_omp_threads}"
fi

source ${script_dir}/set_env.sh $env_params

if [ "${benchmark_mode}" != "none" ]; then
Expand Down
31 changes: 29 additions & 2 deletions scripts/set_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ debug="off"
# it's assigned by detect_cpu
cpu_model="skx"

# number of OpenMP threads
num_omp_threads=0

nodenames=""
# a list of nodes
host_file=""
Expand Down Expand Up @@ -157,9 +160,21 @@ function set_openmp_envs
# OMP configuration
# For multinodes
if [ ${numnodes} -gt 1 ]; then
reserved_cores=0
if [ ${num_omp_threads} -ne 0 ]; then
if [ $numthreads_per_proc -lt $num_omp_threads ]; then
echo "Too large number of OpenMP thread: $num_omp_threads"
echo " should be less than or equal to $numthreads_per_proc"
exit 1
fi
let reserved_cores=numthreads_per_proc-num_omp_threads
echo "Reserve number of cores: $reserved_cores"
let numthreads_per_proc=${num_omp_threads}
fi

export OMP_NUM_THREADS=${numthreads_per_proc}
export KMP_HW_SUBSET=1t
affinitystr="proclist=[0-5,$((5+numservers+1))-$((maxcores-1))],granularity=thread,explicit"
affinitystr="proclist=[0-5,$((5+numservers+reserved_cores+1))-$((maxcores-1))],granularity=thread,explicit"
else
# For single node only set for KNM
if [ "${cpu_model}" == "knm" ]; then
Expand All @@ -170,6 +185,8 @@ function set_openmp_envs
fi
fi
export KMP_AFFINITY=$affinitystr

echo "Number of OpenMP threads: ${numthreads_per_proc}"
}

function set_env_vars
Expand Down Expand Up @@ -205,6 +222,14 @@ do
num_mlsl_servers=$2
shift
;;
--cpu)
cpu_model=$2
shift
;;
--num_omp_threads)
num_omp_threads=$2
shift
;;
*)
echo "Unknown option: $key"
usage
Expand All @@ -222,8 +247,10 @@ if [[ $host_file != "" ]]; then
echo "Error: empty host file! Exit."
exit 0
fi
numnodes=${#nodenames[@]}
else
nodenames=(`hostname`)
fi
numnodes=${#nodenames[@]}
echo " Number of nodes: $numnodes"

clear_envs
Expand Down

0 comments on commit 260967f

Please sign in to comment.