Skip to content

Commit

Permalink
fix an issue in run_intelcaffe.sh for single node; print message in s…
Browse files Browse the repository at this point in the history
…tdout; set MLSL environment if MLSL_ROOT is not defined; update next available ID in caffe.proto
  • Loading branch information
fzou1 committed Nov 10, 2017
1 parent 9a565d6 commit 4e5556e
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 19 deletions.
4 changes: 2 additions & 2 deletions scripts/run_benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ function usage
echo " Parameters:"
echo " topology: network topology used to benchmark, support alexnet, googlenet, googlenet_v2, resnet_50"
echo " , by specifying it as 'all', we run all supported topologies."
echo " host_file: host_file needed in multinodes mode, should contain list of nodes ips or hostnames"
echo " hostfile: host_file needed in multinodes mode, should contain list of nodes ips or hostnames"
echo " network: opa(default), tcp, used in multinodes mode to specify the network type"
echo " netmask: only used if network is tcp, set as the net work card name within your network"
echo ""
Expand Down Expand Up @@ -112,7 +112,7 @@ function run_specific_model
else
result_log_file="result-unknown-${model}-`date +%Y%m%d%H%M%S`.log"
fi
$exec_command > $result_log_file 2>&1
$exec_command 2>&1 | tee $result_log_file
obtain_intelcaffe_log $result_log_file
calculate_images_per_second $intelcaffe_log_file
}
Expand Down
23 changes: 15 additions & 8 deletions scripts/run_intelcaffe.sh
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,18 @@ function run_caffe
execute_command "$xeonbin" $result_dir
}

# Probe ssh connectivity to every node listed in a host file.
#
# For each unique entry of the host file, the node is first asked for its
# own hostname over ssh, and then a second ssh session is opened to that
# reported hostname (running "ls") so that both the listed address and the
# node's self-reported name are exercised.
#
# Arguments:
#   $1 - path to the host file; when empty the function does nothing.
# Globals:
#   host_list (written) - unique, sorted entries read from the host file.
#                         Kept global because the original code left it
#                         global; NOTE(review): confirm whether callers
#                         rely on it.
# Outputs:
#   Whatever the remote "ls" prints; a warning on stderr for a node whose
#   hostname could not be obtained.
function test_ssh_connection
{
    local host_file_=$1
    local host
    local remote_name

    if [ "$host_file_" = "" ]; then
        return 0
    fi

    # Word splitting of the command substitution is intentional: entries
    # are whitespace-separated tokens, exactly as the original parsed them.
    # shellcheck disable=SC2207
    host_list=( $(sort -- "$host_file_" | uniq) )

    for host in "${host_list[@]}"
    do
        remote_name=$(ssh "$host" "hostname")
        if [ -z "$remote_name" ]; then
            # Without this guard an empty name would turn the next call
            # into `ssh ls`, i.e. a connection attempt to a host named "ls".
            echo "Warning: unable to get hostname of $host via ssh" >&2
            continue
        fi
        ssh "$remote_name" "ls"
    done
}

if [ $# -le 1 ]; then
usage
Expand Down Expand Up @@ -473,6 +485,9 @@ if [[ $host_file != "" ]]; then
fi
echo " Number of nodes: $numnodes"

# test connection between nodes via ssh
test_ssh_connection $host_file

detect_cpu

set_numa_node
Expand All @@ -488,14 +503,6 @@ if [ "$host_file" != "" ]; then
fi
source ${script_dir}/set_env.sh $env_params

# test connection between nodes via ssh
host_list=( `cat $host_file | sort | uniq ` )
for host in ${host_list[@]}
do
hostname=`ssh $host "hostname"`
ssh $hostname "ls"
done


if [ "${benchmark_mode}" != "none" ]; then
run_benchmark
Expand Down
22 changes: 14 additions & 8 deletions scripts/set_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ tcp_netmask=""

# specify number of MLSL ep servers in command
num_mlsl_servers=-1

numservers=0

function init_mpi_envs
{
Expand Down Expand Up @@ -104,15 +104,21 @@ function clear_envs

function set_mlsl_vars
{
if [ ${numnodes} -eq 1 ]; then
return
fi

if [ -z $MLSL_ROOT ]; then
# use built-in mlsl if nothing is specified in ini
mlslvars_sh=`find external/mlsl/ -name mlslvars.sh`
source $mlslvars_sh
fi

if [ ${num_mlsl_servers} -eq -1 ]; then
if [ ${numnodes} -eq 1 ]; then
numservers=0
if [ "${cpu_model}" == "bdw" ] || [ "${cpu_model}" == "skx" ]; then
numservers=2
else
if [ "${cpu_model}" == "bdw" ] || [ "${cpu_model}" == "skx" ]; then
numservers=2
else
numservers=4
fi
numservers=4
fi
else
numservers=$((num_mlsl_servers))
Expand Down
2 changes: 1 addition & 1 deletion src/caffe/proto/caffe.proto
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ message SolverBatchSizePair {
// NOTE
// Update the next available ID when you add a new SolverParameter field.
//
// SolverParameter next available ID: 50 (last added: warm_up_start_lr)
// SolverParameter next available ID: 52 (last added: local_gw_ratio)
message SolverParameter {
//////////////////////////////////////////////////////////////////////////////
// Specifying the train and test networks
Expand Down

0 comments on commit 4e5556e

Please sign in to comment.