Skip to content

Commit

Permalink
Fix two perf problems
Browse files Browse the repository at this point in the history
1. On Linux, use ACML MP as default ACML library.
2. For the parallel end-to-end tests, limit concurrency based on
   available hardware threads.
  • Loading branch information
mahilleb-msft committed Nov 5, 2015
1 parent e31960e commit 88026d5
Show file tree
Hide file tree
Showing 8 changed files with 22 additions and 8 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ endif
ifeq ("$(MATHLIB)","acml")
INCLUDEPATH += $(ACML_PATH)/include
LIBPATH += $(ACML_PATH)/lib
LIBS += -lacml -lm -lpthread
LIBS += -lacml_mp -liomp5 -lm -lpthread
CPPFLAGS += -DUSE_ACML
endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@

ConfigDir=$TEST_DIR/../..
LogFileName=stderr
Instances=4
NumCPUThreads=$(threadsPerInstance $Instances)

# cntkmpirun <MPI args> <CNTK config file name> <additional CNTK args>
cntkmpirun "-n 4" SimpleMultiGPU.config "precision=double SimpleMultiGPU=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]]"
cntkmpirun "-n $Instances" SimpleMultiGPU.config "numCPUThreads=$NumCPUThreads precision=double SimpleMultiGPU=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]]"
ExitCode=$?
sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/"$LogFileName"_SimpleMultiGPU.logrank0
sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/"$LogFileName"_SimpleMultiGPU.logrank1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@

ConfigDir=$TEST_DIR/../..
LogFileName=stderr
Instances=4
NumCPUThreads=$(threadsPerInstance $Instances)

# cntkmpirun <MPI args> <CNTK config file name> <additional CNTK args>
cntkmpirun "-n 4" SimpleMultiGPU.config "precision=float SimpleMultiGPU=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=32]]]]"
cntkmpirun "-n $Instances" SimpleMultiGPU.config "numCPUThreads=$NumCPUThreads precision=float SimpleMultiGPU=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=32]]]]"
ExitCode=$?
sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/"$LogFileName"_SimpleMultiGPU.logrank0
sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/"$LogFileName"_SimpleMultiGPU.logrank1
Expand Down
2 changes: 0 additions & 2 deletions Tests/Speech/DNN/DiscriminativePreTraining/run-test
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#!/bin/bash

#!/bin/bash

. $TEST_ROOT_DIR/run-test-common

# cntkrun <CNTK config file name> <additional CNTK args>
Expand Down
4 changes: 3 additions & 1 deletion Tests/Speech/DNN/Parallel1BitQuantization/run-test
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@

ConfigDir=$TEST_DIR/..
LogFileName=stderr
Instances=3
NumCPUThreads=$(threadsPerInstance $Instances)

# cntkmpirun <MPI args> <CNTK config file name> <additional CNTK args>
cntkmpirun "-n 3" cntk.config "precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]]"
cntkmpirun "-n $Instances" cntk.config "numCPUThreads=$NumCPUThreads precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]]"
ExitCode=$?
sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank0
sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank1
Expand Down
4 changes: 3 additions & 1 deletion Tests/Speech/DNN/ParallelNoQuantization/run-test
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@

ConfigDir=$TEST_DIR/..
LogFileName=stderr
Instances=3
NumCPUThreads=$(threadsPerInstance $Instances)

# cntkmpirun <MPI args> <CNTK config file name> <additional CNTK args>
cntkmpirun "-n 3" cntk.config
cntkmpirun "-n $Instances" cntk.config "numCPUThreads=$NumCPUThreads"
ExitCode=$?
sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank0
sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank1
Expand Down
8 changes: 8 additions & 0 deletions Tests/run-test-common
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,14 @@ cntkrun()
return $?
}

# Given the number of parallel instances, print the number of hardware
# threads each instance can use.
# Arguments: $1 - number of instances (expected to be a positive integer)
# Outputs:   threads per instance on stdout; never less than 1
threadsPerInstance()
{
  local instances=${1:-}
  local cores threads

  # A missing, non-numeric or zero instance count would make the division
  # below fail and print nothing; fall back to one thread instead.
  if [[ ! $instances =~ ^[0-9]+$ ]] || (( 10#$instances == 0 )); then
    echo 1
    return 0
  fi

  # Declaration and assignment are separate so a failing nproc is not masked
  # by 'local'; without a usable core count assume one hardware thread.
  cores=$(nproc 2>/dev/null) || cores=1
  [[ $cores =~ ^[0-9]+$ ]] || cores=1

  # 10# forces base 10 so values with leading zeros are not parsed as octal.
  threads=$(( 10#$cores / 10#$instances ))

  # More instances than hardware threads: still hand out one thread each.
  if (( threads < 1 )); then
    threads=1
  fi
  echo "$threads"
}

# Function for launching a parallel CNTK run with MPI
# cntkmpirun <MPI args> <CNTK config file name> <additional CNTK args>
cntkmpirun()
Expand Down
2 changes: 1 addition & 1 deletion configure
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ mathlib=
default_path_list="/usr /usr/local /opt /opt/local"

# List from best to worst choice
default_acmls="acml5.3.1/ifort64"
default_acmls="acml5.3.1/ifort64_mp"
default_mkls=""

# NOTE: Will get compilation errors with cuda-6.0
Expand Down

0 comments on commit 88026d5

Please sign in to comment.