Added an LSTM full-utterance training E2E test

ylxqll · Oct 1, 2015 · 86c2836 · 86c2836
1 parent d371fed
commit 86c2836
Show file tree

Hide file tree

Showing 15 changed files with 11,035 additions and 20 deletions.
diff --git a/CNTK.sln b/CNTK.sln
@@ -195,14 +195,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Data", "Data", "{5F733BBA-F
 EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "LSTM", "LSTM", "{19EE975B-232D-49F0-94C7-6F1C6424FB53}"
 	ProjectSection(SolutionItems) = preProject
-		Tests\Speech\LSTM\baseline.cpu.txt = Tests\Speech\LSTM\baseline.cpu.txt
-		Tests\Speech\LSTM\baseline.gpu.txt = Tests\Speech\LSTM\baseline.gpu.txt
-		Tests\Speech\LSTM\baseline.windows.cpu.txt = Tests\Speech\LSTM\baseline.windows.cpu.txt
-		Tests\Speech\LSTM\baseline.windows.gpu.txt = Tests\Speech\LSTM\baseline.windows.gpu.txt
 		Tests\Speech\LSTM\cntk.config = Tests\Speech\LSTM\cntk.config
 		Tests\Speech\LSTM\lstmp-3layer_WithSelfStab.ndl = Tests\Speech\LSTM\lstmp-3layer_WithSelfStab.ndl
-		Tests\Speech\LSTM\run-test = Tests\Speech\LSTM\run-test
-		Tests\Speech\LSTM\testcases.yml = Tests\Speech\LSTM\testcases.yml
 	EndProjectSection
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ParseConfig", "MachineLearning\ParseConfig\ParseConfig.vcxproj", "{7C4E77C9-6B17-4B02-82C1-DB62EEE2635B}"
@@ -334,6 +328,26 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Kaldi2Reader", "Kaldi2Reade
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SequenceTraining", "MachineLearning\SequenceTraining\SequenceTraining.vcxproj", "{EAD17188-072C-4726-B840-A769C36DAD1B}"
 EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Truncated", "Truncated", "{88F85A64-105D-4CDA-8199-B7A312FC8A27}"
+	ProjectSection(SolutionItems) = preProject
+		Tests\Speech\LSTM\Truncated\baseline.cpu.txt = Tests\Speech\LSTM\Truncated\baseline.cpu.txt
+		Tests\Speech\LSTM\Truncated\baseline.gpu.txt = Tests\Speech\LSTM\Truncated\baseline.gpu.txt
+		Tests\Speech\LSTM\Truncated\baseline.windows.cpu.txt = Tests\Speech\LSTM\Truncated\baseline.windows.cpu.txt
+		Tests\Speech\LSTM\Truncated\baseline.windows.gpu.txt = Tests\Speech\LSTM\Truncated\baseline.windows.gpu.txt
+		Tests\Speech\LSTM\Truncated\run-test = Tests\Speech\LSTM\Truncated\run-test
+		Tests\Speech\LSTM\Truncated\testcases.yml = Tests\Speech\LSTM\Truncated\testcases.yml
+	EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "FullUtterance", "FullUtterance", "{8241108A-7824-4FF2-BECA-7521A9D89DCF}"
+	ProjectSection(SolutionItems) = preProject
+		Tests\Speech\LSTM\FullUtterance\baseline.cpu.txt = Tests\Speech\LSTM\FullUtterance\baseline.cpu.txt
+		Tests\Speech\LSTM\FullUtterance\baseline.gpu.txt = Tests\Speech\LSTM\FullUtterance\baseline.gpu.txt
+		Tests\Speech\LSTM\FullUtterance\baseline.windows.cpu.txt = Tests\Speech\LSTM\FullUtterance\baseline.windows.cpu.txt
+		Tests\Speech\LSTM\FullUtterance\baseline.windows.gpu.txt = Tests\Speech\LSTM\FullUtterance\baseline.windows.gpu.txt
+		Tests\Speech\LSTM\FullUtterance\run-test = Tests\Speech\LSTM\FullUtterance\run-test
+		Tests\Speech\LSTM\FullUtterance\testcases.yml = Tests\Speech\LSTM\FullUtterance\testcases.yml
+	EndProjectSection
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Win32 = Debug|Win32
@@ -503,5 +517,7 @@ Global
 		{3E9C89B1-C045-4F42-92B2-F9FFFFC2DBD4} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
 		{C70E1572-20FF-496C-A0A9-10AA6755A07C} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
 		{EAD17188-072C-4726-B840-A769C36DAD1B} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
+		{88F85A64-105D-4CDA-8199-B7A312FC8A27} = {19EE975B-232D-49F0-94C7-6F1C6424FB53}
+		{8241108A-7824-4FF2-BECA-7521A9D89DCF} = {19EE975B-232D-49F0-94C7-6F1C6424FB53}
 	EndGlobalSection
 EndGlobal
diff --git a/Tests/Speech/LSTM/FullUtterance/baseline.cpu.txt b/Tests/Speech/LSTM/FullUtterance/baseline.cpu.txt
diff --git a/Tests/Speech/LSTM/FullUtterance/baseline.gpu.txt b/Tests/Speech/LSTM/FullUtterance/baseline.gpu.txt
diff --git a/Tests/Speech/LSTM/FullUtterance/baseline.windows.cpu.txt b/Tests/Speech/LSTM/FullUtterance/baseline.windows.cpu.txt
diff --git a/Tests/Speech/LSTM/FullUtterance/baseline.windows.gpu.txt b/Tests/Speech/LSTM/FullUtterance/baseline.windows.gpu.txt
diff --git a/Tests/Speech/LSTM/FullUtterance/run-test b/Tests/Speech/LSTM/FullUtterance/run-test
@@ -0,0 +1,55 @@
+#!/bin/bash
+# End-to-end test driver: runs CNTK LSTM training with Truncated=false (full utterance).
+#
+# Environment variables read by this script:
+#   TEST_DEVICE      - "cpu" (DeviceId=-1) or "gpu" (DeviceId=0); any other value exits with 3
+#   TEST_DIR         - this test's directory; cntk.config and NDLDir are taken from its parent
+#   TEST_RUN_DIR     - passed as RunDir; its models/ subdirectory is recreated on every run
+#   TEST_DATA_DIR    - passed as DataDir
+#   TEST_CNTK_BINARY - the CNTK command to execute
+#   OS               - when "Windows_NT", paths are converted with cygpath before use
+#
+# Exit status: 3 for an unknown TEST_DEVICE, otherwise the status of a failing mkdir or CNTK run.
+if [ "$TEST_DEVICE" == "cpu" ]; then
+  CNTK_DEVICE_ID=-1
+elif [ "$TEST_DEVICE" == "gpu" ]; then
+  CNTK_DEVICE_ID=0
+else
+  echo "Error: Unknown TEST_DEVICE specified!"
+  exit 3
+fi
+
+configFile=$TEST_DIR/../cntk.config
+RunDir=$TEST_RUN_DIR
+DataDir=$TEST_DATA_DIR
+NDLDir=$TEST_DIR/..
+
+if [ "$OS" == "Windows_NT" ]; then
+  # When running on cygwin translating /cygdrive/xxx paths to proper windows paths:
+  configFile=$(cygpath -aw "$configFile")
+  RunDir=$(cygpath -aw "$RunDir")
+  DataDir=$(cygpath -aw "$DataDir")
+  NDLDir=$(cygpath -aw "$NDLDir")
+fi
+
+# The arguments live in an array so that each one reaches CNTK as exactly one word:
+# paths containing spaces are not split, and the [...] overrides are never treated
+# as glob patterns by the shell.
+CNTK_ARGS=(
+  "configFile=$configFile"
+  "RunDir=$RunDir"
+  "DataDir=$DataDir"
+  "DeviceId=$CNTK_DEVICE_ID"
+  "NDLDir=$NDLDir"
+  "Truncated=false"
+  "speechTrain=[reader=[nbruttsineachrecurrentiter=1]]"
+  "speechTrain=[SGD=[epochSize=2560]]"
+  "speechTrain=[SGD=[maxEpochs=2]]"
+  "speechTrain=[SGD=[numMBsToShowResult=1]]"
+)
+MODELS_DIR=$TEST_RUN_DIR/models
+# Start from an empty models directory so results of a previous run cannot leak in.
+[ -d "$MODELS_DIR" ] && rm -rf "$MODELS_DIR"
+mkdir -p "$MODELS_DIR" || exit $?
+echo === Running $TEST_CNTK_BINARY "${CNTK_ARGS[*]}"
+$TEST_CNTK_BINARY "${CNTK_ARGS[@]}" || exit $?
diff --git a/Tests/Speech/LSTM/testcases.yml → ...s/Speech/LSTM/FullUtterance/testcases.yml b/Tests/Speech/LSTM/testcases.yml → ...s/Speech/LSTM/FullUtterance/testcases.yml
@@ -1,4 +1,4 @@
-dataDir: ../Data
+dataDir: ../../Data
 tags:
      # running on every BVT job in 'L' (LSTM) leg in Debug-GPU and Release-CPU configurations:
      - bvt-l  (flavor=='debug') ^ (device=='cpu')
@@ -19,14 +19,14 @@ testCases:
   Epochs must be finished with expected results:
     patterns:
       - ^Finished Epoch[{{integer}} of {{integer}}]
-      - TrainLossPerSample = {{float,tolerance=1%}}
-      - EvalErrPerSample = {{float,tolerance=1%}}
+      - TrainLossPerSample = {{float,tolerance=.1%}}
+      - EvalErrPerSample = {{float,tolerance=.1%}}
       - Ave LearnRatePerSample = {{float,tolerance=0%}}
 
   Per-minibatch training results must match:
     patterns:
       - ^ Epoch[{{integer}} of {{integer}}]-Minibatch[{{integer}}-{{integer}} of {{integer}}]
       - SamplesSeen = {{integer}}
-      - TrainLossPerSample = {{float,tolerance=1%}}
-      - EvalErr[0]PerSample = {{float,tolerance=1%}}
+      - TrainLossPerSample = {{float,tolerance=.1%}}
+      - EvalErr[0]PerSample = {{float,tolerance=.1%}}
 
diff --git a/Tests/Speech/LSTM/baseline.cpu.txt → Tests/Speech/LSTM/Truncated/baseline.cpu.txt b/Tests/Speech/LSTM/baseline.cpu.txt → Tests/Speech/LSTM/Truncated/baseline.cpu.txt
diff --git a/Tests/Speech/LSTM/baseline.gpu.txt → Tests/Speech/LSTM/Truncated/baseline.gpu.txt b/Tests/Speech/LSTM/baseline.gpu.txt → Tests/Speech/LSTM/Truncated/baseline.gpu.txt
diff --git a/Tests/Speech/LSTM/baseline.windows.cpu.txt → ...h/LSTM/Truncated/baseline.windows.cpu.txt b/Tests/Speech/LSTM/baseline.windows.cpu.txt → ...h/LSTM/Truncated/baseline.windows.cpu.txt
diff --git a/Tests/Speech/LSTM/baseline.windows.gpu.txt → ...h/LSTM/Truncated/baseline.windows.gpu.txt b/Tests/Speech/LSTM/baseline.windows.gpu.txt → ...h/LSTM/Truncated/baseline.windows.gpu.txt
diff --git a/Tests/Speech/LSTM/run-test → Tests/Speech/LSTM/Truncated/run-test b/Tests/Speech/LSTM/run-test → Tests/Speech/LSTM/Truncated/run-test
@@ -8,10 +8,10 @@ else
   exit 3
 fi
 
-configFile=$TEST_DIR/cntk.config
+configFile=$TEST_DIR/../cntk.config
 RunDir=$TEST_RUN_DIR
 DataDir=$TEST_DATA_DIR
-NDLDir=$TEST_DIR
+NDLDir=$TEST_DIR/..
 
 if [ "$OS" == "Windows_NT" ]; then
   # When running on cygwin translating /cygdrive/xxx paths to proper windows paths:

diff --git a/Tests/Speech/LSTM/Truncated/testcases.yml b/Tests/Speech/LSTM/Truncated/testcases.yml
@@ -0,0 +1,32 @@
+dataDir: ../../Data
+tags:
+     # running on every BVT job in 'L' (LSTM) leg in Debug-GPU and Release-CPU configurations:
+     - bvt-l  (flavor=='debug') ^ (device=='cpu')
+     # running unconditionally on every Nightly job in 'L' leg
+     - nightly-l
+
+testCases:
+  CNTK Run must be completed:
+    patterns:
+      - ^COMPLETED
+
+  Must train epochs in exactly same order and parameters:
+    patterns:
+      - ^Starting Epoch {{integer}}
+      - learning rate per sample = {{float}}
+      - momentum = {{float}}
+
+  Epochs must be finished with expected results:
+    patterns:
+      - ^Finished Epoch[{{integer}} of {{integer}}]
+      - TrainLossPerSample = {{float,tolerance=.1%}}
+      - EvalErrPerSample = {{float,tolerance=.1%}}
+      - Ave LearnRatePerSample = {{float,tolerance=0%}}
+
+  Per-minibatch training results must match:
+    patterns:
+      - ^ Epoch[{{integer}} of {{integer}}]-Minibatch[{{integer}}-{{integer}} of {{integer}}]
+      - SamplesSeen = {{integer}}
+      - TrainLossPerSample = {{float,tolerance=.1%}}
+      - EvalErr[0]PerSample = {{float,tolerance=.1%}}
+
diff --git a/Tests/Speech/QuickE2E/testcases.yml b/Tests/Speech/QuickE2E/testcases.yml
@@ -19,14 +19,14 @@ testCases:
   Epochs must be finished with expected results:
     patterns:
       - ^Finished Epoch[{{integer}} of {{integer}}]
-      - TrainLossPerSample = {{float,tolerance=1%}}
-      - EvalErrPerSample = {{float,tolerance=1%}}
-      - Ave LearnRatePerSample = {{float,tolerance=1%}}
+      - TrainLossPerSample = {{float,tolerance=.1%}}
+      - EvalErrPerSample = {{float,tolerance=.1%}}
+      - Ave LearnRatePerSample = {{float,tolerance=0%}}
 
   Per-minibatch training results must match:
     patterns:
       - ^ Epoch[{{integer}} of {{integer}}]-Minibatch[{{integer}}-{{integer}} of {{integer}}]
       - SamplesSeen = {{integer}}
-      - TrainLossPerSample = {{float,tolerance=1%}}
-      - EvalErr[0]PerSample = {{float,tolerance=1%}}
+      - TrainLossPerSample = {{float,tolerance=.1%}}
+      - EvalErr[0]PerSample = {{float,tolerance=.1%}}
 
diff --git a/Tests/Speech/README.txt b/Tests/Speech/README.txt
@@ -21,10 +21,15 @@ bin/cntk configFile=Tests/Speech/QuickE2E/cntk.config RunDir=Tests/Speech/RunDir
 
 # TODO: can stderr refer to RunDir?
 
---- LSTM:
+--- LSTM\Truncated:
 
 WORKING DIR: $(SolutionDir)Tests\Speech\Data
-COMMAND:     configFile=$(SolutionDir)Tests\Speech\LSTM\cntk.config  stderr=$(SolutionDir)Tests\Speech\RunDir\LSTM\models\cntkSpeech.dnn.log  RunDir=$(SolutionDir)Tests\Speech\RunDir\LSTM  NdlDir=$(SolutionDir)Tests\Speech\LSTM  DataDir=$(SolutionDir)Tests\Speech\Data  DeviceId=Auto
+COMMAND:     configFile=$(SolutionDir)Tests\Speech\LSTM\cntk.config  stderr=$(SolutionDir)Tests\Speech\RunDir\LSTM\Truncated\models\cntkSpeech.dnn.log  RunDir=$(SolutionDir)Tests\Speech\RunDir\LSTM\Truncated  NdlDir=$(SolutionDir)Tests\Speech\LSTM  DataDir=$(SolutionDir)Tests\Speech\Data  DeviceId=Auto
+
+--- LSTM\FullUtterance:
+
+WORKING DIR: $(SolutionDir)Tests\Speech\Data
+COMMAND:     configFile=$(SolutionDir)Tests\Speech\LSTM\cntk.config  stderr=$(SolutionDir)Tests\Speech\RunDir\LSTM\FullUtterance\models\cntkSpeech.dnn.log  RunDir=$(SolutionDir)Tests\Speech\RunDir\LSTM\FullUtterance  NdlDir=$(SolutionDir)Tests\Speech\LSTM  DataDir=$(SolutionDir)Tests\Speech\Data  DeviceId=Auto Truncated=false speechTrain=[reader=[nbruttsineachrecurrentiter=1]] speechTrain=[SGD=[epochSize=2560]] speechTrain=[SGD=[maxEpochs=2]]  speechTrain=[SGD=[numMBsToShowResult=1]]
 
 --- MNIST: