Skip to content

Commit

Permalink
updated ExperimentalHtkmlfReader/LSTM test w.r.t. the stabilizer test
Browse files Browse the repository at this point in the history
  • Loading branch information
frankseide committed May 6, 2016
1 parent 5203d32 commit 2b60c41
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 13 deletions.
30 changes: 30 additions & 0 deletions CNTK.sln
Original file line number Diff line number Diff line change
Expand Up @@ -1126,6 +1126,29 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "G2P", "G2P", "{4AD12278-970
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CPPEvalClient", "Source\Extensibility\CPPEvalClient\CPPEvalClient.vcxproj", "{578D52A0-3928-4405-A016-F016E8B49031}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ExperimentalHtkmlfReader", "ExperimentalHtkmlfReader", "{977ECCB7-598D-4548-B95B-BACA9CC7D98B}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "DNN", "DNN", "{1DBB2575-F5C8-43F4-B982-D05D6ADC2F9B}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "LSTM", "LSTM", "{772A0DB3-4710-4281-8AA9-A9F1F7C543D3}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "QuickE2E", "QuickE2E", "{FE3592CF-3EB9-4502-BB95-E2AB974C0FB5}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SVD", "SVD", "{BA6A65C5-92A2-4040-ADC3-0727A45694F6}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "FullUtterance", "FullUtterance", "{3BDF52CD-7F3C-42BC-AB78-CF5BBC5F4AB4}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\baseline.cpu.txt = Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\baseline.cpu.txt
Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\baseline.gpu.txt = Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\baseline.gpu.txt
Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\baseline.windows.cpu.txt = Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\baseline.windows.cpu.txt
Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\baseline.windows.gpu.txt = Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\baseline.windows.gpu.txt
Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\cntk.cntk = Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\cntk.cntk
Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\run-test = Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\run-test
Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\testcases.yml = Tests\EndToEndTests\Speech\ExperimentalHtkmlfReader\LSTM\FullUtterance\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Truncated", "Truncated", "{1141DC61-E014-4DEC-9157-F6B1FC055C7A}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug_CpuOnly|x64 = Debug_CpuOnly|x64
Expand Down Expand Up @@ -1526,5 +1549,12 @@ Global
{85A05261-41D0-41DF-80B5-ADB6ABB54632} = {A1521DC4-C8EC-47BD-9E63-7BE30ED2EC26}
{4AD12278-9705-4BBA-B2C3-D6D5856AADC3} = {85A05261-41D0-41DF-80B5-ADB6ABB54632}
{578D52A0-3928-4405-A016-F016E8B49031} = {60F87E25-BC87-4782-8E20-1621AAEBB113}
{977ECCB7-598D-4548-B95B-BACA9CC7D98B} = {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8}
{1DBB2575-F5C8-43F4-B982-D05D6ADC2F9B} = {977ECCB7-598D-4548-B95B-BACA9CC7D98B}
{772A0DB3-4710-4281-8AA9-A9F1F7C543D3} = {977ECCB7-598D-4548-B95B-BACA9CC7D98B}
{FE3592CF-3EB9-4502-BB95-E2AB974C0FB5} = {977ECCB7-598D-4548-B95B-BACA9CC7D98B}
{BA6A65C5-92A2-4040-ADC3-0727A45694F6} = {977ECCB7-598D-4548-B95B-BACA9CC7D98B}
{3BDF52CD-7F3C-42BC-AB78-CF5BBC5F4AB4} = {772A0DB3-4710-4281-8AA9-A9F1F7C543D3}
{1141DC61-E014-4DEC-9157-F6B1FC055C7A} = {772A0DB3-4710-4281-8AA9-A9F1F7C543D3}
EndGlobalSection
EndGlobal
5 changes: 4 additions & 1 deletion Source/Math/GPUTensor.cu
Original file line number Diff line number Diff line change
Expand Up @@ -549,8 +549,11 @@ static void LaunchTensorOpWithReduction(ElemType beta, array<ElemType*, N> point
SyncGuard syncGuard;

// do some optimization for reductions
// - example: 30 GPU procs, warp size 32 --> 960 GPU cores
// - NN elements must be computed, each involving a reduction over reductionDim elements
// Cases:
// - #output elements >= GPU procs --> use one proc per element, do reduction in inner loop
// - #output elements NN >= GPU cores --> use one proc per element, do reduction in inner loop
// E.g. if >960 elements are computed, each gets its own GPU thread.
// - reduction dimension fits into a single kernel --> launch it that way
// - reduction dimension requires multiple kernels --> use atomic add, to avoid temp mem alloc
// - PlusNode: reducing to a bias for small matrices
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,14 @@ speechTrain = [
featNorm = MeanVarNorm(feashift)

// we define the LSTM locally for now, since the one in CNTK.core.bs has a slightly changed configuration that breaks this test
# Stabilize (x, enabled=true):
#   Local self-stabilizer used by the LSTM below (defined here because the copy in
#   CNTK.core.bs has a slightly different configuration that breaks this test).
#   Scales the input x elementwise by a single learned scalar beta = Exp(bias).
#   The bias parameter is initialized to 0, so beta starts at Exp(0) = 1 (identity)
#   and remains strictly positive throughout training.
#   When enabled=false, x is passed through unchanged (no parameter is created).
Stabilize (x, enabled=true) =
if enabled
then [
beta = Exp (BS.Parameters.BiasParam ((1))) # scalar bias, init value is 0 --> beta = Exp(0) = 1 initially; Exp keeps beta > 0
result = beta .* x # elementwise scaling of x by the learned scalar
].result # the record is only a scope; its .result member is the value returned
else x

LSTMP (outputDim, cellDim=outputDim, x, inputDim=x.dim, prevState, enableSelfStabilization=false) =
[
_privateInnards = [ // encapsulate the inner workings
Expand All @@ -99,9 +107,9 @@ speechTrain = [
// parameter macros--these carry their own weight matrices
B() = BS.Parameters.BiasParam (cellDim)

W(v) = BS.Parameters.WeightParam (cellDim, inputDim) * BS.Parameters.Stabilize (v, enabled=enableSelfStabilization) // input-to-hidden
H(h) = BS.Parameters.WeightParam (cellDim, outputDim) * BS.Parameters.Stabilize (h, enabled=enableSelfStabilization) // hidden-to-hidden
C(c) = BS.Parameters.DiagWeightParam (cellDim) .* BS.Parameters.Stabilize (c, enabled=enableSelfStabilization) // cell-to-hidden (note: applied elementwise)
W(v) = BS.Parameters.WeightParam (cellDim, inputDim) * Stabilize (v, enabled=enableSelfStabilization) // input-to-hidden
H(h) = BS.Parameters.WeightParam (cellDim, outputDim) * Stabilize (h, enabled=enableSelfStabilization) // hidden-to-hidden
C(c) = BS.Parameters.DiagWeightParam (cellDim) .* Stabilize (c, enabled=enableSelfStabilization) // cell-to-hidden (note: applied elementwise)

// note: the W(x) here are all different, they all come with their own set of weights; same for H(dh), C(dc), and B()
it = Sigmoid (W(x) + B() + H(dh) + C(dc)) // input gate(t)
Expand All @@ -121,7 +129,7 @@ speechTrain = [
h = if outputDim != cellDim // output/hidden state
then [ // project
Wmr = BS.Parameters.WeightParam (outputDim, cellDim);
htp = Wmr * BS.Parameters.Stabilize (_privateInnards.ht, enabled=enableSelfStabilization)
htp = Wmr * Stabilize (_privateInnards.ht, enabled=enableSelfStabilization)
].htp // TODO: ^^ extend BS syntax to allow to say: then [ Wmr = WeightParam(outputDim, cellDim) ] in Wmr * Stabilize (...)
else _privateInnards.ht // no projection
dim = outputDim
Expand All @@ -135,7 +143,7 @@ speechTrain = [
].lstmState // we return the state record (h,c)

// define the stack of hidden LSTM layers --TODO: change to RecurrentLSTMPStack(), change stabilizer config
S(x) = BS.Parameters.Stabilize (x, enabled=useSelfStabilization)
S(x) = Stabilize (x, enabled=useSelfStabilization)
LSTMoutput[k:1..numLSTMLayers] =
if k == 1
then /*BS.RNNs.*/ RecurrentLSTMP (hiddenDim, cellDim=innerCellDim, /*S*/ (featNorm), inputDim=baseFeatDim, enableSelfStabilization=useSelfStabilization).h
Expand All @@ -158,15 +166,18 @@ speechTrain = [
#ce = CrossEntropyWithSoftmax(labels, z, tag='criterion')
# ^^^ PROBABLY OUTDATED

useExplicitCriterion = false
crNode = CrossEntropyWithSoftmax(labels, z) // this is the objective, as a node
crExplicit = -(ReduceSum (labels .* LogSoftmax (z))) // manually-defined per-sample objective
ce = Pass (if useExplicitCriterion then crExplicit else crNode, tag='criterion')
#ce = CrossEntropyWithSoftmax(labels, z, tag='criterion') // this is the objective, as a node
#err = ErrorPrediction(labels, z, tag='evaluation') // this also gets tracked

err = ErrorPrediction(labels, z, tag='evaluation') // this also gets tracked
# this shows how both CE and frame error rate can be constructed as BS expressions
# BUGBUG: The per-sample criterion will trigger a bug fix in momentum computation
# which leads to a slightly better objective value than the baseline.
# For now, we will use SumElements() to neutralize this. Once we have a chance to update
# the baselines, we should remove SumElements() below.
ce = /*Pass*/ SumElements (ReduceLogSum (z) - TransposeTimes (labels, z), tag='criterion') // manually-defined per-sample objective
err = /*Pass*/ SumElements (BS.Constants.One - TransposeTimes (labels, Hardmax (z)), tag='evaluation') // also track frame errors

// decoding
logPrior = LogPrior(labels)
ScaledLogLikelihood = Pass (z - logPrior, tag='output') // using Pass() since we can't assign a tag to x - y
]
ScaledLogLikelihood = Pass (z - logPrior, tag='output') // using Pass() since we can't assign a tag to x - y
]

0 comments on commit 2b60c41

Please sign in to comment.