From b5b1c13d6822bc1df44c47f456e3e6b10f15d6a0 Mon Sep 17 00:00:00 2001 From: Yu Date: Fri, 17 Jul 2015 17:48:03 -0400 Subject: [PATCH 1/2] Fix error when setting perMB momentum. --- MachineLearning/CNTK/SGD.h | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/MachineLearning/CNTK/SGD.h b/MachineLearning/CNTK/SGD.h index 59d4f0efe263..aaae08cc3d1f 100644 --- a/MachineLearning/CNTK/SGD.h +++ b/MachineLearning/CNTK/SGD.h @@ -143,6 +143,7 @@ class SGD : ComputationNetworkHelper { ConfigArray learningRatesPerMBStr = configSGD("learningRatesPerMB", ""); m_needToNormalizeLRByParallUtterance = false; + m_needToNormalizeMomentumByParallUtterance = false; floatargvector learningRatesPerMB = learningRatesPerMBStr; ConfigArray learningRatesPerSampleStr = configSGD("learningRatesPerSample", ""); @@ -437,6 +438,8 @@ class SGD : ComputationNetworkHelper } m_momentumPerSample[i] = (float)pow(momentumPerMB[i], 1.0 / m_mbSize[i]); } + + m_needToNormalizeMomentumByParallUtterance = true; } else { @@ -770,6 +773,15 @@ class SGD : ComputationNetworkHelper x /= trainSetDataReader->NumberSlicesInEachRecurrentIter(); } } + + // first, we need to normalize the effect of nbruttsineachrecurrentiter for momentum + if (trainSetDataReader->NumberSlicesInEachRecurrentIter() > 1 && m_needToNormalizeMomentumByParallUtterance) + { + for (auto& x : m_momentumPerSample) + { + x = (float)pow(x, 1.0 / trainSetDataReader->NumberSlicesInEachRecurrentIter()); + } + } bool learnRateInitialized = false; if (startEpoch > 0) @@ -857,6 +869,7 @@ class SGD : ComputationNetworkHelper INT32 mySamples = (INT32) #endif size_t chosenMinibatchSize; + size_t actualMinibatchSize; // Through the command line or config file the user can set minibatch sizes on a per epoch // basis for a set number of epochs. 
For epochs after that point, m_mbSize.size(), either @@ -884,10 +897,15 @@ class SGD : ComputationNetworkHelper { // use the explicitly set minibatch size chosenMinibatchSize = m_mbSize[i]; + if (trainSetDataReader->NumberSlicesInEachRecurrentIter() > 1 && m_needToNormalizeMomentumByParallUtterance) + { + actualMinibatchSize = chosenMinibatchSize * trainSetDataReader->NumberSlicesInEachRecurrentIter(); + } + } fprintf(stderr, "Starting Epoch %d: learning rate per sample = %f momentum = %f \n", - i + 1, learnRatePerSample, MomentumPerMB(m_momentumPerSample[i], chosenMinibatchSize)); + i + 1, learnRatePerSample, MomentumPerMB(m_momentumPerSample[i], actualMinibatchSize)); TrainOneEpoch(net, refNet, @@ -2310,6 +2328,7 @@ class SGD : ComputationNetworkHelper // only true when the user specify LearningRatePerMB and the number of parallel utterances in Reader > 1 bool m_needToNormalizeLRByParallUtterance; + bool m_needToNormalizeMomentumByParallUtterance; intargvector m_mbSize; From 329d3bc6fdbcdd9542fb245e6b0cf80e6656f481 Mon Sep 17 00:00:00 2001 From: Yu Date: Fri, 17 Jul 2015 19:26:24 -0400 Subject: [PATCH 2/2] Fix the bug so that the correct momentum value is printed. 
--- MachineLearning/CNTK/SGD.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/MachineLearning/CNTK/SGD.h b/MachineLearning/CNTK/SGD.h index aaae08cc3d1f..37f5795515cc 100644 --- a/MachineLearning/CNTK/SGD.h +++ b/MachineLearning/CNTK/SGD.h @@ -897,12 +897,15 @@ class SGD : ComputationNetworkHelper { // use the explicitly set minibatch size chosenMinibatchSize = m_mbSize[i]; - if (trainSetDataReader->NumberSlicesInEachRecurrentIter() > 1 && m_needToNormalizeMomentumByParallUtterance) - { - actualMinibatchSize = chosenMinibatchSize * trainSetDataReader->NumberSlicesInEachRecurrentIter(); - } - } + + actualMinibatchSize = chosenMinibatchSize; + if (trainSetDataReader->NumberSlicesInEachRecurrentIter() > 1 && m_needToNormalizeMomentumByParallUtterance) + { + actualMinibatchSize = chosenMinibatchSize * trainSetDataReader->NumberSlicesInEachRecurrentIter(); + } + + fprintf(stderr, "Starting Epoch %d: learning rate per sample = %f momentum = %f \n", i + 1, learnRatePerSample, MomentumPerMB(m_momentumPerSample[i], actualMinibatchSize));