From 19f6653a267a7e191448c520e4b877d166f7601c Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Fri, 18 Sep 2015 08:08:23 -0700 Subject: [PATCH 01/44] (fixed another #include order in MatrixQuantizerGPU.h) --- CNTK.sln | 26 +++++++++----------------- Math/Math/MatrixQuantizerGPU.h | 2 +- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/CNTK.sln b/CNTK.sln index 8c6f069f492e..4e364b16dab1 100644 --- a/CNTK.sln +++ b/CNTK.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 2013 -VisualStudioVersion = 12.0.30324.0 +VisualStudioVersion = 12.0.21005.1 MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTKMathDll", "Math\Math\Math.vcxproj", "{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}" ProjectSection(ProjectDependencies) = postProject @@ -171,9 +171,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CheckInSuites", "CheckInSui EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SparsePCReader", "DataReader\SparsePCReader\SparsePCReader.vcxproj", "{CE429AA2-3778-4619-8FD1-49BA3B81197B}" - ProjectSection(ProjectDependencies) = postProject - {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} - EndProjectSection EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Speech", "Speech", "{C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8}" ProjectSection(SolutionItems) = preProject @@ -335,10 +332,13 @@ Global HideSolutionNode = FALSE EndGlobalSection GlobalSection(NestedProjects) = preSolution - {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} {E6F26F9A-FF64-4F0A-B749-CD309EE357EE} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} + {482999D1-B7E2-466E-9F8D-2119F93EAFD9} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} + {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} + {B3DD765E-694E-4494-BAD7-37BBF2942517} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} + {928ABD1B-4D3B-4017-AEF1-0FA1B4467513} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} + {DE3C54E5-D7D0-47AF-A783-DFDCE59E7937} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} {6CEE834A-8104-46A8-8902-64C81BD7928F} = {D45DF403-6781-444E-B654-A96868C5BE68} - {33D2FD22-DEF2-4507-A58A-368F641AEBE5} = {33EBFE78-A1A8-4961-8938-92A271941F94} {668BEED5-AC07-4F35-B3AE-EE65A7F9C976} = {D45DF403-6781-444E-B654-A96868C5BE68} {0F30EBCF-09F3-4EED-BF54-4214BCE53FEC} = {D45DF403-6781-444E-B654-A96868C5BE68} {DBB3C106-B0B4-4059-8477-C89528CEC1B0} = {D45DF403-6781-444E-B654-A96868C5BE68} @@ -348,30 +348,22 @@ Global {E6646FFE-3588-4276-8A15-8D65C22711C1} = {33EBFE78-A1A8-4961-8938-92A271941F94} {1D5787D4-52E4-45DB-951B-82F220EE0C6A} = {33EBFE78-A1A8-4961-8938-92A271941F94} {62836DC1-DF77-4B98-BF2D-45C943B7DDC6} = {33EBFE78-A1A8-4961-8938-92A271941F94} - {482999D1-B7E2-466E-9F8D-2119F93EAFD9} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} - {0F30EBCF-09F3-4EED-BF54-4214BCE53FEC} = {D45DF403-6781-444E-B654-A96868C5BE68} - {B3DD765E-694E-4494-BAD7-37BBF2942517} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} + {33D2FD22-DEF2-4507-A58A-368F641AEBE5} = {33EBFE78-A1A8-4961-8938-92A271941F94} {9A2F2441-5972-4EA8-9215-4119FCE0FB68} = {33EBFE78-A1A8-4961-8938-92A271941F94} {014DA766-B37B-4581-BC26-963EA5507931} = {33EBFE78-A1A8-4961-8938-92A271941F94} {D667AF32-028A-4A5D-BE19-F46776F0F6B2} = {33EBFE78-A1A8-4961-8938-92A271941F94} - {3ED0465D-23E7-4855-9694-F788717B6533} = {39E42C4B-A078-4CA4-9D92-B883D8129601} + {CE429AA2-3778-4619-8FD1-49BA3B81197B} = {33EBFE78-A1A8-4961-8938-92A271941F94} 
{065AF55D-AF02-448B-BFCD-52619FDA4BD0} = {39E42C4B-A078-4CA4-9D92-B883D8129601} + {3ED0465D-23E7-4855-9694-F788717B6533} = {39E42C4B-A078-4CA4-9D92-B883D8129601} {98D2C32B-0C1F-4E19-A626-65F7BA4600CF} = {065AF55D-AF02-448B-BFCD-52619FDA4BD0} {EA67F51F-1FE8-462D-9F3E-01161685AD59} = {065AF55D-AF02-448B-BFCD-52619FDA4BD0} {DE1A06BA-EC5C-4E0D-BCA8-3EA555310C58} = {065AF55D-AF02-448B-BFCD-52619FDA4BD0} {63024704-A2D7-497E-AD4B-5C10C6AA1374} = {065AF55D-AF02-448B-BFCD-52619FDA4BD0} {F9BEB27E-8AF5-464E-8D45-0000D5AFA2D3} = {EA67F51F-1FE8-462D-9F3E-01161685AD59} {889C1CCF-92B3-450B-B00D-FC9A9D5BE464} = {EA67F51F-1FE8-462D-9F3E-01161685AD59} - {DBB3C106-B0B4-4059-8477-C89528CEC1B0} = {D45DF403-6781-444E-B654-A96868C5BE68} - {CE429AA2-3778-4619-8FD1-49BA3B81197B} = {33EBFE78-A1A8-4961-8938-92A271941F94} - {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8} = {D45DF403-6781-444E-B654-A96868C5BE68} {4BBF2950-3DBD-469A-AD57-6CACBEBAF541} = {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8} {5F733BBA-FE83-4668-8F83-8B0E78A36619} = {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8} {19EE975B-232D-49F0-94C7-6F1C6424FB53} = {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8} - {7C4E77C9-6B17-4B02-82C1-DB62EEE2635B} = {D45DF403-6781-444E-B654-A96868C5BE68} - {928ABD1B-4D3B-4017-AEF1-0FA1B4467513} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} - {DE3C54E5-D7D0-47AF-A783-DFDCE59E7937} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} - {5E666C53-2D82-49C9-9127-3FDDC321C741} = {D45DF403-6781-444E-B654-A96868C5BE68} {6D1353D6-F196-466F-B886-F16D48759B20} = {5E666C53-2D82-49C9-9127-3FDDC321C741} {B6725C9F-A6D2-4269-9B74-7888A90F7884} = {5E666C53-2D82-49C9-9127-3FDDC321C741} {B27DD434-EECD-4EE0-A03B-1150EB87258E} = {B6725C9F-A6D2-4269-9B74-7888A90F7884} diff --git a/Math/Math/MatrixQuantizerGPU.h b/Math/Math/MatrixQuantizerGPU.h index 058b75eaf132..9247bcbf1ce5 100644 --- a/Math/Math/MatrixQuantizerGPU.h +++ b/Math/Math/MatrixQuantizerGPU.h @@ -1,7 +1,7 @@ #pragma once -#include "QuantizedMatrix.h" #include "MatrixQuantizer.h" +#include "QuantizedMatrix.h" #include "ColumnQuantizer.h" #include "GPUMatrix.h" #ifndef CPUONLY From 3451b660b652d6c432d511ff5bfd286663965edc Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Fri, 18 Sep 2015 09:48:25 -0700 Subject: [PATCH 02/44] renamed MBLayout::IsEmpty() to IsAllNone() (a unique name), to be able to identify easily where it is used --- .../CNTKComputationNetworkLib/ComputationNetwork.h | 3 ++- .../CNTKComputationNetworkLib/ComputationNode.h | 2 +- .../CNTKComputationNetworkLib/TrainingCriterionNodes.h | 3 ++- Math/Math/Matrix.h | 7 ++++--- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h index 41dd769b2089..dfb819948d5e 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h @@ -238,9 +238,10 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb // TODO: describe what this function does //this is a temp solution since some nodes such as plus can be just aggregate of two scalar values //in which case the packing info is not available (and not meaningful) for them + // TODO: Does this belong into MBLayout? 
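+        // Note: I believe the loop below subtracts from numAllSamples every position whose
+        // MBLayout packing flags say NoLabel, i.e. the criterion gets averaged only over
+        // columns that actually carry a label.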
size_t GetNumSamplesWithLabel(const size_t numAllSamples) { - if (!m_pMBLayout->IsEmpty()) + if (!m_pMBLayout->IsAllNone()) { size_t numTimeSteps = m_pMBLayout->GetNumFrames(); size_t numSequences = m_pMBLayout->GetNumStreams(); diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h index 56f35641ab62..61e3441d1553 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h @@ -900,7 +900,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { bool processedExistsNoLabelorFeatureMissing = false; /// set to true if either nolabel or feature missing is processed - if (m_pMBLayout && !m_pMBLayout->IsEmpty()) + if (m_pMBLayout && !m_pMBLayout->IsAllNone()) { size_t nT = matrixToBeMasked.GetNumCols(); size_t nS = m_pMBLayout->GetNumStreams(); diff --git a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h index 84b325617eb9..1e99bfc168c0 100644 --- a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h @@ -1090,11 +1090,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { /** reset to error signals to 0 for any elements without labels */ + // TODO: This has overlap with ComputationNode::MaskToZeroWhenLabelAndFeatureMissing(), should call that instead. bool MaskToZeroWhenLabelAndFeatureMissing(Matrix& matrixToBeMasked, const size_t t) { bool processedExistsNoLabelorFeatureMissing = false; /// set to true if either nolabel or feature missing is processed - if (m_pMBLayout && !m_pMBLayout->IsEmpty()) + if (m_pMBLayout && !m_pMBLayout->IsAllNone()) { // 't' is not a time but rather a column index that encodes (time stamp, stream) size_t nS = m_pMBLayout->GetNumStreams(); diff --git a/Math/Math/Matrix.h b/Math/Math/Matrix.h index eb5733b3f1a7..cef4d3143489 100644 --- a/Math/Math/Matrix.h +++ b/Math/Math/Matrix.h @@ -213,7 +213,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { void NormalGrad(Matrix& gradients, Matrix& functionValues, const ElemType learnRatePerSample, const ElemType momentum); ElemType Adagrad(Matrix& gradients, const bool needAveMultiplier); ElemType RmsProp(Matrix& gradients, ElemType RMS_GAMMA, ElemType RMS_WGT_INC, ElemType RMS_WGT_MAX, ElemType RMS_WGT_DEC, ElemType RMS_WGT_MIN, const bool needAveMultiplier); - + + // TODO: should Reshape() return a new Matrix object that contains a reference to the original? void Reshape(const size_t numRows, const size_t numCols); void Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve = 10000, bool growOnly = true); //by default we only reallocate if need to grow /// similarly to the repmat operation in matlab or octave @@ -597,10 +598,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { // these accessors were for now just collected from actual usage; need to be cleaned up once this compiles again size_t GetNumFrames() const { validate(); return m_sentenceBoundaryFlags.GetNumCols(); } - size_t GetNumStreams() const { return IsEmpty() ? 1 : m_sentenceBoundaryFlags.GetNumRows(); } // 1 stream if no matrix + size_t GetNumStreams() const { return IsAllNone() ? 
1 : m_sentenceBoundaryFlags.GetNumRows(); } // 1 stream if no matrix size_t GetSize() const { validate(); return m_minibatchPackingFlags.size(); } // ^^ TODO: add a check whether Size() == GetNumFrames(); it really should, unless I misunderstood - bool IsEmpty() const { validate(); return m_minibatchPackingFlags.empty(); } + bool IsAllNone() const { validate(); return m_minibatchPackingFlags.empty(); } #if 0 // we have this pattern often: // TODO: mbSize and #slices must also move into MBLayout evalnet->SetActualMiniBatchSize(mbSize); From 4fe273dc3dc5fb7b514c3d9f67032e3a4d7205fd Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Fri, 18 Sep 2015 10:01:06 -0700 Subject: [PATCH 03/44] (changed some more ComputationNode:: to Base::) --- .../CNTKComputationNetworkLib/ComputationNode.h | 6 ++---- .../CNTKComputationNetworkLib/RecurrentNodes.h | 4 ++-- .../CNTKComputationNetworkLib/TrainingCriterionNodes.h | 10 +++++----- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h index 61e3441d1553..45abce8277c6 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h @@ -896,9 +896,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { /** reset to error signals to 0 for any elements without labele */ - bool MaskToZeroWhenLabelAndFeatureMissing(Matrix& matrixToBeMasked, const size_t timeIdxInSeq=(size_t)-1) + bool MaskToZeroWhenLabelAndFeatureMissing(Matrix& matrixToBeMasked, const size_t timeIdxInSeq=(size_t)-1) const { - bool processedExistsNoLabelorFeatureMissing = false; /// set to true if either nolabel or feature missing is processed + bool processedExistsNoLabelorFeatureMissing = false; /// set to true if either nolabel or feature missing is processed if (m_pMBLayout && !m_pMBLayout->IsAllNone()) { @@ -908,8 +908,6 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (m_pMBLayout->GetSize() != nT / nS) LogicError("MaskToZeroWhenLabelAndFeatureMissing: m_pMBLayout->m_minibatchPackingFlags should have one element for each timestep of all streams. Check feature reader. "); - //Matrix colSeg(m_pMBLayout->m_sentenceBoundaryFlags.GetDeviceId()); - size_t startT = (timeIdxInSeq == (size_t)-1) ? 0 : timeIdxInSeq * nS; // TODO: misnomer; startT, endT, and utt_t are not times but columns in the packed matrix size_t endT = (timeIdxInSeq == (size_t)-1) ? 
nT : timeIdxInSeq * nS + nS; for (size_t utt_t = startT; utt_t < endT; utt_t += nS) diff --git a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h index ed1fdfc28d15..c038ecc2e31b 100644 --- a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h @@ -1105,7 +1105,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { Matrix::Multiply(state.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() - nsamples, nsamples), false, colSeg, false, newPrevState); } - ComputationNode::SetToInitStateValueForResetSeg(sentenceBegin->ColumnSlice(utt_t, 1), nStream, initStateValue, newPrevState); + Base::SetToInitStateValueForResetSeg(sentenceBegin->ColumnSlice(utt_t, 1), nStream, initStateValue, newPrevState); slicePrevOutput.ColumnSlice(0, nsamples).SetValue(newPrevOutput); slicePrevState.ColumnSlice(0, nsamples).SetValue(newPrevState); @@ -1354,7 +1354,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { //boundary.ColumnSlice(0, 1).SetValue(((int) MinibatchPackingFlags::SequenceStart)); //minibatchPackingFlags[1] = MinibatchPackingFlags::SequenceStart; pMBLayout->Set(0, 1, MinibatchPackingFlags::SequenceStart); // TODO: strange--start at frame[1] instead of [0]? - ComputationNode::ResetBound(pMBLayout); + Base::ResetBound(pMBLayout); f0 = Inputs(0)->FunctionValues(); f1 = Inputs(1)->FunctionValues(); diff --git a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h index 1e99bfc168c0..67e0613a7dc9 100644 --- a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h @@ -174,7 +174,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { else { ComputeInputPartialRight(m_softmaxOfRight, Inputs(0)->FunctionValues(), Inputs(inputIndex)->GradientValues(), GradientValues()); - ComputationNode::MaskToZeroWhenLabelAndFeatureMissing(Inputs(inputIndex)->GradientValues()); + Base::MaskToZeroWhenLabelAndFeatureMissing(Inputs(inputIndex)->GradientValues()); } } @@ -503,7 +503,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void EvaluateThisNode() { - ComputationNode::MaskToZeroWhenLabelAndFeatureMissing(Inputs(0)->FunctionValues()); + Base::MaskToZeroWhenLabelAndFeatureMissing(Inputs(0)->FunctionValues()); EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues()); } @@ -599,7 +599,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void EvaluateThisNode() { - ComputationNode::MaskToZeroWhenLabelAndFeatureMissing(Inputs(0)->FunctionValues()); + Base::MaskToZeroWhenLabelAndFeatureMissing(Inputs(0)->FunctionValues()); EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues()); } @@ -1090,8 +1090,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { /** reset to error signals to 0 for any elements without labels */ - // TODO: This has overlap with ComputationNode::MaskToZeroWhenLabelAndFeatureMissing(), should call that instead. - bool MaskToZeroWhenLabelAndFeatureMissing(Matrix& matrixToBeMasked, const size_t t) + // TODO: This has overlap with ComputationNode::MaskToZeroWhenLabelAndFeatureMissing(), should call that instead. Note: This one does only one stream, while Base:: one does all streams. 
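+        // (Assuming the packing convention noted inside the function: with nS parallel streams,
+        // column index 't' encodes time step t / nS and stream t % nS, so each call masks a
+        // single column rather than all nS columns of a time step.)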
+ bool MaskToZeroWhenLabelAndFeatureMissing(Matrix& matrixToBeMasked, const size_t t) const { bool processedExistsNoLabelorFeatureMissing = false; /// set to true if either nolabel or feature missing is processed From 960e0fe64411d0880f88347d43476f1be79958a8 Mon Sep 17 00:00:00 2001 From: Amit Agarwal Date: Fri, 18 Sep 2015 12:15:19 -0700 Subject: [PATCH 04/44] Add Kaldi sources to the VS solution --- CNTK.sln | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 1 deletion(-) diff --git a/CNTK.sln b/CNTK.sln index 8c6f069f492e..20b96006ee31 100644 --- a/CNTK.sln +++ b/CNTK.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 2013 -VisualStudioVersion = 12.0.30324.0 +VisualStudioVersion = 12.0.31101.0 MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTKMathDll", "Math\Math\Math.vcxproj", "{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}" ProjectSection(ProjectDependencies) = postProject @@ -255,6 +255,90 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "DoublePrecision", "DoublePr Tests\ParallelTraining\NoQuantization\DoublePrecision\testcases.yml = Tests\ParallelTraining\NoQuantization\DoublePrecision\testcases.yml EndProjectSection EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "KaldiReader", "KaldiReader", "{3E9C89B1-C045-4F42-92B2-F9FFFFC2DBD4}" + ProjectSection(SolutionItems) = preProject + DataReader\KaldiReader\basetypes.h = DataReader\KaldiReader\basetypes.h + DataReader\KaldiReader\basetypes.old.h = DataReader\KaldiReader\basetypes.old.h + DataReader\KaldiReader\biggrowablevectors.h = DataReader\KaldiReader\biggrowablevectors.h + DataReader\KaldiReader\chunkevalsource.h = DataReader\KaldiReader\chunkevalsource.h + DataReader\KaldiReader\DataReader.cpp = DataReader\KaldiReader\DataReader.cpp + DataReader\KaldiReader\DataWriter.cpp = DataReader\KaldiReader\DataWriter.cpp + DataReader\KaldiReader\dllmain.cpp = DataReader\KaldiReader\dllmain.cpp + DataReader\KaldiReader\fileutil.cpp = DataReader\KaldiReader\fileutil.cpp + DataReader\KaldiReader\fileutil.h = DataReader\KaldiReader\fileutil.h + DataReader\KaldiReader\fileutil.old.h = DataReader\KaldiReader\fileutil.old.h + DataReader\KaldiReader\htkfeatio.h = DataReader\KaldiReader\htkfeatio.h + DataReader\KaldiReader\HTKMLFReader.cpp = DataReader\KaldiReader\HTKMLFReader.cpp + DataReader\KaldiReader\HTKMLFReader.h = DataReader\KaldiReader\HTKMLFReader.h + DataReader\KaldiReader\HTKMLFWriter.cpp = DataReader\KaldiReader\HTKMLFWriter.cpp + DataReader\KaldiReader\HTKMLFWriter.h = DataReader\KaldiReader\HTKMLFWriter.h + DataReader\KaldiReader\latticearchive.cpp = DataReader\KaldiReader\latticearchive.cpp + DataReader\KaldiReader\latticearchive.h = DataReader\KaldiReader\latticearchive.h + DataReader\KaldiReader\latticestorage.h = DataReader\KaldiReader\latticestorage.h + DataReader\KaldiReader\minibatchiterator.h = DataReader\KaldiReader\minibatchiterator.h + DataReader\KaldiReader\minibatchsourcehelpers.h = DataReader\KaldiReader\minibatchsourcehelpers.h + DataReader\KaldiReader\msra_mgram.h = DataReader\KaldiReader\msra_mgram.h + DataReader\KaldiReader\numahelpers.h = DataReader\KaldiReader\numahelpers.h + DataReader\KaldiReader\pplhelpers.h = DataReader\KaldiReader\pplhelpers.h + DataReader\KaldiReader\readaheadsource.h = DataReader\KaldiReader\readaheadsource.h + DataReader\KaldiReader\rollingwindowsource.h = DataReader\KaldiReader\rollingwindowsource.h + 
DataReader\KaldiReader\simple_checked_arrays.h = DataReader\KaldiReader\simple_checked_arrays.h + DataReader\KaldiReader\simplesenonehmm.h = DataReader\KaldiReader\simplesenonehmm.h + DataReader\KaldiReader\simplethread.h = DataReader\KaldiReader\simplethread.h + DataReader\KaldiReader\ssefloat4.h = DataReader\KaldiReader\ssefloat4.h + DataReader\KaldiReader\ssematrix.h = DataReader\KaldiReader\ssematrix.h + DataReader\KaldiReader\stdafx.cpp = DataReader\KaldiReader\stdafx.cpp + DataReader\KaldiReader\stdafx.h = DataReader\KaldiReader\stdafx.h + DataReader\KaldiReader\targetver.h = DataReader\KaldiReader\targetver.h + DataReader\KaldiReader\utterancesource.h = DataReader\KaldiReader\utterancesource.h + DataReader\KaldiReader\utterancesourcemulti.h = DataReader\KaldiReader\utterancesourcemulti.h + EndProjectSection +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Kaldi2Reader", "Kaldi2Reader", "{C70E1572-20FF-496C-A0A9-10AA6755A07C}" + ProjectSection(SolutionItems) = preProject + DataReader\Kaldi2Reader\basetypes.h = DataReader\Kaldi2Reader\basetypes.h + DataReader\Kaldi2Reader\biggrowablevectors.h = DataReader\Kaldi2Reader\biggrowablevectors.h + DataReader\Kaldi2Reader\chunkevalsource.h = DataReader\Kaldi2Reader\chunkevalsource.h + DataReader\Kaldi2Reader\DataReader.cpp = DataReader\Kaldi2Reader\DataReader.cpp + DataReader\Kaldi2Reader\DataWriter.cpp = DataReader\Kaldi2Reader\DataWriter.cpp + DataReader\Kaldi2Reader\dllmain.cpp = DataReader\Kaldi2Reader\dllmain.cpp + DataReader\Kaldi2Reader\DOCUMENTATION.txt = DataReader\Kaldi2Reader\DOCUMENTATION.txt + DataReader\Kaldi2Reader\fileutil.cpp = DataReader\Kaldi2Reader\fileutil.cpp + DataReader\Kaldi2Reader\fileutil.h = DataReader\Kaldi2Reader\fileutil.h + DataReader\Kaldi2Reader\htkfeatio.h = DataReader\Kaldi2Reader\htkfeatio.h + DataReader\Kaldi2Reader\htkfeatio_utils.h = DataReader\Kaldi2Reader\htkfeatio_utils.h + DataReader\Kaldi2Reader\HTKMLFReader.cpp = DataReader\Kaldi2Reader\HTKMLFReader.cpp + DataReader\Kaldi2Reader\HTKMLFReader.h = DataReader\Kaldi2Reader\HTKMLFReader.h + DataReader\Kaldi2Reader\HTKMLFWriter.cpp = DataReader\Kaldi2Reader\HTKMLFWriter.cpp + DataReader\Kaldi2Reader\HTKMLFWriter.h = DataReader\Kaldi2Reader\HTKMLFWriter.h + DataReader\Kaldi2Reader\kaldi.h = DataReader\Kaldi2Reader\kaldi.h + DataReader\Kaldi2Reader\KaldiSequenceTrainingDerivative.cpp = DataReader\Kaldi2Reader\KaldiSequenceTrainingDerivative.cpp + DataReader\Kaldi2Reader\KaldiSequenceTrainingDerivative.h = DataReader\Kaldi2Reader\KaldiSequenceTrainingDerivative.h + DataReader\Kaldi2Reader\latticearchive.cpp = DataReader\Kaldi2Reader\latticearchive.cpp + DataReader\Kaldi2Reader\latticearchive.h = DataReader\Kaldi2Reader\latticearchive.h + DataReader\Kaldi2Reader\latticestorage.h = DataReader\Kaldi2Reader\latticestorage.h + DataReader\Kaldi2Reader\minibatchiterator.h = DataReader\Kaldi2Reader\minibatchiterator.h + DataReader\Kaldi2Reader\minibatchsourcehelpers.h = DataReader\Kaldi2Reader\minibatchsourcehelpers.h + DataReader\Kaldi2Reader\msra_mgram.h = DataReader\Kaldi2Reader\msra_mgram.h + DataReader\Kaldi2Reader\notes.txt = DataReader\Kaldi2Reader\notes.txt + DataReader\Kaldi2Reader\numahelpers.h = DataReader\Kaldi2Reader\numahelpers.h + DataReader\Kaldi2Reader\pplhelpers.h = DataReader\Kaldi2Reader\pplhelpers.h + DataReader\Kaldi2Reader\readaheadsource.h = DataReader\Kaldi2Reader\readaheadsource.h + DataReader\Kaldi2Reader\rollingwindowsource.h = DataReader\Kaldi2Reader\rollingwindowsource.h + 
DataReader\Kaldi2Reader\simple_checked_arrays.h = DataReader\Kaldi2Reader\simple_checked_arrays.h
+		DataReader\Kaldi2Reader\simplesenonehmm.h = DataReader\Kaldi2Reader\simplesenonehmm.h
+		DataReader\Kaldi2Reader\simplethread.h = DataReader\Kaldi2Reader\simplethread.h
+		DataReader\Kaldi2Reader\ssefloat4.h = DataReader\Kaldi2Reader\ssefloat4.h
+		DataReader\Kaldi2Reader\ssematrix.h = DataReader\Kaldi2Reader\ssematrix.h
+		DataReader\Kaldi2Reader\stdafx.cpp = DataReader\Kaldi2Reader\stdafx.cpp
+		DataReader\Kaldi2Reader\stdafx.h = DataReader\Kaldi2Reader\stdafx.h
+		DataReader\Kaldi2Reader\targetver.h = DataReader\Kaldi2Reader\targetver.h
+		DataReader\Kaldi2Reader\UtteranceDerivativeBuffer.cpp = DataReader\Kaldi2Reader\UtteranceDerivativeBuffer.cpp
+		DataReader\Kaldi2Reader\UtteranceDerivativeBuffer.h = DataReader\Kaldi2Reader\UtteranceDerivativeBuffer.h
+		DataReader\Kaldi2Reader\UtteranceDerivativeComputationInterface.h = DataReader\Kaldi2Reader\UtteranceDerivativeComputationInterface.h
+		DataReader\Kaldi2Reader\utterancesourcemulti.h = DataReader\Kaldi2Reader\utterancesourcemulti.h
+	EndProjectSection
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|x64 = Debug|x64
@@ -376,5 +460,7 @@ Global
 		{B6725C9F-A6D2-4269-9B74-7888A90F7884} = {5E666C53-2D82-49C9-9127-3FDDC321C741}
 		{B27DD434-EECD-4EE0-A03B-1150EB87258E} = {B6725C9F-A6D2-4269-9B74-7888A90F7884}
 		{A4884465-CFBB-4A64-A9DE-690E1A63EF7E} = {B6725C9F-A6D2-4269-9B74-7888A90F7884}
+		{3E9C89B1-C045-4F42-92B2-F9FFFFC2DBD4} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
+		{C70E1572-20FF-496C-A0A9-10AA6755A07C} = {39E42C4B-A078-4CA4-9D92-B883D8129601}
 	EndGlobalSection
 EndGlobal

From af7409309cea8b2a065de4eb7fc23df9c87ed89e Mon Sep 17 00:00:00 2001
From: Frank Seide
Date: Fri, 18 Sep 2015 16:04:20 -0700
Subject: [PATCH 05/44] temporarily renamed m_nbrSlicesInEachRecurrentIteration
 to the distinct m_nbrSlicesInEachRecurrentIterationx, as this is supposed to
 disappear soon; GetActualMBSize() is now const, and renamed to
 DetermineActualMBSizeFromFeatures()

---
 .../ComputationNetwork.cpp | 2 +-
 .../ComputationNetwork.h | 58 ++++++++++---------
 MachineLearning/CNTKSGDLib/MultiNetworksSGD.h | 9 ++-
 MachineLearning/CNTKSGDLib/SGD.cpp | 7 ++-
 MachineLearning/CNTKSGDLib/SimpleEvaluator.h | 29 ++++++----
 .../CNTKSGDLib/SimpleOutputWriter.h | 4 +-
 Math/Math/Matrix.h | 8 ++-
 7 files changed, 66 insertions(+), 51 deletions(-)

diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp
index c4492746c1d8..d5b08827e64e 100644
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp
@@ -193,7 +193,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 
         fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ENodeList");
 
-        size_t actualMBSize = GetActualMBSize();
+        size_t actualMBSize = DetermineActualMBSizeFromFeatures();
         SetActualMiniBatchSize(actualMBSize);
 
         if (requireValidation)
diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h
index dfb819948d5e..4454f83499fc 100644
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h
@@ -80,7 +80,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb
         m_randomSeedOffset = 0;
         m_actMiniBSize = 0;
         SetDeviceId(deviceId);
-
m_nbrSlicesInEachRecurrentIteration = 1; + m_nbrSlicesInEachRecurrentIterationx = 1; } virtual ~ComputationNetwork() @@ -199,7 +199,10 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb // evaluation // ----------------------------------------------------------------------- - size_t GetActualMBSize() + // determine the actual MB size from the feature nodes + // This returns max number of columns over the feature nodes. + // Note that if we have multiple slices, MB size != #frames. + size_t DetermineActualMBSizeFromFeatures() const { size_t actualMBSize = 0; @@ -555,7 +558,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb for (auto nodeIter = recurrentNodes.begin(); nodeIter != recurrentNodes.end(); nodeIter++) (*nodeIter)->SetFunctionAndGradientSize(m_actMiniBSize); - int iMBSize = m_actMiniBSize / m_nbrSlicesInEachRecurrentIteration; + int iMBSize = m_actMiniBSize / m_nbrSlicesInEachRecurrentIterationx; if (m_recurrentInfo[iLoopId].m_isForwardLoop) { @@ -595,9 +598,9 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb { // checks that will disappear once we complete the refactoring. If this passes for a while, we will eliminate one // If this fails, comment this out (it is safe) and tell fseide@microsoft.com. - if (m_pMBLayout && m_nbrSlicesInEachRecurrentIteration != m_pMBLayout->GetNumStreams()) + if (m_nbrSlicesInEachRecurrentIterationx != m_pMBLayout->GetNumStreams()) LogicError("Evaluate: detected that m_nbrSlicesInEachRecurrentIteration != m_pMBLayout->GetNumStreams()"); - if (m_pMBLayout && m_pMBLayout->GetNumFrames() != m_pMBLayout->GetSize()) + if (m_pMBLayout->GetNumFrames() != m_pMBLayout->GetSize()) LogicError("Evaluate: detected that m_pMBLayout->GetNumFrames() != m_pMBLayout->GetSize()"); // prepare to compute with the subnetwork that this rootNode depends on, including @@ -622,7 +625,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb for (auto nodeIter = allNodes.begin(); nodeIter != allNodes.end(); nodeIter++) { // TODO: nbrSlices set once to the same value for all nodes each evaluation--is it ever changed later? - (*nodeIter)->SetNbrSlicesInEachRecurrentIteration(m_nbrSlicesInEachRecurrentIteration); + (*nodeIter)->SetNbrSlicesInEachRecurrentIteration(m_nbrSlicesInEachRecurrentIterationx); if ((*nodeIter)->ReqMultiSeqHandling()) (*nodeIter)->ResetBound(m_pMBLayout); } @@ -650,7 +653,9 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb } } - void SetActualMiniBatchSize(const size_t aSize, vector* featNodes = nullptr) + // resize entire network to handle a given MB size + // TODO: Is this always called with the result of DetermineActualMBSizeFromFeatures()? Why would it ever not? + void SetActualMiniBatchSize(const size_t aSize) { m_actMiniBSize = (int) aSize; @@ -664,24 +669,22 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb for (int i = 0; i < m_recurrentInfo.size(); i++) for (auto nodeIter = m_recurrentInfo[i].m_recurrentNodes.begin(); nodeIter != m_recurrentInfo[i].m_recurrentNodes.end(); nodeIter++) (*nodeIter)->SetFunctionAndGradientSize(m_actMiniBSize); - - if (featNodes) - { - for (auto ptr = featNodes->begin(); ptr != featNodes->end(); ptr++) - { - size_t nr = (*ptr)->GetNumRows(); - (*ptr)->Resize(nr, aSize); - } - } } // GetMaxMBSize - Get the maximum minibatch size that will be seen in a training run - // returns the result from SetActualMiniBatchSize(). 
Note GetActualMBSize() also exists but returns a value derived from the inputs dimensions + // returns the result from SetActualMiniBatchSize(). Note DetermineActualMBSizeFromFeatures() also exists but returns a value derived from the inputs dimensions size_t GetMaxMBSize() { return m_actMiniBSize; } + // always called in this pattern: +#if 0 + evalnet->SetActualMiniBatchSize(mbSize); + evalnet->SetActualNbrSlicesInEachRecurentIteration(dataReader->NumberSlicesInEachRecurrentIter()); + dataReader->CopyMBLayoutTo(evalnet->GetMBLayoutPtr()); + // well... most of the time. Not in TrainOneEpoch(). +#endif void SetActualNbrSlicesInEachRecurentIteration(const size_t aSize) { - m_nbrSlicesInEachRecurrentIteration = aSize; + m_nbrSlicesInEachRecurrentIterationx = aSize; } void ComputeGradientLoop(std::list& /*allNodes*/, const ComputationNodeBasePtr startNode) @@ -692,14 +695,14 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb { if (m_recurrentInfo[iLoopId].m_completedGradient == false) { - int mbSize = m_actMiniBSize / m_nbrSlicesInEachRecurrentIteration; + int mbSize = m_actMiniBSize / m_nbrSlicesInEachRecurrentIterationx; if (m_recurrentInfo[iLoopId].m_isForwardLoop) { for (int timeIndex = mbSize - 1; timeIndex >= 0; timeIndex--) { for (auto nodeIter = recurrentNodes.rbegin(); nodeIter != recurrentNodes.rend(); ++nodeIter) { - (*nodeIter)->SetNbrSlicesInEachRecurrentIteration(m_nbrSlicesInEachRecurrentIteration); // TODO: move to FrameRange object + (*nodeIter)->SetNbrSlicesInEachRecurrentIteration(m_nbrSlicesInEachRecurrentIterationx); // TODO: move to FrameRange object (*nodeIter)->ComputeGradientForChildren(timeIndex); } } @@ -710,7 +713,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb { for (auto nodeIter = recurrentNodes.rbegin(); nodeIter != recurrentNodes.rend(); ++nodeIter) { - (*nodeIter)->SetNbrSlicesInEachRecurrentIteration(m_nbrSlicesInEachRecurrentIteration); + (*nodeIter)->SetNbrSlicesInEachRecurrentIteration(m_nbrSlicesInEachRecurrentIterationx); (*nodeIter)->ComputeGradientForChildren(timeIndex); } } @@ -856,9 +859,10 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb return m_learnableParameters[rootNode]; } - inline std::vector & FeatureNodes() { return m_features; } - inline std::vector & LabelNodes() { return m_labels; } - inline std::vector & FinalCriterionNodes() { return m_finalCriteria; } + inline std::vector & FeatureNodes() { return m_features; } + inline const std::vector & FeatureNodes() const { return m_features; } + inline std::vector & LabelNodes() { return m_labels; } + inline std::vector & FinalCriterionNodes() { return m_finalCriteria; } inline std::vector CriterionNodesFrom(const wstring & criterionNodeName) { @@ -1101,7 +1105,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb if (!allowFragment) FormRecurrentLoops(node); PrintComputationTree(node, false); - size_t actualMBSize = this->GetActualMBSize(); + size_t actualMBSize = this->DetermineActualMBSizeFromFeatures(); this->SetActualMiniBatchSize(actualMBSize); ValidateSubNetwork(node); } @@ -1273,7 +1277,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb { if (!allowFragment) FormRecurrentLoops(node); - size_t actualMBSize = this->GetActualMBSize(); + size_t actualMBSize = this->DetermineActualMBSizeFromFeatures(); this->SetActualMiniBatchSize(actualMBSize); if (!UnitTest(node)) vErrors.push_back(node->NodeName().c_str()); @@ -1559,7 
+1563,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb MBLayoutPtr m_pMBLayout; int m_actMiniBSize; - size_t m_nbrSlicesInEachRecurrentIteration; + size_t m_nbrSlicesInEachRecurrentIterationx; // main node holder std::map m_nameToNodeMap; // [name] -> node; this is the main container that holds this networks' nodes diff --git a/MachineLearning/CNTKSGDLib/MultiNetworksSGD.h b/MachineLearning/CNTKSGDLib/MultiNetworksSGD.h index 9199c0a0254a..499230767b45 100644 --- a/MachineLearning/CNTKSGDLib/MultiNetworksSGD.h +++ b/MachineLearning/CNTKSGDLib/MultiNetworksSGD.h @@ -880,7 +880,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (!bContinueDecoding) break; - size_t actualMBSize = decoderNet->GetActualMBSize(); + size_t actualMBSize = decoderNet->DetermineActualMBSizeFromFeatures(); if (actualMBSize == 0) LogicError("decoderTrainSetDataReader read data but decoderNet reports no data read"); @@ -1157,7 +1157,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { Matrix& localEpochEvalErrors ) { - size_t actualMBSize = encoderNet->GetActualMBSize(); + size_t actualMBSize = encoderNet->DetermineActualMBSizeFromFeatures(); encoderNet->SetActualMiniBatchSize(actualMBSize); encoderNet->SetActualNbrSlicesInEachRecurentIteration(encoderTrainSetDataReader->NumberSlicesInEachRecurrentIter()); @@ -1165,13 +1165,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { encoderNet->Evaluate(encoderEvaluationNodes[0]); - actualMBSize = decoderNet->GetActualMBSize(); + actualMBSize = decoderNet->DetermineActualMBSizeFromFeatures(); decoderNet->SetActualMiniBatchSize(actualMBSize); decoderNet->SetActualNbrSlicesInEachRecurentIteration(decoderTrainSetDataReader->NumberSlicesInEachRecurrentIter()); - - /// not the sentence begining, because the initial hidden layer activity is from the encoder network decoderTrainSetDataReader->CopyMBLayoutTo(decoderNet->GetMBLayoutPtr()); + /// not the sentence begining, because the initial hidden layer activity is from the encoder network if (decoderCriterionNodes.size() == 0 && decoderEvaluationNodes.size() == 0) { diff --git a/MachineLearning/CNTKSGDLib/SGD.cpp b/MachineLearning/CNTKSGDLib/SGD.cpp index c555eae333d5..9775718f0de4 100644 --- a/MachineLearning/CNTKSGDLib/SGD.cpp +++ b/MachineLearning/CNTKSGDLib/SGD.cpp @@ -1306,7 +1306,7 @@ template ComputationNetwork::UpdateEvalTimeStamps(featureNodes); ComputationNetwork::UpdateEvalTimeStamps(labelNodes); - size_t actualMBSize = net.GetActualMBSize(); + size_t actualMBSize = net.DetermineActualMBSizeFromFeatures(); net.SetActualMiniBatchSize(actualMBSize); net.SetActualNbrSlicesInEachRecurentIteration(trainSetDataReader->NumberSlicesInEachRecurrentIter()); trainSetDataReader->CopyMBLayoutTo(net.GetMBLayoutPtr()); @@ -1766,7 +1766,7 @@ template if (outputNodes.empty()) LogicError("no output node was found."); - size_t actualMBSize = net.GetActualMBSize(); + size_t actualMBSize = net.DetermineActualMBSizeFromFeatures(); net.SetActualMiniBatchSize(actualMBSize); net.SetActualNbrSlicesInEachRecurentIteration(trainSetDataReader->NumberSlicesInEachRecurrentIter()); trainSetDataReader->CopyMBLayoutTo(net.GetMBLayoutPtr()); @@ -1943,7 +1943,7 @@ template } } - actualMBSize = net.GetActualMBSize(); + actualMBSize = net.DetermineActualMBSizeFromFeatures(); if (actualMBSize != 0) { nSamplesSinceLastModelSync += actualMBSize; @@ -1972,6 +1972,7 @@ template { refNet.SetActualMiniBatchSize(actualMBSize); 
refNet.SetActualNbrSlicesInEachRecurentIteration(trainSetDataReader->NumberSlicesInEachRecurrentIter()); + // TODO: not setting MBLayout? refNet.Evaluate(refNode); Matrix::ScaleAndAdd((ElemType)m_adaptationRegWeight, dynamic_pointer_cast>(refNode)->FunctionValues(), diff --git a/MachineLearning/CNTKSGDLib/SimpleEvaluator.h b/MachineLearning/CNTKSGDLib/SimpleEvaluator.h index afad9252e16a..3f2b1445806c 100644 --- a/MachineLearning/CNTKSGDLib/SimpleEvaluator.h +++ b/MachineLearning/CNTKSGDLib/SimpleEvaluator.h @@ -127,7 +127,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNetwork::UpdateEvalTimeStamps(featureNodes); ComputationNetwork::UpdateEvalTimeStamps(labelNodes); - actualMBSize = m_net.GetActualMBSize(); + actualMBSize = m_net.DetermineActualMBSizeFromFeatures(); m_net.SetActualMiniBatchSize(actualMBSize); m_net.SetActualNbrSlicesInEachRecurentIteration(dataReader->NumberSlicesInEachRecurrentIter()); dataReader->CopyMBLayoutTo(m_net.GetMBLayoutPtr()); @@ -445,7 +445,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { auto preader = dataReaders.begin(); for (auto ptr = nets.begin(); ptr != nets.end(); ptr++, preader++) { - actualMBSize = (*ptr)->GetActualMBSize(); + actualMBSize = (*ptr)->DetermineActualMBSizeFromFeatures(); if (actualMBSize == 0) LogicError("decoderTrainSetDataReader read data but encoderNet reports no data read"); @@ -460,10 +460,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { decoderNet = nets[iNumNets - 1]; /// not the sentence begining, because the initial hidden layer activity is from the encoder network - actualMBSize = decoderNet->GetActualMBSize(); - decoderNet->SetActualMiniBatchSize(actualMBSize); + actualMBSize = decoderNet->DetermineActualMBSizeFromFeatures(); if (actualMBSize == 0) LogicError("decoderTrainSetDataReader read data but decoderNet reports no data read"); + decoderNet->SetActualMiniBatchSize(actualMBSize); decoderNet->SetActualNbrSlicesInEachRecurentIteration(decoderDataReader->NumberSlicesInEachRecurrentIter()); decoderDataReader->CopyMBLayoutTo(decoderNet->GetMBLayoutPtr()); @@ -663,10 +663,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (auto ptr = nets.begin(); ptr != nets.end() - 1; ptr++, ptrreader++) { /// evaluate on the encoder networks - actualMBSize = (*ptr)->GetActualMBSize(); + actualMBSize = (*ptr)->DetermineActualMBSizeFromFeatures(); - (*ptr)->SetActualMiniBatchSize(actualMBSize); mNutt = (*ptrreader)->NumberSlicesInEachRecurrentIter(); + (*ptr)->SetActualMiniBatchSize(actualMBSize); (*ptr)->SetActualNbrSlicesInEachRecurentIteration(mNutt); (*ptrreader)->CopyMBLayoutTo((*ptr)->GetMBLayoutPtr()); @@ -771,7 +771,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNetwork::UpdateEvalTimeStamps(featureNodes); - size_t actualMBSize = net.GetActualMBSize(); + size_t actualMBSize = net.DetermineActualMBSizeFromFeatures(); net.SetActualMiniBatchSize(actualMBSize); for (auto nodeIter = batchComputeNodes.begin(); nodeIter != batchComputeNodes.end(); nodeIter++) { @@ -846,7 +846,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { ComputationNetwork::UpdateEvalTimeStamps(featureNodes); - actualMBSize = m_net.GetActualMBSize(); + actualMBSize = m_net.DetermineActualMBSizeFromFeatures(); m_net.SetActualMiniBatchSize(actualMBSize); vector best_path; @@ -904,7 +904,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { vector evalResults; size_t mbSize; - mbSize = evalnet->GetActualMBSize(); + mbSize = evalnet->DetermineActualMBSizeFromFeatures(); 
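+        // (My reading: the factor of 2 below is a heuristic cap that lets decoding run up to
+        // twice the input length; I have not found the choice documented anywhere.)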
size_t maxMbSize = 2 * mbSize; /// use reader to initialize evalnet's sentence start information to let it know that this @@ -930,7 +930,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { /// need to set the minibatch size to 1, and initialize evalnet's sentence start information to let it know that this /// is the begining of sentence - evalnet->SetActualMiniBatchSize(1, &featureNodes); + evalnet->SetActualMiniBatchSize(1/*, &featureNodes*/); + for (auto ptr = featureNodes.begin(); ptr != featureNodes.end(); ptr++) + { + size_t nr = (*ptr)->GetNumRows(); + (*ptr)->Resize(nr, 1); + } + dataReader->CopyMBLayoutTo(evalnet->GetMBLayoutPtr()); /// need to set the sentence begining segmentation info evalnet->GetMBLayoutPtr()->GetM().SetValue(((int) MinibatchPackingFlags::SequenceStart)); @@ -1067,9 +1073,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { maxMbSize = 2; #endif /// use reader to initialize evalnet's sentence start information to let it know that this - /// is the begining of sentence + /// is the beginning of sentence evalnet->SetActualMiniBatchSize(mbSize); evalnet->SetActualNbrSlicesInEachRecurentIteration(dataReader->NumberSlicesInEachRecurrentIter()); + // TODO: not setting MBLayout? clock_t start, now; start = clock(); diff --git a/MachineLearning/CNTKSGDLib/SimpleOutputWriter.h b/MachineLearning/CNTKSGDLib/SimpleOutputWriter.h index fcce99d34e17..1fc6cc8bd9ea 100644 --- a/MachineLearning/CNTKSGDLib/SimpleOutputWriter.h +++ b/MachineLearning/CNTKSGDLib/SimpleOutputWriter.h @@ -72,7 +72,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNetwork::UpdateEvalTimeStamps(featureNodes); ComputationNetwork::UpdateEvalTimeStamps(labelNodes); - size_t actualMBSize = m_net.GetActualMBSize(); + size_t actualMBSize = m_net.DetermineActualMBSizeFromFeatures(); // TODO: should this be dataReader.DetermineActualMBSizeFromFeatures()? m_net.SetActualMiniBatchSize(actualMBSize); m_net.SetActualNbrSlicesInEachRecurentIteration(dataReader.NumberSlicesInEachRecurrentIter()); dataReader.CopyMBLayoutTo(m_net.GetMBLayoutPtr()); @@ -154,7 +154,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { ComputationNetwork::UpdateEvalTimeStamps(featureNodes); - size_t actualMBSize = m_net.GetActualMBSize(); + size_t actualMBSize = m_net.DetermineActualMBSizeFromFeatures(); m_net.SetActualMiniBatchSize(actualMBSize); dataReader.CopyMBLayoutTo(m_net.GetMBLayoutPtr()); diff --git a/Math/Math/Matrix.h b/Math/Math/Matrix.h index cef4d3143489..317e7387e6bf 100644 --- a/Math/Math/Matrix.h +++ b/Math/Math/Matrix.h @@ -515,11 +515,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { typedef Matrix SingleMatrix; typedef Matrix DoubleMatrix; - // TODO: move this to an appropriate place and name it properly // MBLayout -- layout information of minibatch // Currently this is to bind the two somewhat inconsistent boundary flags and packing flags. - // Once that is unified, we can clean it up further. For now, it's just moving the data members. + // Once that is unified, we can clean it up further. For now, it's just moving the data members and encapsulating access to them where possible. // This should probably also contain m_actualNbrSlicesInEachRecIter (which should be node-dependent). + // TODO: move this to an appropriate place and name it properly + // NOTE: This class represents an abstraction of an originally distributed/code-duped way of defining and accessing the MB layout. + // The code below represents the actual use cases I encountered. 
Not all are, I believe, needed to be as they are; this class could be simplified/streamlined much further. + // Some wackiness below is explained by this. + // TODO: frame-randoized MBs are now represented as one stream of many frames. This is wrong; they should be one-frame utterances with many streams. Once we fully abstract out Data access, this can be changed easily. struct MBLayout { MBLayout() : m_sentenceBoundaryFlags(CPUDEVICE) { } From 37f39ac1efd0b9236e893d5797696e852e10890d Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Fri, 18 Sep 2015 16:32:49 -0700 Subject: [PATCH 06/44] streamlined pattern { actualMBSize = DetermineActualMBSizeFromFeatures(); SetActualMiniBatchSize(actualMBSize); } to SetActualMiniBatchSizeFromFeatures() (keeps things a little more encapsulated. Unfortunately, there are still a few exceptions) --- .../ComputationNetwork.cpp | 3 +-- .../ComputationNetwork.h | 24 ++++++++++++------ MachineLearning/CNTKSGDLib/MultiNetworksSGD.h | 8 ++---- MachineLearning/CNTKSGDLib/SGD.cpp | 16 ++++++------ MachineLearning/CNTKSGDLib/SimpleEvaluator.h | 25 ++++++------------- .../CNTKSGDLib/SimpleOutputWriter.h | 6 ++--- 6 files changed, 36 insertions(+), 46 deletions(-) diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp index d5b08827e64e..960cf8c45d54 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp @@ -193,8 +193,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ENodeList"); - size_t actualMBSize = DetermineActualMBSizeFromFeatures(); - SetActualMiniBatchSize(actualMBSize); + SetActualMiniBatchSizeFromFeatures(); if (requireValidation) ValidateNetwork(); diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h index 4454f83499fc..5bd1ba82bf11 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h @@ -654,6 +654,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb } // resize entire network to handle a given MB size + // TODO: actually it only updates nodes in m_recurrentInfo. Why? Because without recurrence, size never changes? // TODO: Is this always called with the result of DetermineActualMBSizeFromFeatures()? Why would it ever not? void SetActualMiniBatchSize(const size_t aSize) { @@ -666,18 +667,27 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb m_recurrentInfo[i].m_completedGradient = false; } + // resize function values and gradients of everything in m_recurrentInfo for (int i = 0; i < m_recurrentInfo.size(); i++) - for (auto nodeIter = m_recurrentInfo[i].m_recurrentNodes.begin(); nodeIter != m_recurrentInfo[i].m_recurrentNodes.end(); nodeIter++) - (*nodeIter)->SetFunctionAndGradientSize(m_actMiniBSize); + for (auto nodeIter : m_recurrentInfo[i].m_recurrentNodes) + nodeIter->SetFunctionAndGradientSize(m_actMiniBSize); + } + + // it is used this way most of the time + size_t SetActualMiniBatchSizeFromFeatures() + { + size_t aSize = DetermineActualMBSizeFromFeatures(); + SetActualMiniBatchSize(aSize); + return aSize; } // GetMaxMBSize - Get the maximum minibatch size that will be seen in a training run // returns the result from SetActualMiniBatchSize(). 
Note DetermineActualMBSizeFromFeatures() also exists but returns a value derived from the inputs dimensions size_t GetMaxMBSize() { return m_actMiniBSize; } - // always called in this pattern: #if 0 - evalnet->SetActualMiniBatchSize(mbSize); + // always called in this pattern: + evalnet->SetActualMiniBatchSizeFromFeatures(); evalnet->SetActualNbrSlicesInEachRecurentIteration(dataReader->NumberSlicesInEachRecurrentIter()); dataReader->CopyMBLayoutTo(evalnet->GetMBLayoutPtr()); // well... most of the time. Not in TrainOneEpoch(). @@ -1105,8 +1115,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb if (!allowFragment) FormRecurrentLoops(node); PrintComputationTree(node, false); - size_t actualMBSize = this->DetermineActualMBSizeFromFeatures(); - this->SetActualMiniBatchSize(actualMBSize); + SetActualMiniBatchSizeFromFeatures(); ValidateSubNetwork(node); } } @@ -1277,8 +1286,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb { if (!allowFragment) FormRecurrentLoops(node); - size_t actualMBSize = this->DetermineActualMBSizeFromFeatures(); - this->SetActualMiniBatchSize(actualMBSize); + this->SetActualMiniBatchSizeFromFeatures(); if (!UnitTest(node)) vErrors.push_back(node->NodeName().c_str()); } diff --git a/MachineLearning/CNTKSGDLib/MultiNetworksSGD.h b/MachineLearning/CNTKSGDLib/MultiNetworksSGD.h index 499230767b45..2763b02a20f1 100644 --- a/MachineLearning/CNTKSGDLib/MultiNetworksSGD.h +++ b/MachineLearning/CNTKSGDLib/MultiNetworksSGD.h @@ -1157,17 +1157,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { Matrix& localEpochEvalErrors ) { - size_t actualMBSize = encoderNet->DetermineActualMBSizeFromFeatures(); - - encoderNet->SetActualMiniBatchSize(actualMBSize); + encoderNet->SetActualMiniBatchSizeFromFeatures(); encoderNet->SetActualNbrSlicesInEachRecurentIteration(encoderTrainSetDataReader->NumberSlicesInEachRecurrentIter()); encoderTrainSetDataReader->CopyMBLayoutTo(encoderNet->GetMBLayoutPtr()); encoderNet->Evaluate(encoderEvaluationNodes[0]); - actualMBSize = decoderNet->DetermineActualMBSizeFromFeatures(); - - decoderNet->SetActualMiniBatchSize(actualMBSize); + decoderNet->SetActualMiniBatchSizeFromFeatures(); decoderNet->SetActualNbrSlicesInEachRecurentIteration(decoderTrainSetDataReader->NumberSlicesInEachRecurrentIter()); decoderTrainSetDataReader->CopyMBLayoutTo(decoderNet->GetMBLayoutPtr()); /// not the sentence begining, because the initial hidden layer activity is from the encoder network diff --git a/MachineLearning/CNTKSGDLib/SGD.cpp b/MachineLearning/CNTKSGDLib/SGD.cpp index 9775718f0de4..4adcf874fbaf 100644 --- a/MachineLearning/CNTKSGDLib/SGD.cpp +++ b/MachineLearning/CNTKSGDLib/SGD.cpp @@ -1306,8 +1306,7 @@ template ComputationNetwork::UpdateEvalTimeStamps(featureNodes); ComputationNetwork::UpdateEvalTimeStamps(labelNodes); - size_t actualMBSize = net.DetermineActualMBSizeFromFeatures(); - net.SetActualMiniBatchSize(actualMBSize); + net.SetActualMiniBatchSizeFromFeatures(); net.SetActualNbrSlicesInEachRecurentIteration(trainSetDataReader->NumberSlicesInEachRecurrentIter()); trainSetDataReader->CopyMBLayoutTo(net.GetMBLayoutPtr()); @@ -1766,8 +1765,7 @@ template if (outputNodes.empty()) LogicError("no output node was found."); - size_t actualMBSize = net.DetermineActualMBSizeFromFeatures(); - net.SetActualMiniBatchSize(actualMBSize); + net.SetActualMiniBatchSizeFromFeatures(); net.SetActualNbrSlicesInEachRecurentIteration(trainSetDataReader->NumberSlicesInEachRecurrentIter()); 
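+        // (Same three-step per-minibatch setup that ComputationNetwork.h documents as "always
+        // called in this pattern": MB size from features, then #slices, then the reader's MBLayout.)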
trainSetDataReader->CopyMBLayoutTo(net.GetMBLayoutPtr()); net.Evaluate(outputNodes[0]); // Only evaluate the first output @@ -1943,23 +1941,23 @@ template } } - actualMBSize = net.DetermineActualMBSizeFromFeatures(); + actualMBSize = net.SetActualMiniBatchSizeFromFeatures(); if (actualMBSize != 0) { - nSamplesSinceLastModelSync += actualMBSize; - net.SetActualMiniBatchSize(actualMBSize); - net.SetActualNbrSlicesInEachRecurentIteration(nSlices); - if (!useDistributedMBReading && useParallelTrain && trainSetDataReader->RequireSentenceSeg()) { + net.SetActualNbrSlicesInEachRecurentIteration(nSlices); *net.GetMBLayoutPtr() = *pMBLayout; // TODO: ^^ we should just pass pointers; this current code is semantically identical to before the change to MBLayout } else { + net.SetActualNbrSlicesInEachRecurentIteration(nSlices); trainSetDataReader->CopyMBLayoutTo(net.GetMBLayoutPtr()); } + nSamplesSinceLastModelSync += actualMBSize; + ComputationNetwork::UpdateEvalTimeStamps(featureNodes); ComputationNetwork::UpdateEvalTimeStamps(labelNodes); diff --git a/MachineLearning/CNTKSGDLib/SimpleEvaluator.h b/MachineLearning/CNTKSGDLib/SimpleEvaluator.h index 3f2b1445806c..6306903b36b1 100644 --- a/MachineLearning/CNTKSGDLib/SimpleEvaluator.h +++ b/MachineLearning/CNTKSGDLib/SimpleEvaluator.h @@ -127,8 +127,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNetwork::UpdateEvalTimeStamps(featureNodes); ComputationNetwork::UpdateEvalTimeStamps(labelNodes); - actualMBSize = m_net.DetermineActualMBSizeFromFeatures(); - m_net.SetActualMiniBatchSize(actualMBSize); + actualMBSize = m_net.SetActualMiniBatchSizeFromFeatures(); m_net.SetActualNbrSlicesInEachRecurentIteration(dataReader->NumberSlicesInEachRecurrentIter()); dataReader->CopyMBLayoutTo(m_net.GetMBLayoutPtr()); @@ -445,11 +444,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { auto preader = dataReaders.begin(); for (auto ptr = nets.begin(); ptr != nets.end(); ptr++, preader++) { - actualMBSize = (*ptr)->DetermineActualMBSizeFromFeatures(); + actualMBSize = (*ptr)->SetActualMiniBatchSizeFromFeatures(); if (actualMBSize == 0) LogicError("decoderTrainSetDataReader read data but encoderNet reports no data read"); - - (*ptr)->SetActualMiniBatchSize(actualMBSize); (*ptr)->SetActualNbrSlicesInEachRecurentIteration((*preader)->NumberSlicesInEachRecurrentIter()); (*preader)->CopyMBLayoutTo((*ptr)->GetMBLayoutPtr()); @@ -460,10 +457,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { decoderNet = nets[iNumNets - 1]; /// not the sentence begining, because the initial hidden layer activity is from the encoder network - actualMBSize = decoderNet->DetermineActualMBSizeFromFeatures(); + actualMBSize = decoderNet->SetActualMiniBatchSizeFromFeatures(); if (actualMBSize == 0) LogicError("decoderTrainSetDataReader read data but decoderNet reports no data read"); - decoderNet->SetActualMiniBatchSize(actualMBSize); decoderNet->SetActualNbrSlicesInEachRecurentIteration(decoderDataReader->NumberSlicesInEachRecurrentIter()); decoderDataReader->CopyMBLayoutTo(decoderNet->GetMBLayoutPtr()); @@ -657,16 +653,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNetwork::UpdateEvalTimeStamps(featNodes); } - auto ptrreader = readers.begin(); size_t mNutt = 0; for (auto ptr = nets.begin(); ptr != nets.end() - 1; ptr++, ptrreader++) { /// evaluate on the encoder networks - actualMBSize = (*ptr)->DetermineActualMBSizeFromFeatures(); + actualMBSize = (*ptr)->SetActualMiniBatchSizeFromFeatures(); mNutt = 
(*ptrreader)->NumberSlicesInEachRecurrentIter(); - (*ptr)->SetActualMiniBatchSize(actualMBSize); (*ptr)->SetActualNbrSlicesInEachRecurentIteration(mNutt); (*ptrreader)->CopyMBLayoutTo((*ptr)->GetMBLayoutPtr()); @@ -771,12 +765,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNetwork::UpdateEvalTimeStamps(featureNodes); - size_t actualMBSize = net.DetermineActualMBSizeFromFeatures(); - net.SetActualMiniBatchSize(actualMBSize); + net.SetActualMiniBatchSizeFromFeatures(); for (auto nodeIter = batchComputeNodes.begin(); nodeIter != batchComputeNodes.end(); nodeIter++) - { net.Evaluate(*nodeIter); - } //mark done for (auto nodeIter = batchComputeNodes.begin(); nodeIter != batchComputeNodes.end(); nodeIter++) @@ -846,8 +837,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { ComputationNetwork::UpdateEvalTimeStamps(featureNodes); - actualMBSize = m_net.DetermineActualMBSizeFromFeatures(); - m_net.SetActualMiniBatchSize(actualMBSize); + actualMBSize = m_net.SetActualMiniBatchSizeFromFeatures(); vector best_path; @@ -930,7 +920,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { /// need to set the minibatch size to 1, and initialize evalnet's sentence start information to let it know that this /// is the begining of sentence - evalnet->SetActualMiniBatchSize(1/*, &featureNodes*/); + evalnet->SetActualMiniBatchSize(1); for (auto ptr = featureNodes.begin(); ptr != featureNodes.end(); ptr++) { size_t nr = (*ptr)->GetNumRows(); @@ -1097,6 +1087,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { /// need to set the minibatch size to 1, and initialize evalnet's sentence start information to let it know that this /// is the begining of sentence + // BUGBUG: This is almost certainly wrong; slice != MB size evalnet->SetActualMiniBatchSize(dataReader->NumberSlicesInEachRecurrentIter()); double best_score = -numeric_limits::infinity(); diff --git a/MachineLearning/CNTKSGDLib/SimpleOutputWriter.h b/MachineLearning/CNTKSGDLib/SimpleOutputWriter.h index 1fc6cc8bd9ea..9adb3184c820 100644 --- a/MachineLearning/CNTKSGDLib/SimpleOutputWriter.h +++ b/MachineLearning/CNTKSGDLib/SimpleOutputWriter.h @@ -72,8 +72,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNetwork::UpdateEvalTimeStamps(featureNodes); ComputationNetwork::UpdateEvalTimeStamps(labelNodes); - size_t actualMBSize = m_net.DetermineActualMBSizeFromFeatures(); // TODO: should this be dataReader.DetermineActualMBSizeFromFeatures()? 
- m_net.SetActualMiniBatchSize(actualMBSize);
+ size_t actualMBSize = m_net.SetActualMiniBatchSizeFromFeatures();
 m_net.SetActualNbrSlicesInEachRecurentIteration(dataReader.NumberSlicesInEachRecurrentIter());
 dataReader.CopyMBLayoutTo(m_net.GetMBLayoutPtr());
@@ -154,8 +153,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 {
 ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
- size_t actualMBSize = m_net.DetermineActualMBSizeFromFeatures();
- m_net.SetActualMiniBatchSize(actualMBSize);
+ size_t actualMBSize = m_net.SetActualMiniBatchSizeFromFeatures();
 dataReader.CopyMBLayoutTo(m_net.GetMBLayoutPtr());
 for (int i=0; i Date: Fri, 18 Sep 2015 17:35:07 -0700
Subject: [PATCH 07/44] changed { SetActualNbrSlicesInEachRecurentIteration(); CopyMBLayoutTo(); } to { CopyMBLayoutTo(); VerifyActualNumParallelSequences(); } and removed m_actualNbrSlicesInEachRecurentIteration; renamed ...NbrSlicesEachRecurrentIter to ...NumParallelSequences; also MBLayout::GetNumStreams() to GetNumParallelSequences() and GetNumFrames() to GetNumTimeSteps()
---
 Common/DataReader.cpp | 15 +++----
 Common/Include/DataReader.h | 6 +--
 DataReader/BinaryReader/BinaryReader.h | 5 +--
 DataReader/DSSMReader/DSSMReader.h | 4 +-
 DataReader/HTKMLFReader/HTKMLFReader.cpp | 1 +
 DataReader/HTKMLFReader/HTKMLFReader.h | 4 +-
 .../LMSequenceReader/SequenceReader.cpp | 2 +-
 DataReader/LMSequenceReader/SequenceReader.h | 2 +-
 .../LUSequenceReader/LUSequenceReader.cpp | 14 +++---
 .../LUSequenceReader/LUSequenceReader.h | 8 ++--
 .../LibSVMBinaryReader/LibSVMBinaryReader.h | 4 +-
 DataReader/SparsePCReader/SparsePCReader.h | 4 +-
 DataReader/UCIFastReader/UCIFastReader.cpp | 2 +-
 DataReader/UCIFastReader/UCIFastReader.h | 4 +-
 .../ComputationNetwork.h | 42 ++++++++++++-------
 .../ComputationNode.h | 8 ++--
 .../RecurrentNodes.h | 2 +-
 .../TrainingCriterionNodes.h | 2 +-
 MachineLearning/CNTKEval/EvalReader.h | 4 +-
 MachineLearning/CNTKSGDLib/MultiNetworksSGD.h | 4 +-
 MachineLearning/CNTKSGDLib/SGD.cpp | 29 ++++++-------
 MachineLearning/CNTKSGDLib/SimpleEvaluator.h | 27 ++++++------
 .../CNTKSGDLib/SimpleOutputWriter.h | 5 ++-
 Math/Math/Matrix.h | 12 +++---
 24 files changed, 112 insertions(+), 98 deletions(-)
diff --git a/Common/DataReader.cpp b/Common/DataReader.cpp
index 81f61c5e513a..764a7ae1ac9b 100644
--- a/Common/DataReader.cpp
+++ b/Common/DataReader.cpp
@@ -110,7 +110,7 @@ DataReader::DataReader(const ConfigParameters& config)
 for (size_t i = 0; i < m_ioNames.size(); i++)
 {
 m_dataReader[m_ioNames[i]]->Init(m_configure[m_ioNames[i]]);
- m_dataReader[m_ioNames[i]]->SetNbrSlicesEachRecurrentIter(mNbrUttPerMinibatch);
+ m_dataReader[m_ioNames[i]]->SetNumParallelSequences(mNbrUttPerMinibatch);
 }
 }
@@ -191,9 +191,9 @@ bool DataReader::GetMinibatch(std::map*
 for (size_t i = 0; i < m_ioNames.size(); i++)
 {
 if (nbr > 0)
- m_dataReader[m_ioNames[i]]->SetNbrSlicesEachRecurrentIter(nbr);
+ m_dataReader[m_ioNames[i]]->SetNumParallelSequences(nbr);
 bRet &= m_dataReader[m_ioNames[i]]->GetMinibatch(matrices);
- thisNbr = m_dataReader[m_ioNames[i]]->NumberSlicesInEachRecurrentIter();
+ thisNbr = m_dataReader[m_ioNames[i]]->GetNumParallelSequences();
 if (nbr > 0 && thisNbr != nbr)
 LogicError("DataReader::GetMinibatch: The specified number of utterances per minibatch is not consistent with the actual number of utterances per minibatch");
 nbr = thisNbr;
@@ -202,16 +202,16 @@ bool DataReader::GetMinibatch(std::map*
 }
 template
-size_t DataReader::NumberSlicesInEachRecurrentIter()
+size_t DataReader::GetNumParallelSequences()
{
 size_t nNbr = 0;
 for (size_t
i = 0; i < m_ioNames.size(); i++) { IDataReader * ptr = m_dataReader[m_ioNames[i]]; if (nNbr == 0) - nNbr = ptr->NumberSlicesInEachRecurrentIter(); - if (nNbr != ptr->NumberSlicesInEachRecurrentIter()) - LogicError("NumberSlicesInEachRecurrentIter: number of slices in each minibatch not consistent for these streams"); + nNbr = ptr->GetNumParallelSequences(); + if (nNbr != ptr->GetNumParallelSequences()) + LogicError("GetNumParallelSequences: number of slices in each minibatch not consistent for these streams"); } return nNbr; } @@ -244,6 +244,7 @@ bool DataReader::GetProposalObs(std::map void DataReader::CopyMBLayoutTo(MBLayoutPtr pMBLayout) { + // BUGBUG: This copies all data reader's layout info on top of each other, keeping only the last one; likely not what was intended. for (size_t i = 0; i < m_ioNames.size(); i++) m_dataReader[m_ioNames[i]]->CopyMBLayoutTo(pMBLayout); } diff --git a/Common/Include/DataReader.h b/Common/Include/DataReader.h index 0901826b0039..9e7007071549 100644 --- a/Common/Include/DataReader.h +++ b/Common/Include/DataReader.h @@ -78,9 +78,9 @@ class DATAREADER_API IDataReader } virtual bool GetMinibatch(std::map*>& matrices) = 0; - virtual size_t NumberSlicesInEachRecurrentIter() = 0; + virtual size_t GetNumParallelSequences() = 0; virtual int GetSentenceEndIdFromOutputLabel() { return -1; }; - virtual void SetNbrSlicesEachRecurrentIter(const size_t sz) { mBlgSize = sz; }; + virtual void SetNumParallelSequences(const size_t sz) { mBlgSize = sz; }; virtual bool RequireSentenceSeg() { return false; }; virtual const std::map& GetLabelMapping(const std::wstring&) { NOT_IMPLEMENTED; }; virtual void SetLabelMapping(const std::wstring&, const std::map&) { NOT_IMPLEMENTED; }; @@ -198,7 +198,7 @@ class DataReader: public IDataReader, protected Plugin // returns - true if there are more minibatches, false if no more minibatchs remain virtual bool GetMinibatch(std::map*>& matrices); - size_t NumberSlicesInEachRecurrentIter(); + size_t GetNumParallelSequences(); int GetSentenceEndIdFromOutputLabel(); // GetLabelMapping - Gets the label mapping from integer index to label type diff --git a/DataReader/BinaryReader/BinaryReader.h b/DataReader/BinaryReader/BinaryReader.h index 17061d5caf1a..1cb5c979394c 100644 --- a/DataReader/BinaryReader/BinaryReader.h +++ b/DataReader/BinaryReader/BinaryReader.h @@ -419,10 +419,9 @@ class BinaryReader : public IDataReader virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize); virtual bool GetMinibatch(std::map*>& matrices); - size_t NumberSlicesInEachRecurrentIter() { return 1 ;} - void SetNbrSlicesEachRecurrentIter(const size_t) { }; + size_t GetNumParallelSequences() { return 1 ;} + void SetNumParallelSequences(const size_t) { }; void CopyMBLayoutTo(MBLayoutPtr) {}; - virtual const std::map& GetLabelMapping(const std::wstring& sectionName); virtual void SetLabelMapping(const std::wstring& sectionName, const std::map::LabelIdType, typename BinaryReader::LabelType>& labelMapping); virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart=0); diff --git a/DataReader/DSSMReader/DSSMReader.h b/DataReader/DSSMReader/DSSMReader.h index b6bf9059e7c2..679fb3012331 100644 --- a/DataReader/DSSMReader/DSSMReader.h +++ b/DataReader/DSSMReader/DSSMReader.h @@ -141,8 +141,8 @@ class DSSMReader : public IDataReader virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize); virtual bool 
GetMinibatch(std::map*>& matrices); - size_t NumberSlicesInEachRecurrentIter() { return 1 ;} - void SetNbrSlicesEachRecurrentIter(const size_t) { }; + size_t GetNumParallelSequences() { return 1 ;} + void SetNumParallelSequences(const size_t) { }; void CopyMBLayoutTo(MBLayoutPtr) {}; virtual const std::map& GetLabelMapping(const std::wstring& sectionName); diff --git a/DataReader/HTKMLFReader/HTKMLFReader.cpp b/DataReader/HTKMLFReader/HTKMLFReader.cpp index 13b2131fe7ab..ec015a65e51d 100644 --- a/DataReader/HTKMLFReader/HTKMLFReader.cpp +++ b/DataReader/HTKMLFReader/HTKMLFReader.cpp @@ -1607,6 +1607,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (!m_framemode) *pMBLayout = *m_pMBLayout; + // TODO: what about frame mode? Should we create a dummy one? Or Clear() it? Reader should not know what ComputationNetworks' defaults are. } diff --git a/DataReader/HTKMLFReader/HTKMLFReader.h b/DataReader/HTKMLFReader/HTKMLFReader.h index a6bc99aa4784..6091572e4e3b 100644 --- a/DataReader/HTKMLFReader/HTKMLFReader.h +++ b/DataReader/HTKMLFReader/HTKMLFReader.h @@ -89,8 +89,8 @@ class HTKMLFReader : public IDataReader bool ReNewBufferForMultiIO(size_t i); - size_t NumberSlicesInEachRecurrentIter() { return m_numberOfuttsPerMinibatch ;} - void SetNbrSlicesEachRecurrentIter(const size_t) { }; + size_t GetNumParallelSequences() { return m_numberOfuttsPerMinibatch; } + void SetNumParallelSequences(const size_t) { }; void GetDataNamesFromConfig(const ConfigParameters& readerConfig, std::vector& features, std::vector& labels); diff --git a/DataReader/LMSequenceReader/SequenceReader.cpp b/DataReader/LMSequenceReader/SequenceReader.cpp index 47c14b472b1a..854705ece8cd 100644 --- a/DataReader/LMSequenceReader/SequenceReader.cpp +++ b/DataReader/LMSequenceReader/SequenceReader.cpp @@ -1812,7 +1812,7 @@ bool BatchSequenceReader::EnsureDataAvailable(size_t /*mbStartSample*/ } template -size_t BatchSequenceReader::NumberSlicesInEachRecurrentIter() +size_t BatchSequenceReader::GetNumParallelSequences() { size_t sz = mToProcess.size(); if (sz == 0) diff --git a/DataReader/LMSequenceReader/SequenceReader.h b/DataReader/LMSequenceReader/SequenceReader.h index 4c774a2ddbc5..bb85f48a8f14 100644 --- a/DataReader/LMSequenceReader/SequenceReader.h +++ b/DataReader/LMSequenceReader/SequenceReader.h @@ -393,7 +393,7 @@ class BatchSequenceReader : public SequenceReader void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize); bool GetMinibatch(std::map*>& matrices); bool EnsureDataAvailable(size_t mbStartSample); - size_t NumberSlicesInEachRecurrentIter(); + size_t GetNumParallelSequences(); void SetSentenceSegBatch(std::vector &sentenceEnd); void CopyMBLayoutTo(MBLayoutPtr); diff --git a/DataReader/LUSequenceReader/LUSequenceReader.cpp b/DataReader/LUSequenceReader/LUSequenceReader.cpp index 8070ed17ef2f..e50eab054989 100644 --- a/DataReader/LUSequenceReader/LUSequenceReader.cpp +++ b/DataReader/LUSequenceReader/LUSequenceReader.cpp @@ -799,7 +799,7 @@ bool BatchLUSequenceReader::EnsureDataAvailable(size_t /*mbStartSample } template -size_t BatchLUSequenceReader::NumberSlicesInEachRecurrentIter() +size_t BatchLUSequenceReader::GetNumParallelSequences() { size_t sz = (mSentenceBeginAt.size() == 0)?mBlgSize : mSentenceBeginAt.size(); if (mSentenceBeginAt.size() == 0) @@ -814,7 +814,7 @@ size_t BatchLUSequenceReader::NumberSlicesInEachRecurrentIter() } template -void BatchLUSequenceReader::SetNbrSlicesEachRecurrentIter(const size_t mz) +void 
BatchLUSequenceReader::SetNumParallelSequences(const size_t mz) { mBlgSize = mz; } @@ -1276,19 +1276,19 @@ void MultiIOBatchLUSequenceReader::CopyMBLayoutTo(MBLayoutPtr pMBLayou { p.second->CopyMBLayoutTo(pMBLayout); if (rows == 0) - rows = pMBLayout->GetNumStreams(); - else if (rows != pMBLayout->GetNumStreams()) + rows = pMBLayout->GetNumParallelSequences(); + else if (rows != pMBLayout->GetNumParallelSequences()) LogicError("multiple streams for LU sequence reader must have the same number of rows for sentence begining"); - size_t this_col = pMBLayout->GetNumFrames(); + size_t this_col = pMBLayout->GetNumTimeSteps(); col.push_back(this_col); cols += this_col; } } template -size_t MultiIOBatchLUSequenceReader::NumberSlicesInEachRecurrentIter() +size_t MultiIOBatchLUSequenceReader::GetNumParallelSequences() { - return mReader.begin()->second->NumberSlicesInEachRecurrentIter(); + return mReader.begin()->second->GetNumParallelSequences(); } template diff --git a/DataReader/LUSequenceReader/LUSequenceReader.h b/DataReader/LUSequenceReader/LUSequenceReader.h index e6f8eb7641b6..affc627ff7f1 100644 --- a/DataReader/LUSequenceReader/LUSequenceReader.h +++ b/DataReader/LUSequenceReader/LUSequenceReader.h @@ -184,7 +184,7 @@ class LUSequenceReader : public IDataReader ~LUSequenceReader(){}; void StartMinibatchLoop(size_t , size_t , size_t = requestDataSize) {}; - void SetNbrSlicesEachRecurrentIter(const size_t /*mz*/) {}; + void SetNumParallelSequences(const size_t /*mz*/) {}; void SentenceEnd(std::vector &/*sentenceEnd*/) {}; virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart = 0); @@ -298,8 +298,8 @@ class BatchLUSequenceReader : public LUSequenceReader bool GetMinibatch(std::map*>& matrices); bool EnsureDataAvailable(size_t mbStartSample); - size_t NumberSlicesInEachRecurrentIter(); - void SetNbrSlicesEachRecurrentIter(const size_t mz); + size_t GetNumParallelSequences(); + void SetNumParallelSequences(const size_t mz); void CopyMBLayoutTo(MBLayoutPtr pMBLayout); @@ -386,7 +386,7 @@ class MultiIOBatchLUSequenceReader : public BatchLUSequenceReader void CopyMBLayoutTo(MBLayoutPtr pMBLayout); - size_t NumberSlicesInEachRecurrentIter(); + size_t GetNumParallelSequences(); void Init(const ConfigParameters& readerConfig); diff --git a/DataReader/LibSVMBinaryReader/LibSVMBinaryReader.h b/DataReader/LibSVMBinaryReader/LibSVMBinaryReader.h index 9ca9a6f0ba32..98f2d04a2780 100644 --- a/DataReader/LibSVMBinaryReader/LibSVMBinaryReader.h +++ b/DataReader/LibSVMBinaryReader/LibSVMBinaryReader.h @@ -143,8 +143,8 @@ class LibSVMBinaryReader : public IDataReader virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize); virtual bool GetMinibatch(std::map*>& matrices); - size_t NumberSlicesInEachRecurrentIter() { return 1 ;} - void SetNbrSlicesEachRecurrentIter(const size_t) { }; + size_t GetNumParallelSequences() { return 1; } + void SetNumParallelSequences(const size_t) { }; void CopyMBLayoutTo(MBLayoutPtr){}; virtual const std::map& GetLabelMapping(const std::wstring& sectionName); virtual void SetLabelMapping(const std::wstring& sectionName, const std::map& labelMapping); diff --git a/DataReader/SparsePCReader/SparsePCReader.h b/DataReader/SparsePCReader/SparsePCReader.h index ac5b6ea00006..2ae72bbccc82 100644 --- a/DataReader/SparsePCReader/SparsePCReader.h +++ b/DataReader/SparsePCReader/SparsePCReader.h @@ -56,8 +56,8 @@ class SparsePCReader : public IDataReader virtual void 
StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize); virtual bool GetMinibatch(std::map*>& matrices); - size_t NumberSlicesInEachRecurrentIter() { return 1 ;} - void SetNbrSlicesEachRecurrentIter(const size_t) { }; + size_t GetNumParallelSequences() { return 1 ;} + void SetNumParallelSequences(const size_t) { }; void CopyMBLayoutTo(MBLayoutPtr) {}; virtual const std::map& GetLabelMapping(const std::wstring& sectionName); virtual void SetLabelMapping(const std::wstring& sectionName, const std::map& labelMapping); diff --git a/DataReader/UCIFastReader/UCIFastReader.cpp b/DataReader/UCIFastReader/UCIFastReader.cpp index 26ad512a151c..e7aeea2a8871 100644 --- a/DataReader/UCIFastReader/UCIFastReader.cpp +++ b/DataReader/UCIFastReader/UCIFastReader.cpp @@ -630,7 +630,7 @@ size_t RoundUp(size_t value, size_t size) } template -void UCIFastReader::SetNbrSlicesEachRecurrentIter(const size_t sz) +void UCIFastReader::SetNumParallelSequences(const size_t sz) { mBlgSize = sz; if (mOneLinePerFile) diff --git a/DataReader/UCIFastReader/UCIFastReader.h b/DataReader/UCIFastReader/UCIFastReader.h index 29228e5b025d..c0b6ccb57b46 100644 --- a/DataReader/UCIFastReader/UCIFastReader.h +++ b/DataReader/UCIFastReader/UCIFastReader.h @@ -111,7 +111,7 @@ class UCIFastReader : public IDataReader virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize); virtual bool GetMinibatch(std::map*>& matrices); - size_t NumberSlicesInEachRecurrentIter() { return mBlgSize; } + size_t GetNumParallelSequences() { return mBlgSize; } void CopyMBLayoutTo(MBLayoutPtr){}; virtual const std::map& GetLabelMapping(const std::wstring& sectionName); virtual void SetLabelMapping(const std::wstring& sectionName, const std::map& labelMapping); @@ -120,7 +120,7 @@ class UCIFastReader : public IDataReader virtual bool DataEnd(EndDataType endDataType); void SetSentenceSegBatch(Matrix&, Matrix&) { }; - void SetNbrSlicesEachRecurrentIter(const size_t sz); + void SetNumParallelSequences(const size_t sz); void SetRandomSeed(int) { NOT_IMPLEMENTED; } }; diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h index 5bd1ba82bf11..adf9136b9ad5 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h @@ -80,7 +80,6 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb m_randomSeedOffset = 0; m_actMiniBSize = 0; SetDeviceId(deviceId); - m_nbrSlicesInEachRecurrentIterationx = 1; } virtual ~ComputationNetwork() @@ -246,8 +245,8 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb { if (!m_pMBLayout->IsAllNone()) { - size_t numTimeSteps = m_pMBLayout->GetNumFrames(); - size_t numSequences = m_pMBLayout->GetNumStreams(); + size_t numTimeSteps = m_pMBLayout->GetNumTimeSteps(); + size_t numSequences = m_pMBLayout->GetNumParallelSequences(); if (m_pMBLayout->GetSize() != numTimeSteps) LogicError("GetNumSamplesWithLabel(): m_pMBLayout->m_minibatchPackingFlags should have one element for each timestep of all streams.Check feature reader. 
"); @@ -558,7 +557,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb for (auto nodeIter = recurrentNodes.begin(); nodeIter != recurrentNodes.end(); nodeIter++) (*nodeIter)->SetFunctionAndGradientSize(m_actMiniBSize); - int iMBSize = m_actMiniBSize / m_nbrSlicesInEachRecurrentIterationx; + int iMBSize = m_actMiniBSize / GetNumParallelSequences(); if (m_recurrentInfo[iLoopId].m_isForwardLoop) { @@ -598,10 +597,10 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb { // checks that will disappear once we complete the refactoring. If this passes for a while, we will eliminate one // If this fails, comment this out (it is safe) and tell fseide@microsoft.com. - if (m_nbrSlicesInEachRecurrentIterationx != m_pMBLayout->GetNumStreams()) - LogicError("Evaluate: detected that m_nbrSlicesInEachRecurrentIteration != m_pMBLayout->GetNumStreams()"); - if (m_pMBLayout->GetNumFrames() != m_pMBLayout->GetSize()) - LogicError("Evaluate: detected that m_pMBLayout->GetNumFrames() != m_pMBLayout->GetSize()"); + if (GetNumParallelSequences() != m_pMBLayout->GetNumParallelSequences()) + LogicError("Evaluate: detected that m_nbrSlicesInEachRecurrentIteration != m_pMBLayout->GetNumParallelSequences()"); + if (m_pMBLayout->GetNumTimeSteps() != m_pMBLayout->GetSize()) + LogicError("Evaluate: detected that m_pMBLayout->GetNumTimeSteps() != m_pMBLayout->GetSize()"); // prepare to compute with the subnetwork that this rootNode depends on, including // - auto-detecting recurrent loops @@ -625,7 +624,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb for (auto nodeIter = allNodes.begin(); nodeIter != allNodes.end(); nodeIter++) { // TODO: nbrSlices set once to the same value for all nodes each evaluation--is it ever changed later? - (*nodeIter)->SetNbrSlicesInEachRecurrentIteration(m_nbrSlicesInEachRecurrentIterationx); + (*nodeIter)->SetNumParallelSequences(GetNumParallelSequences()); if ((*nodeIter)->ReqMultiSeqHandling()) (*nodeIter)->ResetBound(m_pMBLayout); } @@ -688,13 +687,25 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb #if 0 // always called in this pattern: evalnet->SetActualMiniBatchSizeFromFeatures(); - evalnet->SetActualNbrSlicesInEachRecurentIteration(dataReader->NumberSlicesInEachRecurrentIter()); dataReader->CopyMBLayoutTo(evalnet->GetMBLayoutPtr()); + evalnet->VerifyActualNumParallelSequences(dataReader->GetNumParallelSequences()); // well... most of the time. Not in TrainOneEpoch(). + void SetActualNumParallelSequencesInEachRecurentIteration(const size_t aSize) + { + m_nbrSlicesInEachRecurrentIteration() = aSize; // TODO: this has to go + } #endif - void SetActualNbrSlicesInEachRecurentIteration(const size_t aSize) + size_t GetNumParallelSequences() const + { + return m_pMBLayout->GetNumParallelSequences(); + } + // temporary function: Call this after CopyMBLayoutTo(evalnet->GetMBLayoutPtr()) to ensure everything is consistent as expected + // It is actually called after every CopyMBLayoutTo() in the entire system (except for multi-reader CopyMBLayoutTo() itself). + // Remove this function after a few weeks of not firing. 
+ void VerifyActualNumParallelSequences(const size_t aSize) { - m_nbrSlicesInEachRecurrentIterationx = aSize; + if (GetNumParallelSequences() != aSize) + LogicError("VerifyActualNumParallelSequences: mismatching MB size in MBLayout"); } void ComputeGradientLoop(std::list& /*allNodes*/, const ComputationNodeBasePtr startNode) @@ -705,14 +716,14 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb { if (m_recurrentInfo[iLoopId].m_completedGradient == false) { - int mbSize = m_actMiniBSize / m_nbrSlicesInEachRecurrentIterationx; + int mbSize = m_actMiniBSize / GetNumParallelSequences(); if (m_recurrentInfo[iLoopId].m_isForwardLoop) { for (int timeIndex = mbSize - 1; timeIndex >= 0; timeIndex--) { for (auto nodeIter = recurrentNodes.rbegin(); nodeIter != recurrentNodes.rend(); ++nodeIter) { - (*nodeIter)->SetNbrSlicesInEachRecurrentIteration(m_nbrSlicesInEachRecurrentIterationx); // TODO: move to FrameRange object + (*nodeIter)->SetNumParallelSequences(GetNumParallelSequences()); // TODO: move to FrameRange object (*nodeIter)->ComputeGradientForChildren(timeIndex); } } @@ -723,7 +734,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb { for (auto nodeIter = recurrentNodes.rbegin(); nodeIter != recurrentNodes.rend(); ++nodeIter) { - (*nodeIter)->SetNbrSlicesInEachRecurrentIteration(m_nbrSlicesInEachRecurrentIterationx); + (*nodeIter)->SetNumParallelSequences(GetNumParallelSequences()); (*nodeIter)->ComputeGradientForChildren(timeIndex); } } @@ -1571,7 +1582,6 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb MBLayoutPtr m_pMBLayout; int m_actMiniBSize; - size_t m_nbrSlicesInEachRecurrentIterationx; // main node holder std::map m_nameToNodeMap; // [name] -> node; this is the main container that holds this networks' nodes diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h index 45abce8277c6..b3542c092fe5 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h @@ -168,7 +168,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void ResetBound(MBLayoutPtr pMBLayout) { - assert(pMBLayout->GetNumFrames() == pMBLayout->GetSize()); // TODO: move this check into MBLayout + assert(pMBLayout->GetNumTimeSteps() == pMBLayout->GetSize()); // TODO: move this check into MBLayout m_pMBLayout = pMBLayout; } @@ -248,7 +248,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // TODO: these two will disappear once the information is correctly held in a FrameRange record // This is called at 3 places; two are directly before ComputeGradientForChildren(). - void SetNbrSlicesInEachRecurrentIteration(size_t bsz) + void SetNumParallelSequences(size_t bsz) { m_samplesInRecurrentStep = bsz; } @@ -260,7 +260,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // This expression will be turned into a function call to right here, so that we compute this only at one place // and can also handle the full-minibatch case. // Let us try to get this member out of this class altogether; it belongs elsewhere. 
- size_t GetNbrSlicesInEachRecurrentIteration() const + size_t GetNumParallelSequences() const { return m_samplesInRecurrentStep; } @@ -903,7 +903,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (m_pMBLayout && !m_pMBLayout->IsAllNone()) { size_t nT = matrixToBeMasked.GetNumCols(); - size_t nS = m_pMBLayout->GetNumStreams(); + size_t nS = m_pMBLayout->GetNumParallelSequences(); if (m_pMBLayout->GetSize() != nT / nS) LogicError("MaskToZeroWhenLabelAndFeatureMissing: m_pMBLayout->m_minibatchPackingFlags should have one element for each timestep of all streams. Check feature reader. "); diff --git a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h index c038ecc2e31b..df94ccecfe7f 100644 --- a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h @@ -114,7 +114,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // then this becomes // S S X X E S S X X X E N N - size_t numRows = pMBLayout->GetNumStreams(); + size_t numRows = pMBLayout->GetNumParallelSequences(); // each row has a number to indicate how many values should be reset for that utterance vector numResetLeft(numRows, 0); diff --git a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h index 67e0613a7dc9..ddafb73b04de 100644 --- a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h @@ -1098,7 +1098,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (m_pMBLayout && !m_pMBLayout->IsAllNone()) { // 't' is not a time but rather a column index that encodes (time stamp, stream) - size_t nS = m_pMBLayout->GetNumStreams(); + size_t nS = m_pMBLayout->GetNumParallelSequences(); size_t j = t / nS; // this is the time stamp size_t i = t % nS; // this is the stream if (m_pMBLayout->Is(j, MinibatchPackingFlags::NoLabel)) // TODO: this outer test is redundant here, no? 
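All the reader/network call sites touched by this patch converge on one canonical per-minibatch sequence. A minimal sketch of that pattern follows; net, reader, and outputNode are placeholder names, not code from any one call site above:

    size_t actualMBSize = net.SetActualMiniBatchSizeFromFeatures();          // derive the MB width from the feature matrices
    reader.CopyMBLayoutTo(net.GetMBLayoutPtr());                             // the MBLayout now owns the number of parallel sequences
    net.VerifyActualNumParallelSequences(reader.GetNumParallelSequences());  // temporary cross-check; to be removed once it never fires
    net.Evaluate(outputNode);

The ordering matters: CopyMBLayoutTo() must run first, because VerifyActualNumParallelSequences() no longer stores anything; it only reads the sequence count back out of the network's MBLayout to confirm that the reader agrees.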
diff --git a/MachineLearning/CNTKEval/EvalReader.h b/MachineLearning/CNTKEval/EvalReader.h
index 50f7421fda65..c51525e3bca9 100644
--- a/MachineLearning/CNTKEval/EvalReader.h
+++ b/MachineLearning/CNTKEval/EvalReader.h
@@ -161,9 +161,9 @@ class EvalReader : public IDataReader
 return true;
 }
- size_t NumberSlicesInEachRecurrentIter() {return 1;}
+ size_t GetNumParallelSequences() { return 1; }
- void SetNbrSlicesEachRecurrentIter(const size_t ) {}
+ void SetNumParallelSequences(const size_t ) {}
 void SetSentenceSegBatch(std::vector &sentenceEnd)
 {
 sentenceEnd.resize(m_switchFrame.size());
diff --git a/MachineLearning/CNTKSGDLib/MultiNetworksSGD.h b/MachineLearning/CNTKSGDLib/MultiNetworksSGD.h
index 2763b02a20f1..829398c3015d 100644
--- a/MachineLearning/CNTKSGDLib/MultiNetworksSGD.h
+++ b/MachineLearning/CNTKSGDLib/MultiNetworksSGD.h
@@ -1158,14 +1158,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 )
 {
 encoderNet->SetActualMiniBatchSizeFromFeatures();
- encoderNet->SetActualNbrSlicesInEachRecurentIteration(encoderTrainSetDataReader->NumberSlicesInEachRecurrentIter());
 encoderTrainSetDataReader->CopyMBLayoutTo(encoderNet->GetMBLayoutPtr());
+ encoderNet->VerifyActualNumParallelSequences(encoderTrainSetDataReader->GetNumParallelSequences());
 encoderNet->Evaluate(encoderEvaluationNodes[0]);
 decoderNet->SetActualMiniBatchSizeFromFeatures();
- decoderNet->SetActualNbrSlicesInEachRecurentIteration(decoderTrainSetDataReader->NumberSlicesInEachRecurrentIter());
 decoderTrainSetDataReader->CopyMBLayoutTo(decoderNet->GetMBLayoutPtr());
+ decoderNet->VerifyActualNumParallelSequences(decoderTrainSetDataReader->GetNumParallelSequences());
 /// not the sentence beginning, because the initial hidden layer activity is from the encoder network
 if (decoderCriterionNodes.size() == 0 && decoderEvaluationNodes.size() == 0)
diff --git a/MachineLearning/CNTKSGDLib/SGD.cpp b/MachineLearning/CNTKSGDLib/SGD.cpp
index 4adcf874fbaf..e93b01140b2b 100644
--- a/MachineLearning/CNTKSGDLib/SGD.cpp
+++ b/MachineLearning/CNTKSGDLib/SGD.cpp
@@ -920,18 +920,18 @@ template
 }
 // first, we need to normalize the effect of nbruttsineachrecurrentiter
- if (trainSetDataReader->NumberSlicesInEachRecurrentIter() > 1 && m_needToNormalizeLRByParallUtterance)
+ if (trainSetDataReader->GetNumParallelSequences() > 1 && m_needToNormalizeLRByParallUtterance)
 {
 for (auto& x : m_learningRatesPerSample)
- x /= (float)trainSetDataReader->NumberSlicesInEachRecurrentIter();
+ x /= (float)trainSetDataReader->GetNumParallelSequences();
 }
 // first, we need to normalize the effect of nbruttsineachrecurrentiter for momentum
- if (trainSetDataReader->NumberSlicesInEachRecurrentIter() > 1 && m_needToNormalizeMomentumByParallUtterance)
+ if (trainSetDataReader->GetNumParallelSequences() > 1 && m_needToNormalizeMomentumByParallUtterance)
 {
 for (auto& x : m_momentumPerSample)
- x = (float)pow(x, 1.0 / trainSetDataReader->NumberSlicesInEachRecurrentIter());
- }
+ x = (float)pow(x, 1.0 / trainSetDataReader->GetNumParallelSequences());
+ }
 bool learnRateInitialized = false;
 if (startEpoch > 0)
@@ -1047,8 +1047,8 @@ template
 }
 actualMinibatchSize = chosenMinibatchSize;
- if (trainSetDataReader->NumberSlicesInEachRecurrentIter() > 1 && m_needToNormalizeMomentumByParallUtterance)
- actualMinibatchSize = chosenMinibatchSize * trainSetDataReader->NumberSlicesInEachRecurrentIter();
+ if (trainSetDataReader->GetNumParallelSequences() > 1 && m_needToNormalizeMomentumByParallUtterance)
+ actualMinibatchSize = chosenMinibatchSize *
trainSetDataReader->GetNumParallelSequences(); fprintf(stderr, "Starting Epoch %d: learning rate per sample = %f momentum = %f \n", i + 1, learnRatePerSample, MomentumPerMB(m_momentumPerSample[i], actualMinibatchSize)); @@ -1307,8 +1307,8 @@ template ComputationNetwork::UpdateEvalTimeStamps(labelNodes); net.SetActualMiniBatchSizeFromFeatures(); - net.SetActualNbrSlicesInEachRecurentIteration(trainSetDataReader->NumberSlicesInEachRecurrentIter()); trainSetDataReader->CopyMBLayoutTo(net.GetMBLayoutPtr()); + net.VerifyActualNumParallelSequences(trainSetDataReader->GetNumParallelSequences()); // TODO: Exactly this loop should be INSIDE ComputationNetwork--pass the nodes array instead! for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++) @@ -1766,8 +1766,8 @@ template LogicError("no output node was found."); net.SetActualMiniBatchSizeFromFeatures(); - net.SetActualNbrSlicesInEachRecurentIteration(trainSetDataReader->NumberSlicesInEachRecurrentIter()); trainSetDataReader->CopyMBLayoutTo(net.GetMBLayoutPtr()); + net.VerifyActualNumParallelSequences(trainSetDataReader->GetNumParallelSequences()); net.Evaluate(outputNodes[0]); // Only evaluate the first output trainSetDataReader->SetNetOutput(uttInfo, dynamic_pointer_cast>(outputNodes[0])->FunctionValues(), @@ -1922,7 +1922,7 @@ template size_t actualMBSize = 0; if (wasDataRead) { - size_t nSlices = trainSetDataReader->NumberSlicesInEachRecurrentIter(); + size_t nSlices = trainSetDataReader->GetNumParallelSequences(); MBLayoutPtr pMBLayout; if (!useDistributedMBReading && useParallelTrain) { @@ -1946,14 +1946,14 @@ template { if (!useDistributedMBReading && useParallelTrain && trainSetDataReader->RequireSentenceSeg()) { - net.SetActualNbrSlicesInEachRecurentIteration(nSlices); *net.GetMBLayoutPtr() = *pMBLayout; // TODO: ^^ we should just pass pointers; this current code is semantically identical to before the change to MBLayout + net.VerifyActualNumParallelSequences(nSlices); } else { - net.SetActualNbrSlicesInEachRecurentIteration(nSlices); trainSetDataReader->CopyMBLayoutTo(net.GetMBLayoutPtr()); + net.VerifyActualNumParallelSequences(nSlices); } nSamplesSinceLastModelSync += actualMBSize; @@ -1969,8 +1969,9 @@ template if (m_needAdaptRegularization && m_adaptationRegType == AdaptationRegType::KL && refNode != nullptr) { refNet.SetActualMiniBatchSize(actualMBSize); - refNet.SetActualNbrSlicesInEachRecurentIteration(trainSetDataReader->NumberSlicesInEachRecurrentIter()); - // TODO: not setting MBLayout? 
+ *refNet.GetMBLayoutPtr() = *net.GetMBLayoutPtr(); // TODO: This is UNTESTED (before this was missing, seemingly inconsistently) + refNet.VerifyActualNumParallelSequences(trainSetDataReader->GetNumParallelSequences()); + refNet.Evaluate(refNode); Matrix::ScaleAndAdd((ElemType)m_adaptationRegWeight, dynamic_pointer_cast>(refNode)->FunctionValues(), diff --git a/MachineLearning/CNTKSGDLib/SimpleEvaluator.h b/MachineLearning/CNTKSGDLib/SimpleEvaluator.h index 6306903b36b1..b0e35552cea9 100644 --- a/MachineLearning/CNTKSGDLib/SimpleEvaluator.h +++ b/MachineLearning/CNTKSGDLib/SimpleEvaluator.h @@ -128,8 +128,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNetwork::UpdateEvalTimeStamps(labelNodes); actualMBSize = m_net.SetActualMiniBatchSizeFromFeatures(); - m_net.SetActualNbrSlicesInEachRecurentIteration(dataReader->NumberSlicesInEachRecurrentIter()); dataReader->CopyMBLayoutTo(m_net.GetMBLayoutPtr()); + m_net.VerifyActualNumParallelSequences(dataReader->GetNumParallelSequences()); //for now since we share the same label masking flag we call this on one node only //Later, when we apply different labels on different nodes @@ -447,8 +447,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { actualMBSize = (*ptr)->SetActualMiniBatchSizeFromFeatures(); if (actualMBSize == 0) LogicError("decoderTrainSetDataReader read data but encoderNet reports no data read"); - (*ptr)->SetActualNbrSlicesInEachRecurentIteration((*preader)->NumberSlicesInEachRecurrentIter()); (*preader)->CopyMBLayoutTo((*ptr)->GetMBLayoutPtr()); + (*ptr)->VerifyActualNumParallelSequences((*preader)->GetNumParallelSequences()); const auto & pairs = (*ptr)->PairNodes(); for (auto ptr2 = pairs.begin(); ptr2 != pairs.end(); ptr2++) @@ -460,8 +460,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { actualMBSize = decoderNet->SetActualMiniBatchSizeFromFeatures(); if (actualMBSize == 0) LogicError("decoderTrainSetDataReader read data but decoderNet reports no data read"); - decoderNet->SetActualNbrSlicesInEachRecurentIteration(decoderDataReader->NumberSlicesInEachRecurrentIter()); decoderDataReader->CopyMBLayoutTo(decoderNet->GetMBLayoutPtr()); + decoderNet->VerifyActualNumParallelSequences(decoderDataReader->GetNumParallelSequences()); size_t i = 0; assert(decoderEvaluationNodes.size() == 1); @@ -624,7 +624,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (auto ptr = readers.begin(); ptr != readers.end(); ptr++) { (*ptr)->StartMinibatchLoop(mbSize, 0, testSize); - (*ptr)->SetNbrSlicesEachRecurrentIter(1); + (*ptr)->SetNumParallelSequences(1); } Matrix historyMat(m_net.GetDeviceId()); @@ -660,9 +660,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { /// evaluate on the encoder networks actualMBSize = (*ptr)->SetActualMiniBatchSizeFromFeatures(); - mNutt = (*ptrreader)->NumberSlicesInEachRecurrentIter(); - (*ptr)->SetActualNbrSlicesInEachRecurentIteration(mNutt); + mNutt = (*ptrreader)->GetNumParallelSequences(); (*ptrreader)->CopyMBLayoutTo((*ptr)->GetMBLayoutPtr()); + (*ptr)->VerifyActualNumParallelSequences(mNutt); const auto & pairs = (*ptr)->PairNodes(); for (auto ptr2 = pairs.begin(); ptr2 != pairs.end(); ptr2++) @@ -673,8 +673,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { /// not the sentence begining, because the initial hidden layer activity is from the encoder network decoderNet->SetActualMiniBatchSize(actualMBSize); - decoderNet->SetActualNbrSlicesInEachRecurentIteration(mNutt); encoderDataReader->CopyMBLayoutTo(decoderNet->GetMBLayoutPtr()); + 
decoderNet->VerifyActualNumParallelSequences(mNutt);
 FindBestPathWithVariableLength(decoderNet, actualMBSize, decoderDataReader, dataWriter, outputNodes, writeNodes, decoderFeatureNodes, beam, &decoderInputMatrices, best_path);
@@ -735,8 +735,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 for (auto nodeIter = batchComputeNodes.begin(); nodeIter != batchComputeNodes.end(); nodeIter++)
 {
 ComputationNodeBasePtr node = *nodeIter;
- node->EvaluateThisNode(FrameRange(atTime, node->GetNbrSlicesInEachRecurrentIteration()));
- if (node->GetNumCols() != node->GetNbrSlicesInEachRecurrentIteration())
+ node->EvaluateThisNode(FrameRange(atTime, node->GetNumParallelSequences()));
+ if (node->GetNumCols() != node->GetNumParallelSequences())
 RuntimeError("preComputeActivityAtTime: the function values matrix has to be a single column matrix ");
 }
 }
@@ -828,7 +828,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 size_t actualMBSize = 0;
 dataReader->StartMinibatchLoop(mbSize, 0, testSize);
- dataReader->SetNbrSlicesEachRecurrentIter(1);
+ dataReader->SetNumParallelSequences(1);
 startReadMBTime = clock();
 size_t numMBsRun = 0;
@@ -900,8 +900,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 /// use reader to initialize evalnet's sentence start information to let it know that this
 /// is the beginning of sentence
 evalnet->SetActualMiniBatchSize(mbSize);
- evalnet->SetActualNbrSlicesInEachRecurentIteration(dataReader->NumberSlicesInEachRecurrentIter());
 dataReader->CopyMBLayoutTo(evalnet->GetMBLayoutPtr());
+ evalnet->VerifyActualNumParallelSequences(dataReader->GetNumParallelSequences());
 clock_t start, now;
 start = clock();
@@ -1065,8 +1065,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 /// use reader to initialize evalnet's sentence start information to let it know that this
 /// is the beginning of sentence
 evalnet->SetActualMiniBatchSize(mbSize);
- evalnet->SetActualNbrSlicesInEachRecurentIteration(dataReader->NumberSlicesInEachRecurrentIter()); // TODO: not setting MBLayout?
+ evalnet->VerifyActualNumParallelSequences(dataReader->GetNumParallelSequences()); + // TODO: This is UNTESTED; if it fails, change ^^ this back to SetActual...() clock_t start, now; start = clock(); @@ -1088,7 +1089,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { /// need to set the minibatch size to 1, and initialize evalnet's sentence start information to let it know that this /// is the begining of sentence // BUGBUG: This is almost certainly wrong; slice != MB size - evalnet->SetActualMiniBatchSize(dataReader->NumberSlicesInEachRecurrentIter()); + evalnet->SetActualMiniBatchSize(dataReader->GetNumParallelSequences()); double best_score = -numeric_limits::infinity(); double best_score_so_far = -numeric_limits::infinity(); diff --git a/MachineLearning/CNTKSGDLib/SimpleOutputWriter.h b/MachineLearning/CNTKSGDLib/SimpleOutputWriter.h index 9adb3184c820..e7e94f15567c 100644 --- a/MachineLearning/CNTKSGDLib/SimpleOutputWriter.h +++ b/MachineLearning/CNTKSGDLib/SimpleOutputWriter.h @@ -62,7 +62,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { //evaluate with minibatches dataReader.StartMinibatchLoop(mbSize, 0, numOutputSamples); - dataReader.SetNbrSlicesEachRecurrentIter(1); + dataReader.SetNumParallelSequences(1); size_t totalEpochSamples = 0; std::map outputMatrices; @@ -73,8 +73,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNetwork::UpdateEvalTimeStamps(labelNodes); size_t actualMBSize = m_net.SetActualMiniBatchSizeFromFeatures(); - m_net.SetActualNbrSlicesInEachRecurentIteration(dataReader.NumberSlicesInEachRecurrentIter()); dataReader.CopyMBLayoutTo(m_net.GetMBLayoutPtr()); + m_net.VerifyActualNumParallelSequences(dataReader.GetNumParallelSequences()); for (int i=0; iSetActualMiniBatchSize(mbSize); - evalnet->SetActualNbrSlicesInEachRecurentIteration(dataReader->NumberSlicesInEachRecurrentIter()); dataReader->CopyMBLayoutTo(evalnet->GetMBLayoutPtr()); + evalnet->VerifyActualNumParallelSequences(dataReader->GetNumParallelSequences()); #endif #if 0 // a VERY TELLING piece of code // packing flags = frame-wise or over all streams of start and end From 7e10b57b8eb56484b39b0e3f321ac58d0c561e3e Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Fri, 18 Sep 2015 17:47:44 -0700 Subject: [PATCH 08/44] changed all read accesses to ComputationNode::m_samplesInRecurrentStep to GetNumParallelSequences()--480+ changes! 
This is in prep to remove that member variable completely, replacing it solely by pMBLayout (we will first add a check that both are the same) --- .../CompositeComputationNodes.h | 24 +- .../ComputationNode.h | 6 +- .../ConvolutionalNodes.h | 22 +- .../InputAndParamNodes.h | 20 +- .../LinearAlgebraNodes.h | 246 +++++++++--------- .../NonlinearityNodes.h | 124 ++++----- .../RecurrentNodes.h | 98 +++---- .../TrainingCriterionNodes.h | 8 +- Math/Math/Matrix.h | 6 +- 9 files changed, 276 insertions(+), 278 deletions(-) diff --git a/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h b/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h index c6adb948b20d..251e24a388ca 100644 --- a/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h @@ -539,10 +539,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { //only feature (input0) and output needs to be sliced - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, - m_samplesInRecurrentStep); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, - m_samplesInRecurrentStep); + Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues()); } @@ -692,8 +690,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { //only feature (input0) and output needs to be sliced - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues()); } @@ -839,13 +837,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { { assert(m_memory.GetNumCols() > 0); - //FunctionValues().Resize(m_memory.GetNumRows(), m_samplesInRecurrentStep); + //FunctionValues().Resize(m_memory.GetNumRows(), GetNumParallelSequences()); FunctionValues().Resize(m_memory.GetNumRows(), frameRange.NumCols()); // extra space for one time step if (frameRange.t() == 0) // for first frame, check that we got all in memory --TODO: is this comment correct? How about going backwards? 
- assert(FunctionValues().FrameSlice(FrameRange(0, m_samplesInRecurrentStep)/*TODO: delete the next two parameters*/, 0, m_samplesInRecurrentStep).FrobeniusNorm() == m_memory.FrameSlice(FrameRange(0, m_samplesInRecurrentStep)/*TODO: delete the next two parameters*/, 0, m_samplesInRecurrentStep).FrobeniusNorm()); - //assert(FunctionValues().ColumnSlice(0, m_samplesInRecurrentStep).FrobeniusNorm() == m_memory.ColumnSlice(0, m_samplesInRecurrentStep).FrobeniusNorm()); - FunctionValues().SetValue(m_memory.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep)); - assert(FunctionValues().GetNumCols() == m_samplesInRecurrentStep); + assert(FunctionValues().FrameSlice(FrameRange(0, GetNumParallelSequences())/*TODO: delete the next two parameters*/, 0, GetNumParallelSequences()).FrobeniusNorm() == m_memory.FrameSlice(FrameRange(0, GetNumParallelSequences())/*TODO: delete the next two parameters*/, 0, GetNumParallelSequences()).FrobeniusNorm()); + //assert(FunctionValues().ColumnSlice(0, GetNumParallelSequences()).FrobeniusNorm() == m_memory.ColumnSlice(0, GetNumParallelSequences()).FrobeniusNorm()); + FunctionValues().SetValue(m_memory.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences())); + assert(FunctionValues().GetNumCols() == GetNumParallelSequences()); } virtual void SaveToFile(File& fstream) const @@ -934,7 +932,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 0) InvalidArgument("TimeReverse operation only takes one input."); ComputationNodePtr child = Inputs(inputIndex); - ComputeInputPartialS(GradientValues(), child->GradientValues(), m_samplesInRecurrentStep); + ComputeInputPartialS(GradientValues(), child->GradientValues(), GetNumParallelSequences()); } static void WINAPI ComputeInputPartialS(Matrix& gradientValues, Matrix& inputGradientValues, int nSamples) @@ -967,7 +965,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (m_hasComputed == false) { - EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), m_samplesInRecurrentStep); + EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), GetNumParallelSequences()); m_memory.SetValue(FunctionValues()); } } diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h index b3542c092fe5..8b5f0265f403 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h @@ -256,7 +256,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // Note: only used in one place, SimpleEvaluator.h PreComputeActivityAtTime(). // The member is, however, read out at 284 places inside nodes, // most of the time as - // FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep) + // FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()) // This expression will be turned into a function call to right here, so that we compute this only at one place // and can also handle the full-minibatch case. // Let us try to get this member out of this class altogether; it belongs elsewhere. 
@@ -861,7 +861,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { /*implement*/void EvaluateThisNodeGivenInputs(const size_t timeIdxInSeq) // TODO: change to FrameRange as well { - EvaluateThisNode(FrameRange(timeIdxInSeq, m_samplesInRecurrentStep)); + EvaluateThisNode(FrameRange(timeIdxInSeq, GetNumParallelSequences())); if (!UseCustomizedMultiSeqHandling()) MaskToZeroWhenLabelAndFeatureMissing(m_functionValues, timeIdxInSeq); @@ -1081,7 +1081,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { (msra::strfun::utf8 (child->OperationName())).c_str(), (msra::strfun::utf8 (child->NodeName())).c_str()); #endif - ComputeInputPartial(i, FrameRange(timeIdxInSeq, m_samplesInRecurrentStep)); //this computes partial wrt to the child and sums the gradient value in the child + ComputeInputPartial(i, FrameRange(timeIdxInSeq, GetNumParallelSequences())); //this computes partial wrt to the child and sums the gradient value in the child } #ifdef DISPLAY_DEBUG else fprintf (stderr, " [%lu]: %s(%s) (no gradient needed so don't compute for)\n", i, diff --git a/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h b/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h index fe3cc5c47a0c..8d591bfe9d94 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h @@ -111,14 +111,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 1) InvalidArgument("Convolution operation only takes two inputs."); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); if (inputIndex == 0) //derivative with regard to the weight matrix ComputeInputPartialOverWeight(sliceOutputGrad, Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), sliceInput1Value, m_tempMatrix, !frameRange.IsAllFrames()); else // derivative with regard to the input feature { - Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); ComputeInputPartialOverInputFeature(sliceOutputGrad, sliceInput1Grad, Inputs(0)->FunctionValues(), sliceInput1Value, m_tempMatrix); } } @@ -215,8 +215,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * 
m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value, m_tempMatrix);
 }
@@ -433,11 +433,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex > 0)
 InvalidArgument("MaxPooling operation only takes one input.");
- Matrix sliceInput0Grad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Grad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
- Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialV(sliceOutputGrad, sliceInput0Grad, sliceInput0Value, sliceOutputValue);
 }
@@ -447,8 +447,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
 {
- Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeV(sliceOutputValue, sliceInput0Value);
 }
diff --git a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h
index 1be91c8ef041..1639a55e6e55 100644
--- a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h
+++ b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h
@@ -330,15 +330,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex == 0) //left derivative
 {
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
 }
 else //right derivative
 {
- Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
 }
@@ -384,8 +384,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
 {
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value);
 }
@@ -555,8 +555,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 assert(m_functionValues.GetNumRows() == GradientValues().GetNumRows()); // original used m_functionValues.GetNumRows() for loop dimension
 assert(m_pMBLayout);
- Matrix mTmp = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix::ScaleAndAdd(1.0, GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep), mTmp);
+ Matrix mTmp = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix::ScaleAndAdd(1.0, GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), mTmp);
 }
 virtual void EvaluateThisNode()
@@ -566,8 +566,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
 {
- Matrix mTmp = FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- mTmp.SetValue(Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep));
+ Matrix mTmp = FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ mTmp.SetValue(Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()));
 }
 virtual void /*ComputationNodeBase::*/Validate()
diff --git a/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h b/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h
index e5abadebf623..ebc7eef2d904 100644
--- a/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h
+++ b/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h
@@ -53,8 +53,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex != 0)
 InvalidArgument("Negate operation only has one input.");
- Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialS(sliceInputGrad, sliceOutputGrad);
 }
@@ -71,8 +71,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
 {
- Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(sliceOutputValue, sliceInputValue);
 }
@@ -138,8 +138,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex != 0)
 InvalidArgument("SumElements only has one input.");
- Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialS(sliceInputGrad, sliceOutputGrad);
 }
@@ -156,8 +156,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
 {
- Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(sliceOutputValue, sliceInputValue);
 }
@@ -233,8 +233,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex != 0)
 InvalidArgument("SumColumnElements only has one input.");
- Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialS(sliceInputGrad, sliceOutputGrad);
 }
@@ -251,8 +251,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
 {
- Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
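Every hunk in this commit rewrites the same FrameSlice() call, so a minimal C++ sketch of the column arithmetic it relies on may help when reading the hunks. It assumes CNTK's packed minibatch layout, in which a matrix stores one column per frame and the columns of the S parallel sequences for time step t sit next to each other; the helper name columnRangeForTimeStep is hypothetical and not part of the patch.

#include <cstddef>
#include <utility>

// For S parallel sequences, time step t of the minibatch occupies the
// half-open column range [t * S, t * S + S) -- the same start/extent pair,
// frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(),
// that is passed to FrameSlice() throughout this commit.
std::pair<std::size_t, std::size_t> columnRangeForTimeStep(std::size_t t, std::size_t S)
{
    return { t * S, S };
}

// E.g. with S = 4 parallel sequences, time step t = 3 covers columns 12..15.
// The rename merely swaps the cached member m_samplesInRecurrentStep for the
// accessor GetNumParallelSequences(); the arithmetic itself is unchanged.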
 EvaluateThisNodeS(sliceOutputValue, sliceInputValue);
 }
@@ -370,8 +370,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex != 0)
 InvalidArgument("RowSlice only has one input.");
- Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_startIndex, m_numRows);
 }
@@ -388,8 +388,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
 {
- Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(sliceOutputValue, sliceInputValue, m_startIndex, m_numRows);
 }
@@ -486,8 +486,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex >= ChildrenSize())
 InvalidArgument("RowStack-ComputeInputPartial: inputIndex out of range.");
- Matrix sliceInputGrad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputGrad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_startRowIndeces[inputIndex], m_startRowIndeces[inputIndex+1] - m_startRowIndeces[inputIndex]);
 }
@@ -504,9 +504,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
 {
- Matrix sliceFunctionValues = FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceFunctionValues = FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
- EvaluateThisNodeS(sliceFunctionValues, m_inputMatrices, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ EvaluateThisNodeS(sliceFunctionValues, m_inputMatrices, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 }
 static void WINAPI EvaluateThisNodeS(Matrix<ElemType>& functionValues, const std::vector<const Matrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols)
@@ -623,15 +623,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 //left Node must be a scalar
 if (inputIndex == 0) //left derivative
 {
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
 }
 else
 {
- Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
 }
@@ -654,8 +654,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
 {
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value);
 }
@@ -742,15 +742,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex == 0) //left derivative
 {
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
 }
 else //right derivative
 {
- Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
 }
@@ -801,8 +801,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols();
 FunctionValues().Resize(rows0, cols1);
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value);
 }
@@ -915,15 +915,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex == 0) //left derivative
 {
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
 }
 else //right derivative
 {
- Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
 }
@@ -969,8 +969,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
 {
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value);
 }
@@ -1073,10 +1073,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex > 1)
 InvalidArgument("ElementTimes operation only takes two inputs.");
- Matrix sliceInput0Grad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Grad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
- Matrix sliceInput1Value = Inputs(1-inputIndex)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Value = Inputs(1-inputIndex)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialS(sliceInput1Value, sliceInput0Grad, sliceOutputGrad);
 }
@@ -1100,9 +1100,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
 {
- Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
 }
@@ -1202,10 +1202,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex > 1)
 InvalidArgument("RowElementTimes operation only takes two inputs.");
- Matrix sliceInput0Grad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Grad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
- Matrix sliceInput1Value = Inputs(1 - inputIndex)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Value = Inputs(1 - inputIndex)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 if (inputIndex == 0)
 {
@@ -1252,9 +1252,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
 {
- Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
 }
@@ -1353,17 +1353,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex > 1)
 InvalidArgument("ColumnElementTimes operation only takes two inputs.");
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 if (inputIndex == 0)
 {
- Matrix sliceInput0Grad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Grad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialLeftS(Inputs(1)->FunctionValues(), sliceInput0Grad, sliceOutputGrad, m_tempMatrix);
 }
 else
 {
- Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialRightS(sliceInput0Value, Inputs(1)->GradientValues(), sliceOutputGrad, m_tempMatrix);
 }
 }
@@ -1403,8 +1403,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
 {
- Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues());
 }
@@ -1509,13 +1509,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 //only the one with more columns can be sliced, if both have same columns both are sliced
 size_t cols0 = Inputs(inputIndex)->FunctionValues().GetNumCols(), cols1=Inputs(1-inputIndex)->FunctionValues().GetNumCols();
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 if (cols0 >= cols1)
 {
- Matrix sliceInput0Grad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceInput0Value = Inputs(inputIndex)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Grad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceInput0Value = Inputs(inputIndex)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialS(sliceOutputValue, sliceOutputGrad, sliceInput0Value, sliceInput0Grad);
 }
@@ -1584,25 +1584,25 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 {
 size_t cols0 = Inputs(0)->FunctionValues().GetNumCols(), cols1=Inputs(1)->FunctionValues().GetNumCols();
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 //only the one with more columns can be sliced, if both have same columns both are sliced
 if (cols0 == cols1)
 {
- Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
 }
 else if (cols0 > cols1)
 {
- Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues());
 }
 else //cols0 < cols1
 {
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value);
 }
@@ -1780,11 +1780,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 //only the one with more columns can be sliced, if both have same columns both are sliced
 size_t cols0 = Inputs(inputIndex)->FunctionValues().GetNumCols(), cols1=Inputs(1-inputIndex)->FunctionValues().GetNumCols();
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
- Matrix sliceInput0Grad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceInput0Value = Inputs(inputIndex)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Grad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceInput0Value = Inputs(inputIndex)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 Matrix ones = Matrix();
@@ -1890,25 +1890,25 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 {
 size_t cols0 = Inputs(0)->FunctionValues().GetNumCols(), cols1=Inputs(1)->FunctionValues().GetNumCols();
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 //only the one with more columns can be sliced, if both have same columns both are sliced
 if (cols0 == cols1)
 {
- Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
 }
 else if (cols0 > cols1)
 {
- Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues());
 }
 else //cols0 < cols1
 {
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value);
 }
@@ -2048,16 +2048,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 InvalidArgument("DiagTimes operation only takes two inputs.");
 //left parameter (diag matrix cannot be sliced)
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 if (inputIndex == 0) //left derivative
 {
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialLeft(m_innerproduct, sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
 }
 else //right derivative
 {
- Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialRight(m_rightGradient, Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
 }
 }
@@ -2083,8 +2083,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
 {
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value);
 }
@@ -2205,11 +2205,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex > 1)
 InvalidArgument("CosDistance operation only takes two inputs.");
- Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceInputGrad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputGrad = this->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceInputGrad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputGrad = this->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 if (inputIndex == 0) //left derivative
 {
@@ -2280,9 +2280,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
 {
- Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(m_invNorm0, m_invNorm1, sliceOutputValue, sliceInput0Value, sliceInput1Value);
 }
@@ -2426,19 +2426,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex > 1)
 InvalidArgument("KhatriRaoProduct operation only takes two inputs.");
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 if (inputIndex == 0) //left derivative
 {
- Matrix sliceInput0Grad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Grad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialLeft(sliceInput1Value, sliceInput0Grad, sliceOutputGrad);
 }
 else //right derivative
 {
- Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialRight(sliceInput0Value, sliceInput1Grad, sliceOutputGrad);
 }
@@ -2461,9 +2461,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
 {
- Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
 }
@@ -2564,11 +2564,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex > 1)
 InvalidArgument("CosDistanceWithNegativeSamples operation only takes gradients on the first two inputs.");
- Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceInputGrad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceThisGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceInputGrad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceThisGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialS(inputIndex, m_invNorm0, m_invNorm1, sliceOutputValue, m_temp, m_rightTerm, m_leftTerm, m_invNormSquare, sliceInput0Value, sliceInput1Value, Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), sliceInputGrad, sliceThisGrad);
 }
@@ -2681,9 +2681,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
 {
- Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(m_invNorm0, m_invNorm1, sliceOutputValue, sliceInput0Value, sliceInput1Value, Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), m_leftTerm, m_rightTerm);
 }
@@ -2961,25 +2961,25 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex > 1)
 InvalidArgument("StrideTimes operation only takes two inputs.");
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 if (m_StrideDim == 1) /// column stride
 {
 if (inputIndex == 0) //left derivative
 {
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 // TimesNode::ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
 Matrix mTmp1(sliceInput1Value.GetDeviceId());
 size_t r = Inputs(0)->FunctionValues().GetNumRows();
- size_t T1 = Inputs(0)->FunctionValues().GetNumCols() / m_samplesInRecurrentStep;
+ size_t T1 = Inputs(0)->FunctionValues().GetNumCols() / GetNumParallelSequences();
 mTmp1.Resize(r, T1);
 Matrix mTmp2(sliceInput1Value.GetDeviceId());
 Matrix mTmp3(sliceInput1Value.GetDeviceId());
- for (size_t k = 0; k < m_samplesInRecurrentStep; k++)
+ for (size_t k = 0; k < GetNumParallelSequences(); k++)
 {
 mTmp1.SetValue(0);
 mTmp2 = sliceInput1Value.ColumnSlice(k, 1);
@@ -2989,25 +2989,25 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 for (size_t t = 0; t < T1; t++)
 {
- Inputs(0)->GradientValues().ColumnSlice(t*m_samplesInRecurrentStep + k, 1) += mTmp1.ColumnSlice(t, 1);
+ Inputs(0)->GradientValues().ColumnSlice(t*GetNumParallelSequences() + k, 1) += mTmp1.ColumnSlice(t, 1);
 }
 }
 }
 else //right derivative
 {
- Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 // TimesNode::ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
- for (size_t k = 0; k < m_samplesInRecurrentStep; k++)
+ for (size_t k = 0; k < GetNumParallelSequences(); k++)
 {
 Matrix mTmp1(sliceOutputGrad.GetDeviceId());
 size_t r = Inputs(0)->FunctionValues().GetNumRows();
- size_t T1 = Inputs(0)->FunctionValues().GetNumCols() / m_samplesInRecurrentStep;
+ size_t T1 = Inputs(0)->FunctionValues().GetNumCols() / GetNumParallelSequences();
 mTmp1.Resize(r, T1);
 for (size_t t = 0; t < T1; t++)
 {
- mTmp1.ColumnSlice(t, 1).SetValue(Inputs(0)->FunctionValues().ColumnSlice(t*m_samplesInRecurrentStep + k, 1));
+ mTmp1.ColumnSlice(t, 1).SetValue(Inputs(0)->FunctionValues().ColumnSlice(t*GetNumParallelSequences() + k, 1));
 }
 Matrix mTmp2(sliceOutputGrad.GetDeviceId());
 mTmp2 = sliceInput1Grad.ColumnSlice(k, 1);
@@ -3022,13 +3022,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 {
 if (inputIndex == 0) //left derivative
 {
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
- for (size_t k = 0; k < m_samplesInRecurrentStep; k++)
+ for (size_t k = 0; k < GetNumParallelSequences(); k++)
 {
 Matrix mTmp1(sliceInput1Value.GetDeviceId());
 size_t d = Inputs(1)->FunctionValues().GetNumRows();
- size_t T1 = Inputs(0)->FunctionValues().GetNumRows() / m_samplesInRecurrentStep;
+ size_t T1 = Inputs(0)->FunctionValues().GetNumRows() / GetNumParallelSequences();
 mTmp1.Resize(d, T1);
 Matrix mTmp2(sliceInput1Value.GetDeviceId());
 mTmp2 = sliceInput1Value.ColumnSlice(k, 1);
@@ -3041,18 +3041,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 {
 mTmp4 = mTmp1.ColumnSlice(t, 1);
 mTmp4.Reshape(1, d);
- Inputs(0)->GradientValues().AddToRowSliceValuesOf(mTmp4, t*m_samplesInRecurrentStep + k, 1);
+ Inputs(0)->GradientValues().AddToRowSliceValuesOf(mTmp4, t*GetNumParallelSequences() + k, 1);
 }
 }
 }
 else //right derivative
 {
- Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
- for (size_t k = 0; k < m_samplesInRecurrentStep; k++)
+ for (size_t k = 0; k < GetNumParallelSequences(); k++)
 {
 size_t d = Inputs(1)->FunctionValues().GetNumRows();
- size_t T1 = Inputs(0)->FunctionValues().GetNumRows() / m_samplesInRecurrentStep;
+ size_t T1 = Inputs(0)->FunctionValues().GetNumRows() / GetNumParallelSequences();
 Matrix mTmp0(sliceOutputGrad.GetDeviceId());
 mTmp0.Resize(1, d);
@@ -3062,7 +3062,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 for (size_t t = 0; t < T1; t++)
 {
 mTmp0.SetValue(0);
- mTmp0.AddWithRowSliceValuesOf(Inputs(0)->FunctionValues(), t * m_samplesInRecurrentStep + k, 1);
+ mTmp0.AddWithRowSliceValuesOf(Inputs(0)->FunctionValues(), t * GetNumParallelSequences() + k, 1);
 mTmp1.AssignToRowSliceValuesOf(mTmp0, t, 1);
 }
 Matrix mTmp2(sliceOutputGrad.GetDeviceId());
 mTmp2 = sliceInput1Grad.ColumnSlice(k, 1);
@@ -3112,7 +3112,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols();
 UpdateStride(Inputs(1)->FunctionValues());
 if (m_StrideDim == 0)
- FunctionValues().Resize(rows0 / m_samplesInRecurrentStep, cols1);
+ FunctionValues().Resize(rows0 / GetNumParallelSequences(), cols1);
 if (m_StrideDim == 1)
 FunctionValues().Resize(rows0, cols1);
@@ -3127,13 +3127,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 {
 size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols();
- Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 UpdateStride(sliceInput1Value);
 if (m_StrideDim == 0)
- FunctionValues().Resize(rows0 / m_samplesInRecurrentStep, cols1);
+ FunctionValues().Resize(rows0 / GetNumParallelSequences(), cols1);
 if (m_StrideDim == 1)
 FunctionValues().Resize(rows0, cols1);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value, m_Stride, m_StrideDim);
 }
diff --git a/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h b/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h
index c481f8406ce3..eca814210627 100644
--- a/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h
+++ b/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h
@@ -63,11 +63,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 // We should also unify these two functions into one that decides 1 frame or all frames at runtime... through the slice-extractor function itself.
 // For now we could define ALL_SAMPLES e.g. as SIZE_MAX.
 // GetGradientSlice(), GetInputSlice() or something.
- Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 // why GradientValues() but m_functionValues below and not FunctionValues()?
- Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialV(m_gradient, sliceInputValue, sliceInputGrad, sliceOutputGrad);
 }
@@ -81,8 +81,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
 {
- Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 EvaluateThisNodeV(sliceOutputValue, sliceInputValue);
 }
@@ -206,10 +206,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex != 0)
 InvalidArgument("Sigmoid only has one input.");
- Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialS(m_gradient, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
 }
@@ -265,10 +265,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex != 0)
 InvalidArgument("Tanh only has one input.");
- Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialS(m_gradient, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
 }
@@ -326,10 +326,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex != 0)
 InvalidArgument("Log only has one input.");
- Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
- Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialS(m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad);
 }
@@ -386,10 +386,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex != 0)
 InvalidArgument("Exp only has one input.");
- Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
- Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialS(m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad);
 }
@@ -445,10 +445,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex != 0)
 InvalidArgument("Cosine only has one input.");
- Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
- Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
 ComputeInputPartialS(m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad);
 }
@@ -508,10 +508,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 if (inputIndex != 0)
 InvalidArgument("Softmax only has one input.");
- Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
- Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
+ Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+ Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep,
m_samplesInRecurrentStep); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); ComputeInputPartialS(m_gradient, m_diff, sliceInputGrad, sliceOutputGrad, sliceOutputValue); } @@ -616,10 +616,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Softmax only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); ComputeInputPartialS(m_gradient, m_softmax, sliceInputGrad, sliceOutputGrad, sliceOutputValue); } @@ -727,8 +727,8 @@ virtual const std::wstring OperationName() const { return TypeName(); } //get the right slice const size_t colsPrior = Inputs(0)->FunctionValues().GetNumCols(); - Matrix sliceGradientValue = m_gradientValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); - Matrix slicePosterior = m_posterior.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix sliceGradientValue = m_gradientValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix slicePosterior = m_posterior.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); switch (inputIndex) { @@ -738,40 +738,40 @@ virtual const std::wstring OperationName() const { return TypeName(); } ComputeInputPartialUnnormedPrior(Inputs(0)->GradientValues(), sliceGradientValue, m_prior, slicePosterior, m_temp); else { - Matrix sliceUnnormedPriorGradient = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); - Matrix slicePrior = m_prior.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix sliceUnnormedPriorGradient = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix slicePrior = m_prior.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); ComputeInputPartialUnnormedPrior(sliceUnnormedPriorGradient, 
sliceGradientValue, slicePrior, slicePosterior, m_temp); } } break; case 1: { - Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); if (colsPrior == 1) ComputeInputPartialMean(Inputs(1)->GradientValues(), sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); else { - Matrix sliceMeanGradient = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix sliceMeanGradient = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); ComputeInputPartialMean(sliceMeanGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); } } break; case 2: { - Matrix sliceNormedDeviation = m_normedDeviation.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix sliceNormedDeviation = m_normedDeviation.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); if (colsPrior == 1) ComputeInputPartialLogStddev(Inputs(2)->GradientValues(), sliceGradientValue, sliceNormedDeviation, slicePosterior, m_temp); else { - Matrix sliceLotStddevGradient = Inputs(2)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix sliceLotStddevGradient = Inputs(2)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); ComputeInputPartialLogStddev(sliceLotStddevGradient, sliceGradientValue, sliceNormedDeviation, slicePosterior, m_temp); } } break; case 3: { - Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); - Matrix sliceFeatureGradient = Inputs(3)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceFeatureGradient = Inputs(3)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); ComputeInputPartialFeature(sliceFeatureGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); } break; @@ -888,11 +888,11 @@ virtual const std::wstring OperationName() const { return TypeName(); } size_t numSamples = Inputs(3)->FunctionValues().GetNumCols(); //get the right slice - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); - Matrix sliceFeature = Inputs(3)->FunctionValues().FrameSlice(frameRange/*TODO: 
delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); - Matrix sliceNormedDeviation = m_normedDeviation.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); - Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); - Matrix slicePosterior = m_posterior.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceFeature = Inputs(3)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceNormedDeviation = m_normedDeviation.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix slicePosterior = m_posterior.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); if (colsPrior == 1) { @@ -901,12 +901,12 @@ virtual const std::wstring OperationName() const { return TypeName(); } } else if (colsPrior == numSamples) { - Matrix sliceUnnormedPrior = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); - Matrix sliceMean = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); - Matrix sliceLogstddev = Inputs(2)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix sliceUnnormedPrior = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceMean = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceLogstddev = Inputs(2)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix slicePrior = m_prior.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); - Matrix sliceStddev = m_stddev.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix slicePrior = m_prior.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceStddev = m_stddev.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); EvaluateThisNodeS(sliceOutputValue, 
sliceUnnormedPrior, sliceMean, sliceLogstddev, sliceFeature, slicePrior, sliceStddev, sliceNormedDeviationVectors, sliceNormedDeviation, slicePosterior, m_temp); @@ -1113,13 +1113,13 @@ virtual const std::wstring OperationName() const { return TypeName(); } if (inputIndex > 0) InvalidArgument("Dropout operation only takes one input."); - Matrix sliceInput0Grad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix sliceInput0Grad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); Matrix sliceMask = Matrix(); if (m_dropoutRate > 0) { - sliceMask = m_maskOfDropout.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + sliceMask = m_maskOfDropout.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); } ComputeInputPartialS(m_dropoutRate, sliceInput0Grad, sliceMask, sliceOutputGrad); @@ -1143,7 +1143,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } } virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); Matrix sliceOutputValue = Matrix (); Matrix sliceMask = Matrix(); @@ -1151,10 +1151,10 @@ virtual const std::wstring OperationName() const { return TypeName(); } { FunctionValues().Resize(Inputs(0)->FunctionValues().GetNumRows(), Inputs(0)->FunctionValues().GetNumCols()); m_maskOfDropout.Resize(Inputs(0)->FunctionValues().GetNumRows(), Inputs(0)->FunctionValues().GetNumCols()); - sliceMask = m_maskOfDropout.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + sliceMask = m_maskOfDropout.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); } - sliceOutputValue = FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + sliceOutputValue = FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); EvaluateThisNodeS(m_dropoutRate, m_randomSeed, sliceOutputValue, sliceMask, sliceInput0Value); } @@ -1399,13 +1399,13 @@ virtual const std::wstring OperationName() const { return TypeName(); } virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { size_t rows = Inputs(0)->FunctionValues().GetNumRows(); - if ((rows * m_samplesInRecurrentStep) % m_numRows > 0) + if ((rows 
* GetNumParallelSequences()) % m_numRows > 0) { LogicError("Reshape operation: Number of elements in the recurrent input step is not a multiple of the specified number of rows."); } - size_t outputSamplesInRecurrentStep = m_samplesInRecurrentStep * rows / m_numRows; - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + size_t outputSamplesInRecurrentStep = GetNumParallelSequences() * rows / m_numRows; + Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep); EvaluateThisNodeS(sliceOutputValue, sliceInputValue, m_numRows); @@ -1442,14 +1442,14 @@ virtual const std::wstring OperationName() const { return TypeName(); } InvalidArgument("Reshape operation only takes one input."); size_t rows = Inputs(0)->GradientValues().GetNumRows(); - if ((rows * m_samplesInRecurrentStep) % m_numRows > 0) + if ((rows * GetNumParallelSequences()) % m_numRows > 0) { LogicError("Reshape operation: Number of elements in the recurrent input step is not a multiple of the specified number of rows."); } - size_t outputSamplesInRecurrentStep = m_samplesInRecurrentStep * rows / m_numRows; + size_t outputSamplesInRecurrentStep = GetNumParallelSequences() * rows / m_numRows; - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_numRows); @@ -1646,8 +1646,8 @@ virtual const std::wstring OperationName() const { return TypeName(); } virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); EvaluateThisNodeS(sliceOutputValue, sliceInputValue, m_numRepeat); } @@ -1673,8 +1673,8 @@ virtual const std::wstring OperationName() const { return TypeName(); } if (inputIndex != 0) InvalidArgument("RowRepeat only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, 
m_samplesInRecurrentStep); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * m_samplesInRecurrentStep, m_samplesInRecurrentStep); + Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_numRepeat); } diff --git a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h index df94ccecfe7f..aaba256771d2 100644 --- a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h @@ -374,12 +374,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 0) // TODO: is this check necessary here? Can this be a generic check in the base class? InvalidArgument("PastValue and FutureValue operations only take one input."); - int nbrSamples = GradientValues().GetNumCols() / m_samplesInRecurrentStep; + int nbrSamples = GradientValues().GetNumCols() / GetNumParallelSequences(); for (int timeIdxInSeq = nbrSamples - 1; timeIdxInSeq >= 0; timeIdxInSeq--) { // TODO: call the looping version below to avoid code dup const auto colBoundaryFlags = m_pShiftedMBLayout->GetFrame(timeIdxInSeq); - ComputeInputPartialSRP(FrameRange(timeIdxInSeq, m_samplesInRecurrentStep), m_timeStep, Inputs(0)->GradientValues(), GradientValues(), colBoundaryFlags.first, colBoundaryFlags.second); + ComputeInputPartialSRP(FrameRange(timeIdxInSeq, GetNumParallelSequences()), m_timeStep, Inputs(0)->GradientValues(), GradientValues(), colBoundaryFlags.first, colBoundaryFlags.second); } } @@ -388,12 +388,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { { assert(m_timeStep > 0); - int nbrSamples = Inputs(0)->FunctionValues().GetNumCols() / m_samplesInRecurrentStep; + int nbrSamples = Inputs(0)->FunctionValues().GetNumCols() / GetNumParallelSequences(); for (int timeIdxInSeq = 0; timeIdxInSeq < nbrSamples; timeIdxInSeq++) { // TODO: call the looping version below to avoid code dup const auto colBoundaryFlags = m_pShiftedMBLayout->GetFrame(timeIdxInSeq); - EvaluateThisNodeSRP(FrameRange(timeIdxInSeq, m_samplesInRecurrentStep), m_timeStep, m_functionValues, m_delayedActivation, Inputs(0)->FunctionValues(), m_initialActivationValue, colBoundaryFlags.first, colBoundaryFlags.second); + EvaluateThisNodeSRP(FrameRange(timeIdxInSeq, GetNumParallelSequences()), m_timeStep, m_functionValues, m_delayedActivation, Inputs(0)->FunctionValues(), m_initialActivationValue, colBoundaryFlags.first, colBoundaryFlags.second); } //set the past activity to be used by next minibatch @@ -444,12 +444,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 0) InvalidArgument("PastValue and FutureValue operations only take one input."); - int nbrSamples = GradientValues().GetNumCols() / m_samplesInRecurrentStep; + int nbrSamples = GradientValues().GetNumCols() / GetNumParallelSequences(); for (int timeIdxInSeq = 0; timeIdxInSeq < nbrSamples; timeIdxInSeq++) { // TODO: call the looping version below to avoid code dup const auto colBoundaryFlags = m_pShiftedMBLayout->GetFrame(timeIdxInSeq); - ComputeInputPartialSRP(FrameRange(timeIdxInSeq, 
m_samplesInRecurrentStep), m_timeStep, Inputs(0)->GradientValues(), GradientValues(), colBoundaryFlags.first, colBoundaryFlags.second); + ComputeInputPartialSRP(FrameRange(timeIdxInSeq, GetNumParallelSequences()), m_timeStep, Inputs(0)->GradientValues(), GradientValues(), colBoundaryFlags.first, colBoundaryFlags.second); } } @@ -457,11 +457,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { { assert(m_timeStep > 0); - int nbrSamples = Inputs(0)->FunctionValues().GetNumCols() / m_samplesInRecurrentStep; + int nbrSamples = Inputs(0)->FunctionValues().GetNumCols() / GetNumParallelSequences(); for (int timeIdxInSeq = nbrSamples - 1; timeIdxInSeq >= 0; timeIdxInSeq--) { const auto colBoundaryFlags = m_pShiftedMBLayout->GetFrame(timeIdxInSeq); - EvaluateThisNodeSRP(FrameRange(timeIdxInSeq, m_samplesInRecurrentStep), m_timeStep, m_functionValues, m_delayedActivation, Inputs(0)->FunctionValues(), m_initialActivationValue, colBoundaryFlags.first, colBoundaryFlags.second); + EvaluateThisNodeSRP(FrameRange(timeIdxInSeq, GetNumParallelSequences()), m_timeStep, m_functionValues, m_delayedActivation, Inputs(0)->FunctionValues(), m_initialActivationValue, colBoundaryFlags.first, colBoundaryFlags.second); } //set the future activity to be used by next minibatch @@ -472,7 +472,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { assert(m_pMBLayout); - if (frameRange.t() == Inputs(0)->FunctionValues().GetNumCols() / m_samplesInRecurrentStep - 1) + if (frameRange.t() == Inputs(0)->FunctionValues().GetNumCols() / GetNumParallelSequences() - 1) m_delayedActivation = Inputs(0)->FunctionValues(); const auto colBoundaryFlags = m_pShiftedMBLayout->GetFrame(frameRange.t()); @@ -592,8 +592,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { Matrix slicePrevOutput(m_deviceId), slicePrevState(m_deviceId); Matrix grdToPrevOutput(m_deviceId), grdToPrevState(m_deviceId); Matrix stateError(m_deviceId); - slicePrevState.Resize(outputDim, m_samplesInRecurrentStep); - slicePrevOutput.Resize(outputDim, m_samplesInRecurrentStep); + slicePrevState.Resize(outputDim, GetNumParallelSequences()); + slicePrevOutput.Resize(outputDim, GetNumParallelSequences()); slicePrevOutput.SetValue(0); stateError.Resize(slicePrevState.GetNumRows(), slicePrevState.GetNumCols()); @@ -603,21 +603,21 @@ namespace Microsoft { namespace MSR { namespace CNTK { grdToPrevOutput.SetValue(0); grdToPrevState.SetValue(0); - for (int timeIdxInSeq = nT - m_samplesInRecurrentStep; timeIdxInSeq >= 0; timeIdxInSeq -= m_samplesInRecurrentStep) + for (int timeIdxInSeq = nT - GetNumParallelSequences(); timeIdxInSeq >= 0; timeIdxInSeq -= GetNumParallelSequences()) { - FrameRange frameRange(timeIdxInSeq, m_samplesInRecurrentStep); - Matrix sliceObs = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, m_samplesInRecurrentStep); - Matrix sliceOutput = FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, m_samplesInRecurrentStep); - Matrix sliceState = m_State.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, m_samplesInRecurrentStep); + FrameRange frameRange(timeIdxInSeq, GetNumParallelSequences()); + Matrix sliceObs = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, GetNumParallelSequences()); + Matrix sliceOutput = FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, GetNumParallelSequences()); + Matrix sliceState = 
m_State.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, GetNumParallelSequences()); - Matrix sliceGi = m_Gi.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, m_samplesInRecurrentStep); - Matrix sliceGf = m_Gf.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, m_samplesInRecurrentStep); - Matrix sliceGo = m_Go.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, m_samplesInRecurrentStep); + Matrix sliceGi = m_Gi.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, GetNumParallelSequences()); + Matrix sliceGf = m_Gf.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, GetNumParallelSequences()); + Matrix sliceGo = m_Go.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, GetNumParallelSequences()); - Matrix sliceTanhState = tanhState.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, m_samplesInRecurrentStep); - Matrix sliceTanhObs = tanhObs.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, m_samplesInRecurrentStep); + Matrix sliceTanhState = tanhState.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, GetNumParallelSequences()); + Matrix sliceTanhObs = tanhObs.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, GetNumParallelSequences()); - Matrix error = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, m_samplesInRecurrentStep); + Matrix error = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, GetNumParallelSequences()); Matrix grdToObsSlice(this->m_deviceId); @@ -627,7 +627,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { #endif PrepareThisErrorsBeforeBackProp(timeIdxInSeq, nT, error, stateError, grdToPrevOutput, grdToPrevState, - m_obs_error_from_future_minibatch, m_state_error_from_future_minibatch, m_samplesInRecurrentStep, &m_pMBLayout->GetM()); + m_obs_error_from_future_minibatch, m_state_error_from_future_minibatch, GetNumParallelSequences(), &m_pMBLayout->GetM()); #ifdef DEBUG_DECODER fprintf(stderr, "output error [%ld] norm = %.8e\n", timeIdxInSeq, error.FrobeniusNorm()); @@ -639,7 +639,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { grdToPrevOutput.SetValue(0); grdToPrevState.SetValue(0); - PrepareHistory(timeIdxInSeq, mSlicePrevOutput, mSlicePrevState, FunctionValues(), m_State, m_PastOutput, m_PastState, m_samplesInRecurrentStep, m_DefaultState, &m_pMBLayout->GetM()); + PrepareHistory(timeIdxInSeq, mSlicePrevOutput, mSlicePrevState, FunctionValues(), m_State, m_PastOutput, m_PastState, GetNumParallelSequences(), m_DefaultState, &m_pMBLayout->GetM()); ComputeInputGradientWrtGates( error, @@ -666,9 +666,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { grdToPrevState, m_tempMatrix ); - grdToObs.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, m_samplesInRecurrentStep).SetValue(grdToObsSlice); + grdToObs.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, GetNumParallelSequences()).SetValue(grdToObsSlice); - PrepareErrors(timeIdxInSeq, grdToPrevOutput, grdToPrevState, m_samplesInRecurrentStep, &m_pMBLayout->GetM()); + PrepareErrors(timeIdxInSeq, grdToPrevOutput, grdToPrevState, GetNumParallelSequences(), &m_pMBLayout->GetM()); } #ifdef DEBUG_DECODER fprintf(stderr, "after error prop b_c norm = %.8e\n", 
Inputs(4)->FunctionValues().ColumnSlice(0, 1).FrobeniusNorm()); @@ -917,16 +917,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { */ int GetSegInfo(size_t t, size_t streamid) { - if (streamid >= m_samplesInRecurrentStep) - LogicError("GetSegInfo: stream id %d is larger than the number of streams %d", streamid, m_samplesInRecurrentStep); + if (streamid >= GetNumParallelSequences()) + LogicError("GetSegInfo: stream id %d is larger than the number of streams %d", streamid, GetNumParallelSequences()); size_t nT = Inputs(0)->FunctionValues().GetNumCols(); if (t >= nT) LogicError("GetSegInfo: time %d times is larger than the total number of observations %d", t, nT); - int utt_t = (int)t / m_samplesInRecurrentStep; + int utt_t = (int)t / GetNumParallelSequences(); auto thisCol = m_pMBLayout->GetFrame(utt_t).first; - thisCol.Reshape(1, m_samplesInRecurrentStep); + thisCol.Reshape(1, GetNumParallelSequences()); return (int) thisCol.ColumnSlice(streamid, 1).Get00Element(); } @@ -939,12 +939,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t outputDim = Inputs(1)->FunctionValues().GetNumRows(); // save the hidden activities and output for the next minibatch - mLastOutput.Resize(outputDim, m_samplesInRecurrentStep); - mLastState.Resize(outputDim, m_samplesInRecurrentStep); + mLastOutput.Resize(outputDim, GetNumParallelSequences()); + mLastState.Resize(outputDim, GetNumParallelSequences()); - for (size_t i = 0; i < m_samplesInRecurrentStep; i++) + for (size_t i = 0; i < GetNumParallelSequences(); i++) { - for (int t = nT - m_samplesInRecurrentStep + i; t >= 0; t -= m_samplesInRecurrentStep) + for (int t = nT - GetNumParallelSequences() + i; t >= 0; t -= GetNumParallelSequences()) { if (GetSegInfo(t, i) == ((int) MinibatchPackingFlags::None)) { @@ -977,14 +977,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { tanhObs.Resize(outputDim, nT); tanhObs.SetValue(NAN); // set to this extrem value so, if anything wrong in later procedure, problems can be easily spotted. 
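            // Packing sketch (illustration only, inferred from the indexing in these nodes):
            // with S = GetNumParallelSequences() parallel streams, the minibatch matrix
            // interleaves streams frame by frame along the column dimension:
            //     columns [0, S)   = time step 0 of streams 0..S-1
            //     columns [S, 2S)  = time step 1 of streams 0..S-1, and so on,
            // so time step t is the column block starting at t * S -- hence the
            // ubiquitous FrameSlice(frameRange, frameRange.t() * S, S), and stream k
            // at time t is the single column t * S + k.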
- if (m_PastState.IsEmpty() || m_PastState.GetNumCols() != m_samplesInRecurrentStep) + if (m_PastState.IsEmpty() || m_PastState.GetNumCols() != GetNumParallelSequences()) { - m_PastState.Resize(outputDim, m_samplesInRecurrentStep); + m_PastState.Resize(outputDim, GetNumParallelSequences()); m_PastState.SetValue(m_DefaultState); } - if (m_PastOutput.IsEmpty() || m_PastOutput.GetNumCols() != m_samplesInRecurrentStep) + if (m_PastOutput.IsEmpty() || m_PastOutput.GetNumCols() != GetNumParallelSequences()) { - m_PastOutput.Resize(outputDim, m_samplesInRecurrentStep); + m_PastOutput.Resize(outputDim, GetNumParallelSequences()); } #ifdef DEBUG_DECODER @@ -994,21 +994,21 @@ namespace Microsoft { namespace MSR { namespace CNTK { fprintf(stderr, "LSTM node %ls past state norm = %.8e\n", this->NodeName().c_str(), m_PastState.FrobeniusNorm()); #endif - for (size_t timeIdxInSeq = 0; timeIdxInSeq < nT; timeIdxInSeq += m_samplesInRecurrentStep) + for (size_t timeIdxInSeq = 0; timeIdxInSeq < nT; timeIdxInSeq += GetNumParallelSequences()) { - FrameRange frameRange(timeIdxInSeq, m_samplesInRecurrentStep); - Matrix sliceObs = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), m_samplesInRecurrentStep); - Matrix sliceOutput = FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), m_samplesInRecurrentStep); - Matrix sliceState = m_State.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), m_samplesInRecurrentStep); + FrameRange frameRange(timeIdxInSeq, GetNumParallelSequences()); + Matrix sliceObs = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), GetNumParallelSequences()); + Matrix sliceOutput = FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), GetNumParallelSequences()); + Matrix sliceState = m_State.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), GetNumParallelSequences()); - Matrix sliceGi = m_Gi.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), m_samplesInRecurrentStep); - Matrix sliceGf = m_Gf.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), m_samplesInRecurrentStep); - Matrix sliceGo = m_Go.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), m_samplesInRecurrentStep); + Matrix sliceGi = m_Gi.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), GetNumParallelSequences()); + Matrix sliceGf = m_Gf.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), GetNumParallelSequences()); + Matrix sliceGo = m_Go.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), GetNumParallelSequences()); - Matrix sliceTanhState = tanhState.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), m_samplesInRecurrentStep); - Matrix sliceTanhInput = tanhObs.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), m_samplesInRecurrentStep); + Matrix sliceTanhState = tanhState.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), GetNumParallelSequences()); + Matrix sliceTanhInput = tanhObs.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), GetNumParallelSequences()); - PrepareHistory(timeIdxInSeq, mSlicePrevOutput, mSlicePrevState, FunctionValues(), m_State, m_PastOutput, m_PastState, m_samplesInRecurrentStep, 
m_DefaultState, &m_pMBLayout->GetM()); + PrepareHistory(timeIdxInSeq, mSlicePrevOutput, mSlicePrevState, FunctionValues(), m_State, m_PastOutput, m_PastState, GetNumParallelSequences(), m_DefaultState, &m_pMBLayout->GetM()); EvaluateThisNodeS(Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), Inputs(4)->FunctionValues(), sliceObs, mSlicePrevOutput, mSlicePrevState, sliceOutput, sliceState, sliceGi, sliceGf, sliceGo, sliceTanhState, sliceTanhInput, m_tempMatrix); diff --git a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h index ddafb73b04de..6f608b529d88 100644 --- a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h @@ -1237,8 +1237,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { FunctionValues().SetValue(0.0); Matrix funcVal = FunctionValues(); - size_t nstep = ncol / m_samplesInRecurrentStep; - for (size_t i = 0; i < m_samplesInRecurrentStep; i++) + size_t nstep = ncol / GetNumParallelSequences(); + for (size_t i = 0; i < GetNumParallelSequences(); i++) { Matrix postProbSlice = mPostProb.ColumnSlice(i * nstep, nstep); Matrix alphaSlice = mAlpha.ColumnSlice(i * nstep, nstep); @@ -1269,9 +1269,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { else if (inputIndex == 2) { size_t ncol = mAlpha.GetNumCols(); - size_t nstep = ncol / m_samplesInRecurrentStep; + size_t nstep = ncol / GetNumParallelSequences(); assert(Inputs(inputIndex)->GradientValues().GetNumElements() > 0); - for (size_t i = 0; i < m_samplesInRecurrentStep; i++) + for (size_t i = 0; i < GetNumParallelSequences(); i++) { ErrorSignalToTransitionNode( Inputs(0)->FunctionValues().ColumnSlice(i * nstep, nstep), diff --git a/Math/Math/Matrix.h b/Math/Math/Matrix.h index 224835b5f665..9a639422aff3 100644 --- a/Math/Math/Matrix.h +++ b/Math/Math/Matrix.h @@ -29,10 +29,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { // TODO: This may not belong here, but having it in ComputeNode would require syntax changes, while having it as a member here only requires a local find-replace. Let's make it work first, then decide how to refactor. // the looping versions of EvaluateThisNode() and ComputeInputPartial() take a frame range, through this structure // It can cast from a size_t, i.e. those functions can be called passing a size_t in place of the FrameRange. - // TODO: m_samplesInRecurrentStep should be subsumed here & removed from nodes + // TODO: GetNumParallelSequences() should be subsumed here & removed from nodes // TODO: Where this design currently breaks: - // - BatchModeNodes must access m_samplesInRecurrentStep, yet operate on the whole sequence - // - likewise, LSTMNode does its own iteration, hence needs access to m_samplesInRecurrentStep or NumCols() in the whole-batch iterator + // - BatchModeNodes must access GetNumParallelSequences(), yet operate on the whole sequence + // - likewise, LSTMNode does its own iteration, hence needs access to GetNumParallelSequences() or NumCols() in the whole-batch iterator // - RecurrentNodes access frames with a time shift, where out-of-bounds ones access a different matrix' values // - RecurrentNodes iterate over individual slices--need a sub-setting constructor from a FrameRange to another? 
// - RecurrentNodes access boundary info with a similar pattern, but boundary info has a different #streams (namely, 1)

From 42924c43a9efdd128c9da4c93dd5c18094111130 Mon Sep 17 00:00:00 2001
From: erw
Date: Fri, 18 Sep 2015 18:06:13 -0700
Subject: [PATCH 09/44] Add ReviseParameter function to MEL so users can revise
 the parameters of a given model.

An example:
> cat example.config
command=edit
precision=float
deviceId=-1
edit=[
    action=edit
    editPath=example.mel
]
> cat example.mel
model1 = LoadModel("lstm.model.100", format=cntk);
ReviseParameter("GlobalPrior", //path/to/prior/vector);
SaveModel(model1, "lstm.model.100.priorfloored");
> cat //path/to/prior/vector
5.2499845e-006
4.88998558e-006
4.89998547e-006
0.000525058422
0.000597978244
...
> $CNTK configFile=example.config
---
 MachineLearning/CNTK/ModelEditLanguage.cpp | 24 +++++++++++++++++++
 .../InputAndParamNodes.h                   | 18 ++++++++++++++
 2 files changed, 42 insertions(+)

diff --git a/MachineLearning/CNTK/ModelEditLanguage.cpp b/MachineLearning/CNTK/ModelEditLanguage.cpp
index 8e6641be5ee5..a31c4f386574 100644
--- a/MachineLearning/CNTK/ModelEditLanguage.cpp
+++ b/MachineLearning/CNTK/ModelEditLanguage.cpp
@@ -592,6 +592,30 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
             netNdlFrom->cn->RenameNode(node, nodeName.second);
         }
     }
+    else if (EqualInsensitive(name, "ReviseParameter"))
+    {
+        typedef LearnableParameter<ElemType> LearnableParameterNode;
+        if (params.size() != 2)
+            RuntimeError("Invalid number of parameters: Valid parameters are: ReviseParameter(nodeName, nodeParametersInASCIIPathName)");
+        std::string nodeName = params[0];
+        std::string paramPath = params[1];
+
+        NetNdl<ElemType>* netNdl;
+        vector<ComputationNodeBasePtr> nodes = FindSymbols(params[0], netNdl);
+
+        for (auto pNodes : nodes)
+        {
+            if (pNodes->OperationName() != LearnableParameter<ElemType>::TypeName())
+            {
+                fprintf(stderr, "WARNING: you want to change the parameter of node (%ls), but it is not a learnable parameter (it is a %ls node). Skipping this node\n",
+                    pNodes->NodeName().c_str(), pNodes->OperationName().c_str());
+                continue;
+            }
+            shared_ptr<LearnableParameterNode> pParamNode = std::dynamic_pointer_cast<LearnableParameterNode>(pNodes);
+            pParamNode->ReviseFromFile(msra::strfun::mbstowcs(paramPath));
+            fprintf(stderr, "Revise node %ls using parameter file %s\n", pNodes->NodeName().c_str(), paramPath.c_str());
+        }
+    }
     else
     {
         RuntimeError("Unknown Editor function %s", name.c_str());

diff --git a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h
index 1be91c8ef041..c0c551c31252 100644
--- a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h
+++ b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h
@@ -111,6 +111,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             auto array = File::LoadMatrixFromTextFile<ElemType>(msra::strfun::utf8(initFromFilePath), numRows, numCols); // TODO: change pathname to wstring
             FunctionValues().SetValue(numRows, numCols, array.data(), matrixFlagNormal, m_deviceId);
         }
+
+        void ReviseFromFile(const std::wstring & reviseFromFilePath)
+        {
+            size_t numRows = 0;
+            size_t numCols = 0;
+            auto array = File::LoadMatrixFromTextFile<ElemType>(msra::strfun::utf8(reviseFromFilePath), numRows, numCols); // TODO: change pathname to wstring
+            size_t nRows = m_functionValues.GetNumRows();
+            size_t nCols = m_functionValues.GetNumCols();
+
+            if (numRows != nRows || numCols != nCols)
+            {
+                RuntimeError("Error in ReviseFromFile for node %ls using file %ls: original size (%d x %d) vs current size (%d x %d)",
+                    m_nodeName.c_str(), reviseFromFilePath.c_str(), nRows, nCols, numRows, numCols);
+            }
+
+            FunctionValues().SetValue(numRows, numCols, array.data(), matrixFlagNormal, m_deviceId);
+
+        }

         virtual const std::wstring OperationName() const {return TypeName();}
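ReviseFromFile above deliberately refuses to change a parameter's dimensions: only the values are overwritten, so the model's topology stays intact. A minimal standalone sketch of that contract (the type and member names here are stand-ins for illustration, not the actual CNTK classes):

    #include <stdexcept>
    #include <vector>

    // Stand-in for a learnable parameter: a value matrix of fixed shape.
    struct ParamValues
    {
        size_t rows, cols;
        std::vector<float> data; // rows * cols entries

        // Same rule as ReviseFromFile: data loaded from the file must match the
        // existing dimensions exactly; only the contents are replaced.
        void Revise(size_t fileRows, size_t fileCols, const std::vector<float>& fileData)
        {
            if (fileRows != rows || fileCols != cols || fileData.size() != rows * cols)
                throw std::runtime_error("Revise: size mismatch");
            data = fileData; // values change; shape (and hence the graph) does not
        }
    };

In the commit-message example, "GlobalPrior" would be such a fixed-shape vector, and the prior file must contain exactly as many values as the node already holds.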
From 93d93e0a5abcb93eba4a53be23414d2ecbe35281 Mon Sep 17 00:00:00 2001
From: Frank Seide
Date: Fri, 18 Sep 2015 20:53:24 -0700
Subject: [PATCH 10/44] ComputationNetwork now has a second layout, pMBNoLayout,
 which matches pMBLayout in #sequences but is otherwise empty, and is used for
 nodes that do not require sequential processing; m_samplesInRecurrentStep now
 gone from ComputationNode, if needed, the value is determined from
 pMBLayout--yay! One more down; Matrix::SetValue() now happily accepts empty
 matrices (no reason why it should not); (made gc happy again)
---
 .../ComputationNetwork.h | 22 +++++++++---------
 .../ComputationNode.h    | 23 ++++++++-----------
 Math/Math/Matrix.cpp     | 10 ++++----
 Math/Math/Matrix.h       |  7 ++++--
 Tests/Speech/README.txt  |  7 +++++-
 5 files changed, 38 insertions(+), 31 deletions(-)

diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h
index adf9136b9ad5..d251bada48b6 100644
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h
@@ -75,7 +75,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb
     // -----------------------------------------------------------------------

     ComputationNetwork(DEVICEID_TYPE deviceId = AUTOPLACEMATRIX) :
-        m_deviceId(deviceId), m_pMBLayout(make_shared<MBLayout>())
+        m_deviceId(deviceId), m_pMBLayout(make_shared<MBLayout>()), m_pMBNoLayout(make_shared<MBLayout>())
     {
         m_randomSeedOffset = 0;
         m_actMiniBSize = 0;
@@ -595,12 +595,10 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb

     // TODO: rename to ForwardProp()? To make it very clear?
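    // A sketch of what the two layouts look like side by side (illustration only;
    // the numbers are made up, and it assumes the Resize(numParallelSequences,
    // numTimeSteps) call used inside Evaluate() below):
    //     m_pMBLayout   : 3 sequences x 20 time steps, with per-frame packing flags
    //     m_pMBNoLayout : 3 sequences x  0 time steps, no flags at all
    // Both report the same GetNumParallelSequences(), but IsAllNone() is true for
    // the dummy layout, so nodes that run in batch mode never consult (or mask by)
    // sequence boundaries.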
void Evaluate(const ComputationNodeBasePtr rootNode) { - // checks that will disappear once we complete the refactoring. If this passes for a while, we will eliminate one - // If this fails, comment this out (it is safe) and tell fseide@microsoft.com. - if (GetNumParallelSequences() != m_pMBLayout->GetNumParallelSequences()) - LogicError("Evaluate: detected that m_nbrSlicesInEachRecurrentIteration != m_pMBLayout->GetNumParallelSequences()"); - if (m_pMBLayout->GetNumTimeSteps() != m_pMBLayout->GetSize()) - LogicError("Evaluate: detected that m_pMBLayout->GetNumTimeSteps() != m_pMBLayout->GetSize()"); + // We have a matching layout structure that matches pMBLayout in number of sequences while not having any flags set. + // This is used for nodes that do not need recurrent processing, but can be done in batch. + // TODO: Does it harm if we have flags, for those that can be done in batch? I.e. why don't we just always provide flags? + m_pMBNoLayout->Resize(m_pMBLayout->GetNumParallelSequences(), 0); // TODO: this is not nice, but we currently have no trigger to detect changes in layout // prepare to compute with the subnetwork that this rootNode depends on, including // - auto-detecting recurrent loops @@ -623,10 +621,11 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb // TODO: in the future, these will be different on different nodes for (auto nodeIter = allNodes.begin(); nodeIter != allNodes.end(); nodeIter++) { - // TODO: nbrSlices set once to the same value for all nodes each evaluation--is it ever changed later? - (*nodeIter)->SetNumParallelSequences(GetNumParallelSequences()); if ((*nodeIter)->ReqMultiSeqHandling()) (*nodeIter)->ResetBound(m_pMBLayout); + else + (*nodeIter)->ResetBound(m_pMBNoLayout); + (*nodeIter)->VerifyNumParallelSequences(GetNumParallelSequences()); } for (auto nodeIter = allNodes.begin(); nodeIter != allNodes.end(); nodeIter++) @@ -723,7 +722,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb { for (auto nodeIter = recurrentNodes.rbegin(); nodeIter != recurrentNodes.rend(); ++nodeIter) { - (*nodeIter)->SetNumParallelSequences(GetNumParallelSequences()); // TODO: move to FrameRange object + (*nodeIter)->VerifyNumParallelSequences(GetNumParallelSequences()); // TODO: move to FrameRange object (*nodeIter)->ComputeGradientForChildren(timeIndex); } } @@ -734,7 +733,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb { for (auto nodeIter = recurrentNodes.rbegin(); nodeIter != recurrentNodes.rend(); ++nodeIter) { - (*nodeIter)->SetNumParallelSequences(GetNumParallelSequences()); + (*nodeIter)->VerifyNumParallelSequences(GetNumParallelSequences()); (*nodeIter)->ComputeGradientForChildren(timeIndex); } } @@ -1580,6 +1579,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb // used for sentence boundary information passed from reader to reset RNN state // specify how the minibatch is packed for each sample MBLayoutPtr m_pMBLayout; + MBLayoutPtr m_pMBNoLayout; // this one is a dummy, passed when no layout is available/should be used int m_actMiniBSize; diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h index 8b5f0265f403..28887e96e55f 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h @@ -246,23 +246,20 @@ namespace Microsoft { namespace MSR { namespace CNTK { return 
m_loopId; } - // TODO: these two will disappear once the information is correctly held in a FrameRange record - // This is called at 3 places; two are directly before ComputeGradientForChildren(). - void SetNumParallelSequences(size_t bsz) + // temporary function that is called to verify stuff is called as I think it is. Delete if this does not fire for a while. + void VerifyNumParallelSequences(size_t bsz) { - m_samplesInRecurrentStep = bsz; + //m_samplesInRecurrentStep = bsz; + if (bsz != m_pMBLayout->GetNumParallelSequences()) + LogicError("VerifyNumParallelSequences: value inconsistent with MB layout"); } - // Note: only used in one place, SimpleEvaluator.h PreComputeActivityAtTime(). - // The member is, however, read out at 284 places inside nodes, - // most of the time as + // This is used at 284 places inside nodes, most of the time as // FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()) - // This expression will be turned into a function call to right here, so that we compute this only at one place - // and can also handle the full-minibatch case. - // Let us try to get this member out of this class altogether; it belongs elsewhere. size_t GetNumParallelSequences() const { - return m_samplesInRecurrentStep; + //return m_samplesInRecurrentStep; + return m_pMBLayout->GetNumParallelSequences(); } int64_t UpdateEvalTimeStamp() @@ -682,7 +679,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { /// the order in reverse graph. int m_visitedOrder; int m_index; - int m_lowlink; + int m_lowlink; // TODO: comment this, as it is not obvious bool m_visited; bool m_inStack; int m_indexInLoop; @@ -1281,7 +1278,7 @@ protected: \ using Base::m_visitedOrder; using Base::m_index; using Base::m_lowlink; using Base::m_visited; using Base::m_inStack; \ using Base::m_indexInLoop; \ using Base::m_pMBLayout; \ - using Base::m_reqMultiSeqHandling; using Base::UseCustomizedMultiSeqHandling; \ + using Base::m_reqMultiSeqHandling; using Base::UseCustomizedMultiSeqHandling; using Base::GetNumParallelSequences; \ using Base::m_children; using Base::m_deviceId; using Base::m_evalTimeStamp; using Base::m_functionValues; using Base::m_gradientValues; \ using Base::m_inputChannels; using Base::m_inputHeight; using Base::m_inputWidth; using Base::m_needGradient; using Base::m_nodeName; \ using Base::m_outputChannels; using Base::m_outputHeight; using Base::m_outputWidth; using Base::s_constOnes; using Base::s_timeStampCounter; \ diff --git a/Math/Math/Matrix.cpp b/Math/Math/Matrix.cpp index 31199feb646a..e122905be16f 100644 --- a/Math/Math/Matrix.cpp +++ b/Math/Math/Matrix.cpp @@ -1005,8 +1005,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void Matrix::SetValue(const ElemType v) { - if (IsEmpty()) - throw std::logic_error("SetValue: Matrix is empty."); + if (IsEmpty()) // if empty then we are done + return; + //throw std::logic_error("SetValue: Matrix is empty."); DISPATCH_MATRIX_ON_FLAG(this, this, @@ -1020,8 +1021,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { template void Matrix::SetValue(const DeviceBoundNumber& db_number) { - if (IsEmpty()) - throw std::logic_error("SetValue: Matrix is empty."); + if (IsEmpty()) // if empty then we are done + return; + //throw std::logic_error("SetValue: Matrix is empty."); DISPATCH_MATRIX_ON_FLAG(this, this, diff --git a/Math/Math/Matrix.h b/Math/Math/Matrix.h index 9a639422aff3..2fc25539fb26 100644 --- a/Math/Math/Matrix.h +++ b/Math/Math/Matrix.h @@ 
-602,10 +602,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { // these accessors were for now just collected from actual usage; need to be cleaned up once this compiles again size_t GetNumTimeSteps() const { validate(); return m_sentenceBoundaryFlags.GetNumCols(); } - size_t GetNumParallelSequences() const { return IsAllNone() ? 1 : m_sentenceBoundaryFlags.GetNumRows(); } // 1 stream if no matrix + size_t GetNumParallelSequences() const { return (m_sentenceBoundaryFlags.GetNumRows() == 0) ? 1 : m_sentenceBoundaryFlags.GetNumRows(); } // 1 stream if no matrix size_t GetSize() const { validate(); return m_minibatchPackingFlags.size(); } - // ^^ TODO: add a check whether Size() == GetNumTimeSteps(); it really should, unless I misunderstood + + // if we have no matrix/vector, this means no frame has any flag set + // We still can have a number of rows in this case. bool IsAllNone() const { validate(); return m_minibatchPackingFlags.empty(); } + #if 0 // we have this pattern often: // TODO: mbSize and #slices must also move into MBLayout evalnet->SetActualMiniBatchSize(mbSize); diff --git a/Tests/Speech/README.txt b/Tests/Speech/README.txt index 93287fcbea3a..58ce4785c15c 100644 --- a/Tests/Speech/README.txt +++ b/Tests/Speech/README.txt @@ -26,8 +26,13 @@ bin/cntk configFile=Tests/Speech/QuickE2E/cntk.config RunDir=Tests/Speech/RunDir WORKING DIR: $(SolutionDir)Tests\Speech\Data COMMAND: configFile=$(SolutionDir)Tests\Speech\LSTM\cntk.config stderr=$(SolutionDir)Tests\Speech\RunDir\LSTM\models\cntkSpeech.dnn.log RunDir=$(SolutionDir)Tests\Speech\RunDir\LSTM NdlDir=$(SolutionDir)Tests\Speech\LSTM DataDir=$(SolutionDir)Tests\Speech\Data DeviceId=Auto +--- MNIST: + +WORKING DIR: $(SolutionDir)ExampleSetups\Image\MNIST +COMMAND: configFile=02_Conv.config configName=02_Conv + + Simple test ----------- -../build/debug/bin/cntk configFile=/home/cbasoglu/src/cntk/.run-linux/Simple.conf COMMAND: configFile=$(SolutionDir)Demos\Simple\Simple.config stderr=$(SolutionDir)Demos\Simple\RunDir\Simple.config.log RootDir=$(SolutionDir) DeviceNumber=-1 From a1173a48ccb3381f4a6e1798902a0237827aff57 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Fri, 18 Sep 2015 21:12:13 -0700 Subject: [PATCH 11/44] fixed the spelling/casing of a few recurrence-related ComputationNode member accessors --- .../ComputationNetwork.cpp | 34 ++++---- .../ComputationNode.h | 86 +++++-------------- .../EvaluationCriterionNodes.h | 2 +- .../LinearAlgebraNodes.h | 20 ++--- 4 files changed, 51 insertions(+), 91 deletions(-) diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp index 960cf8c45d54..681d7d09b6d7 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp @@ -408,9 +408,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { void ComputationNetwork::ClearCalcOrderCaches() { - for (std::map>::iterator it = m_cacheEvalOrders.begin(); it != m_cacheEvalOrders.end(); ++it) - for (auto iter2 = m_cacheEvalOrders[it->first].begin(); iter2 != m_cacheEvalOrders[it->first].end(); iter2++) - (*iter2)->clearCache(); + for (auto it : m_cacheEvalOrders) + for (auto iter2 : m_cacheEvalOrders[it.first]) + iter2->ClearCache(); m_cacheEvalOrders.clear(); m_cacheGradientCalcOrders.clear(); } @@ -419,15 +419,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { { /// merge loops if they have the same source node std::vector m_recurrentInfoTmp; - 
if (m_recurrentInfo.size() <= 1) - return; + if (m_recurrentInfo.size() <= 1) + return; for (auto iter = m_recurrentInfo.begin(); iter != m_recurrentInfo.end(); iter++) { if (m_recurrentInfoTmp.size() == 0) { RecurrentInfo rInfo; - rInfo.Copy(*iter); + rInfo.Copy(*iter); m_recurrentInfoTmp.push_back(rInfo); } else @@ -476,7 +476,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { std::list sccStack; size_t index = 0; size_t loopId = 0; - if (rootNode->isVisisted() == false) + if (rootNode->IsVisisted() == false) strongSCC(rootNode, sccStack, index, loopId); } @@ -486,7 +486,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t& index, size_t& loopId) { cur->SetIndex(index); - cur->Setlowlink(index); + cur->SetLowLink(index); index++; cur->SetVisited(true); @@ -498,19 +498,19 @@ namespace Microsoft { namespace MSR { namespace CNTK { // pairnetwork is the socket from other network, so ignore its children, which are in the other networks for (int i = 0; i < cur->ChildrenSize(); i++) { - if (cur->GetChildren()[i]->isVisisted() == false) + if (cur->GetChildren()[i]->IsVisisted() == false) { strongSCC(cur->GetChildren()[i], sccStack, index, loopId); - cur->Setlowlink(min(cur->Getlowlink(), cur->GetChildren()[i]->Getlowlink())); + cur->SetLowLink(min(cur->GetLowLink(), cur->GetChildren()[i]->GetLowLink())); } - else if (cur->GetChildren()[i]->isInStack()) + else if (cur->GetChildren()[i]->IsInStack()) { - cur->Setlowlink(min(cur->Getlowlink(), cur->GetChildren()[i]->Getlowlink())); + cur->SetLowLink(min(cur->GetLowLink(), cur->GetChildren()[i]->GetLowLink())); } } } - if (cur->Getlowlink() == cur->GetIndex()) // something special has happened --TODO: comment what that was!! + if (cur->GetLowLink() == cur->GetIndex()) // something special has happened --TODO: comment what that was!! { RecurrentInfo rInfo; rInfo.m_loopId = loopId; @@ -549,7 +549,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { cur->OperationName() != OperationNameOf(FutureValueNode)) { for (size_t i = 0; i < cur->ChildrenSize(); i++) - if (cur->GetChildren()[i]->LoopId() == cur->LoopId()) + if (cur->GetChildren()[i]->GetLoopId() == cur->GetLoopId()) getLoopForwordOrder(visited, recStack, nodesStack, cur->GetChildren()[i]); } recStack.erase(cur); @@ -626,7 +626,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNodeBasePtr nodeRecIter = (*iter).m_recurrentNodes[j]; for (size_t i = 0; i < nodeRecIter->ChildrenSize(); i++) { - if (nodeRecIter->GetChildren()[i]->LoopId() == nodeRecIter->LoopId() && + if (nodeRecIter->GetChildren()[i]->GetLoopId() == nodeRecIter->GetLoopId() && nodeRecIter->OperationName() != OperationNameOf(PastValueNode) && nodeRecIter->OperationName() != OperationNameOf(FutureValueNode)) // TODO: test for type RecurrentNode instead? 
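// Note on strongSCC()/m_lowLink above: this is Tarjan's strongly-connected-components
// algorithm. m_lowLink is the smallest DFS index reachable from a node's DFS subtree
// using at most one back edge; a node whose lowlink equals its own DFS index is the
// root of an SCC, and each multi-node SCC is one recurrent loop. A minimal
// self-contained sketch of the idea (hypothetical types, not the CNTK code):

#include <algorithm>
#include <list>
#include <vector>

struct LoopNode                         // stand-in for a node's loop-detection state
{
    std::vector<LoopNode *> children;
    int index = -1, lowLink = -1, loopId = -1;
    bool visited = false, inStack = false;
};

static void StrongSCC(LoopNode * cur, std::list<LoopNode *> & sccStack, int & index, int & loopId)
{
    cur->index = cur->lowLink = index++;
    cur->visited = cur->inStack = true;
    sccStack.push_back(cur);
    for (LoopNode * child : cur->children)
    {
        if (!child->visited)            // tree edge: recurse, then propagate lowlink up
        {
            StrongSCC(child, sccStack, index, loopId);
            cur->lowLink = std::min(cur->lowLink, child->lowLink);
        }
        else if (child->inStack)        // back edge into the current DFS stack
            cur->lowLink = std::min(cur->lowLink, child->lowLink);
    }
    if (cur->lowLink == cur->index)     // 'cur' roots an SCC: pop its members off as one loop
    {
        LoopNode * member;
        do
        {
            member = sccStack.back();
            sccStack.pop_back();
            member->inStack = false;
            member->loopId = loopId;    // single-node SCCs would be filtered out afterwards
        } while (member != cur);
        loopId++;
    }
}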
{ @@ -681,8 +681,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { DetermineLoopTypes(); - for (auto iter = nodes.begin(); iter != nodes.end(); iter++) - (*iter)->clearCache(); + for (auto iter : nodes) + iter->ClearCache(); } void ComputationNetwork::DetermineLoopTypes() diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h index 28887e96e55f..b873a9612345 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h @@ -75,7 +75,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_samplesInRecurrentStep(1), m_visitedOrder(-1), m_index(-1), - m_lowlink(-1), + m_lowLink(-1), m_indexInLoop(0), m_visited(false), m_inStack(false), @@ -172,79 +172,39 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_pMBLayout = pMBLayout; } - void SetLoopId(const int id) { m_loopId = id; } - void SetVisitedOrder(const int id) { m_visitedOrder = id; } - void SetIndex(const size_t ind) { m_index = ind; } - - void Setlowlink(const size_t lowlink) - { - m_lowlink = lowlink; - } - - void SetVisited(const bool visited) - { - m_visited = visited; - } - - void SetInStack(const bool instack) - { - m_inStack = instack; - } - - void SetIndexInLoop(const size_t index) - { - m_indexInLoop = index; - } - - void clearCache() + void ClearCache() { m_loopId = -1; m_visitedOrder = -1; m_index = -1; - m_lowlink = -1; + m_lowLink = -1; m_indexInLoop = 0; m_visited = false; m_inStack = false; } - size_t GetIndex() const - { - return m_index; - } + void SetLoopId(const int id) { m_loopId = id; } + int GetLoopId() const { return m_loopId; } - size_t GetVisitedOrder() const - { - return m_visitedOrder; - } + void SetVisitedOrder(const int id) { m_visitedOrder = id; } + size_t GetVisitedOrder() const { return m_visitedOrder; } - size_t Getlowlink() const - { - return m_lowlink; - } + void SetIndex(const size_t ind) { m_index = ind; } + size_t GetIndex() const { return m_index; } - size_t GetIndexInLoop() const - { - return m_indexInLoop; - } + void SetLowLink(const size_t lowlink) { m_lowLink = lowlink; } + size_t GetLowLink() const { return m_lowLink; } - std::wstring GetName() const - { - return m_nodeName; - } + void SetVisited(const bool visited) { m_visited = visited; } + bool IsVisisted() const { return m_visited; } - bool isVisisted() const - { - return m_visited; - } + void SetInStack(const bool instack) { m_inStack = instack; } + bool IsInStack() const { return m_inStack; } - bool isInStack() const - { - return m_inStack; - } - int LoopId() const - { - return m_loopId; - } + void SetIndexInLoop(const size_t index) { m_indexInLoop = index; } + size_t GetIndexInLoop() const { return m_indexInLoop; } + + std::wstring GetName() const { return m_nodeName; } // temporary function that is called to verify stuff is called as I think it is. Delete if this does not fire for a while. 
void VerifyNumParallelSequences(size_t bsz) @@ -523,8 +483,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (!IsLeaf()) m_needGradient = ChildrenNeedGradient(); //only nodes that require gradient calculation is included in gradient calculation - if (LoopId() >= 0) - recurrentResult[LoopId()].push_back(shared_from_this()); + if (GetLoopId() >= 0) + recurrentResult[GetLoopId()].push_back(shared_from_this()); else noRecurrentResult.push_back(shared_from_this()); //we put this in the list even if it's leaf since we need to use it to determine learnable params } @@ -679,7 +639,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { /// the order in reverse graph. int m_visitedOrder; int m_index; - int m_lowlink; // TODO: comment this, as it is not obvious + int m_lowLink; // TODO: comment this, as it is not obvious bool m_visited; bool m_inStack; int m_indexInLoop; @@ -1275,7 +1235,7 @@ public: \ using Base::SaveToFile; using Base::SetFunctionAndGradientSize; using Base::SetInput; using Base::Validate; \ protected: \ using Base::m_loopId; using Base::m_samplesInRecurrentStep; \ - using Base::m_visitedOrder; using Base::m_index; using Base::m_lowlink; using Base::m_visited; using Base::m_inStack; \ + using Base::m_visitedOrder; using Base::m_index; using Base::m_lowLink; using Base::m_visited; using Base::m_inStack; \ using Base::m_indexInLoop; \ using Base::m_pMBLayout; \ using Base::m_reqMultiSeqHandling; using Base::UseCustomizedMultiSeqHandling; using Base::GetNumParallelSequences; \ diff --git a/MachineLearning/CNTKComputationNetworkLib/EvaluationCriterionNodes.h b/MachineLearning/CNTKComputationNetworkLib/EvaluationCriterionNodes.h index 004c0c449abd..a60ea5c604e1 100644 --- a/MachineLearning/CNTKComputationNetworkLib/EvaluationCriterionNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/EvaluationCriterionNodes.h @@ -90,7 +90,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { LogicError("ErrorPrediction operation: one of the operants has 0 element."); if (((!(Inputs(0)->FunctionValues().GetNumRows() == Inputs(1)->FunctionValues().GetNumRows() && //match size - Inputs(0)->FunctionValues().GetNumCols() == Inputs(1)->FunctionValues().GetNumCols()) )) && Inputs(0)->LoopId() < 0) + Inputs(0)->FunctionValues().GetNumCols() == Inputs(1)->FunctionValues().GetNumCols()) )) && Inputs(0)->GetLoopId() < 0) { LogicError("The Matrix dimension in the ErrorPrediction operation does not match."); } diff --git a/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h b/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h index ebc7eef2d904..69810bfd5674 100644 --- a/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h @@ -832,22 +832,22 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols0 = Inputs(0)->FunctionValues().GetNumCols(); size_t rows1 = Inputs(1)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols(); - if ((rows0 == 0 || cols1 == 0 ) && this->LoopId() < 0) + if ((rows0 == 0 || cols1 == 0 ) && this->GetLoopId() < 0) throw logic_error("Times operation: Inputs(0)->FunctionValues().GetNumRows() and Inputs(1)->FunctionValues().GetNumCols() should not be 0 since it cannot be automatically inferred"); // TODO: use dynamic_pointer_cast // TODO: why should these nodes even care whether their inputs are LearnableParmaeters? If needed, can the base class do this? 
- if ((Inputs(0)->OperationName() == OperationNameOf(LearnableParameter) && cols0 == 0 && rows1 != 0) && this->LoopId() < 0) + if ((Inputs(0)->OperationName() == OperationNameOf(LearnableParameter) && cols0 == 0 && rows1 != 0) && this->GetLoopId() < 0) Inputs(0)->FunctionValues().Resize(rows0, rows1); if (Inputs(1)->OperationName() == OperationNameOf(LearnableParameter) && cols0 != 0 && rows1 == 0) Inputs(1)->FunctionValues().Resize(cols0, cols1); - if ((Inputs(0)->FunctionValues().HasNoElements() || Inputs(1)->FunctionValues().HasNoElements())&& this->LoopId() < 0) + if ((Inputs(0)->FunctionValues().HasNoElements() || Inputs(1)->FunctionValues().HasNoElements())&& this->GetLoopId() < 0) LogicError("Times operation: One of the operants has 0 elements."); //cols0 and rows1 may have been changed so don't use them in the following check - if ((Inputs(1)->FunctionValues().GetNumRows() != Inputs(0)->FunctionValues().GetNumCols()) && this->LoopId() < 0) + if ((Inputs(1)->FunctionValues().GetNumRows() != Inputs(0)->FunctionValues().GetNumCols()) && this->GetLoopId() < 0) { LogicError("The Matrix dimension in the Times operation does not match."); } @@ -1000,20 +1000,20 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols0 = Inputs(0)->FunctionValues().GetNumCols(); size_t rows1 = Inputs(1)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols(); - if ((rows0 == 0 || cols1 == 0) && this->LoopId() < 0) + if ((rows0 == 0 || cols1 == 0) && this->GetLoopId() < 0) throw logic_error("TransposeTimes operation: Inputs(0)->FunctionValues().GetNumRows() and Inputs(1)->FunctionValues().GetNumCols() should not be 0 since it cannot be automatically inferred"); - if ((Inputs(0)->OperationName() == OperationNameOf(LearnableParameter) && cols0 == 0 && rows1 != 0) && this->LoopId() < 0) + if ((Inputs(0)->OperationName() == OperationNameOf(LearnableParameter) && cols0 == 0 && rows1 != 0) && this->GetLoopId() < 0) Inputs(0)->FunctionValues().Resize(rows0, rows1); if (Inputs(1)->OperationName() == OperationNameOf(LearnableParameter) && cols0 != 0 && rows1 == 0) Inputs(1)->FunctionValues().Resize(cols0, cols1); - if ((Inputs(0)->FunctionValues().HasNoElements() || Inputs(1)->FunctionValues().HasNoElements()) && this->LoopId() < 0) + if ((Inputs(0)->FunctionValues().HasNoElements() || Inputs(1)->FunctionValues().HasNoElements()) && this->GetLoopId() < 0) LogicError("TransposeTimes operation: One of the operants has 0 elements."); //cols0 and rows1 may have been changed so don't use them in the following check - if ((Inputs(1)->FunctionValues().GetNumRows() != Inputs(0)->FunctionValues().GetNumRows()) && this->LoopId() < 0) + if ((Inputs(1)->FunctionValues().GetNumRows() != Inputs(0)->FunctionValues().GetNumRows()) && this->GetLoopId() < 0) { LogicError("The Matrix dimension in the TransposeTimes operation does not match."); } @@ -1679,7 +1679,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { Inputs(index)->FunctionValues().Resize(rows, cols); } - if ((Inputs(0)->FunctionValues().HasNoElements() || Inputs(1)->FunctionValues().HasNoElements()) && this->LoopId() < 0) + if ((Inputs(0)->FunctionValues().HasNoElements() || Inputs(1)->FunctionValues().HasNoElements()) && this->GetLoopId() < 0) LogicError("Plus operation: one of the operants has 0 element."); size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols0 = Inputs(0)->FunctionValues().GetNumCols(); @@ -1689,7 +1689,7 @@ namespace Microsoft { namespace MSR { 
namespace CNTK { !((rows0 == 1 || rows1 == 1) && cols0 == cols1) && //one is row vec !( (cols0 > cols1 && cols0 % cols1 == 0) || (cols0 == 1 && rows1 % rows0 == 0) || - (cols1 == 1 && rows0 % rows1 == 0))) && this->LoopId() < 0) //one is col vec with divisable rows, including scalar + (cols1 == 1 && rows0 % rows1 == 0))) && this->GetLoopId() < 0) //one is col vec with divisable rows, including scalar { LogicError("The Matrix dimension in the Plus operation does not match."); } From 5b27cda6a8b43929c9ae584bc3fa84454a77eb49 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Fri, 18 Sep 2015 21:19:27 -0700 Subject: [PATCH 12/44] (made nvcc/Linux happy) --- Math/Math/MatrixQuantizerGPU.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Math/Math/MatrixQuantizerGPU.h b/Math/Math/MatrixQuantizerGPU.h index 9247bcbf1ce5..0435452cfc6a 100644 --- a/Math/Math/MatrixQuantizerGPU.h +++ b/Math/Math/MatrixQuantizerGPU.h @@ -1,7 +1,7 @@ #pragma once +#include "QuantizedMatrix.h" // TODO: strangely, this must be included first, although it is the first thing MatrixQuantizer.h includes. Without, nvcc fails. #include "MatrixQuantizer.h" -#include "QuantizedMatrix.h" #include "ColumnQuantizer.h" #include "GPUMatrix.h" #ifndef CPUONLY From 4b7b6855c96f67387c73f11d60f9f0523789de3e Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Fri, 18 Sep 2015 21:27:12 -0700 Subject: [PATCH 13/44] HTKMLFReader::CopyMBLayoutTo() now resets pMBLayout to default in frame mode (instead of leaving it untouched--readers should not make assumptions on downstream consumers' defaults) --- DataReader/HTKMLFReader/HTKMLFReader.cpp | 3 ++- Math/Math/Matrix.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DataReader/HTKMLFReader/HTKMLFReader.cpp b/DataReader/HTKMLFReader/HTKMLFReader.cpp index ec015a65e51d..8ee76f3152c7 100644 --- a/DataReader/HTKMLFReader/HTKMLFReader.cpp +++ b/DataReader/HTKMLFReader/HTKMLFReader.cpp @@ -1607,7 +1607,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (!m_framemode) *pMBLayout = *m_pMBLayout; - // TODO: what about frame mode? Should we create a dummy one? Or Clear() it? Reader should not know what ComputationNetworks' defaults are. + else + pMBLayout->SetAllNone(); // no flags in frame mode } diff --git a/Math/Math/Matrix.h b/Math/Math/Matrix.h index 2fc25539fb26..34eac851b7f8 100644 --- a/Math/Math/Matrix.h +++ b/Math/Math/Matrix.h @@ -608,6 +608,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // if we have no matrix/vector, this means no frame has any flag set // We still can have a number of rows in this case. 
bool IsAllNone() const { validate(); return m_minibatchPackingFlags.empty(); } + void SetAllNone() { Resize(0, 0); } #if 0 // we have this pattern often: // TODO: mbSize and #slices must also move into MBLayout From a47889e500d642789faf4d0da3ad1b885f81b972 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Fri, 18 Sep 2015 22:14:24 -0700 Subject: [PATCH 14/44] unified the two versions of FindInRecurrentLoops(); renamed m_actMiniBSize to m_actualMBSize; moved EvaluateLoop() back to Evaluate(), no point in having a separate function --- .../ComputationNetwork.cpp | 4 +- .../ComputationNetwork.h | 147 ++++++++++-------- MachineLearning/CNTKSGDLib/SGD.cpp | 8 +- 3 files changed, 87 insertions(+), 72 deletions(-) diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp index 681d7d09b6d7..8accfc5dfde5 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp @@ -743,10 +743,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { accessed.assign(m_recurrentInfo.size(), false); for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++) { - int iId = FindInRecurrentLoop(*nodeIter); + const vector* pRecurrentNodesDummy; + int iId = FindInRecurrentLoops(*nodeIter, pRecurrentNodesDummy); if (iId >= 0) { - if (!accessed[iId]) { newList.insert(newList.end(), diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h index d251bada48b6..b1ea714fd516 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h @@ -78,7 +78,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb m_deviceId(deviceId), m_pMBLayout(make_shared()), m_pMBNoLayout(make_shared()) { m_randomSeedOffset = 0; - m_actMiniBSize = 0; + m_actualMBSize = 0; SetDeviceId(deviceId); } @@ -513,24 +513,26 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb // evaluation // ----------------------------------------------------------------------- - int FindInRecurrentLoop(const ComputationNodeBasePtr startNode, vector& recurrentNodes) + // find if node is part of a recurrent loop; and return the loop id + // If found then return a pointer to the list of nodes of this loop. + // TODO: This should just return &m_recurrentInfo of the matching loop, or nullptr if no match. If needed, m_recurrentInfo knows its loop id. 
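// (A hypothetical call site, to illustrate the contract -- not code from this patch:
//      const vector<ComputationNodeBasePtr> * pLoopNodes = nullptr;
//      int loopId = FindInRecurrentLoops(node, pLoopNodes);
//      if (loopId != -1)   // node belongs to loop 'loopId'; *pLoopNodes lists the
//          ...             // loop's members in forward order, processed frame by frame
//  A return value of -1 means the node is in no loop and can be evaluated over the
//  entire minibatch in one go.)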
+ int FindInRecurrentLoops(const ComputationNodeBasePtr node, const vector* & pRecurrentNodes) const { - int iFound = -1; - - for (auto iter = m_recurrentInfo.begin(); iter != m_recurrentInfo.end(); iter++) + // look in all recurrent loops of the network + for (const auto & iter : m_recurrentInfo) { - if (std::find((*iter).m_recurrentNodes.begin(), (*iter).m_recurrentNodes.end(), startNode) != (*iter).m_recurrentNodes.end()) + if (std::find(iter.m_recurrentNodes.begin(), iter.m_recurrentNodes.end(), node) != iter.m_recurrentNodes.end()) { - iFound = (*iter).m_loopId; - recurrentNodes = (*iter).m_recurrentNodesForForward; - break; + // found + pRecurrentNodes = &iter.m_recurrentNodesForForward; + return iter.m_loopId; } } - - return iFound; + return -1; // not part of a recurrent loop } - int FindInRecurrentLoop(const ComputationNodeBasePtr startNode) +#if 0 + int FindInRecurrentLoops(const ComputationNodeBasePtr startNode) const { int iFound = -1; @@ -545,45 +547,12 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb return iFound; } +#endif bool IsFuncValueOlderThanInputs(const std::vector& recurrentNodes); - void EvaluateLoop(std::list& /*allNodes*/, const ComputationNodeBasePtr startNode) + void EvaluateLoop(const ComputationNodeBasePtr startNode) { - std::vector recurrentNodes; - int iLoopId = FindInRecurrentLoop(startNode, recurrentNodes); - if (iLoopId != -1 && IsFuncValueOlderThanInputs(recurrentNodes) && m_recurrentInfo[iLoopId].m_completedEvaluate == false) - { - for (auto nodeIter = recurrentNodes.begin(); nodeIter != recurrentNodes.end(); nodeIter++) - (*nodeIter)->SetFunctionAndGradientSize(m_actMiniBSize); - - int iMBSize = m_actMiniBSize / GetNumParallelSequences(); - - if (m_recurrentInfo[iLoopId].m_isForwardLoop) - { - for (int timeIndex = 0; timeIndex < iMBSize; timeIndex ++) - { - for (auto nodeIter = recurrentNodes.begin(); nodeIter != recurrentNodes.end(); nodeIter++) - { - (*nodeIter)->EvaluateThisNodeGivenInputs(timeIndex); - (*nodeIter)->UpdateEvalTimeStamp(); - } - } - } - else - { - for (int timeIndex = iMBSize-1; timeIndex >= 0; timeIndex--) - { - for (auto nodeIter = recurrentNodes.begin(); nodeIter != recurrentNodes.end(); nodeIter++) - { - (*nodeIter)->EvaluateThisNodeGivenInputs(timeIndex); - (*nodeIter)->UpdateEvalTimeStamp(); - } - } - } - - m_recurrentInfo[iLoopId].m_completedEvaluate = true; - } } bool IsTypicalCriterionNode(ComputationNodeBasePtr nodePtr); @@ -618,7 +587,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb m_recurrentInfo[i].m_completedEvaluate = false; // pass #slices and MB layout to all nodes - // TODO: in the future, these will be different on different nodes + // TODO: in the future, these will be different on different nodes; and probably should be propagated by nodes themselves, like functionValues for (auto nodeIter = allNodes.begin(); nodeIter != allNodes.end(); nodeIter++) { if ((*nodeIter)->ReqMultiSeqHandling()) @@ -628,14 +597,52 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb (*nodeIter)->VerifyNumParallelSequences(GetNumParallelSequences()); } + // traverse all nodes in the pre-determined evaluation order for (auto nodeIter = allNodes.begin(); nodeIter != allNodes.end(); nodeIter++) { - // TODO: is this the frame-by-frame evaluation? Why is there no comment here?? 
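// Note: the rewritten loop below leans on the packing convention that a minibatch
// with S = GetNumParallelSequences() sequences and m_actualMBSize columns holds
// T = m_actualMBSize / S time steps, stored time-major: frame t of sequence s sits
// in column t * S + s. A small worked example (illustration only, not patch code):

#include <cstddef>
#include <cstdio>

int main()
{
    const size_t S = 4, numCols = 80;   // 4 parallel sequences packed into 80 columns
    const size_t T = numCols / S;       // -> 20 time steps
    const size_t t = 7, s = 2;
    const size_t col = t * S + s;       // = 30; the column FrameSlice()/ColumnSlice() address
    // frame t taken across all S sequences is the column range [t*S, t*S + S)
    printf("T=%u, frame %u of sequence %u lives in column %u\n",
           (unsigned) T, (unsigned) t, (unsigned) s, (unsigned) col);
    return 0;
}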
- // evaluate all recurrence that hangs off this first - EvaluateLoop(allNodes, *nodeIter); + // --- first, evaluate all recurrence that hangs off this + + const std::vector* pRecurrentNodes; // set of nodes that participate in same loop as current node, if any + int iLoopId = FindInRecurrentLoops(*nodeIter, pRecurrentNodes); // check if this node participates in a recurrent loop + + if (iLoopId != -1 && IsFuncValueOlderThanInputs(*pRecurrentNodes) && m_recurrentInfo[iLoopId].m_completedEvaluate == false) + { + // node participates in a recurrent loop: process the loop frame by frame + for (auto nodeIter = pRecurrentNodes->begin(); nodeIter != pRecurrentNodes->end(); nodeIter++) + (*nodeIter)->SetFunctionAndGradientSize(m_actualMBSize); + + const size_t T = m_actualMBSize / GetNumParallelSequences(); + + // for every time step run through all nodes in this particular loop + if (m_recurrentInfo[iLoopId].m_isForwardLoop) + { + for (size_t timeIndex = 0; timeIndex < T; timeIndex ++) + { + for (auto nodeIter = pRecurrentNodes->begin(); nodeIter != pRecurrentNodes->end(); nodeIter++) + { + (*nodeIter)->EvaluateThisNodeGivenInputs(timeIndex); + (*nodeIter)->UpdateEvalTimeStamp(); + } + } + } + else + { + for (size_t timeIndex = T - 1; timeIndex --> 0; ) + { + for (auto nodeIter = pRecurrentNodes->begin(); nodeIter != pRecurrentNodes->end(); nodeIter++) + { + (*nodeIter)->EvaluateThisNodeGivenInputs(timeIndex); + (*nodeIter)->UpdateEvalTimeStamp(); + } + } + } + + m_recurrentInfo[iLoopId].m_completedEvaluate = true; + } + + // --- second, do the whole batch (unless it's already done) - // now do the whole batch (unless it's already done) - if ((*nodeIter)->IsFuncValueOlderThanInputs() && (FindInRecurrentLoop(*nodeIter) == -1)) + else if (iLoopId == -1 && (*nodeIter)->IsFuncValueOlderThanInputs()) { #ifdef DISPLAY_DEBUG fprintf (stderr, "Evaluate Node: %s\n",(msra::strfun::utf8 ((*nodeIter)->NodeName())).c_str()); @@ -654,9 +661,10 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb // resize entire network to handle a given MB size // TODO: actually it only updates nodes in m_recurrentInfo. Why? Because without recurrence, size never changes? // TODO: Is this always called with the result of DetermineActualMBSizeFromFeatures()? Why would it ever not? + // TODO: the network should know this by itself, no? void SetActualMiniBatchSize(const size_t aSize) { - m_actMiniBSize = (int) aSize; + m_actualMBSize = (int) aSize; // assume that all nodes in recurrent loops need to be reset to aSize minibatch size, so need to reset the following for (int i = 0; i < m_recurrentInfo.size(); i++) @@ -668,7 +676,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb // resize function values and gradients of everything in m_recurrentInfo for (int i = 0; i < m_recurrentInfo.size(); i++) for (auto nodeIter : m_recurrentInfo[i].m_recurrentNodes) - nodeIter->SetFunctionAndGradientSize(m_actMiniBSize); + nodeIter->SetFunctionAndGradientSize(m_actualMBSize); } // it is used this way most of the time @@ -681,7 +689,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb // GetMaxMBSize - Get the maximum minibatch size that will be seen in a training run // returns the result from SetActualMiniBatchSize(). 
Note DetermineActualMBSizeFromFeatures() also exists but returns a value derived from the inputs dimensions - size_t GetMaxMBSize() { return m_actMiniBSize; } + size_t GetMaxMBSize() { return m_actualMBSize; } #if 0 // always called in this pattern: @@ -709,18 +717,18 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb void ComputeGradientLoop(std::list& /*allNodes*/, const ComputationNodeBasePtr startNode) { - std::vector recurrentNodes; - int iLoopId = FindInRecurrentLoop(startNode, recurrentNodes); + const std::vector* pRecurrentNodes; + int iLoopId = FindInRecurrentLoops(startNode, pRecurrentNodes); if (iLoopId != -1) { if (m_recurrentInfo[iLoopId].m_completedGradient == false) { - int mbSize = m_actMiniBSize / GetNumParallelSequences(); + size_t T = m_actualMBSize / GetNumParallelSequences(); if (m_recurrentInfo[iLoopId].m_isForwardLoop) { - for (int timeIndex = mbSize - 1; timeIndex >= 0; timeIndex--) + for (size_t timeIndex = T; timeIndex --> 0; ) { - for (auto nodeIter = recurrentNodes.rbegin(); nodeIter != recurrentNodes.rend(); ++nodeIter) + for (auto nodeIter = pRecurrentNodes->rbegin(); nodeIter != pRecurrentNodes->rend(); ++nodeIter) { (*nodeIter)->VerifyNumParallelSequences(GetNumParallelSequences()); // TODO: move to FrameRange object (*nodeIter)->ComputeGradientForChildren(timeIndex); @@ -729,9 +737,9 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb } else { - for (int timeIndex = 0; timeIndex < mbSize; timeIndex++) + for (size_t timeIndex = 0; timeIndex < T; timeIndex++) { - for (auto nodeIter = recurrentNodes.rbegin(); nodeIter != recurrentNodes.rend(); ++nodeIter) + for (auto nodeIter = pRecurrentNodes->rbegin(); nodeIter != pRecurrentNodes->rend(); ++nodeIter) { (*nodeIter)->VerifyNumParallelSequences(GetNumParallelSequences()); (*nodeIter)->ComputeGradientForChildren(timeIndex); @@ -758,6 +766,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb RuntimeError("ComputeGradient: The root of the Gradient computation must evaluate to R1 value."); //run forward pass first + // TODO: feels out of place; can't we stick for ForwardProp()/BackwardProp()? 
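// (Why a forward pass must happen here at all: ComputeGradientForChildren() reads
//  the FunctionValues() produced by forward evaluation of this same minibatch, so
//  Evaluate() has to have run on it before any gradient can be computed.)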
Evaluate(rootNode); // TODO: comment what the purpose of this is @@ -1257,11 +1266,11 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb for (auto nodeIter = allNodes.begin(); nodeIter != allNodes.end(); nodeIter++) { - std::vector recurrentNodes; - int iLoopId = FindInRecurrentLoop(*nodeIter, recurrentNodes); + const std::vector* pRecurrentNodes; + int iLoopId = FindInRecurrentLoops(*nodeIter, pRecurrentNodes); if (iLoopId != -1 && m_recurrentInfo[iLoopId].m_completedGradient == false) { - for (auto nodeIterInLoop = recurrentNodes.rbegin(); nodeIterInLoop != recurrentNodes.rend(); ++nodeIterInLoop) + for (auto nodeIterInLoop = pRecurrentNodes->rbegin(); nodeIterInLoop != pRecurrentNodes->rend(); ++nodeIterInLoop) AllocateGradientMatricesForChildren(*nodeIterInLoop, numParents); m_recurrentInfo[iLoopId].m_completedGradient = true; } @@ -1475,7 +1484,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb std::list& allNodes = GetGradientCalcOrder(rootNode); for (auto nodeIter = allNodes.begin(); nodeIter != allNodes.end(); nodeIter++) - (*nodeIter)->ClearGradientForChildren(m_actMiniBSize); + (*nodeIter)->ClearGradientForChildren(m_actualMBSize); //for (auto nodeIter = m_recurrentInfo.begin(); nodeIter != m_recurrentInfo.end(); nodeIter++) // (*nodeIter).m_completedGradient = false; @@ -1579,9 +1588,9 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb // used for sentence boundary information passed from reader to reset RNN state // specify how the minibatch is packed for each sample MBLayoutPtr m_pMBLayout; - MBLayoutPtr m_pMBNoLayout; // this one is a dummy, passed when no layout is available/should be used + MBLayoutPtr m_pMBNoLayout; // this alternative one is passed when no layout is available/should be used - int m_actMiniBSize; + int m_actualMBSize; // current MB size in columns --note: this is not #frames, if we have multiple parallel sequences, cf. MBLayout // main node holder std::map m_nameToNodeMap; // [name] -> node; this is the main container that holds this networks' nodes diff --git a/MachineLearning/CNTKSGDLib/SGD.cpp b/MachineLearning/CNTKSGDLib/SGD.cpp index e93b01140b2b..0c1ddeaf9d52 100644 --- a/MachineLearning/CNTKSGDLib/SGD.cpp +++ b/MachineLearning/CNTKSGDLib/SGD.cpp @@ -1968,7 +1968,13 @@ template // TODO: currently only support one node regularization if (m_needAdaptRegularization && m_adaptationRegType == AdaptationRegType::KL && refNode != nullptr) { - refNet.SetActualMiniBatchSize(actualMBSize); +#if 1 + size_t actualMBSize2 = refNet.SetActualMiniBatchSizeFromFeatures(); + if (actualMBSize2 != actualMBSize) + LogicError("TrainOneEpoch: refNet has different MB size than main net??"); +#else + refNet.SetActualMiniBatchSize(actualMBSize); // TODO: SetActualMiniBatchSizeFromFeatures() should have the same result, no? 
+#endif *refNet.GetMBLayoutPtr() = *net.GetMBLayoutPtr(); // TODO: This is UNTESTED (before this was missing, seemingly inconsistently) refNet.VerifyActualNumParallelSequences(trainSetDataReader->GetNumParallelSequences()); From ae94278f91ab7a2860989bed7fd2f9b02c49d6bc Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Fri, 18 Sep 2015 22:36:19 -0700 Subject: [PATCH 15/44] FindInRecurrentLoops() now returns a pointer directly to the structure it found, instead of an index--way simpler; merged ComputeGradientLoop() into ComputeGradient(), no point in having a separate function --- .../ComputationNetwork.cpp | 10 +- .../ComputationNetwork.h | 172 ++++++++---------- .../ComputationNode.h | 4 +- 3 files changed, 78 insertions(+), 108 deletions(-) diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp index 8accfc5dfde5..5e73afcfe4f1 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp @@ -738,15 +738,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { std::list vTmp; std::list vRecurrentTmp; - //int prevId = -1; - vector accessed; - accessed.assign(m_recurrentInfo.size(), false); + vector accessed(m_recurrentInfo.size(), false); for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++) { - const vector* pRecurrentNodesDummy; - int iId = FindInRecurrentLoops(*nodeIter, pRecurrentNodesDummy); - if (iId >= 0) + const RecurrentInfo * recInfo = FindInRecurrentLoops(*nodeIter); + if (recInfo) { + int iId = recInfo->m_loopId; if (!accessed[iId]) { newList.insert(newList.end(), diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h index b1ea714fd516..107f6f409c77 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h @@ -516,45 +516,17 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb // find if node is part of a recurrent loop; and return the loop id // If found then return a pointer to the list of nodes of this loop. // TODO: This should just return &m_recurrentInfo of the matching loop, or nullptr if no match. If needed, m_recurrentInfo knows its loop id. 
- int FindInRecurrentLoops(const ComputationNodeBasePtr node, const vector* & pRecurrentNodes) const + RecurrentInfo * FindInRecurrentLoops(const ComputationNodeBasePtr node) { // look in all recurrent loops of the network - for (const auto & iter : m_recurrentInfo) - { + for (auto & iter : m_recurrentInfo) if (std::find(iter.m_recurrentNodes.begin(), iter.m_recurrentNodes.end(), node) != iter.m_recurrentNodes.end()) - { - // found - pRecurrentNodes = &iter.m_recurrentNodesForForward; - return iter.m_loopId; - } - } - return -1; // not part of a recurrent loop + return &iter; + return nullptr; // not part of a recurrent loop } -#if 0 - int FindInRecurrentLoops(const ComputationNodeBasePtr startNode) const - { - int iFound = -1; - - for (auto iter = m_recurrentInfo.begin(); iter != m_recurrentInfo.end(); iter++) - { - if (std::find((*iter).m_recurrentNodes.begin(), (*iter).m_recurrentNodes.end(), startNode) != (*iter).m_recurrentNodes.end()) - { - iFound = (*iter).m_loopId; - break; - } - } - - return iFound; - } -#endif - bool IsFuncValueOlderThanInputs(const std::vector& recurrentNodes); - void EvaluateLoop(const ComputationNodeBasePtr startNode) - { - } - bool IsTypicalCriterionNode(ComputationNodeBasePtr nodePtr); void SetNodesReqMultiSeqHandling(); @@ -602,47 +574,47 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb { // --- first, evaluate all recurrence that hangs off this - const std::vector* pRecurrentNodes; // set of nodes that participate in same loop as current node, if any - int iLoopId = FindInRecurrentLoops(*nodeIter, pRecurrentNodes); // check if this node participates in a recurrent loop + RecurrentInfo * recInfo = FindInRecurrentLoops(*nodeIter); // check if this node participates in a recurrent loop - if (iLoopId != -1 && IsFuncValueOlderThanInputs(*pRecurrentNodes) && m_recurrentInfo[iLoopId].m_completedEvaluate == false) + if (recInfo && IsFuncValueOlderThanInputs(recInfo->m_recurrentNodesForForward) && !recInfo->m_completedEvaluate) { + const auto & recurrentNodes = recInfo->m_recurrentNodesForForward; // node participates in a recurrent loop: process the loop frame by frame - for (auto nodeIter = pRecurrentNodes->begin(); nodeIter != pRecurrentNodes->end(); nodeIter++) - (*nodeIter)->SetFunctionAndGradientSize(m_actualMBSize); + for (auto & nodeIter : recurrentNodes) + nodeIter->SetFunctionAndGradientSize(m_actualMBSize); const size_t T = m_actualMBSize / GetNumParallelSequences(); // for every time step run through all nodes in this particular loop - if (m_recurrentInfo[iLoopId].m_isForwardLoop) + if (recInfo->m_isForwardLoop) { - for (size_t timeIndex = 0; timeIndex < T; timeIndex ++) + for (size_t t = 0; t < T; t ++) { - for (auto nodeIter = pRecurrentNodes->begin(); nodeIter != pRecurrentNodes->end(); nodeIter++) + for (auto nodeIter = recurrentNodes.begin(); nodeIter != recurrentNodes.end(); nodeIter++) { - (*nodeIter)->EvaluateThisNodeGivenInputs(timeIndex); + (*nodeIter)->EvaluateThisNodeGivenInputs(t); (*nodeIter)->UpdateEvalTimeStamp(); } } } else { - for (size_t timeIndex = T - 1; timeIndex --> 0; ) + for (size_t t = T - 1; t --> 0; ) { - for (auto nodeIter = pRecurrentNodes->begin(); nodeIter != pRecurrentNodes->end(); nodeIter++) + for (auto nodeIter = recurrentNodes.begin(); nodeIter != recurrentNodes.end(); nodeIter++) { - (*nodeIter)->EvaluateThisNodeGivenInputs(timeIndex); + (*nodeIter)->EvaluateThisNodeGivenInputs(t); (*nodeIter)->UpdateEvalTimeStamp(); } } } - m_recurrentInfo[iLoopId].m_completedEvaluate = true; + 
recInfo->m_completedEvaluate = true; } // --- second, do the whole batch (unless it's already done) - else if (iLoopId == -1 && (*nodeIter)->IsFuncValueOlderThanInputs()) + else if (!recInfo && (*nodeIter)->IsFuncValueOlderThanInputs()) { #ifdef DISPLAY_DEBUG fprintf (stderr, "Evaluate Node: %s\n",(msra::strfun::utf8 ((*nodeIter)->NodeName())).c_str()); @@ -715,42 +687,6 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb LogicError("VerifyActualNumParallelSequences: mismatching MB size in MBLayout"); } - void ComputeGradientLoop(std::list& /*allNodes*/, const ComputationNodeBasePtr startNode) - { - const std::vector* pRecurrentNodes; - int iLoopId = FindInRecurrentLoops(startNode, pRecurrentNodes); - if (iLoopId != -1) - { - if (m_recurrentInfo[iLoopId].m_completedGradient == false) - { - size_t T = m_actualMBSize / GetNumParallelSequences(); - if (m_recurrentInfo[iLoopId].m_isForwardLoop) - { - for (size_t timeIndex = T; timeIndex --> 0; ) - { - for (auto nodeIter = pRecurrentNodes->rbegin(); nodeIter != pRecurrentNodes->rend(); ++nodeIter) - { - (*nodeIter)->VerifyNumParallelSequences(GetNumParallelSequences()); // TODO: move to FrameRange object - (*nodeIter)->ComputeGradientForChildren(timeIndex); - } - } - } - else - { - for (size_t timeIndex = 0; timeIndex < T; timeIndex++) - { - for (auto nodeIter = pRecurrentNodes->rbegin(); nodeIter != pRecurrentNodes->rend(); ++nodeIter) - { - (*nodeIter)->VerifyNumParallelSequences(GetNumParallelSequences()); - (*nodeIter)->ComputeGradientForChildren(timeIndex); - } - } - } - - m_recurrentInfo[iLoopId].m_completedGradient = true; - } - } - } // MAIN ENTRY POINT for evaluation followed by gradient computation (forward prop then back prop) // TODO: pass a set of nodes instead of only one @@ -786,6 +722,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb if (rootGradientInitValue != nullptr) dynamic_pointer_cast>(rootNode)->GradientValues().SetValue(*rootGradientInitValue); + // process nodes in pre-determined order for (auto nodeIter = allNodes.begin(); nodeIter != allNodes.end(); nodeIter++) { #ifdef DISPLAY_DEBUG @@ -793,7 +730,43 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb (msra::strfun::utf8 ((*nodeIter)->OperationName())).c_str(), (msra::strfun::utf8 ((*nodeIter)->NodeName())).c_str()); #endif - ComputeGradientLoop(allNodes, *nodeIter); + // --- first, perform recurrent loops if this node participates in one + + RecurrentInfo * recInfo = FindInRecurrentLoops(*nodeIter); + if (recInfo) + { + if (recInfo->m_completedGradient == false) + { + const auto & recurrentNodes = recInfo->m_recurrentNodesForForward; + size_t T = m_actualMBSize / GetNumParallelSequences(); + if (recInfo->m_isForwardLoop) + { + for (size_t t = T; t--> 0;) + { + for (auto nodeIter = recurrentNodes.rbegin(); nodeIter != recurrentNodes.rend(); ++nodeIter) + { + (*nodeIter)->VerifyNumParallelSequences(GetNumParallelSequences()); + (*nodeIter)->ComputeGradientForChildren(t); + } + } + } + else + { + for (size_t t = 0; t < T; t++) + { + for (auto nodeIter = recurrentNodes.rbegin(); nodeIter != recurrentNodes.rend(); ++nodeIter) + { + (*nodeIter)->VerifyNumParallelSequences(GetNumParallelSequences()); + (*nodeIter)->ComputeGradientForChildren(t); + } + } + } + + recInfo->m_completedGradient = true; + } + } + + // --- second, do whole-batch operation if not recurrent (*nodeIter)->ComputeGradientForChildren(); } @@ -1237,10 +1210,10 @@ class ComputationNetwork : public 
ScriptableObjects::Object, public ScriptableOb std::list& nodes = GetEvalOrder(rootNode, false); - for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++) + for (auto & nodeIter : nodes) { - (*nodeIter)->RequestEvalMatrices(m_matrixPool); - (*nodeIter)->ReleaseMatricesAfterEval(m_matrixPool); + nodeIter->RequestEvalMatrices(m_matrixPool); + nodeIter->ReleaseMatricesAfterEval(m_matrixPool); } } @@ -1251,9 +1224,9 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb std::list& nodes = GetEvalOrder(rootNode, false); - for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++) + for (auto & nodeIter : nodes) { - std::vector children = (*nodeIter)->GetChildren(); + std::vector children = nodeIter->GetChildren(); for (int i = 0; i < children.size(); i++) numParents[children[i]] ++; } @@ -1264,20 +1237,19 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb for (int i = 0; i < m_recurrentInfo.size(); i++) m_recurrentInfo[i].m_completedGradient = false; - for (auto nodeIter = allNodes.begin(); nodeIter != allNodes.end(); nodeIter++) + for (auto & nodeIter : allNodes) { - const std::vector* pRecurrentNodes; - int iLoopId = FindInRecurrentLoops(*nodeIter, pRecurrentNodes); - if (iLoopId != -1 && m_recurrentInfo[iLoopId].m_completedGradient == false) + RecurrentInfo * recInfo = FindInRecurrentLoops(nodeIter); + if (recInfo && !recInfo->m_completedGradient) { - for (auto nodeIterInLoop = pRecurrentNodes->rbegin(); nodeIterInLoop != pRecurrentNodes->rend(); ++nodeIterInLoop) - AllocateGradientMatricesForChildren(*nodeIterInLoop, numParents); - m_recurrentInfo[iLoopId].m_completedGradient = true; + for (auto nodeIterInLoop : recInfo->m_recurrentNodesForForward) + AllocateGradientMatricesForChildren(nodeIterInLoop, numParents); + recInfo->m_completedGradient = true; } else - AllocateGradientMatricesForChildren(*nodeIter, numParents); + AllocateGradientMatricesForChildren(nodeIter, numParents); - (*nodeIter)->ReleaseGradientMatrices(m_matrixPool); + nodeIter->ReleaseGradientMatrices(m_matrixPool); } } @@ -1337,9 +1309,9 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb std::list& nodes = GetEvalOrder(rootNode, false); - for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++) - if (!(*nodeIter)->UnitTest()) - return false; + for (auto & nodeIter : nodes) + if (!nodeIter->UnitTest()) + return false; fprintf(stderr, "\n\n"); diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h index b873a9612345..7c95b89405fe 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h @@ -318,7 +318,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void ComputeGradientForChildren() = 0; - virtual void ComputeGradientForChildren(const size_t timeIdxInSeq) = 0; + virtual void ComputeGradientForChildren(const size_t timeIdxInSeq) = 0; // TODO: don't we need a FrameRange here, too? 
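// Note: for reference, the FrameRange record mentioned in the TODOs above roughly
// looks like the following sketch (simplified; member names assumed, not verbatim):

#include <cstddef>
#include <cstdint>

struct FrameRange
{
    size_t timeIdxInSeq;                // time step t, or SIZE_MAX to mean "all frames"
    size_t samplesInRecurrentStep;      // #parallel sequences S (slated to move into MBLayout)
    FrameRange(size_t t, size_t S) : timeIdxInSeq(t), samplesInRecurrentStep(S) { }
    FrameRange() : timeIdxInSeq(SIZE_MAX), samplesInRecurrentStep(1) { }    // entire minibatch
    size_t t() const { return timeIdxInSeq; }
    bool IsAllFrames() const { return timeIdxInSeq == SIZE_MAX; }
};
// With such a record, ComputeGradientForChildren(t) would become
// ComputeGradientForChildren(FrameRange(t, GetNumParallelSequences())).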
// TODO: some evaluation method to be abstracted, but types don't match @@ -825,7 +825,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } static void WINAPI SetToInitStateValueForResetSeg(const Matrix& sentenceBegin, - size_t nStream, ElemType initStateValue, Matrix& newprevstate) + size_t nStream, ElemType initStateValue, Matrix& newprevstate) { Matrix colSeg(sentenceBegin.GetDeviceId()); colSeg.Resize(nStream, nStream); From 35350f7f13e980bbce8b3b0924bcff4bb0b80424 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Fri, 18 Sep 2015 22:50:09 -0700 Subject: [PATCH 16/44] (#if-0'd out an unused function) --- MachineLearning/CNTKComputationNetworkLib/ComputationNode.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h index 7c95b89405fe..4e5c9fa0b2e8 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h @@ -824,6 +824,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { MaskToZeroWhenLabelAndFeatureMissing(m_functionValues, timeIdxInSeq); } +#if 0 // (this function cannot be used currently since sentenceBegin is not a Matrix anymore; only affects LSTMNode which is no longer used) static void WINAPI SetToInitStateValueForResetSeg(const Matrix& sentenceBegin, size_t nStream, ElemType initStateValue, Matrix& newprevstate) { @@ -849,6 +850,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { /// add default state value if it is for reset Matrix::MultiplyAndWeightedAdd(initStateValue, ones, false, colSeg, false, 1.0, newprevstate); /// += [0 initStateValue 0 ] } +#endif /** reset to error signals to 0 for any elements without labele From 91d7d17e5d43094d1bfedcee23c4a701b6d62a16 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Sat, 19 Sep 2015 17:14:14 -0700 Subject: [PATCH 17/44] new method DataSlice() that takes care of slicing in a unified manner. Not yet used, meant to be used for all derived classes' data access --- .../ComputationNode.h | 68 ++++++++++++++----- 1 file changed, 51 insertions(+), 17 deletions(-) diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h index 4e5c9fa0b2e8..5abaae7026bb 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h @@ -128,7 +128,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // Normally, N is 1 or it spans the entire minibatch. virtual void EvaluateThisNode(const FrameRange &) = 0; // evaluate a node--this calls EvaluateThisNode() and MaskToZeroWhenLabelAndFeatureMissing() if needed - // TODO: name this better--which is the main entry point? + // this is the main entry point for Network; while EvaluateThisNode() is the virtual call into specific node implementation virtual void EvaluateThisNodeGivenInputs() = 0; virtual void EvaluateThisNodeGivenInputs(const size_t timeIdxInSeq) = 0; // TODO: change to FrameRange as well @@ -655,8 +655,6 @@ namespace Microsoft { namespace MSR { namespace CNTK { // ComputationNode -- abstract base class for computation nodes parameterized by float vs. double // ======================================================================= - // TODO: number of inputs should be a template parameter! 
SIZE_MAX for those that take variable numvber - template class ComputationNode : public ComputationNodeBase //Abstract Class that cannot be instantiated { @@ -768,10 +766,6 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_children[i] = UpCast(inputs[i]); // (this checks the type) } - //making them virtual so that nodes that only copy values from it's children (e.g., dropout) can be efficient in evaluation - virtual const Matrix& FunctionValues() const {return m_functionValues;} - virtual Matrix& FunctionValues() { return m_functionValues;} - virtual void DumpNodeInfo(const bool /*printValues*/, File& fstream) const; // TODO: similar to DumpInfo; used by ExperimentalNetworkBuilder test implementation @@ -808,14 +802,17 @@ namespace Microsoft { namespace MSR { namespace CNTK { } } - /*implement*/ void EvaluateThisNodeGivenInputs() + /*implement*/void EvaluateThisNodeGivenInputs() { - EvaluateThisNode(); + EvaluateThisNode(); // this is a call to the virtual function that implements the actual operation if (!UseCustomizedMultiSeqHandling()) MaskToZeroWhenLabelAndFeatureMissing(m_functionValues); } + // TODO: use a FrameRange arg, then unify with above + // TODO: do we even need this extra function? Should Node know about this masking business, or is that the job of Network? + // TODO: rename this to make it more clear what this function does /*implement*/void EvaluateThisNodeGivenInputs(const size_t timeIdxInSeq) // TODO: change to FrameRange as well { EvaluateThisNode(FrameRange(timeIdxInSeq, GetNumParallelSequences())); @@ -853,8 +850,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { #endif /** - reset to error signals to 0 for any elements without labele + reset to error signals to 0 for any elements without labels */ + // TODO: use a FrameRange instead of timeIdxSeq bool MaskToZeroWhenLabelAndFeatureMissing(Matrix& matrixToBeMasked, const size_t timeIdxInSeq=(size_t)-1) const { bool processedExistsNoLabelorFeatureMissing = false; /// set to true if either nolabel or feature missing is processed @@ -954,9 +952,6 @@ namespace Microsoft { namespace MSR { namespace CNTK { } } - const Matrix& GradientValues() const { return m_gradientValues; } - Matrix& GradientValues() { return m_gradientValues; } - // up-cast to make life easier static ComputationNodePtr UpCast(ComputationNodeBasePtr inode) { @@ -985,14 +980,52 @@ namespace Microsoft { namespace MSR { namespace CNTK { // expand the inputs to exist up to the desired index while (childIndex >= m_children.size()) - { - m_children.push_back(NULL); - } + m_children.push_back(nullptr); // set the input value m_children[childIndex] = node; } + //making them virtual so that nodes that only copy values from it's children (e.g., dropout) can be efficient in evaluation + virtual const Matrix& FunctionValues() const { return m_functionValues; } + virtual Matrix& FunctionValues() { return m_functionValues; } + + const Matrix& GradientValues() const { return m_gradientValues; } + Matrix& GradientValues() { return m_gradientValues; } + + // function to access any input and output, value and gradient, whole batch or single frame + // Note: This returns an object, not a reference. That object is a column slice, i.e. a small object that just points into another object. + // TODO: remove FrameRange::samplesInRecurrentStep from FrameRange, as it belongs into pMBLayout. Hence this function that binds both together. + // Note: This is not used anywhere yet, only a sketch how we may further abstract timing. 
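// (Hypothetical call sites, to make the intent concrete -- none of these exist yet:
//      Matrix<ElemType> in0  = DataSlice(0,         VAL,  frameRange);   // input 0's value, frame t, all sequences
//      Matrix<ElemType> outG = DataSlice(INDEX_OUT, GRAD, frameRange);   // this node's gradient, same frame
//      Matrix<ElemType> all  = DataSlice(0,         VAL,  FrameRange()); // whole minibatch
//  Each returns a column slice, i.e. a lightweight view into the underlying matrix,
//  not a copy of the data.)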
+#define INDEX_OUT SIZE_MAX +#define SEQUENCE_ALL SIZE_MAX + enum ValueOrGradient { VAL, GRAD }; + Matrix DataSlice(size_t index/*input index or OUT*/, + ValueOrGradient valueOrGradient/*as it says*/, + FrameRange frameRange/*select frame or entire batch*/, size_t sequence = SEQUENCE_ALL/*SEQUENCE_ALL is the normal case*/) + { + ComputationNode * node = (index == INDEX_OUT) ? this : Inputs(index).get(); + Matrix & data = (valueOrGradient == VAL) ? node->FunctionValues() : node->GradientValues(); + if (frameRange.IsAllFrames()) + { + if (sequence == SEQUENCE_ALL) + return data.ColumnSlice(0, data.GetNumCols()); + else + LogicError("DataSlice: sequence index only supported when accessing individual frame"); // (not needed; doable but more involved, requiring a reshape) + } + else + { + size_t numParallelSequences = pMBLayout->GetNumParallelSequences(); + size_t startColumn = frameRange.t() * numParallelSequences; + if (sequence == SEQUENCE_ALL) + return data.ColumnSlice(startColumn, numParallelSequences); + else + return data.ColumnSlice(startColumn + sequence, 1); + } + // TODO: + } + + // this is the entry point from Network; while it will call virtual ComputeInputPartial() into the actual node implementation /*implement*/void ComputeGradientForChildren() { // batch is done only for feed-forward nodes @@ -1024,7 +1057,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { #endif } } - + + // TODO: use a FrameRange here as well, then unify with above /*implement*/void ComputeGradientForChildren(const size_t timeIdxInSeq) { for (size_t i=0; i Date: Sun, 20 Sep 2015 21:59:16 -0700 Subject: [PATCH 18/44] Fixed a bug in the code to enable device memory peer access --- Math/Math/GPUMatrix.cu | 6 +++++- Math/Math/GPUSparseMatrix.cu | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Math/Math/GPUMatrix.cu b/Math/Math/GPUMatrix.cu index 228f9467778b..aaaeb36b2b85 100755 --- a/Math/Math/GPUMatrix.cu +++ b/Math/Math/GPUMatrix.cu @@ -298,7 +298,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { CUDA_CALL(cudaDeviceCanAccessPeer(&canAccessPeer, to_id, m_computeDevice)); if (canAccessPeer) { - CUDA_CALL(cudaDeviceEnablePeerAccess(m_computeDevice, 0)); + cudaError_t cudaStatus = cudaDeviceEnablePeerAccess(m_computeDevice, 0); + if (cudaStatus != cudaErrorPeerAccessAlreadyEnabled) + { + CUDA_CALL(cudaStatus); + } CUDA_CALL(cudaMemcpyPeer(d_dst,to_id,m_pArray,m_computeDevice,sizeof(ElemType)*m_numRows*m_numCols)); } else diff --git a/Math/Math/GPUSparseMatrix.cu b/Math/Math/GPUSparseMatrix.cu index 5f3461ae0a92..c53ade5dd445 100644 --- a/Math/Math/GPUSparseMatrix.cu +++ b/Math/Math/GPUSparseMatrix.cu @@ -418,7 +418,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { CUDACALL(cudaDeviceCanAccessPeer(&canAccessPeer, to_id, m_computeDevice)); if (canAccessPeer) { - CUDACALL(cudaDeviceEnablePeerAccess(m_computeDevice, 0)); + cudaError_t cudaStatus = cudaDeviceEnablePeerAccess(m_computeDevice, 0); + if (cudaStatus != cudaErrorPeerAccessAlreadyEnabled) + { + CUDACALL(cudaStatus); + } CUDACALL(cudaMemcpyPeer(d_dst, to_id, m_pArray, m_computeDevice, m_totalBufferSizeAllocated)); } else From e8274968b84f5ed9ac43ec4df66ffea2d59ffe39 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Mon, 21 Sep 2015 10:18:31 -0700 Subject: [PATCH 19/44] changed a few "for (auto x :" to "for ([const] auto & x :" --- .../LMSequenceReader/SequenceReader.cpp | 6 +-- .../LUSequenceReader/LUSequenceReader.cpp | 2 +- DataReader/UCIFastReader/UCIFastReader.cpp | 4 +- 
MachineLearning/CNTK/ModelEditLanguage.cpp | 10 ++--- MachineLearning/CNTK/ModelEditLanguage.h | 2 +- MachineLearning/CNTK/NDLNetworkBuilder.h | 2 +- .../CNTK/NetworkDescriptionLanguage.h | 2 +- .../ComputationNetwork.cpp | 44 +++++++++---------- .../ComputationNetwork.h | 10 ++--- .../NetworkBuilderFromConfig.cpp | 2 +- 10 files changed, 42 insertions(+), 42 deletions(-) diff --git a/DataReader/LMSequenceReader/SequenceReader.cpp b/DataReader/LMSequenceReader/SequenceReader.cpp index 854705ece8cd..bf9c9fb31bc1 100644 --- a/DataReader/LMSequenceReader/SequenceReader.cpp +++ b/DataReader/LMSequenceReader/SequenceReader.cpp @@ -655,7 +655,7 @@ void SequenceReader::ReadClassInfo(const wstring & vocfile, int& class LogicError("SequenceReader::ReadClassInfo the actual number of words %d is smaller than the specified vocabulary size %d. Check if labelDim is too large. ", idx4class.size(), nwords); } std::vector counts(idx4cnt.size()); - for (auto p : idx4cnt) + for (const auto & p : idx4cnt) counts[p.first] = (double)p.second; m_noiseSampler = noiseSampler(counts); @@ -689,7 +689,7 @@ void SequenceReader::InitCache(const ConfigParameters& readerConfig) found = true; } FindConfigNames(readerConfig, "wfile", names); - for (auto name : names) + for (const auto & name : names) { ConfigParameters config = readerConfig(name); filesList.push_back(config("wfile")); @@ -714,7 +714,7 @@ void SequenceReader::InitCache(const ConfigParameters& readerConfig) // now get the section names for map and category types std::map sections; m_cachingWriter->GetSections(sections); - for (auto pair : sections) + for (const auto & pair : sections) { // TODO: we would need to add a sequenceMap type here as well // or maybe change to heirarchal name (i.e. root.labelIn.map) diff --git a/DataReader/LUSequenceReader/LUSequenceReader.cpp b/DataReader/LUSequenceReader/LUSequenceReader.cpp index e50eab054989..df9bcc801598 100644 --- a/DataReader/LUSequenceReader/LUSequenceReader.cpp +++ b/DataReader/LUSequenceReader/LUSequenceReader.cpp @@ -1272,7 +1272,7 @@ void MultiIOBatchLUSequenceReader::CopyMBLayoutTo(MBLayoutPtr pMBLayou /// run for each reader vector col; size_t rows = 0, cols = 0; - for (auto p : mReader) + for (const auto & p : mReader) { p.second->CopyMBLayoutTo(pMBLayout); if (rows == 0) diff --git a/DataReader/UCIFastReader/UCIFastReader.cpp b/DataReader/UCIFastReader/UCIFastReader.cpp index e7aeea2a8871..9f087757fe82 100644 --- a/DataReader/UCIFastReader/UCIFastReader.cpp +++ b/DataReader/UCIFastReader/UCIFastReader.cpp @@ -450,7 +450,7 @@ void UCIFastReader::InitCache(const ConfigParameters& readerConfig) found = true; } FindConfigNames(readerConfig, "wfile", names); - for (auto name : names) + for (const auto & name : names) { ConfigParameters config = readerConfig(name); filesList.push_back(config("wfile")); @@ -475,7 +475,7 @@ void UCIFastReader::InitCache(const ConfigParameters& readerConfig) // now get the section names for map and category types std::map sections; m_cachingWriter->GetSections(sections); - for (auto pair : sections) + for (const auto & pair : sections) { if (pair.second == sectionTypeCategoryLabel) { diff --git a/MachineLearning/CNTK/ModelEditLanguage.cpp b/MachineLearning/CNTK/ModelEditLanguage.cpp index a31c4f386574..6c700133ebf1 100644 --- a/MachineLearning/CNTK/ModelEditLanguage.cpp +++ b/MachineLearning/CNTK/ModelEditLanguage.cpp @@ -345,7 +345,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa // process outstanding NDL scripts ensuring that the inputs have all 
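// Note on the "for (auto x :)" -> "for ([const] auto & x :)" change applied throughout:
// binding the loop variable by value copies every element (one string/shared_ptr copy
// per iteration), and mutations affect only the copy. A minimal demonstration
// (illustration only, not part of the patch):

#include <cstdio>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> names { "W0", "b0", "W1" };

    for (auto name : names)             // by value: 'name' is a fresh copy each iteration
        name += "_v2";                  // ...so this mutates only the copy
    printf("%s\n", names[0].c_str());   // still prints "W0"

    for (auto & name : names)           // by reference: no copies, mutations stick
        name += "_v2";
    printf("%s\n", names[0].c_str());   // prints "W0_v2"

    for (const auto & name : names)     // read-only traversal: no copies, no mutation
        printf("%s ", name.c_str());
    printf("\n");
    return 0;
}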
diff --git a/MachineLearning/CNTK/ModelEditLanguage.cpp b/MachineLearning/CNTK/ModelEditLanguage.cpp
index a31c4f386574..6c700133ebf1 100644
--- a/MachineLearning/CNTK/ModelEditLanguage.cpp
+++ b/MachineLearning/CNTK/ModelEditLanguage.cpp
@@ -345,7 +345,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa
    // process outstanding NDL scripts ensuring that the inputs have all been resolved
    ProcessNDLScript(netNdlFrom, ndlPassResolve);
-   for (auto node : nodeTo)
+   for (auto & node : nodeTo)
    {
        node->SetInput(inputNum, nodeFrom[0]);
    }
@@ -442,7 +442,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa
    ProcessNDLScript(netNdl, ndlPassInitial, false);
    ComputationNetwork* cn = netNdl->cn;
-   for (auto node : nodes)
+   for (auto & node : nodes)
    {
        switch(prop)
        {
@@ -524,7 +524,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa
    // make sure all NDL links have been resolved
    ProcessNDLScript(netNdl, ndlPassResolve);
-   for (auto node : nodes)
+   for (auto & node : nodes)
    {
        switch(prop)
        {
@@ -562,7 +562,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa
    if (nodes.size() < 1)
        RuntimeError("Delete must have at least one target, %s doesn't represent any items", params[i].c_str());
-   for (auto node : nodes)
+   for (const auto & node : nodes)
    {
        netNdl->cn->DeleteNode(node->NodeName());
    }
@@ -603,7 +603,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa
    NetNdl<ElemType>* netNdl;
    vector<ComputationNodeBasePtr> nodes = FindSymbols(params[0], netNdl);
-   for (auto pNodes : nodes)
+   for (auto & pNodes : nodes)
    {
        if (pNodes->OperationName() != LearnableParameter::TypeName())
        {
diff --git a/MachineLearning/CNTK/ModelEditLanguage.h b/MachineLearning/CNTK/ModelEditLanguage.h
index cda901fb57cb..85ee0297b479 100644
--- a/MachineLearning/CNTK/ModelEditLanguage.h
+++ b/MachineLearning/CNTK/ModelEditLanguage.h
@@ -263,7 +263,7 @@ class MELScript: public ConfigParser
    // this is the *.W = L2.W case
    // We want to find all the existing destination matches and then assign the in node to all of them
-   for (auto node : nodesOut)
+   for (const auto & node : nodesOut)
    {
        std::wstring nodeOutName = node->NodeName();
        GenNameValue value(nodeIn, nodeOutName);
diff --git a/MachineLearning/CNTK/NDLNetworkBuilder.h b/MachineLearning/CNTK/NDLNetworkBuilder.h
index 3a9b2a669dc3..da741c6b8bac 100644
--- a/MachineLearning/CNTK/NDLNetworkBuilder.h
+++ b/MachineLearning/CNTK/NDLNetworkBuilder.h
@@ -138,7 +138,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    // "load" parameter are in fact loaded (if they were all processed at once, the last file's "load"
    // parameter would override all the earlier ones, and those sections wouldn't get loaded).
    std::vector<std::string> filePathVec = msra::strfun::split(ndlMacrosPaths, "+");
-   for (auto filePath : filePathVec)
+   for (const auto & filePath : filePathVec)
    {
        ndlScript.LoadConfigFileAndResolveVariables(msra::strfun::utf16(filePath), config);
    }
diff --git a/MachineLearning/CNTK/NetworkDescriptionLanguage.h b/MachineLearning/CNTK/NetworkDescriptionLanguage.h
index 0894d7efa140..4b92b399f010 100644
--- a/MachineLearning/CNTK/NetworkDescriptionLanguage.h
+++ b/MachineLearning/CNTK/NetworkDescriptionLanguage.h
@@ -577,7 +577,7 @@ class NDLScript: public ConfigParser
    {
        vector<NDLNode<ElemType>*> result;
        std::string empty;
-       for (auto symbol : m_symbols)
+       for (auto & symbol : m_symbols)
        {
            NDLNode<ElemType>* node = symbol.second;
            std::string value = node->GetOptionalParameter(optParamName, empty);
diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp
index 5e73afcfe4f1..cbcb2ed1ee95 100644
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp
@@ -326,9 +326,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        return false;
    }

+   // TODO: comment on who owns this flag. Is it entirely owned by Network?
+   // Or should the 4 node types below know?
    void ComputationNetwork::SetNodesReqMultiSeqHandling()
    {
-       for (auto node : m_nodesReqMultiSeqHandling)
+       for (auto & node : m_nodesReqMultiSeqHandling)
        {
            //SumElements node will generate a scalar value and so it should never require special handling
            //TransposeNode will change the size of columns and so it should also not be included for special handling
        //if a typical criterion node is used as the training criterion node we assume it requires multiseq handling
        //this is for backward compatibility
-       for (auto node : m_finalCriteria)
+       for (auto & node : m_finalCriteria)
            if (IsTypicalCriterionNode(node))
                node->SetReqMultiSeqHandlingTo(true);

-       for (auto node : m_evalNodes)
+       for (auto & node : m_evalNodes)
            if (IsTypicalCriterionNode(node))
                node->SetReqMultiSeqHandlingTo(true);
    }
@@ -408,8 +410,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    void ComputationNetwork::ClearCalcOrderCaches()
    {
-       for (auto it : m_cacheEvalOrders)
-           for (auto iter2 : m_cacheEvalOrders[it.first])
+       for (auto & it : m_cacheEvalOrders)
+           for (auto & iter2 : m_cacheEvalOrders[it.first])
                iter2->ClearCache();
        m_cacheEvalOrders.clear();
        m_cacheGradientCalcOrders.clear();
@@ -681,7 +683,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        DetermineLoopTypes();

-       for (auto iter : nodes)
+       for (auto & iter : nodes)
            iter->ClearCache();
    }
@@ -1097,7 +1099,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        wstring str = style;

-       for (auto x : specialNodes)
+       for (const auto & x : specialNodes)
            str = str + msra::strfun::wstrprintf(L"\"%ls\" ", x->GetName().c_str());
        return str + L"; \n";
    }
@@ -1112,7 +1114,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        // get precompute node
        std::vector<ComputationNodeBasePtr> PreComputedNodes;
        std::vector<ComputationNodeBasePtr> allnodes = GetAllNodes();
-       for (auto n : allnodes)
+       for (const auto & n : allnodes)
        {
            if (n->RequiresPreCompute())
                PreComputedNodes.push_back(n);
@@ -1120,7 +1122,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        // get PastValue node
        std::vector<ComputationNodeBasePtr> pastValueNodes;
-       for (auto n : allnodes)
+       for (const auto & n : allnodes)
        {
            if (n->OperationName() == OperationNameOf(PastValueNode) || n->OperationName() == L"Delay")
                pastValueNodes.push_back(n);
@@ -1128,14 +1130,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        // get FutureValue node
        std::vector<ComputationNodeBasePtr> futureValueNodes;
-       for (auto n : allnodes)
+       for (const auto & n : allnodes)
        {
            if (n->OperationName() == OperationNameOf(FutureValueNode))
                futureValueNodes.push_back(n);
        }
        // get learnableParameters
        std::vector<ComputationNodeBasePtr> learnableParameters;
-       for (auto n : allnodes)
+       for (const auto & n : allnodes)
        {
            if (n->OperationName() == OperationNameOf(LearnableParameter))
                learnableParameters.push_back(n);
@@ -1173,7 +1175,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        //////////////////////////////////////////////////////////////////////////
        fstream << L"\n// add labels and operation name\n";
        wstring line;
-       for (auto x : allnodes)
+       for (const auto & x : allnodes)
        {
            line.clear();
            size_t nrows = x->GetNumRows();
@@ -1191,25 +1193,23 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        fstream << L"subgraph {\n";
        fstream << L"\t\t rank=source ; ";
        line.clear();
-       for (auto x : m_features)
-       {
+       for (const auto & x : m_features)
            line = line + msra::strfun::wstrprintf(L"\"%ls\" ", x->GetName().c_str());
-       }
        fstream << line << L"\n}\n";

        // subgraph
eval/output/criteria fstream << L"subgraph {\n"; fstream << L"\t\t rank=sink ; "; line.clear(); - for (auto x : m_finalCriteria) + for (const auto & x : m_finalCriteria) line = line + msra::strfun::wstrprintf(L"\"%ls\" ", x->GetName().c_str()); - for (auto x : m_nodesReqMultiSeqHandling) + for (const auto & x : m_nodesReqMultiSeqHandling) line = line + msra::strfun::wstrprintf(L"\"%ls\" ", x->GetName().c_str()); - for (auto x : m_outputNodes) + for (const auto & x : m_outputNodes) line = line + msra::strfun::wstrprintf(L"\"%ls\" ", x->GetName().c_str()); - for (auto x : m_pairNodes) + for (const auto & x : m_pairNodes) line = line + msra::strfun::wstrprintf(L"\"%ls\" ", x->GetName().c_str()); - for (auto x : m_evalNodes) + for (const auto & x : m_evalNodes) line = line + msra::strfun::wstrprintf(L"\"%ls\" ", x->GetName().c_str()); fstream << line << L"\n}\n"; @@ -1294,7 +1294,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { vector, float>> nodeGroups; wregex NameFilter; - for (auto e : SVDConfig) + for (const auto & e : SVDConfig) { wstring regexStr = e.first; float keepRatio = e.second; @@ -1336,7 +1336,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { fprintf(stderr, "--------------------------------------------------------------------------------------------\n"); - for (auto name : group.first) + for (const auto & name : group.first) { if (m_nameToNodeMap.find(name) == m_nameToNodeMap.end()) { diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h index 107f6f409c77..afe489328c04 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h @@ -647,7 +647,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb // resize function values and gradients of everything in m_recurrentInfo for (int i = 0; i < m_recurrentInfo.size(); i++) - for (auto nodeIter : m_recurrentInfo[i].m_recurrentNodes) + for (auto & nodeIter : m_recurrentInfo[i].m_recurrentNodes) nodeIter->SetFunctionAndGradientSize(m_actualMBSize); } @@ -1242,7 +1242,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb RecurrentInfo * recInfo = FindInRecurrentLoops(nodeIter); if (recInfo && !recInfo->m_completedGradient) { - for (auto nodeIterInLoop : recInfo->m_recurrentNodesForForward) + for (auto & nodeIterInLoop : recInfo->m_recurrentNodesForForward) AllocateGradientMatricesForChildren(nodeIterInLoop, numParents); recInfo->m_completedGradient = true; } @@ -1273,7 +1273,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb // first give criteria nodes as root node if (FinalCriterionNodes().size() > 0) { - for (auto node : FinalCriterionNodes()) + for (auto & node : FinalCriterionNodes()) { if (!allowFragment) FormRecurrentLoops(node); @@ -1287,7 +1287,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb // now output nodes if (OutputNodes().size() > 0) { - for (auto node : OutputNodes()) + for (auto & node : OutputNodes()) if (!UnitTest(node)) vErrors.push_back(node->NodeName().c_str()); } @@ -1296,7 +1296,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb // now evaluation nodes if (EvaluationNodes().size() > 0) { - for (auto node : EvaluationNodes()) + for (auto & node : EvaluationNodes()) if (!UnitTest(node)) vErrors.push_back(node->NodeName().c_str()); } diff --git 
a/MachineLearning/CNTKComputationNetworkLib/NetworkBuilderFromConfig.cpp b/MachineLearning/CNTKComputationNetworkLib/NetworkBuilderFromConfig.cpp
index ad745cf56893..d2d04b7ad0d7 100644
--- a/MachineLearning/CNTKComputationNetworkLib/NetworkBuilderFromConfig.cpp
+++ b/MachineLearning/CNTKComputationNetworkLib/NetworkBuilderFromConfig.cpp
@@ -687,7 +687,7 @@ namespace Microsoft { namespace MSR { namespace ScriptableObjects {
            // traverse children: append them to the end of the work list
            let children = node->GetChildren();
-           for (auto child : children)
+           for (auto & child : children)
                workList.push_back(child);  // (we could check whether c is in 'nodes' already here to optimize, but this way it is cleaner)
        }

From 4a07903dd1b98b1939657e5fb1e5f80373fca7ac Mon Sep 17 00:00:00 2001
From: Frank Seide
Date: Mon, 21 Sep 2015 10:38:55 -0700
Subject: [PATCH 20/44] first use of DataSlice()

---
 .../CNTKComputationNetworkLib/ComputationNode.h | 15 +++++++++------
 .../CNTKComputationNetworkLib/RecurrentNodes.h | 8 ++++----
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h
index 5abaae7026bb..c2cf5e17552c 100644
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h
@@ -997,15 +997,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        // Note: This returns an object, not a reference. That object is a column slice, i.e. a small object that just points into another object.
        // TODO: remove FrameRange::samplesInRecurrentStep from FrameRange, as it belongs into pMBLayout. Hence this function that binds both together.
        // Note: This is not used anywhere yet, only a sketch how we may further abstract timing.
-#define INDEX_OUT SIZE_MAX
+       // TODO: move sequence into FrameRange object
+       enum Index : size_t { OUTPUT = SIZE_MAX };
#define SEQUENCE_ALL SIZE_MAX
-       enum ValueOrGradient { VAL, GRAD };
+       enum ValueOrGradient { VALUE, GRADIENT };
        Matrix<ElemType> DataSlice(size_t index/*input index or OUTPUT*/, ValueOrGradient valueOrGradient/*as it says*/,
-                                  FrameRange frameRange/*select frame or entire batch*/, size_t sequence = SEQUENCE_ALL/*SEQUENCE_ALL is the normal case*/)
+                                  const FrameRange & frameRange/*select frame or entire batch*/, size_t sequence = SEQUENCE_ALL/*SEQUENCE_ALL is the normal case*/)
        {
-           ComputationNode * node = (index == INDEX_OUT) ? this : Inputs(index).get();
-           Matrix<ElemType> & data = (valueOrGradient == VAL) ? node->FunctionValues() : node->GradientValues();
+           ComputationNode * node = (index == OUTPUT) ? this : Inputs(index).get();
+           Matrix<ElemType> & data = (valueOrGradient == VALUE) ? node->FunctionValues() : node->GradientValues();
            if (frameRange.IsAllFrames())
            {
                if (sequence == SEQUENCE_ALL)
                    return data;
@@ -1015,7 +1016,9 @@
            }
            else
            {
-               size_t numParallelSequences = pMBLayout->GetNumParallelSequences();
+               size_t numParallelSequences = m_pMBLayout->GetNumParallelSequences();
+               if (numParallelSequences != frameRange.samplesInRecurrentStep)
+                   LogicError("DataSlice: inconsistent samplesInRecurrentStep");  // TODO: this will go away when we remove this member from FrameRange
                size_t startColumn = frameRange.t() * numParallelSequences;
                if (sequence == SEQUENCE_ALL)
                    return data.ColumnSlice(startColumn, numParallelSequences);
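For orientation, a sketch of what a call site gains from this accessor (hypothetical node code, not part of the patch; only the one line in RecurrentNodes.h below actually uses it yet): a single DataSlice() call replaces the node-selection, value-vs-gradient, and frame-selection plumbing that FrameSlice() call sites currently spell out by hand.

    // sketch, assuming this sits inside a ComputationNode<ElemType> member:
    virtual void EvaluateThisNode(const FrameRange & frameRange)
    {
        Matrix<ElemType> sliceInput  = DataSlice(0, VALUE, frameRange);       // input 0, selected frame(s)
        Matrix<ElemType> sliceOutput = DataSlice(OUTPUT, VALUE, frameRange);  // this node's own output
        sliceOutput.SetValue(sliceInput);  // e.g. a hypothetical pass-through node
    }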
diff --git a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h
index aaba256771d2..d0a73d2bac4e 100644
--- a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h
+++ b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h
@@ -206,9 +206,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        // this one differs in the starting condition
        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) = 0;

-       static void WINAPI EvaluateThisNodeSRP(const FrameRange & frameRange, const int timeStep,
-                                              Matrix<ElemType>& functionValues, const Matrix<ElemType>& delayedActivation, const Matrix<ElemType>& inputFunctionValues,
-                                              const ElemType & initStateValue, const Matrix<float> & colBoundaryFlags, const MinibatchPackingFlags minibatchPackingFlags)
+       void EvaluateThisNodeSRP(const FrameRange & frameRange, const int timeStep,
+                                Matrix<ElemType>& functionValues, const Matrix<ElemType>& delayedActivation, const Matrix<ElemType>& inputFunctionValues,
+                                const ElemType & initStateValue, const Matrix<float> & colBoundaryFlags, const MinibatchPackingFlags minibatchPackingFlags)
        {
            size_t timeIdxInSeq = frameRange.t();
            size_t mNbr = frameRange.NumCols();
@@ -225,7 +225,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                d = (int)functionValues.Mod((float)delayedIndex, (float)delayedActivation.GetNumCols());
            // this can point to the past activity of the previous minibatch
-           Matrix<ElemType> out = functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq * mNbr, mNbr);
+           Matrix<ElemType> out = DataSlice(OUTPUT, VALUE, frameRange);
            Matrix<ElemType> inp((DEVICEID_TYPE)functionValues.GetDeviceId());

            if (minibatchPackingFlags & SequenceStart_or_End)

From 8307e74824e0ecf1c268bc8393338dca627cf19a Mon Sep 17 00:00:00 2001
From: Frank Seide
Date: Mon, 21 Sep 2015 10:58:44 -0700
Subject: [PATCH 21/44] new method FrameSlice() that takes a pMBLayout, in prep
 of getting samplesInRecurrentStep out from FrameRange, in prep of allowing
 inconsistent layouts across the graph; moved FrameSlice() from .h to .cpp

---
 Math/Math/Matrix.cpp | 23 ++++++++++
 Math/Math/Matrix.h | 101 +++++++++++++++++++++----------------------
 2 files changed, 73 insertions(+), 51 deletions(-)
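Side by side, the transitional and the target call shape that this patch creates (condensed from the call sites changed below; both forms appear verbatim later in the series):

    // transitional overload: start column and width are passed in redundantly and verified
    Matrix<ElemType> v1 = m_functionValues.FrameSlice(frameRange, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
    // target overload: the MBLayout supplies the number of parallel sequences
    Matrix<ElemType> v2 = m_functionValues.FrameSlice(frameRange, m_pMBLayout);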
diff --git a/Math/Math/Matrix.cpp b/Math/Math/Matrix.cpp
index e122905be16f..e65ed384a7a0 100644
--- a/Math/Math/Matrix.cpp
+++ b/Math/Math/Matrix.cpp
@@ -772,6 +772,29 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        return slice;
    }

+   // special convenience function to apply ColumnSlice() to getting a frame range
+   // It assumes that columns are frames, and returns a sub-range.
+   // TODO: decide whether this belongs here or elsewhere
+   // TODO: remove this one, as it does not take #slices explicitly, which will be needed in the future
+   template<class ElemType>
+   Matrix<ElemType> Matrix<ElemType>::FrameSlice(const FrameRange & frameRange
+       // TODO: temporary only until this has been tested to work:
+       , size_t expectedStartColumn, size_t expectedNumCols
+       ) const
+   {
+       if (frameRange.IsAllFrames()) return ColumnSlice(0, GetNumCols());  // TODO: can we just return a reference to ourselves? --ownership problem
+       // TODO: temporary only until this has been tested to work:
+       if (expectedStartColumn != frameRange.StartColumn() || expectedNumCols != frameRange.NumCols())
+           LogicError("FrameSlice: FrameRange object gives different range than original explicit code. Logic is borked.");
+       return ColumnSlice(frameRange.StartColumn(), frameRange.NumCols());
+   }
+   template<class ElemType>
+   Matrix<ElemType> Matrix<ElemType>::FrameSlice(const FrameRange & frameRange, const shared_ptr<MBLayout> & pMBLayout) const
+   {
+       if (frameRange.IsAllFrames()) return ColumnSlice(0, GetNumCols());  // TODO: can we just return a reference to ourselves? --ownership problem
+       return ColumnSlice(frameRange.StartColumn(pMBLayout), frameRange.NumCols(pMBLayout));
+   }
+
    template<class ElemType>
    Matrix<ElemType>& Matrix<ElemType>::AssignColumnSlice(const Matrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols)
    {
diff --git a/Math/Math/Matrix.h b/Math/Math/Matrix.h
index 34eac851b7f8..db99e0875956 100644
--- a/Math/Math/Matrix.h
+++ b/Math/Math/Matrix.h
@@ -25,44 +25,6 @@
// This class is exported from the Math.dll
namespace Microsoft { namespace MSR { namespace CNTK {
-   // there is a version down there of ColumnSlice() that abstracts the number of streams
-   // TODO: This may not belong here, but having it in ComputeNode would require syntax changes, while having it as a member here only requires a local find-replace. Let's make it work first, then decide how to refactor.
-   // the looping versions of EvaluateThisNode() and ComputeInputPartial() take a frame range, through this structure
-   // It can cast from a size_t, i.e. those functions can be called passing a size_t in place of the FrameRange.
-   // TODO: GetNumParallelSequences() should be subsumed here & removed from nodes
-   // TODO: Where this design currently breaks:
-   //  - BatchModeNodes must access GetNumParallelSequences(), yet operate on the whole sequence
-   //  - likewise, LSTMNode does its own iteration, hence needs access to GetNumParallelSequences() or NumCols() in the whole-batch iterator
-   //  - RecurrentNodes access frames with a time shift, where out-of-bounds ones access a different matrix' values
-   //  - RecurrentNodes iterate over individual slices--need a sub-setting constructor from a FrameRange to another?
-   //  - RecurrentNodes access boundary info with a similar pattern, but boundary info has a different #streams (namely, 1)
-   // TODO: Turns out, a FrameRange is either a whole batch or a single frame.
- struct FrameRange - { - const size_t timeIdxInSeq; // start frame - const size_t samplesInRecurrentStep; // number of samples in this step --BUGBUG: this should be part of MBLayout, not FrameRange - // can construct from a single size_t -> a single-frame range - //FrameRange(size_t timeIdxInSeq) : timeIdxInSeq(timeIdxInSeq), samplesInRecurrentStep(0)/*FIX THIS*/{} - FrameRange(size_t timeIdxInSeq, size_t samplesInRecurrentStep) : timeIdxInSeq(timeIdxInSeq), samplesInRecurrentStep(samplesInRecurrentStep){} - // or without arguments -> entire minibatch / no frame-range - FrameRange() : timeIdxInSeq(0), samplesInRecurrentStep(SIZE_MAX) {} - // code that can only handle single-frame ranges will call t() to get the time index, which will throw if numFrames != 1 - // Some functions need just the time index, e.g. for looking up stuff in m_boundaryInfo. That's where an unscaled index is needed (as opposed to startColumn()). - size_t t() const { EnsureNotAllFrames(); return timeIdxInSeq; } - // multi-frame slice case: these two get startFrame and numFrames - size_t StartColumn() const { EnsureNotAllFrames(); return timeIdxInSeq * samplesInRecurrentStep; } - size_t NumCols() const { EnsureNotAllFrames(); return samplesInRecurrentStep; } - bool IsAllFrames() const { return samplesInRecurrentStep == SIZE_MAX; } // if true then above functions may not be called; caller must use entire batch instead - private: - FrameRange(const FrameRange & other);// : timeIdxInSeq(other.timeIdxInSeq), numFrames(other.numFrames) { } - void operator=(const FrameRange &); - void EnsureNotAllFrames() const - { - if (IsAllFrames()) - LogicError("FrameRange::t() called when frame range refers to whole minibatch"); - } - }; - enum CurrentDataLocation { NONE, CPU, GPU, BOTH @@ -184,19 +146,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { Matrix ColumnSlice(size_t startColumn, size_t numCols) const; // special convenience function to apply ColumnSlice() to getting a frame range - // It assumes that columns are frames, and returns a sub-range. - // TODO: decide whether this belongs here or elsewhere - Matrix FrameSlice(const FrameRange & frameRange - // TODO: temporary only until this has been tested to work: - , size_t expectedStartColumn, size_t expectedNumCols - ) const - { - if (frameRange.IsAllFrames()) return ColumnSlice(0, GetNumCols()); // TODO: can we just return a reference to ourselves? --ownership problem - // TODO: temporary only until this has been tested to work: - if (expectedStartColumn != frameRange.StartColumn() || expectedNumCols != frameRange.NumCols()) - LogicError("FrameSlice: FrameRange object gives different range than original explicit code. Logic is borked."); - return ColumnSlice(frameRange.StartColumn(), frameRange.NumCols()); - } + Matrix FrameSlice(const struct FrameRange & frameRange, size_t expectedStartColumn, size_t expectedNumCols) const; + Matrix FrameSlice(const struct FrameRange & frameRange, const shared_ptr & pMBLayout) const; // difference between AssignColumnSlice and SetColumnSlice // AssignColumnSlice : this(:, startColumn:startColumn+numCols-1) = fromMatrix(:, startColumn: startColumn+numCols-1) @@ -632,4 +583,52 @@ namespace Microsoft { namespace MSR { namespace CNTK { }; typedef std::shared_ptr MBLayoutPtr; + // there is a version down there of ColumnSlice() that abstracts the number of streams + // TODO: This may not belong here, but having it in ComputeNode would require syntax changes, while having it as a member here only requires a local find-replace. 
Let's make it work first, then decide how to refactor.
+   // the looping versions of EvaluateThisNode() and ComputeInputPartial() take a frame range, through this structure
+   // It can cast from a size_t, i.e. those functions can be called passing a size_t in place of the FrameRange.
+   // TODO: GetNumParallelSequences() should be subsumed here & removed from nodes
+   // TODO: We should also have a FrameRange that selects a single sequence instead of all.
+   // TODO: Where this design currently breaks:
+   //  - BatchModeNodes must access GetNumParallelSequences(), yet operate on the whole sequence
+   //  - likewise, LSTMNode does its own iteration, hence needs access to GetNumParallelSequences() or NumCols() in the whole-batch iterator
+   //  - RecurrentNodes access frames with a time shift, where out-of-bounds ones access a different matrix's values
+   //  - RecurrentNodes iterate over individual slices--need a sub-setting constructor from a FrameRange to another?
+   //  - RecurrentNodes access boundary info with a similar pattern, but boundary info has a different #streams (namely, 1)
+   // TODO: This will in the future be able to hold sub-ranges for nested loops as well.
+   struct FrameRange
+   {
+       const size_t timeIdxInSeq;            // start frame
+       const size_t samplesInRecurrentStep;  // number of samples in this step --BUGBUG: this should be part of MBLayout, not FrameRange
+       // can construct from a single size_t -> a single-frame range
+       //FrameRange(size_t timeIdxInSeq) : timeIdxInSeq(timeIdxInSeq), samplesInRecurrentStep(0)/*FIX THIS*/{}
+       FrameRange(size_t timeIdxInSeq, size_t samplesInRecurrentStep) : timeIdxInSeq(timeIdxInSeq), samplesInRecurrentStep(samplesInRecurrentStep){}
+       // or without arguments -> entire minibatch / no frame-range
+       FrameRange() : timeIdxInSeq(0), samplesInRecurrentStep(SIZE_MAX/*all frames (map)*/) {}
+       // code that can only handle single-frame ranges will call t() to get the time index, which will throw if numFrames != 1
+       // Some functions need just the time index, e.g. for looking up stuff in m_boundaryInfo. That's where an unscaled index is needed (as opposed to startColumn()).
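+       // (worked example, not in the original: with 4 parallel sequences, FrameRange(3, 4) yields
+       // StartColumn() == 3 * 4 == 12 and NumCols() == 4, so a frame slice covers columns [12,16) of the minibatch matrix)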
+       size_t t() const { EnsureNotAllFrames(); return timeIdxInSeq; }
+       // multi-frame slice case: these two get startFrame and numFrames
+       size_t StartColumn() const { EnsureNotAllFrames(); return timeIdxInSeq * samplesInRecurrentStep; }
+       size_t NumCols() const { EnsureNotAllFrames(); return samplesInRecurrentStep; }
+       // TODO: remove these ^^ two in favor of these vv
+       size_t StartColumn(const shared_ptr<MBLayout> & pMBLayout) const { EnsureNotAllFrames(); VerifyMBLayout(pMBLayout); return timeIdxInSeq * pMBLayout->GetNumParallelSequences(); }
+       size_t NumCols(const shared_ptr<MBLayout> & pMBLayout) const { EnsureNotAllFrames(); VerifyMBLayout(pMBLayout); return pMBLayout->GetNumParallelSequences(); }
+       bool IsAllFrames() const { return samplesInRecurrentStep == SIZE_MAX; }  // if true then above functions may not be called; caller must use entire batch instead
+   private:
+       FrameRange(const FrameRange & other);// : timeIdxInSeq(other.timeIdxInSeq), numFrames(other.numFrames) { }
+       void operator=(const FrameRange &);
+       void EnsureNotAllFrames() const
+       {
+           if (IsAllFrames())
+               LogicError("FrameRange::t() called when frame range refers to whole minibatch");
+       }
+       // TODO: this will go away once we remove samplesInRecurrentStep from this class
+       void VerifyMBLayout(const shared_ptr<MBLayout> & pMBLayout) const
+       {
+           if (pMBLayout->GetNumParallelSequences() != samplesInRecurrentStep)
+               LogicError("VerifyMBLayout: MBLayout inconsistent with local copy of samplesInRecurrentStep");
+       }
+   };

}}}

From 218ab2ac3bc7d246a07670a7354fe04740370e81 Mon Sep 17 00:00:00 2001
From: Frank Seide
Date: Mon, 21 Sep 2015 11:42:02 -0700
Subject: [PATCH 22/44] changed all FrameSlice() calls to pass pMBLayout, in
 prep of changing all of these to DataSlice(); removed the original version of
 FrameSlice()

---
 .../CompositeComputationNodes.h | 14 +-
 .../ComputationNode.h | 2 +-
 .../ConvolutionalNodes.h | 22 +-
 .../InputAndParamNodes.h | 20 +-
 .../LinearAlgebraNodes.h | 216 +++++++++---------
 .../NonlinearityNodes.h | 122 +++++-----
 .../RecurrentNodes.h | 36 +--
 .../TrainingCriterionNodes.h | 16 +-
 Math/Math/Matrix.cpp | 2 +
 Math/Math/Matrix.h | 16 +-
 10 files changed, 242 insertions(+), 224 deletions(-)

diff --git a/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h b/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h
index 251e24a388ca..0ef020705100 100644
--- a/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h
+++ b/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h
@@ -539,8 +539,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
    {
        //only feature (input0) and output need to be sliced
-       Matrix<ElemType> sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
-       Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+       Matrix<ElemType> sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+       Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
        EvaluateThisNodeS(sliceOutputValue, sliceInput0Value,
Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues()); } @@ -690,8 +690,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { //only feature (input0) and output needs to be sliced - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues()); } @@ -840,9 +840,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { //FunctionValues().Resize(m_memory.GetNumRows(), GetNumParallelSequences()); FunctionValues().Resize(m_memory.GetNumRows(), frameRange.NumCols()); // extra space for one time step if (frameRange.t() == 0) // for first frame, check that we got all in memory --TODO: is this comment correct? How about going backwards? - assert(FunctionValues().FrameSlice(FrameRange(0, GetNumParallelSequences())/*TODO: delete the next two parameters*/, 0, GetNumParallelSequences()).FrobeniusNorm() == m_memory.FrameSlice(FrameRange(0, GetNumParallelSequences())/*TODO: delete the next two parameters*/, 0, GetNumParallelSequences()).FrobeniusNorm()); - //assert(FunctionValues().ColumnSlice(0, GetNumParallelSequences()).FrobeniusNorm() == m_memory.ColumnSlice(0, GetNumParallelSequences()).FrobeniusNorm()); - FunctionValues().SetValue(m_memory.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences())); + assert(FunctionValues().FrameSlice(FrameRange(0, GetNumParallelSequences()), m_pMBLayout).FrobeniusNorm() == m_memory.FrameSlice(FrameRange(0, GetNumParallelSequences()), m_pMBLayout).FrobeniusNorm()); + //assert(FunctionValues().ColumnSlice(0, GetNumParallelSequences()), m_pMBLayout).FrobeniusNorm() == m_memory.ColumnSlice(0, GetNumParallelSequences()), m_pMBLayout).FrobeniusNorm()); + FunctionValues().SetValue(m_memory.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout)); assert(FunctionValues().GetNumCols() == GetNumParallelSequences()); } diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h index c2cf5e17552c..35427db341a4 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h @@ -215,7 +215,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } // This is used at 284 places inside nodes, most of the time as - // FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()) + // FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout) size_t 
GetNumParallelSequences() const
    {
        //return m_samplesInRecurrentStep;
diff --git a/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h b/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h
index 8d591bfe9d94..a497972dbc28 100644
--- a/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h
+++ b/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h
@@ -111,14 +111,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        if (inputIndex > 1)
            InvalidArgument("Convolution operation only takes two inputs.");

-       Matrix<ElemType> sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
-       Matrix<ElemType> sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+       Matrix<ElemType> sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+       Matrix<ElemType> sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);

        if (inputIndex == 0)  //derivative with regard to the weight matrix
            ComputeInputPartialOverWeight(sliceOutputGrad, Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), sliceInput1Value, m_tempMatrix, !frameRange.IsAllFrames());
        else  // derivative with regard to the input feature
        {
-           Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+           Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
            ComputeInputPartialOverInputFeature(sliceOutputGrad, sliceInput1Grad, Inputs(0)->FunctionValues(), sliceInput1Value, m_tempMatrix);
        }
    }
@@ -215,8 +215,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
    {
-       Matrix<ElemType> sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
-       Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+       Matrix<ElemType> sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+       Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
        EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value, m_tempMatrix);
    }
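The FrameRange::Check() helper these call sites rely on is not shown anywhere in this excerpt; judging from its use, and from the validation the removed FrameSlice() overload performed, it presumably re-checks the redundant explicit arguments and returns the range itself, roughly along the lines of this hypothetical sketch:

    // hypothetical FrameRange member, inferred from the call sites above:
    const FrameRange & Check(size_t expectedStartColumn, size_t expectedNumCols) const
    {
        // same temporary safety net as the old FrameSlice(): the explicitly passed
        // start column and width must agree with what the FrameRange computes itself
        if (!IsAllFrames() && (expectedStartColumn != StartColumn() || expectedNumCols != NumCols()))
            LogicError("Check: FrameRange object gives different range than original explicit code");
        return *this;  // returned by reference so the result feeds straight into FrameSlice(..., m_pMBLayout)
    }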
@@ -433,11 +433,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        if (inputIndex > 0)
            InvalidArgument("MaxPooling operation only takes one input.");

-       Matrix<ElemType> sliceInput0Grad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
-       Matrix<ElemType> sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+       Matrix<ElemType> sliceInput0Grad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+       Matrix<ElemType> sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);

-       Matrix<ElemType> sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
-       Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+       Matrix<ElemType> sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+       Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);

        ComputeInputPartialV(sliceOutputGrad, sliceInput0Grad, sliceInput0Value, sliceOutputValue);
    }
@@ -447,8 +447,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
    {
-       Matrix<ElemType> sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
-       Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+       Matrix<ElemType> sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+       Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
        EvaluateThisNodeV(sliceOutputValue, sliceInput0Value);
    }
diff --git a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h
index cd9b6dec72a7..02c61c76d794 100644
--- a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h
+++ b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h
@@ -348,15 +348,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        if (inputIndex == 0)  //left derivative
        {
-           Matrix<ElemType> sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
-           Matrix<ElemType> sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
+           Matrix<ElemType> sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+           Matrix<ElemType> sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);

            ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
        }
        else  //right derivative
        {
-
Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); } @@ -402,8 +402,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); } @@ -573,8 +573,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { assert(m_functionValues.GetNumRows() == GradientValues().GetNumRows()); // original used m_functionValues.GetNumRows() for loop dimension assert(m_pMBLayout); - Matrix mTmp = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix::ScaleAndAdd(1.0, GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), mTmp); + Matrix mTmp = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix::ScaleAndAdd(1.0, GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout), mTmp); } virtual void EvaluateThisNode() @@ -584,8 +584,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix mTmp = FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - mTmp.SetValue(Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences())); + Matrix mTmp = FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + 
mTmp.SetValue(Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout)); } virtual void /*ComputationNodeBase::*/Validate() diff --git a/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h b/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h index 69810bfd5674..4ea368de7937 100644 --- a/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h @@ -53,8 +53,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Negate operation only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad); } @@ -71,8 +71,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue); } @@ -138,8 +138,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("SumElements only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad); } @@ -156,8 +156,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - 
Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue); } @@ -233,8 +233,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("SumColumnElements only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad); } @@ -251,8 +251,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue); } @@ -370,8 +370,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("RowSlice only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), 
GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_startIndex, m_numRows); } @@ -388,8 +388,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue, m_startIndex, m_numRows); } @@ -486,8 +486,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex >= ChildrenSize()) InvalidArgument("RowStack-ComputeInputPartial: inputIndex out of range."); - Matrix sliceInputGrad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputGrad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_startRowIndeces[inputIndex], m_startRowIndeces[inputIndex+1] - m_startRowIndeces[inputIndex]); } @@ -504,7 +504,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceFunctionValues = FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceFunctionValues = FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceFunctionValues, m_inputMatrices, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); } @@ -623,15 +623,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { //left Node must be a scalar if (inputIndex == 0) //left derivative { - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + 
Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); } else { - Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); } @@ -654,8 +654,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); } @@ -742,15 +742,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex == 0) //left derivative { - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); } else //right derivative { - Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete 
this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); } @@ -801,8 +801,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols(); FunctionValues().Resize(rows0, cols1); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); } @@ -915,15 +915,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex == 0) //left derivative { - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); } else //right derivative { - Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); } @@ -969,8 +969,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), 
GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); } @@ -1073,10 +1073,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 1) InvalidArgument("ElementTimes operation only takes two inputs."); - Matrix sliceInput0Grad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Grad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1-inputIndex)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Value = Inputs(1-inputIndex)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInput1Value, sliceInput0Grad, sliceOutputGrad); } @@ -1100,9 +1100,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value); } @@ -1202,10 +1202,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 1) 
InvalidArgument("RowElementTimes operation only takes two inputs."); - Matrix sliceInput0Grad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Grad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1 - inputIndex)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Value = Inputs(1 - inputIndex)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); if (inputIndex == 0) { @@ -1252,9 +1252,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value); } @@ -1353,17 +1353,17 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 1) InvalidArgument("ColumnElementTimes operation only takes two inputs."); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); if (inputIndex == 0) { - Matrix sliceInput0Grad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Grad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), 
m_pMBLayout); ComputeInputPartialLeftS(Inputs(1)->FunctionValues(), sliceInput0Grad, sliceOutputGrad, m_tempMatrix); } else { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRightS(sliceInput0Value, Inputs(1)->GradientValues(), sliceOutputGrad, m_tempMatrix); } } @@ -1403,8 +1403,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues()); } @@ -1509,13 +1509,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { //only the one with more columns can be sliced, if both have same columns both are sliced size_t cols0 = Inputs(inputIndex)->FunctionValues().GetNumCols(), cols1=Inputs(1-inputIndex)->FunctionValues().GetNumCols(); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); if (cols0 >= cols1) { - Matrix sliceInput0Grad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceInput0Value = Inputs(inputIndex)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Grad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(inputIndex)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceOutputValue, sliceOutputGrad, sliceInput0Value, sliceInput0Grad); } @@ -1584,25 
+1584,25 @@ namespace Microsoft { namespace MSR { namespace CNTK { { size_t cols0 = Inputs(0)->FunctionValues().GetNumCols(), cols1=Inputs(1)->FunctionValues().GetNumCols(); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); //only the one with more columns can be sliced, if both have same columns both are sliced if (cols0 == cols1) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value); } else if (cols0 > cols1) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues()); } else //cols0 < cols1 { - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); } @@ -1780,11 +1780,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { //only the one with more columns can be sliced, if both have same columns both are sliced size_t cols0 = Inputs(inputIndex)->FunctionValues().GetNumCols(), cols1=Inputs(1-inputIndex)->FunctionValues().GetNumCols(); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput0Grad = 
Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceInput0Value = Inputs(inputIndex)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Grad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(inputIndex)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix ones = Matrix(); @@ -1890,25 +1890,25 @@ namespace Microsoft { namespace MSR { namespace CNTK { { size_t cols0 = Inputs(0)->FunctionValues().GetNumCols(), cols1=Inputs(1)->FunctionValues().GetNumCols(); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); //only the one with more columns can be sliced, if both have same columns both are sliced if (cols0 == cols1) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value); } else if (cols0 > cols1) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues()); } else //cols0 < cols1 { - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); } @@ -2048,16 +2048,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { InvalidArgument("DiagTimes operation only takes two inputs."); //left parameter (diag matrix cannot be sliced) - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete 
the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); if (inputIndex == 0) //left derivative { - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(m_innerproduct, sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); } else //right derivative { - Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(m_rightGradient, Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); } } @@ -2083,8 +2083,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); } @@ -2205,11 +2205,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 1) InvalidArgument("CosDistance operation only takes two inputs."); - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceInputGrad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = this->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * 
GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = this->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); if (inputIndex == 0) //left derivative { @@ -2280,9 +2280,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(m_invNorm0, m_invNorm1, sliceOutputValue, sliceInput0Value, sliceInput1Value); } @@ -2426,19 +2426,19 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 1) InvalidArgument("KhatriRaoProduct operation only takes two inputs."); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); if (inputIndex == 0) //left derivative { - Matrix sliceInput0Grad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Grad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), 
GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(sliceInput1Value, sliceInput0Grad, sliceOutputGrad); } else //right derivative { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(sliceInput0Value, sliceInput1Grad, sliceOutputGrad); } @@ -2461,9 +2461,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value); } @@ -2564,11 +2564,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 1) InvalidArgument("CosDistanceWithNegativeSamples operation only takes gradients on the first two inputs."); - Matrix sliceInput0Value = 
Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceThisGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(inputIndex, m_invNorm0, m_invNorm1, sliceOutputValue, m_temp, m_rightTerm, m_leftTerm, m_invNormSquare, sliceInput0Value, sliceInput1Value, Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), sliceInputGrad, sliceThisGrad); } @@ -2681,9 +2681,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(m_invNorm0, m_invNorm1, sliceOutputValue, sliceInput0Value, sliceInput1Value, Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), m_leftTerm, m_rightTerm); } @@ -2961,13 +2961,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 1) InvalidArgument("StrideTimes operation only takes two inputs."); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); if (m_StrideDim == 1) /// column stride { if (inputIndex == 0) //left derivative { - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() 
* GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); // TimesNode::ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); @@ -2995,7 +2995,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else //right derivative { - Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); // TimesNode::ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); @@ -3022,7 +3022,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (inputIndex == 0) //left derivative { - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); for (size_t k = 0; k < GetNumParallelSequences(); k++) { @@ -3047,7 +3047,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else //right derivative { - Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); for (size_t k = 0; k < GetNumParallelSequences(); k++) { @@ -3127,13 +3127,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { { size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols(); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); UpdateStride(sliceInput1Value); if (m_StrideDim == 0) FunctionValues().Resize(rows0 / GetNumParallelSequences(), cols1); if (m_StrideDim == 1) FunctionValues().Resize(rows0, cols1); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value, m_Stride, m_StrideDim); } diff --git a/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h b/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h index eca814210627..07182f8f093a 100644 --- a/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h @@ -63,11 +63,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { // We should also unify these two functions into one that decides 1 frame 
or all frames at runtime... through the slice-extractor function itself. // For now we could define ALL_SAMPLES e.g. as SIZE_MAX. // GetGradientSlice(), GetInputSlice() or something. - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); // why GradientValues() but m_functionValues below and not FunctionValues()? - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialV(m_gradient, sliceInputValue, sliceInputGrad, sliceOutputGrad); } @@ -81,8 +81,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeV(sliceOutputValue, sliceInputValue); } @@ -206,10 +206,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Sigmoid only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(m_gradient, sliceInputGrad, 
sliceOutputGrad, sliceOutputValue); } @@ -265,10 +265,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Tanh only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(m_gradient, sliceInputGrad, sliceOutputGrad, sliceOutputValue); } @@ -326,10 +326,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Log only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad); } @@ -386,10 +386,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Exp only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), 
GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad); } @@ -445,10 +445,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Cosine only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad); } @@ -508,10 +508,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Softmax only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(m_gradient, m_diff, sliceInputGrad, sliceOutputGrad, sliceOutputValue); } @@ -616,10 +616,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Softmax only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix 
sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(m_gradient, m_softmax, sliceInputGrad, sliceOutputGrad, sliceOutputValue); } @@ -727,8 +727,8 @@ virtual const std::wstring OperationName() const { return TypeName(); } //get the right slice const size_t colsPrior = Inputs(0)->FunctionValues().GetNumCols(); - Matrix sliceGradientValue = m_gradientValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix slicePosterior = m_posterior.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceGradientValue = m_gradientValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix slicePosterior = m_posterior.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); switch (inputIndex) { @@ -738,40 +738,40 @@ virtual const std::wstring OperationName() const { return TypeName(); } ComputeInputPartialUnnormedPrior(Inputs(0)->GradientValues(), sliceGradientValue, m_prior, slicePosterior, m_temp); else { - Matrix sliceUnnormedPriorGradient = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix slicePrior = m_prior.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceUnnormedPriorGradient = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix slicePrior = m_prior.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialUnnormedPrior(sliceUnnormedPriorGradient, sliceGradientValue, slicePrior, slicePosterior, m_temp); } } break; case 1: { - Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); if (colsPrior == 1) ComputeInputPartialMean(Inputs(1)->GradientValues(), sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); else 
{ - Matrix sliceMeanGradient = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceMeanGradient = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialMean(sliceMeanGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); } } break; case 2: { - Matrix sliceNormedDeviation = m_normedDeviation.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceNormedDeviation = m_normedDeviation.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); if (colsPrior == 1) ComputeInputPartialLogStddev(Inputs(2)->GradientValues(), sliceGradientValue, sliceNormedDeviation, slicePosterior, m_temp); else { - Matrix sliceLotStddevGradient = Inputs(2)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceLotStddevGradient = Inputs(2)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLogStddev(sliceLotStddevGradient, sliceGradientValue, sliceNormedDeviation, slicePosterior, m_temp); } } break; case 3: { - Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceFeatureGradient = Inputs(3)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceFeatureGradient = Inputs(3)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialFeature(sliceFeatureGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); } break; @@ -888,11 +888,11 @@ virtual const std::wstring OperationName() const { return TypeName(); } size_t numSamples = Inputs(3)->FunctionValues().GetNumCols(); //get the right slice - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceFeature = Inputs(3)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceNormedDeviation = m_normedDeviation.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix slicePosterior = m_posterior.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * 
GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceFeature = Inputs(3)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceNormedDeviation = m_normedDeviation.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix slicePosterior = m_posterior.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); if (colsPrior == 1) { @@ -901,12 +901,12 @@ virtual const std::wstring OperationName() const { return TypeName(); } } else if (colsPrior == numSamples) { - Matrix sliceUnnormedPrior = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceMean = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceLogstddev = Inputs(2)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceUnnormedPrior = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceMean = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceLogstddev = Inputs(2)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix slicePrior = m_prior.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceStddev = m_stddev.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix slicePrior = m_prior.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceStddev = m_stddev.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceUnnormedPrior, sliceMean, sliceLogstddev, sliceFeature, slicePrior, sliceStddev, sliceNormedDeviationVectors, sliceNormedDeviation, slicePosterior, m_temp); @@ -1113,13 +1113,13 @@ virtual const std::wstring OperationName() const { return TypeName(); } if (inputIndex > 0) InvalidArgument("Dropout operation only takes one input."); - Matrix sliceInput0Grad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = 
GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Grad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceMask = Matrix(); if (m_dropoutRate > 0) { - sliceMask = m_maskOfDropout.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + sliceMask = m_maskOfDropout.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); } ComputeInputPartialS(m_dropoutRate, sliceInput0Grad, sliceMask, sliceOutputGrad); @@ -1143,7 +1143,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } } virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = Matrix (); Matrix sliceMask = Matrix(); @@ -1151,10 +1151,10 @@ virtual const std::wstring OperationName() const { return TypeName(); } { FunctionValues().Resize(Inputs(0)->FunctionValues().GetNumRows(), Inputs(0)->FunctionValues().GetNumCols()); m_maskOfDropout.Resize(Inputs(0)->FunctionValues().GetNumRows(), Inputs(0)->FunctionValues().GetNumCols()); - sliceMask = m_maskOfDropout.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + sliceMask = m_maskOfDropout.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); } - sliceOutputValue = FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + sliceOutputValue = FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(m_dropoutRate, m_randomSeed, sliceOutputValue, sliceMask, sliceInput0Value); } @@ -1405,8 +1405,9 @@ virtual const std::wstring OperationName() const { return TypeName(); } } size_t outputSamplesInRecurrentStep = GetNumParallelSequences() * rows / m_numRows; - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep); + Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + // BUGBUG: the following will fail since outputSamplesInRecurrentStep will not match 
m_pMBLayout. Need to find out what this means (currently layout is constant throughout the graph), and implement it correctly. + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue, m_numRows); } @@ -1449,8 +1450,9 @@ virtual const std::wstring OperationName() const { return TypeName(); } size_t outputSamplesInRecurrentStep = GetNumParallelSequences() * rows / m_numRows; - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep); + Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + // BUGBUG: the following will fail since outputSamplesInRecurrentStep will not match m_pMBLayout. Need to find out what this means (currently layout is constant throughout the graph), and implement it correctly. + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_numRows); } @@ -1646,8 +1648,8 @@ virtual const std::wstring OperationName() const { return TypeName(); } virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue, m_numRepeat); } @@ -1673,8 +1675,8 @@ virtual const std::wstring OperationName() const { return TypeName(); } if (inputIndex != 0) InvalidArgument("RowRepeat only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); + Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_numRepeat); } 
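The hunks above all thread one transitional idiom through the FrameSlice() call sites; reduced to its core it looks as follows. This is a minimal illustrative sketch, not the actual CNTK classes: FrameRangeSketch is a hypothetical stand-in for FrameRange, it assumes the column layout used throughout this code (the S parallel sequences of time step t occupy columns [t * S, (t + 1) * S) of the minibatch matrix), and LogicError is replaced by a plain exception.

    #include <cstddef>
    #include <stdexcept>

    // Sketch of the FrameSlice()/Check() idiom (illustrative stand-in only).
    struct FrameRangeSketch
    {
        size_t timeIdxInSeq;            // start frame t
        size_t samplesInRecurrentStep;  // number of parallel sequences S
        size_t StartColumn() const { return timeIdxInSeq * samplesInRecurrentStep; }
        size_t NumCols() const { return samplesInRecurrentStep; }
        // Transitional guard: the explicit (startColumn, numCols) pair that the
        // old FrameSlice() overload took must agree with what the FrameRange
        // computes on its own; once all call sites pass, the explicit arguments
        // (and this Check()) can be deleted.
        const FrameRangeSketch & Check(size_t expectedStartColumn, size_t expectedNumCols) const
        {
            if (expectedStartColumn != StartColumn() || expectedNumCols != NumCols())
                throw std::logic_error("Check: explicit column range disagrees with FrameRange");
            return *this; // by const reference, so it nests directly inside a FrameSlice() call
        }
    };

    // Usage, mirroring the call sites above:
    //   auto slice = values.FrameSlice(frameRange.Check(t * S, S), pMBLayout);

The ReshapeNode BUGBUG above is exactly a case where this guard must fire: its expected range is computed with outputSamplesInRecurrentStep rather than the layout's S, so the two sides disagree whenever rows != m_numRows.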
diff --git a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h index d0a73d2bac4e..2936774923d6 100644 --- a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h @@ -606,18 +606,18 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (int timeIdxInSeq = nT - GetNumParallelSequences(); timeIdxInSeq >= 0; timeIdxInSeq -= GetNumParallelSequences()) { FrameRange frameRange(timeIdxInSeq, GetNumParallelSequences()); - Matrix sliceObs = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, GetNumParallelSequences()); - Matrix sliceOutput = FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, GetNumParallelSequences()); - Matrix sliceState = m_State.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, GetNumParallelSequences()); + Matrix sliceObs = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutput = FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); + Matrix sliceState = m_State.FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); - Matrix sliceGi = m_Gi.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, GetNumParallelSequences()); - Matrix sliceGf = m_Gf.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, GetNumParallelSequences()); - Matrix sliceGo = m_Go.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, GetNumParallelSequences()); + Matrix sliceGi = m_Gi.FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); + Matrix sliceGf = m_Gf.FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); + Matrix sliceGo = m_Go.FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); - Matrix sliceTanhState = tanhState.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, GetNumParallelSequences()); - Matrix sliceTanhObs = tanhObs.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, GetNumParallelSequences()); + Matrix sliceTanhState = tanhState.FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); + Matrix sliceTanhObs = tanhObs.FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); - Matrix error = GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, GetNumParallelSequences()); + Matrix error = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); Matrix grdToObsSlice(this->m_deviceId); @@ -666,7 +666,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { grdToPrevState, m_tempMatrix ); - grdToObs.FrameSlice(frameRange/*TODO: delete the next two parameters*/, timeIdxInSeq, GetNumParallelSequences()).SetValue(grdToObsSlice); + grdToObs.FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout).SetValue(grdToObsSlice); PrepareErrors(timeIdxInSeq, grdToPrevOutput, grdToPrevState, 
GetNumParallelSequences(), &m_pMBLayout->GetM()); } @@ -997,16 +997,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (size_t timeIdxInSeq = 0; timeIdxInSeq < nT; timeIdxInSeq += GetNumParallelSequences()) { FrameRange frameRange(timeIdxInSeq, GetNumParallelSequences()); - Matrix sliceObs = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), GetNumParallelSequences()); - Matrix sliceOutput = FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), GetNumParallelSequences()); - Matrix sliceState = m_State.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), GetNumParallelSequences()); + Matrix sliceObs = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutput = FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceState = m_State.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceGi = m_Gi.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), GetNumParallelSequences()); - Matrix sliceGf = m_Gf.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), GetNumParallelSequences()); - Matrix sliceGo = m_Go.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), GetNumParallelSequences()); + Matrix sliceGi = m_Gi.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceGf = m_Gf.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceGo = m_Go.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceTanhState = tanhState.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), GetNumParallelSequences()); - Matrix sliceTanhInput = tanhObs.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t(), GetNumParallelSequences()); + Matrix sliceTanhState = tanhState.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceTanhInput = tanhObs.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); PrepareHistory(timeIdxInSeq, mSlicePrevOutput, mSlicePrevState, FunctionValues(), m_State, m_PastOutput, m_PastState, GetNumParallelSequences(), m_DefaultState, &m_pMBLayout->GetM()); diff --git a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h index 6f608b529d88..c16fc6af5ec6 100644 --- a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h @@ -879,9 +879,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t sz = 0; for (size_t t = 0; t < nT; t++) { - FrameRange frameRange(t, 1); + FrameRange frameRange(t, 1); // TODO: change to frameRange over a whole MB with a sequence index. 
BUGBUG: below code will break until this is fixed /// compute prb - 1 and prb - Matrix lbl_t = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, t, 1); + Matrix lbl_t = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); size_t c_t = (size_t)lbl_t(1, 0); size_t lft_bnd = (size_t)lbl_t(2, 0); size_t rgt_bnd = (size_t)lbl_t(3, 0); @@ -890,14 +890,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { continue; Matrix input_weight_t = Inputs(2)->FunctionValues().ColumnSlice(lft_bnd, nbr_wrd); - Matrix obs = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, t, 1); + Matrix obs = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); Matrix grd_to_soft_max_input = m_grdToSoftMaxInput.ColumnSlice(sz, nbr_wrd); - Matrix grd_to_cls_prob = m_clsLogSoftmax.FrameSlice(frameRange/*TODO: delete the next two parameters*/, t, 1); + Matrix grd_to_cls_prob = m_clsLogSoftmax.FrameSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); switch (inputIndex){ case 1: /// gradient to input - grd_t = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, t, 1); + grd_t = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); ComputeInputPartialRight(input_weight_t, grd_t, grd_to_soft_max_input); break; case 2: @@ -906,8 +906,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputeInputPartialLeft(obs, grd_to_wgt_t, grd_to_soft_max_input); break; case 3: - grd_t = Inputs(3)->GradientValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, t, 1); - grd_t.SetValue(m_clsSoftmax.FrameSlice(frameRange/*TODO: delete the next two parameters*/, t, 1)); + grd_t = Inputs(3)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + grd_t.SetValue(m_clsSoftmax.FrameSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout)); ComputeCEPartialToSoftmaxInputs(grd_t, GradientValues(), c_t); break; default: @@ -947,7 +947,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { FrameRange frameRange(t, 1); /// compute prb - 1 and prb - Matrix lbl_t = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete the next two parameters*/, t, 1); + Matrix lbl_t = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); size_t y_t = (size_t)lbl_t(0, 0); size_t lft_bnd = (size_t)lbl_t(2, 0); size_t rgt_bnd = (size_t)lbl_t(3, 0); diff --git a/Math/Math/Matrix.cpp b/Math/Math/Matrix.cpp index e65ed384a7a0..892f6b784383 100644 --- a/Math/Math/Matrix.cpp +++ b/Math/Math/Matrix.cpp @@ -773,6 +773,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } // special convenience function to apply ColumnSlice() to getting a frame range +#if 0 // It assumes that columns are frames, and returns a sub-range. // TODO: decide whether this belongs here or elsewhere // TODO: remove this one, as it does not take #slices explicitly, which will be needed in the future @@ -788,6 +789,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { LogicError("FrameSlice: FrameRange object gives different range than original explicit code. 
Logic is borked."); return ColumnSlice(frameRange.StartColumn(), frameRange.NumCols()); } +#endif template Matrix Matrix::FrameSlice(const FrameRange & frameRange, const shared_ptr & pMBLayout) const { diff --git a/Math/Math/Matrix.h b/Math/Math/Matrix.h index db99e0875956..89548709374a 100644 --- a/Math/Math/Matrix.h +++ b/Math/Math/Matrix.h @@ -146,7 +146,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { Matrix ColumnSlice(size_t startColumn, size_t numCols) const; // special convenience function to apply ColumnSlice() to getting a frame range - Matrix FrameSlice(const struct FrameRange & frameRange, size_t expectedStartColumn, size_t expectedNumCols) const; + //Matrix FrameSlice(const struct FrameRange & frameRange, size_t expectedStartColumn, size_t expectedNumCols) const; Matrix FrameSlice(const struct FrameRange & frameRange, const shared_ptr & pMBLayout) const; // difference between AssignColumnSlice and SetColumnSlice @@ -596,6 +596,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { // - RecurrentNodes iterate over individual slices--need a sub-setting constructor from a FrameRange to another? // - RecurrentNodes access boundary info with a similar pattern, but boundary info has a different #streams (namely, 1) // TODO: This will in the future be able to hold sub-ranges for nested loops as well. + // BUGBUG: These are currently broken and will need to be fixed: + // - ClassBasedCrossEntropyWithSoftmaxNode: + // FrameRange frameRange(t, 1); + // using a different #sequences. Solve by treating all frames as one sequence (in FrameRange) + // - ReshapeNode: + // Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep), m_pMBLayout); + // using a different #sequences. Find out what this really means. struct FrameRange { const size_t timeIdxInSeq; // start frame @@ -615,6 +622,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t StartColumn(const shared_ptr & pMBLayout) const { EnsureNotAllFrames(); VerifyMBLayout(pMBLayout); return timeIdxInSeq * pMBLayout->GetNumParallelSequences(); } size_t NumCols(const shared_ptr & pMBLayout) const { EnsureNotAllFrames(); VerifyMBLayout(pMBLayout); return pMBLayout->GetNumParallelSequences(); } bool IsAllFrames() const { return samplesInRecurrentStep == SIZE_MAX; } // if true then above functions may not be called; caller must use entire batch instead + + const FrameRange & Check(size_t expectedStartColumn, size_t expectedNumCols) const + { + if (!IsAllFrames() && (expectedStartColumn != StartColumn() || expectedNumCols != NumCols())) + LogicError("FrameSlice: FrameRange object gives different range than original explicit code. 
Logic is borked."); + return *this; + } private: FrameRange(const FrameRange & other);// : timeIdxInSeq(other.timeIdxInSeq), numFrames(other.numFrames) { } void operator=(const FrameRange &); From 94e354e08dd8ce114e0d122d05423370b780914e Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Mon, 21 Sep 2015 11:46:00 -0700 Subject: [PATCH 23/44] (made gcc happy) --- MachineLearning/CNTKComputationNetworkLib/ComputationNode.h | 1 + 1 file changed, 1 insertion(+) diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h index 35427db341a4..390cce90db54 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h @@ -1278,6 +1278,7 @@ protected: \ using Base::m_indexInLoop; \ using Base::m_pMBLayout; \ using Base::m_reqMultiSeqHandling; using Base::UseCustomizedMultiSeqHandling; using Base::GetNumParallelSequences; \ + using Base::DataSlice; using Base::OUTPUT; using Base::VALUE; using Base::GRADIENT; \ using Base::m_children; using Base::m_deviceId; using Base::m_evalTimeStamp; using Base::m_functionValues; using Base::m_gradientValues; \ using Base::m_inputChannels; using Base::m_inputHeight; using Base::m_inputWidth; using Base::m_needGradient; using Base::m_nodeName; \ using Base::m_outputChannels; using Base::m_outputHeight; using Base::m_outputWidth; using Base::s_constOnes; using Base::s_timeStampCounter; \ From 23e6f5833fdbbee97dc516a78a9880eb23451955 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Mon, 21 Sep 2015 11:58:35 -0700 Subject: [PATCH 24/44] changed all Inputs(n)...FrameSlice to DataSlice(n, ...) --- .../CompositeComputationNodes.h | 4 +- .../ComputationNode.h | 4 +- .../ConvolutionalNodes.h | 12 +- .../InputAndParamNodes.h | 8 +- .../LinearAlgebraNodes.h | 108 +++++++++--------- .../NonlinearityNodes.h | 54 ++++----- .../RecurrentNodes.h | 6 +- .../TrainingCriterionNodes.h | 10 +- 8 files changed, 104 insertions(+), 102 deletions(-) diff --git a/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h b/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h index 0ef020705100..6e0c2e07c1da 100644 --- a/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h @@ -539,7 +539,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { //only feature (input0) and output needs to be sliced - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues()); @@ -690,7 +690,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { //only feature (input0) and output needs to be sliced - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete 
this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues()); diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h index 390cce90db54..3b9a4145608d 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h @@ -1003,7 +1003,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { enum ValueOrGradient { VALUE, GRADIENT }; Matrix DataSlice(size_t index/*input index or OUT*/, ValueOrGradient valueOrGradient/*as it says*/, - const FrameRange & frameRange/*select frame or entire batch*/, size_t sequence = SEQUENCE_ALL/*SEQUENCE_ALL is the normal case*/) + const FrameRange & frameRange/*select frame or entire batch*/, + const MBLayoutPtr &, // DELETE THIS after refactoring; it's a dummy left-over + size_t sequence = SEQUENCE_ALL/*SEQUENCE_ALL is the normal case*/) { ComputationNode * node = (index == OUTPUT) ? this : Inputs(index).get(); Matrix & data = (valueOrGradient == VALUE) ? node->FunctionValues() : node->GradientValues(); diff --git a/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h b/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h index a497972dbc28..c3d7dbc31655 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h @@ -112,13 +112,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { InvalidArgument("Convolution operation only takes two inputs."); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); if (inputIndex == 0) //derivative with regard to the weight matrix ComputeInputPartialOverWeight(sliceOutputGrad, Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), sliceInput1Value, m_tempMatrix, !frameRange.IsAllFrames()); else // derivative with regard to the input feature { - Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialOverInputFeature(sliceOutputGrad, sliceInput1Grad, Inputs(0)->FunctionValues(), sliceInput1Value, m_tempMatrix); } } @@ -215,7 +215,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) 
{ - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value, m_tempMatrix); } @@ -433,10 +433,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 0) InvalidArgument("MaxPooling operation only takes one inputs."); - Matrix sliceInput0Grad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Grad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialV(sliceOutputGrad, sliceInput0Grad, sliceInput0Value, sliceOutputValue); @@ -447,7 +447,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeV(sliceOutputValue, sliceInput0Value); } diff --git a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h index 02c61c76d794..06e9331e5079 100644 --- a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h @@ -349,13 +349,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex == 0) //left derivative { Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); 
+ Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); } else //right derivative { - Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); @@ -402,7 +402,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); @@ -585,7 +585,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { Matrix mTmp = FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - mTmp.SetValue(Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout)); + mTmp.SetValue(DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout)); } virtual void /*ComputationNodeBase::*/Validate() diff --git a/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h b/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h index 4ea368de7937..aa5c8529e6e4 100644 --- a/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h @@ -53,7 +53,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Negate operation only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad); @@ -71,7 +71,7 @@ namespace Microsoft { namespace MSR { 
namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue); } @@ -138,7 +138,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("SumElements only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad); @@ -156,7 +156,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue); @@ -233,7 +233,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("SumColumnElements only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad); @@ -251,7 +251,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete 
this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue); @@ -370,7 +370,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("RowSlice only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_startIndex, m_numRows); @@ -388,7 +388,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue, m_startIndex, m_numRows); @@ -624,13 +624,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex == 0) //left derivative { Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); } else { - Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); @@ -654,7 +654,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * 
GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); @@ -743,13 +743,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex == 0) //left derivative { Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); } else //right derivative { - Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); @@ -801,7 +801,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols(); FunctionValues().Resize(rows0, cols1); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); @@ -916,13 +916,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex == 0) //left derivative { Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); } else //right derivative { - Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), 
m_pMBLayout); + Matrix sliceInput1Grad = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); @@ -969,7 +969,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); @@ -1100,8 +1100,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value); @@ -1252,8 +1252,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value); @@ -1357,13 +1357,13 @@ 
namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex == 0) { - Matrix sliceInput0Grad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Grad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeftS(Inputs(1)->FunctionValues(), sliceInput0Grad, sliceOutputGrad, m_tempMatrix); } else { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRightS(sliceInput0Value, Inputs(1)->GradientValues(), sliceOutputGrad, m_tempMatrix); } } @@ -1403,7 +1403,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues()); @@ -1589,20 +1589,20 @@ namespace Microsoft { namespace MSR { namespace CNTK { //only the one with more columns can be sliced, if both have same columns both are sliced if (cols0 == cols1) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value); } else if (cols0 > cols1) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues()); } else //cols0 < cols1) { - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, 
VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); } @@ -1895,20 +1895,20 @@ namespace Microsoft { namespace MSR { namespace CNTK { //only the one with more columns can be sliced, if both have same columns both are sliced if (cols0 == cols1) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value); } else if (cols0 > cols1) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues()); } else //cols0 < cols1) { - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); } @@ -2052,12 +2052,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex == 0) //left derivative { - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(m_innerproduct, sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); } else //right derivative { - Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(m_rightGradient, Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); } } @@ -2083,7 +2083,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * 
GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); @@ -2205,8 +2205,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 1) InvalidArgument("CosDistance operation only takes two inputs."); - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceInputGrad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = this->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -2280,8 +2280,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(m_invNorm0, m_invNorm1, sliceOutputValue, sliceInput0Value, sliceInput1Value); @@ -2430,15 +2430,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex == 0) //left derivative { - Matrix sliceInput0Grad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete 
this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Grad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(sliceInput1Value, sliceInput0Grad, sliceOutputGrad); } else //right derivative { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(sliceInput0Value, sliceInput1Grad, sliceOutputGrad); } @@ -2461,8 +2461,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value); @@ -2564,8 +2564,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 1) InvalidArgument("CosDistanceWithNegativeSamples operation only takes grdients on the first two inputs."); - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * 
GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceInputGrad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceThisGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -2681,8 +2681,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(m_invNorm0, m_invNorm1, sliceOutputValue, sliceInput0Value, sliceInput1Value, Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), m_leftTerm, m_rightTerm); @@ -2967,7 +2967,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (inputIndex == 0) //left derivative { - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); // TimesNode::ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); @@ -2995,7 +2995,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else //right derivative { - Matrix sliceInput1Grad = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); // TimesNode::ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); @@ -3022,7 +3022,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (inputIndex == 0) //left derivative { - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); for (size_t k = 0; k < GetNumParallelSequences(); k++) { @@ -3047,7 +3047,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else //right derivative { - Matrix sliceInput1Grad = 
Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); for (size_t k = 0; k < GetNumParallelSequences(); k++) { @@ -3127,7 +3127,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols(); - Matrix sliceInput1Value = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); UpdateStride(sliceInput1Value); if (m_StrideDim == 0) FunctionValues().Resize(rows0 / GetNumParallelSequences(), cols1); diff --git a/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h b/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h index 07182f8f093a..1a2566ffadac 100644 --- a/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h @@ -63,11 +63,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { // We should also unify these two functions into one that decides 1 frame or all frames at runtime... through the slice-extractor function itself. // For now we could define ALL_SAMPLES e.g. as SIZE_MAX. // GetGradientSlice(), GetInputSlice() or something. - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); // why GradientValues() but m_functionValues below and not FunctionValues()? 
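A minimal sketch of the column arithmetic behind all of these slices, assuming the layout implied by the Check() guards above (S parallel sequences x T time steps stored as S*T columns, with the S samples of time step t contiguous); FrameSliceSketch is a hypothetical name for illustration, not the CNTK API:

template <class ElemType>
Matrix<ElemType> FrameSliceSketch(Matrix<ElemType>& data, size_t t, size_t S)
{
    // the S samples of time step t occupy columns [t*S, (t+1)*S)
    return data.ColumnSlice(t * S, S); // column-range slice, no copy of the data
}

This (startColumn, numColumns) pair is exactly what the temporary frameRange.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()) calls verify at every call site.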
- Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialV(m_gradient, sliceInputValue, sliceInputGrad, sliceOutputGrad); } @@ -81,7 +81,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeV(sliceOutputValue, sliceInputValue); @@ -206,7 +206,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Sigmoid only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -265,7 +265,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Tanh only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -326,10 +326,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Log only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * 
GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad); } @@ -386,10 +386,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Exp only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad); } @@ -445,10 +445,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Cosine only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad); } @@ -508,7 +508,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Softmax only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete 
this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -616,7 +616,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Softmax only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -738,7 +738,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } ComputeInputPartialUnnormedPrior(Inputs(0)->GradientValues(), sliceGradientValue, m_prior, slicePosterior, m_temp); else { - Matrix sliceUnnormedPriorGradient = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceUnnormedPriorGradient = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix slicePrior = m_prior.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialUnnormedPrior(sliceUnnormedPriorGradient, sliceGradientValue, slicePrior, slicePosterior, m_temp); } @@ -751,7 +751,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } ComputeInputPartialMean(Inputs(1)->GradientValues(), sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); else { - Matrix sliceMeanGradient = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceMeanGradient = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialMean(sliceMeanGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); } } @@ -763,7 +763,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } ComputeInputPartialLogStddev(Inputs(2)->GradientValues(), sliceGradientValue, sliceNormedDeviation, slicePosterior, m_temp); else { - Matrix sliceLotStddevGradient = Inputs(2)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceLotStddevGradient = DataSlice(2, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLogStddev(sliceLotStddevGradient, sliceGradientValue, sliceNormedDeviation, slicePosterior, m_temp); } } @@ -771,7 +771,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } case 3: { Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), 
GetNumParallelSequences()), m_pMBLayout); - Matrix sliceFeatureGradient = Inputs(3)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceFeatureGradient = DataSlice(3, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialFeature(sliceFeatureGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); } break; @@ -889,7 +889,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } //get the right slice Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceFeature = Inputs(3)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceFeature = DataSlice(3, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceNormedDeviation = m_normedDeviation.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix slicePosterior = m_posterior.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -901,9 +901,9 @@ virtual const std::wstring OperationName() const { return TypeName(); } } else if (colsPrior == numSamples) { - Matrix sliceUnnormedPrior = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceMean = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceLogstddev = Inputs(2)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceUnnormedPrior = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceMean = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceLogstddev = DataSlice(2, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix slicePrior = m_prior.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceStddev = m_stddev.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -1113,7 +1113,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } if (inputIndex > 0) InvalidArgument("Dropout operation only takes one input."); - Matrix sliceInput0Grad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: 
delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Grad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceMask = Matrix(); @@ -1143,7 +1143,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } } virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = Matrix (); Matrix sliceMask = Matrix(); @@ -1405,7 +1405,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } } size_t outputSamplesInRecurrentStep = GetNumParallelSequences() * rows / m_numRows; - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); // BUGBUG: the following will fail since outputSamplesInRecurrentStep will not match m_pMBLayout. Need to find out what this means (currently layout is constant throughout the graph), and implement it correctly. Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep), m_pMBLayout); @@ -1450,7 +1450,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } size_t outputSamplesInRecurrentStep = GetNumParallelSequences() * rows / m_numRows; - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); // BUGBUG: the following will fail since outputSamplesInRecurrentStep will not match m_pMBLayout. Need to find out what this means (currently layout is constant throughout the graph), and implement it correctly. 
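// A concrete illustration of the mismatch flagged in the BUGBUG comment above,
// with hypothetical sizes (not taken from any actual model): if m_numRows = 10,
// the input has rows = 20, and GetNumParallelSequences() = 2, then
//   outputSamplesInRecurrentStep = 2 * 20 / 10 = 4,
// i.e. the output slice below spans 4 columns per time step while m_pMBLayout
// still describes only 2 parallel sequences, so the two cannot line up.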
Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep), m_pMBLayout); @@ -1648,7 +1648,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue, m_numRepeat); @@ -1675,7 +1675,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } if (inputIndex != 0) InvalidArgument("RowRepeat only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_numRepeat); diff --git a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h index 2936774923d6..02afa697c3d9 100644 --- a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h @@ -225,7 +225,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { d = (int)functionValues.Mod((float)delayedIndex, (float)delayedActivation.GetNumCols()); // this can point to the past activity of the previous minibatch - Matrix out = DataSlice(OUTPUT, VALUE, frameRange); + Matrix out = DataSlice(OUTPUT, VALUE, frameRange, m_pMBLayout); Matrix inp((DEVICEID_TYPE)functionValues.GetDeviceId()); if (minibatchPackingFlags & SequenceStart_or_End) @@ -606,7 +606,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (int timeIdxInSeq = nT - GetNumParallelSequences(); timeIdxInSeq >= 0; timeIdxInSeq -= GetNumParallelSequences()) { FrameRange frameRange(timeIdxInSeq, GetNumParallelSequences()); - Matrix sliceObs = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); + Matrix sliceObs = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutput = FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); Matrix sliceState = m_State.FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); @@ -997,7 +997,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (size_t timeIdxInSeq = 0; timeIdxInSeq < nT; timeIdxInSeq += GetNumParallelSequences()) { FrameRange frameRange(timeIdxInSeq, 
GetNumParallelSequences()); - Matrix sliceObs = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceObs = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutput = FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceState = m_State.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); diff --git a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h index c16fc6af5ec6..bcdbdf0a9135 100644 --- a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h @@ -881,7 +881,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { FrameRange frameRange(t, 1); // TODO: change to frameRange over a whole MB with a sequence index. BUGBUG: below code will break until this is fixed /// compute prb - 1 and prb - Matrix lbl_t = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + Matrix lbl_t = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); size_t c_t = (size_t)lbl_t(1, 0); size_t lft_bnd = (size_t)lbl_t(2, 0); size_t rgt_bnd = (size_t)lbl_t(3, 0); @@ -890,14 +890,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { continue; Matrix input_weight_t = Inputs(2)->FunctionValues().ColumnSlice(lft_bnd, nbr_wrd); - Matrix obs = Inputs(1)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + Matrix obs = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); Matrix grd_to_soft_max_input = m_grdToSoftMaxInput.ColumnSlice(sz, nbr_wrd); Matrix grd_to_cls_prob = m_clsLogSoftmax.FrameSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); switch (inputIndex){ case 1: /// gradient to input - grd_t = Inputs(1)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + grd_t = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); ComputeInputPartialRight(input_weight_t, grd_t, grd_to_soft_max_input); break; case 2: @@ -906,7 +906,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputeInputPartialLeft(obs, grd_to_wgt_t, grd_to_soft_max_input); break; case 3: - grd_t = Inputs(3)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + grd_t = DataSlice(3, GRADIENT, frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); grd_t.SetValue(m_clsSoftmax.FrameSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout)); ComputeCEPartialToSoftmaxInputs(grd_t, GradientValues(), c_t); break; @@ -947,7 +947,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { FrameRange frameRange(t, 1); /// compute prb - 1 and prb - Matrix lbl_t = Inputs(0)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + Matrix lbl_t = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); size_t y_t = (size_t)lbl_t(0, 0); size_t lft_bnd = (size_t)lbl_t(2, 0); size_t rgt_bnd = (size_t)lbl_t(3, 0); From 3967ba9e8b2afa20b81e1022f82211418061e957 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Mon, 21 Sep 2015 12:07:34 -0700 Subject: [PATCH 
25/44] removed the input index from DataSlice(), seems more clear to say Inputs(n)->DataSlice(...) --- .../CompositeComputationNodes.h | 4 +- .../ComputationNode.h | 9 +- .../ConvolutionalNodes.h | 12 +- .../InputAndParamNodes.h | 8 +- .../LinearAlgebraNodes.h | 108 +++++++++--------- .../NonlinearityNodes.h | 54 ++++----- .../RecurrentNodes.h | 6 +- .../TrainingCriterionNodes.h | 10 +- 8 files changed, 104 insertions(+), 107 deletions(-) diff --git a/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h b/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h index 6e0c2e07c1da..13d9576279cd 100644 --- a/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h @@ -539,7 +539,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { //only feature (input0) and output needs to be sliced - Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues()); @@ -690,7 +690,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { //only feature (input0) and output needs to be sliced - Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues()); diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h index 3b9a4145608d..895294514940 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h @@ -998,17 +998,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { // TODO: remove FrameRange::samplesInRecurrentStep from FrameRange, as it belongs into pMBLayout. Hence this function that binds both together. // Note: This is not used anywhere yet, only a sketch how we may further abstract timing. 
// TODO: move sequence into FrameRange object - enum Index : size_t { OUTPUT = SIZE_MAX }; #define SEQUENCE_ALL SIZE_MAX enum ValueOrGradient { VALUE, GRADIENT }; - Matrix DataSlice(size_t index/*input index or OUT*/, - ValueOrGradient valueOrGradient/*as it says*/, + Matrix DataSlice(ValueOrGradient valueOrGradient/*as it says*/, const FrameRange & frameRange/*select frame or entire batch*/, const MBLayoutPtr &, // DELETE THIS after refactoring; it's a dummy left-over size_t sequence = SEQUENCE_ALL/*SEQUENCE_ALL is the normal case*/) { - ComputationNode * node = (index == OUTPUT) ? this : Inputs(index).get(); - Matrix & data = (valueOrGradient == VALUE) ? node->FunctionValues() : node->GradientValues(); + Matrix & data = (valueOrGradient == VALUE) ? FunctionValues() : GradientValues(); if (frameRange.IsAllFrames()) { if (sequence == SEQUENCE_ALL) @@ -1280,7 +1277,7 @@ protected: \ using Base::m_indexInLoop; \ using Base::m_pMBLayout; \ using Base::m_reqMultiSeqHandling; using Base::UseCustomizedMultiSeqHandling; using Base::GetNumParallelSequences; \ - using Base::DataSlice; using Base::OUTPUT; using Base::VALUE; using Base::GRADIENT; \ + using Base::DataSlice; using Base::VALUE; using Base::GRADIENT; \ using Base::m_children; using Base::m_deviceId; using Base::m_evalTimeStamp; using Base::m_functionValues; using Base::m_gradientValues; \ using Base::m_inputChannels; using Base::m_inputHeight; using Base::m_inputWidth; using Base::m_needGradient; using Base::m_nodeName; \ using Base::m_outputChannels; using Base::m_outputHeight; using Base::m_outputWidth; using Base::s_constOnes; using Base::s_timeStampCounter; \ diff --git a/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h b/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h index c3d7dbc31655..3541a258adf7 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h @@ -112,13 +112,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { InvalidArgument("Convolution operation only takes two inputs."); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); if (inputIndex == 0) //derivative with regard to the weight matrix ComputeInputPartialOverWeight(sliceOutputGrad, Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), sliceInput1Value, m_tempMatrix, !frameRange.IsAllFrames()); else // derivative with regard to the input feature { - Matrix sliceInput1Grad = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialOverInputFeature(sliceOutputGrad, sliceInput1Grad, Inputs(0)->FunctionValues(), sliceInput1Value, m_tempMatrix); } } @@ -215,7 +215,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const 
FrameRange & frameRange) { - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value, m_tempMatrix); } @@ -433,10 +433,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 0) InvalidArgument("MaxPooling operation only takes one inputs."); - Matrix sliceInput0Grad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Grad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialV(sliceOutputGrad, sliceInput0Grad, sliceInput0Value, sliceOutputValue); @@ -447,7 +447,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeV(sliceOutputValue, sliceInput0Value); } diff --git a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h index 06e9331e5079..60e1bf0c1a5a 100644 --- a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h @@ -349,13 +349,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex == 0) //left derivative { Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = 
Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); } else //right derivative { - Matrix sliceInput1Grad = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); @@ -402,7 +402,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); @@ -585,7 +585,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { Matrix mTmp = FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - mTmp.SetValue(DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout)); + mTmp.SetValue(Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout)); } virtual void /*ComputationNodeBase::*/Validate() diff --git a/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h b/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h index aa5c8529e6e4..6af7eab08a5d 100644 --- a/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h @@ -53,7 +53,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Negate operation only has one input."); - Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad); @@ -71,7 +71,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void 
/*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue); } @@ -138,7 +138,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("SumElements only has one input."); - Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad); @@ -156,7 +156,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue); @@ -233,7 +233,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("SumColumnElements only has one input."); - Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad); @@ -251,7 +251,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), 
GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue); @@ -370,7 +370,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("RowSlice only has one input."); - Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_startIndex, m_numRows); @@ -388,7 +388,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue, m_startIndex, m_numRows); @@ -624,13 +624,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex == 0) //left derivative { Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); } else { - Matrix sliceInput1Grad = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); @@ -654,7 +654,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = 
m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); @@ -743,13 +743,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex == 0) //left derivative { Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); } else //right derivative { - Matrix sliceInput1Grad = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); @@ -801,7 +801,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols(); FunctionValues().Resize(rows0, cols1); - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); @@ -916,13 +916,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex == 0) //left derivative { Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); } else //right derivative { - Matrix sliceInput1Grad = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), 
GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); @@ -969,7 +969,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); @@ -1100,8 +1100,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value); @@ -1252,8 +1252,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value); @@ -1357,13 +1357,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex == 0) { - Matrix sliceInput0Grad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * 
GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Grad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeftS(Inputs(1)->FunctionValues(), sliceInput0Grad, sliceOutputGrad, m_tempMatrix); } else { - Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRightS(sliceInput0Value, Inputs(1)->GradientValues(), sliceOutputGrad, m_tempMatrix); } } @@ -1403,7 +1403,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues()); @@ -1589,20 +1589,20 @@ namespace Microsoft { namespace MSR { namespace CNTK { //only the one with more columns can be sliced, if both have same columns both are sliced if (cols0 == cols1) { - Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value); } else if (cols0 > cols1) { - Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues()); } else //cols0 < cols1) { - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); } @@ -1895,20 +1895,20 @@ namespace Microsoft { 
namespace MSR { namespace CNTK { //only the one with more columns can be sliced, if both have same columns both are sliced if (cols0 == cols1) { - Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value); } else if (cols0 > cols1) { - Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues()); } else //cols0 < cols1) { - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); } @@ -2052,12 +2052,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex == 0) //left derivative { - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(m_innerproduct, sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); } else //right derivative { - Matrix sliceInput1Grad = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(m_rightGradient, Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); } } @@ -2083,7 +2083,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete 
this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); @@ -2205,8 +2205,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 1) InvalidArgument("CosDistance operation only takes two inputs."); - Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceInputGrad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = this->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -2280,8 +2280,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(m_invNorm0, m_invNorm1, sliceOutputValue, sliceInput0Value, sliceInput1Value); @@ -2430,15 +2430,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex == 0) //left derivative { - Matrix sliceInput0Grad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Grad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), 
GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(sliceInput1Value, sliceInput0Grad, sliceOutputGrad); } else //right derivative { - Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Grad = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(sliceInput0Value, sliceInput1Grad, sliceOutputGrad); } @@ -2461,8 +2461,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value); @@ -2564,8 +2564,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 1) InvalidArgument("CosDistanceWithNegativeSamples operation only takes gradients on the first two inputs."); - Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceInputGrad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceThisGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -2681,8 +2681,8 @@ namespace
Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(m_invNorm0, m_invNorm1, sliceOutputValue, sliceInput0Value, sliceInput1Value, Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), m_leftTerm, m_rightTerm); @@ -2967,7 +2967,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (inputIndex == 0) //left derivative { - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); // TimesNode::ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); @@ -2995,7 +2995,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else //right derivative { - Matrix sliceInput1Grad = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); // TimesNode::ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); @@ -3022,7 +3022,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (inputIndex == 0) //left derivative { - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); for (size_t k = 0; k < GetNumParallelSequences(); k++) { @@ -3047,7 +3047,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else //right derivative { - Matrix sliceInput1Grad = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); for (size_t k = 0; k < GetNumParallelSequences(); k++) { @@ -3127,7 +3127,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols1 = 
Inputs(1)->FunctionValues().GetNumCols(); - Matrix sliceInput1Value = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); UpdateStride(sliceInput1Value); if (m_StrideDim == 0) FunctionValues().Resize(rows0 / GetNumParallelSequences(), cols1); diff --git a/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h b/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h index 1a2566ffadac..e363473a2155 100644 --- a/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h @@ -63,11 +63,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { // We should also unify these two functions into one that decides 1 frame or all frames at runtime... through the slice-extractor function itself. // For now we could define ALL_SAMPLES e.g. as SIZE_MAX. // GetGradientSlice(), GetInputSlice() or something. - Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); // why GradientValues() but m_functionValues below and not FunctionValues()? - Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialV(m_gradient, sliceInputValue, sliceInputGrad, sliceOutputGrad); } @@ -81,7 +81,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeV(sliceOutputValue, sliceInputValue); @@ -206,7 +206,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Sigmoid only has one input."); - Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete 
this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -265,7 +265,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Tanh only has one input."); - Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -326,10 +326,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Log only has one input."); - Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad); } @@ -386,10 +386,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Exp only has one input."); - Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad); } @@ -445,10 +445,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Cosine only has one input."); - Matrix sliceInputGrad = DataSlice(0, GRADIENT, 
frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad); } @@ -508,7 +508,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Softmax only has one input."); - Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -616,7 +616,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Softmax only has one input."); - Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -738,7 +738,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } ComputeInputPartialUnnormedPrior(Inputs(0)->GradientValues(), sliceGradientValue, m_prior, slicePosterior, m_temp); else { - Matrix sliceUnnormedPriorGradient = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceUnnormedPriorGradient = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix slicePrior = m_prior.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialUnnormedPrior(sliceUnnormedPriorGradient, sliceGradientValue, slicePrior, slicePosterior, m_temp); } @@ -751,7 +751,7 @@ virtual const 
std::wstring OperationName() const { return TypeName(); } ComputeInputPartialMean(Inputs(1)->GradientValues(), sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); else { - Matrix sliceMeanGradient = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceMeanGradient = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialMean(sliceMeanGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); } } @@ -763,7 +763,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } ComputeInputPartialLogStddev(Inputs(2)->GradientValues(), sliceGradientValue, sliceNormedDeviation, slicePosterior, m_temp); else { - Matrix sliceLotStddevGradient = DataSlice(2, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceLotStddevGradient = Inputs(2)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLogStddev(sliceLotStddevGradient, sliceGradientValue, sliceNormedDeviation, slicePosterior, m_temp); } } @@ -771,7 +771,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } case 3: { Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceFeatureGradient = DataSlice(3, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceFeatureGradient = Inputs(3)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialFeature(sliceFeatureGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); } break; @@ -889,7 +889,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } //get the right slice Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceFeature = DataSlice(3, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceFeature = Inputs(3)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceNormedDeviation = m_normedDeviation.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix slicePosterior = m_posterior.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -901,9 +901,9 @@ virtual const std::wstring OperationName() const { return TypeName(); } } else if (colsPrior == numSamples) { - Matrix sliceUnnormedPrior = 
DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceMean = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceLogstddev = DataSlice(2, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceUnnormedPrior = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceMean = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceLogstddev = Inputs(2)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix slicePrior = m_prior.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceStddev = m_stddev.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -1113,7 +1113,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } if (inputIndex > 0) InvalidArgument("Dropout operation only takes one input."); - Matrix sliceInput0Grad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Grad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceMask = Matrix(); @@ -1143,7 +1143,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } } virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = Matrix (); Matrix sliceMask = Matrix(); @@ -1405,7 +1405,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } } size_t outputSamplesInRecurrentStep = GetNumParallelSequences() * rows / m_numRows; - Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); // BUGBUG: the following will fail since outputSamplesInRecurrentStep will not match m_pMBLayout. Need to find out what this means (currently layout is constant throughout the graph), and implement it correctly. 
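// [editor's note, not part of the original patch] To make the BUGBUG above concrete: with
// outputSamplesInRecurrentStep = GetNumParallelSequences() * rows / m_numRows, reshaping from
// 'rows' to 'm_numRows' rows per sample rescales the column count of each time step; e.g.
// rows = 20, m_numRows = 10 and 2 parallel sequences give 2 * 20 / 10 = 4 output columns per
// step while m_pMBLayout still describes 2 -- exactly the mismatch the comment warns about.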
Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep), m_pMBLayout); @@ -1450,7 +1450,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } size_t outputSamplesInRecurrentStep = GetNumParallelSequences() * rows / m_numRows; - Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); // BUGBUG: the following will fail since outputSamplesInRecurrentStep will not match m_pMBLayout. Need to find out what this means (currently layout is constant throughout the graph), and implement it correctly. Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep), m_pMBLayout); @@ -1648,7 +1648,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue, m_numRepeat); @@ -1675,7 +1675,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } if (inputIndex != 0) InvalidArgument("RowRepeat only has one input."); - Matrix sliceInputGrad = DataSlice(0, GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_numRepeat); diff --git a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h index 02afa697c3d9..132296b4daa3 100644 --- a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h @@ -225,7 +225,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { d = (int)functionValues.Mod((float)delayedIndex, (float)delayedActivation.GetNumCols()); // this can point to the past activity of the previous minibatch - Matrix out = DataSlice(OUTPUT, VALUE, frameRange, m_pMBLayout); + Matrix out = DataSlice(VALUE, frameRange, m_pMBLayout); Matrix inp((DEVICEID_TYPE)functionValues.GetDeviceId()); if (minibatchPackingFlags & SequenceStart_or_End) @@ -606,7 +606,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (int timeIdxInSeq = nT - GetNumParallelSequences(); 
timeIdxInSeq >= 0; timeIdxInSeq -= GetNumParallelSequences()) { FrameRange frameRange(timeIdxInSeq, GetNumParallelSequences()); - Matrix sliceObs = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); + Matrix sliceObs = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutput = FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); Matrix sliceState = m_State.FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); @@ -997,7 +997,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (size_t timeIdxInSeq = 0; timeIdxInSeq < nT; timeIdxInSeq += GetNumParallelSequences()) { FrameRange frameRange(timeIdxInSeq, GetNumParallelSequences()); - Matrix sliceObs = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceObs = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutput = FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceState = m_State.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); diff --git a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h index bcdbdf0a9135..832419ab2882 100644 --- a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h @@ -881,7 +881,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { FrameRange frameRange(t, 1); // TODO: change to frameRange over a whole MB with a sequence index. 
BUGBUG: below code will break until this is fixed /// compute prb - 1 and prb - Matrix lbl_t = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + Matrix lbl_t = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); size_t c_t = (size_t)lbl_t(1, 0); size_t lft_bnd = (size_t)lbl_t(2, 0); size_t rgt_bnd = (size_t)lbl_t(3, 0); @@ -890,14 +890,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { continue; Matrix input_weight_t = Inputs(2)->FunctionValues().ColumnSlice(lft_bnd, nbr_wrd); - Matrix obs = DataSlice(1, VALUE, frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + Matrix obs = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); Matrix grd_to_soft_max_input = m_grdToSoftMaxInput.ColumnSlice(sz, nbr_wrd); Matrix grd_to_cls_prob = m_clsLogSoftmax.FrameSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); switch (inputIndex){ case 1: /// gradient to input - grd_t = DataSlice(1, GRADIENT, frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + grd_t = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); ComputeInputPartialRight(input_weight_t, grd_t, grd_to_soft_max_input); break; case 2: @@ -906,7 +906,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputeInputPartialLeft(obs, grd_to_wgt_t, grd_to_soft_max_input); break; case 3: - grd_t = DataSlice(3, GRADIENT, frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + grd_t = Inputs(3)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); grd_t.SetValue(m_clsSoftmax.FrameSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout)); ComputeCEPartialToSoftmaxInputs(grd_t, GradientValues(), c_t); break; @@ -947,7 +947,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { FrameRange frameRange(t, 1); /// compute prb - 1 and prb - Matrix lbl_t = DataSlice(0, VALUE, frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + Matrix lbl_t = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); size_t y_t = (size_t)lbl_t(0, 0); size_t lft_bnd = (size_t)lbl_t(2, 0); size_t rgt_bnd = (size_t)lbl_t(3, 0); From acfd0108c38bc7c352dde195e2c5db80bb4dcbc7 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Mon, 21 Sep 2015 13:32:07 -0700 Subject: [PATCH 26/44] switched DataSlice(VALUE/GRADIENT...) 
to Value/GradientSlice() --- .../CompositeComputationNodes.h | 4 +- .../ComputationNode.h | 33 +++- .../ConvolutionalNodes.h | 16 +- .../InputAndParamNodes.h | 18 +- .../LinearAlgebraNodes.h | 174 +++++++++--------- .../NonlinearityNodes.h | 78 ++++---- .../RecurrentNodes.h | 12 +- .../TrainingCriterionNodes.h | 10 +- Math/Math/Matrix.h | 2 +- 9 files changed, 180 insertions(+), 167 deletions(-) diff --git a/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h b/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h index 13d9576279cd..667808ed19aa 100644 --- a/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h @@ -539,7 +539,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { //only feature (input0) and output needs to be sliced - Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues()); @@ -690,7 +690,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { //only feature (input0) and output needs to be sliced - Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues()); diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h index 895294514940..83b5d75f9062 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h @@ -997,18 +997,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { // Note: This returns an object, not a reference. That object is a column slice, i.e. a small object that just points into another object. // TODO: remove FrameRange::samplesInRecurrentStep from FrameRange, as it belongs into pMBLayout. Hence this function that binds both together. // Note: This is not used anywhere yet, only a sketch how we may further abstract timing. 
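// [editor's note, not part of the original patch] The hunk below is the heart of this commit:
// the slicing arithmetic moves into a core DataSlice(Matrix& data, frameRange, layout) that is
// handed the target matrix explicitly, the old enum-based DataSlice(VALUE/GRADIENT, ...) survives
// as a thin forwarder, and new ValueSlice()/GradientSlice() shorthands are added; the remaining
// files in this patch are the mechanical switch of call sites to those shorthands.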
- // TODO: move sequence into FrameRange object -#define SEQUENCE_ALL SIZE_MAX - enum ValueOrGradient { VALUE, GRADIENT }; - Matrix DataSlice(ValueOrGradient valueOrGradient/*as it says*/, + Matrix DataSlice(Matrix & data, const FrameRange & frameRange/*select frame or entire batch*/, - const MBLayoutPtr &, // DELETE THIS after refactoring; it's a dummy left-over - size_t sequence = SEQUENCE_ALL/*SEQUENCE_ALL is the normal case*/) + const MBLayoutPtr &) // DELETE THIS after refactoring; it's a dummy left-over { - Matrix & data = (valueOrGradient == VALUE) ? FunctionValues() : GradientValues(); + auto sequence = SIZE_MAX; if (frameRange.IsAllFrames()) { - if (sequence == SEQUENCE_ALL) + if (sequence == SIZE_MAX) return data.ColumnSlice(0, data.GetNumCols()); else LogicError("DataSlice: sequence index only supported when accessing individual frame"); // (not needed; doable but more involved, requiring a reshape) @@ -1019,12 +1015,29 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (numParallelSequences != frameRange.samplesInRecurrentStep) LogicError("DataSlice: inconsistent samplesInRecurrentStep"); // TODO: this will go away when we remove this member from FrameRange size_t startColumn = frameRange.t() * numParallelSequences; - if (sequence == SEQUENCE_ALL) + if (sequence == SIZE_MAX) return data.ColumnSlice(startColumn, numParallelSequences); else return data.ColumnSlice(startColumn + sequence, 1); } - // TODO: + } + enum ValueOrGradient { VALUE, GRADIENT }; + Matrix DataSlice(ValueOrGradient valueOrGradient/*as it says*/, + const FrameRange & frameRange/*select frame or entire batch*/, + const MBLayoutPtr &) // DELETE THIS after refactoring; it's a dummy left-over + { + Matrix & data = (valueOrGradient == VALUE) ? FunctionValues() : GradientValues(); + return DataSlice(data, frameRange, m_pMBLayout); + } + Matrix ValueSlice(const FrameRange & frameRange/*select frame or entire batch*/, + const MBLayoutPtr &) // DELETE THIS after refactoring; it's a dummy left-over + { + return DataSlice(FunctionValues(), frameRange, m_pMBLayout); + } + Matrix GradientSlice(const FrameRange & frameRange/*select frame or entire batch*/, + const MBLayoutPtr &) // DELETE THIS after refactoring; it's a dummy left-over + { + return DataSlice(GradientValues(), frameRange, m_pMBLayout); } // this is the entry point from Network; while it will call virtual ComputeInputPartial() into the actual node implementation diff --git a/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h b/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h index 3541a258adf7..c42109f7f062 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h @@ -111,14 +111,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 1) InvalidArgument("Convolution operation only takes two inputs."); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete
this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); if (inputIndex == 0) //derivative with regard to the weight matrix ComputeInputPartialOverWeight(sliceOutputGrad, Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), sliceInput1Value, m_tempMatrix, !frameRange.IsAllFrames()); else // derivative with regard to the input feature { - Matrix sliceInput1Grad = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialOverInputFeature(sliceOutputGrad, sliceInput1Grad, Inputs(0)->FunctionValues(), sliceInput1Value, m_tempMatrix); } } @@ -215,7 +215,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value, m_tempMatrix); } @@ -433,10 +433,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 0) InvalidArgument("MaxPooling operation only takes one input."); - Matrix sliceInput0Grad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Grad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialV(sliceOutputGrad, sliceInput0Grad, sliceInput0Value, sliceOutputValue); @@ -447,7 +447,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value =
Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeV(sliceOutputValue, sliceInput0Value); } diff --git a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h index 60e1bf0c1a5a..6e54fdc75220 100644 --- a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h @@ -348,15 +348,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex == 0) //left derivative { - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); } else //right derivative { - Matrix sliceInput1Grad = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); } @@ -402,7 +402,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); @@ -573,8 +573,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { assert(m_functionValues.GetNumRows() == GradientValues().GetNumRows()); // original used m_functionValues.GetNumRows() for loop dimension assert(m_pMBLayout); - Matrix mTmp = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete 
this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix::ScaleAndAdd(1.0, GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout), mTmp); + Matrix mTmp = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix::ScaleAndAdd(1.0, GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout), mTmp); } virtual void EvaluateThisNode() @@ -584,8 +584,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix mTmp = FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - mTmp.SetValue(Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout)); + Matrix mTmp = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + mTmp.SetValue(Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout)); } virtual void /*ComputationNodeBase::*/Validate() diff --git a/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h b/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h index 6af7eab08a5d..322ecce8f238 100644 --- a/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h @@ -53,8 +53,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Negate operation only has one input."); - Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad); } @@ -71,7 +71,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue); } @@ -138,8 +138,8 @@ namespace Microsoft { 
namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("SumElements only has one input."); - Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad); } @@ -156,7 +156,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue); @@ -233,8 +233,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("SumColumnElements only has one input."); - Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad); } @@ -251,7 +251,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue); @@ -370,8 +370,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("RowSlice only has one input."); - Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * 
GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_startIndex, m_numRows); } @@ -388,7 +388,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue, m_startIndex, m_numRows); @@ -486,8 +486,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex >= ChildrenSize()) InvalidArgument("RowStack-ComputeInputPartial: inputIndex out of range."); - Matrix sliceInputGrad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_startRowIndeces[inputIndex], m_startRowIndeces[inputIndex+1] - m_startRowIndeces[inputIndex]); } @@ -504,7 +504,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceFunctionValues = FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceFunctionValues = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceFunctionValues, m_inputMatrices, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()); } @@ -623,15 +623,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { //left Node must be a scalar if (inputIndex == 0) //left derivative { - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: 
delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); } else { - Matrix sliceInput1Grad = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); } @@ -654,7 +654,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); @@ -742,15 +742,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex == 0) //left derivative { - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); } else //right derivative { - Matrix sliceInput1Grad = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * 
GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); } @@ -801,7 +801,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols(); FunctionValues().Resize(rows0, cols1); - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); @@ -915,15 +915,15 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex == 0) //left derivative { - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); } else //right derivative { - Matrix sliceInput1Grad = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); } @@ -969,7 +969,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), 
GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); @@ -1073,10 +1073,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 1) InvalidArgument("ElementTimes operation only takes two inputs."); - Matrix sliceInput0Grad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Grad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1-inputIndex)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1-inputIndex)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInput1Value, sliceInput0Grad, sliceOutputGrad); } @@ -1100,8 +1100,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value); @@ -1202,10 +1202,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 1) InvalidArgument("RowElementTimes operation only takes two inputs."); - Matrix sliceInput0Grad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Grad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1 - 
inputIndex)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1 - inputIndex)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); if (inputIndex == 0) { @@ -1252,8 +1252,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value); @@ -1353,17 +1353,17 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 1) InvalidArgument("ColumnElementTimes operation only takes two inputs."); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); if (inputIndex == 0) { - Matrix sliceInput0Grad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Grad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeftS(Inputs(1)->FunctionValues(), sliceInput0Grad, sliceOutputGrad, m_tempMatrix); } else { - Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialRightS(sliceInput0Value, Inputs(1)->GradientValues(), sliceOutputGrad, m_tempMatrix); } } @@ -1403,7 +1403,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = 
m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues()); @@ -1509,13 +1509,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { //only the one with more columns can be sliced, if both have same columns both are sliced size_t cols0 = Inputs(inputIndex)->FunctionValues().GetNumCols(), cols1=Inputs(1-inputIndex)->FunctionValues().GetNumCols(); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); if (cols0 >= cols1) { - Matrix sliceInput0Grad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput0Value = Inputs(inputIndex)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Grad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(inputIndex)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceOutputValue, sliceOutputGrad, sliceInput0Value, sliceInput0Grad); } @@ -1589,20 +1589,20 @@ namespace Microsoft { namespace MSR { namespace CNTK { //only the one with more columns can be sliced, if both have same columns both are sliced if (cols0 == cols1) { - Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value); } else if (cols0 > cols1) { - Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues()); } else //cols0 < cols1) { - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), 
GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); } @@ -1780,11 +1780,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { //only the one with more columns can be sliced, if both have same columns both are sliced size_t cols0 = Inputs(inputIndex)->FunctionValues().GetNumCols(), cols1=Inputs(1-inputIndex)->FunctionValues().GetNumCols(); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput0Grad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput0Value = Inputs(inputIndex)->FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Grad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(inputIndex)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix ones = Matrix(); @@ -1895,20 +1895,20 @@ namespace Microsoft { namespace MSR { namespace CNTK { //only the one with more columns can be sliced, if both have same columns both are sliced if (cols0 == cols1) { - Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value); } else if (cols0 > cols1) { - Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues()); } else //cols0 < cols1) { - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), 
m_pMBLayout);
+                Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);

                EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value);
            }
@@ -2048,16 +2048,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                InvalidArgument("DiagTimes operation only takes two inputs.");

            //left parameter (diag matrix cannot be sliced)
-            Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);

            if (inputIndex == 0)  //left derivative
            {
-                Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
                ComputeInputPartialLeft(m_innerproduct, sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
            }
            else  //right derivative
            {
-                Matrix sliceInput1Grad = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
                ComputeInputPartialRight(m_rightGradient, Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
            }
        }
@@ -2083,7 +2083,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {

        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
        {
-            Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
            Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);

            EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value);
@@ -2205,11 +2205,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            if (inputIndex > 1)
                InvalidArgument("CosDistance operation only takes two inputs.");

-            Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
            Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete 
this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInputGrad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = this->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = this->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); if (inputIndex == 0) //left derivative { @@ -2280,8 +2280,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(m_invNorm0, m_invNorm1, sliceOutputValue, sliceInput0Value, sliceInput1Value); @@ -2426,19 +2426,19 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 1) InvalidArgument("KhatriRaoProduct operation only takes two inputs."); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); if (inputIndex == 0) //left derivative { - Matrix sliceInput0Grad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Grad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLeft(sliceInput1Value, sliceInput0Grad, sliceOutputGrad); } else //right derivative { - Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), 
GetNumParallelSequences()), m_pMBLayout);
-            Matrix sliceInput1Grad = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);

            ComputeInputPartialRight(sliceInput0Value, sliceInput1Grad, sliceOutputGrad);
            }
@@ -2461,8 +2461,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {

        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
        {
-            Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
            Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);

            EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
@@ -2564,11 +2564,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            if (inputIndex > 1)
                InvalidArgument("CosDistanceWithNegativeSamples operation only takes gradients on the first two inputs.");

-            Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
            Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix sliceInputGrad = Inputs(inputIndex)->GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix sliceThisGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix sliceInputGrad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix sliceThisGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * 
GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(inputIndex, m_invNorm0, m_invNorm1, sliceOutputValue, m_temp, m_rightTerm, m_leftTerm, m_invNormSquare, sliceInput0Value, sliceInput1Value, Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), sliceInputGrad, sliceThisGrad); } @@ -2681,8 +2681,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(m_invNorm0, m_invNorm1, sliceOutputValue, sliceInput0Value, sliceInput1Value, Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), m_leftTerm, m_rightTerm); @@ -2961,13 +2961,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 1) InvalidArgument("StrideTimes operation only takes two inputs."); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); if (m_StrideDim == 1) /// column stride { if (inputIndex == 0) //left derivative { - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); // TimesNode::ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); @@ -2995,7 +2995,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else //right derivative { - Matrix sliceInput1Grad = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); // TimesNode::ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); @@ -3022,7 +3022,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (inputIndex == 0) //left derivative { - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete 
this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); for (size_t k = 0; k < GetNumParallelSequences(); k++) { @@ -3047,7 +3047,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } else //right derivative { - Matrix sliceInput1Grad = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); for (size_t k = 0; k < GetNumParallelSequences(); k++) { @@ -3127,7 +3127,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols(); - Matrix sliceInput1Value = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); UpdateStride(sliceInput1Value); if (m_StrideDim == 0) FunctionValues().Resize(rows0 / GetNumParallelSequences(), cols1); diff --git a/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h b/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h index e363473a2155..efff274b6ff5 100644 --- a/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h @@ -63,11 +63,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { // We should also unify these two functions into one that decides 1 frame or all frames at runtime... through the slice-extractor function itself. // For now we could define ALL_SAMPLES e.g. as SIZE_MAX. // GetGradientSlice(), GetInputSlice() or something. - Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); // why GradientValues() but m_functionValues below and not FunctionValues()? 
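            // One possible shape for the unified slice extractor suggested above -- a
            // sketch only: GetSlice is a hypothetical name, and the MBLayout consistency
            // checks that DataSlice() performs are omitted here:
            //
            //     Matrix GetSlice(ValueOrGradient which, const FrameRange & frameRange)
            //     {
            //         Matrix & data = (which == VALUE) ? FunctionValues() : GradientValues();
            //         if (frameRange.IsAllFrames())
            //             return data.ColumnSlice(0, data.GetNumCols());    // whole minibatch
            //         else
            //             return data.ColumnSlice(frameRange.t() * GetNumParallelSequences(),
            //                                     GetNumParallelSequences());    // single frame
            //         // (ALL_SAMPLES/SIZE_MAX would come in as a third, defaulted argument)
            //     }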
- Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialV(m_gradient, sliceInputValue, sliceInputGrad, sliceOutputGrad); } @@ -81,7 +81,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeV(sliceOutputValue, sliceInputValue); @@ -206,8 +206,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Sigmoid only has one input."); - Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -265,8 +265,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Tanh only has one input."); - Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -326,10 +326,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Log only has one input."); - Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), 
m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad); } @@ -386,10 +386,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Exp only has one input."); - Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad); } @@ -445,10 +445,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Cosine only has one input."); - Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad); } @@ 
-508,8 +508,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Softmax only has one input."); - Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -616,8 +616,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Softmax only has one input."); - Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -738,7 +738,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } ComputeInputPartialUnnormedPrior(Inputs(0)->GradientValues(), sliceGradientValue, m_prior, slicePosterior, m_temp); else { - Matrix sliceUnnormedPriorGradient = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceUnnormedPriorGradient = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix slicePrior = m_prior.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialUnnormedPrior(sliceUnnormedPriorGradient, sliceGradientValue, slicePrior, slicePosterior, m_temp); } @@ -751,7 +751,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } ComputeInputPartialMean(Inputs(1)->GradientValues(), sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); else { - Matrix sliceMeanGradient = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceMeanGradient = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); 
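// The renames in these hunks are mechanical. As a minimal sketch (the surrounding node code is
// hypothetical; only the two call forms are taken from this patch), every site changes from the
// enum-tagged accessor to a dedicated wrapper:
//
//     Matrix sliceGrad = Inputs(1)->DataSlice(GRADIENT, frameRange, m_pMBLayout);   // before
//     Matrix sliceGrad = Inputs(1)->GradientSlice(frameRange, m_pMBLayout);         // after
//
// Both forms return the same column slice of the child's GradientValues(); only the call-site
// spelling changes, which makes value and gradient accesses easy to tell apart when reading.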
ComputeInputPartialMean(sliceMeanGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); } } @@ -763,7 +763,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } ComputeInputPartialLogStddev(Inputs(2)->GradientValues(), sliceGradientValue, sliceNormedDeviation, slicePosterior, m_temp); else { - Matrix sliceLotStddevGradient = Inputs(2)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceLotStddevGradient = Inputs(2)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialLogStddev(sliceLotStddevGradient, sliceGradientValue, sliceNormedDeviation, slicePosterior, m_temp); } } @@ -771,7 +771,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } case 3: { Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceFeatureGradient = Inputs(3)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceFeatureGradient = Inputs(3)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialFeature(sliceFeatureGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp); } break; @@ -889,7 +889,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } //get the right slice Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceFeature = Inputs(3)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceFeature = Inputs(3)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceNormedDeviation = m_normedDeviation.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix slicePosterior = m_posterior.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -901,9 +901,9 @@ virtual const std::wstring OperationName() const { return TypeName(); } } else if (colsPrior == numSamples) { - Matrix sliceUnnormedPrior = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceMean = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceLogstddev = Inputs(2)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceUnnormedPrior = 
Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceMean = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceLogstddev = Inputs(2)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix slicePrior = m_prior.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceStddev = m_stddev.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); @@ -1113,8 +1113,8 @@ virtual const std::wstring OperationName() const { return TypeName(); } if (inputIndex > 0) InvalidArgument("Dropout operation only takes one input."); - Matrix sliceInput0Grad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Grad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceMask = Matrix(); if (m_dropoutRate > 0) @@ -1143,7 +1143,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } } virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = Matrix (); Matrix sliceMask = Matrix(); @@ -1154,7 +1154,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } sliceMask = m_maskOfDropout.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); } - sliceOutputValue = FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(m_dropoutRate, m_randomSeed, sliceOutputValue, sliceMask, sliceInput0Value); } @@ -1405,7 +1405,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } } size_t outputSamplesInRecurrentStep = GetNumParallelSequences() * rows / m_numRows; - Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), 
GetNumParallelSequences()), m_pMBLayout); // BUGBUG: the following will fail since outputSamplesInRecurrentStep will not match m_pMBLayout. Need to find out what this means (currently layout is constant throughout the graph), and implement it correctly. Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep), m_pMBLayout); @@ -1450,9 +1450,9 @@ virtual const std::wstring OperationName() const { return TypeName(); } size_t outputSamplesInRecurrentStep = GetNumParallelSequences() * rows / m_numRows; - Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); // BUGBUG: the following will fail since outputSamplesInRecurrentStep will not match m_pMBLayout. Need to find out what this means (currently layout is constant throughout the graph), and implement it correctly. - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_numRows); } @@ -1648,7 +1648,7 @@ virtual const std::wstring OperationName() const { return TypeName(); } virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); EvaluateThisNodeS(sliceOutputValue, sliceInputValue, m_numRepeat); @@ -1675,8 +1675,8 @@ virtual const std::wstring OperationName() const { return TypeName(); } if (inputIndex != 0) InvalidArgument("RowRepeat only has one input."); - Matrix sliceInputGrad = Inputs(0)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_numRepeat); } diff --git a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h index 132296b4daa3..e6f45eb57909 100644 --- 
a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h @@ -225,7 +225,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { d = (int)functionValues.Mod((float)delayedIndex, (float)delayedActivation.GetNumCols()); // this can point to the past activity of the previous minibatch - Matrix out = DataSlice(VALUE, frameRange, m_pMBLayout); + Matrix out = ValueSlice(frameRange, m_pMBLayout); Matrix inp((DEVICEID_TYPE)functionValues.GetDeviceId()); if (minibatchPackingFlags & SequenceStart_or_End) @@ -606,8 +606,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (int timeIdxInSeq = nT - GetNumParallelSequences(); timeIdxInSeq >= 0; timeIdxInSeq -= GetNumParallelSequences()) { FrameRange frameRange(timeIdxInSeq, GetNumParallelSequences()); - Matrix sliceObs = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutput = FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); + Matrix sliceObs = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutput = ValueSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); Matrix sliceState = m_State.FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); Matrix sliceGi = m_Gi.FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); @@ -617,7 +617,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { Matrix sliceTanhState = tanhState.FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); Matrix sliceTanhObs = tanhObs.FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); - Matrix error = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); + Matrix error = GradientSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); Matrix grdToObsSlice(this->m_deviceId); @@ -997,8 +997,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (size_t timeIdxInSeq = 0; timeIdxInSeq < nT; timeIdxInSeq += GetNumParallelSequences()) { FrameRange frameRange(timeIdxInSeq, GetNumParallelSequences()); - Matrix sliceObs = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutput = FunctionValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceObs = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutput = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceState = m_State.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); Matrix sliceGi = m_Gi.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); diff --git a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h index 832419ab2882..e15d802c9250 
100644 --- a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h @@ -881,7 +881,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { FrameRange frameRange(t, 1); // TODO: change to frameRange over a whole MB with a sequence index. BUGBUG: below code will break until this is fixed /// compute prb - 1 and prb - Matrix lbl_t = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + Matrix lbl_t = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); size_t c_t = (size_t)lbl_t(1, 0); size_t lft_bnd = (size_t)lbl_t(2, 0); size_t rgt_bnd = (size_t)lbl_t(3, 0); @@ -890,14 +890,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { continue; Matrix input_weight_t = Inputs(2)->FunctionValues().ColumnSlice(lft_bnd, nbr_wrd); - Matrix obs = Inputs(1)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + Matrix obs = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); Matrix grd_to_soft_max_input = m_grdToSoftMaxInput.ColumnSlice(sz, nbr_wrd); Matrix grd_to_cls_prob = m_clsLogSoftmax.FrameSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); switch (inputIndex){ case 1: /// gradient to input - grd_t = Inputs(1)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + grd_t = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); ComputeInputPartialRight(input_weight_t, grd_t, grd_to_soft_max_input); break; case 2: @@ -906,7 +906,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputeInputPartialLeft(obs, grd_to_wgt_t, grd_to_soft_max_input); break; case 3: - grd_t = Inputs(3)->DataSlice(GRADIENT, frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + grd_t = Inputs(3)->GradientSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); grd_t.SetValue(m_clsSoftmax.FrameSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout)); ComputeCEPartialToSoftmaxInputs(grd_t, GradientValues(), c_t); break; @@ -947,7 +947,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { FrameRange frameRange(t, 1); /// compute prb - 1 and prb - Matrix lbl_t = Inputs(0)->DataSlice(VALUE, frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + Matrix lbl_t = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); size_t y_t = (size_t)lbl_t(0, 0); size_t lft_bnd = (size_t)lbl_t(2, 0); size_t rgt_bnd = (size_t)lbl_t(3, 0); diff --git a/Math/Math/Matrix.h b/Math/Math/Matrix.h index 89548709374a..e5a57c058552 100644 --- a/Math/Math/Matrix.h +++ b/Math/Math/Matrix.h @@ -601,7 +601,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // FrameRange frameRange(t, 1); // using a different #sequences. Solve by treating all frames as one sequence (in FrameRange) // - ReshapeNode: - // Matrix sliceOutputGrad = GradientValues().FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep), m_pMBLayout); + // Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep), m_pMBLayout); // using a different #sequences. Find out what this really means.
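// To make the slicing arithmetic above concrete: a minimal sketch (member names as used in the
// surrounding patches; the layout assumption is noted inline) of how a FrameRange selects columns
// of a minibatch matrix. With S parallel sequences stored frame-major, time step t is assumed to
// occupy columns [t*S, t*S + S):
//
//     size_t S = GetNumParallelSequences();
//     if (frameRange.IsAllFrames())
//         return data.ColumnSlice(0, data.GetNumCols());       // whole minibatch
//     else
//         return data.ColumnSlice(frameRange.t() * S, S);      // one time step, all sequences
//
// This is also why the FrameRange::Check() calls marked "TODO: delete this" pass
// (frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()): they restate the same
// start column and width, apparently so the old and new slicing can be cross-checked while the
// refactoring is in flight.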
struct FrameRange { From 025174b9a132a852de51294395d077959251f1c4 Mon Sep 17 00:00:00 2001 From: Amit Agarwal Date: Mon, 21 Sep 2015 13:44:15 -0700 Subject: [PATCH 27/44] Fixed some linker errors in the CNTKMathTest unit test project --- Math/Math/GPUSparseMatrix.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Math/Math/GPUSparseMatrix.cu b/Math/Math/GPUSparseMatrix.cu index c53ade5dd445..5a2e5da61957 100644 --- a/Math/Math/GPUSparseMatrix.cu +++ b/Math/Math/GPUSparseMatrix.cu @@ -2401,8 +2401,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { #pragma endregion Helper Functions - template class GPUSparseMatrix; - template class GPUSparseMatrix; + template class MATH_API GPUSparseMatrix; + template class MATH_API GPUSparseMatrix; // We use Matrix as the backing store for QuantizedMatrix // Let's explicitly instantiate the methods we need for that purpose From dc92c384cd6e6e637f5a24d11b408e34dcf5cb10 Mon Sep 17 00:00:00 2001 From: Amit Agarwal Date: Mon, 21 Sep 2015 13:59:05 -0700 Subject: [PATCH 28/44] Include CNTKMathTest in the VS build to prevent build issues in the project from getting in undetected --- CNTK.sln | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/CNTK.sln b/CNTK.sln index b68c8721c7ff..956df91c529a 100644 --- a/CNTK.sln +++ b/CNTK.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 2013 -VisualStudioVersion = 12.0.21005.1 +VisualStudioVersion = 12.0.31101.0 MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTKMathDll", "Math\Math\Math.vcxproj", "{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}" ProjectSection(ProjectDependencies) = postProject @@ -351,7 +351,9 @@ Global {E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Release|x64.ActiveCfg = Release|x64 {E6F26F9A-FF64-4F0A-B749-CD309EE357EE}.Release|x64.Build.0 = Release|x64 {6CEE834A-8104-46A8-8902-64C81BD7928F}.Debug|x64.ActiveCfg = Debug|x64 + {6CEE834A-8104-46A8-8902-64C81BD7928F}.Debug|x64.Build.0 = Debug|x64 {6CEE834A-8104-46A8-8902-64C81BD7928F}.Release|x64.ActiveCfg = Release|x64 + {6CEE834A-8104-46A8-8902-64C81BD7928F}.Release|x64.Build.0 = Release|x64 {33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Debug|x64.ActiveCfg = Debug|x64 {33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Debug|x64.Build.0 = Debug|x64 {33D2FD22-DEF2-4507-A58A-368F641AEBE5}.Release|x64.ActiveCfg = Release|x64 @@ -377,6 +379,7 @@ Global {0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Debug|x64.ActiveCfg = Debug|x64 {0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Debug|x64.Build.0 = Debug|x64 {0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Release|x64.ActiveCfg = Release|x64 + {0F30EBCF-09F3-4EED-BF54-4214BCE53FEC}.Release|x64.Build.0 = Release|x64 {B3DD765E-694E-4494-BAD7-37BBF2942517}.Debug|x64.ActiveCfg = Debug|x64 {B3DD765E-694E-4494-BAD7-37BBF2942517}.Debug|x64.Build.0 = Debug|x64 {B3DD765E-694E-4494-BAD7-37BBF2942517}.Release|x64.ActiveCfg = Release|x64 @@ -416,38 +419,38 @@ Global HideSolutionNode = FALSE EndGlobalSection GlobalSection(NestedProjects) = preSolution - {E6F26F9A-FF64-4F0A-B749-CD309EE357EE} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} - {482999D1-B7E2-466E-9F8D-2119F93EAFD9} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} - {B3DD765E-694E-4494-BAD7-37BBF2942517} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} - {928ABD1B-4D3B-4017-AEF1-0FA1B4467513} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} - {DE3C54E5-D7D0-47AF-A783-DFDCE59E7937} =
{DD043083-71A4-409A-AA91-F9C548DCF7EC} + {E6F26F9A-FF64-4F0A-B749-CD309EE357EE} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} {6CEE834A-8104-46A8-8902-64C81BD7928F} = {D45DF403-6781-444E-B654-A96868C5BE68} + {33D2FD22-DEF2-4507-A58A-368F641AEBE5} = {33EBFE78-A1A8-4961-8938-92A271941F94} {668BEED5-AC07-4F35-B3AE-EE65A7F9C976} = {D45DF403-6781-444E-B654-A96868C5BE68} - {0F30EBCF-09F3-4EED-BF54-4214BCE53FEC} = {D45DF403-6781-444E-B654-A96868C5BE68} - {DBB3C106-B0B4-4059-8477-C89528CEC1B0} = {D45DF403-6781-444E-B654-A96868C5BE68} - {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8} = {D45DF403-6781-444E-B654-A96868C5BE68} - {7C4E77C9-6B17-4B02-82C1-DB62EEE2635B} = {D45DF403-6781-444E-B654-A96868C5BE68} - {5E666C53-2D82-49C9-9127-3FDDC321C741} = {D45DF403-6781-444E-B654-A96868C5BE68} {E6646FFE-3588-4276-8A15-8D65C22711C1} = {33EBFE78-A1A8-4961-8938-92A271941F94} {1D5787D4-52E4-45DB-951B-82F220EE0C6A} = {33EBFE78-A1A8-4961-8938-92A271941F94} {62836DC1-DF77-4B98-BF2D-45C943B7DDC6} = {33EBFE78-A1A8-4961-8938-92A271941F94} - {33D2FD22-DEF2-4507-A58A-368F641AEBE5} = {33EBFE78-A1A8-4961-8938-92A271941F94} + {482999D1-B7E2-466E-9F8D-2119F93EAFD9} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} + {0F30EBCF-09F3-4EED-BF54-4214BCE53FEC} = {D45DF403-6781-444E-B654-A96868C5BE68} + {B3DD765E-694E-4494-BAD7-37BBF2942517} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} {9A2F2441-5972-4EA8-9215-4119FCE0FB68} = {33EBFE78-A1A8-4961-8938-92A271941F94} {014DA766-B37B-4581-BC26-963EA5507931} = {33EBFE78-A1A8-4961-8938-92A271941F94} {D667AF32-028A-4A5D-BE19-F46776F0F6B2} = {33EBFE78-A1A8-4961-8938-92A271941F94} - {CE429AA2-3778-4619-8FD1-49BA3B81197B} = {33EBFE78-A1A8-4961-8938-92A271941F94} - {065AF55D-AF02-448B-BFCD-52619FDA4BD0} = {39E42C4B-A078-4CA4-9D92-B883D8129601} {3ED0465D-23E7-4855-9694-F788717B6533} = {39E42C4B-A078-4CA4-9D92-B883D8129601} + {065AF55D-AF02-448B-BFCD-52619FDA4BD0} = {39E42C4B-A078-4CA4-9D92-B883D8129601} {98D2C32B-0C1F-4E19-A626-65F7BA4600CF} = {065AF55D-AF02-448B-BFCD-52619FDA4BD0} {EA67F51F-1FE8-462D-9F3E-01161685AD59} = {065AF55D-AF02-448B-BFCD-52619FDA4BD0} {DE1A06BA-EC5C-4E0D-BCA8-3EA555310C58} = {065AF55D-AF02-448B-BFCD-52619FDA4BD0} {63024704-A2D7-497E-AD4B-5C10C6AA1374} = {065AF55D-AF02-448B-BFCD-52619FDA4BD0} {F9BEB27E-8AF5-464E-8D45-0000D5AFA2D3} = {EA67F51F-1FE8-462D-9F3E-01161685AD59} {889C1CCF-92B3-450B-B00D-FC9A9D5BE464} = {EA67F51F-1FE8-462D-9F3E-01161685AD59} + {DBB3C106-B0B4-4059-8477-C89528CEC1B0} = {D45DF403-6781-444E-B654-A96868C5BE68} + {CE429AA2-3778-4619-8FD1-49BA3B81197B} = {33EBFE78-A1A8-4961-8938-92A271941F94} + {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8} = {D45DF403-6781-444E-B654-A96868C5BE68} {4BBF2950-3DBD-469A-AD57-6CACBEBAF541} = {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8} {5F733BBA-FE83-4668-8F83-8B0E78A36619} = {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8} {19EE975B-232D-49F0-94C7-6F1C6424FB53} = {C47CDAA5-6D6C-429E-BC89-7CA0F868FDC8} + {7C4E77C9-6B17-4B02-82C1-DB62EEE2635B} = {D45DF403-6781-444E-B654-A96868C5BE68} + {928ABD1B-4D3B-4017-AEF1-0FA1B4467513} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} + {DE3C54E5-D7D0-47AF-A783-DFDCE59E7937} = {DD043083-71A4-409A-AA91-F9C548DCF7EC} + {5E666C53-2D82-49C9-9127-3FDDC321C741} = {D45DF403-6781-444E-B654-A96868C5BE68} {6D1353D6-F196-466F-B886-F16D48759B20} = {5E666C53-2D82-49C9-9127-3FDDC321C741} {B6725C9F-A6D2-4269-9B74-7888A90F7884} = {5E666C53-2D82-49C9-9127-3FDDC321C741} {B27DD434-EECD-4EE0-A03B-1150EB87258E} = {B6725C9F-A6D2-4269-9B74-7888A90F7884} From 207bfec369da60e23a72466e070f55c5ad527948 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Mon, 21 Sep 2015 
14:05:02 -0700 Subject: [PATCH 29/44] changed X.FrameSlice(...) to DataSlice(X, ...); discovered that many LinearAlgebraNodes and NonlinearityNodes accessed m_functionValues instead of going through the virtual FunctionValues(); I hope that was a bug, and that changing it (to DataSlice(), which calls FunctionValues()) did not break anything; moved pMBLayout in DataSlice calls to be an argument to FrameRange::Check() instead --- .../CompositeComputationNodes.h | 12 +- .../ComputationNode.h | 21 +- .../ConvolutionalNodes.h | 22 +- .../InputAndParamNodes.h | 19 +- .../LinearAlgebraNodes.h | 216 +++++++++--------- .../NonlinearityNodes.h | 120 +++++----- .../RecurrentNodes.h | 42 ++-- .../TrainingCriterionNodes.h | 14 +- Math/Math/Matrix.h | 6 +- 9 files changed, 234 insertions(+), 238 deletions(-) diff --git a/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h b/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h index 667808ed19aa..077daa841cbe 100644 --- a/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h @@ -539,8 +539,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { //only feature (input0) and output needs to be sliced - Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues()); } @@ -690,8 +690,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { //only feature (input0) and output needs to be sliced - Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues()); } @@ -840,9 +840,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { //FunctionValues().Resize(m_memory.GetNumRows(), GetNumParallelSequences()); FunctionValues().Resize(m_memory.GetNumRows(), frameRange.NumCols()); // extra space for one time step if (frameRange.t() == 0) // for first frame, check that we got all in memory --TODO: is
this comment correct? How about going backwards? - assert(FunctionValues().FrameSlice(FrameRange(0, GetNumParallelSequences()), m_pMBLayout).FrobeniusNorm() == m_memory.FrameSlice(FrameRange(0, GetNumParallelSequences()), m_pMBLayout).FrobeniusNorm()); + assert(ValueSlice(FrameRange(0, GetNumParallelSequences())).FrobeniusNorm() == DataSlice(m_memory, FrameRange(0, GetNumParallelSequences())).FrobeniusNorm()); //assert(FunctionValues().ColumnSlice(0, GetNumParallelSequences()), m_pMBLayout).FrobeniusNorm() == m_memory.ColumnSlice(0, GetNumParallelSequences()), m_pMBLayout).FrobeniusNorm()); - FunctionValues().SetValue(m_memory.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout)); + FunctionValues().SetValue(DataSlice(m_memory, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout))); assert(FunctionValues().GetNumCols() == GetNumParallelSequences()); } diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h index 83b5d75f9062..71e06618c46e 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h @@ -986,7 +986,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_children[childIndex] = node; } - //making them virtual so that nodes that only copy values from it's children (e.g., dropout) can be efficient in evaluation + // these are overridden by DropoutNode, ReshapeNode, and RowRepeatNode to optimize for the trivial case that those don't do anything + // TODO: lots of nodes read out m_functionValues directly--was that a bug or intentional? They have now been changed to ValueSlice(), i.e. would pick it up virtual const Matrix& FunctionValues() const { return m_functionValues; } virtual Matrix& FunctionValues() { return m_functionValues; } @@ -998,8 +999,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // TODO: remove FrameRange::samplesInRecurrentStep from FrameRange, as it belongs into pMBLayout. Hence this function that binds both together. // Note: This is not used anywhere yet, only a sketch how we may further abstract timing. Matrix DataSlice(Matrix & data, - const FrameRange & frameRange/*select frame or entire batch*/, - const MBLayoutPtr &) // DELETE THIS after refactoring; it's a dummy left-over) + const FrameRange & frameRange/*select frame or entire batch*/) { auto sequence = SIZE_MAX; if (frameRange.IsAllFrames()) @@ -1023,21 +1023,18 @@ namespace Microsoft { namespace MSR { namespace CNTK { } enum ValueOrGradient { VALUE, GRADIENT }; Matrix DataSlice(ValueOrGradient valueOrGradient/*as it says*/, - const FrameRange & frameRange/*select frame or entire batch*/, - const MBLayoutPtr &) // DELETE THIS after refactoring; it's a dummy left-over) + const FrameRange & frameRange/*select frame or entire batch*/) { Matrix & data = (valueOrGradient == VALUE) ? 
FunctionValues() : GradientValues(); - return DataSlice(data, frameRange, m_pMBLayout); + return DataSlice(data, frameRange); } - Matrix ValueSlice(const FrameRange & frameRange/*select frame or entire batch*/, - const MBLayoutPtr &) // DELETE THIS after refactoring; it's a dummy left-over) + Matrix ValueSlice(const FrameRange & frameRange/*select frame or entire batch*/) { - return DataSlice(FunctionValues(), frameRange, m_pMBLayout); + return DataSlice(FunctionValues(), frameRange); } - Matrix GradientSlice(const FrameRange & frameRange/*select frame or entire batch*/, - const MBLayoutPtr &) // DELETE THIS after refactoring; it's a dummy left-over) + Matrix GradientSlice(const FrameRange & frameRange/*select frame or entire batch*/) { - return DataSlice(GradientValues(), frameRange, m_pMBLayout); + return DataSlice(GradientValues(), frameRange); } // this is the entry point from Network; while it will call virtual ComputeInputPartial() into the actual node implementation diff --git a/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h b/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h index c42109f7f062..2da3ba29cfbf 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h @@ -111,14 +111,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 1) InvalidArgument("Convolution operation only takes two inputs."); - Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); if (inputIndex == 0) //derivative with regard to the weight matrix ComputeInputPartialOverWeight(sliceOutputGrad, Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), sliceInput1Value, m_tempMatrix, !frameRange.IsAllFrames()); else // derivative with regard to the input feature { - Matrix sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); ComputeInputPartialOverInputFeature(sliceOutputGrad, sliceInput1Grad, Inputs(0)->FunctionValues(), sliceInput1Value, m_tempMatrix); } } @@ -215,8 +215,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * 
GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value, m_tempMatrix); } @@ -433,11 +433,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 0) InvalidArgument("MaxPooling operation only takes one inputs."); - Matrix sliceInput0Grad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Grad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); - Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); ComputeInputPartialV(sliceOutputGrad, sliceInput0Grad, sliceInput0Value, sliceOutputValue); } @@ -447,8 +447,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); EvaluateThisNodeV(sliceOutputValue, sliceInput0Value); } diff --git a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h index 6e54fdc75220..aa29b045369a 100644 --- a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h @@ -346,17 +346,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex > 1) InvalidArgument("LookupTable operation only takes two inputs."); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); if (inputIndex == 0) //left 
derivative { - Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad); } else //right derivative { - Matrix sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad); } @@ -402,8 +401,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value); } @@ -573,8 +572,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { assert(m_functionValues.GetNumRows() == GradientValues().GetNumRows()); // original used m_functionValues.GetNumRows() for loop dimension assert(m_pMBLayout); - Matrix mTmp = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix::ScaleAndAdd(1.0, GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout), mTmp); + Matrix mTmp = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); + Matrix::ScaleAndAdd(1.0, GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)), mTmp); } virtual void EvaluateThisNode() @@ -584,8 +583,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix mTmp = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - mTmp.SetValue(Inputs(0)->ValueSlice(frameRange/*TODO: delete 
this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout)); + Matrix mTmp = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); + mTmp.SetValue(Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout))); } virtual void /*ComputationNodeBase::*/Validate() diff --git a/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h b/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h index 322ecce8f238..a91ed0d81ff6 100644 --- a/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h @@ -53,8 +53,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("Negate operation only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad); } @@ -71,8 +71,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); EvaluateThisNodeS(sliceOutputValue, sliceInputValue); } @@ -138,8 +138,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("SumElements only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad); } @@ -156,8 +156,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { 
virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); EvaluateThisNodeS(sliceOutputValue, sliceInputValue); } @@ -233,8 +233,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("SumColumnElements only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad); } @@ -251,8 +251,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); EvaluateThisNodeS(sliceOutputValue, sliceInputValue); } @@ -370,8 +370,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (inputIndex != 0) InvalidArgument("RowSlice only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_startIndex, m_numRows); } @@ 
-388,8 +388,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 
         virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
         {
-            Matrix<ElemType> sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             EvaluateThisNodeS(sliceOutputValue, sliceInputValue, m_startIndex, m_numRows);
         }
@@ -486,8 +486,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             if (inputIndex >= ChildrenSize())
                 InvalidArgument("RowStack-ComputeInputPartial: inputIndex out of range.");
 
-            Matrix<ElemType> sliceInputGrad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInputGrad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_startRowIndeces[inputIndex], m_startRowIndeces[inputIndex+1] - m_startRowIndeces[inputIndex]);
         }
@@ -504,7 +504,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 
         virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
        {
-            Matrix<ElemType> sliceFunctionValues = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceFunctionValues = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             EvaluateThisNodeS(sliceFunctionValues, m_inputMatrices, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
         }
@@ -623,15 +623,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             //left Node must be a scalar
             if (inputIndex == 0) //left derivative
             {
-                Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-                Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+                Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                 ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
             }
             else
             {
-                Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-                Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+                Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                 ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
             }
@@ -654,8 +654,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 
         virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
         {
-            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value);
         }
@@ -742,15 +742,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 
             if (inputIndex == 0) //left derivative
             {
-                Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-                Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+                Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                 ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
             }
             else //right derivative
             {
-                Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-                Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+                Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                 ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
             }
@@ -801,8 +801,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols();
             FunctionValues().Resize(rows0, cols1);
 
-            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value);
         }
@@ -915,15 +915,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 
             if (inputIndex == 0) //left derivative
             {
-                Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-                Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+                Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                 ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
             }
             else //right derivative
             {
-                Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-                Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+                Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                 ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
             }
@@ -969,8 +969,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 
         virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
        {
-            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value);
         }
@@ -1073,10 +1073,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             if (inputIndex > 1)
                 InvalidArgument("ElementTimes operation only takes two inputs.");
 
-            Matrix<ElemType> sliceInput0Grad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInput0Grad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
-            Matrix<ElemType> sliceInput1Value = Inputs(1-inputIndex)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInput1Value = Inputs(1-inputIndex)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             ComputeInputPartialS(sliceInput1Value, sliceInput0Grad, sliceOutputGrad);
         }
@@ -1100,9 +1100,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 
         virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
        {
-            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
         }
@@ -1202,10 +1202,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             if (inputIndex > 1)
                 InvalidArgument("RowElementTimes operation only takes two inputs.");
 
-            Matrix<ElemType> sliceInput0Grad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInput0Grad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
-            Matrix<ElemType> sliceInput1Value = Inputs(1 - inputIndex)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInput1Value = Inputs(1 - inputIndex)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             if (inputIndex == 0)
             {
@@ -1252,9 +1252,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 
         virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
        {
-            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
         }
@@ -1353,17 +1353,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             if (inputIndex > 1)
                 InvalidArgument("ColumnElementTimes operation only takes two inputs.");
 
-            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             if (inputIndex == 0)
             {
-                Matrix<ElemType> sliceInput0Grad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceInput0Grad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                 ComputeInputPartialLeftS(Inputs(1)->FunctionValues(), sliceInput0Grad, sliceOutputGrad, m_tempMatrix);
             }
             else
             {
-                Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                 ComputeInputPartialRightS(sliceInput0Value, Inputs(1)->GradientValues(), sliceOutputGrad, m_tempMatrix);
             }
         }
@@ -1403,8 +1403,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 
         virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
        {
-            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues());
         }
@@ -1509,13 +1509,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             //only the one with more columns can be sliced, if both have same columns both are sliced
             size_t cols0 = Inputs(inputIndex)->FunctionValues().GetNumCols(), cols1=Inputs(1-inputIndex)->FunctionValues().GetNumCols();
 
-            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             if (cols0 >= cols1)
             {
-                Matrix<ElemType> sliceInput0Grad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-                Matrix<ElemType> sliceInput0Value = Inputs(inputIndex)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceInput0Grad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+                Matrix<ElemType> sliceInput0Value = Inputs(inputIndex)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                 ComputeInputPartialS(sliceOutputValue, sliceOutputGrad, sliceInput0Value, sliceInput0Grad);
             }
@@ -1584,25 +1584,25 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         {
             size_t cols0 = Inputs(0)->FunctionValues().GetNumCols(), cols1=Inputs(1)->FunctionValues().GetNumCols();
 
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             //only the one with more columns can be sliced, if both have same columns both are sliced
             if (cols0 == cols1)
             {
-                Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-                Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+                Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                 EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
             }
             else if (cols0 > cols1)
            {
-                Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                 EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues());
             }
             else //cols0 < cols1)
             {
-                Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                 EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value);
             }
@@ -1780,11 +1780,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             //only the one with more columns can be sliced, if both have same columns both are sliced
             size_t cols0 = Inputs(inputIndex)->FunctionValues().GetNumCols(), cols1=Inputs(1-inputIndex)->FunctionValues().GetNumCols();
 
-            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
-            Matrix<ElemType> sliceInput0Grad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceInput0Value = Inputs(inputIndex)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInput0Grad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceInput0Value = Inputs(inputIndex)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             Matrix<ElemType> ones = Matrix<ElemType>();
@@ -1890,25 +1890,25 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        {
             size_t cols0 = Inputs(0)->FunctionValues().GetNumCols(), cols1=Inputs(1)->FunctionValues().GetNumCols();
 
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             //only the one with more columns can be sliced, if both have same columns both are sliced
             if (cols0 == cols1)
             {
-                Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-                Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+                Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                 EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
             }
             else if (cols0 > cols1)
             {
-                Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                 EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues());
             }
             else //cols0 < cols1)
             {
-                Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                 EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value);
             }
@@ -2048,16 +2048,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                 InvalidArgument("DiagTimes operation only takes two inputs.");
 
             //left parameter (diag matix cannot be sliced)
-            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             if (inputIndex == 0) //left derivative
             {
-                Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
                 ComputeInputPartialLeft(m_innerproduct, sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
             }
             else //right derivative
             {
-                Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
                 ComputeInputPartialRight(m_rightGradient, Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
             }
         }
@@ -2083,8 +2083,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 
         virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
        {
-            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value);
         }
@@ -2205,11 +2205,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             if (inputIndex > 1)
                 InvalidArgument("CosDistance operation only takes two inputs.");
 
-            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceInputGrad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputGrad = this->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceInputGrad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputGrad = this->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             if (inputIndex == 0) //left derivative
             {
@@ -2280,9 +2280,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 
         virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
        {
-            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             EvaluateThisNodeS(m_invNorm0, m_invNorm1, sliceOutputValue, sliceInput0Value, sliceInput1Value);
         }
@@ -2426,19 +2426,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             if (inputIndex > 1)
                 InvalidArgument("KhatriRaoProduct operation only takes two inputs.");
 
-            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             if (inputIndex == 0) //left derivative
             {
-                Matrix<ElemType> sliceInput0Grad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-                Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceInput0Grad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+                Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                 ComputeInputPartialLeft(sliceInput1Value, sliceInput0Grad, sliceOutputGrad);
             }
             else //right derivative
             {
-                Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-                Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+                Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                 ComputeInputPartialRight(sliceInput0Value, sliceInput1Grad, sliceOutputGrad);
             }
@@ -2461,9 +2461,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 
         virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
        {
-            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
         }
@@ -2564,11 +2564,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             if (inputIndex > 1)
                 InvalidArgument("CosDistanceWithNegativeSamples operation only takes grdients on the first two inputs.");
 
-            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceInputGrad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceThisGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceInputGrad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceThisGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             ComputeInputPartialS(inputIndex, m_invNorm0, m_invNorm1, sliceOutputValue, m_temp, m_rightTerm, m_leftTerm, m_invNormSquare, sliceInput0Value, sliceInput1Value, Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), sliceInputGrad, sliceThisGrad);
         }
@@ -2681,9 +2681,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 
         virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
        {
-            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             EvaluateThisNodeS(m_invNorm0, m_invNorm1, sliceOutputValue, sliceInput0Value, sliceInput1Value, Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), m_leftTerm, m_rightTerm);
         }
@@ -2961,13 +2961,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             if (inputIndex > 1)
                 InvalidArgument("StrideTimes operation only takes two inputs.");
 
-            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             if (m_StrideDim == 1) /// column stride
             {
                 if (inputIndex == 0) //left derivative
                 {
-                    Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                    Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                     // TimesNode::ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
@@ -2995,7 +2995,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                 }
                 else //right derivative
                 {
-                    Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                    Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                     // TimesNode::ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
@@ -3022,7 +3022,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             {
                 if (inputIndex == 0) //left derivative
                 {
-                    Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                    Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                     for (size_t k = 0; k < GetNumParallelSequences(); k++)
                     {
@@ -3047,7 +3047,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                 }
                 else //right derivative
                 {
-                    Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                    Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                     for (size_t k = 0; k < GetNumParallelSequences(); k++)
                     {
@@ -3127,13 +3127,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        {
             size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols();
 
-            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
             UpdateStride(sliceInput1Value);
 
             if (m_StrideDim == 0)
                 FunctionValues().Resize(rows0 / GetNumParallelSequences(), cols1);
             if (m_StrideDim == 1)
                 FunctionValues().Resize(rows0, cols1);
 
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value, m_Stride, m_StrideDim);
         }
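The change applied throughout this file (and in the two files below) is mechanical: the MBLayout pointer moves from being a trailing argument of the slice helpers into FrameRange::Check(), so that the checked FrameRange is validated against the layout itself, and output slices go through the node's own ValueSlice()/DataSlice() accessors instead of reaching into m_functionValues.FrameSlice() directly. A minimal before/after sketch, with t and s as shorthand for frameRange.t() and GetNumParallelSequences() (shorthand only, not names used in the tree):

    // before: layout passed alongside the already-checked FrameRange
    Matrix<ElemType> slice = Inputs(0)->ValueSlice(frameRange.Check(t * s, s), m_pMBLayout);
    // after: the layout takes part in the range check itself
    Matrix<ElemType> slice = Inputs(0)->ValueSlice(frameRange.Check(t * s, s, m_pMBLayout));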
diff --git a/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h b/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h
index efff274b6ff5..7a5a715f76c2 100644
--- a/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h
+++ b/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h
@@ -63,11 +63,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             // We should also unify these two functions into one that decides 1 frame or all frames at runtime... through the slice-extractor function itself.
             // For now we could define ALL_SAMPLES e.g. as SIZE_MAX.
             // GetGradientSlice(), GetInputSlice() or something.
-            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             // why GradientValues() but m_functionValues below and not FunctionValues()?
-            Matrix<ElemType> sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             ComputeInputPartialV(m_gradient, sliceInputValue, sliceInputGrad, sliceOutputGrad);
         }
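The TODO at the top of the hunk above suggests folding the one-frame and whole-minibatch paths into a single slice accessor chosen at runtime. A possible shape for that idea, assuming ALL_SAMPLES == SIZE_MAX as the comment proposes (a hypothetical sketch, not code in the tree at this point):

    static const size_t ALL_SAMPLES = SIZE_MAX;   // sentinel: "the whole minibatch"

    Matrix<ElemType> DataSlice(Matrix<ElemType> & data, const FrameRange & frameRange)
    {
        if (frameRange.t() == ALL_SAMPLES)        // no time index given: return all columns
            return data.ColumnSlice(0, data.GetNumCols());
        size_t s = GetNumParallelSequences();     // one frame = s columns, one per parallel sequence
        return data.ColumnSlice(frameRange.t() * s, s);
    }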
@@ -81,8 +81,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 
         virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
        {
-            Matrix<ElemType> sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             EvaluateThisNodeV(sliceOutputValue, sliceInputValue);
         }
@@ -206,10 +206,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             if (inputIndex != 0)
                 InvalidArgument("Sigmoid only has one input.");
 
-            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             ComputeInputPartialS(m_gradient, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
         }
@@ -265,10 +265,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             if (inputIndex != 0)
                 InvalidArgument("Tanh only has one input.");
 
-            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             ComputeInputPartialS(m_gradient, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
         }
@@ -326,10 +326,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             if (inputIndex != 0)
                 InvalidArgument("Log only has one input.");
 
-            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
-            Matrix<ElemType> sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             ComputeInputPartialS(m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad);
         }
@@ -386,10 +386,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             if (inputIndex != 0)
                 InvalidArgument("Exp only has one input.");
 
-            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
-            Matrix<ElemType> sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             ComputeInputPartialS(m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad);
         }
@@ -445,10 +445,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             if (inputIndex != 0)
                 InvalidArgument("Cosine only has one input.");
 
-            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
-            Matrix<ElemType> sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             ComputeInputPartialS(m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad);
         }
@@ -508,10 +508,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             if (inputIndex != 0)
                 InvalidArgument("Softmax only has one input.");
 
-            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             ComputeInputPartialS(m_gradient, m_diff, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
         }
@@ -616,10 +616,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             if (inputIndex != 0)
                 InvalidArgument("Softmax only has one input.");
 
-            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             ComputeInputPartialS(m_gradient, m_softmax, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
         }
@@ -727,8 +727,8 @@ virtual const std::wstring OperationName() const { return TypeName(); }
             //get the right slice
             const size_t colsPrior = Inputs(0)->FunctionValues().GetNumCols();
 
-            Matrix<ElemType> sliceGradientValue = m_gradientValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> slicePosterior = m_posterior.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceGradientValue = DataSlice(m_gradientValues, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> slicePosterior = DataSlice(m_posterior, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             switch (inputIndex)
             {
@@ -738,40 +738,40 @@ virtual const std::wstring OperationName() const { return TypeName(); }
             case 0:
             {
                 if (colsPrior == 1)
                     ComputeInputPartialUnnormedPrior(Inputs(0)->GradientValues(), sliceGradientValue, m_prior, slicePosterior, m_temp);
                 else
                 {
-                    Matrix<ElemType> sliceUnnormedPriorGradient = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-                    Matrix<ElemType> slicePrior = m_prior.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                    Matrix<ElemType> sliceUnnormedPriorGradient = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+                    Matrix<ElemType> slicePrior = DataSlice(m_prior, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
                     ComputeInputPartialUnnormedPrior(sliceUnnormedPriorGradient, sliceGradientValue, slicePrior, slicePosterior, m_temp);
                 }
             }
             break;
             case 1:
             {
-                Matrix<ElemType> sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceNormedDeviationVectors = DataSlice(m_normedDeviationVectors, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
                 if (colsPrior == 1)
                     ComputeInputPartialMean(Inputs(1)->GradientValues(), sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp);
                 else
                 {
-                    Matrix<ElemType> sliceMeanGradient = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                    Matrix<ElemType> sliceMeanGradient = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
                     ComputeInputPartialMean(sliceMeanGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp);
                 }
             }
             break;
             case 2:
             {
-                Matrix<ElemType> sliceNormedDeviation = m_normedDeviation.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceNormedDeviation = DataSlice(m_normedDeviation, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
                 if (colsPrior == 1)
                     ComputeInputPartialLogStddev(Inputs(2)->GradientValues(), sliceGradientValue, sliceNormedDeviation, slicePosterior, m_temp);
                 else
                 {
-                    Matrix<ElemType> sliceLotStddevGradient = Inputs(2)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                    Matrix<ElemType> sliceLotStddevGradient = Inputs(2)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
                     ComputeInputPartialLogStddev(sliceLotStddevGradient, sliceGradientValue, sliceNormedDeviation, slicePosterior, m_temp);
                 }
            }
            break;
            case 3:
            {
-                Matrix<ElemType> sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-                Matrix<ElemType> sliceFeatureGradient = Inputs(3)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceNormedDeviationVectors = DataSlice(m_normedDeviationVectors, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+                Matrix<ElemType> sliceFeatureGradient = Inputs(3)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
                 ComputeInputPartialFeature(sliceFeatureGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, m_temp);
             }
             break;
@@ -888,11 +888,11 @@ virtual const std::wstring OperationName() const { return TypeName(); }
             size_t numSamples = Inputs(3)->FunctionValues().GetNumCols();
 
             //get the right slice
-            Matrix<ElemType> sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceFeature = Inputs(3)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceNormedDeviation = m_normedDeviation.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceNormedDeviationVectors = m_normedDeviationVectors.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> slicePosterior = m_posterior.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceFeature = Inputs(3)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceNormedDeviation = DataSlice(m_normedDeviation, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceNormedDeviationVectors = DataSlice(m_normedDeviationVectors, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> slicePosterior = DataSlice(m_posterior, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             if (colsPrior == 1)
             {
@@ -901,12 +901,12 @@ virtual const std::wstring OperationName() const { return TypeName(); }
             }
             else if (colsPrior == numSamples)
             {
-                Matrix<ElemType> sliceUnnormedPrior = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-                Matrix<ElemType> sliceMean = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-                Matrix<ElemType> sliceLogstddev = Inputs(2)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> sliceUnnormedPrior = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+                Matrix<ElemType> sliceMean = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+                Matrix<ElemType> sliceLogstddev = Inputs(2)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
-                Matrix<ElemType> slicePrior = m_prior.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-                Matrix<ElemType> sliceStddev = m_stddev.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                Matrix<ElemType> slicePrior = DataSlice(m_prior, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+                Matrix<ElemType> sliceStddev = DataSlice(m_stddev, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
                 EvaluateThisNodeS(sliceOutputValue, sliceUnnormedPrior, sliceMean, sliceLogstddev, sliceFeature,
                     slicePrior, sliceStddev, sliceNormedDeviationVectors, sliceNormedDeviation, slicePosterior, m_temp);
@@ -1113,13 +1113,13 @@ virtual const std::wstring OperationName() const { return TypeName(); }
             if (inputIndex > 0)
                 InvalidArgument("Dropout operation only takes one input.");
 
-            Matrix<ElemType> sliceInput0Grad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
-            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInput0Grad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
+            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             Matrix<ElemType> sliceMask = Matrix<ElemType>();
             if (m_dropoutRate > 0)
             {
-                sliceMask = m_maskOfDropout.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                sliceMask = DataSlice(m_maskOfDropout, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
             }
 
             ComputeInputPartialS(m_dropoutRate, sliceInput0Grad, sliceMask, sliceOutputGrad);
@@ -1143,7 +1143,7 @@ virtual const std::wstring OperationName() const { return TypeName(); }
         }
         virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange)
        {
-            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
             Matrix<ElemType> sliceOutputValue = Matrix<ElemType>();
             Matrix<ElemType> sliceMask = Matrix<ElemType>();
@@ -1151,10 +1151,10 @@ virtual const std::wstring OperationName() const { return TypeName(); }
             {
                 FunctionValues().Resize(Inputs(0)->FunctionValues().GetNumRows(), Inputs(0)->FunctionValues().GetNumCols());
                 m_maskOfDropout.Resize(Inputs(0)->FunctionValues().GetNumRows(), Inputs(0)->FunctionValues().GetNumCols());
-                sliceMask = m_maskOfDropout.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+                sliceMask = DataSlice(m_maskOfDropout, frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
             }
 
-            sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
 
             EvaluateThisNodeS(m_dropoutRate, m_randomSeed, sliceOutputValue, sliceMask, sliceInput0Value);
         }
@@ -1405,9 +1405,9 @@ virtual const std::wstring OperationName() const { return TypeName(); }
             }
 
             size_t outputSamplesInRecurrentStep = GetNumParallelSequences() * rows / m_numRows;
-            Matrix<ElemType> sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout);
+            Matrix<ElemType> sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
             // BUGBUG: the following will fail since outputSamplesInRecurrentStep will not match m_pMBLayout. Need to find out what this means (currently layout is constant throughout the graph), and implement it correctly.
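             // E.g. (hypothetical numbers): with 2 parallel sequences and a 4-row input reshaped to
             // m_numRows = 8, outputSamplesInRecurrentStep = 2 * 4 / 8 = 1, while m_pMBLayout still
             // reports 2 parallel sequences, so a layout-checked slice of the output cannot line up.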
- Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep), m_pMBLayout); + Matrix sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep, m_pMBLayout)); EvaluateThisNodeS(sliceOutputValue, sliceInputValue, m_numRows); } @@ -1450,9 +1450,9 @@ virtual const std::wstring OperationName() const { return TypeName(); } size_t outputSamplesInRecurrentStep = GetNumParallelSequences() * rows / m_numRows; - Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); // BUGBUG: the following will fail since outputSamplesInRecurrentStep will not match m_pMBLayout. Need to find out what this means (currently layout is constant throughout the graph), and implement it correctly. - Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep), m_pMBLayout); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep, m_pMBLayout)); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_numRows); } @@ -1648,8 +1648,8 @@ virtual const std::wstring OperationName() const { return TypeName(); } virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) { - Matrix sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputValue = m_functionValues.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); EvaluateThisNodeS(sliceOutputValue, sliceInputValue, m_numRepeat); } @@ -1675,8 +1675,8 @@ virtual const std::wstring OperationName() const { return TypeName(); } if (inputIndex != 0) InvalidArgument("RowRepeat only has one input."); - Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout)); ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_numRepeat); } diff --git a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h 
b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h index e6f45eb57909..9cdf2f4accd4 100644 --- a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h @@ -225,7 +225,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { d = (int)functionValues.Mod((float)delayedIndex, (float)delayedActivation.GetNumCols()); // this can point to the past activity of the previous minibatch - Matrix out = ValueSlice(frameRange, m_pMBLayout); + Matrix out = ValueSlice(frameRange); Matrix inp((DEVICEID_TYPE)functionValues.GetDeviceId()); if (minibatchPackingFlags & SequenceStart_or_End) @@ -606,18 +606,18 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (int timeIdxInSeq = nT - GetNumParallelSequences(); timeIdxInSeq >= 0; timeIdxInSeq -= GetNumParallelSequences()) { FrameRange frameRange(timeIdxInSeq, GetNumParallelSequences()); - Matrix sliceObs = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutput = ValueSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); - Matrix sliceState = m_State.FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); + Matrix sliceObs = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceOutput = ValueSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceState = DataSlice(m_State, frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences(), m_pMBLayout)); - Matrix sliceGi = m_Gi.FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); - Matrix sliceGf = m_Gf.FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); - Matrix sliceGo = m_Go.FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); + Matrix sliceGi = DataSlice(m_Gi, frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceGf = DataSlice(m_Gf, frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceGo = DataSlice(m_Go, frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences(), m_pMBLayout)); - Matrix sliceTanhState = tanhState.FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); - Matrix sliceTanhObs = tanhObs.FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); + Matrix sliceTanhState = DataSlice(tanhState, frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceTanhObs = DataSlice(tanhObs, frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences(), m_pMBLayout)); - Matrix error = GradientSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences()), m_pMBLayout); + Matrix error = GradientSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences(), m_pMBLayout)); Matrix grdToObsSlice(this->m_deviceId); @@ -666,7 +666,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { grdToPrevState, m_tempMatrix ); - grdToObs.FrameSlice(frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, 
GetNumParallelSequences()), m_pMBLayout).SetValue(grdToObsSlice); + DataSlice(grdToObs, frameRange/*TODO: delete this:*/.Check(timeIdxInSeq, GetNumParallelSequences(), m_pMBLayout)).SetValue(grdToObsSlice); PrepareErrors(timeIdxInSeq, grdToPrevOutput, grdToPrevState, GetNumParallelSequences(), &m_pMBLayout->GetM()); } @@ -997,16 +997,16 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (size_t timeIdxInSeq = 0; timeIdxInSeq < nT; timeIdxInSeq += GetNumParallelSequences()) { FrameRange frameRange(timeIdxInSeq, GetNumParallelSequences()); - Matrix sliceObs = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceOutput = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceState = m_State.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceObs = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceOutput = ValueSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceState = DataSlice(m_State, frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences(), m_pMBLayout)); - Matrix sliceGi = m_Gi.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceGf = m_Gf.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceGo = m_Go.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceGi = DataSlice(m_Gi, frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceGf = DataSlice(m_Gf, frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceGo = DataSlice(m_Go, frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences(), m_pMBLayout)); - Matrix sliceTanhState = tanhState.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); - Matrix sliceTanhInput = tanhObs.FrameSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences()), m_pMBLayout); + Matrix sliceTanhState = DataSlice(tanhState, frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences(), m_pMBLayout)); + Matrix sliceTanhInput = DataSlice(tanhObs, frameRange/*TODO: delete this:*/.Check(frameRange.t(), GetNumParallelSequences(), m_pMBLayout)); PrepareHistory(timeIdxInSeq, mSlicePrevOutput, mSlicePrevState, FunctionValues(), m_State, m_PastOutput, m_PastState, GetNumParallelSequences(), m_DefaultState, &m_pMBLayout->GetM()); @@ -1101,8 +1101,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { { // this is in the minibatch FrameRange frameRange(timeIdxInSeq, nsamples); - Matrix::Multiply(output.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() - nsamples, nsamples), false, colSeg, false, newPrevOutput); - Matrix::Multiply(state.FrameSlice(frameRange/*TODO: delete the next two parameters*/, frameRange.t() - nsamples, nsamples), false, colSeg, false, newPrevState); + Matrix::Multiply(DataSlice(output, frameRange/*TODO: delete the next two parameters*/, frameRange.t() - nsamples, nsamples), false, colSeg, false, 
newPrevOutput); + Matrix::Multiply(DataSlice(state, frameRange/*TODO: delete the next two parameters*/, frameRange.t() - nsamples, nsamples), false, colSeg, false, newPrevState); } Base::SetToInitStateValueForResetSeg(sentenceBegin->ColumnSlice(utt_t, 1), nStream, initStateValue, newPrevState); diff --git a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h index e15d802c9250..ab04316ca667 100644 --- a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h @@ -881,7 +881,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { FrameRange frameRange(t, 1); // TODO: change to frameRange over a whole MB with a sequence index. BUGBUG: below code will break until this is fixed /// compute prb - 1 and prb - Matrix lbl_t = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + Matrix lbl_t = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(t, 1, m_pMBLayout)); size_t c_t = (size_t)lbl_t(1, 0); size_t lft_bnd = (size_t)lbl_t(2, 0); size_t rgt_bnd = (size_t)lbl_t(3, 0); @@ -890,14 +890,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { continue; Matrix input_weight_t = Inputs(2)->FunctionValues().ColumnSlice(lft_bnd, nbr_wrd); - Matrix obs = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + Matrix obs = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check(t, 1, m_pMBLayout)); Matrix grd_to_soft_max_input = m_grdToSoftMaxInput.ColumnSlice(sz, nbr_wrd); - Matrix grd_to_cls_prob = m_clsLogSoftmax.FrameSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + Matrix grd_to_cls_prob = DataSlice(m_clsLogSoftmax, frameRange/*TODO: delete this:*/.Check(t, 1, m_pMBLayout)); switch (inputIndex){ case 1: /// gradient to input - grd_t = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + grd_t = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check(t, 1, m_pMBLayout)); ComputeInputPartialRight(input_weight_t, grd_t, grd_to_soft_max_input); break; case 2: @@ -906,8 +906,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputeInputPartialLeft(obs, grd_to_wgt_t, grd_to_soft_max_input); break; case 3: - grd_t = Inputs(3)->GradientSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); - grd_t.SetValue(m_clsSoftmax.FrameSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout)); + grd_t = Inputs(3)->GradientSlice(frameRange/*TODO: delete this:*/.Check(t, 1, m_pMBLayout)); + grd_t.SetValue(DataSlice(m_clsSoftmax, frameRange/*TODO: delete this:*/.Check(t, 1, m_pMBLayout))); ComputeCEPartialToSoftmaxInputs(grd_t, GradientValues(), c_t); break; default: @@ -947,7 +947,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { FrameRange frameRange(t, 1); /// compute prb - 1 and prb - Matrix lbl_t = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(t, 1), m_pMBLayout); + Matrix lbl_t = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check(t, 1, m_pMBLayout)); size_t y_t = (size_t)lbl_t(0, 0); size_t lft_bnd = (size_t)lbl_t(2, 0); size_t rgt_bnd = (size_t)lbl_t(3, 0); diff --git a/Math/Math/Matrix.h b/Math/Math/Matrix.h index e5a57c058552..75c4798eb181 100644 --- a/Math/Math/Matrix.h +++ b/Math/Math/Matrix.h @@ -601,7 +601,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // FrameRange frameRange(t, 1); // using a different #sequences. 
Solve by treating all frames as one sequence (in FrameRange)
 //  - ReshapeNode:
-//      Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep), m_pMBLayout);
+//      Matrix sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * outputSamplesInRecurrentStep, outputSamplesInRecurrentStep, m_pMBLayout));
 //    using a different #sequences. Find out what this really means.
 struct FrameRange
 {
@@ -623,9 +623,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         size_t NumCols(const shared_ptr<MBLayout> & pMBLayout) const { EnsureNotAllFrames(); VerifyMBLayout(pMBLayout); return pMBLayout->GetNumParallelSequences(); }
         bool IsAllFrames() const { return samplesInRecurrentStep == SIZE_MAX; }    // if true then above functions may not be called; caller must use entire batch instead
-        const FrameRange & Check(size_t expectedStartColumn, size_t expectedNumCols) const
+        const FrameRange & Check(size_t expectedStartColumn, size_t expectedNumCols, const shared_ptr<MBLayout> & pMBLayout) const
         {
-            if (!IsAllFrames() && expectedStartColumn != StartColumn() || expectedNumCols != NumCols())
+            if (!IsAllFrames() && (samplesInRecurrentStep != pMBLayout->GetNumParallelSequences() || expectedStartColumn != StartColumn(pMBLayout) || expectedNumCols != NumCols(pMBLayout)))
                 LogicError("FrameSlice: FrameRange object gives different range than original explicit code. Logic is borked.");
             return *this;
         }

From 01a06b656b9d8797641be88a6a6fa62f9d646600 Mon Sep 17 00:00:00 2001
From: Alexey Kamenev
Date: Fri, 4 Sep 2015 10:57:12 -0700
Subject: [PATCH 30/44] Add FSAdaGrad

---
 MachineLearning/CNTKSGDLib/SGD.cpp | 5439 ++++++++++++++--------------
 Math/Math/GPUMatrix.cu             |   20 +
 Math/Math/GPUMatrix.h              |    2 +
 Math/Math/GPUMatrixCUDAKernels.cu  |   30 +
 Math/Math/Matrix.cpp               |   21 +
 Math/Math/Matrix.h                 |  277 +-
 6 files changed, 2935 insertions(+), 2854 deletions(-)

diff --git a/MachineLearning/CNTKSGDLib/SGD.cpp b/MachineLearning/CNTKSGDLib/SGD.cpp
index 0c1ddeaf9d52..6bcaa2b9bd61 100644
--- a/MachineLearning/CNTKSGDLib/SGD.cpp
+++ b/MachineLearning/CNTKSGDLib/SGD.cpp
@@ -1,2716 +1,2723 @@
-// SGD.cpp -- implements SGD with all bells and whistles, parallelization, randomization, etc.
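The headline of this commit is the FSAdaGrad updater (new kernels in GPUMatrixCUDAKernels.cu with entry points on GPUMatrix and Matrix). FSAdaGrad combines AdaGrad-style gradient scaling with momentum; the sketch below shows the general shape of such an update and is illustrative only -- it is not the kernel this patch adds, and gamma/eps are assumed hyperparameter names:

    #include <cmath>

    // Illustrative FSAdaGrad-style step for one scalar parameter.
    void FsAdagradStepSketch(double& w, double& smoothSqr, double& smoothStep, double g,
                             double learnRate, double momentum, double gamma, double eps)
    {
        smoothSqr  = gamma * smoothSqr + (1.0 - gamma) * g * g;      // running average of g^2
        smoothStep = momentum * smoothStep
                   + learnRate * g / std::sqrt(smoothSqr + eps);     // momentum on the scaled step
        w -= smoothStep;
    }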
-
-#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms  --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
-
-#include "Basics.h"
-#include "SGD.h"
-#include "AllReduceDistGradAggregator.h"
-
-#include
-
-namespace Microsoft { namespace MSR { namespace CNTK {
-
-    using namespace std;
-
-    template<class ElemType>
-    void DecimateMinibatch(std::map<std::wstring, MSR::CNTK::Matrix<ElemType>*>& mb, int numProcessor, int myID)
-    {
-        int rank = myID;
-        int procs = numProcessor;
-
-        size_t rv = 0;
-        if (procs > 1)
-        {
-            for (auto it = mb.begin(); it != mb.end(); ++it)
-            {
-                MSR::CNTK::Matrix<ElemType> &mat = *(it->second);
-                size_t nCols = mat.GetNumCols();
-                size_t col_start = (nCols * rank) / procs;
-                size_t col_end = (nCols * (rank + 1)) / procs;
-                if (col_end > nCols)
-                {
-                    // this shouldn't happen
-                    col_end = nCols;
-                }
-
-                if (col_end == col_start)
-                {
-                    MSR::CNTK::Matrix<ElemType> tmp(mat.GetNumRows(), 0, AUTOPLACEMATRIX, DENSE);
-                    mat.SetValue(tmp);
-                }
-                else
-                {
-                    MSR::CNTK::Matrix<ElemType> tmp = mat.ColumnSlice(col_start, col_end - col_start);
-                    mat.SetValue(tmp);
-                }
-
-                if (rv == 0)
-                {
-                    rv = mat.GetNumCols();
-                }
-                else
-                {
-                    if (rv != mat.GetNumCols())
-                    {
-                        throw std::logic_error("Uneven number of columns among inputs.");
-                    }
-                }
-            }
-        }
-    }
-
-    template<class ElemType>
-    size_t DecimateMinibatchWithSentences(std::map<std::wstring, MSR::CNTK::Matrix<ElemType>*> &mb, /* (input) matrix to be decimated */
-                                          int rank, int numprocs,                                   /* (input) rank info */
-                                          size_t& nSlices,                                          /* (input/output): on input, # parallel sentences in total; on output, # parallel sentences on this node */
-                                          MBLayoutPtr pMBLayout,                                    // gets filled in
-                                          IDataReader<ElemType>* trainDataReader)                   /* (input) to have access to reader */
-    {
-        // For RNN, an input Matrix is organized in the following way:
-        //   | x_t^1  x_t^2 ... x_t^N |  .... | x_{t+T-1}^1 ... x_{t+T-1}^N |
-        //   |<----   block 1    ---->|  .... |<------  block T       ----->|
-        // N is the nSlice (input)
-        // The decimation here is to split each block across individual GPUs
-        // So after decimation,
-        //   | x_t^{st} ... x_t^{en-1}|  .... | x_{t+T-1}^{st} ... x_{t+T-1}^{en-1} |
-        // Each block now has nSlice/nProcs
-        //
-        // Correspondingly, the SentenceBoundary and PackingFlags will be revised
-        trainDataReader->CopyMBLayoutTo(pMBLayout);   // fill this
-
-        size_t rv = 0;
-        size_t nOrigParallelUtts = nSlices;
-        static bool warned = false;
-        if (numprocs > 1)
-        {
-            // decide new parallel utterances
-            size_t sent_start = 0;
-            size_t sent_end = 0;
-            if (nOrigParallelUtts % numprocs != 0)
-            {
-                if (!warned)
-                {
-                    /* warn about potential bandwidth waste */
-                    fprintf(stderr, "WARNING: %d GPUs are used in model averaging, but the number of parallel utterances is %d, which may degrade training speed.\n",
-                            (int)g_mpi->NumNodesInUse(), (int)nOrigParallelUtts);
-                    warned = true;
-                }
-                if (rank == numprocs - 1)
-                {
-                    nSlices = nOrigParallelUtts - (nOrigParallelUtts / numprocs + 1) * (numprocs - 1);
-                    sent_start = (nOrigParallelUtts / numprocs + 1) * (numprocs - 1);
-                    sent_end = nOrigParallelUtts;
-                }
-                else
-                {
-                    nSlices = nOrigParallelUtts / numprocs + 1;
-                    sent_start = nSlices * rank;
-                    sent_end = nSlices * (rank + 1);
-                    if (sent_end > nOrigParallelUtts) sent_end = nOrigParallelUtts;
-                }
-            }
-            else
-            {
-                nSlices = nOrigParallelUtts / numprocs;
-                sent_start = rank*nSlices;
-                sent_end = (rank + 1)*nSlices;
-                if (sent_end > nOrigParallelUtts) sent_end = nOrigParallelUtts;
-            }
-            // decimate data
-            for (auto it = mb.begin(); it != mb.end(); ++it)
-            {
-                MSR::CNTK::Matrix<ElemType> &mat = *(it->second);
-                size_t nCols = mat.GetNumCols();
-
-                if (nCols % nOrigParallelUtts != 0)
-                {
-                    // this should not happen for DNN, RNN with truncated BPTT, not sure about other special stuff ...
-                    RuntimeError("ERROR: minibatch size %d, but with %d parallel utterances\n", (int)nCols, (int)nOrigParallelUtts);
-                }
-                size_t nBlocks = nCols / nOrigParallelUtts;
-                // for RNN, nBlocks is the size of truncated BPTT
-                if (sent_end == sent_start)
-                {
-                    // should never happen, print debug info
-                    RuntimeError("ERROR: in DecimateMinibatch, col_st=col_en=%d, nCol=%d, nBlock=%d, nParaUtts=%d, nGPU=%d\n",
-                                 (int)sent_start, (int)nCols, (int)nBlocks, (int)nOrigParallelUtts, (int)numprocs);
-                }
-
-                MSR::CNTK::Matrix<ElemType> tmp(mat.GetNumRows(), nSlices*nBlocks, mat.GetPreferredDeviceId(), mat.GetMatrixType());
-
-                // do the column slice for each block
-                for (size_t iblock = 0; iblock < nBlocks; iblock++)
-                {
-                    tmp.SetColumnSlice(mat.ColumnSlice(nOrigParallelUtts*iblock + sent_start, nSlices),
-                                       iblock*nSlices, nSlices);
-                }
-                mat.SetValue(tmp);
-
-                // assert the cols are even among nodes
-                if (0 == rv)
-                {
-                    rv = mat.GetNumCols();
-                }
-                else
-                {
-                    if (rv != mat.GetNumCols())
-                        throw std::logic_error("Uneven number of columns among inputs.");
-                }
-            }
-            // revise sentence boundary and packing flags
-            // TODO: get rid of this explicit matrix, this can be done directly with MBLayout types.
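The rank partitioning above is easiest to check with concrete numbers. A hypothetical helper that mirrors the same arithmetic, including the uneven case where the last rank takes the remainder:

    #include <algorithm>
    #include <cstddef>
    #include <utility>

    // Returns [sent_start, sent_end) of parallel utterances owned by this rank.
    std::pair<size_t, size_t> UtteranceRangeSketch(size_t nUtts, size_t rank, size_t procs)
    {
        if (nUtts % procs == 0)                        // even split
            return { rank * (nUtts / procs), (rank + 1) * (nUtts / procs) };
        const size_t chunk = nUtts / procs + 1;        // every rank but the last
        if (rank == procs - 1)
            return { chunk * (procs - 1), nUtts };     // last rank takes the remainder
        return { chunk * rank, std::min(chunk * (rank + 1), nUtts) };
    }
    // e.g. nUtts = 10, procs = 4  ->  [0,3), [3,6), [6,9), [9,10)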
- size_t nMBSize = pMBLayout->GetSize(); - Matrix newBoundary(CPUDEVICE); - newBoundary.Resize(nSlices, nMBSize); - newBoundary.AssignRowSliceValuesOf(pMBLayout->GetM(), sent_start, nSlices); - fill(pMBLayout->GetV().begin(), pMBLayout->GetV().end(), MinibatchPackingFlags::None); - for (size_t nt = 0; nt < nMBSize; nt++) - { - for (size_t ns = 0; ns < nSlices; ns++) - { - if (newBoundary(ns, nt) == ((int)MinibatchPackingFlags::SequenceStart)) - pMBLayout->GetV()[nt] |= MinibatchPackingFlags::SequenceStart; - if (newBoundary(ns, nt) == ((int)MinibatchPackingFlags::SequenceEnd)) - pMBLayout->GetV()[nt] |= MinibatchPackingFlags::SequenceEnd; - } - } - } - - return rv; - } - - static AdaptationRegType ParseAdaptationRegType(wstring s) - { - msra::strfun::tolower_ascii(s); - if (s == L"" || s == L"none") - return AdaptationRegType::None; - else if (s == L"kl" || s == L"klreg") - return AdaptationRegType::KL; - else - throw std::invalid_argument("ParseAdaptationRegType: Invalid Adaptation Regularization Type. Valid values are (None | KL)"); - } - - static GradientsUpdateType ParseGradUpdateType(wstring s) - { - msra::strfun::tolower_ascii(s); - if (s == L"" || s == L"none" || s == L"normal" || s == L"simple") - return GradientsUpdateType::None; - else if (s == L"adagrad") - return GradientsUpdateType::AdaGrad; - else if (s == L"rmsprop") - return GradientsUpdateType::RmsProp; - else - throw std::invalid_argument("ParseGradUpdateType: Invalid Gradient Updating Type. Valid values are (None | AdaGrad | RmsProp )"); - } - - static ParallelizationMethod ParseParallelizationMethod(wstring s) - { - msra::strfun::tolower_ascii(s); - if ((s == L"") || (s == L"none")) - return ParallelizationMethod::None; - else if (s == L"dataparallelsgd") - return ParallelizationMethod::DataParallelSGD; - else if (s == L"modelaveragingsgd") - return ParallelizationMethod::ModelAveragingSGD; - else - throw std::invalid_argument("ParseParallelizationMethod: Invalid Parallelization Method. Valid values are (None | DataParallelSGD | ModelAveragingSGD)"); - } - - static LearningRateSearchAlgorithm ParseLearningRateSearchType(wstring s) - { - // TODO: why allow so many variants? - msra::strfun::tolower_ascii(s); - if (s == L"false" || s == L"none") - return LearningRateSearchAlgorithm::None; - else if (s == L"searchbeforeepoch" || s == L"beforeepoch" || s == L"before") - return LearningRateSearchAlgorithm::SearchBeforeEpoch; - else if (s == L"adjustafterepoch" || s == L"afterepoch" || s == L"after") - return LearningRateSearchAlgorithm::AdjustAfterEpoch; - else - throw std::invalid_argument("autoAdjustLR: Invalid learning rate search type. 
Valid values are (None | SearchBeforeEpoch | AdjustAfterEpoch)"); - } - -template - SGD::SGD(const ConfigParameters& configSGD) - { - ConfigArray learningRatesPerMBStr = configSGD("learningRatesPerMB", ""); - m_needToNormalizeLRByParallUtterance = false; - m_needToNormalizeMomentumByParallUtterance = false; - floatargvector learningRatesPerMB = learningRatesPerMBStr; - - ConfigArray learningRatesPerSampleStr = configSGD("learningRatesPerSample", ""); - floatargvector learningRatesPerSample = learningRatesPerSampleStr; - - std::string executionEngineValue = configSGD("executionEngine", "synchronous"); - - // AutoAdjust Parameters - ConfigParameters configAALR(configSGD("AutoAdjust", "")); - LearningRateSearchAlgorithm autoAdjustLRType = ParseLearningRateSearchType(configAALR("autoAdjustLR", "None")); - double reduceLearnRateIfImproveLessThan = configAALR("reduceLearnRateIfImproveLessThan", "0"); - bool continueReduce = (bool) configAALR("continueReduce", "false"); - size_t learnRateAdjustInterval = (size_t) configAALR("learnRateAdjustInterval", "1"); - double learnRateDecreaseFactor = configAALR("learnRateDecreaseFactor", "0.618"); - double increaseLearnRateIfImproveMoreThan = configAALR("increaseLearnRateIfImproveMoreThan", "1#INF"); - double learnRateIncreaseFactor = configAALR("learnRateIncreaseFactor", "1.382"); - - // AutoAdjust Auto Adjust Minibatch Parameters - bool autoAdjustMinibatch = (bool) configAALR("autoAdjustMinibatch", "false"); - size_t minibatchSizeTuningFrequency = configAALR("minibatchSizeTuningFrequency", "1"); - size_t minibatchSizeTuningMax = configAALR("minibatchSizeTuningMax", "1048576"); - size_t minibatchSearchCriterionErrorMargin = configAALR("minibatchSearchCriterionErrorMargin", "1"); - - // the number of minibatches used to search - // the learning rate. It’s typically set to 10-20% of - // the total minibatches in an epoch. - ConfigArray minibatch4LRSearch = configAALR("numMiniBatch4LRSearch", "500"); - intargvector numMiniBatch4LRSearch = minibatch4LRSearch; - - size_t numPrevLearnRates = configAALR("numPrevLearnRates", "5"); - size_t numBestSearchEpoch = configAALR("numBestSearchEpoch", "1"); - bool loadBestModel = configAALR("loadBestModel", "true"); - bool useCVSetControlLRIfCVExists = configAALR("UseCVSetControlLRIfCVExists", "true"); - bool useEvalCriterionControlLR = configAALR("UseEvalCriterionControlLR", "false"); - - - ConfigArray minibatchSize = configSGD("minibatchSize", "256"); - intargvector mbSize = minibatchSize; - - // the number of samples in each epoch (0 means, use all the samples in each epoch). - size_t epochSize = configSGD("epochSize", "0"); - - // the total number of epochs to run. 
- size_t maxEpochs = configSGD("maxEpochs"); - - ConfigArray momentumPerMBStr = configSGD("momentumPerMB", ""); - floatargvector momentumPerMB = momentumPerMBStr; - - ConfigArray momentumPerSampleStr = configSGD("momentumPerSample", ""); - floatargvector momentumPerSample = momentumPerSampleStr; - - wstring modelPath = configSGD("modelPath"); - wstring trainCriterionNodeName = configSGD("trainCriterionNodeName", ""); - wstring evalCriterionNodeName = configSGD("evalCriterionNodeName", ""); - - size_t maxTempMemSizeInSamplesForCNN = configSGD("maxTempMemSizeInSamplesForCNN", "0"); - - int traceLevel = configSGD("traceLevel", "0"); - size_t numMBsToShowResult = configSGD("numMBsToShowResult", "10"); - size_t numMBsToCUDAProfile = configSGD("numMBsToCUDAProfile", "0"); - - bool keepCheckPointFiles = configSGD("keepCheckPointFiles", "false"); - - bool gradientClippingWithTruncation = configSGD("gradientClippingWithTruncation", "true"); - double clippingThresholdPerSample = configSGD("clippingThresholdPerSample", "1#INF"); - - ConfigArray dropoutRatesStr = configSGD("dropoutRate", "0.0"); - floatargvector dropoutRates = dropoutRatesStr; - - GradientUpdateInfo gUpdateInfo; - GradientsUpdateType gradUpdateType = ParseGradUpdateType(configSGD("gradUpdateType", "None")); - double gaussianNoiseInjecStd = configSGD("gaussianNoiseInjectStd", "0"); - gUpdateInfo.mType = gradUpdateType; - gUpdateInfo.mGaussianNoiseInjectStd = (float) gaussianNoiseInjecStd; - - // extract RMSProp parameters from config, if they exist. Default to reasonable values. - RMSPropInfo rpi; - rpi.dec = (double) configSGD("rms_wgt_dec", "0.75"); - rpi.inc = (double) configSGD("rms_wgt_inc", "1.2"); - rpi.min = (double) configSGD("rms_wgt_min", "0.1"); - rpi.max = (double) configSGD("rms_wgt_max", "10.0"); - rpi.gamma = (double) configSGD("rms_gamma", "0.99"); - - bool needAveMultiplier = (bool) configSGD("normWithAveMultiplier", "true"); - double L2RegWeight = (double) configSGD("L2RegWeight", "0"); - double L1RegWeight = (double) configSGD("L1RegWeight", "0"); - - /// for backward support. 
future setup should use gradUpdateType=AdaGrad, instead of
-        /// useAdagrad=true
-        bool useAdagrad = configSGD("useAdagrad", "false");
-        if (useAdagrad)
-        {
-            gradUpdateType = GradientsUpdateType::AdaGrad;
-            gUpdateInfo.mType = gradUpdateType;
-        }
-
-        AdaptationRegType adaptationRegType = ParseAdaptationRegType(configSGD("adaptationRegType", "None"));
-        double adaptationRegWeight = configSGD("adaptationRegWeight", "0");
-
-        /// gradient check setup
-        bool doGradientCheck = configSGD("gradientcheck", "false");
-        double gradientCheckSigDigit = configSGD("sigFigs", "6");
-
-        if (doGradientCheck && sizeof(ElemType) != sizeof(double))
-            LogicError("Gradient check needs to use precision = double");
-        m_doUnitTest = configSGD("unittest", "false");
-
-        bool validateAfterModelReloading = configSGD("validateAfterModelReloading", "true");
-
-        bool UsingAllDataForPreComputedNode = configSGD("UseAllDataForPreComputedNode", "true");
-
-        // Parallel training
-        m_parallelizationMethod = ParallelizationMethod::None;
-        m_distGradAgg = nullptr;
-        m_gradHeader = nullptr;
-        m_numGradientBits = 32;
-        m_zeroThresholdFor1Bit = true;
-        m_enableDistributedMBReading = false;
-        m_parallelizationStartEpochNum = 0;
-        m_nFramesBetweenMASync = 40000; // default 40k frames
-
-        if ((g_mpi != nullptr) && configSGD.ExistsCurrent("ParallelTrain"))
-        {
-            ConfigParameters configParallelTrain(configSGD("ParallelTrain", ""));
-            m_parallelizationMethod = ParseParallelizationMethod(configParallelTrain("parallelizationMethod", "None"));
-            m_parallelizationStartEpochNum = configParallelTrain("parallelizationStartEpoch", "1");
-            m_parallelizationStartEpochNum -= 1; // Epoch numbers internally are 0 based
-            m_enableDistributedMBReading = configParallelTrain("distributedMBReading", "false");
-
-            if (configParallelTrain.ExistsCurrent("DataParallelSGD"))
-            {
-                ConfigParameters configDataParallelSGD(configParallelTrain("DataParallelSGD", ""));
-                const char* defaultGradientBitsStr = (sizeof(ElemType) == sizeof(float)) ? "32" : "64";
-                m_numGradientBits = configDataParallelSGD("gradientBits", defaultGradientBitsStr);
-                m_zeroThresholdFor1Bit = configDataParallelSGD("useZeroThresholdFor1BitQuantization", "true");
-                if ((m_numGradientBits < 1) || (m_numGradientBits > (8 * sizeof(ElemType))))
-                {
-                    throw std::invalid_argument("gradientBits must be in the range [1, 32] when using precision=float and in range [1, 64] when using precision=double!");
-                }
-            }
-
-            if (configParallelTrain.ExistsCurrent("ModelAveragingSGD"))
-            {
-                ConfigParameters configMASGD(configParallelTrain("ModelAveragingSGD", ""));
-                m_nFramesBetweenMASync = configMASGD("SyncFrequencyInFrames", "40000");
-                m_iMASyncStatsTrace = configMASGD("MAPerfStats", "0");
-            }
-
-        }
-
-        // TODO: the number of parameters of this function is waaay too little!
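One way to act on that TODO (a sketch only, with illustrative field names, not how this code is organized) is to gather the settings read above into a single options struct so that Init() takes one argument:

    // Sketch: bundle the SGD settings into one struct instead of passing
    // dozens of positional arguments to Init().
    struct SGDOptionsSketch
    {
        intargvector        mbSize;
        size_t              epochSize = 0;            // 0 = use all samples
        size_t              maxEpochs = 0;
        floatargvector      learningRatesPerSample;
        floatargvector      momentumPerSample;
        double              L1RegWeight = 0.0;
        double              L2RegWeight = 0.0;
        GradientsUpdateType gradUpdateType = GradientsUpdateType::None;
    };
    // void SGD<ElemType>::Init(const SGDOptionsSketch& options);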
- Init(learningRatesPerMB, - learningRatesPerSample, - mbSize, - epochSize, - maxEpochs, - modelPath, - momentumPerMB, - momentumPerSample, - gradientClippingWithTruncation, - clippingThresholdPerSample, - autoAdjustLRType, - increaseLearnRateIfImproveMoreThan, - learnRateIncreaseFactor, - reduceLearnRateIfImproveLessThan, - continueReduce, - learnRateDecreaseFactor, - dropoutRates, - loadBestModel, - numMiniBatch4LRSearch, - numPrevLearnRates, - numBestSearchEpoch, - traceLevel, - numMBsToShowResult, - numMBsToCUDAProfile, - maxTempMemSizeInSamplesForCNN, - gUpdateInfo, - keepCheckPointFiles, - adaptationRegType, - adaptationRegWeight, - trainCriterionNodeName, - evalCriterionNodeName, - doGradientCheck, - gradientCheckSigDigit, - validateAfterModelReloading, - rpi, - learnRateAdjustInterval, - UsingAllDataForPreComputedNode, - needAveMultiplier, - L2RegWeight, - L1RegWeight, - autoAdjustMinibatch, - minibatchSizeTuningFrequency, - minibatchSizeTuningMax, - useCVSetControlLRIfCVExists, - useEvalCriterionControlLR, - minibatchSearchCriterionErrorMargin); - } - - //autoLearnRateSearchType is applied only if the learning rate for the epoch is not specified in learningRatesPerMB and learningRatesPerSample - template - void SGD::Init(const floatargvector& learningRatesPerMB, - const floatargvector& learningRatesPerSample, - const intargvector& mbSize, - const size_t epochSize, - const size_t maxEpochs, - const wstring& modelPath, - const floatargvector& momentumPerMB, - const floatargvector& momentumPerSample, - const bool gradientClippingWithTruncation, - const double clippingThresholdPerSample, - const LearningRateSearchAlgorithm autoLearnRateSearchType, - const double increaseLearnRateIfImproveMoreThan, - const double learnRateIncreaseFactor, - const double reduceLearnRateIfImproveLessThan, - const bool continueReduce, - const double learnRateDecreaseFactor, - floatargvector dropoutRates, - const bool loadBestModel, - const intargvector& numMiniBatch4LRSearch, - const size_t numPrevLearnRates, - const size_t numBestSearchEpoch, - const int traceLevel, - const size_t numMBsToShowResult, - const size_t numMBsToCUDAProfile, - const size_t maxTempMemSizeInSamplesForCNN, - const GradientUpdateInfo gradUpdateType, - const bool keepCheckPointFiles, - const AdaptationRegType adaptationRegType, - const double adaptationRegWeight, - const wstring trainCriterionNodeName, - const wstring evalCriterionNodeName, - const bool doGradientCheck, - const double gradientCheckSigDigit, - const bool validateAfterModelReloading, - RMSPropInfo rpi, - size_t learnRateAdjustInterval, - const bool UsingAllDataForPreComputed, - const bool needAveMultiplier, - const double L2RegWeight, - const double L1RegWeight, - const bool autoAdjustMinibatch, - const size_t minibatchSizeTuningFrequency, - const size_t minibatchSizeTuningMax, - const bool useCVSetControlLRIfCVExists, - const bool useEvalCriterionControlLR, - const size_t minibatchSearchCriterionErrorMargin) - { - m_numPrevLearnRates = numPrevLearnRates; - m_prevChosenMinibatchSize = 0; - m_autoAdjustMinibatch = autoAdjustMinibatch; - m_minibatchSizeTuningMax = minibatchSizeTuningMax; - m_minibatchSizeTuningFrequency = minibatchSizeTuningFrequency; - m_minibatchSearchCriterionErrorMargin = minibatchSearchCriterionErrorMargin; - - m_mbSize = mbSize; - - // the number of samples in each epoch (0 means, use all the samples in each epoch). - m_epochSize = epochSize; - if (m_epochSize == 0) - { - m_epochSize = requestDataSize; - } - - // the total number of epochs to run. 
- m_maxEpochs = maxEpochs; - - m_gradientClippingWithTruncation = gradientClippingWithTruncation; - m_modelPath = modelPath; - m_autoLearnRateSearchType = autoLearnRateSearchType; - m_traceLevel = traceLevel; - m_loadBestModel = loadBestModel; - m_increaseLearnRateIfImproveMoreThan = increaseLearnRateIfImproveMoreThan; - m_learnRateIncreaseFactor = learnRateIncreaseFactor; - m_reduceLearnRateIfImproveLessThan = reduceLearnRateIfImproveLessThan; - m_continueReduce = continueReduce; - - //minimum interval is 1 epoch - m_learnRateAdjustInterval = max((size_t) 1, learnRateAdjustInterval); - - m_learnRateDecreaseFactor = learnRateDecreaseFactor; - m_clippingThresholdPerSample = abs(clippingThresholdPerSample); - m_numMiniBatch4LRSearch = numMiniBatch4LRSearch; - m_dropoutRates = dropoutRates; - m_numMBsToShowResult = int(numMBsToShowResult); - m_numMBsToCUDAProfile = int(numMBsToCUDAProfile); - m_numBestSearchEpoch = numBestSearchEpoch; - m_maxTempMemSizeInSamplesForCNN = maxTempMemSizeInSamplesForCNN; - m_gradType = gradUpdateType; - m_rpi = rpi; - m_keepCheckPointFiles = keepCheckPointFiles; - - m_adaptationRegType = adaptationRegType; - m_adaptationRegWeight = adaptationRegWeight; - - m_trainCriterionNodeName = trainCriterionNodeName; - m_evalCriterionNodeName = evalCriterionNodeName; - m_useAllDataForPreComputedNode = UsingAllDataForPreComputed; - - m_needAveMultiplier = needAveMultiplier; - m_L2RegWeight = L2RegWeight; - m_L1RegWeight = L1RegWeight; - - for (size_t i = 0; i < m_mbSize.size(); i++) - { - if (m_epochSize != requestDataSize && m_epochSize < m_mbSize[i]) - { - throw std::invalid_argument("epoch size must be larger than mbsize."); - } - } - - if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::None && - (learningRatesPerSample.size() == 0 && learningRatesPerMB.size() == 0)) - { - throw std::invalid_argument("If autoLearnRateSearchType is false " - "you must specify the learningRatesPerSample " - "or learningRatesPerMB parameter."); - } - - if (learningRatesPerSample.size() > 0 && learningRatesPerMB.size() > 0) - { - throw std::invalid_argument("You specified both learningRatesPerSample " - "and learningRatesPerMB. Please comment " - "out one of them."); - } - else if (learningRatesPerSample.size() > 0) - { - m_learningRatesPerSample = learningRatesPerSample; - } - else if (learningRatesPerMB.size() > 0) - { - int LRSize = (int) max(learningRatesPerMB.size(), m_mbSize.size()); - m_learningRatesPerSample.resize(LRSize); - for (int i = 0; i < LRSize; i++) - { - m_learningRatesPerSample[i] = learningRatesPerMB[i] / m_mbSize[i]; - } - m_needToNormalizeLRByParallUtterance = true; - } - - if (momentumPerSample.size() > 0 && momentumPerMB.size() > 0) - { - throw std::invalid_argument("You specified both momentumPerSample " - "and momentumPerMB. 
Please comment " - "out one of them."); - } - else if (momentumPerSample.size() > 0) - { - m_momentumPerSample = momentumPerSample; - int momentumVectorSize = m_momentumPerSample.size(); - for (int i = 0; i < momentumVectorSize; i++) - { - if ((m_momentumPerSample[i] >= 1) || (m_momentumPerSample[i] < 0)) - { - throw std::invalid_argument("momentumPerSample must be in [0, 1)."); - } - } - } - else if (momentumPerMB.size() > 0) - { - int momentumVectorSize = (int)max(momentumPerMB.size(), m_mbSize.size()); - m_momentumPerSample.resize(momentumVectorSize); - for (int i = 0; i < momentumVectorSize; i++) - { - if ((momentumPerMB[i] >= 1) || (momentumPerMB[i] < 0)) - InvalidArgument("momentumPerMB must be in [0, 1)."); - m_momentumPerSample[i] = (float)pow(momentumPerMB[i], 1.0 / m_mbSize[i]); - } - - m_needToNormalizeMomentumByParallUtterance = true; - } - else - { - int momentumVectorSize = m_mbSize.size(); - m_momentumPerSample.resize(momentumVectorSize); - for (int i = 0; i < momentumVectorSize; i++) - m_momentumPerSample[i] = (float)pow(0.9f, 1.0 / m_mbSize[i]); - } - - if (m_learnRateDecreaseFactor > 1 || m_learnRateIncreaseFactor < 1) - InvalidArgument("learnRateIncreaseFactor must be >= 1 and learnRateDecreaseFactor must be <= 1."); - - for (size_t i = 0; i < m_dropoutRates.size(); i++) - if (m_dropoutRates[i] >= 1 || m_dropoutRates[i] < 0) - InvalidArgument("dropoutRate must be >= 0 and < 1."); - - if (m_adaptationRegWeight > 1 || m_adaptationRegWeight < 0) - InvalidArgument("adaptationRegWeight must be in [0 1]"); - - m_minLearnRate = 1e-9f; - - m_needAdaptRegularization = false; - - m_doGradientCheck = doGradientCheck; - m_gradientCheckSigDigit = gradientCheckSigDigit; - m_validateAfterModelReloading = validateAfterModelReloading; - - m_useCVSetControlLRIfCVExists = useCVSetControlLRIfCVExists; - m_useEvalCriterionControlLR = useEvalCriterionControlLR; - - msra::files::make_intermediate_dirs(m_modelPath); - } - - template - void SGD::Adapt(wstring origModelFileName, wstring refNodeName, - IDataReader* trainSetDataReader, - IDataReader* validationSetDataReader, - const DEVICEID_TYPE deviceID, const bool makeMode) - { - if (origModelFileName == L"" || trainSetDataReader == nullptr) - InvalidArgument("origModel and trainSetDataReader should not be null."); - - int startEpoch = DetermineStartEpoch(makeMode); - if (startEpoch == m_maxEpochs) - { - fprintf(stderr, "Final model exists. No further training is necessary.\n"); - return; - } - - ComputationNetwork net(deviceID); - if (startEpoch >= 0) - { - wstring modelFileName = GetModelNameForEpoch(int(startEpoch) - 1); - fprintf(stderr, "Starting from checkpoint. 
Load Network From File %ls.\n", modelFileName.c_str()); - net.LoadFromFile(modelFileName); - } - else - { - fprintf(stderr, "Load Network From the original model file %ls.\n", origModelFileName.c_str()); - net.LoadFromFile(origModelFileName); - } - - startEpoch = max(startEpoch, 0); - - ComputationNetwork refNet(deviceID); - m_needAdaptRegularization = m_adaptationRegType != AdaptationRegType::None && m_adaptationRegWeight > 0; - if (m_needAdaptRegularization) - { - fprintf(stderr, "Load reference Network From the original model file %ls.\n", origModelFileName.c_str()); - refNet.LoadFromFile(origModelFileName); - } - - ComputationNodeBasePtr refNode; - if (m_needAdaptRegularization && m_adaptationRegType == AdaptationRegType::KL) - { - fprintf(stderr, "Checking refNodeName %ls.\n", origModelFileName.c_str()); - if (refNodeName == L"") - InvalidArgument("refNodeName does not exist and is needed when adaptationRegType is KL."); - refNode = refNet.GetNodeFromName(refNodeName); - } - - TrainOrAdaptModel(startEpoch, net, refNet, refNode, trainSetDataReader, validationSetDataReader); - } - - template - void SGD::SequenceTrain(IComputationNetBuilder* netBuilder, wstring origModelFileName, - IDataReader* trainSetDataReader, IDataReader* validationSetDataReader, - const DEVICEID_TYPE deviceID, const bool makeMode) - { - if (netBuilder == nullptr || origModelFileName == L"" || trainSetDataReader == nullptr) - InvalidArgument("netBuilder, origModel and trainSetDataReader should not be null."); - - int startEpoch = DetermineStartEpoch(makeMode); - if (startEpoch == m_maxEpochs) - { - fprintf(stderr, "Final model exists. No further training is necessary.\n"); - return; - } - - // Initializes the model from original model. - ComputationNetwork origNet(deviceID); - ComputationNetwork* sequenceNet = - (startEpoch < 0) ? netBuilder->BuildNetworkFromDescription() : &origNet; - std::vector addedFeatureNodes; - std::vector replacedCriterionNodes; - if (startEpoch < 0) - { - // Loads models. - origNet.LoadFromFile(origModelFileName); - - // Processes feature nodes. - std::vector & sequenceFeatureNodes = sequenceNet->FeatureNodes(); - for (size_t i = 0; i < sequenceFeatureNodes.size(); ++i) - { - if (!origNet.NodeNameExist(sequenceFeatureNodes[i]->NodeName())) - { - addedFeatureNodes.push_back(sequenceFeatureNodes[i]); - origNet.AddFeatureNode(sequenceFeatureNodes[i]); - } - } - - // Processes criterion nodes. - auto & origCriterionNodes = GetTrainCriterionNodes(origNet); - auto & sequenceCriterionNodes = GetTrainCriterionNodes(*sequenceNet); - if (origCriterionNodes.size() == 0 || sequenceCriterionNodes.size() == 0) - { - throw std::runtime_error("Training criterion node does not exist."); - } - replacedCriterionNodes.push_back(origCriterionNodes[0]); - origNet.ReplaceFinalCriterionNode(origCriterionNodes[0]->NodeName(), sequenceCriterionNodes[0]); - origNet.ResetEvalTimeStamp(); - } - - wstring modelFileName = GetModelNameForEpoch(int(startEpoch) - 1); - if (startEpoch >= 0) - fprintf(stderr, "Starting from checkpoint. Load Network From File %ls.\n", modelFileName.c_str()); - else - fprintf(stderr, "Load Network From the original model file %ls.\n", origModelFileName.c_str()); - ComputationNetwork *net = (startEpoch < 0) ? &origNet : netBuilder->LoadNetworkFromFile(modelFileName); - - startEpoch = max(startEpoch, 0); - - TrainOrAdaptModel(startEpoch, *net, *net, nullptr, trainSetDataReader, validationSetDataReader); - - // Handles deletions carefully here. 
- if (startEpoch < 0) - { - for (size_t i = 0; i < addedFeatureNodes.size(); ++i) - origNet.RemoveFeatureNode(addedFeatureNodes[i]); - auto & origCriterionNodes = GetTrainCriterionNodes(origNet); - origNet.ReplaceFinalCriterionNode(origCriterionNodes[0]->NodeName(), replacedCriterionNodes[0]); - } - } - - static double MomentumPerMB(double momentumPerSample, size_t minibatchSize) - { - return pow(momentumPerSample, minibatchSize); - } - - template - void SGD::Train(IComputationNetBuilder* netBuilder, - IDataReader* trainSetDataReader, - IDataReader* validationSetDataReader, - const bool makeMode) - { - if (netBuilder == nullptr || trainSetDataReader == nullptr) - InvalidArgument("netBuilder and trainSetDataReader should not be null.\n"); - int startEpoch = DetermineStartEpoch(makeMode); - if (startEpoch == m_maxEpochs) - { - fprintf(stderr, "Final model exists. No further training is necessary.\n"); - return; - } - - wstring modelFileName = GetModelNameForEpoch(int(startEpoch) - 1); - if (startEpoch >= 0) - fprintf(stderr, "Starting from checkpoint. Load Network From File %ls.\n", modelFileName.c_str()); - - ComputationNetwork* net = startEpoch < 0 ? netBuilder->BuildNetworkFromDescription() : - netBuilder->LoadNetworkFromFile(modelFileName); - // TODO: BUGBUG: if not starting from checkpoint, need to synchronize initial model - // strategy should be to run the initializer above on mpiRank==0, and then broadcast parameters. - - /* if (m_doUnitTest) - { - if (net.UnitTest() == false) - LogicError("unit test on decoder network not passed"); - - return; - }*/ - - startEpoch = max(startEpoch, 0); - m_needAdaptRegularization = false; - - TrainOrAdaptModel(startEpoch, *net, *net, nullptr, trainSetDataReader, validationSetDataReader); - } - -// protected: - - // Get{Train,Eval}CriterionNodes() return a reference that is, unfortunately, dependent on the network. - // So we hold those inside here. Not very nice. Also not thread-safe. This may go away once we fix sequence-to-sequence models properly. 
- static map> tmpCriterionNodeSets; - // TODO: test this, then remove this comment - - template - std::vector & SGD::GetTrainCriterionNodes(ComputationNetwork& net) - { - fprintf(stderr, "GetTrainCriterionNodes %ls ...\n", m_trainCriterionNodeName.c_str()); - if (!m_trainCriterionNodeName.empty()) - { - tmpCriterionNodeSets[&net] = net.CriterionNodesFrom(m_trainCriterionNodeName); - return tmpCriterionNodeSets[&net]; - } - else - return net.FinalCriterionNodes(); - } - - template - std::vector & SGD::GetEvalCriterionNodes(ComputationNetwork& net) - { - fprintf(stderr, "GetEvalCriterionNodes %ls ...\n", m_evalCriterionNodeName.c_str()); - if (!m_evalCriterionNodeName.empty()) - { - tmpCriterionNodeSets[&net] = net.CriterionNodesFrom(m_evalCriterionNodeName); - return tmpCriterionNodeSets[&net]; - } - else - return net.EvaluationNodes(); - } - - template - void SGD::TrainOrAdaptModel(int startEpoch, ComputationNetwork& net, - ComputationNetwork& refNet, - ComputationNodeBasePtr refNode, - IDataReader* trainSetDataReader, - IDataReader* validationSetDataReader) - { - auto & featureNodes = net.FeatureNodes(); - auto & labelNodes = net.LabelNodes(); - auto & criterionNodes = GetTrainCriterionNodes(net); - auto & evaluationNodes = GetEvalCriterionNodes(net); - - std::map*>* inputMatrices = new std::map*>(); - for (size_t i = 0; i < featureNodes.size(); i++) - { - // TODO: instead, remember the nodes directly, to be able to handle both float and double nodes; current version will crash for mixed networks - (*inputMatrices)[featureNodes[i]->NodeName()] = &dynamic_pointer_cast>(featureNodes[i])->FunctionValues(); - } - - for (size_t i = 0; i < labelNodes.size(); i++) - { - (*inputMatrices)[labelNodes[i]->NodeName()] = &dynamic_pointer_cast>(labelNodes[i])->FunctionValues(); - } - - // used for KLD regularized adaptation. For all other adaptation techniques - // use MEL to edit the model and using normal training algorithm - std::vector refFeatureNodes; - if (m_needAdaptRegularization && m_adaptationRegType == AdaptationRegType::KL && refNode != nullptr) - { - refFeatureNodes.resize(featureNodes.size()); - for (size_t i = 0; i < featureNodes.size(); i++) - { - //we need to keep this info to handle deletion - refFeatureNodes[i] = refNet.GetNodeFromName(featureNodes[i]->NodeName()); - refNet.ChangeNode(featureNodes[i]->NodeName(), featureNodes[i]); - } - - refNet.RebuildNetwork(refNode); - } - - //initializing weights and gradient holder - //only one criterion so far TODO: support multiple ones? 
-        auto & learnableNodes = net.LearnableNodes(criterionNodes[0]);
-        std::list<Matrix<ElemType>> smoothedGradients;
-
-        for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++)
-        {
-            ComputationNodePtr node = dynamic_pointer_cast<ComputationNode<ElemType>>(*nodeIter);
-            smoothedGradients.push_back(Matrix<ElemType>(node->FunctionValues().GetNumRows(),
-                                                         node->FunctionValues().GetNumCols(),
-                                                         net.GetDeviceId()));
-        }
-
-        double epochCriterion, avgCriterion, prevCriterion, lrControlCriterion;
-        lrControlCriterion = epochCriterion = avgCriterion = prevCriterion = std::numeric_limits<double>::infinity();
-        size_t epochsNotCountedInAvgCriterion = startEpoch % m_learnRateAdjustInterval;
-
-        std::vector<double> epochEvalErrors(evaluationNodes.size(), std::numeric_limits<double>::infinity());
-
-        std::vector<wstring> evalNodeNames;
-        for (size_t i = 0; i < evaluationNodes.size(); i++)
-            evalNodeNames.push_back(evaluationNodes[i]->NodeName());
-
-        size_t totalSamplesSeen = 0;
-        double learnRatePerSample = 0.5f / m_mbSize[startEpoch];
-
-        double learningRateAdjustmentFactor = 1.0f;
-        vector<double> prevLearnRates;
-        prevLearnRates.resize(m_numPrevLearnRates);
-        for (int i = 0; i < m_numPrevLearnRates; i++)
-            prevLearnRates[i] = -1.0;
-
-        //precompute mean and invStdDev nodes and save initial model
-        if (PreCompute(net, trainSetDataReader, featureNodes, labelNodes, inputMatrices) || startEpoch == 0)
-        {
-            // Synchronize all ranks before writing the model to ensure that
-            // everyone is done loading the model
-            if (g_mpi != nullptr)
-                g_mpi->WaitAll();
-
-            net.SaveToFile(GetModelNameForEpoch(int(startEpoch) - 1));
-        }
-
-        // first, we need to normalize the effect of nbruttsineachrecurrentiter
-        if (trainSetDataReader->GetNumParallelSequences() > 1 && m_needToNormalizeLRByParallUtterance)
-        {
-            for (auto& x : m_learningRatesPerSample)
-                x /= (float)trainSetDataReader->GetNumParallelSequences();
-        }
-
-        // likewise, we need to normalize the effect of nbruttsineachrecurrentiter for momentum
-        if (trainSetDataReader->GetNumParallelSequences() > 1 && m_needToNormalizeMomentumByParallUtterance)
-        {
-            for (auto& x : m_momentumPerSample)
-                x = (float)pow(x, 1.0 / trainSetDataReader->GetNumParallelSequences());
-        }
-
-        bool learnRateInitialized = false;
-        if (startEpoch > 0)
-        {
-            learnRateInitialized = LoadCheckPointInfo(startEpoch - 1,
-                                                      /*out*/ totalSamplesSeen,
-                                                      /*out*/ learnRatePerSample,
-                                                      smoothedGradients,
-                                                      /*out*/ prevCriterion,
-                                                      /*out*/ m_prevChosenMinibatchSize);
-            if (learnRateInitialized)
-                prevLearnRates[startEpoch % m_numPrevLearnRates] = learnRatePerSample;
-        }
-
-        if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::AdjustAfterEpoch &&
-            !learnRateInitialized && m_learningRatesPerSample.size() <= startEpoch)
-        {
-            InvalidArgument(
-                "When using \"AdjustAfterEpoch\", there must either exist a checkpoint file, "
-                "or an explicit learning rate must be specified in config for the starting epoch.");
-        }
-
-        unsigned long dropOutSeed = 1;
-        double prevDropoutRate = 0;
-
-        bool learnRateReduced = false;
-
-        ComputationNetwork::SetMaxTempMemSizeForCNN(net, criterionNodes[0], m_maxTempMemSizeInSamplesForCNN);
-        if (m_needAdaptRegularization && m_adaptationRegType == AdaptationRegType::KL && refNode != nullptr)
-            ComputationNetwork::SetMaxTempMemSizeForCNN(refNet, refNode, m_maxTempMemSizeInSamplesForCNN);
-
-        // --- MAIN EPOCH LOOP
-
-        for (int i = startEpoch; i < (int)m_maxEpochs; i++)
-        {
-            // Synchronize all ranks before proceeding to ensure that
-            // rank 0 has finished writing the previous model file
-            if (g_mpi != nullptr)
-                g_mpi->WaitAll();
-
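The two normalizations a few lines up keep per-frame behavior stable when the reader delivers several parallel sequences per time step: the per-sample learning rate is divided by the sequence count, and the per-sample momentum is taken to the corresponding root. With hypothetical numbers:

    #include <cmath>
    #include <cstdio>

    int main()
    {
        const double numParallelSequences = 4;        // illustrative values throughout
        double learnRatePerSample = 0.002;
        double momentumPerSample  = 0.9999;

        // each step now consumes numParallelSequences frames at once
        learnRatePerSample /= numParallelSequences;                        // -> 0.0005
        momentumPerSample   = std::pow(momentumPerSample,
                                       1.0 / numParallelSequences);        // -> ~0.999975
        std::printf("%g %g\n", learnRatePerSample, momentumPerSample);
    }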
Timer timer; - timer.Start(); - - // set dropout rate - ComputationNetwork::SetDropoutRate(net, criterionNodes[0], m_dropoutRates[i], prevDropoutRate, dropOutSeed); - - // learning rate adjustment - if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::None || - (m_learningRatesPerSample.size() > 0 && m_learningRatesPerSample.size() > i)) - { - learnRatePerSample = m_learningRatesPerSample[i]; - } - else if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::SearchBeforeEpoch) - { - double largestPrevLearnRatePerSample = prevLearnRates[0]; - for (int j = 1; j < m_numPrevLearnRates; j++) - largestPrevLearnRatePerSample = max(largestPrevLearnRatePerSample, prevLearnRates[j]); - - // return a reasonable learning rate based on the initial minibatchSize - double newLearningRatePerSample = SearchForBestLearnRate(net, refNet, refNode, i, learnRatePerSample, - trainSetDataReader, featureNodes, labelNodes, - criterionNodes, evaluationNodes, inputMatrices, - learnableNodes, smoothedGradients, - learnRateInitialized, largestPrevLearnRatePerSample); - learningRateAdjustmentFactor = newLearningRatePerSample / learnRatePerSample; - learnRatePerSample = newLearningRatePerSample; - - // save per sample learn rate to support changeable minibatchSize - prevLearnRates[i % m_numPrevLearnRates] = learnRatePerSample; - } - - learnRateInitialized = true; - - if (learnRatePerSample < m_minLearnRate) - { - fprintf(stderr, "Learn Rate Per Sample for Epoch[%d] = %.8g is less than minLearnRate %.8g. Training stops.\n", - i + 1, learnRatePerSample, m_minLearnRate); - if (m_autoLearnRateSearchType != LearningRateSearchAlgorithm::None) - net.SaveToFile(m_modelPath); - break; - } - - size_t chosenMinibatchSize; - size_t actualMinibatchSize; - - // Through the command line or config file the user can set minibatch sizes on a per epoch - // basis for a set number of epochs. For epochs after that point, m_mbSize.size(), either - // we just keep using - // the last minibatch size, or we use tuning to try and find a better one. 
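When tuning kicks in below, the search budget is numMiniBatch4LRSearch[i] * mbSize[i] frames, clamped to the epoch size; with the defaults read earlier (500 minibatches of 256 samples) and a hypothetical 100000-frame epoch:

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>

    int main()
    {
        const size_t numMiniBatch4LRSearch = 500, mbSize = 256;   // defaults from the config
        const size_t epochSize = 100000;                          // hypothetical epoch
        size_t numFramesToUseInSearch = numMiniBatch4LRSearch * mbSize;          // 128000
        numFramesToUseInSearch = std::min(numFramesToUseInSearch, epochSize);    // -> 100000
        std::printf("%lu\n", (unsigned long)numFramesToUseInSearch);
    }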
- if (m_autoAdjustMinibatch && i >= m_mbSize.size()) - { - size_t numFramesToUseInSearch = m_numMiniBatch4LRSearch[i] * m_mbSize[i]; - if (m_epochSize != requestDataSize) - { - // ensure the numFramesToUseInSearch does not exceed the total number of frames in the epoch - numFramesToUseInSearch = min(numFramesToUseInSearch, m_epochSize); - } - - // Use tuning to try and find a better minibatch size - chosenMinibatchSize = AdaptiveMinibatchSizing(net, refNet, refNode, i, - numFramesToUseInSearch, - trainSetDataReader, learnRatePerSample, - m_mbSize[i], featureNodes, labelNodes, - criterionNodes, evaluationNodes, - inputMatrices, learnableNodes, - smoothedGradients, learningRateAdjustmentFactor); - m_prevChosenMinibatchSize = chosenMinibatchSize; - } - else - { - // use the explicitly set minibatch size - chosenMinibatchSize = m_mbSize[i]; - } - - actualMinibatchSize = chosenMinibatchSize; - if (trainSetDataReader->GetNumParallelSequences() > 1 && m_needToNormalizeMomentumByParallUtterance) - actualMinibatchSize = chosenMinibatchSize * trainSetDataReader->GetNumParallelSequences(); - - fprintf(stderr, "Starting Epoch %d: learning rate per sample = %f momentum = %f \n", - i + 1, learnRatePerSample, MomentumPerMB(m_momentumPerSample[i], actualMinibatchSize)); - - TrainOneEpoch(net, - refNet, - refNode, - i, - m_epochSize, - trainSetDataReader, - learnRatePerSample, - chosenMinibatchSize, - featureNodes, - labelNodes, - criterionNodes, - evaluationNodes, - inputMatrices, - learnableNodes, smoothedGradients, - epochCriterion, epochEvalErrors, totalSamplesSeen); - - timer.Stop(); - double epochTime = timer.ElapsedSeconds(); - - if (m_useEvalCriterionControlLR) - lrControlCriterion = epochEvalErrors[0]; - else - lrControlCriterion = epochCriterion; - - fprintf(stderr, - "Finished Epoch[%d]: [Training Set] TrainLossPerSample = %.8g; ", - i + 1, epochCriterion); - if (epochEvalErrors.size() == 1) - { - fprintf(stderr, - "EvalErrPerSample = %.8g; Ave LearnRatePerSample = %.10g; EpochTime=%.8g\n", - epochEvalErrors[0], learnRatePerSample, epochTime); - } - else - { - fprintf(stderr, "EvalErrPerSample "); - for (size_t j = 0; j < epochEvalErrors.size(); j++) - fprintf(stderr, "[%lu]=%.8g; ", j, epochEvalErrors[j]); - - fprintf(stderr, "Ave LearnRatePerSample = %.10g; Epoch Time=%.8g\n", - learnRatePerSample, epochTime); - - fprintf(stderr, "Finished Epoch[%d]: Criterion Node [%ls] Per Sample = %.8g\n", - i + 1, criterionNodes[0]->NodeName().c_str(), epochCriterion); - - for (size_t j = 0; j < epochEvalErrors.size(); j++) - { - fprintf(stderr, "Finished Epoch[%d]: Evaluation Node [%ls] Per Sample = %.8g\n", - i + 1, evalNodeNames[j].c_str(), epochEvalErrors[j]); - } - } - - if ((g_mpi == nullptr) || g_mpi->IsMainNode()) - { - if (validationSetDataReader != trainSetDataReader && validationSetDataReader != nullptr) - { - SimpleEvaluator evalforvalidation(net); - vector cvSetTrainAndEvalNodes; - cvSetTrainAndEvalNodes.push_back(criterionNodes[0]->NodeName()); - cvSetTrainAndEvalNodes.push_back(evaluationNodes[0]->NodeName()); - - vector vScore = evalforvalidation.Evaluate(validationSetDataReader, cvSetTrainAndEvalNodes, m_mbSize[i]); - fprintf(stderr, "Finished Epoch[%d]: [Validation Set] TrainLossPerSample = %.8g; EvalErrPerSample = %.8g\n", - i + 1, vScore[0], vScore[1]); - - if (m_useCVSetControlLRIfCVExists) - { - if (m_useEvalCriterionControlLR) - lrControlCriterion = vScore[1]; - else - lrControlCriterion = vScore[0]; //the first one is the training criterion. 
- } - } - } - - // broadcast epochCriterion to make sure each processor will have the same learning rate schedule - if ((m_parallelizationMethod == ParallelizationMethod::ModelAveragingSGD) && (g_mpi->NumNodesInUse() > 1)) - g_mpi->Bcast(&epochCriterion, 1, g_mpi->MainNodeRank()); - - bool loadedPrevModel = false; - size_t epochsSinceLastLearnRateAdjust = i % m_learnRateAdjustInterval + 1; - if (avgCriterion == std::numeric_limits::infinity()) - { - avgCriterion = lrControlCriterion; - } - else - { - avgCriterion = ((epochsSinceLastLearnRateAdjust - 1 - epochsNotCountedInAvgCriterion) * - avgCriterion + lrControlCriterion) / - (epochsSinceLastLearnRateAdjust - epochsNotCountedInAvgCriterion); - } - - if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::AdjustAfterEpoch && - m_learningRatesPerSample.size() <= i && epochsSinceLastLearnRateAdjust == m_learnRateAdjustInterval) - { - if (std::isnan(avgCriterion) || (prevCriterion - avgCriterion < 0 && prevCriterion != std::numeric_limits::infinity())) - { - if (m_loadBestModel) - { - net.LoadPersistableParametersFromFile(GetModelNameForEpoch(i - 1), - m_validateAfterModelReloading); - net.ResetEvalTimeStamp(); - LoadCheckPointInfo(i - 1, - /*out*/ totalSamplesSeen, - /*out*/ learnRatePerSample, - smoothedGradients, - /*out*/ prevCriterion, - /*out*/ m_prevChosenMinibatchSize); - fprintf(stderr, "Loaded the previous model which has better training criterion.\n"); - loadedPrevModel = true; - } - } - - if (m_continueReduce) - { - if (std::isnan(avgCriterion) || - (prevCriterion - avgCriterion <= m_reduceLearnRateIfImproveLessThan * prevCriterion && - prevCriterion != std::numeric_limits::infinity())) - { - if (learnRateReduced == false) - learnRateReduced = true; - else - { - net.SaveToFile(GetModelNameForEpoch(i, true)); - - fprintf(stderr, "Finished training and saved final model\n\n"); - break; - } - } - - if (learnRateReduced) - { - learnRatePerSample *= m_learnRateDecreaseFactor; - fprintf(stderr, "learnRatePerSample reduced to %.8g\n", learnRatePerSample); - } - } - else - { - if (std::isnan(avgCriterion) || - (prevCriterion - avgCriterion <= m_reduceLearnRateIfImproveLessThan * prevCriterion && - prevCriterion != std::numeric_limits::infinity())) - { - - learnRatePerSample *= m_learnRateDecreaseFactor; - fprintf(stderr, "learnRatePerSample reduced to %.8g\n", learnRatePerSample); - } - else if (prevCriterion - avgCriterion > m_increaseLearnRateIfImproveMoreThan * prevCriterion && - prevCriterion != std::numeric_limits::infinity()) - { - learnRatePerSample *= m_learnRateIncreaseFactor; - fprintf(stderr, "learnRatePerSample increased to %.8g\n", learnRatePerSample); - } - } - } - else - { - if (std::isnan(avgCriterion)) - RuntimeError("The training criterion is not a number (NAN). 
Stop\n"); - } - - // not loading previous values then set them - if (!loadedPrevModel && epochsSinceLastLearnRateAdjust == m_learnRateAdjustInterval) - { - prevCriterion = avgCriterion; - epochsNotCountedInAvgCriterion = 0; - } - - // Synchronize all ranks before proceeding to ensure that - // nobody tries reading the checkpoint file at the same time - // as rank 0 deleting it below - if (g_mpi != nullptr) - g_mpi->WaitAll(); - - // persist model and check-point info - if ((g_mpi == nullptr) || g_mpi->IsMainNode()) - { - net.SaveToFile(GetModelNameForEpoch(i)); - SaveCheckPointInfo(i, totalSamplesSeen, learnRatePerSample, smoothedGradients, prevCriterion, chosenMinibatchSize); - if (!m_keepCheckPointFiles) - { - // delete previous checkpoint file to save space - _wunlink(GetCheckPointFileNameForEpoch(i - 1).c_str()); - } - } - - if (learnRatePerSample < 1e-12) - { - fprintf(stderr, "learnRate per sample is reduced to %.8g which is below 1e-12. stop training.\n", - learnRatePerSample); - } - } - - // --- END OF MAIN EPOCH LOOP - - // since we linked feature nodes. we need to remove it from the deletion - if (m_needAdaptRegularization && m_adaptationRegType == AdaptationRegType::KL && refNode != nullptr) - { - for (size_t i = 0; i < refFeatureNodes.size(); i++) - { - // note we need to handle deletion carefully - refNet.ChangeNode(refFeatureNodes[i]->NodeName(), refFeatureNodes[i]); - } - } - - delete inputMatrices; - } - -// protected: - - // return true if precomputation is executed. - template - bool SGD::PreCompute(ComputationNetwork& net, - IDataReader* trainSetDataReader, - std::vector & featureNodes, - std::vector & labelNodes, - std::map*>* inputMatrices) - { - std::list nodes = net.GetNodesRequiringPreComputation(); - - if (nodes.size() == 0) - { - fprintf(stderr, "No PreCompute nodes found, skipping PreCompute step\n"); - return false; - } - - fprintf(stderr, "Found %lu PreCompute nodes\n", nodes.size()); - for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++) - { - auto node = static_pointer_cast>(*nodeIter); - fprintf(stderr, "\tNodeName: %ls\n", (node->NodeName()).c_str()); - } - - //compute - //trainSetDataReader->StartMinibatchLoop(m_mbSize[0], 0 , requestDataSize); - // trainSetDataReader->StartMinibatchLoop(m_mbSize[0], 0 , m_epochSize); // only based on one epoch - // [1/12/2015 erw] to support large dataset, we usually partition whole dataset into several epoch's, - // so we need to use all the data to do precomputing - if (m_useAllDataForPreComputedNode) - { - // using all the data - trainSetDataReader->StartMinibatchLoop(m_mbSize[0], 0); - } - else - { - // using all the data - trainSetDataReader->StartMinibatchLoop(m_mbSize[0], 0, m_epochSize); - } - - while (trainSetDataReader->GetMinibatch(*inputMatrices)) - { - ComputationNetwork::UpdateEvalTimeStamps(featureNodes); - ComputationNetwork::UpdateEvalTimeStamps(labelNodes); - - net.SetActualMiniBatchSizeFromFeatures(); - trainSetDataReader->CopyMBLayoutTo(net.GetMBLayoutPtr()); - net.VerifyActualNumParallelSequences(trainSetDataReader->GetNumParallelSequences()); - - // TODO: Exactly this loop should be INSIDE ComputationNetwork--pass the nodes array instead! 
- for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++) - net.Evaluate(*nodeIter); - } - - // mark done - for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++) - { - auto node = static_pointer_cast>(*nodeIter); - node->MarkComputed(true); - } - - return true; - } - - // return a reasonable initial learning rate based on the initial mbsize - template - double SGD::SearchForBestLearnRate(ComputationNetwork& net, - ComputationNetwork& refNet, - const ComputationNodeBasePtr refNode, const int epochNumber, - const double curLearnRate, - IDataReader* trainSetDataReader, - const std::vector & featureNodes, - const std::vector & labelNodes, - const std::vector & criterionNodes, - const std::vector & evaluationNodes, - std::map*>* inputMatrices, - const std::list & learnableNodes, - std::list>& smoothedGradients, - const bool learnRateInitialized, - const double largestPrevLearnRatePerSample) - { - double epochCriterion = std::numeric_limits::infinity(); - double prevCriterion = std::numeric_limits::infinity(); - vector epochEvalErrors(evaluationNodes.size(), std::numeric_limits::infinity()); - - size_t totalSamplesSeen = 0; - double bestLearnRatePerSample = curLearnRate; - - size_t numFramesToUseInSearch = m_numMiniBatch4LRSearch[epochNumber] * m_mbSize[epochNumber]; - if (m_epochSize != requestDataSize) - { - // ensure the numFramesToUseInSearch does not exceed the total number of frames in the epoch - numFramesToUseInSearch = min(numFramesToUseInSearch, m_epochSize); - } - - double baseCriterion; - - double minLearnRate = m_minLearnRate * 0.3f; - double learnRatePerSample = 1.0f / 8.0f / 0.618f / sqrt((double)m_mbSize[epochNumber]); - - if (learnRateInitialized && largestPrevLearnRatePerSample > 0) - { - //largestPrevLearnRatePerSample is per sample, first 0.618f is for compensation, second one is for safety - learnRatePerSample = largestPrevLearnRatePerSample / 0.618f / 0.618f; - } - - int baseModelEpoch = epochNumber - 1; - net.LoadPersistableParametersFromFile(GetModelNameForEpoch(baseModelEpoch), m_validateAfterModelReloading); - net.ResetEvalTimeStamp(); - - double learnRate = learnRatePerSample; - size_t dummyMinibatchSize = 0; - LoadCheckPointInfo(baseModelEpoch, - /*out*/ totalSamplesSeen, - /*out*/ learnRate, - smoothedGradients, - /*out*/ prevCriterion, - /*out*/ dummyMinibatchSize); - - // if model is not changed this is what we will get - TrainOneMiniEpochAndReloadModel(net, refNet, refNode, epochNumber, - numFramesToUseInSearch, trainSetDataReader, 0, m_mbSize[epochNumber], - featureNodes, labelNodes, - criterionNodes, evaluationNodes, - inputMatrices, learnableNodes, - smoothedGradients, /*out*/ baseCriterion, - /*out*/ epochEvalErrors, /*out*/ totalSamplesSeen, - "BaseAdaptiveLearnRateSearch:"); - - if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::SearchBeforeEpoch) - { - if (prevCriterion == std::numeric_limits::infinity()) - prevCriterion = baseCriterion; - - double ratio = 0.3; - - if (m_epochSize != requestDataSize) - ratio = pow(((double)numFramesToUseInSearch) / m_epochSize, 1.0f / 2); - - baseCriterion = max(ratio * prevCriterion + (1 - ratio) * baseCriterion, baseCriterion); - } - - do - { - learnRatePerSample *= 0.618; - TrainOneMiniEpochAndReloadModel(net, refNet, refNode, epochNumber, - numFramesToUseInSearch, trainSetDataReader, - learnRatePerSample, m_mbSize[epochNumber], featureNodes, - labelNodes, criterionNodes, - evaluationNodes, inputMatrices, - learnableNodes, smoothedGradients, - /*out*/ epochCriterion, /*out*/ 
epochEvalErrors, - /*out*/ totalSamplesSeen, "AdaptiveLearnRateSearch:"); - - } while (std::isnan(epochCriterion) || (epochCriterion > baseCriterion && learnRatePerSample > minLearnRate)); - - bestLearnRatePerSample = learnRatePerSample; - - //grid search for the first m_numBestSearchEpoch epochs - if (epochNumber < m_numBestSearchEpoch) - { - double leftLearnRatePerSample = 0.01 / m_mbSize[epochNumber]; - double rightLearnRatePerSample = learnRatePerSample; - double leftCriterion, rightCriterion = epochCriterion; - - TrainOneMiniEpochAndReloadModel(net, refNet, refNode, epochNumber, - numFramesToUseInSearch, trainSetDataReader, - leftLearnRatePerSample, m_mbSize[epochNumber], - featureNodes, labelNodes, - criterionNodes, evaluationNodes, - inputMatrices, learnableNodes, - smoothedGradients, /*out*/ leftCriterion, - /*out*/ epochEvalErrors, /*out*/ totalSamplesSeen, - "DetailBaseAdaptiveLearnRateSearch:"); - - while (rightLearnRatePerSample > leftLearnRatePerSample * 1.2) - { - if (rightCriterion > leftCriterion) - { - rightLearnRatePerSample *= 0.618; - - TrainOneMiniEpochAndReloadModel(net, refNet, refNode, - epochNumber, numFramesToUseInSearch, - trainSetDataReader, - rightLearnRatePerSample, m_mbSize[epochNumber], - featureNodes, labelNodes, - criterionNodes, - evaluationNodes, - inputMatrices, - learnableNodes, - smoothedGradients, - /*out*/ rightCriterion, - /*out*/ epochEvalErrors, - /*out*/ totalSamplesSeen, - "DetailRightAdaptiveLearnRateSearch:"); - } - else - { - leftLearnRatePerSample /= 0.618; - - TrainOneMiniEpochAndReloadModel(net, refNet, refNode, - epochNumber, numFramesToUseInSearch, - trainSetDataReader, - leftLearnRatePerSample, m_mbSize[epochNumber], - featureNodes, labelNodes, - criterionNodes, - evaluationNodes, - inputMatrices, - learnableNodes, - smoothedGradients, - /*out*/ leftCriterion, - /*out*/ epochEvalErrors, - /*out*/ totalSamplesSeen, - "DetailLeftAdaptiveLearnRateSearch:"); - } - } - - bestLearnRatePerSample = (leftCriterion < rightCriterion) ? 
leftLearnRatePerSample :
- rightLearnRatePerSample;
- }
-
- fprintf(stderr, "Best Learn Rate Per Sample for Epoch[%d] = %.10g baseCriterion=%.10g\n",
- epochNumber + 1, bestLearnRatePerSample, baseCriterion);
-
- return bestLearnRatePerSample;
- }
-
- template <class ElemType>
- void SGD<ElemType>::TrainOneMiniEpochAndReloadModel(ComputationNetwork& net,
- ComputationNetwork& refNet,
- const ComputationNodeBasePtr refNode, const int epochNumber,
- const size_t epochSize, IDataReader<ElemType>* trainSetDataReader,
- const double learnRatePerSample,
- const size_t minibatchSize,
- const std::vector<ComputationNodeBasePtr>& featureNodes,
- const std::vector<ComputationNodeBasePtr>& labelNodes,
- const std::vector<ComputationNodeBasePtr>& criterionNodes,
- const std::vector<ComputationNodeBasePtr>& evaluationNodes,
- std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
- const std::list<ComputationNodeBasePtr>& learnableNodes,
- std::list<Matrix<ElemType>>& smoothedGradients,
- /*out*/ double& epochCriterion,
- /*out*/ std::vector<double>& epochEvalErrors,
- /*out*/ size_t& totalSamplesSeen,
- std::string prefixMsg)
- {
- TrainOneEpoch(net, refNet, refNode, epochNumber, epochSize,
- trainSetDataReader, learnRatePerSample, minibatchSize, featureNodes,
- labelNodes, criterionNodes, evaluationNodes,
- inputMatrices, learnableNodes, smoothedGradients,
- /*out*/ epochCriterion, /*out*/ epochEvalErrors, /*out*/ totalSamplesSeen,
- prefixMsg);
-
- fprintf(stderr, "Finished Mini-Epoch For LearnRate Selection: TrainLossPerSample = %.8g;", epochCriterion);
-
- if (epochEvalErrors.size() == 1)
- fprintf(stderr, "EvalErrPerSample = %.8g; Ave LearnRatePerSample = %.10g\n", epochEvalErrors[0], learnRatePerSample);
- else
- {
- fprintf(stderr, "EvalErrPerSample ");
- for (size_t i = 0; i < epochEvalErrors.size(); i++)
- fprintf(stderr, "[%lu] = %.8g; ", i, epochEvalErrors[i]);
- fprintf(stderr, "Ave LearnRatePerSample = %.10g\n", learnRatePerSample);
- }
-
- int baseModelEpoch = epochNumber - 1;
- net.LoadPersistableParametersFromFile(GetModelNameForEpoch(baseModelEpoch), m_validateAfterModelReloading);
- net.ResetEvalTimeStamp();
-
- double dummyLearnRate;
- double dummyPrevCriterion;
- size_t dummyMinibatchSize = 0;
- LoadCheckPointInfo(baseModelEpoch,
- /*out*/ totalSamplesSeen,
- /*out*/ dummyLearnRate,
- smoothedGradients,
- /*out*/ dummyPrevCriterion,
- /*out*/ dummyMinibatchSize);
- }
-
- template <class ElemType>
- size_t SGD<ElemType>::AdaptiveMinibatchSizing(ComputationNetwork& net,
- ComputationNetwork& refNet,
- const ComputationNodeBasePtr refNode,
- const int epochNumber,
- const size_t numFramesToUseInSearch,
- IDataReader<ElemType>* trainSetDataReader,
- const double learnRatePerSample,
- const size_t initialMinibatchSize,
- const std::vector<ComputationNodeBasePtr>& featureNodes,
- const std::vector<ComputationNodeBasePtr>& labelNodes,
- const std::vector<ComputationNodeBasePtr>& criterionNodes,
- const std::vector<ComputationNodeBasePtr>& evaluationNodes,
- std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
- const std::list<ComputationNodeBasePtr>& learnableNodes,
- std::list<Matrix<ElemType>>& smoothedGradients,
- const double learningRateAdjustmentFactor)
- {
- size_t minMinibatchSize = initialMinibatchSize;
- size_t chosenMinibatchSize = initialMinibatchSize;
-
- // do some pre-adjustment based on LR
- // Basically we assume that the LR for epoch 1 is safe for mbsize.
- // If LR control led to a smaller LR, then we can safely increase the lower bound of the MB size.
- double learningRateChangeSoFar = m_learningRatesPerSample[epochNumber] / m_learningRatesPerSample[0]; - learningRateChangeSoFar *= learningRateAdjustmentFactor; - - // increasing by the full factor is found to be too aggressive; sqrt() seems more robust - learningRateChangeSoFar = sqrt(learningRateChangeSoFar); - - // LR was indeed reduced - if (learningRateChangeSoFar < 1.0f) - { - // we can safely increase MB size (note: this may be bigger than our max) - minMinibatchSize = (size_t)(minMinibatchSize / learningRateChangeSoFar); - } - - if (epochNumber < 2 && m_prevChosenMinibatchSize != 0) - { - // newly started training: any previous MB size stored in the model is to be ignored - fprintf(stderr, "before epoch .2, previous minibatchSize %zd is " - "considered invalid -> resetting\n", m_prevChosenMinibatchSize); - m_prevChosenMinibatchSize = 0; - } - - // check if we need to skip - if (m_prevChosenMinibatchSize != 0 && - (epochNumber + 1) > m_minibatchSizeTuningFrequency && - (epochNumber + 1) % m_minibatchSizeTuningFrequency != 0) - { - fprintf(stderr, "AdaptiveMinibatchSearch: Search for a better minibatchSize " - "in epoch %d skipped, keeping minibatchSize of %zd\n", - epochNumber + 1, m_prevChosenMinibatchSize); - chosenMinibatchSize = m_prevChosenMinibatchSize; - } - else - { - if (m_prevChosenMinibatchSize != 0) - { - // if m_prevChosenMinibatchSize (the chosen minibatch size for the previous epoch) div 2 - // is higher than initialMinibatchSize (the minibatch size we start with for this epoch), - // then start the search with m_prevChosenMinibatchSize/2 instead of initialMinibatchSize. - fprintf(stderr, "AdaptiveMinibatchSearch: Limiting minMinibatchSize to " - "largest of previous minibatchSize = (%d / 2) or %d\n", - (int) m_prevChosenMinibatchSize, (int) minMinibatchSize); - minMinibatchSize = max(minMinibatchSize, m_prevChosenMinibatchSize / 2); - } - - size_t maxMinibatchSize = m_minibatchSizeTuningMax; - - // only grow at most 2 x compared to previous step - if (m_prevChosenMinibatchSize != 0.0f) - { - assert(m_prevChosenMinibatchSize >= chosenMinibatchSize); - - fprintf(stderr, "AdaptiveMinibatchSearch: Limiting maxMinibatchSize to " - "previous minibatchSize %zd*2\n", m_prevChosenMinibatchSize); - maxMinibatchSize = min(maxMinibatchSize, m_prevChosenMinibatchSize * 2); - } - - chosenMinibatchSize = SearchForBestMinibatchSize(net, refNet, refNode, epochNumber, - numFramesToUseInSearch, trainSetDataReader, - learnRatePerSample, featureNodes, - labelNodes, criterionNodes, - evaluationNodes, inputMatrices, - learnableNodes, smoothedGradients, - minMinibatchSize, maxMinibatchSize); - } - - return chosenMinibatchSize; - } - - static size_t RoundToMultipleOf64(float val) - { - return 64 * (size_t)((val + 32) / 64); - } - - static size_t RoundToMultipleOf64(size_t val) - { - return 64 * ((val + 32) / 64); - } - - // uses a small percentage of training data of minibatch to - // speculatively train with various MB sizes; then picks the best - template - size_t SGD::SearchForBestMinibatchSize(ComputationNetwork& net, - ComputationNetwork& refNet, - const ComputationNodeBasePtr refNode, - const int epochNumber, - const size_t numFramesToUseInSearch, - IDataReader* trainSetDataReader, - const double learnRatePerSample, - const std::vector & featureNodes, - const std::vector & labelNodes, - const std::vector & criterionNodes, - const std::vector & evaluationNodes, - std::map*>* inputMatrices, - const std::list & learnableNodes, - std::list>& smoothedGradients, - const size_t 
minMinibatchSize, const size_t maxMinibatchSize) - { - // may happen for automatically reduced learning rates - if (minMinibatchSize > maxMinibatchSize) - { - return maxMinibatchSize; - } - - size_t trialMinibatchSize = 0; - bool isFirstIteration = true; - double baseCriterion = 0; - - // increase the minibatch size by a factor of sqrt(2) in each step. - const float minibatchSizeTuningFactor = sqrtf(2.0f); - - size_t lastTriedTrialMinibatchSize = 0; - double lastTriedTrialEpochCriterion = 0; - for (float trialMinibatchSizeFloat = (float)minMinibatchSize; - trialMinibatchSizeFloat <= maxMinibatchSize; - trialMinibatchSizeFloat *= minibatchSizeTuningFactor) - { - // round mbsize to something meaningful - trialMinibatchSize = RoundToMultipleOf64(trialMinibatchSizeFloat); - - fprintf(stderr, "\nAdaptiveMinibatchSearch: Evaluating trial minibatchSize=%zd out of range %zd..%zd ...\n\n", - trialMinibatchSize, RoundToMultipleOf64(minMinibatchSize), RoundToMultipleOf64(maxMinibatchSize)); - - size_t totalSamplesSeen; - std::vector epochEvalErrors(evaluationNodes.size(), std::numeric_limits::infinity()); - double epochCriterion = std::numeric_limits::infinity(); - - // Train on a few minibatches and so we can observe the epochCriterion as we try increasing - // minibatches with iteration of this loop. - TrainOneMiniEpochAndReloadModel(net, refNet, refNode, epochNumber, - numFramesToUseInSearch, trainSetDataReader, - learnRatePerSample, trialMinibatchSize, featureNodes, - labelNodes, criterionNodes, - evaluationNodes, inputMatrices, - learnableNodes, smoothedGradients, - /*out*/ epochCriterion, /*out*/ epochEvalErrors, - /*out*/ totalSamplesSeen, - isFirstIteration ? "BaseAdaptiveMinibatchSearch:" : - "AdaptiveMinibatchSearch:"); - - if (isFirstIteration) - { - // for the first iteration of the loop only, set baseCriterion - // to the result we got from TrainOneMiniEpochAndReloadModel(). - baseCriterion = epochCriterion; - lastTriedTrialMinibatchSize = trialMinibatchSize; - lastTriedTrialEpochCriterion = baseCriterion; - isFirstIteration = false; - - fprintf(stderr, "AdaptiveMinibatchSearch: Computed BaseCriterion %.10g\n", baseCriterion); - } - else if (!std::isnan(epochCriterion) && - (epochCriterion > (baseCriterion * (1.0 + ( m_minibatchSearchCriterionErrorMargin / 100.0))))) - { - // As soon as we see the Criterion (a measure of error) start to get larger than the - // Criterion we started with, we stop. - // TODO: if this is too sensitive, we can add a margin on the bases of percentage of - // baseCriterion. - break; - } - else - { - lastTriedTrialMinibatchSize = trialMinibatchSize; - lastTriedTrialEpochCriterion = epochCriterion; - if (trialMinibatchSizeFloat * minibatchSizeTuningFactor <= maxMinibatchSize) - { - fprintf(stderr, "AdaptiveMinibatchSearch: Keep searching... " - "EpochCriterion = %.10g vs BaseCriterion = %.10g\n", - epochCriterion, baseCriterion); - } - } - } - fprintf(stderr, "AdaptiveMinibatchSearch: Search successful!!! Chose new minibatchSize of %d. " - "EpochCriterion = %.10g vs BaseCriterion = %.10g\n\n", - (int) lastTriedTrialMinibatchSize, lastTriedTrialEpochCriterion, baseCriterion); - - - return lastTriedTrialMinibatchSize; - } - - // Tries to compute derivatives for the whole utterances, which will be - // fed to the neural network as features. 
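
SearchForBestMinibatchSize() above grows the trial size by a factor of sqrt(2) per step and snaps every trial to a multiple of 64 via RoundToMultipleOf64(); a standalone sketch of the trial sequence that produces:

    #include <cmath>
    #include <cstdio>

    static size_t RoundToMultipleOf64(float val)
    {
        return 64 * (size_t)((val + 32) / 64); // round to the nearest multiple of 64
    }

    int main()
    {
        const float factor = sqrtf(2.0f);
        // e.g. minMinibatchSize = 256, maxMinibatchSize = 2048:
        for (float mb = 256.0f; mb <= 2048.0f; mb *= factor)
            printf("trial minibatchSize = %zu\n", RoundToMultipleOf64(mb));
        // prints 256, 384, 512, 704, 1024, 1472 (and 2048 if float rounding allows)
        return 0;
    }
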
- template - void SGD::AttemptUtteranceDerivativeFeatures(ComputationNetwork& net, - IDataReader* trainSetDataReader, - const std::vector & featureNodes, - std::map*>* inputMatrices) - { - // Tries to read an utterance and run forward computation on the - // whole utterance. - assert(trainSetDataReader != NULL); - std::vector>> uttInfo; - auto pMBLayout = make_shared(); - while (trainSetDataReader->GetMinibatchCopy(uttInfo, *inputMatrices, pMBLayout)) - { - ComputationNetwork::UpdateEvalTimeStamps(featureNodes); - - auto & outputNodes = net.OutputNodes(); - if (outputNodes.empty()) - LogicError("no output node was found."); - - net.SetActualMiniBatchSizeFromFeatures(); - trainSetDataReader->CopyMBLayoutTo(net.GetMBLayoutPtr()); - net.VerifyActualNumParallelSequences(trainSetDataReader->GetNumParallelSequences()); - net.Evaluate(outputNodes[0]); // Only evaluate the first output - trainSetDataReader->SetNetOutput(uttInfo, - dynamic_pointer_cast>(outputNodes[0])->FunctionValues(), - pMBLayout); - } - } - - static string GeneratePaddedFloatOrExpFormat(int padSize, int precision, double value) - { - char format[16]; - char buffer[512]; - - sprintf(format, "%%.%dg", precision); - sprintf(buffer, format, value); - - for (int i = 0; i < strlen(buffer); i++) - { - if (buffer[i] == 'e' || buffer[i] == 'E') - { - sprintf(format, "%%%d.%de", padSize, precision); - return format; - } - } - sprintf(format, "%%%d.%df", padSize, precision); - return format; - } - - template - size_t SGD::TrainOneEpoch(ComputationNetwork& net, - ComputationNetwork& refNet, - const ComputationNodeBasePtr refNode, - const int epochNumber, - const size_t epochSize, - IDataReader* trainSetDataReader, - const double learnRatePerSample, - size_t tunedMBSize, - const std::vector & featureNodes, - const std::vector & labelNodes, - const std::vector & criterionNodes, - const std::vector & evaluationNodes, - std::map*>* inputMatrices, - const std::list & learnableNodes, - std::list>& smoothedGradients, - /*out*/ double& epochCriterion, - /*out*/ std::vector& epochEvalErrors, - /*out*/ size_t& totalSamplesSeen, - std::string prefixMsg) - { - // Since we are getting timing resolution of under microsecond we use double precision - // to ensure that we have enough digits to represent small time measurements. 
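
GeneratePaddedFloatOrExpFormat() above builds a printf format at runtime, falling back from %f to %e when the value only prints in exponent notation at the requested precision. A quick standalone check of that behavior (a simplified mirror that only tests for a lowercase 'e'):

    #include <cstdio>
    #include <cstring>

    static const char* FormatFor(double value, char (&format)[16], int padSize, int precision)
    {
        char buffer[512];
        sprintf(format, "%%.%dg", precision);
        sprintf(buffer, format, value);                 // render once to see which notation %g picks
        sprintf(format, strchr(buffer, 'e') ? "%%%d.%de" : "%%%d.%df", padSize, precision);
        return format;
    }

    int main()
    {
        char fmt[16];
        printf(FormatFor(0.00123456, fmt, 11, 8), 0.00123456);   // fixed notation
        printf("\n");
        printf(FormatFor(1.23456e-9, fmt, 11, 8), 1.23456e-9);   // exponent notation
        printf("\n");
        return 0;
    }
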
- double totalTimeInMBs = 0; - double epochCriterionLastMBs = 0; - - int numSamplesLastMBs = 0; - std::vector epochEvalErrorsLastMBs(epochEvalErrors.size(), 0); - - // initialize statistics - size_t totalEpochSamples = 0; - - int numMBsRun = 0; - - size_t numEvalNodes = epochEvalErrors.size(); - - // NOTE: the following two local matrices are not used in distGradAgg path - // assume only one training criterion node for each epoch - - Matrix localEpochCriterion(1, 1, net.GetDeviceId()); - Matrix localEpochEvalErrors(1, numEvalNodes, net.GetDeviceId()); - - localEpochCriterion.SetValue(0); - localEpochEvalErrors.SetValue(0); - - bool useGradientAggregation = ((m_parallelizationMethod == ParallelizationMethod::DataParallelSGD) && - (epochNumber >= m_parallelizationStartEpochNum)); - bool useModelAveraging = ((m_parallelizationMethod == ParallelizationMethod::ModelAveragingSGD) && - (epochNumber >= m_parallelizationStartEpochNum)); - bool useParallelTrain = useGradientAggregation || useModelAveraging; - - // MA-related variables - size_t nSamplesSinceLastModelSync = 0; - size_t nSynced = 0; - float nSecondsOnMASync = 0; - float nSecondsSinceLastMAPerfReport = 0; - - if (useGradientAggregation) - { - epochCriterion = double(0.0); - epochEvalErrors.assign(numEvalNodes, double(0.0)); - } - - Profiler profiler(m_numMBsToCUDAProfile); - - // resetting this, so profiling is performed for one epoch only - m_numMBsToCUDAProfile = 0; - - bool useDistributedMBReading = useParallelTrain && - m_enableDistributedMBReading && - trainSetDataReader->SupportsDistributedMBRead(); - if (useDistributedMBReading) - { - trainSetDataReader->StartDistributedMinibatchLoop(tunedMBSize, epochNumber, g_mpi->CurrentNodeRank(), g_mpi->NumNodesInUse(), m_epochSize); - } - else - { - trainSetDataReader->StartMinibatchLoop(tunedMBSize, epochNumber, m_epochSize); - } - - AttemptUtteranceDerivativeFeatures(net, trainSetDataReader, featureNodes, inputMatrices); - - fprintf(stderr, "\nStarting minibatch loop"); - if (useGradientAggregation) - { - fprintf(stderr, ", DataParallelSGD training (MyRank = %d, NumNodes = %d, NumGradientBits = %d)", (int)g_mpi->CurrentNodeRank(), (int)g_mpi->NumNodesInUse(), (int)m_numGradientBits); - } - - if (useDistributedMBReading) - { - fprintf(stderr, ", Distributed reading is ENABLED"); - } - fprintf(stderr, ".\n"); - - Timer timer; - timer.Start(); - - // --- MAIN MINIBATCH LOOP - - for (;;) - { - bool wasDataRead = trainSetDataReader->GetMinibatch(*inputMatrices); - - if (useDistributedMBReading) - { - // In case of distributed reading, the current node needs to continue even with a minibatch size of 0 if any - // other node in the group has a non-zero size minibatch to process. This is needed to ensure that - // the gradient aggregation barriers do not get stuck and also to ensure that all nodes update their weights - // properly using the aggregate gradients from other nodes before moving on to the next epoch even though the current - // node itself may not have any gradient contribution. - std::array numNodesWithDataToProcess; - numNodesWithDataToProcess[0] = wasDataRead ? 
1 : 0; - g_mpi->AllReduce(numNodesWithDataToProcess); - - if (numNodesWithDataToProcess[0] == 0) - { - break; - } - } - else if (!wasDataRead) - { - break; - } - - size_t actualMBSize = 0; - if (wasDataRead) - { - size_t nSlices = trainSetDataReader->GetNumParallelSequences(); - MBLayoutPtr pMBLayout; - if (!useDistributedMBReading && useParallelTrain) - { - // TODO: refactor this as a function - if (trainSetDataReader->RequireSentenceSeg()) - { - pMBLayout = make_shared(); // items get filled in - DecimateMinibatchWithSentences(*inputMatrices, - g_mpi->NumNodesInUse(), g_mpi->CurrentNodeRank(), - nSlices, pMBLayout, - trainSetDataReader); - } - else - { - DecimateMinibatch(*inputMatrices, g_mpi->NumNodesInUse(), g_mpi->CurrentNodeRank()); - } - } - - actualMBSize = net.SetActualMiniBatchSizeFromFeatures(); - if (actualMBSize != 0) - { - if (!useDistributedMBReading && useParallelTrain && trainSetDataReader->RequireSentenceSeg()) - { - *net.GetMBLayoutPtr() = *pMBLayout; - // TODO: ^^ we should just pass pointers; this current code is semantically identical to before the change to MBLayout - net.VerifyActualNumParallelSequences(nSlices); - } - else - { - trainSetDataReader->CopyMBLayoutTo(net.GetMBLayoutPtr()); - net.VerifyActualNumParallelSequences(nSlices); - } - - nSamplesSinceLastModelSync += actualMBSize; - - ComputationNetwork::UpdateEvalTimeStamps(featureNodes); - ComputationNetwork::UpdateEvalTimeStamps(labelNodes); - -#ifndef EVALDLL - if (m_doGradientCheck && GradientCheck(net, criterionNodes, learnableNodes, 0) == false) - LogicError("cannot pass gradient checker"); -#endif - // TODO: currently only support one node regularization - if (m_needAdaptRegularization && m_adaptationRegType == AdaptationRegType::KL && refNode != nullptr) - { -#if 1 - size_t actualMBSize2 = refNet.SetActualMiniBatchSizeFromFeatures(); - if (actualMBSize2 != actualMBSize) - LogicError("TrainOneEpoch: refNet has different MB size than main net??"); -#else - refNet.SetActualMiniBatchSize(actualMBSize); // TODO: SetActualMiniBatchSizeFromFeatures() should have the same result, no? -#endif - *refNet.GetMBLayoutPtr() = *net.GetMBLayoutPtr(); // TODO: This is UNTESTED (before this was missing, seemingly inconsistently) - refNet.VerifyActualNumParallelSequences(trainSetDataReader->GetNumParallelSequences()); - - refNet.Evaluate(refNode); - Matrix::ScaleAndAdd((ElemType)m_adaptationRegWeight, - dynamic_pointer_cast>(refNode)->FunctionValues(), - (ElemType)(1.0 - m_adaptationRegWeight), - dynamic_pointer_cast>(labelNodes[0])->FunctionValues()); - } - - //compute eval node first since when gradient is computed the forward function values - //may be changed and need to be recomputed when gradient and function value share the same matrix - for (size_t i = 0; i < numEvalNodes; i++) - { - net.Evaluate(evaluationNodes[i]); - } - - // only compute gradient when learning rate is large enough - if (learnRatePerSample > m_minLearnRate * 0.01) - { - // use only the first criterion. Is there any possibility to use more? - net.ComputeGradient(criterionNodes[0]); - } - else - { - // use only the first criterion. Is there any possibility to use more? - net.Evaluate(criterionNodes[0]); - } - } - } - - //for now since we share the same label masking flag we call this on the network. 
- //Later, when we apply different labels on different nodes
- //we need to add code to call this function multiple times, one for each criterion node
- size_t numSamplesWithLabel = net.GetNumSamplesWithLabel(actualMBSize);
-
- // Sum of actualMBSize across all nodes when using parallel training
- size_t aggregateNumSamples = actualMBSize;
- size_t aggregateNumSamplesWithLabel = numSamplesWithLabel;
-
- //distributed gradient aggregation
- if (!useGradientAggregation)
- {
- if (actualMBSize != 0)
- {
- Matrix<ElemType>::AddElementToElement(dynamic_pointer_cast<ComputationNode<ElemType>>(criterionNodes[0])->FunctionValues(), 0, 0, localEpochCriterion, 0, 0);
- for (size_t i = 0; i < numEvalNodes; i++)
- Matrix<ElemType>::AddElementToElement(dynamic_pointer_cast<ComputationNode<ElemType>>(evaluationNodes[i])->FunctionValues(), 0, 0, localEpochEvalErrors, 0, i);
- }
- }
- else
- {
- LazyInitDistGradAgg(learnableNodes, numEvalNodes, m_traceLevel);
-
- //prepare the header
- m_gradHeader->numEvalNode = numEvalNodes;
- m_gradHeader->numSamples = actualMBSize;
- m_gradHeader->numSamplesWithLabel = numSamplesWithLabel;
- m_gradHeader->criterion = wasDataRead ? criterionNodes[0]->Get00Element() : 0.0;
- for (size_t i = 0; i < numEvalNodes; i++)
- m_gradHeader->evalErrors[i] = wasDataRead ? evaluationNodes[i]->Get00Element() : 0.0;
-
- m_distGradAgg->AggregateGradients(m_gradHeader, epochNumber);
-
- aggregateNumSamples = m_gradHeader->numSamples;
- aggregateNumSamplesWithLabel = m_gradHeader->numSamplesWithLabel;
- epochCriterion += m_gradHeader->criterion;
- for (size_t i = 0; i < numEvalNodes; i++)
- epochEvalErrors[i] += m_gradHeader->evalErrors[i];
- }
-
- //update model parameters
- if ((aggregateNumSamples > 0) && (learnRatePerSample > m_minLearnRate * 0.01))
- {
- auto smoothedGradientIter = smoothedGradients.begin();
- for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++, smoothedGradientIter++)
- {
- ComputationNodeBasePtr node = *nodeIter;
- Matrix<ElemType>& smoothedGradient = *smoothedGradientIter;
-
- UpdateWeights(node, smoothedGradient, learnRatePerSample,
- m_momentumPerSample[epochNumber], aggregateNumSamples,
- m_L2RegWeight, m_L1RegWeight,
- m_needAveMultiplier);
- }
- }
-
- if (useModelAveraging && (g_mpi->NumNodesInUse() > 1))
- {
- size_t processedSamples = 0;
- float secondsSinceLastSyncFinished = 0;
- float secondsSpentOnSync = 0;
- if (ModelAveragingProcessing(nSamplesSinceLastModelSync, learnableNodes, processedSamples,
- secondsSinceLastSyncFinished, secondsSpentOnSync))
- {
- // if a sync happens, do some extra work
- nSamplesSinceLastModelSync = 0;
- nSynced++;
-
- nSecondsOnMASync += secondsSpentOnSync;
- nSecondsSinceLastMAPerfReport += secondsSinceLastSyncFinished;
-
- if (m_iMASyncStatsTrace > 0)
- {
- if (nSynced % m_iMASyncStatsTrace == 0)
- {
- fprintf(stderr, "\t\t-----(model averaging stats) %d-th sync, %8.2f seconds since last report, %5.2f seconds on communication\n",
- (int)nSynced, nSecondsSinceLastMAPerfReport, nSecondsOnMASync);
- nSecondsOnMASync = 0;
- nSecondsSinceLastMAPerfReport = 0;
- }
- }
- }
- aggregateNumSamplesWithLabel = processedSamples;
- }
-
- timer.Stop();
- numMBsRun++;
- if (m_traceLevel > 0)
- {
- totalTimeInMBs += timer.ElapsedSeconds();
- numSamplesLastMBs += useModelAveraging ?
int(actualMBSize) : int(aggregateNumSamplesWithLabel); - - if (numMBsRun % m_numMBsToShowResult == 0) - { - // get the epoch Values updated - if (!useGradientAggregation) - { - timer.Restart(); - epochCriterion = localEpochCriterion.Get00Element(); - for (size_t i = 0; i < numEvalNodes; i++) - epochEvalErrors[i] = localEpochEvalErrors(0, i); - timer.Stop(); - - // Add the last trailing compute - totalTimeInMBs += timer.ElapsedSeconds(); - } - - double trainLossPerSample = (epochCriterion - epochCriterionLastMBs) / numSamplesLastMBs; - string formatString = "%s Epoch[%2d of %d]-Minibatch[%4d-%4d of %d]: SamplesSeen = %d; TrainLossPerSample = " + - GeneratePaddedFloatOrExpFormat(11, 8, trainLossPerSample) + "; "; - fprintf(stderr, formatString.c_str(), - prefixMsg.c_str(), epochNumber + 1, m_maxEpochs, numMBsRun - m_numMBsToShowResult + 1, - numMBsRun, epochSize / tunedMBSize, numSamplesLastMBs, trainLossPerSample); - - for (size_t i = 0; i < numEvalNodes; i++) - { - double evalError = (epochEvalErrors[i] - epochEvalErrorsLastMBs[i]) / numSamplesLastMBs; - formatString = "EvalErr[%lu]PerSample = " + GeneratePaddedFloatOrExpFormat(0, 8, evalError) + "; "; - fprintf(stderr, formatString.c_str(), i, evalError); - } - - double totalTimePerSample = (1000.0 * totalTimeInMBs) / numSamplesLastMBs; - formatString = "TotalTime = " + GeneratePaddedFloatOrExpFormat(0, 5, totalTimeInMBs) + "s; TotalTimePerSample = " + - GeneratePaddedFloatOrExpFormat(0, 5, totalTimePerSample) + "ms; SamplesPerSecond = %d\n"; - fprintf(stderr, formatString.c_str(), - totalTimeInMBs, totalTimePerSample, - static_cast(numSamplesLastMBs / totalTimeInMBs)); - - fflush(stderr); - - // reset statistics - totalTimeInMBs = 0; - numSamplesLastMBs = 0; - - epochCriterionLastMBs = epochCriterion; - for (size_t i = 0; i < numEvalNodes; i++) - epochEvalErrorsLastMBs[i] = epochEvalErrors[i]; - - if (std::isnan(epochCriterion)) - RuntimeError("The training criterion is not a number (NAN). Stop\n"); - } - } - - timer.Restart(); - totalEpochSamples += aggregateNumSamplesWithLabel; - totalSamplesSeen += aggregateNumSamplesWithLabel; - - if (totalEpochSamples >= epochSize) - break; - - // call DataEnd function - // DataEnd does reader specific process if sentence ending is reached - trainSetDataReader->DataEnd(endDataSentence); - - // Tries to set up derivative features for the next utterance. 
- AttemptUtteranceDerivativeFeatures(net, trainSetDataReader, featureNodes, inputMatrices);
-
- profiler.NextSample();
- }
-
- // --- END MAIN MINIBATCH LOOP
-
- if (useModelAveraging && (g_mpi->NumNodesInUse() > 1))
- {
- // the model may not be synced after the epoch finished, so do the sync here
- int residualSamples = (int)nSamplesSinceLastModelSync;
- g_mpi->AllReduce(&residualSamples, 1);
- totalSamplesSeen += residualSamples;
- totalEpochSamples += residualSamples;
- ModelAveragingSync(nSamplesSinceLastModelSync, learnableNodes);
- nSynced++;
- nSamplesSinceLastModelSync = 0;
- }
-
- if (useGradientAggregation)
- {
- epochCriterion /= float(totalEpochSamples);
- for (size_t i = 0; i < numEvalNodes; i++)
- epochEvalErrors[i] /= totalEpochSamples;
- }
- else
- {
- localEpochCriterion /= float(totalEpochSamples);
- localEpochEvalErrors /= float(totalEpochSamples);
-
- epochCriterion = localEpochCriterion.Get00Element();
- for (size_t i = 0; i < numEvalNodes; i++)
- epochEvalErrors[i] = localEpochEvalErrors(0, i);
- }
-
-
- if (useModelAveraging && (g_mpi->NumNodesInUse() > 1))
- {
- // merge epochCriterion and epochEvalErrors over nodes
- g_mpi->AllReduce(&epochCriterion, 1);
- g_mpi->AllReduce(epochEvalErrors);
- }
- return totalEpochSamples;
- }
-
- template <class ElemType>
- void SGD<ElemType>::LazyInitDistGradAgg(const std::list<ComputationNodeBasePtr>& learnableNodes, int numEvalNodes, int traceLevel)
- {
- if (m_parallelizationMethod == ParallelizationMethod::DataParallelSGD)
- {
- if (m_distGradAgg == nullptr)
- {
- std::vector<Matrix<ElemType>*> learnParamsGradients;
- learnParamsGradients.reserve(learnableNodes.size());
- for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++)
- {
- ComputationNodePtr node = dynamic_pointer_cast<ComputationNode<ElemType>>(*nodeIter);
- learnParamsGradients.push_back(&(node->GradientValues()));
- }
-
- m_distGradAgg = new AllReduceDistGradAggregator<ElemType>(learnParamsGradients, numEvalNodes, m_numGradientBits, g_mpi, m_zeroThresholdFor1Bit, true /*useQuantizationForSelfStripe*/, traceLevel);
- }
-
- if (m_gradHeader == nullptr)
- {
- m_gradHeader = DistGradHeader::Create(numEvalNodes);
- }
- }
- }
-
- template <class ElemType>
- bool SGD<ElemType>::ModelAveragingProcessing(size_t nSamplesSinceLastSync, const std::list<ComputationNodeBasePtr>& learnableNodes, size_t& nProcessedFrames,
- float& SecondsSinceLastSyncFinished, float& SecondsSpentOnSync)
- {
- //////////////////////////////////////////////////////////////////////////
- // the current strategy is that after each minibatch, we will sync between processors
- // to decide whether a sync needs to be performed. This is definitely not optimal,
- // which we will fix later.
-
- // TODO: the way we handle the timer is not very good
- //////////////////////////////////////////////////////////////////////////
- static bool first = true;
- static Timer MAtimer;
- if (first)
- {
- MAtimer.Start();
- first = false;
- }
-
- char bNeedToSync = (char)0; // use char for bool
- if (g_mpi->IsMainNode() && nSamplesSinceLastSync >= m_nFramesBetweenMASync)
- {
- // only the main node can decide whether a sync needs to be performed
- bNeedToSync = (char)1;
- }
- g_mpi->Bcast(&bNeedToSync, 1, g_mpi->MainNodeRank());
- if (bNeedToSync)
- {
- MAtimer.Stop();
- double elapsedsec = MAtimer.ElapsedSeconds();
- SecondsSinceLastSyncFinished = first ? 0 : (float) elapsedsec;
- MAtimer.Start();
- nProcessedFrames = ModelAveragingSync((int)nSamplesSinceLastSync, learnableNodes);
- MAtimer.Stop();
- SecondsSpentOnSync = (float)MAtimer.ElapsedSeconds();
-
- MAtimer.Start();
- }
- else
- {
- nProcessedFrames = 0;
- return false;
- }
- return true;
- }
-
- template <class ElemType>
- size_t SGD<ElemType>::ModelAveragingSync(int nSamplesSinceLastSync, const std::list<ComputationNodeBasePtr>& learnableNodes)
- {
- if (g_mpi->NumNodesInUse() <= 1)
- {
- return nSamplesSinceLastSync;
- }
-
- //========================================
- // Sec. 1 calculate factor
- //========================================
- float factor = 0;
- int nTotalSamples = nSamplesSinceLastSync;
- g_mpi->AllReduce(&nTotalSamples, 1);
- if (nTotalSamples <= 0)
- {
- // prepare for overflow
- factor = 1.0f / g_mpi->NumNodesInUse();
- }
- else
- {
- factor = (nSamplesSinceLastSync + 0.0f) / nTotalSamples;
- }
-
- //========================================
- // Sec. 2 sync models based on factor
- // Note: this is suboptimal at the moment:
- // we do the averaging for each node in a sequential manner, i.e.,
- // (node1) GPU->CPU->MPI_AllReduce -> (node2)GPU->CPU->MPI_AllReduce
- // we can improve it by using a pipeline
- // (node1) GPU -> CPU -> MPI_AllReduce
- // (node2) GPU -> CPU -> MPI_AllReduce
- // (node3) GPU -> CPU -> MPI_AllReduce
- //========================================
- for (auto iter = learnableNodes.begin(); iter != learnableNodes.end(); iter++)
- {
- ComputationNodeBasePtr pNode = *iter;
- if (!pNode->NeedGradient())
- continue;
-
- Matrix<ElemType>& mat = dynamic_pointer_cast<ComputationNode<ElemType>>(pNode)->FunctionValues();
- // 1. normalize the weight matrix
- Matrix<ElemType>::Scale(factor, mat);
- // 2. send weight matrix over MPI nodes;
- ElemType* px = mat.CopyToArray();
- size_t nx = mat.GetNumElements();
-
- // 3. inplace sum
- g_mpi->AllReduce(px, nx);
- mat.SetValue(mat.GetNumRows(), mat.GetNumCols(), px);
- // 4.
clean up - delete []px; - } - - return nTotalSamples; - } - -// public: - // UpdateWeightsS - static version of UpdateWeights() - // not static since it wants to access protected methods on the SGD object - template - /*static*/ void SGD::UpdateWeightsS(const SGD* sgd, Matrix& functionValues, - Matrix& gradientValues, - Matrix& smoothedGradient, - const double learnRatePerSample, - const double momentumPerSample, - size_t actualMBSize, - const double L2RegWeight, - const double L1RegWeight, - const bool needAveMultiplier) - { - // we use simple linear (instead of log linear) scaling here - const double momentum = MomentumPerMB(momentumPerSample, actualMBSize); -#if DUMPOUTPUT - fprintf(stderr, "learnRatePerSample=%0.8f, momentum=%0.8f, actualMBSize=%ld\n", - learnRatePerSample, momentum, actualMBSize); - fprintf(stderr, "sgd->GradUpdateType()=%d, sgd->GradientUpdateNoiseStd()=%0.8f\n", - sgd->GradUpdateType(), sgd->GradientUpdateNoiseStd()); - gradientValues.Print("Gradient Input"); - smoothedGradient.Print("Smoothed Gradient Input"); -#endif - - // make actualMBSize is a valid value - assert(actualMBSize > 0); - - //clipping gradients to prevent outliers - sgd->ClipGradient(gradientValues, actualMBSize); - - GradientsUpdateType adpType = sgd->GradUpdateType(); - double noiseStd = sgd->GradientUpdateNoiseStd(); - Matrix sgdUpdateNoise((DEVICEID_TYPE)functionValues.GetDeviceId()); - if (noiseStd > 0) - { - // get the gradient structure since gradient is sparse - sgdUpdateNoise.SetValue(gradientValues); - - // reset its value to random - sgdUpdateNoise.SetGaussianRandomValue(0, (ElemType)noiseStd); - } - - // L2 regularizer - if (L2RegWeight > 0) - { - // multiply by actualMBSize so that it's invariant to minibatch size since learning rate is per sample - Matrix::ScaleAndAdd((ElemType)(L2RegWeight * actualMBSize), functionValues, gradientValues); - } - - if (adpType == GradientsUpdateType::None) - { - smoothedGradient.NormalGrad(gradientValues, functionValues, - (ElemType)learnRatePerSample, (ElemType)momentum); - } - else if (adpType == GradientsUpdateType::AdaGrad || - (adpType == GradientsUpdateType::RmsProp && gradientValues.GetMatrixType() == MatrixType::SPARSE)) - { - //rmsprop for sparse is not implemented yet, delegate it with adagrad - - double aveMultiplier = smoothedGradient.Adagrad(gradientValues, needAveMultiplier); - Matrix::ScaleAndAdd((ElemType)(-learnRatePerSample / aveMultiplier), gradientValues, functionValues); - } - else if (adpType == GradientsUpdateType::RmsProp) - { - double aveMultiplier = smoothedGradient.RmsProp(gradientValues, (ElemType)sgd->m_rpi.gamma, - (ElemType)sgd->m_rpi.inc, (ElemType)sgd->m_rpi.max, - (ElemType)sgd->m_rpi.dec, (ElemType)sgd->m_rpi.min, needAveMultiplier); - Matrix::ScaleAndAdd((ElemType)(-learnRatePerSample / aveMultiplier), gradientValues, functionValues); - } - - if (noiseStd > 0) - { - Matrix::ScaleAndAdd(1.0, sgdUpdateNoise, functionValues); - } - - // L1 regularizer with proximal gradient descent method - if (L1RegWeight > 0) - { - // multiply by actualMBSize so that it's invariant to minibatch size since learning rate is per sample - functionValues.InplaceSoftThreshold((ElemType)(learnRatePerSample * L1RegWeight * actualMBSize)); - } - -#if DUMPOUTPUT - functionValues.Print("Parameter Update"); -#endif - } - -// protected: - - // UpdateWeights - update the weights in - template - void SGD::UpdateWeights(const ComputationNodeBasePtr node, - Matrix& smoothedGradient, - const double learnRatePerSample, - const double momentumPerSample, 
- const size_t actualMBSize,
- const double L2RegWeight, const double L1RegWeight,
- const bool needAveMultiplier) const
- {
-#if DUMPOUTPUT
- fprintf(stderr, "Update_%ls\n", node->NodeName().c_str());
-#endif
- UpdateWeightsS(this, dynamic_pointer_cast<ComputationNode<ElemType>>(node)->FunctionValues(), dynamic_pointer_cast<ComputationNode<ElemType>>(node)->GradientValues(),
- smoothedGradient, learnRatePerSample, momentumPerSample,
- actualMBSize, L2RegWeight, L1RegWeight,
- needAveMultiplier);
- node->UpdateEvalTimeStamp();
- }
-
- template <class ElemType>
- void SGD<ElemType>::ClipGradient(Matrix<ElemType>& gradient, const size_t actualMBSize) const
- {
- if (m_clippingThresholdPerSample != std::numeric_limits<double>::infinity())
- {
- double maxGradientPerMB = m_clippingThresholdPerSample * actualMBSize;
- if (m_gradientClippingWithTruncation)
- gradient.InplaceTruncate((ElemType)(maxGradientPerMB));
- else
- {
- // norm2 normalized
- double gradientNorm = gradient.FrobeniusNorm();
- if (gradientNorm > maxGradientPerMB)
- {
- double normFactor = maxGradientPerMB / gradientNorm;
- gradient *= (ElemType)normFactor;
- }
- }
- }
- }
-
- template <class ElemType>
- void SGD<ElemType>::SaveCheckPointInfo(const size_t epoch, const size_t totalSamplesSeen,
- const double learnRatePerSample,
- const std::list<Matrix<ElemType>>& smoothedGradients,
- const double prevCriterion,
- const size_t minibatchSize)
- {
- // In case of parallel training only the main node should be saving the checkpoint to prevent
- // the parallel training nodes from colliding while writing the same file
- if ((g_mpi == nullptr) || g_mpi->IsMainNode())
- {
- wstring checkPointFileName = GetCheckPointFileNameForEpoch(int(epoch));
- // Saving into a temporary file and then renaming it to the checkPointFileName
- // This is a standard trick to avoid having corrupted checkpoint files if the process dies during writing
- wstring tempFileName = checkPointFileName + L".tmp";
-
- {
- File fstream(tempFileName, FileOptions::fileOptionsBinary | FileOptions::fileOptionsWrite);
- fstream.PutMarker(FileMarker::fileMarkerBeginSection, L"BCKP");
-
- fstream.PutMarker(FileMarker::fileMarkerBeginSection, L"BLearnRate");
- fstream << totalSamplesSeen << learnRatePerSample << prevCriterion;
- fstream.PutMarker(FileMarker::fileMarkerEndSection, L"ELearnRate");
-
- fstream.PutMarker(FileMarker::fileMarkerBeginSection, L"BMinibatchSize");
- fstream << minibatchSize;
- fstream.PutMarker(FileMarker::fileMarkerEndSection, L"EMinibatchSize");
-
- fstream.PutMarker(FileMarker::fileMarkerBeginSection, L"BGradient");
-
- for (auto smoothedGradientIter = smoothedGradients.begin(); smoothedGradientIter != smoothedGradients.end(); smoothedGradientIter++)
- {
- const Matrix<ElemType>& smoothedGradient = *smoothedGradientIter;
- fstream << smoothedGradient;
- }
-
- fstream.PutMarker(FileMarker::fileMarkerEndSection, L"EGradient");
-
- fstream.PutMarker(FileMarker::fileMarkerEndSection, L"ECKP");
-
- // Ensuring that data is written
- fstream.Flush();
- }
-
- renameOrDie(tempFileName, checkPointFileName);
- }
- }
-
- template <class ElemType>
- bool SGD<ElemType>::LoadCheckPointInfo(const size_t epochNumber,
- /*out*/ size_t& totalSamplesSeen,
- /*out*/ double& learnRatePerSample,
- std::list<Matrix<ElemType>>& smoothedGradients,
- /*out*/ double& prevCriterion,
- /*out*/ size_t& minibatchSize)
- {
- wstring checkPointFileName = GetCheckPointFileNameForEpoch(int(epochNumber));
- if (!fexists(checkPointFileName.c_str()))
- {
- fprintf(stderr, "Warning: checkpoint file is missing.
learning parameters will be initialized from 0\n"); - return false; - } - - File fstream(checkPointFileName, - FileOptions::fileOptionsBinary | FileOptions::fileOptionsRead); - fstream.GetMarker(FileMarker::fileMarkerBeginSection, L"BCKP"); - - fstream.GetMarker(FileMarker::fileMarkerBeginSection, L"BLearnRate"); - fstream >> totalSamplesSeen >> learnRatePerSample >> prevCriterion; - fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ELearnRate"); - - if (fstream.TryGetMarker(FileMarker::fileMarkerBeginSection, L"BMinibatchSize")) - { - fstream >> minibatchSize; - fstream.GetMarker(FileMarker::fileMarkerEndSection, L"EMinibatchSize"); - } - else - { - minibatchSize = m_mbSize[epochNumber]; - } - - fstream.GetMarker(FileMarker::fileMarkerBeginSection, L"BGradient"); - - for (auto smoothedGradientIter = smoothedGradients.begin(); smoothedGradientIter != smoothedGradients.end(); smoothedGradientIter++) - { - Matrix& smoothedGradient = *smoothedGradientIter; - fstream >> smoothedGradient; - } - fstream.GetMarker(FileMarker::fileMarkerEndSection, L"EGradient"); - - fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ECKP"); - - return true; - } - - template - wstring SGD::GetCheckPointFileNameForEpoch(const int epoch) - { - return GetModelNameForEpoch(epoch) + L".ckp"; - } - - template - wstring SGD::GetModelNameForEpoch(const int epoch, bool bLastModel) - { - int epoch1Base = epoch + 1; - if (epoch1Base == m_maxEpochs || bLastModel) - { - return m_modelPath; - } - else - { - wstring w = msra::strfun::wstrprintf(L"%ls.%d", m_modelPath.c_str(), (int)epoch1Base); - return w; - } - - } - - // return -1 if nothing exists - template // TODO: needed? - int SGD::DetermineStartEpoch(const bool makeMode) - { - if (!makeMode) - { - // always start from scratch - return -1; - } - - int firstEpoch = -1; - - wstring curEpochFile = GetModelNameForEpoch(int(m_maxEpochs) - 1); - for (int e = int(m_maxEpochs) - 1; e >= -1; e--) - { - const wstring prevEpochFile = GetModelNameForEpoch(e - 1); - - if (msra::files::fuptodate(curEpochFile, prevEpochFile, false)) - { - firstEpoch = size_t(e) + 1; - break; - } - else - { - curEpochFile = prevEpochFile; - } - } - - return firstEpoch; - } - -#define EPSILON 1e-5 - - template - bool SGD::GradientCheck(ComputationNetwork& net, - const std::vector & criterionNodes, - const std::list & learnableNodes, - int npos) - { - vector errMsgs; - - // gradient checking - for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++) - { - ComputationNodePtr node = dynamic_pointer_cast>(*nodeIter); - char wstrtmp[2048]; - - for (size_t itry = 0; itry < min((size_t)50, node->FunctionValues().GetNumElements()); itry++) - { - /// no support to sparse matrix yet - int irow = (int) fmod(rand(), node->FunctionValues().GetNumRows() - 1); - int icol = (int) fmod(rand(), node->FunctionValues().GetNumCols() - 1); - irow = max(0, irow); - icol = max(0, icol); - - fprintf(stderr, "\n###### d%ls######\n", node->NodeName().c_str()); - - double eOrg = node->FunctionValues()(irow, icol); - node->FunctionValues().TransferToDeviceIfNotThere(net.GetDeviceId(), true); - - node->UpdateEvalTimeStamp(); - - // use only the first criterion. Is - net.ComputeGradient(criterionNodes[npos]); - - if (node->GradientValues().GetMatrixType() == MatrixType::SPARSE) - { - break; - } - - //double mbEvalCri = - //criterionNode should be a scalar - // TODO: why is this value not used? 
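
The check that follows perturbs one randomly chosen weight by ±EPSILON and compares the backpropagated gradient against the central difference (f(w+eps) - f(w-eps)) / (2*eps), accepting the pair if they agree to m_gradientCheckSigDigit significant digits. A self-contained sketch of the same test on a toy criterion:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    // Toy criterion f(w) = w^3 with analytic gradient 3*w^2 standing in for the
    // backprop gradient; the threshold matches the formula used below.
    int main()
    {
        const double EPSILON = 1e-5, w = 0.7;
        double eGradErr = 3 * w * w;
        double eGradNum = (pow(w + EPSILON, 3) - pow(w - EPSILON, 3)) / (2 * EPSILON);
        int sigDigits = 6; // plays the role of m_gradientCheckSigDigit
        double threshold = pow(10.0,
            std::max(0.0, ceil(log10(std::min(fabs(eGradErr), fabs(eGradNum))))) - sigDigits);
        printf("BP = %.10g, numeric = %.10g -> %s\n", eGradErr, eGradNum,
               fabs(eGradErr - eGradNum) > threshold ? "WRONG" : "ok");
        return 0;
    }
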
- criterionNodes[npos]->Get00Element();
- double eGradErr = node->GradientValues()(irow, icol);
- node->GradientValues().TransferToDeviceIfNotThere(net.GetDeviceId(), true);
-
- double ePos = eOrg + EPSILON;
- double eNeg = eOrg - EPSILON;
-
- node->FunctionValues()(irow, icol) = (ElemType)ePos;
- node->FunctionValues().TransferToDeviceIfNotThere(net.GetDeviceId(), true);
-
- node->UpdateEvalTimeStamp();
- net.Evaluate(criterionNodes[npos]);
- //criterionNode should be a scalar
-
- double mbEvalCriPos = criterionNodes[npos]->Get00Element(); // TODO: make Get00Element() a function of ComputationNodeBase
-
- node->FunctionValues()(irow, icol) = (ElemType)eNeg;
- node->FunctionValues().TransferToDeviceIfNotThere(net.GetDeviceId(), true);
-
- node->UpdateEvalTimeStamp();
- net.Evaluate(criterionNodes[npos]);
-
- // criterionNode should be a scalar
- double mbEvalCriNeg = criterionNodes[npos]->Get00Element();
-
- // back to its original parameter value
- node->FunctionValues()(irow, icol) = (ElemType)eOrg;
- node->FunctionValues().TransferToDeviceIfNotThere(net.GetDeviceId(), true);
-
- // check if they are consistent
- double eGradNum = ((mbEvalCriPos - mbEvalCriNeg) / (ePos - eNeg));
- double threshold = pow(10.0,
- max(0.0,
- ceil(log10(min(fabs(eGradErr),
- fabs(eGradNum))))) - (int)m_gradientCheckSigDigit);
- double diff = fabs(eGradErr - eGradNum);
- bool wrong = (std::isnan(diff) || diff > threshold);
- if (wrong)
- {
- fprintf(stderr, "\nd%ls Numeric gradient = %e, Error BP gradient = %e\n",
- node->NodeName().c_str(), eGradNum, eGradErr);
- sprintf(wstrtmp, "\nd%ls Numeric gradient = %e, Error BP gradient = %e\n",
- node->NodeName().c_str(), eGradNum, eGradErr);
- errMsgs.push_back(wstrtmp);
- }
- }
- }
-
- return errMsgs.size() == 0;
- }
-
-template class SGD<float>;
-template class SGD<double>;
-
-// TODO: does not build--but part is used directly from CNTK.cpp
-//template class MultiNetworksSGD<float>;
-//template class MultiNetworksSGD<double>;
-
-}}}
+// SGD.cpp -- implements SGD with all bells and whistles, parallelization, randomization, etc.
+
+#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
+
+#include "Basics.h"
+#include "SGD.h"
+#include "AllReduceDistGradAggregator.h"
+
+#include
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+ using namespace std;
+
+ template <class ElemType>
+ void DecimateMinibatch(std::map<std::wstring, MSR::CNTK::Matrix<ElemType>*>& mb, int numProcessor, int myID)
+ {
+ int rank = myID;
+ int procs = numProcessor;
+
+ size_t rv = 0;
+ if (procs > 1)
+ {
+ for (auto it = mb.begin(); it != mb.end(); ++it)
+ {
+ MSR::CNTK::Matrix<ElemType>& mat = *(it->second);
+ size_t nCols = mat.GetNumCols();
+ size_t col_start = (nCols * rank) / procs;
+ size_t col_end = (nCols * (rank + 1)) / procs;
+ if (col_end > nCols)
+ {
+ // this shouldn't happen
+ col_end = nCols;
+ }
+
+ if (col_end == col_start)
+ {
+ MSR::CNTK::Matrix<ElemType> tmp(mat.GetNumRows(), 0, AUTOPLACEMATRIX, DENSE);
+ mat.SetValue(tmp);
+ }
+ else
+ {
+ MSR::CNTK::Matrix<ElemType> tmp = mat.ColumnSlice(col_start, col_end - col_start);
+ mat.SetValue(tmp);
+ }
+
+ if (rv == 0)
+ {
+ rv = mat.GetNumCols();
+ }
+ else
+ {
+ if (rv != mat.GetNumCols())
+ {
+ throw std::logic_error("Uneven number of columns among inputs.");
+ }
+ }
+ }
+ }
+ }
+
+ template <class ElemType>
+ size_t DecimateMinibatchWithSentences(std::map<std::wstring, MSR::CNTK::Matrix<ElemType>*>& mb, /* (input) matrix to be decimated */
+ int rank, int numprocs, /* (input) rank info */
+ size_t& nSlices, /* (input/output): on input, # of parallel sentences in total; on output, # of parallel sentences in this node */
+ MBLayoutPtr pMBLayout, // gets filled in
+ IDataReader<ElemType>* trainDataReader) /* (input) to have access to reader */
+ {
+ // For RNN, an input Matrix is organized in the following way:
+ // | x_t^1 x_t^2 ... x_t^N | .... | x_{t+T-1}^1 ... x_{t+T-1}^N |
+ // |<---- block 1 ---->| .... |<------ block T ----->|
+ // N is the nSlice (input)
+ // The decimation here is to split each block across individual GPUs
+ // So after decimation
+ // | x_t^{st} ... x_t^{en-1}| .... | x_{t+T-1}^{st} ...
x_{t+T-1}^{en-1} | + // Each block now has nSlice/nProcs + // + // Correspondingly, the SentenceBoundary and PackingFlags will be revised + trainDataReader->CopyMBLayoutTo(pMBLayout); // fill this + + size_t rv = 0; + size_t nOrigParallelUtts = nSlices; + static bool warned = false; + if (numprocs > 1) + { + // decide new parallel utterances + size_t sent_start = 0; + size_t sent_end = 0; + if (nOrigParallelUtts % numprocs != 0) + { + if (!warned) + { + /* give a warning of potential bandwidth wasting */ + fprintf(stderr, "WARNING: %d GPUs are used in model averaging, but the number of parallel utterances are %d, a potential training speed degradation.\n", + (int)g_mpi->NumNodesInUse(), (int)nOrigParallelUtts); + warned = true; + } + if (rank == numprocs - 1) + { + nSlices = nOrigParallelUtts - (nOrigParallelUtts / numprocs + 1) * (numprocs - 1); + sent_start = (nOrigParallelUtts / numprocs + 1) * (numprocs - 1); + sent_end = nOrigParallelUtts; + } + else + { + nSlices = nOrigParallelUtts / numprocs + 1; + sent_start = nSlices * rank; + sent_end = nSlices * (rank + 1); + if (sent_end > nOrigParallelUtts) sent_end = nOrigParallelUtts; + } + } + else + { + nSlices = nOrigParallelUtts / numprocs; + sent_start = rank*nSlices; + sent_end = (rank + 1)*nSlices; + if (sent_end > nOrigParallelUtts) sent_end = nOrigParallelUtts; + } + // decimate data + for (auto it = mb.begin(); it != mb.end(); ++it) + { + MSR::CNTK::Matrix &mat = *(it->second); + size_t nCols = mat.GetNumCols(); + + if (nCols % nOrigParallelUtts != 0) + { + // this should not happen for DNN, RNN with truncated BPTT, not sure about other special stuff ... + RuntimeError("ERROR: minibatch size %d, but with %d parallel utterances\n", nCols, nOrigParallelUtts); + } + size_t nBlocks = nCols / nOrigParallelUtts; + // for RNN, nBlocks is the size of truncated BPTT + if (sent_end == sent_start) + { + // should never happen, print debug info + RuntimeError("ERROR: in DecimateMinibatch, col_st=col_en=%d, nCol=%d, nBlock=%d, nParaUtts=%d, nGPU=%d\n", + (int)sent_start, (int)nCols, (int)nBlocks, (int)nOrigParallelUtts, (int)numprocs); + } + + MSR::CNTK::Matrix tmp(mat.GetNumRows(), nSlices*nBlocks, mat.GetPreferredDeviceId(), mat.GetMatrixType()); + + // do the column slice for each block + for (size_t iblock = 0; iblock < nBlocks; iblock++) + { + tmp.SetColumnSlice(mat.ColumnSlice(nOrigParallelUtts*iblock + sent_start, nSlices), + iblock*nSlices, nSlices); + } + mat.SetValue(tmp); + + // assert the cols are even among nodes + if (0 == rv) + { + rv = mat.GetNumCols(); + } + else + { + if (rv != mat.GetNumCols()) + throw std::logic_error("Uneven number of columns among inputs."); + } + } + // revise sentence boundary and packing flags + // TODO: get rid of this explicit matrix, this can be done directly with MBLayout types. 
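
The split above hands each MPI rank a contiguous range of the parallel sentences: when the count divides evenly every rank gets N/procs, otherwise ranks 0..procs-2 take ceil(N/procs) each and the last rank takes the remainder. A standalone sketch of that range computation (illustrative only):

    #include <cstdio>

    static void SentenceRange(size_t nUtts, int rank, int procs, size_t& start, size_t& end)
    {
        if (nUtts % procs == 0)
        {
            size_t per = nUtts / procs;
            start = rank * per; end = start + per;
        }
        else if (rank == procs - 1) // last rank takes what is left
        {
            start = (nUtts / procs + 1) * (procs - 1); end = nUtts;
        }
        else // every other rank takes ceil(nUtts / procs)
        {
            size_t per = nUtts / procs + 1;
            start = rank * per; end = start + per;
        }
    }

    int main()
    {
        // e.g. 10 parallel utterances over 4 ranks -> [0,3) [3,6) [6,9) [9,10)
        for (int r = 0; r < 4; r++)
        {
            size_t s, e;
            SentenceRange(10, r, 4, s, e);
            printf("rank %d: sentences [%zu, %zu)\n", r, s, e);
        }
        return 0;
    }
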
+            size_t nMBSize = pMBLayout->GetSize();
+            Matrix<float> newBoundary(CPUDEVICE);
+            newBoundary.Resize(nSlices, nMBSize);
+            newBoundary.AssignRowSliceValuesOf(pMBLayout->GetM(), sent_start, nSlices);
+            fill(pMBLayout->GetV().begin(), pMBLayout->GetV().end(), MinibatchPackingFlags::None);
+            for (size_t nt = 0; nt < nMBSize; nt++)
+            {
+                for (size_t ns = 0; ns < nSlices; ns++)
+                {
+                    if (newBoundary(ns, nt) == ((int) MinibatchPackingFlags::SequenceStart))
+                        pMBLayout->GetV()[nt] |= MinibatchPackingFlags::SequenceStart;
+                    if (newBoundary(ns, nt) == ((int) MinibatchPackingFlags::SequenceEnd))
+                        pMBLayout->GetV()[nt] |= MinibatchPackingFlags::SequenceEnd;
+                }
+            }
+        }
+
+        return rv;
+    }
+
+    static AdaptationRegType ParseAdaptationRegType(wstring s)
+    {
+        msra::strfun::tolower_ascii(s);
+        if (s == L"" || s == L"none")
+            return AdaptationRegType::None;
+        else if (s == L"kl" || s == L"klreg")
+            return AdaptationRegType::KL;
+        else
+            throw std::invalid_argument("ParseAdaptationRegType: Invalid Adaptation Regularization Type. Valid values are (None | KL)");
+    }
+
+    static GradientsUpdateType ParseGradUpdateType(wstring s)
+    {
+        msra::strfun::tolower_ascii(s);
+        if (s == L"" || s == L"none" || s == L"normal" || s == L"simple")
+            return GradientsUpdateType::None;
+        else if (s == L"adagrad")
+            return GradientsUpdateType::AdaGrad;
+        else if (s == L"rmsprop")
+            return GradientsUpdateType::RmsProp;
+        else if (s == L"fsadagrad")
+            return GradientsUpdateType::FSAdaGrad;
+        else
+            throw std::invalid_argument("ParseGradUpdateType: Invalid Gradient Updating Type. Valid values are (None | AdaGrad | RmsProp | FSAdaGrad)");
+    }
+
+    static ParallelizationMethod ParseParallelizationMethod(wstring s)
+    {
+        msra::strfun::tolower_ascii(s);
+        if ((s == L"") || (s == L"none"))
+            return ParallelizationMethod::None;
+        else if (s == L"dataparallelsgd")
+            return ParallelizationMethod::DataParallelSGD;
+        else if (s == L"modelaveragingsgd")
+            return ParallelizationMethod::ModelAveragingSGD;
+        else
+            throw std::invalid_argument("ParseParallelizationMethod: Invalid Parallelization Method. Valid values are (None | DataParallelSGD | ModelAveragingSGD)");
+    }
+
+    static LearningRateSearchAlgorithm ParseLearningRateSearchType(wstring s)
+    {
+        // TODO: why allow so many variants?
+        msra::strfun::tolower_ascii(s);
+        if (s == L"false" || s == L"none")
+            return LearningRateSearchAlgorithm::None;
+        else if (s == L"searchbeforeepoch" || s == L"beforeepoch" || s == L"before")
+            return LearningRateSearchAlgorithm::SearchBeforeEpoch;
+        else if (s == L"adjustafterepoch" || s == L"afterepoch" || s == L"after")
+            return LearningRateSearchAlgorithm::AdjustAfterEpoch;
+        else
+            throw std::invalid_argument("autoAdjustLR: Invalid learning rate search type. Valid values are (None | SearchBeforeEpoch | AdjustAfterEpoch)");
+    }
+
+    template<class ElemType>
+    SGD<ElemType>::SGD(const ConfigParameters& configSGD)
+    {
+        ConfigArray learningRatesPerMBStr = configSGD("learningRatesPerMB", "");
+        m_needToNormalizeLRByParallUtterance = false;
+        m_needToNormalizeMomentumByParallUtterance = false;
+        floatargvector learningRatesPerMB = learningRatesPerMBStr;
+
+        ConfigArray learningRatesPerSampleStr = configSGD("learningRatesPerSample", "");
+        floatargvector learningRatesPerSample = learningRatesPerSampleStr;
+
+        std::string executionEngineValue = configSGD("executionEngine", "synchronous");
+
+        // AutoAdjust Parameters
+        ConfigParameters configAALR(configSGD("AutoAdjust", ""));
+        LearningRateSearchAlgorithm autoAdjustLRType = ParseLearningRateSearchType(configAALR("autoAdjustLR", "None"));
+        double reduceLearnRateIfImproveLessThan = configAALR("reduceLearnRateIfImproveLessThan", "0");
+        bool continueReduce = (bool) configAALR("continueReduce", "false");
+        size_t learnRateAdjustInterval = (size_t) configAALR("learnRateAdjustInterval", "1");
+        double learnRateDecreaseFactor = configAALR("learnRateDecreaseFactor", "0.618");
+        double increaseLearnRateIfImproveMoreThan = configAALR("increaseLearnRateIfImproveMoreThan", "1#INF");
+        double learnRateIncreaseFactor = configAALR("learnRateIncreaseFactor", "1.382");
+
+        // AutoAdjust: automatic minibatch-size adjustment parameters
+        bool autoAdjustMinibatch = (bool) configAALR("autoAdjustMinibatch", "false");
+        size_t minibatchSizeTuningFrequency = configAALR("minibatchSizeTuningFrequency", "1");
+        size_t minibatchSizeTuningMax = configAALR("minibatchSizeTuningMax", "1048576");
+        size_t minibatchSearchCriterionErrorMargin = configAALR("minibatchSearchCriterionErrorMargin", "1");
+
+        // the number of minibatches used to search the learning rate.
+        // It is typically set to 10-20% of the total number of minibatches in an epoch.
+        ConfigArray minibatch4LRSearch = configAALR("numMiniBatch4LRSearch", "500");
+        intargvector numMiniBatch4LRSearch = minibatch4LRSearch;
+
+        size_t numPrevLearnRates = configAALR("numPrevLearnRates", "5");
+        size_t numBestSearchEpoch = configAALR("numBestSearchEpoch", "1");
+        bool loadBestModel = configAALR("loadBestModel", "true");
+        bool useCVSetControlLRIfCVExists = configAALR("UseCVSetControlLRIfCVExists", "true");
+        bool useEvalCriterionControlLR = configAALR("UseEvalCriterionControlLR", "false");
+
+        ConfigArray minibatchSize = configSGD("minibatchSize", "256");
+        intargvector mbSize = minibatchSize;
+
+        // the number of samples in each epoch (0 means, use all the samples in each epoch).
+        size_t epochSize = configSGD("epochSize", "0");
+
+        // the total number of epochs to run.
+ size_t maxEpochs = configSGD("maxEpochs"); + + ConfigArray momentumPerMBStr = configSGD("momentumPerMB", ""); + floatargvector momentumPerMB = momentumPerMBStr; + + ConfigArray momentumPerSampleStr = configSGD("momentumPerSample", ""); + floatargvector momentumPerSample = momentumPerSampleStr; + + wstring modelPath = configSGD("modelPath"); + wstring trainCriterionNodeName = configSGD("trainCriterionNodeName", ""); + wstring evalCriterionNodeName = configSGD("evalCriterionNodeName", ""); + + size_t maxTempMemSizeInSamplesForCNN = configSGD("maxTempMemSizeInSamplesForCNN", "0"); + + int traceLevel = configSGD("traceLevel", "0"); + size_t numMBsToShowResult = configSGD("numMBsToShowResult", "10"); + size_t numMBsToCUDAProfile = configSGD("numMBsToCUDAProfile", "0"); + + bool keepCheckPointFiles = configSGD("keepCheckPointFiles", "false"); + + bool gradientClippingWithTruncation = configSGD("gradientClippingWithTruncation", "true"); + double clippingThresholdPerSample = configSGD("clippingThresholdPerSample", "1#INF"); + + ConfigArray dropoutRatesStr = configSGD("dropoutRate", "0.0"); + floatargvector dropoutRates = dropoutRatesStr; + + GradientUpdateInfo gUpdateInfo; + GradientsUpdateType gradUpdateType = ParseGradUpdateType(configSGD("gradUpdateType", "None")); + double gaussianNoiseInjecStd = configSGD("gaussianNoiseInjectStd", "0"); + gUpdateInfo.mType = gradUpdateType; + gUpdateInfo.mGaussianNoiseInjectStd = (float) gaussianNoiseInjecStd; + + // extract RMSProp parameters from config, if they exist. Default to reasonable values. + RMSPropInfo rpi; + rpi.dec = (double) configSGD("rms_wgt_dec", "0.75"); + rpi.inc = (double) configSGD("rms_wgt_inc", "1.2"); + rpi.min = (double) configSGD("rms_wgt_min", "0.1"); + rpi.max = (double) configSGD("rms_wgt_max", "10.0"); + rpi.gamma = (double) configSGD("rms_gamma", "0.99"); + + bool needAveMultiplier = (bool) configSGD("normWithAveMultiplier", "true"); + double L2RegWeight = (double) configSGD("L2RegWeight", "0"); + double L1RegWeight = (double) configSGD("L1RegWeight", "0"); + + /// for backward support. 
Future setup should use gradUpdateType=AdaGrad instead of
+        /// useAdagrad=true
+        bool useAdagrad = configSGD("useAdagrad", "false");
+        if (useAdagrad)
+        {
+            gradUpdateType = GradientsUpdateType::AdaGrad;
+            gUpdateInfo.mType = gradUpdateType;
+        }
+
+        AdaptationRegType adaptationRegType = ParseAdaptationRegType(configSGD("adaptationRegType", "None"));
+        double adaptationRegWeight = configSGD("adaptationRegWeight", "0");
+
+        /// gradient check setup
+        bool doGradientCheck = configSGD("gradientcheck", "false");
+        double gradientCheckSigDigit = configSGD("sigFigs", "6");
+
+        if (doGradientCheck && sizeof(ElemType) != sizeof(double))
+            LogicError("Gradient check needs to use precision = double");
+        m_doUnitTest = configSGD("unittest", "false");
+
+        bool validateAfterModelReloading = configSGD("validateAfterModelReloading", "true");
+
+        bool UsingAllDataForPreComputedNode = configSGD("UseAllDataForPreComputedNode", "true");
+
+        // Parallel training
+        m_parallelizationMethod = ParallelizationMethod::None;
+        m_distGradAgg = nullptr;
+        m_gradHeader = nullptr;
+        m_numGradientBits = 32;
+        m_zeroThresholdFor1Bit = true;
+        m_enableDistributedMBReading = false;
+        m_parallelizationStartEpochNum = 0;
+        m_nFramesBetweenMASync = 40000; // default 40k frames
+
+        if ((g_mpi != nullptr) && configSGD.ExistsCurrent("ParallelTrain"))
+        {
+            ConfigParameters configParallelTrain(configSGD("ParallelTrain", ""));
+            m_parallelizationMethod = ParseParallelizationMethod(configParallelTrain("parallelizationMethod", "None"));
+            m_parallelizationStartEpochNum = configParallelTrain("parallelizationStartEpoch", "1");
+            m_parallelizationStartEpochNum -= 1; // epoch numbers internally are 0-based
+            m_enableDistributedMBReading = configParallelTrain("distributedMBReading", "false");
+
+            if (configParallelTrain.ExistsCurrent("DataParallelSGD"))
+            {
+                ConfigParameters configDataParallelSGD(configParallelTrain("DataParallelSGD", ""));
+                const char* defaultGradientBitsStr = (sizeof(ElemType) == sizeof(float)) ? "32" : "64";
+                m_numGradientBits = configDataParallelSGD("gradientBits", defaultGradientBitsStr);
+                m_zeroThresholdFor1Bit = configDataParallelSGD("useZeroThresholdFor1BitQuantization", "true");
+                if ((m_numGradientBits < 1) || (m_numGradientBits > (8 * sizeof(ElemType))))
+                {
+                    throw std::invalid_argument("gradientBits must be in the range [1, 32] when using precision=float and in range [1, 64] when using precision=double!");
+                }
+            }
+
+            if (configParallelTrain.ExistsCurrent("ModelAveragingSGD"))
+            {
+                ConfigParameters configMASGD(configParallelTrain("ModelAveragingSGD", ""));
+                m_nFramesBetweenMASync = configMASGD("SyncFrequencyInFrames", "40000");
+                m_iMASyncStatsTrace = configMASGD("MAPerfStats", "0");
+            }
+        }
+
+        // TODO: the number of parameters of this function is waaay too many!
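+        // For reference, a config fragment with hypothetical values that the ParallelTrain block above would parse:
+        //     ParallelTrain = [
+        //         parallelizationMethod = DataParallelSGD
+        //         parallelizationStartEpoch = 2
+        //         distributedMBReading = true
+        //         DataParallelSGD = [
+        //             gradientBits = 1
+        //             useZeroThresholdFor1BitQuantization = true
+        //         ]
+        //     ]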
+        Init(learningRatesPerMB,
+             learningRatesPerSample,
+             mbSize,
+             epochSize,
+             maxEpochs,
+             modelPath,
+             momentumPerMB,
+             momentumPerSample,
+             gradientClippingWithTruncation,
+             clippingThresholdPerSample,
+             autoAdjustLRType,
+             increaseLearnRateIfImproveMoreThan,
+             learnRateIncreaseFactor,
+             reduceLearnRateIfImproveLessThan,
+             continueReduce,
+             learnRateDecreaseFactor,
+             dropoutRates,
+             loadBestModel,
+             numMiniBatch4LRSearch,
+             numPrevLearnRates,
+             numBestSearchEpoch,
+             traceLevel,
+             numMBsToShowResult,
+             numMBsToCUDAProfile,
+             maxTempMemSizeInSamplesForCNN,
+             gUpdateInfo,
+             keepCheckPointFiles,
+             adaptationRegType,
+             adaptationRegWeight,
+             trainCriterionNodeName,
+             evalCriterionNodeName,
+             doGradientCheck,
+             gradientCheckSigDigit,
+             validateAfterModelReloading,
+             rpi,
+             learnRateAdjustInterval,
+             UsingAllDataForPreComputedNode,
+             needAveMultiplier,
+             L2RegWeight,
+             L1RegWeight,
+             autoAdjustMinibatch,
+             minibatchSizeTuningFrequency,
+             minibatchSizeTuningMax,
+             useCVSetControlLRIfCVExists,
+             useEvalCriterionControlLR,
+             minibatchSearchCriterionErrorMargin);
+    }
+
+    // autoLearnRateSearchType is applied only if the learning rate for the epoch is not specified in learningRatesPerMB and learningRatesPerSample
+    template<class ElemType>
+    void SGD<ElemType>::Init(const floatargvector& learningRatesPerMB,
+                             const floatargvector& learningRatesPerSample,
+                             const intargvector& mbSize,
+                             const size_t epochSize,
+                             const size_t maxEpochs,
+                             const wstring& modelPath,
+                             const floatargvector& momentumPerMB,
+                             const floatargvector& momentumPerSample,
+                             const bool gradientClippingWithTruncation,
+                             const double clippingThresholdPerSample,
+                             const LearningRateSearchAlgorithm autoLearnRateSearchType,
+                             const double increaseLearnRateIfImproveMoreThan,
+                             const double learnRateIncreaseFactor,
+                             const double reduceLearnRateIfImproveLessThan,
+                             const bool continueReduce,
+                             const double learnRateDecreaseFactor,
+                             floatargvector dropoutRates,
+                             const bool loadBestModel,
+                             const intargvector& numMiniBatch4LRSearch,
+                             const size_t numPrevLearnRates,
+                             const size_t numBestSearchEpoch,
+                             const int traceLevel,
+                             const size_t numMBsToShowResult,
+                             const size_t numMBsToCUDAProfile,
+                             const size_t maxTempMemSizeInSamplesForCNN,
+                             const GradientUpdateInfo gradUpdateType,
+                             const bool keepCheckPointFiles,
+                             const AdaptationRegType adaptationRegType,
+                             const double adaptationRegWeight,
+                             const wstring trainCriterionNodeName,
+                             const wstring evalCriterionNodeName,
+                             const bool doGradientCheck,
+                             const double gradientCheckSigDigit,
+                             const bool validateAfterModelReloading,
+                             RMSPropInfo rpi,
+                             size_t learnRateAdjustInterval,
+                             const bool UsingAllDataForPreComputed,
+                             const bool needAveMultiplier,
+                             const double L2RegWeight,
+                             const double L1RegWeight,
+                             const bool autoAdjustMinibatch,
+                             const size_t minibatchSizeTuningFrequency,
+                             const size_t minibatchSizeTuningMax,
+                             const bool useCVSetControlLRIfCVExists,
+                             const bool useEvalCriterionControlLR,
+                             const size_t minibatchSearchCriterionErrorMargin)
+    {
+        m_numPrevLearnRates = numPrevLearnRates;
+        m_prevChosenMinibatchSize = 0;
+        m_autoAdjustMinibatch = autoAdjustMinibatch;
+        m_minibatchSizeTuningMax = minibatchSizeTuningMax;
+        m_minibatchSizeTuningFrequency = minibatchSizeTuningFrequency;
+        m_minibatchSearchCriterionErrorMargin = minibatchSearchCriterionErrorMargin;
+
+        m_mbSize = mbSize;
+
+        // the number of samples in each epoch (0 means, use all the samples in each epoch).
+        m_epochSize = epochSize;
+        if (m_epochSize == 0)
+        {
+            m_epochSize = requestDataSize;
+        }
+
+        // the total number of epochs to run.
+ m_maxEpochs = maxEpochs; + + m_gradientClippingWithTruncation = gradientClippingWithTruncation; + m_modelPath = modelPath; + m_autoLearnRateSearchType = autoLearnRateSearchType; + m_traceLevel = traceLevel; + m_loadBestModel = loadBestModel; + m_increaseLearnRateIfImproveMoreThan = increaseLearnRateIfImproveMoreThan; + m_learnRateIncreaseFactor = learnRateIncreaseFactor; + m_reduceLearnRateIfImproveLessThan = reduceLearnRateIfImproveLessThan; + m_continueReduce = continueReduce; + + //minimum interval is 1 epoch + m_learnRateAdjustInterval = max((size_t) 1, learnRateAdjustInterval); + + m_learnRateDecreaseFactor = learnRateDecreaseFactor; + m_clippingThresholdPerSample = abs(clippingThresholdPerSample); + m_numMiniBatch4LRSearch = numMiniBatch4LRSearch; + m_dropoutRates = dropoutRates; + m_numMBsToShowResult = int(numMBsToShowResult); + m_numMBsToCUDAProfile = int(numMBsToCUDAProfile); + m_numBestSearchEpoch = numBestSearchEpoch; + m_maxTempMemSizeInSamplesForCNN = maxTempMemSizeInSamplesForCNN; + m_gradType = gradUpdateType; + m_rpi = rpi; + m_keepCheckPointFiles = keepCheckPointFiles; + + m_adaptationRegType = adaptationRegType; + m_adaptationRegWeight = adaptationRegWeight; + + m_trainCriterionNodeName = trainCriterionNodeName; + m_evalCriterionNodeName = evalCriterionNodeName; + m_useAllDataForPreComputedNode = UsingAllDataForPreComputed; + + m_needAveMultiplier = needAveMultiplier; + m_L2RegWeight = L2RegWeight; + m_L1RegWeight = L1RegWeight; + + for (size_t i = 0; i < m_mbSize.size(); i++) + { + if (m_epochSize != requestDataSize && m_epochSize < m_mbSize[i]) + { + throw std::invalid_argument("epoch size must be larger than mbsize."); + } + } + + if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::None && + (learningRatesPerSample.size() == 0 && learningRatesPerMB.size() == 0)) + { + throw std::invalid_argument("If autoLearnRateSearchType is false " + "you must specify the learningRatesPerSample " + "or learningRatesPerMB parameter."); + } + + if (learningRatesPerSample.size() > 0 && learningRatesPerMB.size() > 0) + { + throw std::invalid_argument("You specified both learningRatesPerSample " + "and learningRatesPerMB. Please comment " + "out one of them."); + } + else if (learningRatesPerSample.size() > 0) + { + m_learningRatesPerSample = learningRatesPerSample; + } + else if (learningRatesPerMB.size() > 0) + { + int LRSize = (int) max(learningRatesPerMB.size(), m_mbSize.size()); + m_learningRatesPerSample.resize(LRSize); + for (int i = 0; i < LRSize; i++) + { + m_learningRatesPerSample[i] = learningRatesPerMB[i] / m_mbSize[i]; + } + m_needToNormalizeLRByParallUtterance = true; + } + + if (momentumPerSample.size() > 0 && momentumPerMB.size() > 0) + { + throw std::invalid_argument("You specified both momentumPerSample " + "and momentumPerMB. 
Please comment "
+                                        "out one of them.");
+        }
+        else if (momentumPerSample.size() > 0)
+        {
+            m_momentumPerSample = momentumPerSample;
+            int momentumVectorSize = m_momentumPerSample.size();
+            for (int i = 0; i < momentumVectorSize; i++)
+            {
+                if ((m_momentumPerSample[i] >= 1) || (m_momentumPerSample[i] < 0))
+                {
+                    throw std::invalid_argument("momentumPerSample must be in [0, 1).");
+                }
+            }
+        }
+        else if (momentumPerMB.size() > 0)
+        {
+            int momentumVectorSize = (int) max(momentumPerMB.size(), m_mbSize.size());
+            m_momentumPerSample.resize(momentumVectorSize);
+            for (int i = 0; i < momentumVectorSize; i++)
+            {
+                if ((momentumPerMB[i] >= 1) || (momentumPerMB[i] < 0))
+                    InvalidArgument("momentumPerMB must be in [0, 1).");
+                m_momentumPerSample[i] = (float) pow(momentumPerMB[i], 1.0 / m_mbSize[i]);
+            }
+
+            m_needToNormalizeMomentumByParallUtterance = true;
+        }
+        else
+        {
+            int momentumVectorSize = m_mbSize.size();
+            m_momentumPerSample.resize(momentumVectorSize);
+            for (int i = 0; i < momentumVectorSize; i++)
+                m_momentumPerSample[i] = (float) pow(0.9f, 1.0 / m_mbSize[i]);
+        }
+
+        if (m_learnRateDecreaseFactor > 1 || m_learnRateIncreaseFactor < 1)
+            InvalidArgument("learnRateIncreaseFactor must be >= 1 and learnRateDecreaseFactor must be <= 1.");
+
+        for (size_t i = 0; i < m_dropoutRates.size(); i++)
+            if (m_dropoutRates[i] >= 1 || m_dropoutRates[i] < 0)
+                InvalidArgument("dropoutRate must be >= 0 and < 1.");
+
+        if (m_adaptationRegWeight > 1 || m_adaptationRegWeight < 0)
+            InvalidArgument("adaptationRegWeight must be in [0 1]");
+
+        m_minLearnRate = 1e-9f;
+
+        m_needAdaptRegularization = false;
+
+        m_doGradientCheck = doGradientCheck;
+        m_gradientCheckSigDigit = gradientCheckSigDigit;
+        m_validateAfterModelReloading = validateAfterModelReloading;
+
+        m_useCVSetControlLRIfCVExists = useCVSetControlLRIfCVExists;
+        m_useEvalCriterionControlLR = useEvalCriterionControlLR;
+
+        msra::files::make_intermediate_dirs(m_modelPath);
+    }
+
+    template<class ElemType>
+    void SGD<ElemType>::Adapt(wstring origModelFileName, wstring refNodeName,
+                              IDataReader<ElemType>* trainSetDataReader,
+                              IDataReader<ElemType>* validationSetDataReader,
+                              const DEVICEID_TYPE deviceID, const bool makeMode)
+    {
+        if (origModelFileName == L"" || trainSetDataReader == nullptr)
+            InvalidArgument("origModel and trainSetDataReader should not be null.");
+
+        int startEpoch = DetermineStartEpoch(makeMode);
+        if (startEpoch == m_maxEpochs)
+        {
+            fprintf(stderr, "Final model exists. No further training is necessary.\n");
+            return;
+        }
+
+        ComputationNetwork net(deviceID);
+        if (startEpoch >= 0)
+        {
+            wstring modelFileName = GetModelNameForEpoch(int(startEpoch) - 1);
+            fprintf(stderr, "Starting from checkpoint. Load Network From File %ls.\n", modelFileName.c_str());
+            net.LoadFromFile(modelFileName);
+        }
+        else
+        {
+            fprintf(stderr, "Load Network From the original model file %ls.\n", origModelFileName.c_str());
+            net.LoadFromFile(origModelFileName);
+        }
+
+        startEpoch = max(startEpoch, 0);
+
+        ComputationNetwork refNet(deviceID);
+        m_needAdaptRegularization = m_adaptationRegType != AdaptationRegType::None && m_adaptationRegWeight > 0;
+        if (m_needAdaptRegularization)
+        {
+            fprintf(stderr, "Load reference Network From the original model file %ls.\n", origModelFileName.c_str());
+            refNet.LoadFromFile(origModelFileName);
+        }
+
+        ComputationNodeBasePtr refNode;
+        if (m_needAdaptRegularization && m_adaptationRegType == AdaptationRegType::KL)
+        {
+            fprintf(stderr, "Checking refNodeName %ls.\n", refNodeName.c_str());
+            if (refNodeName == L"")
+                InvalidArgument("refNodeName does not exist and is needed when adaptationRegType is KL.");
+            refNode = refNet.GetNodeFromName(refNodeName);
+        }
+
+        TrainOrAdaptModel(startEpoch, net, refNet, refNode, trainSetDataReader, validationSetDataReader);
+    }
+
+    template<class ElemType>
+    void SGD<ElemType>::SequenceTrain(IComputationNetBuilder<ElemType>* netBuilder, wstring origModelFileName,
+                                      IDataReader<ElemType>* trainSetDataReader, IDataReader<ElemType>* validationSetDataReader,
+                                      const DEVICEID_TYPE deviceID, const bool makeMode)
+    {
+        if (netBuilder == nullptr || origModelFileName == L"" || trainSetDataReader == nullptr)
+            InvalidArgument("netBuilder, origModel and trainSetDataReader should not be null.");
+
+        int startEpoch = DetermineStartEpoch(makeMode);
+        if (startEpoch == m_maxEpochs)
+        {
+            fprintf(stderr, "Final model exists. No further training is necessary.\n");
+            return;
+        }
+
+        // Initializes the model from the original model.
+        ComputationNetwork origNet(deviceID);
+        ComputationNetwork* sequenceNet =
+            (startEpoch < 0) ? netBuilder->BuildNetworkFromDescription() : &origNet;
+        std::vector<ComputationNodeBasePtr> addedFeatureNodes;
+        std::vector<ComputationNodeBasePtr> replacedCriterionNodes;
+        if (startEpoch < 0)
+        {
+            // Loads models.
+            origNet.LoadFromFile(origModelFileName);
+
+            // Processes feature nodes.
+            std::vector<ComputationNodeBasePtr>& sequenceFeatureNodes = sequenceNet->FeatureNodes();
+            for (size_t i = 0; i < sequenceFeatureNodes.size(); ++i)
+            {
+                if (!origNet.NodeNameExist(sequenceFeatureNodes[i]->NodeName()))
+                {
+                    addedFeatureNodes.push_back(sequenceFeatureNodes[i]);
+                    origNet.AddFeatureNode(sequenceFeatureNodes[i]);
+                }
+            }
+
+            // Processes criterion nodes.
+            auto & origCriterionNodes = GetTrainCriterionNodes(origNet);
+            auto & sequenceCriterionNodes = GetTrainCriterionNodes(*sequenceNet);
+            if (origCriterionNodes.size() == 0 || sequenceCriterionNodes.size() == 0)
+            {
+                throw std::runtime_error("Training criterion node does not exist.");
+            }
+            replacedCriterionNodes.push_back(origCriterionNodes[0]);
+            origNet.ReplaceFinalCriterionNode(origCriterionNodes[0]->NodeName(), sequenceCriterionNodes[0]);
+            origNet.ResetEvalTimeStamp();
+        }
+
+        wstring modelFileName = GetModelNameForEpoch(int(startEpoch) - 1);
+        if (startEpoch >= 0)
+            fprintf(stderr, "Starting from checkpoint. Load Network From File %ls.\n", modelFileName.c_str());
+        else
+            fprintf(stderr, "Load Network From the original model file %ls.\n", origModelFileName.c_str());
+        ComputationNetwork* net = (startEpoch < 0) ? &origNet : netBuilder->LoadNetworkFromFile(modelFileName);
+
+        startEpoch = max(startEpoch, 0);
+
+        TrainOrAdaptModel(startEpoch, *net, *net, nullptr, trainSetDataReader, validationSetDataReader);
+
+        // Handles deletions carefully here.
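+        // (The sequence network borrowed feature nodes from origNet and swapped in its own criterion node;
+        // the block below removes the borrowed nodes again and restores the original criterion node, so
+        // that origNet ends up back in its pre-training shape.)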
+        if (startEpoch < 0)
+        {
+            for (size_t i = 0; i < addedFeatureNodes.size(); ++i)
+                origNet.RemoveFeatureNode(addedFeatureNodes[i]);
+            auto & origCriterionNodes = GetTrainCriterionNodes(origNet);
+            origNet.ReplaceFinalCriterionNode(origCriterionNodes[0]->NodeName(), replacedCriterionNodes[0]);
+        }
+    }
+
+    static double MomentumPerMB(double momentumPerSample, size_t minibatchSize)
+    {
+        return pow(momentumPerSample, minibatchSize);
+    }
+
+    template<class ElemType>
+    void SGD<ElemType>::Train(IComputationNetBuilder<ElemType>* netBuilder,
+                              IDataReader<ElemType>* trainSetDataReader,
+                              IDataReader<ElemType>* validationSetDataReader,
+                              const bool makeMode)
+    {
+        if (netBuilder == nullptr || trainSetDataReader == nullptr)
+            InvalidArgument("netBuilder and trainSetDataReader should not be null.\n");
+        int startEpoch = DetermineStartEpoch(makeMode);
+        if (startEpoch == m_maxEpochs)
+        {
+            fprintf(stderr, "Final model exists. No further training is necessary.\n");
+            return;
+        }
+
+        wstring modelFileName = GetModelNameForEpoch(int(startEpoch) - 1);
+        if (startEpoch >= 0)
+            fprintf(stderr, "Starting from checkpoint. Load Network From File %ls.\n", modelFileName.c_str());
+
+        ComputationNetwork* net = startEpoch < 0 ? netBuilder->BuildNetworkFromDescription() :
+                                                   netBuilder->LoadNetworkFromFile(modelFileName);
+        // TODO: BUGBUG: if not starting from checkpoint, need to synchronize initial model
+        // strategy should be to run the initializer above on mpiRank==0, and then broadcast parameters.
+
+        /* if (m_doUnitTest)
+        {
+            if (net.UnitTest() == false)
+                LogicError("unit test on decoder network not passed");
+
+            return;
+        }*/
+
+        startEpoch = max(startEpoch, 0);
+        m_needAdaptRegularization = false;
+
+        TrainOrAdaptModel(startEpoch, *net, *net, nullptr, trainSetDataReader, validationSetDataReader);
+    }
+
+// protected:
+
+    // Get{Train,Eval}CriterionNodes() return a reference that is, unfortunately, dependent on the network.
+    // So we hold those inside here. Not very nice. Also not thread-safe. This may go away once we fix sequence-to-sequence models properly.
+    static map<ComputationNetwork*, vector<ComputationNodeBasePtr>> tmpCriterionNodeSets;
+    // TODO: test this, then remove this comment
+
+    template<class ElemType>
+    std::vector<ComputationNodeBasePtr>& SGD<ElemType>::GetTrainCriterionNodes(ComputationNetwork& net)
+    {
+        fprintf(stderr, "GetTrainCriterionNodes %ls ...\n", m_trainCriterionNodeName.c_str());
+        if (!m_trainCriterionNodeName.empty())
+        {
+            tmpCriterionNodeSets[&net] = net.CriterionNodesFrom(m_trainCriterionNodeName);
+            return tmpCriterionNodeSets[&net];
+        }
+        else
+            return net.FinalCriterionNodes();
+    }
+
+    template<class ElemType>
+    std::vector<ComputationNodeBasePtr>& SGD<ElemType>::GetEvalCriterionNodes(ComputationNetwork& net)
+    {
+        fprintf(stderr, "GetEvalCriterionNodes %ls ...\n", m_evalCriterionNodeName.c_str());
+        if (!m_evalCriterionNodeName.empty())
+        {
+            tmpCriterionNodeSets[&net] = net.CriterionNodesFrom(m_evalCriterionNodeName);
+            return tmpCriterionNodeSets[&net];
+        }
+        else
+            return net.EvaluationNodes();
+    }
+
+    template<class ElemType>
+    void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetwork& net,
+                                          ComputationNetwork& refNet,
+                                          ComputationNodeBasePtr refNode,
+                                          IDataReader<ElemType>* trainSetDataReader,
+                                          IDataReader<ElemType>* validationSetDataReader)
+    {
+        auto & featureNodes = net.FeatureNodes();
+        auto & labelNodes = net.LabelNodes();
+        auto & criterionNodes = GetTrainCriterionNodes(net);
+        auto & evaluationNodes = GetEvalCriterionNodes(net);
+
+        std::map<std::wstring, Matrix<ElemType>*>* inputMatrices = new std::map<std::wstring, Matrix<ElemType>*>();
+        for (size_t i = 0; i < featureNodes.size(); i++)
+        {
+            // TODO: instead, remember the nodes directly, to be able to handle both float and double nodes; current version will crash for mixed networks
+            (*inputMatrices)[featureNodes[i]->NodeName()] = &dynamic_pointer_cast<ComputationNode<ElemType>>(featureNodes[i])->FunctionValues();
+        }
+
+        for (size_t i = 0; i < labelNodes.size(); i++)
+        {
+            (*inputMatrices)[labelNodes[i]->NodeName()] = &dynamic_pointer_cast<ComputationNode<ElemType>>(labelNodes[i])->FunctionValues();
+        }
+
+        // used for KLD regularized adaptation. For all other adaptation techniques,
+        // use MEL to edit the model and use the normal training algorithm
+        std::vector<ComputationNodeBasePtr> refFeatureNodes;
+        if (m_needAdaptRegularization && m_adaptationRegType == AdaptationRegType::KL && refNode != nullptr)
+        {
+            refFeatureNodes.resize(featureNodes.size());
+            for (size_t i = 0; i < featureNodes.size(); i++)
+            {
+                // we need to keep this info to handle deletion
+                refFeatureNodes[i] = refNet.GetNodeFromName(featureNodes[i]->NodeName());
+                refNet.ChangeNode(featureNodes[i]->NodeName(), featureNodes[i]);
+            }
+
+            refNet.RebuildNetwork(refNode);
+        }
+
+        // initializing weights and gradient holder
+        // only one criterion so far TODO: support multiple ones?
+        auto & learnableNodes = net.LearnableNodes(criterionNodes[0]);
+        std::list<Matrix<ElemType>> smoothedGradients;
+
+        for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++)
+        {
+            ComputationNodePtr node = dynamic_pointer_cast<ComputationNode<ElemType>>(*nodeIter);
+            smoothedGradients.push_back(Matrix<ElemType>(node->FunctionValues().GetNumRows(),
+                                                         node->FunctionValues().GetNumCols(),
+                                                         net.GetDeviceId()));
+        }
+
+        double epochCriterion, avgCriterion, prevCriterion, lrControlCriterion;
+        lrControlCriterion = epochCriterion = avgCriterion = prevCriterion = std::numeric_limits<double>::infinity();
+        size_t epochsNotCountedInAvgCriterion = startEpoch % m_learnRateAdjustInterval;
+
+        std::vector<double> epochEvalErrors(evaluationNodes.size(), std::numeric_limits<double>::infinity());
+
+        std::vector<wstring> evalNodeNames;
+        for (size_t i = 0; i < evaluationNodes.size(); i++)
+            evalNodeNames.push_back(evaluationNodes[i]->NodeName());
+
+        size_t totalSamplesSeen = 0;
+        double learnRatePerSample = 0.5f / m_mbSize[startEpoch];
+
+        double learningRateAdjustmentFactor = 1.0f;
+        vector<double> prevLearnRates;
+        prevLearnRates.resize(m_numPrevLearnRates);
+        for (int i = 0; i < m_numPrevLearnRates; i++)
+            prevLearnRates[i] = -1.0;
+
+        // precompute mean and invStdDev nodes and save initial model
+        if (PreCompute(net, trainSetDataReader, featureNodes, labelNodes, inputMatrices) || startEpoch == 0)
+        {
+            // Synchronize all ranks before writing the model to ensure that
+            // everyone is done loading the model
+            if (g_mpi != nullptr)
+                g_mpi->WaitAll();
+
+            net.SaveToFile(GetModelNameForEpoch(int(startEpoch) - 1));
+        }
+
+        // first, we need to normalize the effect of nbruttsineachrecurrentiter
+        if (trainSetDataReader->GetNumParallelSequences() > 1 && m_needToNormalizeLRByParallUtterance)
+        {
+            for (auto& x : m_learningRatesPerSample)
+                x /= (float) trainSetDataReader->GetNumParallelSequences();
+        }
+
+        // first, we need to normalize the effect of nbruttsineachrecurrentiter for momentum
+        if (trainSetDataReader->GetNumParallelSequences() > 1 && m_needToNormalizeMomentumByParallUtterance)
+        {
+            for (auto& x : m_momentumPerSample)
+                x = (float) pow(x, 1.0 / trainSetDataReader->GetNumParallelSequences());
+        }
+
+        bool learnRateInitialized = false;
+        if (startEpoch > 0)
+        {
+            learnRateInitialized = LoadCheckPointInfo(startEpoch - 1,
+                                                      /*out*/ totalSamplesSeen,
+                                                      /*out*/ learnRatePerSample,
+                                                      smoothedGradients,
+                                                      /*out*/ prevCriterion,
+                                                      /*out*/ m_prevChosenMinibatchSize);
+            if (learnRateInitialized)
+                prevLearnRates[startEpoch % m_numPrevLearnRates] = learnRatePerSample;
+        }
+
+        if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::AdjustAfterEpoch &&
+            !learnRateInitialized && m_learningRatesPerSample.size() <= startEpoch)
+        {
+            InvalidArgument(
+                "When using \"AdjustAfterEpoch\", there must either exist a checkpoint file, "
+                "or an explicit learning rate must be specified in config for the starting epoch.");
+        }
+
+        unsigned long dropOutSeed = 1;
+        double prevDropoutRate = 0;
+
+        bool learnRateReduced = false;
+
+        ComputationNetwork::SetMaxTempMemSizeForCNN(net, criterionNodes[0], m_maxTempMemSizeInSamplesForCNN);
+        if (m_needAdaptRegularization && m_adaptationRegType == AdaptationRegType::KL && refNode != nullptr)
+            ComputationNetwork::SetMaxTempMemSizeForCNN(refNet, refNode, m_maxTempMemSizeInSamplesForCNN);
+
+        // --- MAIN EPOCH LOOP
+
+        for (int i = startEpoch; i < (int) m_maxEpochs; i++)
+        {
+            // Synchronize all ranks before proceeding to ensure that
+            // rank 0 has finished writing the previous model file
+            if (g_mpi != nullptr)
+                g_mpi->WaitAll();
+
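+            // Per-epoch flow, as implemented below: set the dropout rate, determine the learning rate
+            // (from the schedule or by search), determine the minibatch size (fixed or adaptive),
+            // run TrainOneEpoch(), log the criteria, optionally evaluate on the CV set, adjust the
+            // learning rate, and write model and checkpoint.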
+            Timer timer;
+            timer.Start();
+
+            // set dropout rate
+            ComputationNetwork::SetDropoutRate(net, criterionNodes[0], m_dropoutRates[i], prevDropoutRate, dropOutSeed);
+
+            // learning rate adjustment
+            if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::None ||
+                (m_learningRatesPerSample.size() > 0 && m_learningRatesPerSample.size() > i))
+            {
+                learnRatePerSample = m_learningRatesPerSample[i];
+            }
+            else if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::SearchBeforeEpoch)
+            {
+                double largestPrevLearnRatePerSample = prevLearnRates[0];
+                for (int j = 1; j < m_numPrevLearnRates; j++)
+                    largestPrevLearnRatePerSample = max(largestPrevLearnRatePerSample, prevLearnRates[j]);
+
+                // return a reasonable learning rate based on the initial minibatchSize
+                double newLearningRatePerSample = SearchForBestLearnRate(net, refNet, refNode, i, learnRatePerSample,
+                                                                         trainSetDataReader, featureNodes, labelNodes,
+                                                                         criterionNodes, evaluationNodes, inputMatrices,
+                                                                         learnableNodes, smoothedGradients,
+                                                                         learnRateInitialized, largestPrevLearnRatePerSample);
+                learningRateAdjustmentFactor = newLearningRatePerSample / learnRatePerSample;
+                learnRatePerSample = newLearningRatePerSample;
+
+                // save per-sample learn rate to support changeable minibatchSize
+                prevLearnRates[i % m_numPrevLearnRates] = learnRatePerSample;
+            }
+
+            learnRateInitialized = true;
+
+            if (learnRatePerSample < m_minLearnRate)
+            {
+                fprintf(stderr, "Learn Rate Per Sample for Epoch[%d] = %.8g is less than minLearnRate %.8g. Training stops.\n",
+                        i + 1, learnRatePerSample, m_minLearnRate);
+                if (m_autoLearnRateSearchType != LearningRateSearchAlgorithm::None)
+                    net.SaveToFile(m_modelPath);
+                break;
+            }
+
+            size_t chosenMinibatchSize;
+            size_t actualMinibatchSize;
+
+            // Through the command line or config file the user can set minibatch sizes on a per-epoch
+            // basis for a set number of epochs. For epochs beyond that point (i.e., from index m_mbSize.size() on),
+            // we either just keep using the last minibatch size, or we use tuning to try to find a better one.
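+            // Example with hypothetical values: minibatchSize = 256:512:1024 in the config gives m_mbSize
+            // three per-epoch entries, so with autoAdjustMinibatch = true the adaptive search below takes
+            // over from the fourth epoch on (i >= m_mbSize.size()).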
+            if (m_autoAdjustMinibatch && i >= m_mbSize.size())
+            {
+                size_t numFramesToUseInSearch = m_numMiniBatch4LRSearch[i] * m_mbSize[i];
+                if (m_epochSize != requestDataSize)
+                {
+                    // ensure that numFramesToUseInSearch does not exceed the total number of frames in the epoch
+                    numFramesToUseInSearch = min(numFramesToUseInSearch, m_epochSize);
+                }
+
+                // use tuning to try and find a better minibatch size
+                chosenMinibatchSize = AdaptiveMinibatchSizing(net, refNet, refNode, i,
+                                                              numFramesToUseInSearch,
+                                                              trainSetDataReader, learnRatePerSample,
+                                                              m_mbSize[i], featureNodes, labelNodes,
+                                                              criterionNodes, evaluationNodes,
+                                                              inputMatrices, learnableNodes,
+                                                              smoothedGradients, learningRateAdjustmentFactor);
+                m_prevChosenMinibatchSize = chosenMinibatchSize;
+            }
+            else
+            {
+                // use the explicitly set minibatch size
+                chosenMinibatchSize = m_mbSize[i];
+            }
+
+            actualMinibatchSize = chosenMinibatchSize;
+            if (trainSetDataReader->GetNumParallelSequences() > 1 && m_needToNormalizeMomentumByParallUtterance)
+                actualMinibatchSize = chosenMinibatchSize * trainSetDataReader->GetNumParallelSequences();
+
+            fprintf(stderr, "Starting Epoch %d: learning rate per sample = %f  momentum = %f\n",
+                    i + 1, learnRatePerSample, MomentumPerMB(m_momentumPerSample[i], actualMinibatchSize));
+
+            TrainOneEpoch(net,
+                          refNet,
+                          refNode,
+                          i,
+                          m_epochSize,
+                          trainSetDataReader,
+                          learnRatePerSample,
+                          chosenMinibatchSize,
+                          featureNodes,
+                          labelNodes,
+                          criterionNodes,
+                          evaluationNodes,
+                          inputMatrices,
+                          learnableNodes, smoothedGradients,
+                          epochCriterion, epochEvalErrors, totalSamplesSeen);
+
+            timer.Stop();
+            double epochTime = timer.ElapsedSeconds();
+
+            if (m_useEvalCriterionControlLR)
+                lrControlCriterion = epochEvalErrors[0];
+            else
+                lrControlCriterion = epochCriterion;
+
+            fprintf(stderr,
+                    "Finished Epoch[%d]: [Training Set] TrainLossPerSample = %.8g; ",
+                    i + 1, epochCriterion);
+            if (epochEvalErrors.size() == 1)
+            {
+                fprintf(stderr,
+                        "EvalErrPerSample = %.8g; Ave LearnRatePerSample = %.10g; EpochTime=%.8g\n",
+                        epochEvalErrors[0], learnRatePerSample, epochTime);
+            }
+            else
+            {
+                fprintf(stderr, "EvalErrPerSample ");
+                for (size_t j = 0; j < epochEvalErrors.size(); j++)
+                    fprintf(stderr, "[%lu]=%.8g; ", j, epochEvalErrors[j]);
+
+                fprintf(stderr, "Ave LearnRatePerSample = %.10g; Epoch Time=%.8g\n",
+                        learnRatePerSample, epochTime);
+
+                fprintf(stderr, "Finished Epoch[%d]: Criterion Node [%ls] Per Sample = %.8g\n",
+                        i + 1, criterionNodes[0]->NodeName().c_str(), epochCriterion);
+
+                for (size_t j = 0; j < epochEvalErrors.size(); j++)
+                {
+                    fprintf(stderr, "Finished Epoch[%d]: Evaluation Node [%ls] Per Sample = %.8g\n",
+                            i + 1, evalNodeNames[j].c_str(), epochEvalErrors[j]);
+                }
+            }
+
+            if ((g_mpi == nullptr) || g_mpi->IsMainNode())
+            {
+                if (validationSetDataReader != trainSetDataReader && validationSetDataReader != nullptr)
+                {
+                    SimpleEvaluator<ElemType> evalforvalidation(net);
+                    vector<wstring> cvSetTrainAndEvalNodes;
+                    cvSetTrainAndEvalNodes.push_back(criterionNodes[0]->NodeName());
+                    cvSetTrainAndEvalNodes.push_back(evaluationNodes[0]->NodeName());
+
+                    vector<double> vScore = evalforvalidation.Evaluate(validationSetDataReader, cvSetTrainAndEvalNodes, m_mbSize[i]);
+                    fprintf(stderr, "Finished Epoch[%d]: [Validation Set] TrainLossPerSample = %.8g; EvalErrPerSample = %.8g\n",
+                            i + 1, vScore[0], vScore[1]);
+
+                    if (m_useCVSetControlLRIfCVExists)
+                    {
+                        if (m_useEvalCriterionControlLR)
+                            lrControlCriterion = vScore[1];
+                        else
+                            lrControlCriterion = vScore[0]; // the first one is the training criterion
+                    }
+                }
+            }
+
+            // broadcast epochCriterion to make sure each processor will have the same learning rate schedule
+            if ((m_parallelizationMethod == ParallelizationMethod::ModelAveragingSGD) && (g_mpi->NumNodesInUse() > 1))
+                g_mpi->Bcast(&epochCriterion, 1, g_mpi->MainNodeRank());
+
+            bool loadedPrevModel = false;
+            size_t epochsSinceLastLearnRateAdjust = i % m_learnRateAdjustInterval + 1;
+            if (avgCriterion == std::numeric_limits<double>::infinity())
+            {
+                avgCriterion = lrControlCriterion;
+            }
+            else
+            {
+                avgCriterion = ((epochsSinceLastLearnRateAdjust - 1 - epochsNotCountedInAvgCriterion) *
+                                avgCriterion + lrControlCriterion) /
+                               (epochsSinceLastLearnRateAdjust - epochsNotCountedInAvgCriterion);
+            }
+
+            if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::AdjustAfterEpoch &&
+                m_learningRatesPerSample.size() <= i && epochsSinceLastLearnRateAdjust == m_learnRateAdjustInterval)
+            {
+                if (std::isnan(avgCriterion) || (prevCriterion - avgCriterion < 0 && prevCriterion != std::numeric_limits<double>::infinity()))
+                {
+                    if (m_loadBestModel)
+                    {
+                        net.LoadPersistableParametersFromFile(GetModelNameForEpoch(i - 1),
+                                                              m_validateAfterModelReloading);
+                        net.ResetEvalTimeStamp();
+                        LoadCheckPointInfo(i - 1,
+                                           /*out*/ totalSamplesSeen,
+                                           /*out*/ learnRatePerSample,
+                                           smoothedGradients,
+                                           /*out*/ prevCriterion,
+                                           /*out*/ m_prevChosenMinibatchSize);
+                        fprintf(stderr, "Loaded the previous model which has better training criterion.\n");
+                        loadedPrevModel = true;
+                    }
+                }
+
+                if (m_continueReduce)
+                {
+                    if (std::isnan(avgCriterion) ||
+                        (prevCriterion - avgCriterion <= m_reduceLearnRateIfImproveLessThan * prevCriterion &&
+                         prevCriterion != std::numeric_limits<double>::infinity()))
+                    {
+                        if (learnRateReduced == false)
+                            learnRateReduced = true;
+                        else
+                        {
+                            net.SaveToFile(GetModelNameForEpoch(i, true));
+
+                            fprintf(stderr, "Finished training and saved final model\n\n");
+                            break;
+                        }
+                    }
+
+                    if (learnRateReduced)
+                    {
+                        learnRatePerSample *= m_learnRateDecreaseFactor;
+                        fprintf(stderr, "learnRatePerSample reduced to %.8g\n", learnRatePerSample);
+                    }
+                }
+                else
+                {
+                    if (std::isnan(avgCriterion) ||
+                        (prevCriterion - avgCriterion <= m_reduceLearnRateIfImproveLessThan * prevCriterion &&
+                         prevCriterion != std::numeric_limits<double>::infinity()))
+                    {
+                        learnRatePerSample *= m_learnRateDecreaseFactor;
+                        fprintf(stderr, "learnRatePerSample reduced to %.8g\n", learnRatePerSample);
+                    }
+                    else if (prevCriterion - avgCriterion > m_increaseLearnRateIfImproveMoreThan * prevCriterion &&
+                             prevCriterion != std::numeric_limits<double>::infinity())
+                    {
+                        learnRatePerSample *= m_learnRateIncreaseFactor;
+                        fprintf(stderr, "learnRatePerSample increased to %.8g\n", learnRatePerSample);
+                    }
+                }
+            }
+            else
+            {
+                if (std::isnan(avgCriterion))
+                    RuntimeError("The training criterion is not a number (NAN). Stop\n");
+            }
+
+            // if we did not load the previous model, then set prevCriterion here
+            if (!loadedPrevModel && epochsSinceLastLearnRateAdjust == m_learnRateAdjustInterval)
+            {
+                prevCriterion = avgCriterion;
+                epochsNotCountedInAvgCriterion = 0;
+            }
+
+            // Synchronize all ranks before proceeding to ensure that
+            // nobody tries reading the checkpoint file at the same time
+            // as rank 0 deleting it below
+            if (g_mpi != nullptr)
+                g_mpi->WaitAll();
+
+            // persist model and check-point info
+            if ((g_mpi == nullptr) || g_mpi->IsMainNode())
+            {
+                net.SaveToFile(GetModelNameForEpoch(i));
+                SaveCheckPointInfo(i, totalSamplesSeen, learnRatePerSample, smoothedGradients, prevCriterion, chosenMinibatchSize);
+                if (!m_keepCheckPointFiles)
+                {
+                    // delete previous checkpoint file to save space
+                    _wunlink(GetCheckPointFileNameForEpoch(i - 1).c_str());
+                }
+            }
+
+            if (learnRatePerSample < 1e-12)
+            {
+                fprintf(stderr, "learnRate per sample is reduced to %.8g which is below 1e-12. stop training.\n",
+                        learnRatePerSample);
+            }
+        }
+
+        // --- END OF MAIN EPOCH LOOP
+
+        // since we linked the feature nodes, we need to remove them from the deletion
+        if (m_needAdaptRegularization && m_adaptationRegType == AdaptationRegType::KL && refNode != nullptr)
+        {
+            for (size_t i = 0; i < refFeatureNodes.size(); i++)
+            {
+                // note we need to handle deletion carefully
+                refNet.ChangeNode(refFeatureNodes[i]->NodeName(), refFeatureNodes[i]);
+            }
+        }
+
+        delete inputMatrices;
+    }
+
+// protected:
+
+    // return true if precomputation is executed.
+    template<class ElemType>
+    bool SGD<ElemType>::PreCompute(ComputationNetwork& net,
+                                   IDataReader<ElemType>* trainSetDataReader,
+                                   std::vector<ComputationNodeBasePtr>& featureNodes,
+                                   std::vector<ComputationNodeBasePtr>& labelNodes,
+                                   std::map<std::wstring, Matrix<ElemType>*>* inputMatrices)
+    {
+        std::list<ComputationNodeBasePtr> nodes = net.GetNodesRequiringPreComputation();
+
+        if (nodes.size() == 0)
+        {
+            fprintf(stderr, "No PreCompute nodes found, skipping PreCompute step\n");
+            return false;
+        }
+
+        fprintf(stderr, "Found %lu PreCompute nodes\n", nodes.size());
+        for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++)
+        {
+            auto node = static_pointer_cast<PreComputedNode<ElemType>>(*nodeIter);
+            fprintf(stderr, "\tNodeName: %ls\n", (node->NodeName()).c_str());
+        }
+
+        // compute
+        //trainSetDataReader->StartMinibatchLoop(m_mbSize[0], 0, requestDataSize);
+        //trainSetDataReader->StartMinibatchLoop(m_mbSize[0], 0, m_epochSize); // only based on one epoch
+        // [1/12/2015 erw] to support large datasets, we usually partition the whole dataset into several epochs,
+        // so we need to use all the data to do precomputing
+        if (m_useAllDataForPreComputedNode)
+        {
+            // using all the data
+            trainSetDataReader->StartMinibatchLoop(m_mbSize[0], 0);
+        }
+        else
+        {
+            // using only one epoch's worth of data
+            trainSetDataReader->StartMinibatchLoop(m_mbSize[0], 0, m_epochSize);
+        }
+
+        while (trainSetDataReader->GetMinibatch(*inputMatrices))
+        {
+            ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
+            ComputationNetwork::UpdateEvalTimeStamps(labelNodes);
+
+            net.SetActualMiniBatchSizeFromFeatures();
+            trainSetDataReader->CopyMBLayoutTo(net.GetMBLayoutPtr());
+            net.VerifyActualNumParallelSequences(trainSetDataReader->GetNumParallelSequences());
+
+            // TODO: Exactly this loop should be INSIDE ComputationNetwork--pass the nodes array instead!
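+            // (Typical PreCompute nodes are the Mean and InvStdDev nodes used for input normalization;
+            // each pass through the loop below lets them accumulate their statistics over one more minibatch.)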
+            for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++)
+                net.Evaluate(*nodeIter);
+        }
+
+        // mark done
+        for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++)
+        {
+            auto node = static_pointer_cast<PreComputedNode<ElemType>>(*nodeIter);
+            node->MarkComputed(true);
+        }
+
+        return true;
+    }
+
+    // return a reasonable initial learning rate based on the initial mbsize
+    template<class ElemType>
+    double SGD<ElemType>::SearchForBestLearnRate(ComputationNetwork& net,
+                                                 ComputationNetwork& refNet,
+                                                 const ComputationNodeBasePtr refNode, const int epochNumber,
+                                                 const double curLearnRate,
+                                                 IDataReader<ElemType>* trainSetDataReader,
+                                                 const std::vector<ComputationNodeBasePtr>& featureNodes,
+                                                 const std::vector<ComputationNodeBasePtr>& labelNodes,
+                                                 const std::vector<ComputationNodeBasePtr>& criterionNodes,
+                                                 const std::vector<ComputationNodeBasePtr>& evaluationNodes,
+                                                 std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
+                                                 const std::list<ComputationNodeBasePtr>& learnableNodes,
+                                                 std::list<Matrix<ElemType>>& smoothedGradients,
+                                                 const bool learnRateInitialized,
+                                                 const double largestPrevLearnRatePerSample)
+    {
+        double epochCriterion = std::numeric_limits<double>::infinity();
+        double prevCriterion = std::numeric_limits<double>::infinity();
+        vector<double> epochEvalErrors(evaluationNodes.size(), std::numeric_limits<double>::infinity());
+
+        size_t totalSamplesSeen = 0;
+        double bestLearnRatePerSample = curLearnRate;
+
+        size_t numFramesToUseInSearch = m_numMiniBatch4LRSearch[epochNumber] * m_mbSize[epochNumber];
+        if (m_epochSize != requestDataSize)
+        {
+            // ensure that numFramesToUseInSearch does not exceed the total number of frames in the epoch
+            numFramesToUseInSearch = min(numFramesToUseInSearch, m_epochSize);
+        }
+
+        double baseCriterion;
+
+        double minLearnRate = m_minLearnRate * 0.3f;
+        double learnRatePerSample = 1.0f / 8.0f / 0.618f / sqrt((double) m_mbSize[epochNumber]);
+
+        if (learnRateInitialized && largestPrevLearnRatePerSample > 0)
+        {
+            // largestPrevLearnRatePerSample is per sample; the first 0.618f is for compensation, the second one is for safety
+            learnRatePerSample = largestPrevLearnRatePerSample / 0.618f / 0.618f;
+        }
+
+        int baseModelEpoch = epochNumber - 1;
+        net.LoadPersistableParametersFromFile(GetModelNameForEpoch(baseModelEpoch), m_validateAfterModelReloading);
+        net.ResetEvalTimeStamp();
+
+        double learnRate = learnRatePerSample;
+        size_t dummyMinibatchSize = 0;
+        LoadCheckPointInfo(baseModelEpoch,
+                           /*out*/ totalSamplesSeen,
+                           /*out*/ learnRate,
+                           smoothedGradients,
+                           /*out*/ prevCriterion,
+                           /*out*/ dummyMinibatchSize);
+
+        // if the model is not changed, this is what we will get
+        TrainOneMiniEpochAndReloadModel(net, refNet, refNode, epochNumber,
+                                        numFramesToUseInSearch, trainSetDataReader, 0, m_mbSize[epochNumber],
+                                        featureNodes, labelNodes,
+                                        criterionNodes, evaluationNodes,
+                                        inputMatrices, learnableNodes,
+                                        smoothedGradients, /*out*/ baseCriterion,
+                                        /*out*/ epochEvalErrors, /*out*/ totalSamplesSeen,
+                                        "BaseAdaptiveLearnRateSearch:");
+
+        if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::SearchBeforeEpoch)
+        {
+            if (prevCriterion == std::numeric_limits<double>::infinity())
+                prevCriterion = baseCriterion;
+
+            double ratio = 0.3;
+
+            if (m_epochSize != requestDataSize)
+                ratio = pow(((double) numFramesToUseInSearch) / m_epochSize, 1.0f / 2);
+
+            baseCriterion = max(ratio * prevCriterion + (1 - ratio) * baseCriterion, baseCriterion);
+        }
+
+        do
+        {
+            learnRatePerSample *= 0.618;
+            TrainOneMiniEpochAndReloadModel(net, refNet, refNode, epochNumber,
+                                            numFramesToUseInSearch, trainSetDataReader,
+                                            learnRatePerSample, m_mbSize[epochNumber], featureNodes,
+                                            labelNodes, criterionNodes,
+                                            evaluationNodes, inputMatrices,
+                                            learnableNodes, smoothedGradients,
+                                            /*out*/ epochCriterion, /*out*/ epochEvalErrors,
+                                            /*out*/ totalSamplesSeen, "AdaptiveLearnRateSearch:");
+
+        } while (std::isnan(epochCriterion) || (epochCriterion > baseCriterion && learnRatePerSample > minLearnRate));
+
+        bestLearnRatePerSample = learnRatePerSample;
+
+        // grid search for the first m_numBestSearchEpoch epochs
+        if (epochNumber < m_numBestSearchEpoch)
+        {
+            double leftLearnRatePerSample = 0.01 / m_mbSize[epochNumber];
+            double rightLearnRatePerSample = learnRatePerSample;
+            double leftCriterion, rightCriterion = epochCriterion;
+
+            TrainOneMiniEpochAndReloadModel(net, refNet, refNode, epochNumber,
+                                            numFramesToUseInSearch, trainSetDataReader,
+                                            leftLearnRatePerSample, m_mbSize[epochNumber],
+                                            featureNodes, labelNodes,
+                                            criterionNodes, evaluationNodes,
+                                            inputMatrices, learnableNodes,
+                                            smoothedGradients, /*out*/ leftCriterion,
+                                            /*out*/ epochEvalErrors, /*out*/ totalSamplesSeen,
+                                            "DetailBaseAdaptiveLearnRateSearch:");
+
+            while (rightLearnRatePerSample > leftLearnRatePerSample * 1.2)
+            {
+                if (rightCriterion > leftCriterion)
+                {
+                    rightLearnRatePerSample *= 0.618;
+
+                    TrainOneMiniEpochAndReloadModel(net, refNet, refNode,
+                                                    epochNumber, numFramesToUseInSearch,
+                                                    trainSetDataReader,
+                                                    rightLearnRatePerSample, m_mbSize[epochNumber],
+                                                    featureNodes, labelNodes,
+                                                    criterionNodes,
+                                                    evaluationNodes,
+                                                    inputMatrices,
+                                                    learnableNodes,
+                                                    smoothedGradients,
+                                                    /*out*/ rightCriterion,
+                                                    /*out*/ epochEvalErrors,
+                                                    /*out*/ totalSamplesSeen,
+                                                    "DetailRightAdaptiveLearnRateSearch:");
+                }
+                else
+                {
+                    leftLearnRatePerSample /= 0.618;
+
+                    TrainOneMiniEpochAndReloadModel(net, refNet, refNode,
+                                                    epochNumber, numFramesToUseInSearch,
+                                                    trainSetDataReader,
+                                                    leftLearnRatePerSample, m_mbSize[epochNumber],
+                                                    featureNodes, labelNodes,
+                                                    criterionNodes,
+                                                    evaluationNodes,
+                                                    inputMatrices,
+                                                    learnableNodes,
+                                                    smoothedGradients,
+                                                    /*out*/ leftCriterion,
+                                                    /*out*/ epochEvalErrors,
+                                                    /*out*/ totalSamplesSeen,
+                                                    "DetailLeftAdaptiveLearnRateSearch:");
+                }
+            }
+
+            bestLearnRatePerSample = (leftCriterion < rightCriterion) ? leftLearnRatePerSample :
+                                                                        rightLearnRatePerSample;
+        }
+
+        fprintf(stderr, "Best Learn Rate Per Sample for Epoch[%d] = %.10g  baseCriterion=%.10g\n",
+                epochNumber + 1, bestLearnRatePerSample, baseCriterion);
+
+        return bestLearnRatePerSample;
+    }
+
+    template<class ElemType>
+    void SGD<ElemType>::TrainOneMiniEpochAndReloadModel(ComputationNetwork& net,
+                                                        ComputationNetwork& refNet,
+                                                        const ComputationNodeBasePtr refNode, const int epochNumber,
+                                                        const size_t epochSize, IDataReader<ElemType>* trainSetDataReader,
+                                                        const double learnRatePerSample,
+                                                        const size_t minibatchSize,
+                                                        const std::vector<ComputationNodeBasePtr>& featureNodes,
+                                                        const std::vector<ComputationNodeBasePtr>& labelNodes,
+                                                        const std::vector<ComputationNodeBasePtr>& criterionNodes,
+                                                        const std::vector<ComputationNodeBasePtr>& evaluationNodes,
+                                                        std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
+                                                        const std::list<ComputationNodeBasePtr>& learnableNodes,
+                                                        std::list<Matrix<ElemType>>& smoothedGradients,
+                                                        /*out*/ double& epochCriterion,
+                                                        /*out*/ std::vector<double>& epochEvalErrors,
+                                                        /*out*/ size_t& totalSamplesSeen,
+                                                        std::string prefixMsg)
+    {
+        TrainOneEpoch(net, refNet, refNode, epochNumber, epochSize,
+                      trainSetDataReader, learnRatePerSample, minibatchSize, featureNodes,
+                      labelNodes, criterionNodes, evaluationNodes,
+                      inputMatrices, learnableNodes, smoothedGradients,
+                      /*out*/ epochCriterion, /*out*/ epochEvalErrors, /*out*/ totalSamplesSeen,
+                      prefixMsg);
+
+        fprintf(stderr, "Finished Mini-Epoch For LearnRate Selection: TrainLossPerSample = %.8g;", epochCriterion);
+
+        if (epochEvalErrors.size() == 1)
+            fprintf(stderr, "EvalErrPerSample = %.8g; Ave LearnRatePerSample = %.10g\n", epochEvalErrors[0], learnRatePerSample);
+        else
+        {
+            fprintf(stderr, "EvalErrPerSample ");
+            for (size_t i = 0; i < epochEvalErrors.size(); i++)
+                fprintf(stderr, "[%lu] = %.8g; ", i, epochEvalErrors[i]);
+            fprintf(stderr, "Ave LearnRatePerSample = %.10g\n", learnRatePerSample);
+        }
+
+        int baseModelEpoch = epochNumber - 1;
+        net.LoadPersistableParametersFromFile(GetModelNameForEpoch(baseModelEpoch), m_validateAfterModelReloading);
+        net.ResetEvalTimeStamp();
+
+        double dummyLearnRate;
+        double dummyPrevCriterion;
+        size_t dummyMinibatchSize = 0;
+        LoadCheckPointInfo(baseModelEpoch,
+                           /*out*/ totalSamplesSeen,
+                           /*out*/ dummyLearnRate,
+                           smoothedGradients,
+                           /*out*/ dummyPrevCriterion,
+                           /*out*/ dummyMinibatchSize);
+    }
+
+    template<class ElemType>
+    size_t SGD<ElemType>::AdaptiveMinibatchSizing(ComputationNetwork& net,
+                                                  ComputationNetwork& refNet,
+                                                  const ComputationNodeBasePtr refNode,
+                                                  const int epochNumber,
+                                                  const size_t numFramesToUseInSearch,
+                                                  IDataReader<ElemType>* trainSetDataReader,
+                                                  const double learnRatePerSample,
+                                                  const size_t initialMinibatchSize,
+                                                  const std::vector<ComputationNodeBasePtr>& featureNodes,
+                                                  const std::vector<ComputationNodeBasePtr>& labelNodes,
+                                                  const std::vector<ComputationNodeBasePtr>& criterionNodes,
+                                                  const std::vector<ComputationNodeBasePtr>& evaluationNodes,
+                                                  std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
+                                                  const std::list<ComputationNodeBasePtr>& learnableNodes,
+                                                  std::list<Matrix<ElemType>>& smoothedGradients,
+                                                  const double learningRateAdjustmentFactor)
+    {
+        size_t minMinibatchSize = initialMinibatchSize;
+        size_t chosenMinibatchSize = initialMinibatchSize;
+
+        // do some pre-adjustment based on the LR
+        // Basically we assume that the LR for epoch 1 is safe for mbsize.
+        // If LR control led to a smaller LR, then we can safely increase the lower bound of the MB size.
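+        // Example with hypothetical values: if the learning rate has dropped to 1/4 of its epoch-0 value
+        // (with learningRateAdjustmentFactor = 1), then learningRateChangeSoFar = 0.25, the sqrt() below
+        // turns that into 0.5, and minMinibatchSize is doubled.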
+        double learningRateChangeSoFar = m_learningRatesPerSample[epochNumber] / m_learningRatesPerSample[0];
+        learningRateChangeSoFar *= learningRateAdjustmentFactor;
+
+        // increasing by the full factor is found to be too aggressive; sqrt() seems more robust
+        learningRateChangeSoFar = sqrt(learningRateChangeSoFar);
+
+        // LR was indeed reduced
+        if (learningRateChangeSoFar < 1.0f)
+        {
+            // we can safely increase MB size (note: this may be bigger than our max)
+            minMinibatchSize = (size_t) (minMinibatchSize / learningRateChangeSoFar);
+        }
+
+        if (epochNumber < 2 && m_prevChosenMinibatchSize != 0)
+        {
+            // newly started training: any previous MB size stored in the model is to be ignored
+            fprintf(stderr, "before epoch 2, previous minibatchSize %zd is "
+                    "considered invalid -> resetting\n", m_prevChosenMinibatchSize);
+            m_prevChosenMinibatchSize = 0;
+        }
+
+        // check if we need to skip
+        if (m_prevChosenMinibatchSize != 0 &&
+            (epochNumber + 1) > m_minibatchSizeTuningFrequency &&
+            (epochNumber + 1) % m_minibatchSizeTuningFrequency != 0)
+        {
+            fprintf(stderr, "AdaptiveMinibatchSearch: Search for a better minibatchSize "
+                    "in epoch %d skipped, keeping minibatchSize of %zd\n",
+                    epochNumber + 1, m_prevChosenMinibatchSize);
+            chosenMinibatchSize = m_prevChosenMinibatchSize;
+        }
+        else
+        {
+            if (m_prevChosenMinibatchSize != 0)
+            {
+                // if m_prevChosenMinibatchSize (the chosen minibatch size for the previous epoch) div 2
+                // is higher than initialMinibatchSize (the minibatch size we start with for this epoch),
+                // then start the search with m_prevChosenMinibatchSize/2 instead of initialMinibatchSize.
+                fprintf(stderr, "AdaptiveMinibatchSearch: Limiting minMinibatchSize to "
+                        "largest of previous minibatchSize = (%d / 2) or %d\n",
+                        (int) m_prevChosenMinibatchSize, (int) minMinibatchSize);
+                minMinibatchSize = max(minMinibatchSize, m_prevChosenMinibatchSize / 2);
+            }
+
+            size_t maxMinibatchSize = m_minibatchSizeTuningMax;
+
+            // only grow at most 2 x compared to the previous step
+            if (m_prevChosenMinibatchSize != 0)
+            {
+                assert(m_prevChosenMinibatchSize >= chosenMinibatchSize);
+
+                fprintf(stderr, "AdaptiveMinibatchSearch: Limiting maxMinibatchSize to "
+                        "previous minibatchSize %zd*2\n", m_prevChosenMinibatchSize);
+                maxMinibatchSize = min(maxMinibatchSize, m_prevChosenMinibatchSize * 2);
+            }
+
+            chosenMinibatchSize = SearchForBestMinibatchSize(net, refNet, refNode, epochNumber,
+                                                             numFramesToUseInSearch, trainSetDataReader,
+                                                             learnRatePerSample, featureNodes,
+                                                             labelNodes, criterionNodes,
+                                                             evaluationNodes, inputMatrices,
+                                                             learnableNodes, smoothedGradients,
+                                                             minMinibatchSize, maxMinibatchSize);
+        }
+
+        return chosenMinibatchSize;
+    }
+
+    static size_t RoundToMultipleOf64(float val)
+    {
+        return 64 * (size_t) ((val + 32) / 64);
+    }
+
+    static size_t RoundToMultipleOf64(size_t val)
+    {
+        return 64 * ((val + 32) / 64);
+    }
+
+    // uses a small percentage of training data of a minibatch to
+    // speculatively train with various MB sizes; then picks the best
+    template<class ElemType>
+    size_t SGD<ElemType>::SearchForBestMinibatchSize(ComputationNetwork& net,
+                                                     ComputationNetwork& refNet,
+                                                     const ComputationNodeBasePtr refNode,
+                                                     const int epochNumber,
+                                                     const size_t numFramesToUseInSearch,
+                                                     IDataReader<ElemType>* trainSetDataReader,
+                                                     const double learnRatePerSample,
+                                                     const std::vector<ComputationNodeBasePtr>& featureNodes,
+                                                     const std::vector<ComputationNodeBasePtr>& labelNodes,
+                                                     const std::vector<ComputationNodeBasePtr>& criterionNodes,
+                                                     const std::vector<ComputationNodeBasePtr>& evaluationNodes,
+                                                     std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
+                                                     const std::list<ComputationNodeBasePtr>& learnableNodes,
+                                                     std::list<Matrix<ElemType>>& smoothedGradients,
+                                                     const size_t minMinibatchSize, const size_t maxMinibatchSize)
+    {
+        // may happen for automatically reduced learning rates
+        if (minMinibatchSize > maxMinibatchSize)
+        {
+            return maxMinibatchSize;
+        }
+
+        size_t trialMinibatchSize = 0;
+        bool isFirstIteration = true;
+        double baseCriterion = 0;
+
+        // increase the minibatch size by a factor of sqrt(2) in each step.
+        const float minibatchSizeTuningFactor = sqrtf(2.0f);
+
+        size_t lastTriedTrialMinibatchSize = 0;
+        double lastTriedTrialEpochCriterion = 0;
+        for (float trialMinibatchSizeFloat = (float) minMinibatchSize;
+             trialMinibatchSizeFloat <= maxMinibatchSize;
+             trialMinibatchSizeFloat *= minibatchSizeTuningFactor)
+        {
+            // round mbsize to something meaningful
+            trialMinibatchSize = RoundToMultipleOf64(trialMinibatchSizeFloat);
+
+            fprintf(stderr, "\nAdaptiveMinibatchSearch: Evaluating trial minibatchSize=%zd out of range %zd..%zd ...\n\n",
+                    trialMinibatchSize, RoundToMultipleOf64(minMinibatchSize), RoundToMultipleOf64(maxMinibatchSize));
+
+            size_t totalSamplesSeen;
+            std::vector<double> epochEvalErrors(evaluationNodes.size(), std::numeric_limits<double>::infinity());
+            double epochCriterion = std::numeric_limits<double>::infinity();
+
+            // Train on a few minibatches so we can observe the epochCriterion as we try increasing
+            // minibatches with each iteration of this loop.
+            TrainOneMiniEpochAndReloadModel(net, refNet, refNode, epochNumber,
+                                            numFramesToUseInSearch, trainSetDataReader,
+                                            learnRatePerSample, trialMinibatchSize, featureNodes,
+                                            labelNodes, criterionNodes,
+                                            evaluationNodes, inputMatrices,
+                                            learnableNodes, smoothedGradients,
+                                            /*out*/ epochCriterion, /*out*/ epochEvalErrors,
+                                            /*out*/ totalSamplesSeen,
+                                            isFirstIteration ? "BaseAdaptiveMinibatchSearch:" :
+                                                               "AdaptiveMinibatchSearch:");
+
+            if (isFirstIteration)
+            {
+                // for the first iteration of the loop only, set baseCriterion
+                // to the result we got from TrainOneMiniEpochAndReloadModel().
+                baseCriterion = epochCriterion;
+                lastTriedTrialMinibatchSize = trialMinibatchSize;
+                lastTriedTrialEpochCriterion = baseCriterion;
+                isFirstIteration = false;
+
+                fprintf(stderr, "AdaptiveMinibatchSearch: Computed BaseCriterion %.10g\n", baseCriterion);
+            }
+            else if (!std::isnan(epochCriterion) &&
+                     (epochCriterion > (baseCriterion * (1.0 + (m_minibatchSearchCriterionErrorMargin / 100.0)))))
+            {
+                // As soon as we see the Criterion (a measure of error) start to get larger than the
+                // Criterion we started with, we stop.
+                // TODO: if this is too sensitive, we can add a margin on the basis of a percentage of
+                // baseCriterion.
+                break;
+            }
+            else
+            {
+                lastTriedTrialMinibatchSize = trialMinibatchSize;
+                lastTriedTrialEpochCriterion = epochCriterion;
+                if (trialMinibatchSizeFloat * minibatchSizeTuningFactor <= maxMinibatchSize)
+                {
+                    fprintf(stderr, "AdaptiveMinibatchSearch: Keep searching... "
+                            "EpochCriterion = %.10g vs BaseCriterion = %.10g\n",
+                            epochCriterion, baseCriterion);
+                }
+            }
+        }
+        fprintf(stderr, "AdaptiveMinibatchSearch: Search successful!!! Chose new minibatchSize of %d. "
+                "EpochCriterion = %.10g vs BaseCriterion = %.10g\n\n",
+                (int) lastTriedTrialMinibatchSize, lastTriedTrialEpochCriterion, baseCriterion);
+
+        return lastTriedTrialMinibatchSize;
+    }
+
+    // Tries to compute derivatives for the whole utterances, which will be
+    // fed to the neural network as features.
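+    // (Presumably only readers that implement GetMinibatchCopy()/SetNetOutput() take part in this;
+    // for all other readers, GetMinibatchCopy() should return false and the loop below is a no-op.)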
+ template + void SGD::AttemptUtteranceDerivativeFeatures(ComputationNetwork& net, + IDataReader* trainSetDataReader, + const std::vector & featureNodes, + std::map*>* inputMatrices) + { + // Tries to read an utterance and run forward computation on the + // whole utterance. + assert(trainSetDataReader != NULL); + std::vector>> uttInfo; + auto pMBLayout = make_shared(); + while (trainSetDataReader->GetMinibatchCopy(uttInfo, *inputMatrices, pMBLayout)) + { + ComputationNetwork::UpdateEvalTimeStamps(featureNodes); + + auto & outputNodes = net.OutputNodes(); + if (outputNodes.empty()) + LogicError("no output node was found."); + + net.SetActualMiniBatchSizeFromFeatures(); + trainSetDataReader->CopyMBLayoutTo(net.GetMBLayoutPtr()); + net.VerifyActualNumParallelSequences(trainSetDataReader->GetNumParallelSequences()); + net.Evaluate(outputNodes[0]); // Only evaluate the first output + trainSetDataReader->SetNetOutput(uttInfo, + dynamic_pointer_cast>(outputNodes[0])->FunctionValues(), + pMBLayout); + } + } + + static string GeneratePaddedFloatOrExpFormat(int padSize, int precision, double value) + { + char format[16]; + char buffer[512]; + + sprintf(format, "%%.%dg", precision); + sprintf(buffer, format, value); + + for (int i = 0; i < strlen(buffer); i++) + { + if (buffer[i] == 'e' || buffer[i] == 'E') + { + sprintf(format, "%%%d.%de", padSize, precision); + return format; + } + } + sprintf(format, "%%%d.%df", padSize, precision); + return format; + } + + template + size_t SGD::TrainOneEpoch(ComputationNetwork& net, + ComputationNetwork& refNet, + const ComputationNodeBasePtr refNode, + const int epochNumber, + const size_t epochSize, + IDataReader* trainSetDataReader, + const double learnRatePerSample, + size_t tunedMBSize, + const std::vector & featureNodes, + const std::vector & labelNodes, + const std::vector & criterionNodes, + const std::vector & evaluationNodes, + std::map*>* inputMatrices, + const std::list & learnableNodes, + std::list>& smoothedGradients, + /*out*/ double& epochCriterion, + /*out*/ std::vector& epochEvalErrors, + /*out*/ size_t& totalSamplesSeen, + std::string prefixMsg) + { + // Since we are getting timing resolution of under microsecond we use double precision + // to ensure that we have enough digits to represent small time measurements. 
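
To see why single precision would not do for the timing accumulators introduced below: a float carries only about seven significant decimal digits, so adding a microsecond-scale measurement to an hour-scale running total is a no-op. A minimal illustration (not part of the patch):

    #include <cstdio>

    int main()
    {
        float  f = 3600.0f;    // one hour of accumulated time, in seconds
        double d = 3600.0;
        f += 5e-6f;            // add a 5-microsecond measurement
        d += 5e-6;
        printf("float:  %.9f\n", f);  // still 3600.000000000 -- the sample was lost
        printf("double: %.9f\n", d);  // 3600.000005000 -- double keeps it
        return 0;
    }
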
+ double totalTimeInMBs = 0; + double epochCriterionLastMBs = 0; + + int numSamplesLastMBs = 0; + std::vector epochEvalErrorsLastMBs(epochEvalErrors.size(), 0); + + // initialize statistics + size_t totalEpochSamples = 0; + + int numMBsRun = 0; + + size_t numEvalNodes = epochEvalErrors.size(); + + // NOTE: the following two local matrices are not used in distGradAgg path + // assume only one training criterion node for each epoch + + Matrix localEpochCriterion(1, 1, net.GetDeviceId()); + Matrix localEpochEvalErrors(1, numEvalNodes, net.GetDeviceId()); + + localEpochCriterion.SetValue(0); + localEpochEvalErrors.SetValue(0); + + bool useGradientAggregation = ((m_parallelizationMethod == ParallelizationMethod::DataParallelSGD) && + (epochNumber >= m_parallelizationStartEpochNum)); + bool useModelAveraging = ((m_parallelizationMethod == ParallelizationMethod::ModelAveragingSGD) && + (epochNumber >= m_parallelizationStartEpochNum)); + bool useParallelTrain = useGradientAggregation || useModelAveraging; + + // MA-related variables + size_t nSamplesSinceLastModelSync = 0; + size_t nSynced = 0; + float nSecondsOnMASync = 0; + float nSecondsSinceLastMAPerfReport = 0; + + if (useGradientAggregation) + { + epochCriterion = double(0.0); + epochEvalErrors.assign(numEvalNodes, double(0.0)); + } + + Profiler profiler(m_numMBsToCUDAProfile); + + // resetting this, so profiling is performed for one epoch only + m_numMBsToCUDAProfile = 0; + + bool useDistributedMBReading = useParallelTrain && + m_enableDistributedMBReading && + trainSetDataReader->SupportsDistributedMBRead(); + if (useDistributedMBReading) + { + trainSetDataReader->StartDistributedMinibatchLoop(tunedMBSize, epochNumber, g_mpi->CurrentNodeRank(), g_mpi->NumNodesInUse(), m_epochSize); + } + else + { + trainSetDataReader->StartMinibatchLoop(tunedMBSize, epochNumber, m_epochSize); + } + + AttemptUtteranceDerivativeFeatures(net, trainSetDataReader, featureNodes, inputMatrices); + + fprintf(stderr, "\nStarting minibatch loop"); + if (useGradientAggregation) + { + fprintf(stderr, ", DataParallelSGD training (MyRank = %d, NumNodes = %d, NumGradientBits = %d)", (int)g_mpi->CurrentNodeRank(), (int)g_mpi->NumNodesInUse(), (int)m_numGradientBits); + } + + if (useDistributedMBReading) + { + fprintf(stderr, ", Distributed reading is ENABLED"); + } + fprintf(stderr, ".\n"); + + Timer timer; + timer.Start(); + + // --- MAIN MINIBATCH LOOP + + for (;;) + { + bool wasDataRead = trainSetDataReader->GetMinibatch(*inputMatrices); + + if (useDistributedMBReading) + { + // In case of distributed reading, the current node needs to continue even with a minibatch size of 0 if any + // other node in the group has a non-zero size minibatch to process. This is needed to ensure that + // the gradient aggregation barriers do not get stuck and also to ensure that all nodes update their weights + // properly using the aggregate gradients from other nodes before moving on to the next epoch even though the current + // node itself may not have any gradient contribution. + std::array numNodesWithDataToProcess; + numNodesWithDataToProcess[0] = wasDataRead ? 
1 : 0; + g_mpi->AllReduce(numNodesWithDataToProcess); + + if (numNodesWithDataToProcess[0] == 0) + { + break; + } + } + else if (!wasDataRead) + { + break; + } + + size_t actualMBSize = 0; + if (wasDataRead) + { + size_t nSlices = trainSetDataReader->GetNumParallelSequences(); + MBLayoutPtr pMBLayout; + if (!useDistributedMBReading && useParallelTrain) + { + // TODO: refactor this as a function + if (trainSetDataReader->RequireSentenceSeg()) + { + pMBLayout = make_shared(); // items get filled in + DecimateMinibatchWithSentences(*inputMatrices, + g_mpi->NumNodesInUse(), g_mpi->CurrentNodeRank(), + nSlices, pMBLayout, + trainSetDataReader); + } + else + { + DecimateMinibatch(*inputMatrices, g_mpi->NumNodesInUse(), g_mpi->CurrentNodeRank()); + } + } + + actualMBSize = net.SetActualMiniBatchSizeFromFeatures(); + if (actualMBSize != 0) + { + if (!useDistributedMBReading && useParallelTrain && trainSetDataReader->RequireSentenceSeg()) + { + *net.GetMBLayoutPtr() = *pMBLayout; + // TODO: ^^ we should just pass pointers; this current code is semantically identical to before the change to MBLayout + net.VerifyActualNumParallelSequences(nSlices); + } + else + { + trainSetDataReader->CopyMBLayoutTo(net.GetMBLayoutPtr()); + net.VerifyActualNumParallelSequences(nSlices); + } + + nSamplesSinceLastModelSync += actualMBSize; + + ComputationNetwork::UpdateEvalTimeStamps(featureNodes); + ComputationNetwork::UpdateEvalTimeStamps(labelNodes); + +#ifndef EVALDLL + if (m_doGradientCheck && GradientCheck(net, criterionNodes, learnableNodes, 0) == false) + LogicError("cannot pass gradient checker"); +#endif + // TODO: currently only support one node regularization + if (m_needAdaptRegularization && m_adaptationRegType == AdaptationRegType::KL && refNode != nullptr) + { +#if 1 + size_t actualMBSize2 = refNet.SetActualMiniBatchSizeFromFeatures(); + if (actualMBSize2 != actualMBSize) + LogicError("TrainOneEpoch: refNet has different MB size than main net??"); +#else + refNet.SetActualMiniBatchSize(actualMBSize); // TODO: SetActualMiniBatchSizeFromFeatures() should have the same result, no? +#endif + *refNet.GetMBLayoutPtr() = *net.GetMBLayoutPtr(); // TODO: This is UNTESTED (before this was missing, seemingly inconsistently) + refNet.VerifyActualNumParallelSequences(trainSetDataReader->GetNumParallelSequences()); + + refNet.Evaluate(refNode); + Matrix::ScaleAndAdd((ElemType)m_adaptationRegWeight, + dynamic_pointer_cast>(refNode)->FunctionValues(), + (ElemType)(1.0 - m_adaptationRegWeight), + dynamic_pointer_cast>(labelNodes[0])->FunctionValues()); + } + + //compute eval node first since when gradient is computed the forward function values + //may be changed and need to be recomputed when gradient and function value share the same matrix + for (size_t i = 0; i < numEvalNodes; i++) + { + net.Evaluate(evaluationNodes[i]); + } + + // only compute gradient when learning rate is large enough + if (learnRatePerSample > m_minLearnRate * 0.01) + { + // use only the first criterion. Is there any possibility to use more? + net.ComputeGradient(criterionNodes[0]); + } + else + { + // use only the first criterion. Is there any possibility to use more? + net.Evaluate(criterionNodes[0]); + } + } + } + + //for now since we share the same label masking flag we call this on the network. 
+ //Later, when we apply different labels on different nodes + //we need to add code to call this function multiple times, one for each criteria node + size_t numSamplesWithLabel = net.GetNumSamplesWithLabel(actualMBSize); + + // Sum of actualMBSize across all nodes when using parallel training + size_t aggregateNumSamples = actualMBSize; + size_t aggregateNumSamplesWithLabel = numSamplesWithLabel; + + //distributed gradient aggregation + if (!useGradientAggregation) + { + if (actualMBSize != 0) + { + Matrix::AddElementToElement(dynamic_pointer_cast>(criterionNodes[0])->FunctionValues(), 0, 0, localEpochCriterion, 0, 0); + for (size_t i = 0; i < numEvalNodes; i++) + Matrix::AddElementToElement(dynamic_pointer_cast>(evaluationNodes[i])->FunctionValues(), 0, 0, localEpochEvalErrors, 0, i); + } + } + else + { + LazyInitDistGradAgg(learnableNodes, numEvalNodes, m_traceLevel); + + //prepare the header + m_gradHeader->numEvalNode = numEvalNodes; + m_gradHeader->numSamples = actualMBSize; + m_gradHeader->numSamplesWithLabel = numSamplesWithLabel; + m_gradHeader->criterion = wasDataRead ? criterionNodes[0]->Get00Element() : 0.0; + for (size_t i = 0; i < numEvalNodes; i++) + m_gradHeader->evalErrors[i] = wasDataRead ? evaluationNodes[i]->Get00Element() : 0.0; + + m_distGradAgg->AggregateGradients(m_gradHeader, epochNumber); + + aggregateNumSamples = m_gradHeader->numSamples; + aggregateNumSamplesWithLabel = m_gradHeader->numSamplesWithLabel; + epochCriterion += m_gradHeader->criterion; + for (size_t i = 0; ievalErrors[i]; + } + + //update model parameters + if ((aggregateNumSamples > 0) && (learnRatePerSample > m_minLearnRate * 0.01)) + { + auto smoothedGradientIter = smoothedGradients.begin(); + for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++, smoothedGradientIter++) + { + ComputationNodeBasePtr node = *nodeIter; + Matrix& smoothedGradient = *smoothedGradientIter; + + UpdateWeights(node, smoothedGradient, learnRatePerSample, + m_momentumPerSample[epochNumber], aggregateNumSamples, + m_L2RegWeight, m_L1RegWeight, + m_needAveMultiplier); + } + } + + if (useModelAveraging && (g_mpi->NumNodesInUse() > 1)) + { + size_t processedSamples = 0; + float secondsSinceLastSyncFinished = 0; + float secondsSpentOnSync = 0; + if (ModelAveragingProcessing(nSamplesSinceLastModelSync, learnableNodes, processedSamples, + secondsSinceLastSyncFinished, secondsSpentOnSync)) + { + // if a sync happens, do some extra work + nSamplesSinceLastModelSync = 0; + nSynced++; + + nSecondsOnMASync += secondsSpentOnSync; + nSecondsSinceLastMAPerfReport += secondsSinceLastSyncFinished; + + if (m_iMASyncStatsTrace > 0) + { + if (nSynced % m_iMASyncStatsTrace == 0) + { + fprintf(stderr, "\t\t-----(model averaging stats) %d-th sync, %8.2f seconds since last report, %5.2f seconds on communication\n", + (int)nSynced, nSecondsSinceLastMAPerfReport, nSecondsOnMASync); + nSecondsOnMASync = 0; + nSecondsSinceLastMAPerfReport = 0; + } + } + } + aggregateNumSamplesWithLabel = processedSamples; + } + + timer.Stop(); + numMBsRun++; + if (m_traceLevel > 0) + { + totalTimeInMBs += timer.ElapsedSeconds(); + numSamplesLastMBs += useModelAveraging ? 
int(actualMBSize) : int(aggregateNumSamplesWithLabel); + + if (numMBsRun % m_numMBsToShowResult == 0) + { + // get the epoch Values updated + if (!useGradientAggregation) + { + timer.Restart(); + epochCriterion = localEpochCriterion.Get00Element(); + for (size_t i = 0; i < numEvalNodes; i++) + epochEvalErrors[i] = localEpochEvalErrors(0, i); + timer.Stop(); + + // Add the last trailing compute + totalTimeInMBs += timer.ElapsedSeconds(); + } + + double trainLossPerSample = (epochCriterion - epochCriterionLastMBs) / numSamplesLastMBs; + string formatString = "%s Epoch[%2d of %d]-Minibatch[%4d-%4d of %d]: SamplesSeen = %d; TrainLossPerSample = " + + GeneratePaddedFloatOrExpFormat(11, 8, trainLossPerSample) + "; "; + fprintf(stderr, formatString.c_str(), + prefixMsg.c_str(), epochNumber + 1, m_maxEpochs, numMBsRun - m_numMBsToShowResult + 1, + numMBsRun, epochSize / tunedMBSize, numSamplesLastMBs, trainLossPerSample); + + for (size_t i = 0; i < numEvalNodes; i++) + { + double evalError = (epochEvalErrors[i] - epochEvalErrorsLastMBs[i]) / numSamplesLastMBs; + formatString = "EvalErr[%lu]PerSample = " + GeneratePaddedFloatOrExpFormat(0, 8, evalError) + "; "; + fprintf(stderr, formatString.c_str(), i, evalError); + } + + double totalTimePerSample = (1000.0 * totalTimeInMBs) / numSamplesLastMBs; + formatString = "TotalTime = " + GeneratePaddedFloatOrExpFormat(0, 5, totalTimeInMBs) + "s; TotalTimePerSample = " + + GeneratePaddedFloatOrExpFormat(0, 5, totalTimePerSample) + "ms; SamplesPerSecond = %d\n"; + fprintf(stderr, formatString.c_str(), + totalTimeInMBs, totalTimePerSample, + static_cast(numSamplesLastMBs / totalTimeInMBs)); + + fflush(stderr); + + // reset statistics + totalTimeInMBs = 0; + numSamplesLastMBs = 0; + + epochCriterionLastMBs = epochCriterion; + for (size_t i = 0; i < numEvalNodes; i++) + epochEvalErrorsLastMBs[i] = epochEvalErrors[i]; + + if (std::isnan(epochCriterion)) + RuntimeError("The training criterion is not a number (NAN). Stop\n"); + } + } + + timer.Restart(); + totalEpochSamples += aggregateNumSamplesWithLabel; + totalSamplesSeen += aggregateNumSamplesWithLabel; + + if (totalEpochSamples >= epochSize) + break; + + // call DataEnd function + // DataEnd does reader specific process if sentence ending is reached + trainSetDataReader->DataEnd(endDataSentence); + + // Tries to set up derivative features for the next utterance. 
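
A note on the distributed-reading logic earlier in this loop: a rank that runs out of data must keep participating in the collectives until every rank is done, otherwise the AllReduce inside gradient aggregation deadlocks. The patch encodes this by all-reducing a has-data count on every iteration. Below is a hedged sketch of that contract written against plain MPI (the project's g_mpi wrapper is not shown here; AnyRankStillHasData is a hypothetical name):

    #include <mpi.h>

    // Every rank must call this on every iteration so the collective stays
    // matched; all ranks leave the loop together when the sum reaches zero.
    static bool AnyRankStillHasData(bool hasLocalData)
    {
        int mine = hasLocalData ? 1 : 0;
        int total = 0;
        MPI_Allreduce(&mine, &total, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
        return total != 0;
    }

Ranks whose own minibatch is empty still join the gradient aggregation with a zero contribution, which is what the numNodesWithDataToProcess logic above implements.
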
+            AttemptUtteranceDerivativeFeatures(net, trainSetDataReader, featureNodes, inputMatrices);
+
+            profiler.NextSample();
+        }
+
+        // --- END MAIN MINIBATCH LOOP
+
+        if (useModelAveraging && (g_mpi->NumNodesInUse() > 1))
+        {
+            // may not be synced after epoch finished, so do the sync here
+            int residualSamples = (int)nSamplesSinceLastModelSync;
+            g_mpi->AllReduce(&residualSamples, 1);
+            totalSamplesSeen += residualSamples;
+            totalEpochSamples += residualSamples;
+            ModelAveragingSync(nSamplesSinceLastModelSync, learnableNodes);
+            nSynced++;
+            nSamplesSinceLastModelSync = 0;
+        }
+
+        if (useGradientAggregation)
+        {
+            epochCriterion /= float(totalEpochSamples);
+            for (size_t i = 0; i < numEvalNodes; i++)
+                epochEvalErrors[i] /= totalEpochSamples;
+        }
+        else
+        {
+            localEpochCriterion /= float(totalEpochSamples);
+            localEpochEvalErrors /= float(totalEpochSamples);
+
+            epochCriterion = localEpochCriterion.Get00Element();
+            for (size_t i = 0; i < numEvalNodes; i++)
+                epochEvalErrors[i] = localEpochEvalErrors(0, i);
+        }
+
+        if (useModelAveraging && (g_mpi->NumNodesInUse() > 1))
+        {
+            // merge epochCriterion and epochEvalErrors over nodes
+            g_mpi->AllReduce(&epochCriterion, 1);
+            g_mpi->AllReduce(epochEvalErrors);
+        }
+        return totalEpochSamples;
+    }
+
+    template<class ElemType>
+    void SGD<ElemType>::LazyInitDistGradAgg(const std::list<ComputationNodeBasePtr>& learnableNodes, int numEvalNodes, int traceLevel)
+    {
+        if (m_parallelizationMethod == ParallelizationMethod::DataParallelSGD)
+        {
+            if (m_distGradAgg == nullptr)
+            {
+                std::vector<Matrix<ElemType>*> learnParamsGradients;
+                learnParamsGradients.reserve(learnableNodes.size());
+                for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++)
+                {
+                    ComputationNodePtr node = dynamic_pointer_cast<ComputationNode<ElemType>>(*nodeIter);
+                    learnParamsGradients.push_back(&(node->GradientValues()));
+                }
+
+                m_distGradAgg = new AllReduceDistGradAggregator<ElemType>(learnParamsGradients, numEvalNodes, m_numGradientBits, g_mpi, m_zeroThresholdFor1Bit, true /*useQuantizationForSelfStripe*/, traceLevel);
+            }
+
+            if (m_gradHeader == nullptr)
+            {
+                m_gradHeader = DistGradHeader::Create(numEvalNodes);
+            }
+        }
+    }
+
+    template<class ElemType>
+    bool SGD<ElemType>::ModelAveragingProcessing(size_t nSamplesSinceLastSync, const std::list<ComputationNodeBasePtr>& learnableNodes, size_t& nProcessedFrames,
+                                                 float& SecondsSinceLastSyncFinished, float& SecondsSpentOnSync)
+    {
+        //////////////////////////////////////////////////////////////////////////
+        // the current strategy is that after each minibatch, we will sync between processors
+        // to decide whether a sync needs to be performed. This is definitely not optimal,
+        // which we will fix later.
+        // TODO: the way we handle the timer is not very good
+        //////////////////////////////////////////////////////////////////////////
+        static bool first = true;
+        static Timer MAtimer;
+        if (first)
+        {
+            MAtimer.Start();
+            first = false;
+        }
+
+        char bNeedToSync = (char)0; // use char for bool
+        if (g_mpi->IsMainNode() && nSamplesSinceLastSync >= m_nFramesBetweenMASync)
+        {
+            // only the main node can decide whether a sync needs to be performed
+            bNeedToSync = (char)1;
+        }
+        g_mpi->Bcast(&bNeedToSync, 1, g_mpi->MainNodeRank());
+        if (bNeedToSync)
+        {
+            MAtimer.Stop();
+            double elapsedsec = MAtimer.ElapsedSeconds();
+            SecondsSinceLastSyncFinished = first ?
0 : (float) elapsedsec ; + MAtimer.Start(); + nProcessedFrames = ModelAveragingSync((int)nSamplesSinceLastSync, learnableNodes); + MAtimer.Stop(); + SecondsSpentOnSync = (float)MAtimer.ElapsedSeconds(); + + MAtimer.Start(); + } + else + { + nProcessedFrames = 0; + return false; + } + return true; + } + + template + size_t SGD::ModelAveragingSync(int nSamplesSinceLastSync, const std::list& learnableNodes) + { + if (g_mpi->NumNodesInUse() <= 1) + { + return nSamplesSinceLastSync; + } + + //======================================== + // Sec. 1 calculate factor + //======================================== + float factor = 0; + int nTotalSamples = nSamplesSinceLastSync; + g_mpi->AllReduce(&nTotalSamples, 1); + if (nTotalSamples <= 0) + { + // prepare for overflow + factor = 1.0f / g_mpi->NumNodesInUse(); + } + else + { + factor = (nSamplesSinceLastSync + 0.0f) / nTotalSamples; + } + + //======================================== + // Sec. 2 sync models based on factor + // Note: this is suboptimal at the moment: + // we do the averaging for each node in a sequence manner, i.e., + // (node1) GPU->CPU->MPI_AllReduce -> (node2)GPU->CPU->MPI_AllReduce + // we can improve it by using a pipeline + // (node1) GPU -> CPU -> MPI_AllReduce + // (node2) GPU -> CPU -> MPI_AllReduce + // (node3) GPU -> CPU -> MPI_AllReduce + //======================================== + for (auto iter = learnableNodes.begin(); iter != learnableNodes.end(); iter++) + { + ComputationNodeBasePtr pNode = *iter; + if (!pNode->NeedGradient()) + continue; + + Matrix& mat = dynamic_pointer_cast>(pNode)->FunctionValues(); + // 1. normalize the weight matrix + Matrix::Scale(factor, mat); + // 2. send weight matrix over MPI nodes; + ElemType* px = mat.CopyToArray(); + size_t nx = mat.GetNumElements(); + + // 3. inplace sum + g_mpi->AllReduce(px, nx); + mat.SetValue(mat.GetNumRows(), mat.GetNumCols(), px); + // 4. 
clean up + delete []px; + } + + return nTotalSamples; + } + +// public: + // UpdateWeightsS - static version of UpdateWeights() + // not static since it wants to access protected methods on the SGD object + template + /*static*/ void SGD::UpdateWeightsS(const SGD* sgd, Matrix& functionValues, + Matrix& gradientValues, + Matrix& smoothedGradient, + const double learnRatePerSample, + const double momentumPerSample, + size_t actualMBSize, + const double L2RegWeight, + const double L1RegWeight, + const bool needAveMultiplier) + { + // we use simple linear (instead of log linear) scaling here + const double momentum = MomentumPerMB(momentumPerSample, actualMBSize); +#if DUMPOUTPUT + fprintf(stderr, "learnRatePerSample=%0.8f, momentum=%0.8f, actualMBSize=%ld\n", + learnRatePerSample, momentum, actualMBSize); + fprintf(stderr, "sgd->GradUpdateType()=%d, sgd->GradientUpdateNoiseStd()=%0.8f\n", + sgd->GradUpdateType(), sgd->GradientUpdateNoiseStd()); + gradientValues.Print("Gradient Input"); + smoothedGradient.Print("Smoothed Gradient Input"); +#endif + + // make actualMBSize is a valid value + assert(actualMBSize > 0); + + //clipping gradients to prevent outliers + sgd->ClipGradient(gradientValues, actualMBSize); + + GradientsUpdateType adpType = sgd->GradUpdateType(); + double noiseStd = sgd->GradientUpdateNoiseStd(); + Matrix sgdUpdateNoise((DEVICEID_TYPE)functionValues.GetDeviceId()); + if (noiseStd > 0) + { + // get the gradient structure since gradient is sparse + sgdUpdateNoise.SetValue(gradientValues); + + // reset its value to random + sgdUpdateNoise.SetGaussianRandomValue(0, (ElemType)noiseStd); + } + + // L2 regularizer + if (L2RegWeight > 0) + { + // multiply by actualMBSize so that it's invariant to minibatch size since learning rate is per sample + Matrix::ScaleAndAdd((ElemType)(L2RegWeight * actualMBSize), functionValues, gradientValues); + } + + if (adpType == GradientsUpdateType::None) + { + smoothedGradient.NormalGrad(gradientValues, functionValues, + (ElemType)learnRatePerSample, (ElemType)momentum); + } + else if (adpType == GradientsUpdateType::AdaGrad || + (adpType == GradientsUpdateType::RmsProp && gradientValues.GetMatrixType() == MatrixType::SPARSE) || + (adpType == GradientsUpdateType::FSAdaGrad && gradientValues.GetMatrixType() == MatrixType::SPARSE)) + { + //rmsprop for sparse is not implemented yet, delegate it with adagrad + + double aveMultiplier = smoothedGradient.Adagrad(gradientValues, needAveMultiplier); + Matrix::ScaleAndAdd((ElemType)(-learnRatePerSample / aveMultiplier), gradientValues, functionValues); + } + else if (adpType == GradientsUpdateType::FSAdaGrad) + { + smoothedGradient.FSAdagrad(actualMBSize, gradientValues, functionValues, learnRatePerSample, momentum); + } + else if (adpType == GradientsUpdateType::RmsProp) + { + double aveMultiplier = smoothedGradient.RmsProp(gradientValues, (ElemType)sgd->m_rpi.gamma, + (ElemType)sgd->m_rpi.inc, (ElemType)sgd->m_rpi.max, + (ElemType)sgd->m_rpi.dec, (ElemType)sgd->m_rpi.min, needAveMultiplier); + Matrix::ScaleAndAdd((ElemType)(-learnRatePerSample / aveMultiplier), gradientValues, functionValues); + } + + if (noiseStd > 0) + { + Matrix::ScaleAndAdd(1.0, sgdUpdateNoise, functionValues); + } + + // L1 regularizer with proximal gradient descent method + if (L1RegWeight > 0) + { + // multiply by actualMBSize so that it's invariant to minibatch size since learning rate is per sample + functionValues.InplaceSoftThreshold((ElemType)(learnRatePerSample * L1RegWeight * actualMBSize)); + } + +#if DUMPOUTPUT + 
functionValues.Print("Parameter Update"); +#endif + } + +// protected: + + // UpdateWeights - update the weights in + template + void SGD::UpdateWeights(const ComputationNodeBasePtr node, + Matrix& smoothedGradient, + const double learnRatePerSample, + const double momentumPerSample, + const size_t actualMBSize, + const double L2RegWeight, const double L1RegWeight, + const bool needAveMultiplier) const + { +#if DUMPOUTPUT + fprintf(stderr, "Update_%ls\n", node->NodeName().c_str()); +#endif + UpdateWeightsS(this, dynamic_pointer_cast>(node)->FunctionValues(), dynamic_pointer_cast>(node)->GradientValues(), + smoothedGradient, learnRatePerSample, momentumPerSample, + actualMBSize, L2RegWeight, L1RegWeight, + needAveMultiplier); + node->UpdateEvalTimeStamp(); + } + + template + void SGD::ClipGradient(Matrix& gradient, const size_t actualMBSize) const + { + if (m_clippingThresholdPerSample != std::numeric_limits::infinity()) + { + double maxGradientPerMB = m_clippingThresholdPerSample * actualMBSize; + if (m_gradientClippingWithTruncation) + gradient.InplaceTruncate((ElemType)(maxGradientPerMB)); + else + { + // norm2 normalized + double gradientNorm = gradient.FrobeniusNorm(); + if (gradientNorm > maxGradientPerMB) + { + double normFactor = maxGradientPerMB / gradientNorm; + gradient *= (ElemType)normFactor; + } + } + } + } + + template + void SGD::SaveCheckPointInfo(const size_t epoch, const size_t totalSamplesSeen, + const double learnRatePerSample, + const std::list>& smoothedGradients, + const double prevCriterion, + const size_t minibatchSize) + { + // In case of parallel training only the main node should we saving the checkpoint to prevent + // the parallel training nodes from colliding to write the same file + if ((g_mpi == nullptr) || g_mpi->IsMainNode()) + { + wstring checkPointFileName = GetCheckPointFileNameForEpoch(int(epoch)); + // Saving into temporary file and then renaming it to the checkPointFileName + // This is a standard trick to avoid havign corrupted checkpoints files if process dies during writing + wstring tempFileName = checkPointFileName + L".tmp"; + + { + File fstream(tempFileName, FileOptions::fileOptionsBinary | FileOptions::fileOptionsWrite); + fstream.PutMarker(FileMarker::fileMarkerBeginSection, L"BCKP"); + + fstream.PutMarker(FileMarker::fileMarkerBeginSection, L"BLearnRate"); + fstream << totalSamplesSeen << learnRatePerSample << prevCriterion; + fstream.PutMarker(FileMarker::fileMarkerEndSection, L"ELearnRate"); + + fstream.PutMarker(FileMarker::fileMarkerBeginSection, L"BMinibatchSize"); + fstream << minibatchSize; + fstream.PutMarker(FileMarker::fileMarkerEndSection, L"EMinibatchSize"); + + fstream.PutMarker(FileMarker::fileMarkerBeginSection, L"BGradient"); + + for (auto smoothedGradientIter = smoothedGradients.begin(); smoothedGradientIter != smoothedGradients.end(); smoothedGradientIter++) + { + const Matrix& smoothedGradient = *smoothedGradientIter; + fstream << smoothedGradient; + } + + fstream.PutMarker(FileMarker::fileMarkerEndSection, L"EGradient"); + + fstream.PutMarker(FileMarker::fileMarkerEndSection, L"ECKP"); + + // Ensuring that data is written + fstream.Flush(); + } + + renameOrDie(tempFileName, checkPointFileName); + } + } + + template + bool SGD::LoadCheckPointInfo(const size_t epochNumber, + /*out*/ size_t& totalSamplesSeen, + /*out*/ double& learnRatePerSample, + std::list>& smoothedGradients, + /*out*/ double& prevCriterion, + /*out*/ size_t& minibatchSize) + { + wstring checkPointFileName = 
GetCheckPointFileNameForEpoch(int(epochNumber)); + if (!fexists(checkPointFileName.c_str())) + { + fprintf(stderr, "Warning: checkpoint file is missing. learning parameters will be initialized from 0\n"); + return false; + } + + File fstream(checkPointFileName, + FileOptions::fileOptionsBinary | FileOptions::fileOptionsRead); + fstream.GetMarker(FileMarker::fileMarkerBeginSection, L"BCKP"); + + fstream.GetMarker(FileMarker::fileMarkerBeginSection, L"BLearnRate"); + fstream >> totalSamplesSeen >> learnRatePerSample >> prevCriterion; + fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ELearnRate"); + + if (fstream.TryGetMarker(FileMarker::fileMarkerBeginSection, L"BMinibatchSize")) + { + fstream >> minibatchSize; + fstream.GetMarker(FileMarker::fileMarkerEndSection, L"EMinibatchSize"); + } + else + { + minibatchSize = m_mbSize[epochNumber]; + } + + fstream.GetMarker(FileMarker::fileMarkerBeginSection, L"BGradient"); + + for (auto smoothedGradientIter = smoothedGradients.begin(); smoothedGradientIter != smoothedGradients.end(); smoothedGradientIter++) + { + Matrix& smoothedGradient = *smoothedGradientIter; + fstream >> smoothedGradient; + } + fstream.GetMarker(FileMarker::fileMarkerEndSection, L"EGradient"); + + fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ECKP"); + + return true; + } + + template + wstring SGD::GetCheckPointFileNameForEpoch(const int epoch) + { + return GetModelNameForEpoch(epoch) + L".ckp"; + } + + template + wstring SGD::GetModelNameForEpoch(const int epoch, bool bLastModel) + { + int epoch1Base = epoch + 1; + if (epoch1Base == m_maxEpochs || bLastModel) + { + return m_modelPath; + } + else + { + wstring w = msra::strfun::wstrprintf(L"%ls.%d", m_modelPath.c_str(), (int)epoch1Base); + return w; + } + + } + + // return -1 if nothing exists + template // TODO: needed? + int SGD::DetermineStartEpoch(const bool makeMode) + { + if (!makeMode) + { + // always start from scratch + return -1; + } + + int firstEpoch = -1; + + wstring curEpochFile = GetModelNameForEpoch(int(m_maxEpochs) - 1); + for (int e = int(m_maxEpochs) - 1; e >= -1; e--) + { + const wstring prevEpochFile = GetModelNameForEpoch(e - 1); + + if (msra::files::fuptodate(curEpochFile, prevEpochFile, false)) + { + firstEpoch = size_t(e) + 1; + break; + } + else + { + curEpochFile = prevEpochFile; + } + } + + return firstEpoch; + } + +#define EPSILON 1e-5 + + template + bool SGD::GradientCheck(ComputationNetwork& net, + const std::vector & criterionNodes, + const std::list & learnableNodes, + int npos) + { + vector errMsgs; + + // gradient checking + for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++) + { + ComputationNodePtr node = dynamic_pointer_cast>(*nodeIter); + char wstrtmp[2048]; + + for (size_t itry = 0; itry < min((size_t)50, node->FunctionValues().GetNumElements()); itry++) + { + /// no support to sparse matrix yet + int irow = (int) fmod(rand(), node->FunctionValues().GetNumRows() - 1); + int icol = (int) fmod(rand(), node->FunctionValues().GetNumCols() - 1); + irow = max(0, irow); + icol = max(0, icol); + + fprintf(stderr, "\n###### d%ls######\n", node->NodeName().c_str()); + + double eOrg = node->FunctionValues()(irow, icol); + node->FunctionValues().TransferToDeviceIfNotThere(net.GetDeviceId(), true); + + node->UpdateEvalTimeStamp(); + + // use only the first criterion. 
Is there any possibility to use more?
+                net.ComputeGradient<ElemType>(criterionNodes[npos]);
+
+                if (node->GradientValues().GetMatrixType() == MatrixType::SPARSE)
+                {
+                    break;
+                }
+
+                //double mbEvalCri =
+                //criterionNode should be a scalar
+                // TODO: why is this value not used?
+                criterionNodes[npos]->Get00Element();
+                double eGradErr = node->GradientValues()(irow, icol);
+                node->GradientValues().TransferToDeviceIfNotThere(net.GetDeviceId(), true);
+
+                double ePos = eOrg + EPSILON;
+                double eNeg = eOrg - EPSILON;
+
+                node->FunctionValues()(irow, icol) = (ElemType)ePos;
+                node->FunctionValues().TransferToDeviceIfNotThere(net.GetDeviceId(), true);
+
+                node->UpdateEvalTimeStamp();
+                net.Evaluate(criterionNodes[npos]);
+                //criterionNode should be a scalar
+
+                double mbEvalCriPos = criterionNodes[npos]->Get00Element(); // TODO: make Get00Element() a function of ComputationNodeBase
+
+                node->FunctionValues()(irow, icol) = (ElemType)eNeg;
+                node->FunctionValues().TransferToDeviceIfNotThere(net.GetDeviceId(), true);
+
+                node->UpdateEvalTimeStamp();
+                net.Evaluate(criterionNodes[npos]);
+
+                // criterionNode should be a scalar
+                double mbEvalCriNeg = criterionNodes[npos]->Get00Element();
+
+                // back to its original parameter value
+                node->FunctionValues()(irow, icol) = (ElemType)eOrg;
+                node->FunctionValues().TransferToDeviceIfNotThere(net.GetDeviceId(), true);
+
+                // check if they are consistent
+                double eGradNum = ((mbEvalCriPos - mbEvalCriNeg) / (ePos - eNeg));
+                double threshold = pow(10.0,
+                                       max(0.0,
+                                           ceil(log10(min(fabs(eGradErr),
+                                                          fabs(eGradNum))))) - (int)m_gradientCheckSigDigit);
+                double diff = fabs(eGradErr - eGradNum);
+                bool wrong = (std::isnan(diff) || diff > threshold);
+                if (wrong)
+                {
+                    fprintf(stderr, "\nd%ls Numeric gradient = %e, Error BP gradient = %e\n",
+                            node->NodeName().c_str(), eGradNum, eGradErr);
+                    sprintf(wstrtmp, "\nd%ls Numeric gradient = %e, Error BP gradient = %e\n",
+                            node->NodeName().c_str(), eGradNum, eGradErr);
+                    errMsgs.push_back(wstrtmp);
+                }
+            }
+        }
+
+        return errMsgs.size() == 0;
+    }
+
+template class SGD<float>;
+template class SGD<double>;
+
+// TODO: does not build--but part is used directly from CNTK.cpp
+//template class MultiNetworksSGD<float>;
+//template class MultiNetworksSGD<double>;
+
+}}}
diff --git a/Math/Math/GPUMatrix.cu b/Math/Math/GPUMatrix.cu
index aaaeb36b2b85..cf3fbf4bdef6 100755
--- a/Math/Math/GPUMatrix.cu
+++ b/Math/Math/GPUMatrix.cu
@@ -1277,6 +1277,26 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         }
     }
 
+    template<class ElemType>
+    void GPUMatrix<ElemType>::FSAdagrad(GPUMatrix<ElemType>& gradients, GPUMatrix<ElemType>& functionValues,
+                                        ElemType learnRatePerSample, ElemType momentum, ElemType adaWeight, ElemType adaMul)
+    {
+        size_t numColsNeeded = 2 * gradients.GetNumCols();
+
+        if (IsEmpty() || GetNumCols() < numColsNeeded)
+        {
+            Resize(gradients.GetNumRows(), numColsNeeded);
+            SetValue(0.0);
+        }
+
+        assert(GetNumRows() == gradients.GetNumRows() && GetNumCols() == numColsNeeded);
+
+        size_t n = gradients.GetNumElements();
+        int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock;
+        _fsadagrad<<<blocksPerGrid, threadsPerBlock>>>(n, gradients.m_pArray, m_pArray, m_pArray + n, functionValues.m_pArray,
+                                                       learnRatePerSample, momentum, adaWeight, adaMul);
+    }
+
 template<class ElemType>
 ElemType GPUMatrix<ElemType>::RmsProp(GPUMatrix<ElemType>& gradients,
     ElemType RMS_GAMMA,
diff --git a/Math/Math/GPUMatrix.h b/Math/Math/GPUMatrix.h
index ee7bc2139be9..b80900f95ad7 100755
--- a/Math/Math/GPUMatrix.h
+++ b/Math/Math/GPUMatrix.h
@@ -124,6 +124,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         ElemType* BufferPointer() const {return m_pArray;}
         ElemType Adagrad(GPUMatrix<ElemType>& gradients, const bool
needAveMultiplier); + void FSAdagrad(GPUMatrix& gradients, GPUMatrix& functionValues, + ElemType learnRatePerSample, ElemType momentum, ElemType adaWeight, ElemType adaMul); ElemType RmsProp(GPUMatrix& gradients, ElemType RMS_GAMMA, ElemType RMS_WGT_INC, ElemType RMS_WGT_MAX, ElemType RMS_WGT_DEC, ElemType RMS_WGT_MIN, const bool needAveMultiplier); void Reshape(const size_t numRows, const size_t numCols); diff --git a/Math/Math/GPUMatrixCUDAKernels.cu b/Math/Math/GPUMatrixCUDAKernels.cu index 18982cc13488..d45b9e006d87 100755 --- a/Math/Math/GPUMatrixCUDAKernels.cu +++ b/Math/Math/GPUMatrixCUDAKernels.cu @@ -1110,6 +1110,36 @@ __global__ void _adagrad4BlockSparse( multipliers[id] = 1 / temp; } +template +__global__ void _fsadagrad(CUDA_LONG size, ElemType* grad, ElemType* smoothAda, ElemType* smoothMom, ElemType* val, + ElemType lr, ElemType mom, ElemType adaWeight, ElemType adaMul) +{ + CUDA_LONG idx = blockIdx.x * blockDim.x + threadIdx.x; + CUDA_LONG stride = blockDim.x * gridDim.x; + for (; idx < size; idx += stride) + { + ElemType g = grad[idx]; + ElemType adaSqr = adaWeight * smoothAda[idx] + (1.0f - adaWeight) * g * g; + smoothAda[idx] = adaSqr; + if (adaSqr != 0.0f) + { + ElemType w = adaMul * rsqrtf(adaSqr); + if (w > 10.0f) + w = 10.0f; + g *= w; + } + + if (mom > 0.0f) + { + g = mom * smoothMom[idx] + (1.0f - mom) * g; + smoothMom[idx] = g; + } + + g *= lr; + val[idx] -= g; + } +} + template __global__ void _rmsprop_init( ElemType* avars, ElemType* signs, ElemType* steps, diff --git a/Math/Math/Matrix.cpp b/Math/Math/Matrix.cpp index e122905be16f..1707c8222851 100644 --- a/Math/Math/Matrix.cpp +++ b/Math/Math/Matrix.cpp @@ -1296,6 +1296,27 @@ namespace Microsoft { namespace MSR { namespace CNTK { return aveMultiplier; } + template + void Matrix::FSAdagrad(size_t mbSize, Matrix& gradients, Matrix& functionValues, const ElemType learnRatePerSample, const ElemType momentum) + { + // REVEW alexeyk: hardcoded for now, taken from DBN. Naming is the same as in DBN. + const size_t adagradT = 2 * 3600 * 100; + const ElemType adagradkeepweight = static_cast(exp(-1.0 * mbSize / adagradT)); + + const ElemType targetadagradavdenom = 0.0025; // 1/400 magic constant + static ElemType aggadagradsqrframes = 0; + aggadagradsqrframes = adagradkeepweight * aggadagradsqrframes + (1.0f - adagradkeepweight) * mbSize; + const ElemType targetadagradavdenom_x_sqrtadagradsqrframes = static_cast(targetadagradavdenom * sqrt(aggadagradsqrframes)); + + DISPATCH_MATRIX_ON_FLAG(&gradients, + &gradients, + SetDataLocation(CPU), + m_GPUMatrix->FSAdagrad(*gradients.m_GPUMatrix, *functionValues.m_GPUMatrix, learnRatePerSample, momentum, adagradkeepweight, targetadagradavdenom_x_sqrtadagradsqrframes); SetDataLocation(GPU), + NOT_IMPLEMENTED, + NOT_IMPLEMENTED + ); + } + template ElemType Matrix::RmsProp(Matrix& gradients, ElemType RMS_GAMMA, diff --git a/Math/Math/Matrix.h b/Math/Math/Matrix.h index 34eac851b7f8..d848d2e174f6 100644 --- a/Math/Math/Matrix.h +++ b/Math/Math/Matrix.h @@ -27,41 +27,41 @@ namespace Microsoft { namespace MSR { namespace CNTK { // there is a version down there of ColumnSlice() that abstracts the number of streams // TODO: This may not belong here, but having it in ComputeNode would require syntax changes, while having it as a member here only requires a local find-replace. Let's make it work first, then decide how to refactor. - // the looping versions of EvaluateThisNode() and ComputeInputPartial() take a frame range, through this structure - // It can cast from a size_t, i.e. 
those functions can be called passing a size_t in place of the FrameRange. - // TODO: GetNumParallelSequences() should be subsumed here & removed from nodes - // TODO: Where this design currently breaks: - // - BatchModeNodes must access GetNumParallelSequences(), yet operate on the whole sequence - // - likewise, LSTMNode does its own iteration, hence needs access to GetNumParallelSequences() or NumCols() in the whole-batch iterator - // - RecurrentNodes access frames with a time shift, where out-of-bounds ones access a different matrix' values - // - RecurrentNodes iterate over individual slices--need a sub-setting constructor from a FrameRange to another? - // - RecurrentNodes access boundary info with a similar pattern, but boundary info has a different #streams (namely, 1) - // TODO: Turns out, a FrameRange is either a whole batch or a single frame. - struct FrameRange - { - const size_t timeIdxInSeq; // start frame - const size_t samplesInRecurrentStep; // number of samples in this step --BUGBUG: this should be part of MBLayout, not FrameRange - // can construct from a single size_t -> a single-frame range - //FrameRange(size_t timeIdxInSeq) : timeIdxInSeq(timeIdxInSeq), samplesInRecurrentStep(0)/*FIX THIS*/{} - FrameRange(size_t timeIdxInSeq, size_t samplesInRecurrentStep) : timeIdxInSeq(timeIdxInSeq), samplesInRecurrentStep(samplesInRecurrentStep){} - // or without arguments -> entire minibatch / no frame-range - FrameRange() : timeIdxInSeq(0), samplesInRecurrentStep(SIZE_MAX) {} - // code that can only handle single-frame ranges will call t() to get the time index, which will throw if numFrames != 1 - // Some functions need just the time index, e.g. for looking up stuff in m_boundaryInfo. That's where an unscaled index is needed (as opposed to startColumn()). - size_t t() const { EnsureNotAllFrames(); return timeIdxInSeq; } - // multi-frame slice case: these two get startFrame and numFrames - size_t StartColumn() const { EnsureNotAllFrames(); return timeIdxInSeq * samplesInRecurrentStep; } - size_t NumCols() const { EnsureNotAllFrames(); return samplesInRecurrentStep; } - bool IsAllFrames() const { return samplesInRecurrentStep == SIZE_MAX; } // if true then above functions may not be called; caller must use entire batch instead - private: - FrameRange(const FrameRange & other);// : timeIdxInSeq(other.timeIdxInSeq), numFrames(other.numFrames) { } - void operator=(const FrameRange &); - void EnsureNotAllFrames() const - { - if (IsAllFrames()) - LogicError("FrameRange::t() called when frame range refers to whole minibatch"); - } - }; + // the looping versions of EvaluateThisNode() and ComputeInputPartial() take a frame range, through this structure + // It can cast from a size_t, i.e. those functions can be called passing a size_t in place of the FrameRange. + // TODO: GetNumParallelSequences() should be subsumed here & removed from nodes + // TODO: Where this design currently breaks: + // - BatchModeNodes must access GetNumParallelSequences(), yet operate on the whole sequence + // - likewise, LSTMNode does its own iteration, hence needs access to GetNumParallelSequences() or NumCols() in the whole-batch iterator + // - RecurrentNodes access frames with a time shift, where out-of-bounds ones access a different matrix' values + // - RecurrentNodes iterate over individual slices--need a sub-setting constructor from a FrameRange to another? 
+ // - RecurrentNodes access boundary info with a similar pattern, but boundary info has a different #streams (namely, 1) + // TODO: Turns out, a FrameRange is either a whole batch or a single frame. + struct FrameRange + { + const size_t timeIdxInSeq; // start frame + const size_t samplesInRecurrentStep; // number of samples in this step --BUGBUG: this should be part of MBLayout, not FrameRange + // can construct from a single size_t -> a single-frame range + //FrameRange(size_t timeIdxInSeq) : timeIdxInSeq(timeIdxInSeq), samplesInRecurrentStep(0)/*FIX THIS*/{} + FrameRange(size_t timeIdxInSeq, size_t samplesInRecurrentStep) : timeIdxInSeq(timeIdxInSeq), samplesInRecurrentStep(samplesInRecurrentStep){} + // or without arguments -> entire minibatch / no frame-range + FrameRange() : timeIdxInSeq(0), samplesInRecurrentStep(SIZE_MAX) {} + // code that can only handle single-frame ranges will call t() to get the time index, which will throw if numFrames != 1 + // Some functions need just the time index, e.g. for looking up stuff in m_boundaryInfo. That's where an unscaled index is needed (as opposed to startColumn()). + size_t t() const { EnsureNotAllFrames(); return timeIdxInSeq; } + // multi-frame slice case: these two get startFrame and numFrames + size_t StartColumn() const { EnsureNotAllFrames(); return timeIdxInSeq * samplesInRecurrentStep; } + size_t NumCols() const { EnsureNotAllFrames(); return samplesInRecurrentStep; } + bool IsAllFrames() const { return samplesInRecurrentStep == SIZE_MAX; } // if true then above functions may not be called; caller must use entire batch instead + private: + FrameRange(const FrameRange & other);// : timeIdxInSeq(other.timeIdxInSeq), numFrames(other.numFrames) { } + void operator=(const FrameRange &); + void EnsureNotAllFrames() const + { + if (IsAllFrames()) + LogicError("FrameRange::t() called when frame range refers to whole minibatch"); + } + }; enum CurrentDataLocation { @@ -212,6 +212,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // TODO: all these scalars should be passed as doubles and cast down inside void NormalGrad(Matrix& gradients, Matrix& functionValues, const ElemType learnRatePerSample, const ElemType momentum); ElemType Adagrad(Matrix& gradients, const bool needAveMultiplier); + void FSAdagrad(size_t mbSize, Matrix& gradients, Matrix& functionValues, const ElemType learnRatePerSample, const ElemType momentum); ElemType RmsProp(Matrix& gradients, ElemType RMS_GAMMA, ElemType RMS_WGT_INC, ElemType RMS_WGT_MAX, ElemType RMS_WGT_DEC, ElemType RMS_WGT_MIN, const bool needAveMultiplier); // TODO: should Reshape() return a new Matrix object that contains a reference to the original? @@ -514,20 +515,20 @@ namespace Microsoft { namespace MSR { namespace CNTK { typedef Matrix SingleMatrix; typedef Matrix DoubleMatrix; - - // MBLayout -- layout information of minibatch - // Currently this is to bind the two somewhat inconsistent boundary flags and packing flags. - // Once that is unified, we can clean it up further. For now, it's just moving the data members and encapsulating access to them where possible. - // This should probably also contain m_actualNumParallelSequencesInEachRecIter (which should be node-dependent). - // TODO: move this to an appropriate place and name it properly - // NOTE: This class represents an abstraction of an originally distributed/code-duped way of defining and accessing the MB layout. - // The code below represents the actual use cases I encountered. 
Not all are, I believe, needed to be as they are; this class could be simplified/streamlined much further. - // Some wackiness below is explained by this. - // TODO: frame-randoized MBs are now represented as one stream of many frames. This is wrong; they should be one-frame utterances with many streams. Once we fully abstract out Data access, this can be changed easily. - struct MBLayout - { - MBLayout() : m_sentenceBoundaryFlags(CPUDEVICE) { } - private: // one day... + + // MBLayout -- layout information of minibatch + // Currently this is to bind the two somewhat inconsistent boundary flags and packing flags. + // Once that is unified, we can clean it up further. For now, it's just moving the data members and encapsulating access to them where possible. + // This should probably also contain m_actualNumParallelSequencesInEachRecIter (which should be node-dependent). + // TODO: move this to an appropriate place and name it properly + // NOTE: This class represents an abstraction of an originally distributed/code-duped way of defining and accessing the MB layout. + // The code below represents the actual use cases I encountered. Not all are, I believe, needed to be as they are; this class could be simplified/streamlined much further. + // Some wackiness below is explained by this. + // TODO: frame-randoized MBs are now represented as one stream of many frames. This is wrong; they should be one-frame utterances with many streams. Once we fully abstract out Data access, this can be changed easily. + struct MBLayout + { + MBLayout() : m_sentenceBoundaryFlags(CPUDEVICE) { } + private: // one day... /// a matrix of n_stream x n_length /// n_stream is the number of streams /// n_length is the maximum lenght of each stream @@ -540,96 +541,96 @@ namespace Microsoft { namespace MSR { namespace CNTK { /// the second data stream has two sentences, with 0 indicating begining of sentences /// you may use 1 even if a sentence begins at that position, in this case, the trainer will carry over hidden states to the following /// frame. - Matrix m_sentenceBoundaryFlags; // (t,stream) - // ^^ float -> MinibatchPackingFlags, right? Or unsigned char; or change that to 'char' because Matrix already exists - // This matrix ^^ is always in CPU memory --TODO: should rather be a matrix of some int - /// conditionally point to either a pointer to that provided by network, or point to - /// an individual sentence boundary info, which happens if timeStep > 1 is required for PastValue node + Matrix m_sentenceBoundaryFlags; // (t,stream) + // ^^ float -> MinibatchPackingFlags, right? Or unsigned char; or change that to 'char' because Matrix already exists + // This matrix ^^ is always in CPU memory --TODO: should rather be a matrix of some int + /// conditionally point to either a pointer to that provided by network, or point to + /// an individual sentence boundary info, which happens if timeStep > 1 is required for PastValue node /// a matrix of 1 x n_length /// != 0 denotes the case that there exists sentence begin or no_labels case in this frame /// == 0 denotes such case is not in this frame - vector m_minibatchPackingFlags; - // ^^ This is some form of aggregate of m_sentenceBoundaryFlags taken over all streams. 
TODO: find out the exact condition - public: - - bool Is(size_t t, MinibatchPackingFlags f) const { return m_minibatchPackingFlags[t] & f; } - bool Is(size_t id, size_t t, MinibatchPackingFlags f) const { return ((MinibatchPackingFlags)(int)m_sentenceBoundaryFlags(id, t)) & f; } - - // get info for one frame; used in DelayedValueNode - // TODO: clean this up, we can do this more nicely - pair, MinibatchPackingFlags> GetFrame(size_t t) const - { - return make_pair(m_sentenceBoundaryFlags.ColumnSlice(t, 1), m_minibatchPackingFlags[t]); - } - - // set a boundary flag - // This ORs the flags, i.e. it assumes that the matrix has been cleared before. - // NOTE: original code that calls this did not OR the matrix, but did OR the vector value. I visually checked that it was cleared before, but might have gotten it wrong. - void Set(size_t id, size_t t, MinibatchPackingFlags f) - { - m_sentenceBoundaryFlags.SetValue(id, t, (float)(((MinibatchPackingFlags)(int)m_sentenceBoundaryFlags(id, t)) | f)); - m_minibatchPackingFlags[t] |= f; - } - // same but not ORing --TODO: is this distinction needed? - void Reset(size_t id, size_t t, MinibatchPackingFlags f) - { - m_sentenceBoundaryFlags.SetValue(id, t, (float)(int)f); - m_minibatchPackingFlags[t] |= f; - } - // needed in DelayedValueNodeBase - // TODO: this is wicked in that the matrix keeps only the NoLabel flag, while the vector keeps all (just gets ORed into) - void Mask(size_t id, size_t t, MinibatchPackingFlags f) - { - m_sentenceBoundaryFlags.SetValue(id, t, (float)(((MinibatchPackingFlags)(int)m_sentenceBoundaryFlags(id, t)) & f)); - //m_minibatchPackingFlags[t] &= f; - } - - // for LSTMNode ony, which is deprecated, only to make it compile easily: also used in FindBestPathWithVariableLength() and FindBestPath() in a strange way - Matrix & GetM() { return m_sentenceBoundaryFlags; } - // and for DecimateMinibatchWithSentences() which should be revised - vector & GetV() { return m_minibatchPackingFlags; } - - // resize and reset all frames to None (note: this is an invalid state and must be fixed by caller afterwards) - void Resize(size_t numStreams, size_t numFrames) - { - m_sentenceBoundaryFlags.Resize(numStreams, numFrames); - m_sentenceBoundaryFlags.SetValue((float)((int)MinibatchPackingFlags::None)); - m_minibatchPackingFlags.assign(m_sentenceBoundaryFlags.GetNumCols(), MinibatchPackingFlags::None); - } - - // test a pre-condition --TODO: we only resize this thing here, so this should not be necessary in the future - void validate() const { if (m_minibatchPackingFlags.size() != m_sentenceBoundaryFlags.GetNumCols()) LogicError("MBLayout: GetSize() != GetNumTimeSteps()"); } - - // these accessors were for now just collected from actual usage; need to be cleaned up once this compiles again - size_t GetNumTimeSteps() const { validate(); return m_sentenceBoundaryFlags.GetNumCols(); } - size_t GetNumParallelSequences() const { return (m_sentenceBoundaryFlags.GetNumRows() == 0) ? 1 : m_sentenceBoundaryFlags.GetNumRows(); } // 1 stream if no matrix - size_t GetSize() const { validate(); return m_minibatchPackingFlags.size(); } - - // if we have no matrix/vector, this means no frame has any flag set - // We still can have a number of rows in this case. 
- bool IsAllNone() const { validate(); return m_minibatchPackingFlags.empty(); } - void SetAllNone() { Resize(0, 0); } - -#if 0 // we have this pattern often: - // TODO: mbSize and #slices must also move into MBLayout - evalnet->SetActualMiniBatchSize(mbSize); - dataReader->CopyMBLayoutTo(evalnet->GetMBLayoutPtr()); - evalnet->VerifyActualNumParallelSequences(dataReader->GetNumParallelSequences()); -#endif -#if 0 // a VERY TELLING piece of code - // packing flags = frame-wise or over all streams of start and end - for (size_t nt = 0; nt < nMBSize; nt++) - { - for (size_t ns = 0; ns < nSlices; ns++) - { - if (newBoundary(ns, nt) == ((int) MinibatchPackingFlags::SequenceStart)) - pMBLayout->m_minibatchPackingFlags[nt] |= MinibatchPackingFlags::SequenceStart; - if (newBoundary(ns, nt) == ((int) MinibatchPackingFlags::SequenceEnd)) - pMBLayout->m_minibatchPackingFlags[nt] |= MinibatchPackingFlags::SequenceEnd; - } - } -#endif - }; - typedef std::shared_ptr MBLayoutPtr; - + vector m_minibatchPackingFlags; + // ^^ This is some form of aggregate of m_sentenceBoundaryFlags taken over all streams. TODO: find out the exact condition + public: + + bool Is(size_t t, MinibatchPackingFlags f) const { return m_minibatchPackingFlags[t] & f; } + bool Is(size_t id, size_t t, MinibatchPackingFlags f) const { return ((MinibatchPackingFlags)(int)m_sentenceBoundaryFlags(id, t)) & f; } + + // get info for one frame; used in DelayedValueNode + // TODO: clean this up, we can do this more nicely + pair, MinibatchPackingFlags> GetFrame(size_t t) const + { + return make_pair(m_sentenceBoundaryFlags.ColumnSlice(t, 1), m_minibatchPackingFlags[t]); + } + + // set a boundary flag + // This ORs the flags, i.e. it assumes that the matrix has been cleared before. + // NOTE: original code that calls this did not OR the matrix, but did OR the vector value. I visually checked that it was cleared before, but might have gotten it wrong. + void Set(size_t id, size_t t, MinibatchPackingFlags f) + { + m_sentenceBoundaryFlags.SetValue(id, t, (float)(((MinibatchPackingFlags)(int)m_sentenceBoundaryFlags(id, t)) | f)); + m_minibatchPackingFlags[t] |= f; + } + // same but not ORing --TODO: is this distinction needed? 
+ void Reset(size_t id, size_t t, MinibatchPackingFlags f) + { + m_sentenceBoundaryFlags.SetValue(id, t, (float)(int)f); + m_minibatchPackingFlags[t] |= f; + } + // needed in DelayedValueNodeBase + // TODO: this is wicked in that the matrix keeps only the NoLabel flag, while the vector keeps all (just gets ORed into) + void Mask(size_t id, size_t t, MinibatchPackingFlags f) + { + m_sentenceBoundaryFlags.SetValue(id, t, (float)(((MinibatchPackingFlags)(int)m_sentenceBoundaryFlags(id, t)) & f)); + //m_minibatchPackingFlags[t] &= f; + } + + // for LSTMNode ony, which is deprecated, only to make it compile easily: also used in FindBestPathWithVariableLength() and FindBestPath() in a strange way + Matrix & GetM() { return m_sentenceBoundaryFlags; } + // and for DecimateMinibatchWithSentences() which should be revised + vector & GetV() { return m_minibatchPackingFlags; } + + // resize and reset all frames to None (note: this is an invalid state and must be fixed by caller afterwards) + void Resize(size_t numStreams, size_t numFrames) + { + m_sentenceBoundaryFlags.Resize(numStreams, numFrames); + m_sentenceBoundaryFlags.SetValue((float)((int)MinibatchPackingFlags::None)); + m_minibatchPackingFlags.assign(m_sentenceBoundaryFlags.GetNumCols(), MinibatchPackingFlags::None); + } + + // test a pre-condition --TODO: we only resize this thing here, so this should not be necessary in the future + void validate() const { if (m_minibatchPackingFlags.size() != m_sentenceBoundaryFlags.GetNumCols()) LogicError("MBLayout: GetSize() != GetNumTimeSteps()"); } + + // these accessors were for now just collected from actual usage; need to be cleaned up once this compiles again + size_t GetNumTimeSteps() const { validate(); return m_sentenceBoundaryFlags.GetNumCols(); } + size_t GetNumParallelSequences() const { return (m_sentenceBoundaryFlags.GetNumRows() == 0) ? 1 : m_sentenceBoundaryFlags.GetNumRows(); } // 1 stream if no matrix + size_t GetSize() const { validate(); return m_minibatchPackingFlags.size(); } + + // if we have no matrix/vector, this means no frame has any flag set + // We still can have a number of rows in this case. 
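
To make the Set/Is protocol of this class concrete, here is a minimal usage sketch (illustrative only; it uses the MBLayout and MinibatchPackingFlags API introduced in this patch, and the stream/frame counts are made-up values):

    MBLayout layout;
    layout.Resize(/*numStreams=*/2, /*numFrames=*/10);  // all flags reset to None
    layout.Set(/*id=*/0, /*t=*/0, MinibatchPackingFlags::SequenceStart);

    // Per-frame query aggregates over all streams:
    bool anyStart = layout.Is(/*t=*/0, MinibatchPackingFlags::SequenceStart);       // true
    // Per-stream query looks at one stream only:
    bool s1Start  = layout.Is(/*id=*/1, /*t=*/0, MinibatchPackingFlags::SequenceStart); // false

This mirrors the two representations kept in sync above: the (stream, t) boundary matrix and the per-frame aggregate vector.
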
+ bool IsAllNone() const { validate(); return m_minibatchPackingFlags.empty(); } + void SetAllNone() { Resize(0, 0); } + +#if 0 // we have this pattern often: + // TODO: mbSize and #slices must also move into MBLayout + evalnet->SetActualMiniBatchSize(mbSize); + dataReader->CopyMBLayoutTo(evalnet->GetMBLayoutPtr()); + evalnet->VerifyActualNumParallelSequences(dataReader->GetNumParallelSequences()); +#endif +#if 0 // a VERY TELLING piece of code + // packing flags = frame-wise or over all streams of start and end + for (size_t nt = 0; nt < nMBSize; nt++) + { + for (size_t ns = 0; ns < nSlices; ns++) + { + if (newBoundary(ns, nt) == ((int) MinibatchPackingFlags::SequenceStart)) + pMBLayout->m_minibatchPackingFlags[nt] |= MinibatchPackingFlags::SequenceStart; + if (newBoundary(ns, nt) == ((int) MinibatchPackingFlags::SequenceEnd)) + pMBLayout->m_minibatchPackingFlags[nt] |= MinibatchPackingFlags::SequenceEnd; + } + } +#endif + }; + typedef std::shared_ptr MBLayoutPtr; + }}} From 5d1c0ce0416a892e08d3f62b28fba2e0e36cb569 Mon Sep 17 00:00:00 2001 From: Alexey Kamenev Date: Wed, 16 Sep 2015 15:46:09 -0700 Subject: [PATCH 31/44] Merged FSAdaGrad with lastest master changes --- MachineLearning/CNTKSGDLib/SGD.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MachineLearning/CNTKSGDLib/SGD.h b/MachineLearning/CNTKSGDLib/SGD.h index c27cd405c4c5..cbf3717cd891 100644 --- a/MachineLearning/CNTKSGDLib/SGD.h +++ b/MachineLearning/CNTKSGDLib/SGD.h @@ -43,7 +43,8 @@ enum class GradientsUpdateType : int { None, AdaGrad, - RmsProp + RmsProp, + FSAdaGrad }; // TODO: While currently combining these methods is not supported, From 87e6adaef19f96e84e305aefb830bf5f2923cba5 Mon Sep 17 00:00:00 2001 From: Alexey Kamenev Date: Mon, 21 Sep 2015 14:48:31 -0700 Subject: [PATCH 32/44] Fix merge issues. 
--- MachineLearning/CNTKSGDLib/SGD.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MachineLearning/CNTKSGDLib/SGD.cpp b/MachineLearning/CNTKSGDLib/SGD.cpp index 6bcaa2b9bd61..5133bca2d77b 100644 --- a/MachineLearning/CNTKSGDLib/SGD.cpp +++ b/MachineLearning/CNTKSGDLib/SGD.cpp @@ -2402,7 +2402,7 @@ template } else if (adpType == GradientsUpdateType::FSAdaGrad) { - smoothedGradient.FSAdagrad(actualMBSize, gradientValues, functionValues, learnRatePerSample, momentum); + smoothedGradient.FSAdagrad(actualMBSize, gradientValues, functionValues, (ElemType)learnRatePerSample, (ElemType)momentum); } else if (adpType == GradientsUpdateType::RmsProp) { From 188f392d9ae987b310d5605409956d1f9c439ee2 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Mon, 21 Sep 2015 16:08:47 -0700 Subject: [PATCH 33/44] renamed m_nodesReqMultiSeqHandling to m_requestNodesMultiSeqHandling, likewise SetRequestNodesMultiSeqHandling(), to indicate that this is a user request (as opposed to req-uired) --- MachineLearning/CNTK/ModelEditLanguage.cpp | 2 +- MachineLearning/CNTK/NDLUtil.h | 2 +- .../CNTK/SynchronousExecutionEngine.h | 2 +- .../ComputationNetwork.cpp | 18 ++++++++++-------- .../ComputationNetwork.h | 16 ++++++++-------- .../ComputationNode.h | 12 ++++++++---- .../InputAndParamNodes.h | 2 +- .../NetworkBuilderFromConfig.cpp | 2 +- .../CNTKComputationNetworkLib/RecurrentNodes.h | 5 ++--- .../TrainingCriterionNodes.h | 6 +++--- Math/Math/Matrix.h | 4 ++-- 11 files changed, 38 insertions(+), 33 deletions(-) diff --git a/MachineLearning/CNTK/ModelEditLanguage.cpp b/MachineLearning/CNTK/ModelEditLanguage.cpp index 6c700133ebf1..ed4b2ed234ad 100644 --- a/MachineLearning/CNTK/ModelEditLanguage.cpp +++ b/MachineLearning/CNTK/ModelEditLanguage.cpp @@ -472,7 +472,7 @@ void MELScript::CallFunction(const std::string& p_name, const ConfigPa case melPropMultiSeqHandling: { bool set = params[2]; - SetProperty(node, cn->NodesReqMultiSeqHandling(), set); + SetProperty(node, cn->RequestNodesMultiSeqHandling(), set); break; } case melPropEvaluation: diff --git a/MachineLearning/CNTK/NDLUtil.h b/MachineLearning/CNTK/NDLUtil.h index e0a7dd1b47cb..b4435f2dd812 100644 --- a/MachineLearning/CNTK/NDLUtil.h +++ b/MachineLearning/CNTK/NDLUtil.h @@ -178,7 +178,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { CheckOutputNodes(script, "FeatureNodes", m_net->FeatureNodes()); CheckOutputNodes(script, "LabelNodes", m_net->LabelNodes()); CheckOutputNodes(script, "CriteriaNodes", m_net->FinalCriterionNodes()); - CheckOutputNodes(script, "NodesReqMultiSeqHandling", m_net->NodesReqMultiSeqHandling()); + CheckOutputNodes(script, "NodesReqMultiSeqHandling", m_net->RequestNodesMultiSeqHandling()); CheckOutputNodes(script, "EvalNodes", m_net->EvaluationNodes()); CheckOutputNodes(script, "OutputNodes", m_net->OutputNodes()); } diff --git a/MachineLearning/CNTK/SynchronousExecutionEngine.h b/MachineLearning/CNTK/SynchronousExecutionEngine.h index 195c57643d61..43f82c38c436 100644 --- a/MachineLearning/CNTK/SynchronousExecutionEngine.h +++ b/MachineLearning/CNTK/SynchronousExecutionEngine.h @@ -288,7 +288,7 @@ class SynchronousNodeEvaluator : public NDLNodeEvaluator } else if (!_stricmp(value.c_str(), "multiseq")) { - SetOutputNode(m_net.NodesReqMultiSeqHandling(), compNode); + SetOutputNode(m_net.RequestNodesMultiSeqHandling(), compNode); } else if (!_strnicmp(value.c_str(), "eval", 4)) // only compare the first 4 characters { diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp 
b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp
index cbcb2ed1ee95..eb81d56579b7 100644
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp
@@ -126,9 +126,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 fstream.PutMarker(FileMarker::fileMarkerEndSection, L"ECriteriaNodes");

 fstream.PutMarker(FileMarker::fileMarkerBeginSection, L"BNodesReqMultiSeqHandling");
- fstream << m_nodesReqMultiSeqHandling.size();
- for (size_t i = 0; i < m_nodesReqMultiSeqHandling.size(); i++)
- fstream << m_nodesReqMultiSeqHandling[i]->NodeName();
+ fstream << m_requestNodesMultiSeqHandling.size();
+ for (size_t i = 0; i < m_requestNodesMultiSeqHandling.size(); i++)
+ fstream << m_requestNodesMultiSeqHandling[i]->NodeName();
 fstream.PutMarker(FileMarker::fileMarkerEndSection, L"ENodesReqMultiSeqHandling");

 fstream.PutMarker(FileMarker::fileMarkerBeginSection, L"BEvalNodes");
@@ -326,11 +326,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 return false;
 }

+ // Some nodes always need m_reqMultiSeqHandling; those set it themselves. Basically RecurrentNode only currently (besides PairNode and LSTMNode).
+ // Some nodes need it to be set xxx.
 // TODO: comment on who owns this flag. Is it entirely owned by Network?
 // Or should the 4 node types below know?
- void ComputationNetwork::SetNodesReqMultiSeqHandling()
+ void ComputationNetwork::SetRequestNodesMultiSeqHandling()
 {
- for (auto & node : m_nodesReqMultiSeqHandling)
+ for (auto & node : m_requestNodesMultiSeqHandling) // this set is defined in NDL; here we propagate that into the actual nodes' flags, except for a few where it makes no sense (avoid user error)
 {
 //SumElements node will generate a scalar value and so it should never require special handling
 //TransposeNode will change the size of columns and so it should also not be included for special handling
@@ -1018,7 +1020,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 for (size_t i = 0; i < num; i++)
 {
 fstream >> nodeName;
- m_nodesReqMultiSeqHandling.push_back(GetNodeFromName(nodeName));
+ m_requestNodesMultiSeqHandling.push_back(GetNodeFromName(nodeName));
 }
 fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ENodesReqMultiSeqHandling");
 }
@@ -1160,7 +1162,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 // criteria
 fstream << FormSpecialNodes(dotcfg.m_CriteriaStyle, m_finalCriteria);
 // nodes that require multi sequence handling
- fstream << FormSpecialNodes(dotcfg.m_nodesReqMultiSeqHandlingStyle, m_nodesReqMultiSeqHandling);
+ fstream << FormSpecialNodes(dotcfg.m_nodesReqMultiSeqHandlingStyle, m_requestNodesMultiSeqHandling);
 // pre-compute nodes
 fstream << FormSpecialNodes(dotcfg.m_PrecomputingNodeStyle, PreComputedNodes);
 // PastValue nodes
@@ -1203,7 +1205,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 line.clear();
 for (const auto & x : m_finalCriteria)
 line = line + msra::strfun::wstrprintf(L"\"%ls\" ", x->GetName().c_str());
- for (const auto & x : m_nodesReqMultiSeqHandling)
+ for (const auto & x : m_requestNodesMultiSeqHandling)
 line = line + msra::strfun::wstrprintf(L"\"%ls\" ", x->GetName().c_str());
 for (const auto & x : m_outputNodes)
 line = line + msra::strfun::wstrprintf(L"\"%ls\" ", x->GetName().c_str());
diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h
index afe489328c04..b679dca2b110 100644
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h
@@ -529,7 +529,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb
 bool IsTypicalCriterionNode(ComputationNodeBasePtr nodePtr);
- void SetNodesReqMultiSeqHandling();
+ void SetRequestNodesMultiSeqHandling();

 // MAIN ENTRY POINT for evaluation (forward prop)
 // TODO: pass a set of nodes instead of only one
@@ -876,10 +876,10 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb
 return std::vector<ComputationNodeBasePtr> { node };
 }

- inline std::vector & NodesReqMultiSeqHandling() { return m_nodesReqMultiSeqHandling; }
- inline std::vector & EvaluationNodes() { return m_evalNodes; }
- inline std::vector & OutputNodes() { return m_outputNodes; }
- inline std::vector & PairNodes() { return m_pairNodes; }
+ inline std::vector & RequestNodesMultiSeqHandling() { return m_requestNodesMultiSeqHandling; } // user-specified list 'NodesReqMultiSeqHandling' (NDL and MEL create/modify this list)
+ inline std::vector & EvaluationNodes() { return m_evalNodes; }
+ inline std::vector & OutputNodes() { return m_outputNodes; }
+ inline std::vector & PairNodes() { return m_pairNodes; }

 inline std::vector & RecurrentNodes() { return m_recurrentInfo; }
@@ -1181,7 +1181,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb
 CollectInputAndLearnableParameters(rootNode);

 //
- SetNodesReqMultiSeqHandling();
+ SetRequestNodesMultiSeqHandling();
 }

 //this function will need to be called before actual validation and execution to
@@ -1549,10 +1549,10 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb
 std::vector m_evalNodes;
 std::vector m_outputNodes;
 std::vector m_pairNodes; /// nodes for the children network to pair
- std::vector m_nodesReqMultiSeqHandling;
+ std::vector m_requestNodesMultiSeqHandling;
 vector<std::vector<ComputationNodeBasePtr>*> GetAllNodeGroups() // get all groups to allow to iterate over all of them ...continue
 {
- return vector<std::vector<ComputationNodeBasePtr>*> { &m_features, &m_labels, &m_finalCriteria, &m_evalNodes, &m_outputNodes, &m_pairNodes, &m_nodesReqMultiSeqHandling };
+ return vector<std::vector<ComputationNodeBasePtr>*> { &m_features, &m_labels, &m_finalCriteria, &m_evalNodes, &m_outputNodes, &m_pairNodes, &m_requestNodesMultiSeqHandling };
 }

 std::vector m_recurrentInfo; // [index--TODO: comment what this is indexed with]
diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h
index 71e06618c46e..47e798ef783c 100644
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h
@@ -625,6 +625,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 DEVICEID_TYPE m_deviceId; //CPU=-1, >=0 GPU
 bool m_needGradient; //only used for leaf, i.e., learnable parameters, etc.
 bool m_reqMultiSeqHandling; // indicates whether the results of operation should be masked to handle the cases that the utterances have different lengths when grouped together as a minibatch.
+ // ^^ This decides whether the node gets passed the full layout with flags or only the one without flags
+ // and this is only ever tested in MaskToZeroWhenLabelAndFeatureMissing(), of which two versions exist, one in ComputationNode and one in ClassBasedCrossEntropyWithSoftmaxNode
+ // TODO: rename this to reflect that it affects only masking
 size_t m_inputWidth, m_inputHeight, m_inputChannels; //how to interpret each column in the input as an image
 size_t m_outputWidth, m_outputHeight, m_outputChannels; //how to interpret each column in the output as an image
@@ -806,7 +809,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 {
 EvaluateThisNode(); // this is a call to the virtual function that implements the actual operation
- if (!UseCustomizedMultiSeqHandling())
+ if (!UseCustomizedMultiSeqHandling()) // this means the node does it by itself; if not, we do it for the node
 MaskToZeroWhenLabelAndFeatureMissing(m_functionValues);
 }
@@ -857,7 +860,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 {
 bool processedExistsNoLabelorFeatureMissing = false; /// set to true if either nolabel or feature missing is processed
- if (m_pMBLayout && !m_pMBLayout->IsAllNone())
+ if (!m_pMBLayout->IsAllNone())
 {
 size_t nT = matrixToBeMasked.GetNumCols();
 size_t nS = m_pMBLayout->GetNumParallelSequences();
@@ -871,10 +874,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 {
 size_t t = utt_t / nS;
- if (m_pMBLayout->Is(t, MinibatchPackingFlags::NoLabel))
+ if (m_pMBLayout->Is(t, MinibatchPackingFlags::NoLabel | MinibatchPackingFlags::NoFeatures))
 {
 for (size_t id = 0; id < nS; id++)
- if (m_pMBLayout->Is(id, t, MinibatchPackingFlags::NoLabel))
+ if (m_pMBLayout->Is(id, t, MinibatchPackingFlags::NoLabel | MinibatchPackingFlags::NoFeatures))
 matrixToBeMasked.ColumnSlice(utt_t+id, 1).SetValue(0);
 processedExistsNoLabelorFeatureMissing = true;
 }
@@ -1212,6 +1215,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 virtual void SetErrorsFromFutureMinibatch(Matrix&) {}
 // indicates whether special handling is needed. The standard handling will just mask the function values after the evaluation and mask the gradient before gradient computation for the children. This is not valid for all criterion nodes whose result is a scalar.
+ // defined by training/eval criteria (and the soon-to-be-deprecated PairNode, LSTMNode) virtual bool UseCustomizedMultiSeqHandling() { return false; } protected: diff --git a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h index aa29b045369a..8913a71f9f94 100644 --- a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h @@ -530,7 +530,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { typedef ComputationNode Base; UsingComputationNodeMembers; void Init(size_t row_size, size_t col_size) { - m_reqMultiSeqHandling = true; + SetReqMultiSeqHandlingTo(true); m_functionValues.Resize(row_size, col_size); } public: diff --git a/MachineLearning/CNTKComputationNetworkLib/NetworkBuilderFromConfig.cpp b/MachineLearning/CNTKComputationNetworkLib/NetworkBuilderFromConfig.cpp index d2d04b7ad0d7..c20b83c758e4 100644 --- a/MachineLearning/CNTKComputationNetworkLib/NetworkBuilderFromConfig.cpp +++ b/MachineLearning/CNTKComputationNetworkLib/NetworkBuilderFromConfig.cpp @@ -677,7 +677,7 @@ namespace Microsoft { namespace MSR { namespace ScriptableObjects { else if (!_wcsnicmp(tag.c_str(), L"eval", 4)) net->EvaluationNodes().push_back(node); // eval* else if (tag == L"output") net->OutputNodes().push_back(node); else if (tag == L"pair") net->PairNodes().push_back(node); // TODO: I made this up; the original code in SynchronousExecutionEngine did not have this - else if (tag == L"multiseq") net->NodesReqMultiSeqHandling().push_back(node); + else if (tag == L"multiseq") net->RequestNodesMultiSeqHandling().push_back(node); else if (!tag.empty()) RuntimeError("ComputationNetwork: unknown tag '%ls'", tag.c_str()); // TODO: are there nodes without tag? Where do they go? 
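The 'multiseq' tag above only records a request; the actual work is done by MaskToZeroWhenLabelAndFeatureMissing() (renamed to MaskMissingColumnsToZero() later in this series). To make the packing scheme concrete, here is a small self-contained sketch of the idea, independent of the classes above: nS parallel sequences are packed as interleaved columns (column index = t * nS + s), and columns flagged as gaps carry garbage that must be zeroed before any reduce-style operation. The flag values and the column-major layout are simplified assumptions chosen to mirror the code in this series:

#include <cstddef>
#include <vector>

enum class PackingFlags : char { None = 0, NoFeature = 1, NoLabel = 2 };

// bitmask test, cf. the MBLayout::Is() functions in this series
inline bool Is(PackingFlags a, PackingFlags b) { return ((char)a & (char)b) != 0; }

// data is column-major with nRows rows and nS * nT columns; flags has one entry per column.
void MaskMissingColumnsToZeroSketch(std::vector<double>& data, size_t nRows,
                                    const std::vector<PackingFlags>& flags, size_t nS, size_t nT)
{
    for (size_t t = 0; t < nT; t++)
        for (size_t s = 0; s < nS; s++)
        {
            size_t col = t * nS + s;                 // column of sequence s at time t
            if (Is(flags[col], PackingFlags::NoLabel) || Is(flags[col], PackingFlags::NoFeature))
                for (size_t r = 0; r < nRows; r++)
                    data[col * nRows + r] = 0;       // zero out the gap column
        }
}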
diff --git a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h index 9cdf2f4accd4..cba78b3c271d 100644 --- a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h @@ -36,7 +36,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { private: void Init(size_t row_size, size_t col_size, ElemType initialActivationValue = (ElemType)DEFAULT_HIDDEN_ACTIVATION) { - m_reqMultiSeqHandling = true; + SetReqMultiSeqHandlingTo(true); m_initialActivationValue = initialActivationValue; m_timeStep = 1; m_functionValues.Resize(row_size, col_size); @@ -517,7 +517,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_use_errors_from_future_minibatch(false), m_DefaultState((ElemType)DEFAULT_HIDDEN_ACTIVATION) { - m_reqMultiSeqHandling = true; + SetReqMultiSeqHandlingTo(true); } virtual const std::wstring OperationName() const { return TypeName(); } @@ -561,7 +561,6 @@ namespace Microsoft { namespace MSR { namespace CNTK { node->m_use_errors_from_future_minibatch = m_use_errors_from_future_minibatch; node->m_DefaultState = m_DefaultState; - node->m_reqMultiSeqHandling = m_reqMultiSeqHandling; } } diff --git a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h index ab04316ca667..c0a64c77743d 100644 --- a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h @@ -1043,7 +1043,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { Matrix softMax_t = softMax.ColumnSlice(sz, nbr_wrd); Matrix logSoftMax_t = logSoftmax.ColumnSlice(sz, nbr_wrd); - if (curNode->MaskToZeroWhenLabelAndFeatureMissing(logSoftMax_t, t) == false) + if (!curNode->MaskToZeroWhenLabelAndFeatureMissing(logSoftMax_t, t)) { Matrix obs = inputs.ColumnSlice(t, 1); /// e.g., 200 x 1 obs.Reshape(1, nRow); /// 1 x 200 @@ -1065,7 +1065,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } /// add the class log posterior probability - if (curNode->MaskToZeroWhenLabelAndFeatureMissing(clsLogSoftmax, t) == false) + if (!curNode->MaskToZeroWhenLabelAndFeatureMissing(clsLogSoftmax, t)) { try{ Matrix::AddElementToElement(clsLogSoftmax, c_t, t, functionValues, 0, 0); @@ -1095,7 +1095,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { bool processedExistsNoLabelorFeatureMissing = false; /// set to true if either nolabel or feature missing is processed - if (m_pMBLayout && !m_pMBLayout->IsAllNone()) + if (!m_pMBLayout->IsAllNone()) { // 't' is not a time but rather a column index that encodes (time stamp, stream) size_t nS = m_pMBLayout->GetNumParallelSequences(); diff --git a/Math/Math/Matrix.h b/Math/Math/Matrix.h index 75c4798eb181..0185c4ebebe9 100644 --- a/Math/Math/Matrix.h +++ b/Math/Math/Matrix.h @@ -503,8 +503,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { // ^^ This is some form of aggregate of m_sentenceBoundaryFlags taken over all streams. 
TODO: find out the exact condition public: - bool Is(size_t t, MinibatchPackingFlags f) const { return m_minibatchPackingFlags[t] & f; } - bool Is(size_t id, size_t t, MinibatchPackingFlags f) const { return ((MinibatchPackingFlags)(int)m_sentenceBoundaryFlags(id, t)) & f; } + bool Is(size_t t, MinibatchPackingFlags f) const { return (m_minibatchPackingFlags[t] & f) != 0; } + bool Is(size_t id, size_t t, MinibatchPackingFlags f) const { return (((MinibatchPackingFlags)(int)m_sentenceBoundaryFlags(id, t)) & f) != 0; } // get info for one frame; used in DelayedValueNode // TODO: clean this up, we can do this more nicely From b72a1eb9ee07662d78330bda966b7e62a28a20d9 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Mon, 21 Sep 2015 16:47:41 -0700 Subject: [PATCH 34/44] renamed MaskToZeroWhenLabelAndFeatureMissing() to MaskMissingColumnsToZero() and commented what it does; and merged it with ClassBasedCrossEntropyWithSoftmaxNode::MaskToZeroWhenLabelAndFeatureMissing() which was nearly identical --- .../ComputationNode.h | 47 +++++++++++-------- .../EvaluationCriterionNodes.h | 4 +- .../TrainingCriterionNodes.h | 40 ++++++++-------- Math/Math/Matrix.h | 1 + 4 files changed, 51 insertions(+), 41 deletions(-) diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h index 47e798ef783c..09575913da3e 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h @@ -127,7 +127,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // evaluate only N frames at time index timeIdxInSeq // Normally, N is 1 or it spans the entire minibatch. virtual void EvaluateThisNode(const FrameRange &) = 0; - // evaluate a node--this calls EvaluateThisNode() and MaskToZeroWhenLabelAndFeatureMissing() if needed + // evaluate a node--this calls EvaluateThisNode() and MaskMissingColumnsToZero() if needed // this is the main entry point for Network; while EvaluateThisNode() is the virtual call into specific node implementation virtual void EvaluateThisNodeGivenInputs() = 0; virtual void EvaluateThisNodeGivenInputs(const size_t timeIdxInSeq) = 0; // TODO: change to FrameRange as well @@ -626,7 +626,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { bool m_needGradient; //only used for leaf, i.e., learnable parameters, etc. bool m_reqMultiSeqHandling; // indicates whether the results of operation should be masked to handle the cases that the utterances have different lengths when grouped together as a minibatch. 
// ^^ This decides whether the node gets passed the full layout with flags or only the one without flags - // and this is only ever tested in MaskToZeroWhenLabelAndFeatureMissing(), of which two versions exist, one in ComputationNode and one in ClassBasedCrossEntropyWithSoftmaxNode + // and this is only ever tested in MaskMissingColumnsToZero(), of which two versions exist, one in ComputationNode and one in ClassBasedCrossEntropyWithSoftmaxNode // TODO: rename this to reflect that it affects only masking size_t m_inputWidth, m_inputHeight, m_inputChannels; //how to interpret each column in the input as an image size_t m_outputWidth, m_outputHeight, m_outputChannels; //how to interpret each column in the output as an image @@ -810,7 +810,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { EvaluateThisNode(); // this is a call to the virtual function that implements the actual operation if (!UseCustomizedMultiSeqHandling()) // this means the node does it by itself; if not, we do it for the node - MaskToZeroWhenLabelAndFeatureMissing(m_functionValues); + MaskMissingColumnsToZero(m_functionValues); } // TODO: use a FrameRange arg, then unify with above @@ -821,7 +821,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { EvaluateThisNode(FrameRange(timeIdxInSeq, GetNumParallelSequences())); if (!UseCustomizedMultiSeqHandling()) - MaskToZeroWhenLabelAndFeatureMissing(m_functionValues, timeIdxInSeq); + MaskMissingColumnsToZero(m_functionValues, timeIdxInSeq); } #if 0 // (this function cannot be used currently since sentenceBegin is not a Matrix anymore; only affects LSTMNode which is no longer used) @@ -855,30 +855,37 @@ namespace Microsoft { namespace MSR { namespace CNTK { /** reset to error signals to 0 for any elements without labels */ - // TODO: use a FrameRange instead of timeIdxSeq - bool MaskToZeroWhenLabelAndFeatureMissing(Matrix& matrixToBeMasked, const size_t timeIdxInSeq=(size_t)-1) const + // This sets MB columns to 0 that have the NoLabel or NoFeature flag set. + // This happens as a result of packing multiple sequences for parallel processing--there will be some gaps, which are flagged by these flags. + // Nodes that operate in 'map' style (input(j) -> output(j) independently) can ignore this; it will be garbage-in-garbage-out. + // However, nodes that 'reduce' minibatches (e.g. computing the sum of all frames across all sequences) must deal with the garbage. + // This function sets those to 0, assuming that now they can be reduced without affecting the result. + // This function can operate on the whole range or on a selected single frame and/or a single sequence. + bool MaskMissingColumnsToZero(Matrix& matrixToBeMasked, size_t timeIdxInSeq = SIZE_MAX, size_t seqIndex = SIZE_MAX) const { bool processedExistsNoLabelorFeatureMissing = false; /// set to true if either nolabel or feature missing is processed if (!m_pMBLayout->IsAllNone()) { - size_t nT = matrixToBeMasked.GetNumCols(); + size_t nT = m_pMBLayout->GetNumTimeSteps(); size_t nS = m_pMBLayout->GetNumParallelSequences(); - if (m_pMBLayout->GetSize() != nT / nS) - LogicError("MaskToZeroWhenLabelAndFeatureMissing: m_pMBLayout->m_minibatchPackingFlags should have one element for each timestep of all streams. Check feature reader. "); + if (matrixToBeMasked.GetNumCols() != nT * nS) + LogicError("MaskMissingColumnsToZero: m_pMBLayout->m_minibatchPackingFlags should have one element for each timestep of all streams. Check feature reader. "); - size_t startT = (timeIdxInSeq == (size_t)-1) ? 
0 : timeIdxInSeq * nS; // TODO: misnomer; startT, endT, and utt_t are not times but columns in the packed matrix - size_t endT = (timeIdxInSeq == (size_t)-1) ? nT : timeIdxInSeq * nS + nS; - for (size_t utt_t = startT; utt_t < endT; utt_t += nS) - { - size_t t = utt_t / nS; + size_t startT = (timeIdxInSeq == SIZE_MAX) ? 0 : timeIdxInSeq; + size_t endT = (timeIdxInSeq == SIZE_MAX) ? nT : timeIdxInSeq + 1; + + size_t startS = (seqIndex == SIZE_MAX) ? 0 : seqIndex; + size_t endS = (seqIndex == SIZE_MAX) ? nS : seqIndex + 1; - if (m_pMBLayout->Is(t, MinibatchPackingFlags::NoLabel | MinibatchPackingFlags::NoFeatures)) + for (size_t t = startT; t < endT; t++) + { + if (m_pMBLayout->Is(t, MinibatchPackingFlags::NoLabel | MinibatchPackingFlags::NoFeature)) { - for (size_t id = 0; id < nS; id++) - if (m_pMBLayout->Is(id, t, MinibatchPackingFlags::NoLabel | MinibatchPackingFlags::NoFeatures)) - matrixToBeMasked.ColumnSlice(utt_t+id, 1).SetValue(0); + for (size_t id = startS; id < endS; id++) + if (m_pMBLayout->Is(id, t, MinibatchPackingFlags::NoLabel | MinibatchPackingFlags::NoFeature)) + matrixToBeMasked.ColumnSlice(t * nS + id, 1).SetValue(0); processedExistsNoLabelorFeatureMissing = true; } } @@ -1050,7 +1057,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (size_t i=0; iNeedGradient()) @@ -1079,7 +1086,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (size_t i=0; iNeedGradient()) diff --git a/MachineLearning/CNTKComputationNetworkLib/EvaluationCriterionNodes.h b/MachineLearning/CNTKComputationNetworkLib/EvaluationCriterionNodes.h index a60ea5c604e1..a765ca6d4a8c 100644 --- a/MachineLearning/CNTKComputationNetworkLib/EvaluationCriterionNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/EvaluationCriterionNodes.h @@ -48,8 +48,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { { inputFunctionValues0.VectorMax(maxIndexes0, maxValues, true); inputFunctionValues1.VectorMax(maxIndexes1, maxValues, true); - curNode->MaskToZeroWhenLabelAndFeatureMissing(maxIndexes0); //we are fine since it will only be called with full minibatch - curNode->MaskToZeroWhenLabelAndFeatureMissing(maxIndexes1); + curNode->MaskMissingColumnsToZero(maxIndexes0); //we are fine since it will only be called with full minibatch + curNode->MaskMissingColumnsToZero(maxIndexes1); functionValues.AssignNumOfDiff(maxIndexes0, maxIndexes1); #if NANCHECK functionValues.HasNan("ErrorPrediction"); diff --git a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h index c0a64c77743d..a13126e33628 100644 --- a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h @@ -60,7 +60,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { static void WINAPI EvaluateThisNodeS(Matrix& functionValues, const Matrix& inputFunctionValues0, const Matrix& inputFunctionValues1, Matrix& leftMinusRight, ComputationNodePtr curNode) { leftMinusRight.AssignDifferenceOf(inputFunctionValues0, inputFunctionValues1); - curNode->MaskToZeroWhenLabelAndFeatureMissing(leftMinusRight); //we are fine since it will only be called with full minibatch. + curNode->MaskMissingColumnsToZero(leftMinusRight); //we are fine since it will only be called with full minibatch. 
ElemType v = leftMinusRight.FrobeniusNorm(); functionValues.Resize(1,1); functionValues.SetValue(v*v/2); @@ -174,7 +174,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { else { ComputeInputPartialRight(m_softmaxOfRight, Inputs(0)->FunctionValues(), Inputs(inputIndex)->GradientValues(), GradientValues()); - Base::MaskToZeroWhenLabelAndFeatureMissing(Inputs(inputIndex)->GradientValues()); + Base::MaskMissingColumnsToZero(Inputs(inputIndex)->GradientValues()); } } @@ -221,7 +221,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { logSoftmaxOfRight.AssignLogSoftmaxOf(inputFunctionValues1, true); softmaxOfRight.SetValue(logSoftmaxOfRight); softmaxOfRight.InplaceExp(); - curNode->MaskToZeroWhenLabelAndFeatureMissing(logSoftmaxOfRight); //we are fine here since it will be called only with full minibatch + curNode->MaskMissingColumnsToZero(logSoftmaxOfRight); //we are fine here since it will be called only with full minibatch functionValues.AssignInnerProductOfMatrices(inputFunctionValues0, logSoftmaxOfRight); functionValues*=(-1); #if NANCHECK @@ -363,7 +363,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { Matrix& inputGradientValues, const Matrix& gradientValues, ComputationNodePtr curNode) { leftDivRight.AssignElementDivisionOf(inputFunctionValues0, inputFunctionValues1); - curNode->MaskToZeroWhenLabelAndFeatureMissing(leftDivRight); + curNode->MaskMissingColumnsToZero(leftDivRight); Matrix::ScaleAndAdd(-gradientValues.Get00Element(), leftDivRight, inputGradientValues); } @@ -377,7 +377,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { logOfRight.SetValue(inputFunctionValues1); logOfRight.InplaceLog(); - curNode->MaskToZeroWhenLabelAndFeatureMissing(logOfRight); + curNode->MaskMissingColumnsToZero(logOfRight); functionValues.AssignInnerProductOfMatrices(inputFunctionValues0, logOfRight); functionValues*=(-1); #if NANCHECK @@ -503,7 +503,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void EvaluateThisNode() { - Base::MaskToZeroWhenLabelAndFeatureMissing(Inputs(0)->FunctionValues()); + Base::MaskMissingColumnsToZero(Inputs(0)->FunctionValues()); EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues()); } @@ -599,7 +599,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void EvaluateThisNode() { - Base::MaskToZeroWhenLabelAndFeatureMissing(Inputs(0)->FunctionValues()); + Base::MaskMissingColumnsToZero(Inputs(0)->FunctionValues()); EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues()); } @@ -1043,7 +1043,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { Matrix softMax_t = softMax.ColumnSlice(sz, nbr_wrd); Matrix logSoftMax_t = logSoftmax.ColumnSlice(sz, nbr_wrd); - if (!curNode->MaskToZeroWhenLabelAndFeatureMissing(logSoftMax_t, t)) + if (!curNode->MaskMissingColumnsToZero(logSoftMax_t, t)) { Matrix obs = inputs.ColumnSlice(t, 1); /// e.g., 200 x 1 obs.Reshape(1, nRow); /// 1 x 200 @@ -1065,7 +1065,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } /// add the class log posterior probability - if (!curNode->MaskToZeroWhenLabelAndFeatureMissing(clsLogSoftmax, t)) + if (!curNode->MaskMissingColumnsToZero(clsLogSoftmax, t)) { try{ Matrix::AddElementToElement(clsLogSoftmax, c_t, t, functionValues, 0, 0); @@ -1090,28 +1090,30 @@ namespace Microsoft { namespace MSR { namespace CNTK { /** reset to error signals to 0 for any elements without labels */ - // TODO: This has overlap with ComputationNode::MaskToZeroWhenLabelAndFeatureMissing(), should call that instead. 
Note: This one does only one stream, while Base:: one does all streams. - bool MaskToZeroWhenLabelAndFeatureMissing(Matrix& matrixToBeMasked, const size_t t) const - { + // BUGBUG: the layout should be that of matrixToBeMasked, not of 'this' + bool MaskMissingColumnsToZero(Matrix& matrixToBeMasked, const size_t j) const + { + size_t nS = m_pMBLayout->GetNumParallelSequences(); + size_t t = j / nS; // this is the time stamp + size_t id = j % nS; // this is the stream + return Base::MaskMissingColumnsToZero(matrixToBeMasked, t, id); +#if 0 // old version prior to merging with Base version bool processedExistsNoLabelorFeatureMissing = false; /// set to true if either nolabel or feature missing is processed if (!m_pMBLayout->IsAllNone()) { - // 't' is not a time but rather a column index that encodes (time stamp, stream) - size_t nS = m_pMBLayout->GetNumParallelSequences(); - size_t j = t / nS; // this is the time stamp - size_t i = t % nS; // this is the stream - if (m_pMBLayout->Is(j, MinibatchPackingFlags::NoLabel)) // TODO: this outer test is redundant here, no? + if (m_pMBLayout->Is(t, MinibatchPackingFlags::NoLabel)) // TODO: this outer test is redundant here, no? { - if (m_pMBLayout->Is(i, j, MinibatchPackingFlags::NoLabel)) + if (m_pMBLayout->Is(id, t, MinibatchPackingFlags::NoLabel)) { - matrixToBeMasked.ColumnSlice(t,1).SetValue(0); + matrixToBeMasked.ColumnSlice(t * nS + id,1).SetValue(0); processedExistsNoLabelorFeatureMissing = true; } } } return processedExistsNoLabelorFeatureMissing; +#endif } /** diff --git a/Math/Math/Matrix.h b/Math/Math/Matrix.h index 0185c4ebebe9..75aba9597a1d 100644 --- a/Math/Math/Matrix.h +++ b/Math/Math/Matrix.h @@ -504,6 +504,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { public: bool Is(size_t t, MinibatchPackingFlags f) const { return (m_minibatchPackingFlags[t] & f) != 0; } + // TODO: swap id and t; t is the more important parameter bool Is(size_t id, size_t t, MinibatchPackingFlags f) const { return (((MinibatchPackingFlags)(int)m_sentenceBoundaryFlags(id, t)) & f) != 0; } // get info for one frame; used in DelayedValueNode From c17079ed5f1bb2bfacfbd287114bcecc1cbaa59d Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Mon, 21 Sep 2015 17:18:09 -0700 Subject: [PATCH 35/44] more renaming for clarity regarding masking: SetReqMultiSeqHandlingTo(true) -> SetMaskMissingColumnsToZero(), ReqMultiSeqHandling() -> NeedToMaskMissingColumnsToZero(), likewise m_maskMissingColumnsToZero --- .../ComputationNetwork.cpp | 13 ++++++------- .../ComputationNetwork.h | 6 +++++- .../ComputationNode.h | 19 +++++++++++-------- .../InputAndParamNodes.h | 2 +- .../RecurrentNodes.h | 4 ++-- 5 files changed, 25 insertions(+), 19 deletions(-) diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp index eb81d56579b7..42c73d02263a 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp @@ -326,10 +326,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { return false; } - // Some nodes always need m_reqMultiSeqHandling; those set it themselves. Basically RecurrentNode only currently (besides PairNode and LSTMNode). - // Some nodes need it to be set xxx. - // TODO: comment on who owns this flag. Is it entirely owned by Network? - // Or should the 4 node types below know? 
+ // transfer user-specified request for masking to the individual nodes
+ // This is only needed if users explicitly perform reduce-like operations.
+ // It makes no sense for some nodes, so we skip those.
 void ComputationNetwork::SetRequestNodesMultiSeqHandling()
 {
 for (auto & node : m_requestNodesMultiSeqHandling) // this set is defined in NDL; here we propagate that into the actual nodes' flags, except for a few where it makes no sense (avoid user error)
@@ -342,18 +341,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 node->OperationName() != OperationNameOf(MeanNode) &&
 node->OperationName() != OperationNameOf(InvStdDevNode)
 )
- node->SetReqMultiSeqHandlingTo(true);
+ node->SetMaskMissingColumnsToZero();
 }

 //if a typical criterion node is used as the training criterion node we assume it requires multiseq handling
 //this is for backward compatibility
 for (auto & node : m_finalCriteria)
 if (IsTypicalCriterionNode(node))
- node->SetReqMultiSeqHandlingTo(true);
+ node->SetMaskMissingColumnsToZero();

 for (auto & node : m_evalNodes)
 if (IsTypicalCriterionNode(node))
- node->SetReqMultiSeqHandlingTo(true);
+ node->SetMaskMissingColumnsToZero();
 }

 template void ComputationNetwork::GetNodesRequiringX(std::list & nodesRequirePreComputation, const ComputationNodeBasePtr rootNode, bool checkComputed)
diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h
index b679dca2b110..7606784006ed 100644
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h
@@ -562,7 +562,11 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb
 // TODO: in the future, these will be different on different nodes; and probably should be propagated by nodes themselves, like functionValues
 for (auto nodeIter = allNodes.begin(); nodeIter != allNodes.end(); nodeIter++)
 {
- if ((*nodeIter)->ReqMultiSeqHandling())
+ // TODO: we should just always set the real layout; the nodes themselves should know to ignore it based on NeedToMaskMissingColumnsToZero()
+ // MaskMissingColumnsToZero() will test whether the layout is all none, and then skip.
+ // This is the only place where ResetBound() is ever called on a node. Hence, we could test NeedToMaskMissingColumnsToZero() instead.
+ // Note that NeedToMaskMissingColumnsToZero() is true only where it is necessary; that is, most nodes have it set to false (since most nodes can just map garbage-in-garbage-out).
+ if ((*nodeIter)->NeedToMaskMissingColumnsToZero())
 (*nodeIter)->ResetBound(m_pMBLayout);
 else
 (*nodeIter)->ResetBound(m_pMBNoLayout);

 (*nodeIter)->VerifyNumParallelSequences(GetNumParallelSequences());
 }
diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h
index 09575913da3e..887da5e18abe 100644
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h
@@ -79,7 +79,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 m_indexInLoop(0),
 m_visited(false),
 m_inStack(false),
- m_reqMultiSeqHandling(false),
+ m_maskMissingColumnsToZero(false),
 m_nodeName(name == L"" ?
CreateUniqNodeName() : name) { } @@ -279,8 +279,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { bool& NeedGradient() { return m_needGradient; } const bool& NeedGradient() const { return m_needGradient; } - void SetReqMultiSeqHandlingTo(const bool v) { m_reqMultiSeqHandling = v; } - bool ReqMultiSeqHandling() const { return m_reqMultiSeqHandling; } + void SetMaskMissingColumnsToZero() { m_maskMissingColumnsToZero = true; } + bool NeedToMaskMissingColumnsToZero() const { return m_maskMissingColumnsToZero; } void InitRecurrentNode() // this initialization says that this node is not inside a loop { @@ -624,10 +624,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { DEVICEID_TYPE m_deviceId; //CPU=-1, >=0 GPU bool m_needGradient; //only used for leaf, i.e., learnable parameters, etc. - bool m_reqMultiSeqHandling; // indicates whether the results of operation should be masked to handle the cases that the utterances have different lengths when grouped together as a minibatch. + bool m_maskMissingColumnsToZero; // indicates whether the results of operation should be masked to handle the cases that the utterances have different lengths when grouped together as a minibatch. // ^^ This decides whether the node gets passed the full layout with flags or only the one without flags // and this is only ever tested in MaskMissingColumnsToZero(), of which two versions exist, one in ComputationNode and one in ClassBasedCrossEntropyWithSoftmaxNode - // TODO: rename this to reflect that it affects only masking + // Pertinent reduction operations (criterion nodes and gradient computation) always perform masking. + // Hence, this flag is only needed for special use cases where regular matrix ops are used for a 'reduce' operation. size_t m_inputWidth, m_inputHeight, m_inputChannels; //how to interpret each column in the input as an image size_t m_outputWidth, m_outputHeight, m_outputChannels; //how to interpret each column in the output as an image @@ -861,6 +862,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { // However, nodes that 'reduce' minibatches (e.g. computing the sum of all frames across all sequences) must deal with the garbage. // This function sets those to 0, assuming that now they can be reduced without affecting the result. // This function can operate on the whole range or on a selected single frame and/or a single sequence. + // It is indirectly guarded by the m_maskMissingColumnsToZero flag, which, if false, will install a layout with IsAllNone() to be true. TODO: we better always install the same layout, and instead test m_maskMissingColumnsToZero here. + // Note that existing 'reduce' style operations--the criterion nodes and gradient computation--already call this. 
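The map-versus-reduce distinction in the comment above is easy to see with invented numbers: an elementwise ('map') operation may harmlessly leave garbage in a gap column, but any reduction must see zeros there. A tiny self-contained illustration (all values made up):

#include <cstdio>

int main()
{
    // One row of a packed minibatch: two sequences (lengths 2 and 1) plus one gap column.
    double row[4]   = { 1.0, 2.0, 3.0, 7.5 };   // 7.5 is garbage left in the gap
    bool   isGap[4] = { false, false, false, true };

    double sum = 0;
    for (int j = 0; j < 4; j++)
        sum += isGap[j] ? 0.0 : row[j];         // masked reduce: 6.0, not 13.5
    std::printf("sum = %g\n", sum);
    return 0;
}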
bool MaskMissingColumnsToZero(Matrix& matrixToBeMasked, size_t timeIdxInSeq = SIZE_MAX, size_t seqIndex = SIZE_MAX) const { bool processedExistsNoLabelorFeatureMissing = false; /// set to true if either nolabel or feature missing is processed @@ -1200,7 +1203,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { node->m_functionValues = m_functionValues; node->m_gradientValues = m_gradientValues; - node->m_reqMultiSeqHandling = m_reqMultiSeqHandling; + node->m_maskMissingColumnsToZero = m_maskMissingColumnsToZero; } } @@ -1297,8 +1300,8 @@ protected: \ using Base::m_visitedOrder; using Base::m_index; using Base::m_lowLink; using Base::m_visited; using Base::m_inStack; \ using Base::m_indexInLoop; \ using Base::m_pMBLayout; \ - using Base::m_reqMultiSeqHandling; using Base::UseCustomizedMultiSeqHandling; using Base::GetNumParallelSequences; \ - using Base::DataSlice; using Base::VALUE; using Base::GRADIENT; \ + using Base::m_maskMissingColumnsToZero; using Base::UseCustomizedMultiSeqHandling; using Base::GetNumParallelSequences; \ + using Base::DataSlice; using Base::ValueSlice; using Base::GradientSlice; using Base::SetMaskMissingColumnsToZero; \ using Base::m_children; using Base::m_deviceId; using Base::m_evalTimeStamp; using Base::m_functionValues; using Base::m_gradientValues; \ using Base::m_inputChannels; using Base::m_inputHeight; using Base::m_inputWidth; using Base::m_needGradient; using Base::m_nodeName; \ using Base::m_outputChannels; using Base::m_outputHeight; using Base::m_outputWidth; using Base::s_constOnes; using Base::s_timeStampCounter; \ diff --git a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h index 8913a71f9f94..68a31eec8a61 100644 --- a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h @@ -530,7 +530,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { typedef ComputationNode Base; UsingComputationNodeMembers; void Init(size_t row_size, size_t col_size) { - SetReqMultiSeqHandlingTo(true); + SetMaskMissingColumnsToZero(); m_functionValues.Resize(row_size, col_size); } public: diff --git a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h index cba78b3c271d..bd81077a2ea0 100644 --- a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h @@ -36,7 +36,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { private: void Init(size_t row_size, size_t col_size, ElemType initialActivationValue = (ElemType)DEFAULT_HIDDEN_ACTIVATION) { - SetReqMultiSeqHandlingTo(true); + SetMaskMissingColumnsToZero(); m_initialActivationValue = initialActivationValue; m_timeStep = 1; m_functionValues.Resize(row_size, col_size); @@ -517,7 +517,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_use_errors_from_future_minibatch(false), m_DefaultState((ElemType)DEFAULT_HIDDEN_ACTIVATION) { - SetReqMultiSeqHandlingTo(true); + SetMaskMissingColumnsToZero(); } virtual const std::wstring OperationName() const { return TypeName(); } From a0c4aa9001e4928aaaf6500047696e0549ea7b77 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Mon, 21 Sep 2015 17:27:33 -0700 Subject: [PATCH 36/44] renamed UseCustomizedMultiSeqHandling() to NodeDoesItsOwnCustomizedMissingColumnsMasking() for clarity--this stuff will eventually go away once we allow inconsistent layouts --- 
.../CompositeComputationNodes.h | 2 +- .../ComputationNode.h | 14 +++++++------- .../EvaluationCriterionNodes.h | 2 +- .../InputAndParamNodes.h | 2 +- .../CNTKComputationNetworkLib/RecurrentNodes.h | 2 +- .../TrainingCriterionNodes.h | 18 +++++++++--------- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h b/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h index 077daa841cbe..8469e795dfe0 100644 --- a/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h @@ -1068,7 +1068,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } protected: - virtual bool UseCustomizedMultiSeqHandling() + virtual bool NodeDoesItsOwnCustomizedMissingColumnsMasking() { return true; } diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h index 887da5e18abe..bc111a6d0aa9 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h @@ -810,7 +810,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { EvaluateThisNode(); // this is a call to the virtual function that implements the actual operation - if (!UseCustomizedMultiSeqHandling()) // this means the node does it by itself; if not, we do it for the node + if (!NodeDoesItsOwnCustomizedMissingColumnsMasking()) // this means the node does it by itself; if not, we do it for the node MaskMissingColumnsToZero(m_functionValues); } @@ -821,7 +821,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { EvaluateThisNode(FrameRange(timeIdxInSeq, GetNumParallelSequences())); - if (!UseCustomizedMultiSeqHandling()) + if (!NodeDoesItsOwnCustomizedMissingColumnsMasking()) MaskMissingColumnsToZero(m_functionValues, timeIdxInSeq); } @@ -1059,7 +1059,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (size_t i=0; i&) {} // indicatess whether special handling is needed.The standard handleing will be just mask the function values after the evalaution and mask the gradient before gradiant computation for the children. this is not valid for all criterion nodes whose result is a scalar. 
- // defined by training/eval criteria (and the soon-to-be-deprecated PairNode, LSTMNode) - virtual bool UseCustomizedMultiSeqHandling() { return false; } + // overridden to return true by training/eval criteria (and the soon-to-be-deprecated PairNode, LSTMNode) + virtual bool NodeDoesItsOwnCustomizedMissingColumnsMasking() { return false; } protected: @@ -1300,7 +1300,7 @@ protected: \ using Base::m_visitedOrder; using Base::m_index; using Base::m_lowLink; using Base::m_visited; using Base::m_inStack; \ using Base::m_indexInLoop; \ using Base::m_pMBLayout; \ - using Base::m_maskMissingColumnsToZero; using Base::UseCustomizedMultiSeqHandling; using Base::GetNumParallelSequences; \ + using Base::m_maskMissingColumnsToZero; using Base::NodeDoesItsOwnCustomizedMissingColumnsMasking; using Base::GetNumParallelSequences; \ using Base::DataSlice; using Base::ValueSlice; using Base::GradientSlice; using Base::SetMaskMissingColumnsToZero; \ using Base::m_children; using Base::m_deviceId; using Base::m_evalTimeStamp; using Base::m_functionValues; using Base::m_gradientValues; \ using Base::m_inputChannels; using Base::m_inputHeight; using Base::m_inputWidth; using Base::m_needGradient; using Base::m_nodeName; \ diff --git a/MachineLearning/CNTKComputationNetworkLib/EvaluationCriterionNodes.h b/MachineLearning/CNTKComputationNetworkLib/EvaluationCriterionNodes.h index a765ca6d4a8c..c44a91986ce0 100644 --- a/MachineLearning/CNTKComputationNetworkLib/EvaluationCriterionNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/EvaluationCriterionNodes.h @@ -141,7 +141,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } } protected: - virtual bool UseCustomizedMultiSeqHandling() { return true; } + virtual bool NodeDoesItsOwnCustomizedMissingColumnsMasking() { return true; } private: Matrix m_maxIndexes0, m_maxIndexes1; diff --git a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h index 68a31eec8a61..eed17fbc8c28 100644 --- a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h @@ -634,7 +634,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { static const std::wstring TypeName() { return L"PairNetwork"; } protected: - virtual bool UseCustomizedMultiSeqHandling() { return true; } + virtual bool NodeDoesItsOwnCustomizedMissingColumnsMasking() { return true; } }; diff --git a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h index bd81077a2ea0..3a3aceeb291c 100644 --- a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h @@ -1571,7 +1571,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } protected: - virtual bool UseCustomizedMultiSeqHandling() { return true; } + virtual bool NodeDoesItsOwnCustomizedMissingColumnsMasking() { return true; } protected: size_t m_inputDim; diff --git a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h index a13126e33628..5be3f0cc42f1 100644 --- a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h @@ -138,7 +138,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } } protected: - virtual bool UseCustomizedMultiSeqHandling() { return true; } + virtual bool 
NodeDoesItsOwnCustomizedMissingColumnsMasking() { return true; } private: Matrix m_leftMinusRight; }; @@ -311,7 +311,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } } protected: - virtual bool UseCustomizedMultiSeqHandling() { return true; } + virtual bool NodeDoesItsOwnCustomizedMissingColumnsMasking() { return true; } protected: Matrix m_logSoftmaxOfRight; Matrix m_softmaxOfRight; @@ -461,7 +461,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } } protected: - virtual bool UseCustomizedMultiSeqHandling() { return true; } + virtual bool NodeDoesItsOwnCustomizedMissingColumnsMasking() { return true; } private: // matrix value passed from evaluate to computePartial Matrix m_logOfRight; @@ -561,7 +561,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } } protected: - virtual bool UseCustomizedMultiSeqHandling() { return true; } + virtual bool NodeDoesItsOwnCustomizedMissingColumnsMasking() { return true; } private: Matrix m_gradientOfL1Norm; // temporary }; @@ -646,7 +646,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_temp.TransferToDeviceIfNotThereAndNotAutoPlace(deviceId, true); } protected: - virtual bool UseCustomizedMultiSeqHandling() { return true; } + virtual bool NodeDoesItsOwnCustomizedMissingColumnsMasking() { return true; } private: Matrix m_temp; }; @@ -825,7 +825,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_grdToSoftMaxInput.TransferToDeviceIfNotThereAndNotAutoPlace(deviceId, true); } protected: - virtual bool UseCustomizedMultiSeqHandling() { return true; } + virtual bool NodeDoesItsOwnCustomizedMissingColumnsMasking() { return true; } protected: Matrix m_logSoftmax; Matrix m_softMax; @@ -1176,7 +1176,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_grdToSoftMaxInput.TransferToDeviceIfNotThereAndNotAutoPlace(deviceId, true); } protected: - virtual bool UseCustomizedMultiSeqHandling() { return true; } + virtual bool NodeDoesItsOwnCustomizedMissingColumnsMasking() { return true; } protected: Matrix m_logSoftmax; Matrix m_softMax; @@ -1494,7 +1494,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } } protected: - virtual bool UseCustomizedMultiSeqHandling() { return true; } + virtual bool NodeDoesItsOwnCustomizedMissingColumnsMasking() { return true; } private: Matrix mAlpha; // TODO: m_Alpha etc. 
Matrix mBeta;
@@ -1600,7 +1600,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 m_children[2] = prediction;
 }
 protected:
- virtual bool UseCustomizedMultiSeqHandling() { return true; }
+ virtual bool NodeDoesItsOwnCustomizedMissingColumnsMasking() { return true; }
 };

 template class DummyCriterionNode;

From 9f4fcf036526db87cc0c029b020db1fe63a23abf Mon Sep 17 00:00:00 2001
From: Frank Seide
Date: Mon, 21 Sep 2015 17:41:14 -0700
Subject: [PATCH 37/44] included test for NeedToMaskMissingColumnsToZero() into the calls to MaskMissingColumnsToZero() that would not do anything if this flag is set; renamed ResetBound() to SetMBLayout()

---
 .../ComputationNetwork.h | 6 +++---
 .../CNTKComputationNetworkLib/ComputationNode.h | 16 ++++++++--------
 .../CNTKComputationNetworkLib/RecurrentNodes.h | 6 +++---
 .../TrainingCriterionNodes.h | 6 +++---
 4 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h
index 7606784006ed..38f8e72cfc1d 100644
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h
@@ -564,12 +564,12 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb
 {
 // TODO: we should just always set the real layout; the nodes themselves should know to ignore it based on NeedToMaskMissingColumnsToZero()
 // MaskMissingColumnsToZero() will test whether the layout is all none, and then skip.
- // This is the only place where ResetBound() is ever called on a node. Hence, we could test NeedToMaskMissingColumnsToZero() instead.
+ // This is the only place where SetMBLayout() is ever called on a node. Hence, we could test NeedToMaskMissingColumnsToZero() instead.
 // Note that NeedToMaskMissingColumnsToZero() is true only where it is necessary; that is, most nodes have it set to false (since most nodes can just map garbage-in-garbage-out).
if ((*nodeIter)->NeedToMaskMissingColumnsToZero()) - (*nodeIter)->ResetBound(m_pMBLayout); + (*nodeIter)->SetMBLayout(m_pMBLayout); else - (*nodeIter)->ResetBound(m_pMBNoLayout); + (*nodeIter)->SetMBLayout(m_pMBNoLayout); (*nodeIter)->VerifyNumParallelSequences(GetNumParallelSequences()); } diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h index bc111a6d0aa9..0f041f709972 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h @@ -166,7 +166,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { virtual void SetFunctionAndGradientSize(const int numSamples) = 0; - virtual void ResetBound(MBLayoutPtr pMBLayout) + virtual void SetMBLayout(MBLayoutPtr pMBLayout) { assert(pMBLayout->GetNumTimeSteps() == pMBLayout->GetSize()); // TODO: move this check into MBLayout m_pMBLayout = pMBLayout; @@ -810,7 +810,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { EvaluateThisNode(); // this is a call to the virtual function that implements the actual operation - if (!NodeDoesItsOwnCustomizedMissingColumnsMasking()) // this means the node does it by itself; if not, we do it for the node + if (NeedToMaskMissingColumnsToZero() && !NodeDoesItsOwnCustomizedMissingColumnsMasking()) // this means the node does it by itself; if not, we do it for the node MaskMissingColumnsToZero(m_functionValues); } @@ -821,7 +821,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { EvaluateThisNode(FrameRange(timeIdxInSeq, GetNumParallelSequences())); - if (!NodeDoesItsOwnCustomizedMissingColumnsMasking()) + if (NeedToMaskMissingColumnsToZero() && !NodeDoesItsOwnCustomizedMissingColumnsMasking()) MaskMissingColumnsToZero(m_functionValues, timeIdxInSeq); } @@ -866,7 +866,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // Note that existing 'reduce' style operations--the criterion nodes and gradient computation--already call this. bool MaskMissingColumnsToZero(Matrix& matrixToBeMasked, size_t timeIdxInSeq = SIZE_MAX, size_t seqIndex = SIZE_MAX) const { - bool processedExistsNoLabelorFeatureMissing = false; /// set to true if either nolabel or feature missing is processed + bool foundLabelOrFeatureMissing = false; /// set to true if either nolabel or feature missing is processed if (!m_pMBLayout->IsAllNone()) { @@ -889,12 +889,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (size_t id = startS; id < endS; id++) if (m_pMBLayout->Is(id, t, MinibatchPackingFlags::NoLabel | MinibatchPackingFlags::NoFeature)) matrixToBeMasked.ColumnSlice(t * nS + id, 1).SetValue(0); - processedExistsNoLabelorFeatureMissing = true; + foundLabelOrFeatureMissing = true; } } } - return processedExistsNoLabelorFeatureMissing; + return foundLabelOrFeatureMissing; } /* @@ -1059,7 +1059,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (size_t i=0; iSet(0, 1, MinibatchPackingFlags::SequenceStart); // TODO: strange--start at frame[1] instead of [0]? 
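As background for the Set()/Is() calls appearing here, a compact sketch of the two-level flag bookkeeping they imply: a per-(stream, time) grid plus a per-timestep aggregate that Set() keeps in sync, so Is(t, f) can cheaply answer whether any stream carries f at time t. The data layout below is an assumption inferred from the signatures in this series, not the actual MBLayout:

#include <cstddef>
#include <vector>

enum class PackFlags : char { None = 0, SequenceStart = 1, SequenceEnd = 2, NoFeature = 4, NoLabel = 8 };

struct LayoutSketch
{
    size_t nS, nT;
    std::vector<char> perColumn;   // flags for column (s, t), stored at [s + t * nS]
    std::vector<char> perTimeStep; // aggregate per t: OR over all streams

    LayoutSketch(size_t s, size_t t) : nS(s), nT(t), perColumn(s * t, 0), perTimeStep(t, 0) {}

    void Set(size_t s, size_t t, PackFlags f)
    {
        perColumn[s + t * nS] |= (char)f;  // per-stream flag
        perTimeStep[t] |= (char)f;         // keep the aggregate consistent
    }
    bool Is(size_t t, PackFlags f) const { return (perTimeStep[t] & (char)f) != 0; }
    bool Is(size_t s, size_t t, PackFlags f) const { return (perColumn[s + t * nS] & (char)f) != 0; }
};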
- Base::ResetBound(pMBLayout); + Base::SetMBLayout(pMBLayout); f0 = Inputs(0)->FunctionValues(); f1 = Inputs(1)->FunctionValues(); diff --git a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h index 5be3f0cc42f1..36017ca63f2f 100644 --- a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h @@ -1098,7 +1098,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t id = j % nS; // this is the stream return Base::MaskMissingColumnsToZero(matrixToBeMasked, t, id); #if 0 // old version prior to merging with Base version - bool processedExistsNoLabelorFeatureMissing = false; /// set to true if either nolabel or feature missing is processed + bool foundLabelOrFeatureMissing = false; /// set to true if either nolabel or feature missing is processed if (!m_pMBLayout->IsAllNone()) { @@ -1107,12 +1107,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (m_pMBLayout->Is(id, t, MinibatchPackingFlags::NoLabel)) { matrixToBeMasked.ColumnSlice(t * nS + id,1).SetValue(0); - processedExistsNoLabelorFeatureMissing = true; + foundLabelOrFeatureMissing = true; } } } - return processedExistsNoLabelorFeatureMissing; + return foundLabelOrFeatureMissing; #endif } From cde6220292702e848fbb6484e01207dfc80f6224 Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Mon, 21 Sep 2015 18:11:39 -0700 Subject: [PATCH 38/44] (comment) --- MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h index 38f8e72cfc1d..60ab38482e88 100644 --- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h +++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h @@ -1371,6 +1371,8 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb }; // note: this is called to write into our existing MBLayout instance + // TODO: This is broken. Instead, we should pass this from the reader, or better, do batching inside here. + // The problem is that we cannot post-process. E.g. is the layout guaranteed to reflect the minibatch size, in the case of no recurrence?? const MBLayoutPtr & GetMBLayoutPtr() { return m_pMBLayout; } protected: From a2f111a88b342eec9a4b359f32d2fd51ff15233e Mon Sep 17 00:00:00 2001 From: Frank Seide Date: Tue, 22 Sep 2015 10:29:39 -0700 Subject: [PATCH 39/44] made ComputeInputPartialSRP() and EvaluateThisNodeSRP() class members and reduced the parameter list; added a comment that analyzes whether we need that secondary pMBNoLayout--answer is no --- Common/Include/basetypes.h | 22 +++++++ .../RecurrentNodes.h | 64 +++++++++---------- 2 files changed, 53 insertions(+), 33 deletions(-) diff --git a/Common/Include/basetypes.h b/Common/Include/basetypes.h index bfe8a64d1add..013a217ecb10 100644 --- a/Common/Include/basetypes.h +++ b/Common/Include/basetypes.h @@ -1054,6 +1054,28 @@ class RegisterModule // why is this in basetypes.h? // boundary flags for a frame +// (note for refactoring:) This is currently used by +// - RecurrentNodes: SetMBLayout(), ComputeInputPartialSRP(), EvaluateThisNodeSRP(), .. 
check SentenceBegin_or_End +// (plus PastValueNode and FutureValueNode base class template parameter) +// - SimpleEvaluator.h: FindBestPath(), FindBestPathWithVariableLength() --doing a bad hack, pretending MBs of 1 frame +// deprecated: +// - LSTMNode +// through ComputationNode::MaskMissingColumnsToZero(): +// - nodes where the user explicitly requested masking (NeedToMaskMissingColumnsToZero() == true) +// - ComputeGradientForChildren() +// - all training and evaluation criterion nodes .. TODO: double-confirm it's all Training nodes; but those also have NodeDoesItsOwnCustomizedMissingColumnsMasking() == true +// in core classes: +// - ComputationNetwork: GetNumSamplesWithLabel(), MaskMissingColumnsToZero() --both are cheap in case of no flags set +// - Matrix.h +// - SGD::DecimateMinibatchWithSentences() (should be done differently) +// and readers that generate the flags: +// - HTKMLFReader::GetMinibatchToTrainOrTest() +// - BatchLUSequenceReader::EnsureDataAvailable(), GetMinibatch(), DataEnd() +// - EvalReader::CopyMBLayoutTo() +// - BatchSequenceReader::SetSentenceBegin() +// others: +// - MathPerformanceTests.cpp +// ==> conclusion: safe to ALWAYS pass the full layout, will not be inefficient enum class MinibatchPackingFlags : char // (note: not using unsigned char because these go into a matrix, and we use Matrix, since we use it as a data holder) { None = 0, diff --git a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h index 2999806394e1..811dbe03f7ec 100644 --- a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h +++ b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h @@ -161,16 +161,20 @@ namespace Microsoft { namespace MSR { namespace CNTK { InvalidArgument("PastValue and FutureValue operations only take one input."); assert(m_functionValues.GetNumRows() == GradientValues().GetNumRows()); - assert(m_pMBLayout); - const auto colBoundaryFlags = m_pShiftedMBLayout->GetFrame(frameRange.t()); - ComputeInputPartialSRP(frameRange, m_timeStep, Inputs(0)->GradientValues(), GradientValues(), colBoundaryFlags.first, colBoundaryFlags.second); + ComputeInputPartialRP(frameRange); } - static void WINAPI ComputeInputPartialSRP(const FrameRange & frameRange, int timeStep, - Matrix& inputGradientValues, const Matrix& gradientValues, - const Matrix& colBoundaryFlags, MinibatchPackingFlags minibatchPackingFlags) + void ComputeInputPartialRP(const FrameRange & frameRange) { + // this is the result of refactoring; feel free to clean up further: + int timeStep = m_timeStep; + Matrix& inputGradientValues = Inputs(0)->GradientValues(); + const Matrix& gradientValues = GradientValues(); + const auto frameLayout = m_pShiftedMBLayout->GetFrame(frameRange.t()); + const Matrix& colBoundaryFlags = frameLayout.first; + const MinibatchPackingFlags & minibatchPackingFlags = frameLayout.second; + size_t timeIdxInSeq = frameRange.t(); size_t mNbr = frameRange.NumCols(); assert(timeIdxInSeq >= 0); @@ -206,10 +210,18 @@ namespace Microsoft { namespace MSR { namespace CNTK { // this one differs in the starting condition virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) = 0; - void EvaluateThisNodeSRP(const FrameRange & frameRange, const int timeStep, - Matrix& functionValues, const Matrix& delayedActivation, const Matrix& inputFunctionValues, - const ElemType & initStateValue, const Matrix & colBoundaryFlags, const MinibatchPackingFlags minibatchPackingFlags) + void EvaluateThisNodeRP(const 
     {
+        // this is the result of refactoring; feel free to clean up further
+        const int timeStep = m_timeStep;
+        Matrix<ElemType>& functionValues = m_functionValues;
+        const Matrix<ElemType>& delayedActivation = m_delayedActivation;
+        const Matrix<ElemType>& inputFunctionValues = Inputs(0)->FunctionValues();
+        const ElemType & initStateValue = m_initialActivationValue;
+        const auto colBoundaryFlags1 = m_pShiftedMBLayout->GetFrame(frameRange.t());
+        const Matrix<float> & colBoundaryFlags = colBoundaryFlags1.first;
+        const MinibatchPackingFlags & minibatchPackingFlags = colBoundaryFlags1.second;
+
         size_t timeIdxInSeq = frameRange.t();
         size_t mNbr = frameRange.NumCols();
         assert(timeStep > 0);
@@ -346,7 +358,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 #define UsingDelayedValueNodeMembers UsingComputationNodeMembers; \
     using Base::m_initialActivationValue; using Base::m_delayedActivation; using Base::m_timeStep; \
     using Base::m_pShiftedMBLayout; using Base::m_historyAlreadySet; \
-    using Base::ComputeInputPartialSRP; using Base::EvaluateThisNodeSRP
+    using Base::ComputeInputPartialRP; using Base::EvaluateThisNodeRP

     // =======================================================================
     // PastValueNode -- delay node
     // =======================================================================
@@ -375,12 +387,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             InvalidArgument("PastValue and FutureValue operations only take one input.");

         int nbrSamples = GradientValues().GetNumCols() / GetNumParallelSequences();
+        // TODO: call the looping version below to avoid code dup
         for (int timeIdxInSeq = nbrSamples - 1; timeIdxInSeq >= 0; timeIdxInSeq--)
-        {
-            // TODO: call the looping version below to avoid code dup
-            const auto colBoundaryFlags = m_pShiftedMBLayout->GetFrame(timeIdxInSeq);
-            ComputeInputPartialSRP(FrameRange(timeIdxInSeq, GetNumParallelSequences()), m_timeStep, Inputs(0)->GradientValues(), GradientValues(), colBoundaryFlags.first, colBoundaryFlags.second);
-        }
+            ComputeInputPartialRP(FrameRange(timeIdxInSeq, GetNumParallelSequences()));
     }

     // TODO: why is this loop not in the underlying execution engine? This node should not have to know about this.
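(note on the semantics being refactored here: within one sequence, PastValue computes y[t] = x[t - timeStep], substituting initStateValue wherever the read would cross the sequence start. Below is a minimal stand-alone sketch of just that rule -- illustrative code only, not CNTK API; it ignores the packed multi-sequence layout, the m_delayedActivation carry-over between minibatches, and the boundary-flag masking:)

    #include <vector>

    // y[t] = x[t - timeStep] inside the sequence; positions whose source index
    // would fall before the sequence start read initStateValue instead
    std::vector<float> PastValue(const std::vector<float>& x, int timeStep, float initStateValue)
    {
        std::vector<float> y(x.size());
        for (int t = 0; t < (int)x.size(); t++)
            y[t] = (t >= timeStep) ? x[t - timeStep] : initStateValue;
        return y;
    }

(FutureValue is the mirror image, y[t] = x[t + timeStep], with initStateValue at positions that would read past the sequence end.)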
@@ -389,12 +398,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         assert(m_timeStep > 0);

         int nbrSamples = Inputs(0)->FunctionValues().GetNumCols() / GetNumParallelSequences();
+        // TODO: call the looping version below to avoid code dup
         for (int timeIdxInSeq = 0; timeIdxInSeq < nbrSamples; timeIdxInSeq++)
-        {
-            // TODO: call the looping version below to avoid code dup
-            const auto colBoundaryFlags = m_pShiftedMBLayout->GetFrame(timeIdxInSeq);
-            EvaluateThisNodeSRP(FrameRange(timeIdxInSeq, GetNumParallelSequences()), m_timeStep, m_functionValues, m_delayedActivation, Inputs(0)->FunctionValues(), m_initialActivationValue, colBoundaryFlags.first, colBoundaryFlags.second);
-        }
+            EvaluateThisNodeRP(FrameRange(timeIdxInSeq, GetNumParallelSequences()));

         //set the past activity to be used by next minibatch
         m_delayedActivation = Inputs(0)->FunctionValues();
@@ -409,8 +415,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         if (frameRange.t() == 0 && m_historyAlreadySet == false)
             m_delayedActivation = Inputs(0)->FunctionValues();

-        const auto colBoundaryFlags = m_pShiftedMBLayout->GetFrame(frameRange.t());
-        EvaluateThisNodeSRP(frameRange, m_timeStep, m_functionValues, m_delayedActivation, Inputs(0)->FunctionValues(), m_initialActivationValue, colBoundaryFlags.first, colBoundaryFlags.second);
+        EvaluateThisNodeRP(frameRange);
     }
 };
@@ -445,12 +450,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         InvalidArgument("PastValue and FutureValue operations only take one input.");

         int nbrSamples = GradientValues().GetNumCols() / GetNumParallelSequences();
+        // TODO: call the looping version below to avoid code dup
         for (int timeIdxInSeq = 0; timeIdxInSeq < nbrSamples; timeIdxInSeq++)
-        {
-            // TODO: call the looping version below to avoid code dup
-            const auto colBoundaryFlags = m_pShiftedMBLayout->GetFrame(timeIdxInSeq);
-            ComputeInputPartialSRP(FrameRange(timeIdxInSeq, GetNumParallelSequences()), m_timeStep, Inputs(0)->GradientValues(), GradientValues(), colBoundaryFlags.first, colBoundaryFlags.second);
-        }
+            ComputeInputPartialRP(FrameRange(timeIdxInSeq, GetNumParallelSequences()));
     }

     virtual void EvaluateThisNode()
@@ -459,10 +461,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         int nbrSamples = Inputs(0)->FunctionValues().GetNumCols() / GetNumParallelSequences();
         for (int timeIdxInSeq = nbrSamples - 1; timeIdxInSeq >= 0; timeIdxInSeq--)
-        {
-            const auto colBoundaryFlags = m_pShiftedMBLayout->GetFrame(timeIdxInSeq);
-            EvaluateThisNodeSRP(FrameRange(timeIdxInSeq, GetNumParallelSequences()), m_timeStep, m_functionValues, m_delayedActivation, Inputs(0)->FunctionValues(), m_initialActivationValue, colBoundaryFlags.first, colBoundaryFlags.second);
-        }
+            EvaluateThisNodeRP(FrameRange(timeIdxInSeq, GetNumParallelSequences()));

         //set the future activity to be used by next minibatch
         m_delayedActivation = Inputs(0)->FunctionValues();
@@ -475,8 +474,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         if (frameRange.t() == Inputs(0)->FunctionValues().GetNumCols() / GetNumParallelSequences() - 1)
             m_delayedActivation = Inputs(0)->FunctionValues();

-        const auto colBoundaryFlags = m_pShiftedMBLayout->GetFrame(frameRange.t());
-        EvaluateThisNodeSRP(frameRange, m_timeStep, m_functionValues, m_delayedActivation, Inputs(0)->FunctionValues(), m_initialActivationValue, colBoundaryFlags.first, colBoundaryFlags.second);
+        EvaluateThisNodeRP(frameRange);
     }
 };

From c86a0f12c3bf6b188b53f8ceab320965b4400534 Mon Sep 17 00:00:00 2001
From: Frank Seide
Date: Tue, 22 Sep 2015 11:00:50 -0700
Subject: [PATCH 40/44] removed pMBNoLayout, Network now passes the same layout to all nodes (I checked, I think it should not cause inefficiencies)

---
 .../CNTKComputationNetworkLib/ComputationNetwork.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h
index 60ab38482e88..037a860788dc 100644
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h
@@ -75,7 +75,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb
     // -----------------------------------------------------------------------

     ComputationNetwork(DEVICEID_TYPE deviceId = AUTOPLACEMATRIX) :
-        m_deviceId(deviceId), m_pMBLayout(make_shared<MBLayout>()), m_pMBNoLayout(make_shared<MBLayout>())
+        m_deviceId(deviceId), m_pMBLayout(make_shared<MBLayout>())//, m_pMBNoLayout(make_shared<MBLayout>())
     {
         m_randomSeedOffset = 0;
         m_actualMBSize = 0;
@@ -539,7 +539,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb
         // We have a matching layout structure that matches pMBLayout in number of sequences while not having any flags set.
         // This is used for nodes that do not need recurrent processing, but can be done in batch.
         // TODO: Does it harm if we have flags, for those that can be done in batch? I.e. why don't we just always provide flags?
-        m_pMBNoLayout->Resize(m_pMBLayout->GetNumParallelSequences(), 0);   // TODO: this is not nice, but we currently have no trigger to detect changes in layout
+        //m_pMBNoLayout->Resize(m_pMBLayout->GetNumParallelSequences(), 0);   // TODO: this is not nice, but we currently have no trigger to detect changes in layout

         // prepare to compute with the subnetwork that this rootNode depends on, including
         // - auto-detecting recurrent loops
@@ -566,10 +566,10 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb
         // MaskMissingColumnsToZero() will test whether the layout is all none, and then skip.
         // This is the only place where SetMBLayout() is ever called on a node. Hence, we could test NeedToMaskMissingColumnsToZero() instead.
         // Note that NeedToMaskMissingColumnsToZero() is true only where it is necessary; that is, most nodes have it set to false (since most nodes can just map garbage-in-garbage-out).
-        if ((*nodeIter)->NeedToMaskMissingColumnsToZero())
+        //if ((*nodeIter)->NeedToMaskMissingColumnsToZero())
             (*nodeIter)->SetMBLayout(m_pMBLayout);
-        else
-            (*nodeIter)->SetMBLayout(m_pMBNoLayout);
+        //else
+        //    (*nodeIter)->SetMBLayout(m_pMBNoLayout);
         (*nodeIter)->VerifyNumParallelSequences(GetNumParallelSequences());
     }

From 8b67a801a030b98d88bece4bdb603e86332419ca Mon Sep 17 00:00:00 2001
From: Frank Seide
Date: Tue, 22 Sep 2015 11:30:13 -0700
Subject: [PATCH 41/44] SetRequestNodesMultiSeqHandling() changed so that, instead of fixing things for the user under the hood, it instead forces users to fix it on their side through runtime checks

---
 .../ComputationNetwork.cpp | 26 ++++++++++++++++---
 .../ComputationNode.h      |  8 +++---
 2 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp
index 42c73d02263a..1a1cf70bc1f1 100644
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp
@@ -312,6 +312,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         return false;
     }

+    // note: all of these have NodeDoesItsOwnCustomizedMissingColumnsMasking() returning true
     bool ComputationNetwork::IsTypicalCriterionNode(ComputationNodeBasePtr nodePtr)
     {
         if (nodePtr->OperationName() == OperationNameOf(SquareErrorNode) ||
@@ -336,23 +337,40 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         //SumElements node will generate a scalar value and so it should never require special handling
         //TransposeNode will change the size of columns and so it should also not be included for special handling
         //their child node should instead
+#if 0
         if (node->OperationName() != OperationNameOf(SumElementsNode) &&
             node->OperationName() != OperationNameOf(TransposeNode) &&
             node->OperationName() != OperationNameOf(MeanNode) &&
             node->OperationName() != OperationNameOf(InvStdDevNode)
             )
             node->SetMaskMissingColumnsToZero();
+#else
+        if (node->OperationName() == OperationNameOf(SumElementsNode) ||
+            node->OperationName() == OperationNameOf(TransposeNode) ||
+            node->OperationName() == OperationNameOf(MeanNode) ||
+            node->OperationName() == OperationNameOf(InvStdDevNode))
+        {
+            RuntimeError("SetRequestNodesMultiSeqHandling: NodesReqMultiSeqHandling cannot be used with operation '%ls'\nIn the past, CNTK silently fixed this; now please change your NDL instead", node->OperationName().c_str());
+        }
+        node->SetMaskMissingColumnsToZero();
+#endif
     }

-    //if a typical criterion node is used as the training criterion node we assume it requires multiseq handling
-    //this is for backward compatibility
+    // if a typical criterion node is used as the training criterion node we assume it requires multiseq handling
+    // this is for backward compatibility
+    // All of these have NodeDoesItsOwnCustomizedMissingColumnsMasking() return true, i.e. they will not have MaskMissingColumnsToZero() auto-called from Network.
+    // Hence, instead of setting the flag, we just ensure that this is true.
     for (auto & node : m_finalCriteria)
         if (IsTypicalCriterionNode(node))
-            node->SetMaskMissingColumnsToZero();
+            //node->SetMaskMissingColumnsToZero();
+            if (!node->NodeDoesItsOwnCustomizedMissingColumnsMasking())
+                LogicError("criterion %ls's NodeDoesItsOwnCustomizedMissingColumnsMasking() function must return true", node->OperationName().c_str());

     for (auto & node : m_evalNodes)
         if (IsTypicalCriterionNode(node))
-            node->SetMaskMissingColumnsToZero();
+            //node->SetMaskMissingColumnsToZero();
+            if (!node->NodeDoesItsOwnCustomizedMissingColumnsMasking())
+                LogicError("criterion %ls's NodeDoesItsOwnCustomizedMissingColumnsMasking() function must return true", node->OperationName().c_str());
 }

 template <typename N> void ComputationNetwork::GetNodesRequiringX(std::list<ComputationNodeBasePtr> & nodesRequirePreComputation, const ComputationNodeBasePtr rootNode, bool checkComputed)
diff --git a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h
index 0f041f709972..7750460f4602 100644
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h
@@ -222,6 +222,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         return m_pMBLayout->GetNumParallelSequences();
     }

+    // indicates whether special handling is needed. The standard handling is to just mask the function values after the evaluation and mask the gradient before gradient computation for the children; this is not valid for all criterion nodes whose result is a scalar.
+    // overridden to return true by training/eval criteria (and the soon-to-be-deprecated PairNode, LSTMNode)
+    virtual bool NodeDoesItsOwnCustomizedMissingColumnsMasking() { return false; }
+
     int64_t UpdateEvalTimeStamp()
     {
         m_evalTimeStamp = atomic_fetch_add(&s_timeStampCounter, (unsigned long long int) 1);    // TODO: does this really need to be atomic? We are not multi-threaded
@@ -1224,10 +1228,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     virtual void GetErrorsToPreviousMinibatch(Matrix<ElemType>&) {}
     virtual void SetErrorsFromFutureMinibatch(Matrix<ElemType>&) {}

-    // indicatess whether special handling is needed.The standard handleing will be just mask the function values after the evalaution and mask the gradient before gradiant computation for the children. this is not valid for all criterion nodes whose result is a scalar.
-    // overridden to return true by training/eval criteria (and the soon-to-be-deprecated PairNode, LSTMNode)
-    virtual bool NodeDoesItsOwnCustomizedMissingColumnsMasking() { return false; }
-
 protected:

     Matrix<ElemType> m_functionValues, m_gradientValues;

From 51fb96bdf9182205b7efbf21c56047ffef82e060 Mon Sep 17 00:00:00 2001
From: Amit Agarwal
Date: Tue, 22 Sep 2015 12:40:10 -0700
Subject: [PATCH 42/44] Use double precision rsqrt in FSAdagrad kernel when ElemType is double

---
 Math/Math/GPUMatrix.cu            | 14 +++++++++-----
 Math/Math/GPUMatrix.h             |  3 +--
 Math/Math/GPUMatrixCUDAKernels.cu | 11 ++++++++++-
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/Math/Math/GPUMatrix.cu b/Math/Math/GPUMatrix.cu
index cf3fbf4bdef6..4c525b92ba15 100755
--- a/Math/Math/GPUMatrix.cu
+++ b/Math/Math/GPUMatrix.cu
@@ -1278,23 +1278,27 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     }

     template<class ElemType>
-    void GPUMatrix<ElemType>::FSAdagrad(GPUMatrix<ElemType>& gradients, GPUMatrix<ElemType>& functionValues,
-                                        ElemType learnRatePerSample, ElemType momentum, ElemType adaWeight, ElemType adaMul)
+    void GPUMatrix<ElemType>::FSAdagrad(GPUMatrix<ElemType>& gradients,
+                                        GPUMatrix<ElemType>& functionValues,
+                                        ElemType learnRatePerSample,
+                                        ElemType momentum,
+                                        ElemType adaWeight,
+                                        ElemType adaMul)
     {
         size_t numColsNeeded = 2 * gradients.GetNumCols();

-        if (IsEmpty() || GetNumCols() < numColsNeeded)
+        if (IsEmpty() || (GetNumCols() < numColsNeeded))
         {
             Resize(gradients.GetNumRows(), numColsNeeded);
             SetValue(0.0);
         }

-        assert(GetNumRows() == gradients.GetNumRows() && GetNumCols() == numColsNeeded);
+        assert((GetNumRows() == gradients.GetNumRows()) && (GetNumCols() == numColsNeeded));

         size_t n = gradients.GetNumElements();
         int blocksPerGrid = (n + threadsPerBlock - 1) / threadsPerBlock;
         _fsadagrad<<<blocksPerGrid, threadsPerBlock>>>(n, gradients.m_pArray, m_pArray, m_pArray + n, functionValues.m_pArray,
-                                                       learnRatePerSample, momentum, adaWeight, adaMul);
+                                                       learnRatePerSample, momentum, adaWeight, adaMul);
     }

     template<class ElemType>
diff --git a/Math/Math/GPUMatrix.h b/Math/Math/GPUMatrix.h
index b80900f95ad7..18a9ef0cb05e 100755
--- a/Math/Math/GPUMatrix.h
+++ b/Math/Math/GPUMatrix.h
@@ -124,8 +124,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         ElemType* BufferPointer() const {return m_pArray;}

         ElemType Adagrad(GPUMatrix<ElemType>& gradients, const bool needAveMultiplier);
-        void FSAdagrad(GPUMatrix<ElemType>& gradients, GPUMatrix<ElemType>& functionValues,
-                       ElemType learnRatePerSample, ElemType momentum, ElemType adaWeight, ElemType adaMul);
+        void FSAdagrad(GPUMatrix<ElemType>& gradients, GPUMatrix<ElemType>& functionValues, ElemType learnRatePerSample, ElemType momentum, ElemType adaWeight, ElemType adaMul);
         ElemType RmsProp(GPUMatrix<ElemType>& gradients, ElemType RMS_GAMMA, ElemType RMS_WGT_INC, ElemType RMS_WGT_MAX, ElemType RMS_WGT_DEC, ElemType RMS_WGT_MIN, const bool needAveMultiplier);

         void Reshape(const size_t numRows, const size_t numCols);
diff --git a/Math/Math/GPUMatrixCUDAKernels.cu b/Math/Math/GPUMatrixCUDAKernels.cu
index d45b9e006d87..073fa9d2e1ad 100755
--- a/Math/Math/GPUMatrixCUDAKernels.cu
+++ b/Math/Math/GPUMatrixCUDAKernels.cu
@@ -1123,7 +1123,16 @@ __global__ void _fsadagrad(CUDA_LONG size, ElemType* grad, ElemType* smoothAda,
         smoothAda[idx] = adaSqr;
         if (adaSqr != 0.0f)
         {
-            ElemType w = adaMul * rsqrtf(adaSqr);
+            ElemType w;
+            if (sizeof(ElemType) == sizeof(double))
+            {
+                w = adaMul * rsqrt(adaSqr);
+            }
+            else
+            {
+                w = adaMul * rsqrtf(adaSqr);
+            }
+
             if (w > 10.0f)
                 w = 10.0f;
             g *= w;

From dce8e659bf4182ada5b6fe48238ed09f0b78143f Mon Sep 17 00:00:00 2001
From: Amit Agarwal
Date: Tue, 22 Sep 2015 13:44:12 -0700
Subject: [PATCH 43/44] Added CPU support for FSAdagrad
---
 Math/Math/CPUMatrix.cpp | 51 +++++++++++++++++++++++++++++++++++++++++
 Math/Math/CPUMatrix.h   |  1 +
 Math/Math/Matrix.cpp    |  7 +++---
 3 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/Math/Math/CPUMatrix.cpp b/Math/Math/CPUMatrix.cpp
index 3bac9705dc7e..f5609d449db0 100644
--- a/Math/Math/CPUMatrix.cpp
+++ b/Math/Math/CPUMatrix.cpp
@@ -1117,6 +1117,57 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         return 1;
     }

+    template<class ElemType>
+    void CPUMatrix<ElemType>::FSAdagrad(CPUMatrix<ElemType>& gradients,
+                                        CPUMatrix<ElemType>& functionValues,
+                                        ElemType learnRatePerSample,
+                                        ElemType momentum,
+                                        ElemType adaWeight,
+                                        ElemType adaMul)
+    {
+        size_t numColsNeeded = 2 * gradients.GetNumCols();
+
+        if (IsEmpty() || (GetNumCols() < numColsNeeded))
+        {
+            Resize(gradients.GetNumRows(), numColsNeeded);
+            SetValue(0.0);
+        }
+
+        assert((GetNumRows() == gradients.GetNumRows()) && (GetNumCols() == numColsNeeded));
+
+        size_t n = gradients.GetNumElements();
+        ElemType* grad = gradients.m_pArray;
+        ElemType* smoothAda = m_pArray;
+        ElemType* smoothMom = m_pArray + n;
+        ElemType* val = functionValues.m_pArray;
+#pragma omp parallel for
+        // TODO: Unroll 4-times for better performance leveraging vectorization
+        for (long i = 0; i < n; i++)
+        {
+            ElemType g = grad[i];
+            ElemType adaSqr = adaWeight * smoothAda[i] + (1.0f - adaWeight) * g * g;
+            smoothAda[i] = adaSqr;
+            if (adaSqr != 0.0f)
+            {
+                ElemType ada = sqrt(adaSqr);
+                ElemType w = adaMul * ((ElemType)1.0 / ada);
+
+                if (w > 10.0f)
+                    w = 10.0f;
+                g *= w;
+            }
+
+            if (momentum > 0.0f)
+            {
+                g = momentum * smoothMom[i] + (1.0f - momentum) * g;
+                smoothMom[i] = g;
+            }
+
+            g *= learnRatePerSample;
+            val[i] -= g;
+        }
+    }
+
     template<class ElemType>
     ElemType CPUMatrix<ElemType>::RmsProp(CPUMatrix<ElemType>& gradients,
                                           ElemType RMS_GAMMA,
diff --git a/Math/Math/CPUMatrix.h b/Math/Math/CPUMatrix.h
index f9899ecc2936..11f980e44665 100644
--- a/Math/Math/CPUMatrix.h
+++ b/Math/Math/CPUMatrix.h
@@ -54,6 +54,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         CPUMatrix<ElemType>& SetColumnSlice(const CPUMatrix<ElemType>& fromMatrix, size_t startColumn, size_t numCols);

         ElemType Adagrad(CPUMatrix<ElemType>& gradients, const bool needAveMultiplier);
+        void FSAdagrad(CPUMatrix<ElemType>& gradients, CPUMatrix<ElemType>& functionValues, ElemType learnRatePerSample, ElemType momentum, ElemType adaWeight, ElemType adaMul);
         ElemType RmsProp(CPUMatrix<ElemType>& gradients,
                          ElemType RMS_GAMMA,
                          ElemType RMS_WGT_INC,
                          ElemType RMS_WGT_MAX,
                          ElemType RMS_WGT_DEC,
                          ElemType RMS_WGT_MIN,
                          const bool needAveMultiplier);
diff --git a/Math/Math/Matrix.cpp b/Math/Math/Matrix.cpp
index 131b68424e63..870bda9e6821 100644
--- a/Math/Math/Matrix.cpp
+++ b/Math/Math/Matrix.cpp
@@ -1324,18 +1324,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     template<class ElemType>
     void Matrix<ElemType>::FSAdagrad(size_t mbSize, Matrix<ElemType>& gradients, Matrix<ElemType>& functionValues, const ElemType learnRatePerSample, const ElemType momentum)
     {
-        // REVEW alexeyk: hardcoded for now, taken from DBN. Naming is the same as in DBN.
+        // TODO: The values of 'adagradT' and 'targetadagradavdenom' are currently hardcoded constants taken from DBN (empirically determined).
+        // These should be made configurable if needed
         const size_t adagradT = 2 * 3600 * 100;
+        const ElemType targetadagradavdenom = 0.0025; // 1/400 magic constant
         const ElemType adagradkeepweight = static_cast<ElemType>(exp(-1.0 * mbSize / adagradT));
-        const ElemType targetadagradavdenom = 0.0025; // 1/400 magic constant

         static ElemType aggadagradsqrframes = 0;
         aggadagradsqrframes = adagradkeepweight * aggadagradsqrframes + (1.0f - adagradkeepweight) * mbSize;
         const ElemType targetadagradavdenom_x_sqrtadagradsqrframes = static_cast<ElemType>(targetadagradavdenom * sqrt(aggadagradsqrframes));

         DISPATCH_MATRIX_ON_FLAG(&gradients,
             &gradients,
-            SetDataLocation(CPU),
+            m_CPUMatrix->FSAdagrad(*gradients.m_CPUMatrix, *functionValues.m_CPUMatrix, learnRatePerSample, momentum, adagradkeepweight, targetadagradavdenom_x_sqrtadagradsqrframes); SetDataLocation(CPU),
             m_GPUMatrix->FSAdagrad(*gradients.m_GPUMatrix, *functionValues.m_GPUMatrix, learnRatePerSample, momentum, adagradkeepweight, targetadagradavdenom_x_sqrtadagradsqrframes); SetDataLocation(GPU),
             NOT_IMPLEMENTED,
             NOT_IMPLEMENTED

From 7777c63919599920a1163fedcf947a0cee45daf5 Mon Sep 17 00:00:00 2001
From: Frank Seide
Date: Tue, 22 Sep 2015 16:34:53 -0700
Subject: [PATCH 44/44] added predefined macro RowStack() (was missing); fixed spelling RowStack::m_startRowIndeces -> m_startRowIndices

---
 .../CNTK/ExperimentalNetworkBuilder.cpp |  2 +-
 .../LinearAlgebraNodes.h                | 18 ++++++++++--------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/MachineLearning/CNTK/ExperimentalNetworkBuilder.cpp b/MachineLearning/CNTK/ExperimentalNetworkBuilder.cpp
index 94eb70f816ed..e94212ce97e9 100644
--- a/MachineLearning/CNTK/ExperimentalNetworkBuilder.cpp
+++ b/MachineLearning/CNTK/ExperimentalNetworkBuilder.cpp
@@ -63,6 +63,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         L"FutureValue(rows, cols, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'FutureValue' ; inputs = input /*plus the function args*/ ]\n"
         L"RowSlice(startIndex, numRows, input, needGradient = false, tag='') = new ComputationNode [ operation = 'RowSlice' ; inputs = input /*plus the function args*/ ]\n"
         L"RowRepeat(input, numRepeats, needGradient = false, tag='') = new ComputationNode [ operation = 'RowRepeat' ; inputs = input /*plus the function args*/ ]\n"
+        L"RowStack(inputs, tag='') = new ComputationNode [ operation = 'RowStack' /*plus the function args*/ ]\n"
         L"Reshape(input, numRows, imageWidth = 0, imageHeight = 0, imageChannels = 0, tag='') = new ComputationNode [ operation = 'Reshape' ; inputs = input /*plus the function args*/ ]\n"
         L"ConvolutionNode(weightNode, inputValueNode, kernelWidth, kernelHeight, outputChannels, horizontalSubsample, verticalSubsample, zeroPadding = false, maxTempMemSizeInSamples = 0, tag='') = new ComputationNode [ operation = 'Convolution' ; inputs = (weightNode : inputValueNode) /*plus the function args*/ ]\n"
         L"MaxPoolingNode(input, windowWidth, windowHeight, horizontalSubsample, verticalSubsample, tag='') = new ComputationNode [ operation = 'MaxPooling' ; inputs = input /*plus the function args*/ ]\n"
@@ -114,7 +115,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         BinaryStandardNode(Plus, leftMatrix, rightMatrix)
         UnaryStandardNode(RectifiedLinear, z)
         //BinaryStandardNode(RowElementTimesNode)
-        //BinaryStandardNode(RowStackNode)
         BinaryStandardNode(Scale, scalarScalingFactor, matrix)
         //BinaryStandardNode(SequenceDecoderNode)
         UnaryStandardNode(Sigmoid, z)
diff --git a/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h b/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h
index a91ed0d81ff6..ddad05026836 100644
--- a/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h
+++ b/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h
@@ -447,6 +447,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     // -----------------------------------------------------------------------

     //this node is used to extract part of the input by rows as the output
+    // TODO: Really? RowStack indicates something different.
     //it has to be contiguous segments of rows since each column is treated as one sample
     template<class ElemType>
     class RowStackNode : public ComputationNode<ElemType>
@@ -466,7 +467,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             if (flags & CopyNodeFlags::copyNodeChildren)
             {
                 node->m_children = m_children;
-                node->m_startRowIndeces = m_startRowIndeces;
+                node->m_startRowIndices = m_startRowIndices;
                 node->m_inputMatrices = m_inputMatrices;
             }
         }
@@ -478,7 +479,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         {
             if (inputIndex >= ChildrenSize())
                 InvalidArgument("RowStack-ComputeInputPartial: inputIndex out of range.");
-            ComputeInputPartialS(Inputs(inputIndex)->GradientValues(), GradientValues(), m_startRowIndeces[inputIndex], m_startRowIndeces[inputIndex + 1] - m_startRowIndeces[inputIndex]);
+            ComputeInputPartialS(Inputs(inputIndex)->GradientValues(), GradientValues(), m_startRowIndices[inputIndex], m_startRowIndices[inputIndex + 1] - m_startRowIndices[inputIndex]);
         }

         virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange)
@@ -489,7 +490,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             Matrix<ElemType> sliceInputGrad = Inputs(inputIndex)->GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));
             Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check(frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences(), m_pMBLayout));

-            ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_startRowIndeces[inputIndex], m_startRowIndeces[inputIndex+1] - m_startRowIndeces[inputIndex]);
+            ComputeInputPartialS(sliceInputGrad, sliceOutputGrad, m_startRowIndices[inputIndex], m_startRowIndices[inputIndex+1] - m_startRowIndices[inputIndex]);
         }

         static void WINAPI ComputeInputPartialS(Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues, const size_t startIndex, const size_t numRows)
@@ -509,7 +510,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             EvaluateThisNodeS(sliceFunctionValues, m_inputMatrices, frameRange.t() * GetNumParallelSequences(), GetNumParallelSequences());
         }

-        static void WINAPI EvaluateThisNodeS(Matrix<ElemType>& functionValues, const std::vector<const Matrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols)
+        // TODO: change to FrameRange
+        void EvaluateThisNodeS(Matrix<ElemType>& functionValues, const std::vector<const Matrix<ElemType>*>& inputMatrices, const size_t sliceStartCol, const size_t sliceNumCols)
         {
             functionValues.AssignRowStackValuesOf(inputMatrices, sliceStartCol, sliceNumCols);
 #if NANCHECK
@@ -528,11 +530,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                     LogicError("RowStack operation: the input node is NULL.");

             size_t numCols = Inputs(0)->FunctionValues().GetNumCols();
-            m_startRowIndeces.resize(ChildrenSize()+1);
+            m_startRowIndices.resize(ChildrenSize()+1);
             m_inputMatrices.resize(ChildrenSize());

             size_t totalRows = 0;
-            m_startRowIndeces[0] = 0;
+            m_startRowIndices[0] = 0;

             for (int i = 0; i < ChildrenSize(); i++)
             {
@@ -549,7 +551,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                 totalRows += numRows;
                 m_inputMatrices[i] = &childMatrix;

-                m_startRowIndeces[i + 1] = m_startRowIndeces[i] + numRows;
+                m_startRowIndices[i + 1] = m_startRowIndices[i] + numRows;
             }

             FunctionValues().Resize(totalRows, numCols);
@@ -575,7 +577,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         }

     private:
-        std::vector<size_t> m_startRowIndeces;  //start row number in the stacked matrix of each input (child)
+        std::vector<size_t> m_startRowIndices;  //start row number in the stacked matrix of each input (child)
         std::vector<const Matrix<ElemType>*> m_inputMatrices;
     };
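(note on the bookkeeping set up in Validate() above: m_startRowIndices is a prefix sum over the children's row counts, so input i occupies output rows [m_startRowIndices[i], m_startRowIndices[i+1]). A minimal stand-alone sketch -- names below are illustrative, not CNTK code:)

    #include <cassert>
    #include <vector>

    // prefix sum over the per-child row counts; the final entry equals the
    // total number of rows of the stacked output
    std::vector<size_t> ComputeStartRowIndices(const std::vector<size_t>& childRows)
    {
        std::vector<size_t> startRowIndices(childRows.size() + 1, 0);
        for (size_t i = 0; i < childRows.size(); i++)
            startRowIndices[i + 1] = startRowIndices[i] + childRows[i];
        return startRowIndices;
    }

    int main()
    {
        // children with 3, 5, and 2 rows stack into a 10-row output;
        // the second child occupies rows [3, 8)
        auto idx = ComputeStartRowIndices({ 3, 5, 2 });
        assert(idx[1] == 3 && idx[2] == 8 && idx[3] == 10);
        return 0;
    }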