From d1e14783b6c7f6beb7bc9c82b40ffe997bbd4741 Mon Sep 17 00:00:00 2001 From: Dong Yu Date: Sat, 18 Jul 2015 17:28:14 -0700 Subject: [PATCH] Changed feature packing flag names from utteranceXYZ and sentence_XYZ to SequenceXYZ, making them consistent and equally understandable to speech and text users. Added a flag to indicate the no-feature condition (with the assumption that there is then also no label) to support efficient bi-directional model training. Updated the Windows versions of HTKMLFReader and of the LM and LU sequence readers. Caution: this change does not allow using past and future label information in recurrent networks. The ultimate solution is to have a flag for each feature read in; however, that requires a huge change, so we will do it after the reader refactoring to reduce effort. --- Common/Include/basetypes.h | 25 +- DataReader/HTKMLFReader/HTKMLFReader.cpp | 22 +- DataReader/HTKMLFReader/HTKMLFReader.h | 2 +- DataReader/HTKMLFReader/basetypes.h | 25 +- .../LMSequenceReader/SequenceReader.cpp | 6 +- .../LUSequenceReader/LUSequenceReader.cpp | 18 +- .../LUSequenceReader/LUSequenceReader.h | 4 +- MachineLearning/CNTK/ComputationNode.h | 6 +- MachineLearning/CNTK/RecurrentNodes.h | 74 +- MachineLearning/CNTK/SimpleEvaluator.h | 8 +- .../CNTK/SynchronousExecutionEngine.h | 1744 ++++++++--------- MachineLearning/CNTK/TrainingCriterionNodes.h | 2 +- MachineLearning/CNTKEval/EvalReader.h | 10 +- .../MathPerformanceTests.cpp | 6 +- 14 files changed, 979 insertions(+), 973 deletions(-) diff --git a/Common/Include/basetypes.h b/Common/Include/basetypes.h index 36f28aa2bacc..3adda2072ab0 100644 --- a/Common/Include/basetypes.h +++ b/Common/Include/basetypes.h @@ -1058,21 +1058,24 @@ class RegisterModule /** These macros are used for sentence segmentation information.
*/ -#define SENTENCE_BEGIN ((int) MinibatchPackingFlag::UtteranceStart) -#define SENTENCE_MIDDLE ((int) MinibatchPackingFlag::None) -#define SENTENCE_END ((int) MinibatchPackingFlag::UtteranceEnd) -#define NO_LABELS ((int) MinibatchPackingFlag::NoLabel) +#define SEQUENCE_START ((int) MinibatchPackingFlag::SequenceStart) +#define SEQUENCE_MIDDLE ((int) MinibatchPackingFlag::None) +#define SEQUENCE_END ((int) MinibatchPackingFlag::SequenceEnd) +#define NO_INPUT ((int) MinibatchPackingFlag::NoInput) +#define NO_LABEL ((int) MinibatchPackingFlag::NoLabel) enum class MinibatchPackingFlag : unsigned char { None = 0, - UtteranceStart = 1 << 0, //binary 0001 - UtteranceEnd = 1 << 1, //binary 0010 - NoLabel = 1 << 2, //binary 0100 - - UtteranceStartOrNoLabel = UtteranceStart | NoLabel, - UtteranceEndOrNoLabel = UtteranceEnd | NoLabel, - UtteranceStartOrEndOrNoLabel = UtteranceStart | UtteranceEnd | NoLabel, + SequenceStart = 1 << 0, //binary 0001 + SequenceEnd = 1 << 1, //binary 0010 + NoFeature = 1 << 2, //binary 0100 + NoLabel = 1 << 3, //binary 1000 + + NoInput = NoFeature | NoLabel, // once the reader is refactored, NoInput will no longer be needed + SequenceStartOrNoInput = SequenceStart | NoInput, + SequenceEndOrNoInput = SequenceEnd | NoInput, + SequenceStartOrEndOrNoInput = SequenceStart | SequenceEnd | NoInput, }; inline MinibatchPackingFlag operator| (MinibatchPackingFlag a, MinibatchPackingFlag b) diff --git a/DataReader/HTKMLFReader/HTKMLFReader.cpp b/DataReader/HTKMLFReader/HTKMLFReader.cpp index 5a9f7f04411f..1c1d4baadf4d 100644 --- a/DataReader/HTKMLFReader/HTKMLFReader.cpp +++ b/DataReader/HTKMLFReader/HTKMLFReader.cpp @@ -790,11 +790,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_sentenceBegin.Resize((size_t)1, (size_t)feat.cols()); m_minibatchPackingFlag.resize(feat.cols()); - m_sentenceBegin.SetValue((ElemType) SENTENCE_MIDDLE); - m_sentenceBegin.SetValue(0, 0, (ElemType) SENTENCE_BEGIN); + m_sentenceBegin.SetValue((ElemType) SEQUENCE_MIDDLE); + m_sentenceBegin.SetValue(0, 0, (ElemType) SEQUENCE_START); std::fill(m_minibatchPackingFlag.begin(), m_minibatchPackingFlag.end(), MinibatchPackingFlag::None); - m_minibatchPackingFlag[0] = MinibatchPackingFlag::UtteranceStart; + m_minibatchPackingFlag[0] = MinibatchPackingFlag::SequenceStart; first = false; } @@ -946,7 +946,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { for (size_t j = 0; j < m_mbSize; j++) { - m_sentenceBegin.SetValue(i,j,(ElemType) SENTENCE_MIDDLE); + m_sentenceBegin.SetValue(i,j,(ElemType) SEQUENCE_MIDDLE); } } std::fill(m_minibatchPackingFlag.begin(), m_minibatchPackingFlag.end(), MinibatchPackingFlag::None); @@ -969,8 +969,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { { m_switchFrame[i] = 0; m_sentenceEnd[i] = true; - m_sentenceBegin.SetValue(i, 0, (ElemType)SENTENCE_BEGIN); - m_minibatchPackingFlag[0] = MinibatchPackingFlag::UtteranceStart; + m_sentenceBegin.SetValue(i, 0, (ElemType)SEQUENCE_START); + m_minibatchPackingFlag[0] = MinibatchPackingFlag::SequenceStart; } actualmbsize[i] = m_mbSize; endFr = startFr + actualmbsize[i]; @@ -1121,8 +1121,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_switchFrame[i] = actualmbsize[i]; if (actualmbsize[i] < m_mbSize) { - m_sentenceBegin.SetValue(i, actualmbsize[i], (ElemType)SENTENCE_BEGIN); - m_minibatchPackingFlag[actualmbsize[i]] |=
MinibatchPackingFlag::SequenceStart; } startFr = m_switchFrame[i]; endFr = m_mbSize; @@ -1270,11 +1270,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { { m_sentenceBegin.Resize((size_t)1, (size_t)feat.cols()); m_minibatchPackingFlag.resize((size_t)feat.cols()); - m_sentenceBegin.SetValue((ElemType)SENTENCE_MIDDLE); - m_sentenceBegin.SetValue(0, 0, (ElemType)SENTENCE_BEGIN); + m_sentenceBegin.SetValue((ElemType)SEQUENCE_MIDDLE); + m_sentenceBegin.SetValue(0, 0, (ElemType)SEQUENCE_START); std::fill(m_minibatchPackingFlag.begin(), m_minibatchPackingFlag.end(), MinibatchPackingFlag::None); - m_minibatchPackingFlag[0] = MinibatchPackingFlag::UtteranceStart; + m_minibatchPackingFlag[0] = MinibatchPackingFlag::SequenceStart; first = false; } diff --git a/DataReader/HTKMLFReader/HTKMLFReader.h b/DataReader/HTKMLFReader/HTKMLFReader.h index 9536f2baebc3..c0fc1f017653 100644 --- a/DataReader/HTKMLFReader/HTKMLFReader.h +++ b/DataReader/HTKMLFReader/HTKMLFReader.h @@ -119,7 +119,7 @@ class HTKMLFReader : public IDataReader virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart=0); virtual bool DataEnd(EndDataType endDataType); - void SetSentenceSegBatch(Matrix &sentenceBegin, vector& sentenceExistsBeginOrNoLabels); + void SetSentenceSegBatch(Matrix &sentenceBegin, vector& sentenceExistsBeginOrNoInputs); void SetSentenceEnd(int /*actualMbSize*/){}; void SetRandomSeed(int){ NOT_IMPLEMENTED }; }; diff --git a/DataReader/HTKMLFReader/basetypes.h b/DataReader/HTKMLFReader/basetypes.h index 08205daca581..53f04b3d29ed 100644 --- a/DataReader/HTKMLFReader/basetypes.h +++ b/DataReader/HTKMLFReader/basetypes.h @@ -145,21 +145,24 @@ extern void _CHECKED_ASSERT_error(const char * file, int line, const char * exp) /** These macros are used for sentence segmentation information. 
*/ -#define SENTENCE_BEGIN ((int) MinibatchPackingFlag::UtteranceStart) -#define SENTENCE_MIDDLE ((int) MinibatchPackingFlag::None) -#define SENTENCE_END ((int) MinibatchPackingFlag::UtteranceEnd) -#define NO_LABELS ((int) MinibatchPackingFlag::NoLabel) +#define SEQUENCE_START ((int) MinibatchPackingFlag::SequenceStart) +#define SEQUENCE_MIDDLE ((int) MinibatchPackingFlag::None) +#define SEQUENCE_END ((int) MinibatchPackingFlag::SequenceEnd) +#define NO_INPUT ((int) MinibatchPackingFlag::NoInput) +#define NO_LABEL ((int) MinibatchPackingFlag::NoLabel) enum class MinibatchPackingFlag : unsigned char { None = 0, - UtteranceStart = 1 << 0, //binary 0001 - UtteranceEnd = 1 << 1, //binary 0010 - NoLabel = 1 << 2, //binary 0100 - - UtteranceStartOrNoLabel = UtteranceStart | NoLabel, - UtteranceEndOrNoLabel = UtteranceEnd | NoLabel, - UtteranceStartOrEndOrNoLabel = UtteranceStart | UtteranceEnd | NoLabel, + SequenceStart = 1 << 0, //binary 0001 + SequenceEnd = 1 << 1, //binary 0010 + NoFeature = 1 << 2, //binary 0100 + NoLabel = 1 << 3, //binary 1000 + + NoInput = NoFeature | NoLabel, // once the reader is refactored, NoInput will no longer be needed + SequenceStartOrNoInput = SequenceStart | NoInput, + SequenceEndOrNoInput = SequenceEnd | NoInput, + SequenceStartOrEndOrNoInput = SequenceStart | SequenceEnd | NoInput, }; inline MinibatchPackingFlag operator| (MinibatchPackingFlag a, MinibatchPackingFlag b) diff --git a/DataReader/LMSequenceReader/SequenceReader.cpp b/DataReader/LMSequenceReader/SequenceReader.cpp index e88e6a4d12a1..963cda9e5b00 100755 --- a/DataReader/LMSequenceReader/SequenceReader.cpp +++ b/DataReader/LMSequenceReader/SequenceReader.cpp @@ -1858,7 +1858,7 @@ bool BatchSequenceReader::GetMinibatch(std::map::SetSentenceBegin(int wrd, int uttPos, int ti if (wrd == (int)index) { mSentenceBegin = true; - mtSentenceBegin.SetValue(uttPos, timePos, (ElemType)SENTENCE_BEGIN); - m_minibatchPackingFlag[timePos] = MinibatchPackingFlag::UtteranceStart; + mtSentenceBegin.SetValue(uttPos, timePos, (ElemType)SEQUENCE_START); + m_minibatchPackingFlag[timePos] = MinibatchPackingFlag::SequenceStart; } } } diff --git a/DataReader/LUSequenceReader/LUSequenceReader.cpp b/DataReader/LUSequenceReader/LUSequenceReader.cpp index 0186999e2f81..bf7c90e88f56 100644 --- a/DataReader/LUSequenceReader/LUSequenceReader.cpp +++ b/DataReader/LUSequenceReader/LUSequenceReader.cpp @@ -705,8 +705,8 @@ bool BatchLUSequenceReader::EnsureDataAvailable(size_t /*mbStartSample if (mMaxSentenceLength > m_mbSize) throw std::runtime_error("LUSequenceReader : minibatch size needs to be large enough to accommodate the longest sentence"); - /// reset sentenceending index to NO_LABELS, which is negative - mSentenceEndAt.assign(mSentenceEndAt.size(), NO_LABELS); + /// reset the sentence-ending index to NO_INPUT + mSentenceEndAt.assign(mSentenceEndAt.size(), NO_INPUT); /** mtSentenceBegin : a matrix with [Ns x T] @@ -715,7 +715,7 @@ bool BatchLUSequenceReader::EnsureDataAvailable(size_t /*mbStartSample 1 : case exists */ mtSentenceBegin.Resize(mToProcess.size(), mMaxSentenceLength); - mtSentenceBegin.SetValue((ElemType) SENTENCE_MIDDLE); + mtSentenceBegin.SetValue((ElemType) SEQUENCE_MIDDLE); DEVICEID_TYPE sentenceSegDeviceId = mtSentenceBegin.GetDeviceId(); mtSentenceBegin.TransferFromDeviceToDevice(sentenceSegDeviceId, CPUDEVICE, true, false, false); @@ -735,8 +735,8 @@ bool BatchLUSequenceReader::EnsureDataAvailable(size_t /*mbStartSample mSentenceBeginAt[k] = i; if (mIgnoreSentenceBeginTag == false) /// ignore
sentence begin, this is used for decoder network reader, which carries activities from the encoder networks { - mtSentenceBegin.SetValue(k, j, (ElemType)SENTENCE_BEGIN); - m_minibatchPackingFlag[j] |= MinibatchPackingFlag::UtteranceStart; + mtSentenceBegin.SetValue(k, j, (ElemType)SEQUENCE_START); + m_minibatchPackingFlag[j] |= MinibatchPackingFlag::SequenceStart; } } @@ -798,8 +798,8 @@ bool BatchLUSequenceReader::EnsureDataAvailable(size_t /*mbStartSample m_featureWordContext.push_back(tmpCxt); m_labelIdData.push_back((LabelIdType)NULLLABEL); - mtSentenceBegin.SetValue(k, j, (ElemType) NO_LABELS); - m_minibatchPackingFlag[j] |= MinibatchPackingFlag::NoLabel; + mtSentenceBegin.SetValue(k, j, (ElemType) NO_INPUT); + m_minibatchPackingFlag[j] |= MinibatchPackingFlag::NoInput; } } @@ -899,7 +899,7 @@ bool BatchLUSequenceReader::GetMinibatch(std::map= featInfo.dim) { - if (mtSentenceBegin(utt_id, utt_t) != NO_LABELS) /// for those obs that are for no observations + if (mtSentenceBegin(utt_id, utt_t) != NO_INPUT) /// for those obs that are for no observations { LogicError("BatchLUSequenceReader::GetMinibatch observation is larger than its dimension but no_labels sign is not used to indicate that this observation has no labels. Possible reason is a bug in EnsureDataAvailable or a bug here. "); } @@ -1046,7 +1046,7 @@ bool BatchLUSequenceReader::DataEnd(EndDataType endDataType) ret = true; for (size_t i = 0; i < mToProcess.size(); i++) { - if (mSentenceEndAt[i] == NO_LABELS) + if (mSentenceEndAt[i] == NO_INPUT) { LogicError("BatchLUSequenceReader: minibatch should be large enough to accommodate the longest sentence"); } diff --git a/DataReader/LUSequenceReader/LUSequenceReader.h b/DataReader/LUSequenceReader/LUSequenceReader.h index 11caeacf5af5..be636a9242da 100644 --- a/DataReader/LUSequenceReader/LUSequenceReader.h +++ b/DataReader/LUSequenceReader/LUSequenceReader.h @@ -352,7 +352,7 @@ class BatchLUSequenceReader : public LUSequenceReader /// n_length is the maximum length of each stream /// for example, two sentences used in parallel in one minibatch would be /// [2 x 5] if the max length of one of the sentences is 5 - /// the elements of the matrix is 0, 1, or -1, defined as SENTENCE_BEGIN, SENTENCE_MIDDLE, NO_LABELS in cbasetype.h + /// the elements of the matrix are 0, 1, or -1, defined as SEQUENCE_START, SEQUENCE_MIDDLE, NO_INPUT in cbasetype.h /// 0 1 1 0 1 /// 1 0 1 0 0 /// for two parallel data streams. The first has two sentences, with 0 indicating beginning of a sentence @@ -367,7 +367,7 @@ class BatchLUSequenceReader : public LUSequenceReader vector m_minibatchPackingFlag; /// by default it is false - /// if true, reader will set to SENTENCE_MIDDLE for time positions that are orignally correspond to SENTENCE_BEGIN + /// if true, the reader will set to SEQUENCE_MIDDLE those time positions that originally correspond to SEQUENCE_START /// set to true so that a current minibatch can use state activities from the previous minibatch.
/// default will have truncated BPTT, which only does BPTT inside a minibatch bool mIgnoreSentenceBeginTag; diff --git a/MachineLearning/CNTK/ComputationNode.h b/MachineLearning/CNTK/ComputationNode.h index 268cbe28f8d3..7aeccab5ef44 100644 --- a/MachineLearning/CNTK/ComputationNode.h +++ b/MachineLearning/CNTK/ComputationNode.h @@ -247,9 +247,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { Matrix colPos(sentenceBegin.GetDeviceId()); colPos.SetValue(sentenceBegin); /// -1 0 1 - colPos.InplaceTruncateBottom(SENTENCE_BEGIN); + colPos.InplaceTruncateBottom(SEQUENCE_START); Matrix::Scale((ElemType)-1.0, colPos); - colPos += SENTENCE_MIDDLE; + colPos += SEQUENCE_MIDDLE; colSeg.SetDiagonalValue(colPos); Matrix ones(sentenceBegin.GetDeviceId()); ones.Resize(nStateRow, nStream); @@ -291,7 +291,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { colSeg = m_sentenceSeg->ColumnSlice(j,1); for (int i = 0; i < nS; i++) { - if (colSeg(i,0) == NO_LABELS) + if ((int)colSeg(i,0) & NO_LABEL) { matrixToBeMasked.ColumnSlice(utt_t+i, 1).SetValue(0); } diff --git a/MachineLearning/CNTK/RecurrentNodes.h b/MachineLearning/CNTK/RecurrentNodes.h index d7fe74e1584c..54a2a409006b 100644 --- a/MachineLearning/CNTK/RecurrentNodes.h +++ b/MachineLearning/CNTK/RecurrentNodes.h @@ -147,36 +147,36 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (int i = 0; i < minibatchPackingFlag->size(); i++) { - if ((*minibatchPackingFlag)[i] & MinibatchPackingFlag::UtteranceStartOrNoLabel) + if ((*minibatchPackingFlag)[i] & MinibatchPackingFlag::SequenceStartOrNoInput) { - //we set timeStep-1 elements following it to be UtteranceStart until met NoLabel + //we set the timeStep-1 elements following it to be SequenceStart until NoInput is met for (int j = 0; j < numRows; j++) { - if ((*seg)(j, i) == SENTENCE_BEGIN) + if ((*seg)(j, i) == SEQUENCE_START) { numResetLeft[j] = m_timeStep; } - else if ((*seg)(j, i) == NO_LABELS) + else if ((*seg)(j, i) == NO_INPUT) { numResetLeft[j] = 0; } } } - //now set the UtteranceStart + //now set the SequenceStart bool valueChanged = false; for (int j = 0; j < numRows; j++) { if (numResetLeft[j]-- > 0) { - m_boundaryInfo(j, i) = SENTENCE_BEGIN; + m_boundaryInfo(j, i) = SEQUENCE_START; valueChanged = true; } } if (valueChanged) { - m_shiftedMinibatchPackingFlag[i] |= MinibatchPackingFlag::UtteranceStart; + m_shiftedMinibatchPackingFlag[i] |= MinibatchPackingFlag::SequenceStart; } } @@ -220,11 +220,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { assert(timeIdxInSeq >= 0); if ((timeIdxInSeq - timeStep) >= 0) { - if (minibatchPackingFlag & MinibatchPackingFlag::UtteranceStartOrNoLabel) + if (minibatchPackingFlag & MinibatchPackingFlag::SequenceStartOrNoInput) { for (int i = 0; i < mNbr; i++) { - if (colBegin(i, 0) != SENTENCE_BEGIN && colBegin(i, 0) != NO_LABELS) + if (colBegin(i, 0) != SEQUENCE_START && colBegin(i, 0) != NO_INPUT) { Matrix to = inputGradientValues.ColumnSlice((timeIdxInSeq - timeStep)*mNbr + i, 1); Matrix frm= gradientValues.ColumnSlice(timeIdxInSeq * mNbr + i, 1); @@ -295,13 +295,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { Matrix out = functionValues.ColumnSlice(timeIdxInSeq * mNbr, mNbr); Matrix inp((DEVICEID_TYPE)functionValues.GetDeviceId()) ; - if (minibatchPackingFlag & MinibatchPackingFlag::UtteranceStartOrNoLabel) + if (minibatchPackingFlag & MinibatchPackingFlag::SequenceStartOrNoInput) { for (int i = 0; i < mNbr; i ++) { out = functionValues.ColumnSlice(timeIdxInSeq * mNbr + i,1); - if (colBegin(i,0) == SENTENCE_BEGIN) + if
(colBegin(i,0) == SEQUENCE_START) { out.SetValue(initStateValue); } @@ -552,36 +552,36 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (int i = minibatchPackingFlag->size()-1; i <=0; i--) { - if ((*minibatchPackingFlag)[i] & MinibatchPackingFlag::UtteranceEndOrNoLabel) + if ((*minibatchPackingFlag)[i] & MinibatchPackingFlag::SequenceEndOrNoInput) { - //we set timeStep-1 elements following it to be UtteranceStart until met NoLabel + //we set the timeStep-1 elements following it to be SequenceEnd until NoInput is met for (int j = 0; j < numRows; j++) { - if ((*seg)(j, i) == SENTENCE_END) + if ((*seg)(j, i) == SEQUENCE_END) { numResetLeft[j] = m_timeStep; } - else if ((*seg)(j, i) == NO_LABELS) + else if ((*seg)(j, i) == NO_INPUT) { numResetLeft[j] = 0; } } } - //now set the UtteranceEnd + //now set the SequenceEnd bool valueChanged = false; for (int j = 0; j < numRows; j++) { if (numResetLeft[j]-- > 0) { - m_boundaryInfo(j, i) = SENTENCE_END; + m_boundaryInfo(j, i) = SEQUENCE_END; valueChanged = true; } } if (valueChanged) { - m_shiftedMinibatchPackingFlag[i] |= MinibatchPackingFlag::UtteranceEnd; + m_shiftedMinibatchPackingFlag[i] |= MinibatchPackingFlag::SequenceEnd; } } @@ -623,11 +623,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { assert(timeIdxInSeq >= 0); if ((timeIdxInSeq + timeStep) < gradientValues.GetNumCols()) { - if (minibatchPackingFlag & MinibatchPackingFlag::UtteranceEndOrNoLabel) + if (minibatchPackingFlag & MinibatchPackingFlag::SequenceEndOrNoInput) { for (int i = 0; i < mNbr; i++) { - if (colFlag(i, 0) != SENTENCE_END && colFlag(i, 0) != NO_LABELS) + if (colFlag(i, 0) != SEQUENCE_END && colFlag(i, 0) != NO_INPUT) { Matrix to = inputGradientValues.ColumnSlice((timeIdxInSeq + timeStep)*mNbr + i, 1); Matrix frm = gradientValues.ColumnSlice(timeIdxInSeq * mNbr + i, 1); @@ -726,13 +726,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { Matrix out = functionValues.ColumnSlice(timeIdxInSeq * mNbr, mNbr); Matrix inp((DEVICEID_TYPE)functionValues.GetDeviceId()); - if (minibatchPackingFlag & MinibatchPackingFlag::UtteranceEndOrNoLabel) + if (minibatchPackingFlag & MinibatchPackingFlag::SequenceEndOrNoInput) { for (int i = 0; i < mNbr; i++) { out = functionValues.ColumnSlice(timeIdxInSeq * mNbr + i, 1); - if (colFlag(i, 0) == SENTENCE_END) + if (colFlag(i, 0) == SEQUENCE_END) { out.SetValue(initStateValue); } @@ -1311,7 +1311,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } /** - get the segmentation information, SENTENECE_BEGIN, SENTENCE_MIDDLE, NO_LABELS + get the segmentation information, SEQUENCE_START, SEQUENCE_MIDDLE, NO_INPUT for time at t and stream of streamid */ int GetSegInfo(size_t t, size_t streamid) @@ -1345,7 +1345,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { for (int t = nT - m_samplesInRecurrentStep + i; t >= 0; t -= m_samplesInRecurrentStep) { - if (GetSegInfo(t, i) == SENTENCE_MIDDLE) + if (GetSegInfo(t, i) == SEQUENCE_MIDDLE) { mLastOutput.ColumnSlice(i, 1).SetValue(FunctionValues().ColumnSlice(t, 1)); mLastState.ColumnSlice(i, 1).SetValue(m_State.ColumnSlice(t, 1)); @@ -1441,7 +1441,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { /** Prepare history for LSTMnode - This function returns state and output from the previous time instance. For recurrent network, the initial state needs to be set in the case of sentence begining, which is carried over from sentenceBegin. In case of sentence begining, the state activity is set to an initial value.
The sentenceBegin has element of SENTENCE_BEGIN, SENTENCE_MIDDLE and NO_LABELS, which are 0, 1, and -1, respectively. + This function returns state and output from the previous time instance. For recurrent network, the initial state needs to be set in the case of sentence beginning, which is carried over from sentenceBegin. In case of sentence beginning, the state activity is set to an initial value. The sentenceBegin has elements of SEQUENCE_START, SEQUENCE_MIDDLE and NO_INPUT, which are 0, 1, and -1, respectively. To compute the initial value, we use prevState = sentenceBegin * pastActivity + ~sentenceBegin * initialStateValue and ~sentenceBegin is computed as -1*(sentenceBegin - 1), assuming that sentenceBegin is either 0 or 1. For example, when sentenceBegin == 1, ~sentenceBegin == 0. @@ -1480,8 +1480,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { colSeg.Resize(nStream, nStream); /// will reset to 0 if sentence beginning at a position is 0 /// will keep the output if it is not the sentence beginning - colBegin.InplaceTruncateBottom(SENTENCE_BEGIN); - colBegin.InplaceTruncateTop(SENTENCE_MIDDLE); + colBegin.InplaceTruncateBottom(SEQUENCE_START); + colBegin.InplaceTruncateTop(SEQUENCE_MIDDLE); colSeg.SetDiagonalValue(colBegin); Matrix newPrevOutput(colBegin.GetDeviceId()); @@ -1529,8 +1529,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (size_t utt_id = 0; utt_id < nsamples; utt_id++) { /// if uses errors from future minibatch - if ((GetSegInfo(timeIdxInSeq, utt_id) == SENTENCE_MIDDLE && utt_t == total_utt_t - 1) /// last time - || (utt_t < total_utt_t - 1 && GetSegInfo(timeIdxInSeq, utt_id) == SENTENCE_MIDDLE && GetSegInfo(timeIdxInSeq + nsamples, utt_id) == NO_LABELS) /// future observation is no observation + if ((GetSegInfo(timeIdxInSeq, utt_id) == SEQUENCE_MIDDLE && utt_t == total_utt_t - 1) /// last time + || (utt_t < total_utt_t - 1 && GetSegInfo(timeIdxInSeq, utt_id) == SEQUENCE_MIDDLE && GetSegInfo(timeIdxInSeq + nsamples, utt_id) == NO_INPUT) /// future observation is no observation ) { error.ColumnSlice(utt_id, 1) += obs_error_from_future_minibatch.ColumnSlice(utt_id, 1); @@ -1542,9 +1542,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { Matrix colBegin(sentenceBegin->GetDeviceId()); colBegin.SetValue(sentenceBegin->ColumnSlice(utt_t, 1)); - colBegin.InplaceTruncateBottom(NO_LABELS); - colBegin.InplaceTruncateTop(SENTENCE_BEGIN); - colBegin += fabs((ElemType)NO_LABELS); /// raise this so that -1 -> 0 and therefore + colBegin.InplaceTruncateBottom(NO_INPUT); + colBegin.InplaceTruncateTop(SEQUENCE_START); + colBegin += fabs((ElemType)NO_INPUT); /// raise this so that -1 -> 0 and therefore Matrix colSeg(colBegin.GetDeviceId()); colSeg.Resize(nsamples, nsamples); colSeg.SetDiagonalValue(colBegin); @@ -1572,8 +1572,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { colBegin.SetValue(sentenceBegin->ColumnSlice(utt_t, 1)); /// will reset to 0 if sentence beginning at a position is 0 /// will keep the output if it is not the sentence beginning - colBegin.InplaceTruncateBottom(SENTENCE_BEGIN); - colBegin.InplaceTruncateTop(SENTENCE_MIDDLE); + colBegin.InplaceTruncateBottom(SEQUENCE_START); + colBegin.InplaceTruncateTop(SEQUENCE_MIDDLE); Matrix colSeg(colBegin.GetDeviceId()); colSeg.Resize(nsamples, nsamples); @@ -1732,13 +1732,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { ElemType initStateValue = m_DefaultState; Matrix boundary(m_deviceId); boundary.Resize(1, nT); - boundary.SetValue(SENTENCE_MIDDLE); - boundary.ColumnSlice(0,
1).SetValue(SENTENCE_BEGIN); + boundary.SetValue(SEQUENCE_MIDDLE); + boundary.ColumnSlice(0, 1).SetValue(SEQUENCE_START); vector minibatchPackingFlag; minibatchPackingFlag.resize(nT); std::fill(minibatchPackingFlag.begin(), minibatchPackingFlag.end(), MinibatchPackingFlag::None); - minibatchPackingFlag[1] = MinibatchPackingFlag::UtteranceStart; + minibatchPackingFlag[1] = MinibatchPackingFlag::SequenceStart; ComputationNode::ResetBound(&boundary, &minibatchPackingFlag); f0 = Inputs(0)->FunctionValues(); diff --git a/MachineLearning/CNTK/SimpleEvaluator.h b/MachineLearning/CNTK/SimpleEvaluator.h index 96bc60012a98..4394e79dad25 100644 --- a/MachineLearning/CNTK/SimpleEvaluator.h +++ b/MachineLearning/CNTK/SimpleEvaluator.h @@ -969,7 +969,7 @@ namespace Microsoft { evalnet->SetActualMiniBatchSize(1, FeatureNodes); dataReader->SetSentenceSegBatch(evalnet->SentenceBoundary(), evalnet->MinibatchPackingFlags()); /// need to set the sentence begining segmentation info - evalnet->SentenceBoundary().SetValue(SENTENCE_BEGIN); + evalnet->SentenceBoundary().SetValue(SEQUENCE_START); for (itdx = 0; itdx < maxSize; itdx++) { @@ -979,7 +979,7 @@ namespace Microsoft { if (itdx > 0) { /// state need to be carried over from past time instance - evalnet->SentenceBoundary().SetValue(SENTENCE_MIDDLE); + evalnet->SentenceBoundary().SetValue(SEQUENCE_MIDDLE); } PreComputeActivityAtTime(itdx); @@ -1131,7 +1131,7 @@ namespace Microsoft { ElemType best_score = -numeric_limits::infinity(); ElemType best_score_so_far = -numeric_limits::infinity(); - evalnet->SentenceBoundary().SetValue(SENTENCE_BEGIN); + evalnet->SentenceBoundary().SetValue(SEQUENCE_START); for (itdx = 0; itdx < maxMbSize; itdx++) { @@ -1141,7 +1141,7 @@ namespace Microsoft { if (itdx > 0) { /// state need to be carried over from past time instance - evalnet->SentenceBoundary().SetValue(SENTENCE_MIDDLE); + evalnet->SentenceBoundary().SetValue(SEQUENCE_MIDDLE); } PreComputeActivityAtTime(itdx); diff --git a/MachineLearning/CNTK/SynchronousExecutionEngine.h b/MachineLearning/CNTK/SynchronousExecutionEngine.h index efda3bdfc987..af32de27e77d 100644 --- a/MachineLearning/CNTK/SynchronousExecutionEngine.h +++ b/MachineLearning/CNTK/SynchronousExecutionEngine.h @@ -1,876 +1,876 @@ -// -// -// Copyright (c) Microsoft Corporation. All rights reserved. -// -// - -#pragma once - -#include "IExecutionEngine.h" -#include "ComputationNetwork.h" -#include "fileutil.h" // for fexists() - -namespace Microsoft { namespace MSR { namespace CNTK { - -// SynchronousNodeEvaluator -// Process the Network Description Language into a Computation Network useable -// by SynchronousExecutionEngine. -template -class SynchronousNodeEvaluator : public NDLNodeEvaluator -{ -public: - // Constructor - create evaluator - SynchronousNodeEvaluator(ComputationNetwork& cn) : m_net(cn) - { } - - // Evaluate - evaluate a node and translate into underlying - // node - node we are evaluating - // baseName - base name for all symbols at this level - // pass - NDLPass through the evaluation (0-initial, 1-resolve variables, 2-final) - virtual void Evaluate(NDLNode* node, const wstring& baseName, const NDLPass pass) - { - // constants don't need to be evaluated, they just translate into numbers... 
- if (node->GetType() == ndlTypeConstant - || node->GetType() == ndlTypeArray) - return; - - // setup the node parameters, where they start in the parameter list, and how many there are - // this is needed for the ndlPassResolve step to hookup all the inputs - int nodeParamStart = 0; - int nodeParamCount = 0; - - // get the parameters - std::vector*> parameter = node->GetParameters(); - - // get the name for the symbol to be used by CN nodes - std::wstring name = msra::strfun::utf16(node->GetName()); - if (!baseName.empty()) - { - name = baseName + L"." + name; - } - - std::wstring cnNodeType = msra::strfun::utf16(node->GetValue()); - - ComputationNodePtr nodePtr = nullptr; - - // get the node pointer for the node, should be stored in the EvalValue; - if (pass > ndlPassInitial) - { - nodePtr = (ComputationNodePtr)node->GetEvalValue(); - if (nodePtr == nullptr) - { - nodePtr = (ComputationNodePtr)m_net.GetNodeFromName(name); - node->SetEvalValue(nodePtr); - } - } - - if (InputValue::TypeName() == cnNodeType) - { - if (parameter.size() < 1 || parameter.size() > 2) - RuntimeError("%ls should have 1 or 2 parameters[rows, [cols=1]].", cnNodeType.c_str()); - - if (pass == ndlPassInitial) - { - // evaluate only scalar parameters - vector params = EvaluateParameters(node, baseName, 0, parameter.size(), pass); - size_t rows = ((NDLNode*)params[0])->GetScalar(); - size_t cols = params.size() > 1 ? ((NDLNode*)params[1])->GetScalar() : 1; - - // first look for this node already existing in the network - if (m_net.NodeNameExist(name)) - nodePtr = m_net.GetNodeFromName(name); - else - nodePtr = m_net.CreateInputNode(name, rows, cols); - } - } - else if (InputValue::SparseTypeName() == cnNodeType) - { - if (parameter.size() < 1 || parameter.size() > 2) - RuntimeError("%ls should have 1 or 2 parameters[rows, [cols=1]].", cnNodeType.c_str()); - - if (pass == ndlPassInitial) - { - // evaluate only scalar parameters - vector params = EvaluateParameters(node, baseName, 0, parameter.size(), pass); - size_t rows = ((NDLNode*)params[0])->GetScalar(); - size_t cols = params.size() > 1 ? ((NDLNode*)params[1])->GetScalar() : 1; - - // first look for this node already existing in the network - if (m_net.NodeNameExist(name)) - nodePtr = m_net.GetNodeFromName(name); - else - nodePtr = m_net.CreateSparseInputNode(name, rows, cols); - } - } - else if (cnNodeType == L"ImageInput") - { - if (parameter.size() < 3 || parameter.size() > 4) - RuntimeError("%ls should have 3 or 4 parameters[imageWidth, imageHeight, imageChannels, [numImages=1]].", cnNodeType.c_str()); - - if (pass == ndlPassInitial) - { - // evaluate only scalar parameters - vector params = EvaluateParameters(node, baseName, 0, parameter.size(), pass); - size_t imageWidth = ((NDLNode*)params[0])->GetScalar(); - size_t imageHeight = ((NDLNode*)params[1])->GetScalar(); - size_t imageChannels = ((NDLNode*)params[2])->GetScalar(); - size_t numImages = parameter.size() > 3 ? 
((NDLNode*)params[3])->GetScalar() : 1; - - nodePtr = m_net.CreateInputNode(name, imageWidth, imageHeight, imageChannels, numImages); - } - } - else if (cnNodeType == L"SparseImageInput") - { - if (parameter.size() < 3 || parameter.size() > 4) - RuntimeError("%ls should have 3 or 4 parameters[imageWidth, imageHeight, imageChannels, [numImages=1]].", cnNodeType.c_str()); - - if (pass == ndlPassInitial) - { - // evaluate only scalar parameters - vector params = EvaluateParameters(node, baseName, 0, parameter.size(), pass); - size_t imageWidth = ((NDLNode*)params[0])->GetScalar(); - size_t imageHeight = ((NDLNode*)params[1])->GetScalar(); - size_t imageChannels = ((NDLNode*)params[2])->GetScalar(); - size_t numImages = parameter.size() > 3 ? ((NDLNode*)params[3])->GetScalar() : 1; - - nodePtr = m_net.CreateSparseInputNode(name, imageWidth, imageHeight, imageChannels, numImages); - } - } - else if (LearnableParameter::TypeName() == cnNodeType) - { - if (parameter.size() < 1 || parameter.size() > 2) - RuntimeError("%ls should have 1 or 2 parameters[rows, [cols=1]] plus other optional parameters (needGradient=[true|false], init=[uniform|gaussian|fixedvalue], initValueScale=[1|float], value=[0|float]).", cnNodeType.c_str()); - - if (pass == ndlPassInitial) - { - // evaluate only scalar parameters - vector params = EvaluateParameters(node, baseName, 0, parameter.size(), pass); - size_t rows = ((NDLNode*)params[0])->GetScalar(); - size_t cols = params.size() > 1 ? ((NDLNode*)params[1])->GetScalar() : 1; - - bool needGradient = node->GetOptionalParameter("needGradient", "true"); - - nodePtr = m_net.CreateLearnableParameter(name, rows, cols); - - nodePtr->NeedGradient() = needGradient; - } - else if (pass == ndlPassFinal) - { - static int randomSeed = 1; - std::string initString = node->GetOptionalParameter("init", "uniform"); - ElemType initValueScale = node->GetOptionalParameter("initValueScale", "1"); - ElemType value = node->GetOptionalParameter("value", "0"); - - msra::strfun::tolower_ascii (initString); - if (initString == "fixedvalue") - nodePtr->FunctionValues().SetValue(value); - else if (initString == "uniform") - m_net.InitLearnableParameters(nodePtr, true, randomSeed++, initValueScale); - else if (initString == "gaussian") - m_net.InitLearnableParameters(nodePtr, false, randomSeed++, initValueScale); - else if (initString == "fromfile") - { - std::string initFromFilePath = node->GetOptionalParameter("initFromFilePath", ""); - if (initFromFilePath == "") - RuntimeError("initFromFilePath must be set when using \"fromFile\" initialization method"); - if(initFromFilePath[0] == '\"' && initFromFilePath[initFromFilePath.size()-1] == '\"') - // remove the opening and closing double quotes - initFromFilePath = initFromFilePath.substr(1, initFromFilePath.size()-2); - if(!fexists(initFromFilePath)) - RuntimeError("File pointed to by initFromFilePath does not exist: %s", initFromFilePath.c_str()); - m_net.InitLearnableParametersFromFile(nodePtr, initFromFilePath); - } - else - RuntimeError("init must be one of the values of [uniform|gaussian|fixedvalue]"); - } - } - else if (SparseLearnableParameter::TypeName() == cnNodeType) - { - if (parameter.size() < 1 || parameter.size() > 2) - RuntimeError("%ls should have 1 or 2 parameters[rows, [cols=1]] plus other optional parameters (needGradient=[true|false], init=[uniform|gaussian|fixedvalue], initValueScale=[1|float], value=[0|float]).", cnNodeType.c_str()); - - if (pass == ndlPassInitial) - { - // evaluate only scalar parameters - vector params = 
EvaluateParameters(node, baseName, 0, parameter.size(), pass); - size_t rows = ((NDLNode*)params[0])->GetScalar(); - size_t cols = params.size() > 1 ? ((NDLNode*)params[1])->GetScalar() : 1; - - bool needGradient = node->GetOptionalParameter("needGradient", "true"); - - nodePtr = m_net.CreateSparseLearnableParameter(name, rows, cols); - - nodePtr->NeedGradient() = needGradient; - } - else if (pass == ndlPassFinal) - { - static int randomSeed = 1; - std::string initString = node->GetOptionalParameter("init", "uniform"); - ElemType initValueScale = node->GetOptionalParameter("initValueScale", "1"); - ElemType value = node->GetOptionalParameter("value", "0"); - - msra::strfun::tolower_ascii(initString); - if (initString == "fixedvalue") - nodePtr->FunctionValues().SetValue(value); - else if (initString == "uniform") - m_net.InitLearnableParameters(nodePtr, true, randomSeed++, initValueScale); - else if (initString == "gaussian") - m_net.InitLearnableParameters(nodePtr, false, randomSeed++, initValueScale); - else if (initString == "fromfile") - { - std::string initFromFilePath = node->GetOptionalParameter("initFromFilePath", ""); - if (initFromFilePath == "") - RuntimeError("initFromFilePath must be set when using \"fromFile\" initialization method"); - if(initFromFilePath[0] == '\"' && initFromFilePath[initFromFilePath.size()-1] == '\"') - // remove the opening and closing double quotes - initFromFilePath = initFromFilePath.substr(1, initFromFilePath.size()-2); - if(!fexists(initFromFilePath)) - RuntimeError("File pointed to by initFromFilePath does not exist: %s", initFromFilePath.c_str()); - m_net.InitLearnableParametersFromFile(nodePtr, initFromFilePath); - } - else - RuntimeError("init must be one of the values of [uniform|gaussian|fixedvalue]"); - } - } - else if (cnNodeType == L"Constant") - { - if (parameter.size() != 1) - RuntimeError("Constant should have 1 fixed parameter [val] and two optional parameters [rows=[1|yourvalue], cols=[1|yourvalue]]."); - - if (pass == ndlPassInitial) - { - size_t rows = node->GetOptionalParameter("rows", "1"); - size_t cols = node->GetOptionalParameter("cols", "1"); - - nodePtr = m_net.CreateLearnableParameter(name, rows, cols); - nodePtr->NeedGradient() = false; - } - else if (pass == ndlPassFinal || nodePtr->FunctionValues().GetNumElements() != 0) - { - ElemType val = parameter[0]->GetScalar(); - nodePtr->FunctionValues().SetValue(val); - } - } - else if (cnNodeType == RowSliceNode::TypeName()) - { - if (parameter.size() != 3) - RuntimeError("RowSlice should have three parameters. Usage: RowSlice(startRowIndex, numRows, origNodeName."); - - nodeParamCount = 1; - nodeParamStart = 2; - - if (pass == ndlPassInitial) - { - // evaluate only scalar parameters - vector params = EvaluateParameters(node, baseName, 0, parameter.size(), pass); - size_t start_index = ((NDLNode*)params[0])->GetScalar(); - size_t num_rows = ((NDLNode*)params[1])->GetScalar(); - - bool needGradient = node->GetOptionalParameter("needGradient", "false"); - nodePtr = m_net.RowSlice(NULL, start_index, num_rows, name); - nodePtr->NeedGradient() = needGradient; - } - } - else if (cnNodeType == RowRepeatNode::TypeName()) - { - if (parameter.size() != 2) - RuntimeError("RowRepeat should have two parameters. 
Usage: RowRepeat(origNodeName, numRepeats."); - - nodeParamCount = 1; - nodeParamStart = 0; - - if (pass == ndlPassInitial) - { - // evaluate only scalar parameters - vector params = EvaluateParameters(node, baseName, 0, parameter.size(), pass); - size_t num_repeat = ((NDLNode*)params[1])->GetScalar(); - - bool needGradient = node->GetOptionalParameter("needGradient", "false"); - nodePtr = m_net.RowRepeat(NULL, num_repeat, name); - nodePtr->NeedGradient() = needGradient; - } - } - else if (cnNodeType == ReshapeNode::TypeName()) - { - if (parameter.size() < 2 || parameter.size() > 5) - RuntimeError("Reshape should have two to five parameters. Usage: Reshape(origNodeName, numRows, [imageWidth=], [imageHeight=], [imageChannels=]."); - - nodeParamCount = 1; - nodeParamStart = 0; - - if (pass == ndlPassInitial) - { - // evaluate only scalar parameters - vector params = EvaluateParameters(node, baseName, 0, parameter.size(), pass); - size_t num_rows = ((NDLNode*)params[1])->GetScalar(); - size_t img_width = node->GetOptionalParameter("imageWidth", "0"); - size_t img_height = node->GetOptionalParameter("imageHeight", "0"); - size_t img_channels = node->GetOptionalParameter("imageChannels", "0"); - - bool needGradient = node->GetOptionalParameter("needGradient", "false"); - nodePtr = m_net.Reshape(NULL, num_rows, img_width, img_height, img_channels, name); - nodePtr->NeedGradient() = needGradient; - } - } - else if (cnNodeType == PastValueNode::TypeName() || - cnNodeType == FutureValueNode::TypeName()) - { - if (parameter.size() <2 || parameter.size() >3) - RuntimeError("PastValue or FutureValue should have two to three fixed parameters. Usage: PastValue(rows, [cols], m, [timeStep=1, defaultPastValue=0.1])."); - - nodeParamCount = 1; - nodeParamStart = parameter.size() > 2?2:1; - - if (pass == ndlPassInitial) - { - // evaluate only scalar parameters - vector params = EvaluateParameters(node, baseName, 0, parameter.size(), pass); - size_t rows = ((NDLNode*)params[0])->GetScalar(); - // if we have three parameters the second is columns - size_t cols = parameter.size() > 2 ? 
((NDLNode*)params[1])->GetScalar() : 1; - - bool needGradient = node->GetOptionalParameter("needGradient", "false"); - float defaultHiddenActivity = node->GetOptionalParameter("defaultHiddenActivity", "0.1"); - - //for backward compatibility we check timeStep first - size_t timeStep = node->GetOptionalParameter("timeStep", "1"); - if (timeStep == 1) - { - timeStep = node->GetOptionalParameter("delayTime", "1"); - } - - if (cnNodeType == PastValueNode::TypeName()) - { - nodePtr = m_net.PastValue(NULL, defaultHiddenActivity, rows, cols, name); - ((PastValueNode*)nodePtr)->SetTimeStep(timeStep); - } - else - { - nodePtr = m_net.FutureValue(NULL, defaultHiddenActivity, rows, cols, name); - ((FutureValueNode*)nodePtr)->SetTimeStep(timeStep); - } - - nodePtr->NeedGradient() = needGradient; - } - } - else if (cnNodeType == ConvolutionNode::TypeName()) - { - if (parameter.size() != 7) - RuntimeError("%ls should have 7 fixed parameters[weightNodeName, inputValueNodeName, kernelWidth, kernelHeight, outputChannels,horizontalSubsample, verticalSubsample] and two optional parameters [zeroPadding = [false|yourvalue], maxTempMemSizeInSamples = [0|yourvalue]].", cnNodeType.c_str()); - - // setup the parameter position of children so we can hook them up later - nodeParamCount = 2; - nodeParamStart = 0; - - if (pass == ndlPassInitial) - { - int id = 2; // skip weightNode and inputValueNode - - // evaluate only scalar parameters - vector params = EvaluateParameters(node, baseName, id, parameter.size()-id, pass); - id = 0; // reset counter because the params array starts at zero - size_t kernelWidth = ((NDLNode*)params[id++])->GetScalar(); - size_t kernelHeight = ((NDLNode*)params[id++])->GetScalar(); - size_t outputChannels = ((NDLNode*)params[id++])->GetScalar(); - size_t horizontalSubsample = ((NDLNode*)params[id++])->GetScalar(); - size_t verticalSubsample = ((NDLNode*)params[id++])->GetScalar(); - - assert (id == 5); - - //optional - bool zeroPadding = node->GetOptionalParameter("zeroPadding", "false"); - size_t maxTempMemSizeInSamples = node->GetOptionalParameter("maxTempMemSizeInSamples", "0"); - - - nodePtr = m_net.Convolution(NULL, NULL, kernelWidth, kernelHeight, outputChannels, - horizontalSubsample, verticalSubsample, zeroPadding, name, maxTempMemSizeInSamples); - } - } - else if (cnNodeType == MaxPoolingNode::TypeName()) - { - if (parameter.size() != 5) - RuntimeError("%ls should have 5 parameters[inputValueNodeName, windowWidth, windowHeight, horizontalSubsample, verticalSubsample].", cnNodeType.c_str()); - - // setup the parameter position of children so we can hook them up later - nodeParamCount = 1; - nodeParamStart = 0; - - if (pass == ndlPassInitial) - { - int id = 1; // skip inputValueNode - - // evaluate only scalar parameters - vector params = EvaluateParameters(node, baseName, id, parameter.size() - id, pass); - id = 0; // reset counter because the params array starts at zero - size_t windowWidth = ((NDLNode*)params[id++])->GetScalar(); - size_t windowHeight = ((NDLNode*)params[id++])->GetScalar(); - size_t horizontalSubsample = ((NDLNode*)params[id++])->GetScalar(); - size_t verticalSubsample = ((NDLNode*)params[id++])->GetScalar(); - - assert (id == 4); - - nodePtr = m_net.MaxPooling(NULL, /*inputWidth,inputHeight, channels,*/windowWidth, windowHeight, - horizontalSubsample, verticalSubsample, name); - } - } - else if (cnNodeType == AveragePoolingNode::TypeName()) - { - if (parameter.size() != 5) - RuntimeError("%ls should have 5 parameters[inputValueNodeName, windowWidth, 
windowHeight, horizontalSubsample, verticalSubsample].", cnNodeType.c_str()); - - // setup the parameter position of children so we can hook them up later - nodeParamCount = 1; - nodeParamStart = 0; - - if (pass == ndlPassInitial) - { - int id = 1; // skip inputValueNode - - // evaluate only scalar parameters - vector params = EvaluateParameters(node, baseName, id, parameter.size() - id, pass); - id = 0; // reset counter because the params array starts at zero - size_t windowWidth = ((NDLNode*)params[id++])->GetScalar(); - size_t windowHeight = ((NDLNode*)params[id++])->GetScalar(); - size_t horizontalSubsample = ((NDLNode*)params[id++])->GetScalar(); - size_t verticalSubsample = ((NDLNode*)params[id++])->GetScalar(); - - assert (id == 4); - - nodePtr = m_net.AveragePooling(NULL, /*inputWidth,inputHeight, channels,*/windowWidth, windowHeight, - horizontalSubsample, verticalSubsample, name); - } - } - else - { - - // setup the variables for node parameter processing - nodeParamCount = parameter.size(); // all parameters are nodes in standard nodes - nodeParamStart = 0; - - if (pass == ndlPassInitial) - { - nodePtr = m_net.CreateComputationNode(node->GetValue(), name); - } - } - - switch (pass) - { - case ndlPassInitial: - node->SetEvalValue(nodePtr); - // evaluate parameters - EvaluateParameters(node, baseName, nodeParamStart, nodeParamCount, pass); - break; - case ndlPassResolve: - { - std::vector inputs = EvaluateParameters(node, baseName, nodeParamStart, nodeParamCount, pass); - - if (cnNodeType == RowStackNode::TypeName()) //support variable length inputs - { - std::vector inputNodes; - inputNodes.resize(inputs.size()); - for (int i = 0; i < inputs.size(); i++) - inputNodes[i] = ComputationNodePtr(inputs[i]); - - nodePtr->AttachInputs(inputNodes); - } - else - { - switch (inputs.size()) - { - case 1: - nodePtr->AttachInputs(ComputationNodePtr(inputs[0])); - break; - case 2: - nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1])); - break; - case 3: - nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2])); - break; - case 4: - nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3])); - break; - case 5: - nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]), ComputationNodePtr(inputs[4])); - break; - case 6: - nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]), ComputationNodePtr(inputs[4]), ComputationNodePtr(inputs[5])); - break; - default: - if (nodeParamCount > 0) - RuntimeError("Invalid number of parameters name = '%s' call = '%s'\n", node->GetName().c_str(), node->GetValue().c_str()); - break; - } - } - // process common optional parameters (like "tag"); - ProcessOptionalParameters(node); - break; - } - case ndlPassFinal: - break; - } - } - -#ifdef LATER - // EvaluateDotName - Evaluate a dot name and resolve to target node - // node - NDLNode of the script - // nodeParam - NDLNode parameter we are evaluating - // baseName - name of the base node - // pass - which pass through the NDL nodes - // returns: the node that is the evaluated parameter - virtual NDLNode* EvaluateDotName(NDLNode* node, NDLNode* nodeParam, const std::wstring& baseNameP, const NDLPass pass) - - { - if (pass > ndlPassInitial && evaluateNode) - { - 
std::string name = nodeParam->GetName(); - std::wstring wname = msra::strfun::utf16(name); - if (nodeParam->GetType() == ndlTypeDotParameter) - { - // When we see a variable of the form "A.B" in a macro, we need to resolve it to an actual node, by first constructing it's - // fully-qualified name. There are 2 possibilities: - // 1) "A" was defined locally within the macro. In this case, we must find the fully-qualified name of the node that this macro - // call is being assigned to (eg, "C" in the example "C=Macro(X)"), and concatenate it's name with "A.B" (eg, "C.A.B"). - // 2) "A" was passed in as a parameter to a macro. In this case, we must find the fully-qualified name of the node that - // was passed in as "A", and replace the "A" and "A.B" with this name. - - // Consider the following example: - // NdlBLob=[ - // P=MacroCall1(...) - // C=MacroCall2(P) - // ] - // # MacroDefinition - // MacroCall2(X) - // { - // A=MacroCall3(...) - // D=Times(A.B,X.B)} - // } - // - - // In this example, in the call D=Times(A.B,X.B), we need to resolve A.B and X.B appropriately. - // Specifically, "A.B" must be resolved to the fully qualified name "C.A.B", whereas "X.B" must be resolved to the fully qualified name "P.B". - // We then use this fully-qualified name to look up this node in the model (using "m_net.GetNodeFromName"). - - std::size_t firstDotPos = name.find_first_of("."); - if (firstDotPos == std::string::npos) - { - LogicError("nodeParam of type \"ndlTypeDotParameter\" doesn't have a dot in its name: %s", name.c_str()); - } - - std::string nameBeforeDot = name.substr(0, firstDotPos); - std::string nameAfterDot = name.substr(firstDotPos + 1, name.size() - (firstDotPos + 1)); - - // look up if "nameBeforeDot" was a parameter to the macro. - NDLNode* resolvedParam = nodeParam->GetParentScript()->FindSymbol(nameBeforeDot); - if (resolvedParam != nullptr && resolvedParam->GetType() == ndlTypeMacroCall) - { - // if "nameBeforeDot" was a parameter to the macro, builds it's fully qualified name by - // replacing "nameBeforeDot" with the fully qualified name of the node passed in as the parameter. - NDLScript* parentScript = resolvedParam->GetParentScript(); - baseName = parentScript->GetBaseName(); - std::wstring resolvedParamName = msra::strfun::utf16(resolvedParam->GetName()); - wname = baseName.empty() ? - resolvedParamName + L"." + msra::strfun::utf16(nameAfterDot) : - baseName + L"." + resolvedParamName + L"." + msra::strfun::utf16(nameAfterDot); - } - else if (!baseName.empty()) - { - // else, "nameBeforeDot" wasn't a parameter to the macro, so treat it as a local variable. - wname = baseName + L"." + wname; - } - } - else if (!baseName.empty()) - { - wname = baseName + L"." + wname; - } - - // fully qualified names can be looked up in the model - if (m_net.NodeNameExist(wname)) - { - void* np = (void*)m_net.GetNodeFromName(wname); - nodeParam->SetEvalValue(np); - } - // NOTE: there is a bug here, we allow an abbreviated node reference (i.e. L1.BFF) based on return values in NDL - // when the actual full node reference that the computational network uses would be L1.BFF.FF.P, so that is what CN sees - // can we do the normal find symbol here to allow abbreviated node references? 
- - // if we still didn't get a value, throw an error - if (nodeParam->GetEvalValue() == nullptr) - { - LogicError("Dot name could not be resolved '%s': should have a node named '%ls' in computational network\n", nodeParam->GetName().c_str(), name.c_str()); - } - } - return nodeParam; - } -#endif - - // EvaluateParameter - Evaluate a parameter of a call - // node - NDLNode of the script - // nodeParam - NDLNode parameter we are evaluating - // baseName - name of the base node - // pass - which pass through the NDL nodes - // returns: the node that is the evaluated parameter - virtual NDLNode* EvaluateParameter(NDLNode* node, NDLNode* nodeParam, const std::wstring& baseNameP, const NDLPass pass ) - { - // get the parent script that includes the symbol table we are interested in - NDLScript* script = node->GetParentScript(); - wstring baseName = baseNameP; - if (script == NULL) - { - std::wstring name = baseName + L"." + msra::strfun::utf16(node->GetName()); - LogicError("no script for a parameter node in call to %ls\n", name.c_str()); - } - - // evaluate the parameter if we haven't yet, or if we are in the resolve pass (need to set the inputs) - bool evaluateNode = nodeParam->GetEvalValue() == NULL || pass == ndlPassResolve; - switch (nodeParam->GetType()) - { - // if the node is a parameter then look it up in the symbol table - case ndlTypeUndetermined: // an undetermined parameter needs to be looked up again in the symbol table - case ndlTypeParameter: - { - // lookup the parameter - NDLNode* nodeResolve = script->FindSymbol(nodeParam->GetName()); - - // if we have resolved the name, no need to continue evaluation - if (!(pass == ndlPassResolve && nodeResolve && nodeParam->GetEvalValue() == nullptr)) - { - break; - } - if (pass > ndlPassInitial && evaluateNode && nodeResolve) - { - std::string name = nodeResolve->GetName(); - // we need to start from the parent script, because that is the namespace of the parameter being passed in - NDLScript* parentScript = nodeResolve->GetParentScript(); - nodeResolve = parentScript->FindSymbol(name); - - // if we still didn't get a value - if (nodeResolve == nullptr || nodeResolve->GetEvalValue() == nullptr) - { - // check for the fully quantified name in the computation network - // this is needed for MEL processing, since CN nodes names can be used as parameters in MEL - std::wstring wname = msra::strfun::utf16(name); - if (m_net.NodeNameExist(wname)) - { - void* np = (void*)m_net.GetNodeFromName(wname); - // if we don't have a resolve node, it's because the name didn't exist in NDL - if (!nodeResolve) - nodeResolve = nodeParam; - nodeResolve->SetEvalValue(np); - } - else - { - RuntimeError("Parameter name could not be resolved '%s'\n", name.c_str()); - } - } - } - nodeParam = nodeResolve; - break; - } - case ndlTypeFunction: - if (evaluateNode) - Evaluate(nodeParam, baseName, pass); - break; - case ndlTypeMacroCall: - if (evaluateNode) - nodeParam->EvaluateMacro(*this, baseName, pass); - break; - // constants and variables are good as is - case ndlTypeConstant: - case ndlTypeVariable: - break; - // everything else is illegal as a parameter - default: - { - std::wstring name = baseName + L"." 
+ msra::strfun::utf16(node->GetName()); - RuntimeError("Invalid parameter (macro definitions and arrays not allowed), see call to %ls\n", name.c_str()); - } - break; - } - return nodeParam; - } - - // EvaluateParameters - Evaluate the parameters of a call - // node - NDLNode we are evaluating paramters for - // baseName - baseName for the current node - // nodeParamStart - starting parameter that contains a node - // nodeParamCount - ending parameter that contains a node - // pass - NDL pass we are evaluating - // returns: vector of eval pointers, which are ComputationNodePtr for CNEvaluator - virtual std::vector EvaluateParameters(NDLNode* node, const wstring& baseName, int nodeParamStart, int nodeParamCount, const NDLPass pass) - { - std::vector inputs; - std::vector*> parameter = node->GetParameters(); - ConfigArray paramString = node->GetParamString(); - - if (parameter.size() < 1) - { - return inputs; - } - if (nodeParamStart + nodeParamCount > parameter.size()) - throw logic_error("EvaluateParmeters: nodeParamters specified that do not exist"); - size_t numChildren = nodeParamCount; - for (size_t i=0; i < numChildren;++i) - { - int index = i+nodeParamStart; - NDLNode* nodeParam = parameter[index]; - std::wstring paramS = paramString[index]; - - // default base is same as current - std::wstring baseSymbol = baseName; - - NDLNode* nodeResult = EvaluateParameter(node, nodeParam, baseSymbol, pass); - // look for a prefix here and set baseName appropriately - - if (pass == ndlPassResolve) - { - void* np = nodeResult->GetEvalValue(); - assert(np != nullptr); - inputs.push_back((void*)np); - } - else if (pass == ndlPassInitial) // for initial pass we are only interested in resolved nodes (to get constant values) - { - inputs.push_back((void*)nodeResult); - } - // NOTE: in final pass inputs are always NULL - } - - // now return the vector - return inputs; - } - - // ProcessOptionalParameters - Process the optional parameters of a node - virtual void ProcessOptionalParameters(NDLNode* node) - { - vector*> params = node->GetParameters(true); // get all the optional parameters only - ComputationNode* compNode = (ComputationNode*)node->GetEvalValue(); - std::string empty; - - // loop through all the optional parameters processing them as necessary - for (NDLNode* param : params) - { - // make sure it's a "tag" optional parameter, that's all we process currently - if (_stricmp(param->GetName().c_str(), "tag")) - continue; - - std::string value = param->GetValue(); - if (!_stricmp(value.c_str(), "feature")) - { - SetOutputNode(m_net.FeatureNodes(), compNode); - } - else if (!_stricmp(value.c_str(), "label")) - { - SetOutputNode(m_net.LabelNodes(), compNode); - } - else if (!_stricmp(value.c_str(), "criteria")) - { - SetOutputNode(m_net.FinalCriterionNodes(), compNode); - } - else if (!_stricmp(value.c_str(), "multiseq")) - { - SetOutputNode(m_net.NodesReqMultiSeqHandling(), compNode); - } - else if (!_strnicmp(value.c_str(), "eval", 4)) // only compare the first 4 characters - { - SetOutputNode(m_net.EvaluationNodes(), compNode); - } - else if (!_stricmp(value.c_str(), "output")) - { - SetOutputNode(m_net.OutputNodes(), compNode); - } - } - - } - - // SetOutputNode - Set the output node, checks to see if it already exists first - // nodeGroup - group vector to add to - // compNode - computation node to add +// +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+//
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//
+//
+
+#pragma once
+
+#include "IExecutionEngine.h"
+#include "ComputationNetwork.h"
+#include "fileutil.h" // for fexists()
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// SynchronousNodeEvaluator
+// Process the Network Description Language into a Computation Network usable
+// by SynchronousExecutionEngine.
+template <typename ElemType>
+class SynchronousNodeEvaluator : public NDLNodeEvaluator<ElemType>
+{
+public:
+    // Constructor - create evaluator
+    SynchronousNodeEvaluator(ComputationNetwork<ElemType>& cn) : m_net(cn)
+    { }
+
+    // Evaluate - evaluate a node and translate it into the underlying computation network
+    // node - node we are evaluating
+    // baseName - base name for all symbols at this level
+    // pass - NDLPass through the evaluation (0-initial, 1-resolve variables, 2-final)
+    virtual void Evaluate(NDLNode<ElemType>* node, const wstring& baseName, const NDLPass pass)
+    {
+        // constants don't need to be evaluated, they just translate into numbers...
+        if (node->GetType() == ndlTypeConstant
+            || node->GetType() == ndlTypeArray)
+            return;
+
+        // set up the node parameters, where they start in the parameter list, and how many there are
+        // this is needed for the ndlPassResolve step to hook up all the inputs
+        int nodeParamStart = 0;
+        int nodeParamCount = 0;
+
+        // get the parameters
+        std::vector<NDLNode<ElemType>*> parameter = node->GetParameters();
+
+        // get the name for the symbol to be used by CN nodes
+        std::wstring name = msra::strfun::utf16(node->GetName());
+        if (!baseName.empty())
+        {
+            name = baseName + L"." + name;
+        }
+
+        std::wstring cnNodeType = msra::strfun::utf16(node->GetValue());
+
+        ComputationNodePtr nodePtr = nullptr;
+
+        // get the node pointer for the node; it should be stored in the EvalValue
+        if (pass > ndlPassInitial)
+        {
+            nodePtr = (ComputationNodePtr)node->GetEvalValue();
+            if (nodePtr == nullptr)
+            {
+                nodePtr = (ComputationNodePtr)m_net.GetNodeFromName(name);
+                node->SetEvalValue(nodePtr);
+            }
+        }
+
+        if (InputValue<ElemType>::TypeName() == cnNodeType)
+        {
+            if (parameter.size() < 1 || parameter.size() > 2)
+                RuntimeError("%ls should have 1 or 2 parameters[rows, [cols=1]].", cnNodeType.c_str());
+
+            if (pass == ndlPassInitial)
+            {
+                // evaluate only scalar parameters
+                vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
+                size_t rows = ((NDLNode<ElemType>*)params[0])->GetScalar();
+                size_t cols = params.size() > 1 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;
+
+                // first look for this node already existing in the network
+                if (m_net.NodeNameExist(name))
+                    nodePtr = m_net.GetNodeFromName(name);
+                else
+                    nodePtr = m_net.CreateInputNode(name, rows, cols);
+            }
+        }
+        else if (InputValue<ElemType>::SparseTypeName() == cnNodeType)
+        {
+            if (parameter.size() < 1 || parameter.size() > 2)
+                RuntimeError("%ls should have 1 or 2 parameters[rows, [cols=1]].", cnNodeType.c_str());
+
+            if (pass == ndlPassInitial)
+            {
+                // evaluate only scalar parameters
+                vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
+                size_t rows = ((NDLNode<ElemType>*)params[0])->GetScalar();
+                size_t cols = params.size() > 1 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;
+
+                // first look for this node already existing in the network
+                if (m_net.NodeNameExist(name))
+                    nodePtr = m_net.GetNodeFromName(name);
+                else
+                    nodePtr = m_net.CreateSparseInputNode(name, rows, cols);
+            }
+        }
+        else if (cnNodeType == L"ImageInput")
+        {
+            if (parameter.size() < 3 || parameter.size() > 4)
+                RuntimeError("%ls should have 3 or 4 parameters[imageWidth, imageHeight, imageChannels, [numImages=1]].", cnNodeType.c_str());
+
+            if (pass == ndlPassInitial)
+            {
+                // evaluate only scalar parameters
+                vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
+                size_t imageWidth = ((NDLNode<ElemType>*)params[0])->GetScalar();
+                size_t imageHeight = ((NDLNode<ElemType>*)params[1])->GetScalar();
+                size_t imageChannels = ((NDLNode<ElemType>*)params[2])->GetScalar();
+                size_t numImages = parameter.size() > 3 ? ((NDLNode<ElemType>*)params[3])->GetScalar() : 1;
+
+                nodePtr = m_net.CreateInputNode(name, imageWidth, imageHeight, imageChannels, numImages);
+            }
+        }
+        else if (cnNodeType == L"SparseImageInput")
+        {
+            if (parameter.size() < 3 || parameter.size() > 4)
+                RuntimeError("%ls should have 3 or 4 parameters[imageWidth, imageHeight, imageChannels, [numImages=1]].", cnNodeType.c_str());
+
+            if (pass == ndlPassInitial)
+            {
+                // evaluate only scalar parameters
+                vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
+                size_t imageWidth = ((NDLNode<ElemType>*)params[0])->GetScalar();
+                size_t imageHeight = ((NDLNode<ElemType>*)params[1])->GetScalar();
+                size_t imageChannels = ((NDLNode<ElemType>*)params[2])->GetScalar();
+                size_t numImages = parameter.size() > 3 ? ((NDLNode<ElemType>*)params[3])->GetScalar() : 1;
+
+                nodePtr = m_net.CreateSparseInputNode(name, imageWidth, imageHeight, imageChannels, numImages);
+            }
+        }
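
For orientation: an image input is still carried as an ordinary 2-D matrix, with one column per image. A minimal sketch of the presumed dimension mapping (the helper name and the width/height/channel ordering are illustrative assumptions, not code from this patch):

    #include <cstddef>

    // Hypothetical helper: how an [imageWidth x imageHeight x imageChannels]
    // sample presumably maps onto the rows of the input matrix, one column
    // per image. The layout order is assumed for illustration only.
    static void ImageInputDims(size_t imageWidth, size_t imageHeight, size_t imageChannels,
                               size_t numImages, size_t& rows, size_t& cols)
    {
        rows = imageWidth * imageHeight * imageChannels; // flattened sample
        cols = numImages;                                // one column per image
    }
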
+        else if (LearnableParameter<ElemType>::TypeName() == cnNodeType)
+        {
+            if (parameter.size() < 1 || parameter.size() > 2)
+                RuntimeError("%ls should have 1 or 2 parameters[rows, [cols=1]] plus other optional parameters (needGradient=[true|false], init=[uniform|gaussian|fixedValue|fromFile], initValueScale=[1|float], value=[0|float]).", cnNodeType.c_str());
+
+            if (pass == ndlPassInitial)
+            {
+                // evaluate only scalar parameters
+                vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
+                size_t rows = ((NDLNode<ElemType>*)params[0])->GetScalar();
+                size_t cols = params.size() > 1 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;
+
+                bool needGradient = node->GetOptionalParameter("needGradient", "true");
+
+                nodePtr = m_net.CreateLearnableParameter(name, rows, cols);
+
+                nodePtr->NeedGradient() = needGradient;
+            }
+            else if (pass == ndlPassFinal)
+            {
+                static int randomSeed = 1;
+                std::string initString = node->GetOptionalParameter("init", "uniform");
+                ElemType initValueScale = node->GetOptionalParameter("initValueScale", "1");
+                ElemType value = node->GetOptionalParameter("value", "0");
+
+                msra::strfun::tolower_ascii(initString);
+                if (initString == "fixedvalue")
+                    nodePtr->FunctionValues().SetValue(value);
+                else if (initString == "uniform")
+                    m_net.InitLearnableParameters(nodePtr, true, randomSeed++, initValueScale);
+                else if (initString == "gaussian")
+                    m_net.InitLearnableParameters(nodePtr, false, randomSeed++, initValueScale);
+                else if (initString == "fromfile")
+                {
+                    std::string initFromFilePath = node->GetOptionalParameter("initFromFilePath", "");
+                    if (initFromFilePath == "")
+                        RuntimeError("initFromFilePath must be set when using \"fromFile\" initialization method");
+                    if (initFromFilePath[0] == '\"' && initFromFilePath[initFromFilePath.size()-1] == '\"')
+                        // remove the opening and closing double quotes
+                        initFromFilePath = initFromFilePath.substr(1, initFromFilePath.size()-2);
+                    if (!fexists(initFromFilePath))
+                        RuntimeError("File pointed to by initFromFilePath does not exist: %s", initFromFilePath.c_str());
+                    m_net.InitLearnableParametersFromFile(nodePtr, initFromFilePath);
+                }
+                else
+                    RuntimeError("init must be one of [uniform|gaussian|fixedValue|fromFile]");
+            }
+        }
+        else if (SparseLearnableParameter<ElemType>::TypeName() == cnNodeType)
+        {
+            if (parameter.size() < 1 || parameter.size() > 2)
+                RuntimeError("%ls should have 1 or 2 parameters[rows, [cols=1]] plus other optional parameters (needGradient=[true|false], init=[uniform|gaussian|fixedValue|fromFile], initValueScale=[1|float], value=[0|float]).", cnNodeType.c_str());
+
+            if (pass == ndlPassInitial)
+            {
+                // evaluate only scalar parameters
+                vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
+                size_t rows = ((NDLNode<ElemType>*)params[0])->GetScalar();
+                size_t cols = params.size() > 1 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;
+
+                bool needGradient = node->GetOptionalParameter("needGradient", "true");
+
+                nodePtr = m_net.CreateSparseLearnableParameter(name, rows, cols);
+
+                nodePtr->NeedGradient() = needGradient;
+            }
+            else if (pass == ndlPassFinal)
+            {
+                static int randomSeed = 1;
+                std::string initString = node->GetOptionalParameter("init", "uniform");
+                ElemType initValueScale = node->GetOptionalParameter("initValueScale", "1");
+                ElemType value = node->GetOptionalParameter("value", "0");
+
+                msra::strfun::tolower_ascii(initString);
+                if (initString == "fixedvalue")
+                    nodePtr->FunctionValues().SetValue(value);
+                else if (initString == "uniform")
+                    m_net.InitLearnableParameters(nodePtr, true, randomSeed++, initValueScale);
+                else if (initString == "gaussian")
+                    m_net.InitLearnableParameters(nodePtr, false, randomSeed++, initValueScale);
+                else if (initString == "fromfile")
+                {
+                    std::string initFromFilePath = node->GetOptionalParameter("initFromFilePath", "");
+                    if (initFromFilePath == "")
+                        RuntimeError("initFromFilePath must be set when using \"fromFile\" initialization method");
+                    if (initFromFilePath[0] == '\"' && initFromFilePath[initFromFilePath.size()-1] == '\"')
+                        // remove the opening and closing double quotes
+                        initFromFilePath = initFromFilePath.substr(1, initFromFilePath.size()-2);
+                    if (!fexists(initFromFilePath))
+                        RuntimeError("File pointed to by initFromFilePath does not exist: %s", initFromFilePath.c_str());
+                    m_net.InitLearnableParametersFromFile(nodePtr, initFromFilePath);
+                }
+                else
+                    RuntimeError("init must be one of [uniform|gaussian|fixedValue|fromFile]");
+            }
+        }
+        else if (cnNodeType == L"Constant")
+        {
+            if (parameter.size() != 1)
+                RuntimeError("Constant should have 1 fixed parameter [val] and two optional parameters [rows=[1|yourvalue], cols=[1|yourvalue]].");
+
+            if (pass == ndlPassInitial)
+            {
+                size_t rows = node->GetOptionalParameter("rows", "1");
+                size_t cols = node->GetOptionalParameter("cols", "1");
+
+                nodePtr = m_net.CreateLearnableParameter(name, rows, cols);
+                nodePtr->NeedGradient() = false;
+            }
+            else if (pass == ndlPassFinal || nodePtr->FunctionValues().GetNumElements() != 0)
+            {
+                ElemType val = parameter[0]->GetScalar();
+                nodePtr->FunctionValues().SetValue(val);
+            }
+        }
+        else if (cnNodeType == RowSliceNode<ElemType>::TypeName())
+        {
+            if (parameter.size() != 3)
+                RuntimeError("RowSlice should have three parameters. Usage: RowSlice(startRowIndex, numRows, origNodeName).");
+
+            nodeParamCount = 1;
+            nodeParamStart = 2;
+
+            if (pass == ndlPassInitial)
+            {
+                // evaluate only scalar parameters
+                vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
+                size_t start_index = ((NDLNode<ElemType>*)params[0])->GetScalar();
+                size_t num_rows = ((NDLNode<ElemType>*)params[1])->GetScalar();
+
+                bool needGradient = node->GetOptionalParameter("needGradient", "false");
+                nodePtr = m_net.RowSlice(NULL, start_index, num_rows, name);
+                nodePtr->NeedGradient() = needGradient;
+            }
+        }
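
The fromFile branches above strip one pair of surrounding double quotes from initFromFilePath before checking that the file exists. A standalone sketch of that normalization, with an added length guard against a degenerate one-character value (the inline version relies on substr's clamping instead):

    #include <string>

    // Mirrors the quote handling in the fromFile init path: remove one pair
    // of enclosing double quotes, if present. Standalone, for illustration.
    static std::string StripEnclosingQuotes(std::string path)
    {
        if (path.size() >= 2 && path.front() == '"' && path.back() == '"')
            path = path.substr(1, path.size() - 2);
        return path;
    }
    // StripEnclosingQuotes("\"model.W\"") == "model.W"
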
+        else if (cnNodeType == RowRepeatNode<ElemType>::TypeName())
+        {
+            if (parameter.size() != 2)
+                RuntimeError("RowRepeat should have two parameters. Usage: RowRepeat(origNodeName, numRepeats).");
+
+            nodeParamCount = 1;
+            nodeParamStart = 0;
+
+            if (pass == ndlPassInitial)
+            {
+                // evaluate only scalar parameters
+                vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
+                size_t num_repeat = ((NDLNode<ElemType>*)params[1])->GetScalar();
+
+                bool needGradient = node->GetOptionalParameter("needGradient", "false");
+                nodePtr = m_net.RowRepeat(NULL, num_repeat, name);
+                nodePtr->NeedGradient() = needGradient;
+            }
+        }
+        else if (cnNodeType == ReshapeNode<ElemType>::TypeName())
+        {
+            if (parameter.size() < 2 || parameter.size() > 5)
+                RuntimeError("Reshape should have two to five parameters. Usage: Reshape(origNodeName, numRows, [imageWidth=], [imageHeight=], [imageChannels=]).");
+
+            nodeParamCount = 1;
+            nodeParamStart = 0;
+
+            if (pass == ndlPassInitial)
+            {
+                // evaluate only scalar parameters
+                vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
+                size_t num_rows = ((NDLNode<ElemType>*)params[1])->GetScalar();
+                size_t img_width = node->GetOptionalParameter("imageWidth", "0");
+                size_t img_height = node->GetOptionalParameter("imageHeight", "0");
+                size_t img_channels = node->GetOptionalParameter("imageChannels", "0");
+
+                bool needGradient = node->GetOptionalParameter("needGradient", "false");
+                nodePtr = m_net.Reshape(NULL, num_rows, img_width, img_height, img_channels, name);
+                nodePtr->NeedGradient() = needGradient;
+            }
+        }
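
Reshape takes only the new row count; the column count presumably follows from keeping the total element count unchanged, which is the standard reshape invariant. A hedged sketch of that arithmetic (the node's actual validation is not part of this hunk):

    #include <cassert>
    #include <cstddef>

    // Illustrative only: given the old minibatch matrix shape and Reshape's
    // numRows argument, derive the new column count by preserving the
    // element count. Assumes numRows divides the total evenly.
    static size_t ReshapedCols(size_t oldRows, size_t oldCols, size_t numRows)
    {
        assert(numRows != 0 && (oldRows * oldCols) % numRows == 0);
        return (oldRows * oldCols) / numRows;
    }
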
+        else if (cnNodeType == PastValueNode<ElemType>::TypeName() ||
+                 cnNodeType == FutureValueNode<ElemType>::TypeName())
+        {
+            if (parameter.size() < 2 || parameter.size() > 3)
+                RuntimeError("PastValue or FutureValue should have two to three fixed parameters. Usage: PastValue(rows, [cols], m, [timeStep=1, defaultHiddenActivity=0.1]).");
+
+            nodeParamCount = 1;
+            nodeParamStart = parameter.size() > 2 ? 2 : 1;
+
+            if (pass == ndlPassInitial)
+            {
+                // evaluate only scalar parameters
+                vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
+                size_t rows = ((NDLNode<ElemType>*)params[0])->GetScalar();
+                // if we have three parameters the second is columns
+                size_t cols = parameter.size() > 2 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;
+
+                bool needGradient = node->GetOptionalParameter("needGradient", "false");
+                float defaultHiddenActivity = node->GetOptionalParameter("defaultHiddenActivity", "0.1");
+
+                // for backward compatibility we check timeStep first
+                size_t timeStep = node->GetOptionalParameter("timeStep", "1");
+                if (timeStep == 1)
+                {
+                    timeStep = node->GetOptionalParameter("delayTime", "1");
+                }
+
+                if (cnNodeType == PastValueNode<ElemType>::TypeName())
+                {
+                    nodePtr = m_net.PastValue(NULL, defaultHiddenActivity, rows, cols, name);
+                    ((PastValueNode<ElemType>*)nodePtr)->SetTimeStep(timeStep);
+                }
+                else
+                {
+                    nodePtr = m_net.FutureValue(NULL, defaultHiddenActivity, rows, cols, name);
+                    ((FutureValueNode<ElemType>*)nodePtr)->SetTimeStep(timeStep);
+                }
+
+                nodePtr->NeedGradient() = needGradient;
+            }
+        }
+        else if (cnNodeType == ConvolutionNode<ElemType>::TypeName())
+        {
+            if (parameter.size() != 7)
+                RuntimeError("%ls should have 7 fixed parameters[weightNodeName, inputValueNodeName, kernelWidth, kernelHeight, outputChannels, horizontalSubsample, verticalSubsample] and two optional parameters [zeroPadding = [false|yourvalue], maxTempMemSizeInSamples = [0|yourvalue]].", cnNodeType.c_str());
+
+            // setup the parameter position of children so we can hook them up later
+            nodeParamCount = 2;
+            nodeParamStart = 0;
+
+            if (pass == ndlPassInitial)
+            {
+                int id = 2; // skip weightNode and inputValueNode
+
+                // evaluate only scalar parameters
+                vector<void*> params = EvaluateParameters(node, baseName, id, parameter.size()-id, pass);
+                id = 0; // reset counter because the params array starts at zero
+                size_t kernelWidth = ((NDLNode<ElemType>*)params[id++])->GetScalar();
+                size_t kernelHeight = ((NDLNode<ElemType>*)params[id++])->GetScalar();
+                size_t outputChannels = ((NDLNode<ElemType>*)params[id++])->GetScalar();
+                size_t horizontalSubsample = ((NDLNode<ElemType>*)params[id++])->GetScalar();
+                size_t verticalSubsample = ((NDLNode<ElemType>*)params[id++])->GetScalar();
+
+                assert(id == 5);
+
+                // optional
+                bool zeroPadding = node->GetOptionalParameter("zeroPadding", "false");
+                size_t maxTempMemSizeInSamples = node->GetOptionalParameter("maxTempMemSizeInSamples", "0");
+
+                nodePtr = m_net.Convolution(NULL, NULL, kernelWidth, kernelHeight, outputChannels,
+                    horizontalSubsample, verticalSubsample, zeroPadding, name, maxTempMemSizeInSamples);
+            }
+        }
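
For orientation, the conventional geometry implied by kernel size and subsampling (stride): without zero padding the output shrinks by kernelWidth-1 before striding; with padding the input is treated as extended. This is the textbook formula, included only as a reading aid; ConvolutionNode's authoritative computation (and the pooling nodes below, which use the same arithmetic with window sizes) is not part of this diff and may differ in rounding details:

    #include <cstddef>

    // Conventional convolution/pooling output width; the same formula
    // applies to the height. Illustrative assumption, not CNTK's code.
    static size_t ConvOutputDim(size_t inputDim, size_t kernelDim,
                                size_t subsample, bool zeroPadding)
    {
        if (zeroPadding)
            return (inputDim + subsample - 1) / subsample; // "same"-style padding
        return (inputDim - kernelDim) / subsample + 1;     // valid convolution
    }
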
+        else if (cnNodeType == MaxPoolingNode<ElemType>::TypeName())
+        {
+            if (parameter.size() != 5)
+                RuntimeError("%ls should have 5 parameters[inputValueNodeName, windowWidth, windowHeight, horizontalSubsample, verticalSubsample].", cnNodeType.c_str());
+
+            // setup the parameter position of children so we can hook them up later
+            nodeParamCount = 1;
+            nodeParamStart = 0;
+
+            if (pass == ndlPassInitial)
+            {
+                int id = 1; // skip inputValueNode
+
+                // evaluate only scalar parameters
+                vector<void*> params = EvaluateParameters(node, baseName, id, parameter.size() - id, pass);
+                id = 0; // reset counter because the params array starts at zero
+                size_t windowWidth = ((NDLNode<ElemType>*)params[id++])->GetScalar();
+                size_t windowHeight = ((NDLNode<ElemType>*)params[id++])->GetScalar();
+                size_t horizontalSubsample = ((NDLNode<ElemType>*)params[id++])->GetScalar();
+                size_t verticalSubsample = ((NDLNode<ElemType>*)params[id++])->GetScalar();
+
+                assert(id == 4);
+
+                nodePtr = m_net.MaxPooling(NULL, /*inputWidth,inputHeight, channels,*/windowWidth, windowHeight,
+                    horizontalSubsample, verticalSubsample, name);
+            }
+        }
+        else if (cnNodeType == AveragePoolingNode<ElemType>::TypeName())
+        {
+            if (parameter.size() != 5)
+                RuntimeError("%ls should have 5 parameters[inputValueNodeName, windowWidth, windowHeight, horizontalSubsample, verticalSubsample].", cnNodeType.c_str());
+
+            // setup the parameter position of children so we can hook them up later
+            nodeParamCount = 1;
+            nodeParamStart = 0;
+
+            if (pass == ndlPassInitial)
+            {
+                int id = 1; // skip inputValueNode
+
+                // evaluate only scalar parameters
+                vector<void*> params = EvaluateParameters(node, baseName, id, parameter.size() - id, pass);
+                id = 0; // reset counter because the params array starts at zero
+                size_t windowWidth = ((NDLNode<ElemType>*)params[id++])->GetScalar();
+                size_t windowHeight = ((NDLNode<ElemType>*)params[id++])->GetScalar();
+                size_t horizontalSubsample = ((NDLNode<ElemType>*)params[id++])->GetScalar();
+                size_t verticalSubsample = ((NDLNode<ElemType>*)params[id++])->GetScalar();
+
+                assert(id == 4);
+
+                nodePtr = m_net.AveragePooling(NULL, /*inputWidth,inputHeight, channels,*/windowWidth, windowHeight,
+                    horizontalSubsample, verticalSubsample, name);
+            }
+        }
+        else
+        {
+            // setup the variables for node parameter processing
+            nodeParamCount = parameter.size(); // all parameters are nodes in standard nodes
+            nodeParamStart = 0;
+
+            if (pass == ndlPassInitial)
+            {
+                nodePtr = m_net.CreateComputationNode(node->GetValue(), name);
+            }
+        }
+
+        switch (pass)
+        {
+        case ndlPassInitial:
+            node->SetEvalValue(nodePtr);
+            // evaluate parameters
+            EvaluateParameters(node, baseName, nodeParamStart, nodeParamCount, pass);
+            break;
+        case ndlPassResolve:
+        {
+            std::vector<void*> inputs = EvaluateParameters(node, baseName, nodeParamStart, nodeParamCount, pass);
+
+            if (cnNodeType == RowStackNode<ElemType>::TypeName()) // support variable length inputs
+            {
+                std::vector<ComputationNodePtr> inputNodes;
+                inputNodes.resize(inputs.size());
+                for (int i = 0; i < inputs.size(); i++)
+                    inputNodes[i] = ComputationNodePtr(inputs[i]);
+
+                nodePtr->AttachInputs(inputNodes);
+            }
+            else
+            {
+                switch (inputs.size())
+                {
+                case 1:
+                    nodePtr->AttachInputs(ComputationNodePtr(inputs[0]));
+                    break;
+                case 2:
+                    nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]));
+                    break;
+                case 3:
+                    nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]));
+                    break;
+                case 4:
+                    nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]));
+                    break;
+                case 5:
+                    nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]), ComputationNodePtr(inputs[4]));
+                    break;
+                case 6:
+                    nodePtr->AttachInputs(ComputationNodePtr(inputs[0]), ComputationNodePtr(inputs[1]), ComputationNodePtr(inputs[2]), ComputationNodePtr(inputs[3]), ComputationNodePtr(inputs[4]), ComputationNodePtr(inputs[5]));
+                    break;
+                default:
+                    if (nodeParamCount > 0)
+                        RuntimeError("Invalid number of parameters name = '%s' call = '%s'\n", node->GetName().c_str(), node->GetValue().c_str());
+                    break;
+                }
+            }
+            // process common optional parameters (like "tag")
+            ProcessOptionalParameters(node);
+            break;
+        }
+        case ndlPassFinal:
+            break;
+        }
+    }
+
+#ifdef LATER
+    // EvaluateDotName - Evaluate a dot name and resolve to target node
+    // node - NDLNode of the script
+    // nodeParam - NDLNode parameter we are evaluating
+    // baseName - name of the base node
+    // pass - which pass through the NDL nodes
+    // returns: the node that is the evaluated parameter
+    virtual NDLNode<ElemType>* EvaluateDotName(NDLNode<ElemType>* node, NDLNode<ElemType>* nodeParam, const std::wstring& baseNameP, const NDLPass pass)
+    {
+        if (pass > ndlPassInitial && evaluateNode)
+        {
+            std::string name = nodeParam->GetName();
+            std::wstring wname = msra::strfun::utf16(name);
+            if (nodeParam->GetType() == ndlTypeDotParameter)
+            {
+                // When we see a variable of the form "A.B" in a macro, we need to resolve it to an actual node, by first constructing its
+                // fully-qualified name. There are 2 possibilities:
+                // 1) "A" was defined locally within the macro. In this case, we must find the fully-qualified name of the node that this macro
+                //    call is being assigned to (e.g., "C" in the example "C=Macro(X)"), and concatenate its name with "A.B" (e.g., "C.A.B").
+                // 2) "A" was passed in as a parameter to a macro. In this case, we must find the fully-qualified name of the node that
+                //    was passed in as "A", and replace the "A" and "A.B" with this name.
+
+                // Consider the following example:
+                // NdlBLob=[
+                //      P=MacroCall1(...)
+                //      C=MacroCall2(P)
+                // ]
+                // # MacroDefinition
+                // MacroCall2(X)
+                // {
+                //      A=MacroCall3(...)
+                //      D=Times(A.B,X.B)
+                // }
+                //
+
+                // In this example, in the call D=Times(A.B,X.B), we need to resolve A.B and X.B appropriately.
+                // Specifically, "A.B" must be resolved to the fully qualified name "C.A.B", whereas "X.B" must be resolved to the fully qualified name "P.B".
+                // We then use this fully-qualified name to look up this node in the model (using "m_net.GetNodeFromName").
+
+                std::size_t firstDotPos = name.find_first_of(".");
+                if (firstDotPos == std::string::npos)
+                {
+                    LogicError("nodeParam of type \"ndlTypeDotParameter\" doesn't have a dot in its name: %s", name.c_str());
+                }
+
+                std::string nameBeforeDot = name.substr(0, firstDotPos);
+                std::string nameAfterDot = name.substr(firstDotPos + 1, name.size() - (firstDotPos + 1));
+
+                // look up whether "nameBeforeDot" was a parameter to the macro.
+                NDLNode<ElemType>* resolvedParam = nodeParam->GetParentScript()->FindSymbol(nameBeforeDot);
+                if (resolvedParam != nullptr && resolvedParam->GetType() == ndlTypeMacroCall)
+                {
+                    // if "nameBeforeDot" was a parameter to the macro, build its fully qualified name by
+                    // replacing "nameBeforeDot" with the fully qualified name of the node passed in as the parameter.
+                    NDLScript<ElemType>* parentScript = resolvedParam->GetParentScript();
+                    baseName = parentScript->GetBaseName();
+                    std::wstring resolvedParamName = msra::strfun::utf16(resolvedParam->GetName());
+                    wname = baseName.empty() ?
+                        resolvedParamName + L"." + msra::strfun::utf16(nameAfterDot) :
+                        baseName + L"." + resolvedParamName + L"." + msra::strfun::utf16(nameAfterDot);
+                }
+                else if (!baseName.empty())
+                {
+                    // else, "nameBeforeDot" wasn't a parameter to the macro, so treat it as a local variable.
+                    wname = baseName + L"." + wname;
+                }
+            }
+            else if (!baseName.empty())
+            {
+                wname = baseName + L"." + wname;
+            }
+
+            // fully qualified names can be looked up in the model
+            if (m_net.NodeNameExist(wname))
+            {
+                void* np = (void*)m_net.GetNodeFromName(wname);
+                nodeParam->SetEvalValue(np);
+            }
+            // NOTE: there is a bug here, we allow an abbreviated node reference (i.e. L1.BFF) based on return values in NDL
+            // when the actual full node reference that the computational network uses would be L1.BFF.FF.P, so that is what CN sees
+            // can we do the normal find symbol here to allow abbreviated node references?
+
+            // if we still didn't get a value, throw an error
+            if (nodeParam->GetEvalValue() == nullptr)
+            {
+                LogicError("Dot name could not be resolved '%s': should have a node named '%ls' in computational network\n", nodeParam->GetName().c_str(), wname.c_str());
+            }
+        }
+        return nodeParam;
+    }
+#endif
+
+    // EvaluateParameter - Evaluate a parameter of a call
+    // node - NDLNode of the script
+    // nodeParam - NDLNode parameter we are evaluating
+    // baseName - name of the base node
+    // pass - which pass through the NDL nodes
+    // returns: the node that is the evaluated parameter
+    virtual NDLNode<ElemType>* EvaluateParameter(NDLNode<ElemType>* node, NDLNode<ElemType>* nodeParam, const std::wstring& baseNameP, const NDLPass pass)
+    {
+        // get the parent script that includes the symbol table we are interested in
+        NDLScript<ElemType>* script = node->GetParentScript();
+        wstring baseName = baseNameP;
+        if (script == NULL)
+        {
+            std::wstring name = baseName + L"." + msra::strfun::utf16(node->GetName());
+            LogicError("no script for a parameter node in call to %ls\n", name.c_str());
+        }
+
+        // evaluate the parameter if we haven't yet, or if we are in the resolve pass (need to set the inputs)
+        bool evaluateNode = nodeParam->GetEvalValue() == NULL || pass == ndlPassResolve;
+        switch (nodeParam->GetType())
+        {
+        // if the node is a parameter then look it up in the symbol table
+        case ndlTypeUndetermined: // an undetermined parameter needs to be looked up again in the symbol table
+        case ndlTypeParameter:
+        {
+            // lookup the parameter
+            NDLNode<ElemType>* nodeResolve = script->FindSymbol(nodeParam->GetName());
+
+            // if we have resolved the name, no need to continue evaluation
+            if (!(pass == ndlPassResolve && nodeResolve && nodeParam->GetEvalValue() == nullptr))
+            {
+                break;
+            }
+            if (pass > ndlPassInitial && evaluateNode && nodeResolve)
+            {
+                std::string name = nodeResolve->GetName();
+                // we need to start from the parent script, because that is the namespace of the parameter being passed in
+                NDLScript<ElemType>* parentScript = nodeResolve->GetParentScript();
+                nodeResolve = parentScript->FindSymbol(name);
+
+                // if we still didn't get a value
+                if (nodeResolve == nullptr || nodeResolve->GetEvalValue() == nullptr)
+                {
+                    // check for the fully qualified name in the computation network
+                    // this is needed for MEL processing, since CN node names can be used as parameters in MEL
+                    std::wstring wname = msra::strfun::utf16(name);
+                    if (m_net.NodeNameExist(wname))
+                    {
+                        void* np = (void*)m_net.GetNodeFromName(wname);
+                        // if we don't have a resolve node, it's because the name didn't exist in NDL
+                        if (!nodeResolve)
+                            nodeResolve = nodeParam;
+                        nodeResolve->SetEvalValue(np);
+                    }
+                    else
+                    {
+                        RuntimeError("Parameter name could not be resolved '%s'\n", name.c_str());
+                    }
+                }
+            }
+            nodeParam = nodeResolve;
+            break;
+        }
+        case ndlTypeFunction:
+            if (evaluateNode)
+                Evaluate(nodeParam, baseName, pass);
+            break;
+        case ndlTypeMacroCall:
+            if (evaluateNode)
+                nodeParam->EvaluateMacro(*this, baseName, pass);
+            break;
+        // constants and variables are good as is
+        case ndlTypeConstant:
+        case ndlTypeVariable:
+            break;
+        // everything else is illegal as a parameter
+        default:
+        {
+            std::wstring name = baseName + L"." + msra::strfun::utf16(node->GetName());
+            RuntimeError("Invalid parameter (macro definitions and arrays not allowed), see call to %ls\n", name.c_str());
+        }
+        break;
+        }
+        return nodeParam;
+    }
+
+    // EvaluateParameters - Evaluate the parameters of a call
+    // node - NDLNode we are evaluating parameters for
+    // baseName - baseName for the current node
+    // nodeParamStart - starting parameter that contains a node
+    // nodeParamCount - number of parameters that contain a node
+    // pass - NDL pass we are evaluating
+    // returns: vector of eval pointers, which are ComputationNodePtr for CNEvaluator
+    virtual std::vector<void*> EvaluateParameters(NDLNode<ElemType>* node, const wstring& baseName, int nodeParamStart, int nodeParamCount, const NDLPass pass)
+    {
+        std::vector<void*> inputs;
+        std::vector<NDLNode<ElemType>*> parameter = node->GetParameters();
+        ConfigArray paramString = node->GetParamString();
+
+        if (parameter.size() < 1)
+        {
+            return inputs;
+        }
+        if (nodeParamStart + nodeParamCount > parameter.size())
+            throw logic_error("EvaluateParameters: nodeParameters specified that do not exist");
+        size_t numChildren = nodeParamCount;
+        for (size_t i = 0; i < numChildren; ++i)
+        {
+            int index = i + nodeParamStart;
+            NDLNode<ElemType>* nodeParam = parameter[index];
+            std::wstring paramS = paramString[index];
+
+            // default base is same as current
+            std::wstring baseSymbol = baseName;
+
+            NDLNode<ElemType>* nodeResult = EvaluateParameter(node, nodeParam, baseSymbol, pass);
+            // look for a prefix here and set baseName appropriately
+
+            if (pass == ndlPassResolve)
+            {
+                void* np = nodeResult->GetEvalValue();
+                assert(np != nullptr);
+                inputs.push_back((void*)np);
+            }
+            else if (pass == ndlPassInitial) // for initial pass we are only interested in resolved nodes (to get constant values)
+            {
+                inputs.push_back((void*)nodeResult);
+            }
+            // NOTE: in final pass inputs are always NULL
+        }
+
+        // now return the vector
+        return inputs;
+    }
+
+    // ProcessOptionalParameters - Process the optional parameters of a node
+    virtual void ProcessOptionalParameters(NDLNode<ElemType>* node)
+    {
+        vector<NDLNode<ElemType>*> params = node->GetParameters(true); // get all the optional parameters only
+        ComputationNode<ElemType>* compNode = (ComputationNode<ElemType>*)node->GetEvalValue();
+        std::string empty;
+
+        // loop through all the optional parameters processing them as necessary
+        for (NDLNode<ElemType>* param : params)
+        {
+            // make sure it's a "tag" optional parameter, that's all we process currently
+            if (_stricmp(param->GetName().c_str(), "tag"))
+                continue;
+
+            std::string value = param->GetValue();
+            if (!_stricmp(value.c_str(), "feature"))
+            {
+                SetOutputNode(m_net.FeatureNodes(), compNode);
+            }
+            else if (!_stricmp(value.c_str(), "label"))
+            {
+                SetOutputNode(m_net.LabelNodes(), compNode);
+            }
+            else if (!_stricmp(value.c_str(), "criteria"))
+            {
+                SetOutputNode(m_net.FinalCriterionNodes(), compNode);
+            }
+            else if (!_stricmp(value.c_str(), "multiseq"))
+            {
+                SetOutputNode(m_net.NodesReqMultiSeqHandling(), compNode);
+            }
+            else if (!_strnicmp(value.c_str(), "eval", 4)) // only compare the first 4 characters
+            {
+                SetOutputNode(m_net.EvaluationNodes(), compNode);
+            }
+            else if (!_stricmp(value.c_str(), "output"))
+            {
+                SetOutputNode(m_net.OutputNodes(), compNode);
+            }
+        }
+    }
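
Worth noting: the eval tag is matched on its first four characters only, so tag="eval", tag="evaluation", or any other value starting with "eval" all land in EvaluationNodes, while the other tags (feature, label, criteria, multiseq, output) are compared in full, case-insensitively. A small portable check of that prefix behavior (a stand-in for the MSVC-specific _strnicmp, for illustration):

    #include <cctype>

    // Case-insensitive prefix match mirroring _strnicmp(value, "eval", 4).
    static bool MatchesEvalTag(const char* value)
    {
        const char* prefix = "eval"; // prefix is already lowercase
        for (int i = 0; i < 4; ++i)
            if (std::tolower((unsigned char)value[i]) != prefix[i])
                return false; // a '\0' in value also fails here, safely
        return true;
    }
    // MatchesEvalTag("eval") == true, MatchesEvalTag("EvalNodes") == true,
    // MatchesEvalTag("output") == false
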
+
+    // SetOutputNode - Set the output node, checks to see if it already exists first
+    // nodeGroup - group vector to add to
+    // compNode - computation node to add
     void SetOutputNode(std::vector<ComputationNode<ElemType>*>* nodeGroup, ComputationNode<ElemType>* compNode)
-    {
+    {
     for (ComputationNodePtr node : *nodeGroup)
-    {
-        if (node == compNode)
-            return;
-    }
+    {
+        if (node == compNode)
+            return;
+    }
     nodeGroup->push_back(compNode);
-    }
-
-    // FindSymbol - Search the nodes for a fully quantified symbol
-    // symbol - name of the symbol fully quantified name with "dots"
-    // returns - pointer to the matching EvalValue for that node, of NULL if not found
-    virtual void* FindSymbol(const wstring& symbol)
-    {
-        if (m_net.NodeNameExist(symbol))
-            return m_net.GetNodeFromName(symbol);
-        return NULL;
-    }
-
-    virtual ~SynchronousNodeEvaluator()
-    {
-    }
-
-private:
-    ComputationNetwork<ElemType>& m_net;
-    typedef ComputationNode<ElemType>* ComputationNodePtr;
-    void operator=(const SynchronousNodeEvaluator&);
-};
-
-// SynchronousExecutionEngine
-// TODO JC Refactor eligible methods and members into abstract base class.
-template <typename ElemType>
-class SynchronousExecutionEngine : public IExecutionEngine<ElemType>
-{
-public:
-    SynchronousExecutionEngine(DEVICEID_TYPE deviceId=AUTOPLACEMATRIX, unsigned long randomSeedOffset=0)
-    {
-        m_computationNetwork = new ComputationNetwork<ElemType>(deviceId);
-        m_computationNetwork->SetRandomSeedOffset(randomSeedOffset);
-        m_ownNetwork = true;
-        m_nodeEvaluator = new SynchronousNodeEvaluator<ElemType>(*m_computationNetwork);
-    }
-
-    SynchronousExecutionEngine(ComputationNetwork<ElemType>* computationNetwork)
-    {
-        m_computationNetwork = computationNetwork;
-        m_ownNetwork = false;
-        m_nodeEvaluator = new SynchronousNodeEvaluator<ElemType>(*m_computationNetwork);
-    }
-
-    virtual ~SynchronousExecutionEngine()
-    {
-        if (m_ownNetwork)
-            delete m_computationNetwork;
-        delete m_nodeEvaluator;
-    }
-
-    ComputationNetwork<ElemType>& GetComputationNetwork()
-    {
-        return *m_computationNetwork;
-    }
-
-    NDLNodeEvaluator<ElemType>& GetNodeEvaluator()
-    {
-        return *m_nodeEvaluator;
-    }
-
-private:
-    bool m_ownNetwork;
-    ComputationNetwork<ElemType>* m_computationNetwork;
-    SynchronousNodeEvaluator<ElemType>* m_nodeEvaluator;
-protected:
-    // Copy constructor, should never be called.
-    SynchronousExecutionEngine(const SynchronousExecutionEngine<ElemType>& /*deepCopyFrom*/)
-    {
-        throw std::logic_error("'SynchronousExecutionEngine(const SynchronousExecutionEngine& deepCopyFrom)' should never be called.");
-    }
-
-    // Assignment operator, should never be called.
-    SynchronousExecutionEngine& operator=(const SynchronousExecutionEngine& /*deepCopyFrom*/)
-    {
-        throw std::logic_error("'SynchronousExecutionEngine& operator=(const SynchronousExecutionEngine& deepCopyFrom)' should never be called.");
-    }
-};
-
-template class SynchronousExecutionEngine<float>;
-template class SynchronousExecutionEngine<double>;
-
+    }
+
+    // FindSymbol - Search the nodes for a fully qualified symbol
+    // symbol - fully qualified name of the symbol, with "dots"
+    // returns - pointer to the matching EvalValue for that node, or NULL if not found
+    virtual void* FindSymbol(const wstring& symbol)
+    {
+        if (m_net.NodeNameExist(symbol))
+            return m_net.GetNodeFromName(symbol);
+        return NULL;
+    }
+
+    virtual ~SynchronousNodeEvaluator()
+    {
+    }
+
+private:
+    ComputationNetwork<ElemType>& m_net;
+    typedef ComputationNode<ElemType>* ComputationNodePtr;
+    void operator=(const SynchronousNodeEvaluator&);
+};
+
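A hedged sketch of how hosting code presumably wires these pieces together once the engine defined just below is available; the driver that actually runs the three NDL passes lives outside this file and is not shown in this patch:

    // Illustrative only: construct the engine (it owns its network by
    // default), then hand the network and NDL evaluator to whatever code
    // drives the NDL passes. Assumes the declarations in this header.
    void BuildNetworkSketch()
    {
        SynchronousExecutionEngine<float> engine;  // default device, seed offset 0
        ComputationNetwork<float>& net = engine.GetComputationNetwork();
        NDLNodeEvaluator<float>& ndlEval = engine.GetNodeEvaluator();
        (void)net; (void)ndlEval;                  // consumed by the NDL driver
    }
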
+// SynchronousExecutionEngine
+// TODO JC Refactor eligible methods and members into abstract base class.
+template <typename ElemType>
+class SynchronousExecutionEngine : public IExecutionEngine<ElemType>
+{
+public:
+    SynchronousExecutionEngine(DEVICEID_TYPE deviceId=AUTOPLACEMATRIX, unsigned long randomSeedOffset=0)
+    {
+        m_computationNetwork = new ComputationNetwork<ElemType>(deviceId);
+        m_computationNetwork->SetRandomSeedOffset(randomSeedOffset);
+        m_ownNetwork = true;
+        m_nodeEvaluator = new SynchronousNodeEvaluator<ElemType>(*m_computationNetwork);
+    }
+
+    SynchronousExecutionEngine(ComputationNetwork<ElemType>* computationNetwork)
+    {
+        m_computationNetwork = computationNetwork;
+        m_ownNetwork = false;
+        m_nodeEvaluator = new SynchronousNodeEvaluator<ElemType>(*m_computationNetwork);
+    }
+
+    virtual ~SynchronousExecutionEngine()
+    {
+        if (m_ownNetwork)
+            delete m_computationNetwork;
+        delete m_nodeEvaluator;
+    }
+
+    ComputationNetwork<ElemType>& GetComputationNetwork()
+    {
+        return *m_computationNetwork;
+    }
+
+    NDLNodeEvaluator<ElemType>& GetNodeEvaluator()
+    {
+        return *m_nodeEvaluator;
+    }
+
+private:
+    bool m_ownNetwork;
+    ComputationNetwork<ElemType>* m_computationNetwork;
+    SynchronousNodeEvaluator<ElemType>* m_nodeEvaluator;
+protected:
+    // Copy constructor, should never be called.
+    SynchronousExecutionEngine(const SynchronousExecutionEngine<ElemType>& /*deepCopyFrom*/)
+    {
+        throw std::logic_error("'SynchronousExecutionEngine(const SynchronousExecutionEngine& deepCopyFrom)' should never be called.");
+    }
+
+    // Assignment operator, should never be called.
+    SynchronousExecutionEngine<ElemType>& operator=(const SynchronousExecutionEngine<ElemType>& /*deepCopyFrom*/)
+    {
+        throw std::logic_error("'SynchronousExecutionEngine& operator=(const SynchronousExecutionEngine& deepCopyFrom)' should never be called.");
+    }
+};
+
+template class SynchronousExecutionEngine<float>;
+template class SynchronousExecutionEngine<double>;
+
 }}}
\ No newline at end of file
diff --git a/MachineLearning/CNTK/TrainingCriterionNodes.h b/MachineLearning/CNTK/TrainingCriterionNodes.h
index 8735fd6df088..8c123082e204 100644
--- a/MachineLearning/CNTK/TrainingCriterionNodes.h
+++ b/MachineLearning/CNTK/TrainingCriterionNodes.h
@@ -1410,7 +1410,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                 size_t i = t % nS;
                 if ((*m_minibatchPackingFlag)[j] & MinibatchPackingFlag::NoLabel)
                 {
-                    if ((*m_sentenceSeg)(i,j) == NO_LABELS)
+                    if ((int)(*m_sentenceSeg)(i,j) & NO_LABEL)
                    {
                         matrixToBeMasked.ColumnSlice(t,1).SetValue(0);
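
The functional change in that hunk: now that the no-label state is one bit among several (it can co-occur with the sequence-start/end bits, and the no-input state sets the no-feature and no-label bits together), an equality test against the packed per-frame value would miss combined states, so the new code masks the bit instead. A minimal self-contained illustration (the two flag values below are arbitrary stand-ins for the real macros):

    #include <cassert>

    // Why '== NO_LABELS' had to become '& NO_LABEL': a frame can carry
    // several packing flags at once, so only a bitwise test is reliable.
    int main()
    {
        const int kStartBit   = 1 << 0; // stand-in for a sequence-start flag
        const int kNoLabelBit = 1 << 3; // stand-in for the no-label flag

        int frame = kStartBit | kNoLabelBit; // first frame of a padded sequence

        assert(frame != kNoLabelBit);        // the equality test misses this state...
        assert((frame & kNoLabelBit) != 0);  // ...the mask test catches it
        return 0;
    }
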
diff --git a/MachineLearning/CNTKEval/EvalReader.h b/MachineLearning/CNTKEval/EvalReader.h
index 287ec8a75bf8..02f16e7cd5c4 100644
--- a/MachineLearning/CNTKEval/EvalReader.h
+++ b/MachineLearning/CNTKEval/EvalReader.h
@@ -177,17 +177,17 @@ class EvalReader : public IDataReader<ElemType>
         assert(m_switchFrame.size() == 1);
         sentenceBegin.Resize(1, m_mbSize);
         minibatchPackingFlag.resize(m_mbSize);
-        sentenceBegin.SetValue((ElemType)SENTENCE_MIDDLE);
+        sentenceBegin.SetValue((ElemType)SEQUENCE_MIDDLE);
         std::fill(minibatchPackingFlag.begin(), minibatchPackingFlag.end(), MinibatchPackingFlag::None);
         if (m_switchFrame[0] < m_mbSize) /* there is a switch frame within the minibatch*/
         {
-            sentenceBegin.SetValue(0, m_switchFrame[0], (ElemType)SENTENCE_BEGIN);
-            minibatchPackingFlag[m_switchFrame[0]] = MinibatchPackingFlag::UtteranceStart;
+            sentenceBegin.SetValue(0, m_switchFrame[0], (ElemType)SEQUENCE_START);
+            minibatchPackingFlag[m_switchFrame[0]] = MinibatchPackingFlag::SequenceStart;
             if (m_switchFrame[0] > 0)
             {
-                sentenceBegin.SetValue(0, m_switchFrame[0] - 1, (ElemType)SENTENCE_END);
-                minibatchPackingFlag[m_switchFrame[0] - 1] = MinibatchPackingFlag::UtteranceEnd;
+                sentenceBegin.SetValue(0, m_switchFrame[0] - 1, (ElemType)SEQUENCE_END);
+                minibatchPackingFlag[m_switchFrame[0] - 1] = MinibatchPackingFlag::SequenceEnd;
             }
         }
     }
diff --git a/Math/MathPerformanceTests/MathPerformanceTests.cpp b/Math/MathPerformanceTests/MathPerformanceTests.cpp
index 53269ede4e6b..8d7b480ca45c 100644
--- a/Math/MathPerformanceTests/MathPerformanceTests.cpp
+++ b/Math/MathPerformanceTests/MathPerformanceTests.cpp
@@ -30,9 +30,9 @@ void SetToInitStateValueForResetSeg(const Matrix<ElemType>& sentenceBegin, 
     Matrix<ElemType> colPos(sentenceBegin.GetDeviceId());
     colPos.SetValue(sentenceBegin); /// -1 0 1
-    colPos.InplaceTruncateBottom(SENTENCE_BEGIN);
+    colPos.InplaceTruncateBottom(SEQUENCE_START);
     Matrix<ElemType>::Scale((ElemType)-1.0, colPos);
-    colPos += SENTENCE_MIDDLE;
+    colPos += SEQUENCE_MIDDLE;
     colSeg.SetDiagonalValue(colPos);
     Matrix<ElemType> ones(sentenceBegin.GetDeviceId());
     ones.Resize(nStateRow, nStream);
@@ -63,7 +63,7 @@ void rnnEvaluateThisNodeSRP(Matrix<ElemType>& functionValues, size_t mNbr, Matri
     colSeg.Resize(mNbr, mNbr);
     colSeg.SetValue(0);
     colSegPastActivity.SetValue(colBegin);
-    colSegPastActivity.InplaceTruncateBottom(SENTENCE_BEGIN);
+    colSegPastActivity.InplaceTruncateBottom(SEQUENCE_START);
     colSeg.SetDiagonalValue(colSegPastActivity);
     Matrix<ElemType>::Multiply(inp, false, colSeg, false, out);
     ElemType initStateValue = (ElemType) 0.1;
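
In both test kernels above, the clamped sentence-begin column is placed on a diagonal, so a single matrix multiply scales every stream's column by its own per-stream flag; that is how sequence boundaries are applied across all streams without an explicit loop. A plain-array sketch of that diagonal column scaling (illustrative only, no CNTK Matrix types, and no claim about the exact flag values involved):

    #include <cstddef>
    #include <vector>

    // Scale each column j of 'data' (rows x n, column-major) by mask[j] --
    // the effect of right-multiplying by the diagonal matrix built from the
    // truncated sentence-begin column in the tests above.
    static void ApplyStreamMask(std::vector<float>& data, size_t rows, size_t n,
                                const std::vector<float>& mask /* n entries */)
    {
        for (size_t j = 0; j < n; ++j)
            for (size_t i = 0; i < rows; ++i)
                data[j * rows + i] *= mask[j];
    }
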