diff --git a/Source/CNTK/BrainScript/ExperimentalNetworkBuilder.cpp b/Source/CNTK/BrainScript/ExperimentalNetworkBuilder.cpp index 34eeaac121e2..e8682b4cde6e 100644 --- a/Source/CNTK/BrainScript/ExperimentalNetworkBuilder.cpp +++ b/Source/CNTK/BrainScript/ExperimentalNetworkBuilder.cpp @@ -39,13 +39,13 @@ using namespace std; L"Parameter = LearnableParameter // deprecated \n" L"ParameterTensor(dims, needGradient = true, init = 'uniform'/*|fixedValue|gaussian|fromFile*/, initValueScale = 1, value = 0, initFromFilePath = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n" // ^^ already works; vv untested - L"Input(rows, tag='feature') = new ComputationNode [ operation = 'InputValue' ; shape = new TensorShape [ dims = (rows) ] ; isImage = false /*plus the function args*/ ]\n" // note: naming a little inconsistent // TODO: re-test after flag change - L"SparseInput(rows, tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; shape = new TensorShape [ dims = (rows) ] ; isImage = false /*plus the function args*/ ]\n" + L"Input(dims, tag='feature') = new ComputationNode [ operation = 'InputValue' ; shape = new TensorShape [ /*dims*/ ] ; isImage = false /*plus the function args*/ ]\n" // note: naming a little inconsistent // TODO: re-test after flag change + L"SparseInput(dims, tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; shape = new TensorShape [ /*dims*/ ] ; isImage = false /*plus the function args*/ ]\n" L"ImageInput(imageWidth, imageHeight, imageChannels, imageLayout='CHW', tag='feature') = new ComputationNode [ operation = 'InputValue' ; isImage = true /*plus the function args*/ ]\n" L"SparseImageInput(imageWidth, imageHeight, imageChannels, imageLayout='CHW', tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; isImage = true /*plus the function args*/ ]\n" L"Constant(val, rows = 1, cols = 1, tag='') = Parameter(rows, cols, needGradient = false, init = 'fixedValue', value = val) \n" - L"PastValue(rows, cols, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'PastValue' ; inputs = input /*plus the function args*/ ]\n" - L"FutureValue(rows, cols, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'FutureValue' ; inputs = input /*plus the function args*/ ]\n" + L"PastValue(dims, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'PastValue' ; inputs = input ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n" + L"FutureValue(dims, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'FutureValue' ; inputs = input ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n" // TODO: ^^ DelayedValues no longer need to know their dimension. That is inferred in Validation. 
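(Review note on the hunk above: Input, SparseInput, PastValue, and FutureValue now take tensor dimensions rather than a rows/cols pair; the minibatch/column dimension is carried by the MBLayout at runtime. A minimal C++ sketch of what these builtins resolve to on the builder side, assuming the CreateInputNode/CreateSparseInputNode overloads introduced later in this patch and a TensorShape constructor taking a dimension vector; the dims and variable names here are hypothetical:

    #include "DataTensor.h" // TensorShape
    TensorShape shape(std::vector<size_t>{ 28, 28, 1 });          // hypothetical dims
    auto features = builder.CreateInputNode(L"features", shape);  // dense input
    auto bow      = builder.CreateSparseInputNode(L"bow", shape); // sparse input
)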
L"RowSlice(startIndex, numRows, input, needGradient = false, tag='') = new ComputationNode [ operation = 'RowSlice' ; inputs = input /*plus the function args*/ ]\n" L"RowRepeat(input, numRepeats, needGradient = false, tag='') = new ComputationNode [ operation = 'RowRepeat' ; inputs = input /*plus the function args*/ ]\n" diff --git a/Source/CNTK/SimpleNetworkBuilder.cpp b/Source/CNTK/SimpleNetworkBuilder.cpp index 5a9ddc2685d9..90892f8972a9 100644 --- a/Source/CNTK/SimpleNetworkBuilder.cpp +++ b/Source/CNTK/SimpleNetworkBuilder.cpp @@ -30,29 +30,29 @@ namespace Microsoft { namespace MSR { namespace CNTK { case SIMPLENET: net = BuildSimpleDNN(); break; case SIMPLERNN: - net = BuildSimpleRNN(1); break; + net = BuildSimpleRNN(); break; case LSTM: - net = BuildLSTMNetworkFromDescription(1); break; + net = BuildLSTMNetworkFromDescription(); break; case CLASSLSTM: - net = BuildCLASSLSTMNetworkFromDescription(1); break; + net = BuildCLASSLSTMNetworkFromDescription(); break; case NCELSTM: - net = BuildNCELSTMNetworkFromDescription(1); break; + net = BuildNCELSTMNetworkFromDescription(); break; case CLASSLM: - net = BuildClassEntropyNetwork(1); break; + net = BuildClassEntropyNetwork(); break; case LBLM: - net = BuildLogBilinearNetworkFromDescription(1); break; + net = BuildLogBilinearNetworkFromDescription(); break; case NPLM: - net = BuildNeuralProbNetworkFromDescription(1); break; + net = BuildNeuralProbNetworkFromDescription(); break; case CLSTM: - net = BuildConditionalLSTMNetworkFromDescription(1); break; + net = BuildConditionalLSTMNetworkFromDescription(); break; case RCRF: - net = BuildSeqTrnLSTMNetworkFromDescription(1); break; + net = BuildSeqTrnLSTMNetworkFromDescription(); break; case LSTMENCODER: - net = BuildLSTMEncoderNetworkFromDescription(1); break; + net = BuildLSTMEncoderNetworkFromDescription(); break; case UNIDIRECTIONALLSTM: - net = BuildUnidirectionalLSTMNetworksFromDescription(1); break; + net = BuildUnidirectionalLSTMNetworksFromDescription(); break; case BIDIRECTIONALLSTM: - net = BuildBiDirectionalLSTMNetworksFromDescription(1); break; + net = BuildBiDirectionalLSTMNetworksFromDescription(); break; default: LogicError("BuildNetworkFromDescription: invalid m_rnnType %d", (int)m_rnnType); } @@ -75,11 +75,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { switch (m_rnnType) { case ALIGNMENTSIMILARITYGENERATOR: - net = BuildAlignmentDecoderNetworkFromDescription(encoderNet, 1); + net = BuildAlignmentDecoderNetworkFromDescription(encoderNet); net->CompileNetwork(); return net; case ALIGNMENTSIMILARITYGFORWARDDECODER: - net = BuildAlignmentForwardDecoderNetworkFromDescription(encoderNet, 1); + net = BuildAlignmentForwardDecoderNetworkFromDescription(encoderNet); net->CompileNetwork(); return net; } @@ -95,12 +95,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { { unsigned long randomSeed = 1; - size_t mbSize = 3; //this is not the actual minibatch size. 
only used in the validation process - size_t numHiddenLayers = m_layerSizes.size() - 2; ComputationNodePtr input, w, b, output, label, prior, scaledLogLikelihood; - input = builder.Input(m_layerSizes[0], mbSize, L"features"); + input = builder.CreateInputNode(L"features", m_layerSizes[0]); m_net->FeatureNodes().push_back(input); if (m_applyMeanVarNorm) @@ -114,9 +112,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (numHiddenLayers > 0) { - w = builder.Parameter(m_layerSizes[1], m_layerSizes[0], L"W0"); + w = builder.CreateLearnableParameter(L"W0", m_layerSizes[1], m_layerSizes[0]); m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); - b = builder.Parameter(m_layerSizes[1], 1, L"B0"); + b = builder.CreateLearnableParameter(L"B0", m_layerSizes[1], 1); output = ApplyNonlinearFunction(builder.Plus(builder.Times(w, input, L"W0*features"), b, L"W0*features+B0"), 0, L"H1"); if (m_addDropoutNodes) @@ -133,9 +131,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { wstring nameOfPlus = nameOfTimes + L"+" + nameOfB; wstring nameOfH = msra::strfun::wstrprintf(L"H%d", i + 1); - w = builder.Parameter(m_layerSizes[i + 1], m_layerSizes[i], nameOfW); + w = builder.CreateLearnableParameter(nameOfW, m_layerSizes[i + 1], m_layerSizes[i]); m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); - b = builder.Parameter(m_layerSizes[i + 1], 1, nameOfB); + b = builder.CreateLearnableParameter(nameOfB, m_layerSizes[i + 1], 1); output = ApplyNonlinearFunction(builder.Plus(builder.Times(w, input, nameOfTimes), b, nameOfPlus), i, nameOfH); if (m_addDropoutNodes) @@ -151,13 +149,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { wstring nameOfTimes = nameOfW + L"*" + nameOfPrevH; wstring nameOfPlus = nameOfTimes + L"+" + nameOfB; - w = builder.Parameter(m_layerSizes[numHiddenLayers + 1], m_layerSizes[numHiddenLayers], nameOfW); + w = builder.CreateLearnableParameter(nameOfW, m_layerSizes[numHiddenLayers + 1], m_layerSizes[numHiddenLayers]); m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); - b = builder.Parameter(m_layerSizes[numHiddenLayers + 1], 1, nameOfB); + b = builder.CreateLearnableParameter(nameOfB, m_layerSizes[numHiddenLayers + 1], 1); output = builder.Plus(builder.Times(w, input, nameOfTimes), b, nameOfPlus); m_net->RenameNode(output, L"HLast"); - label = builder.Input(m_layerSizes[numHiddenLayers + 1], mbSize, L"labels"); + label = builder.CreateInputNode(L"labels", m_layerSizes[numHiddenLayers + 1]); AddTrainAndEvalCriterionNodes(output, label); @@ -188,7 +186,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // Note: while ComputationNode and ComputationNetwork are (supposed to be) independent of ElemType, it is OK to keep this class dependent.
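(Review note: the mbSize argument removed throughout this file was only a dummy threaded through for validation; dimensions now come from the layer sizes alone. A minimal sketch of the recurrence idiom the following builders keep using, written with the post-patch builder calls; hiddenDim and input are placeholders:

    auto w = builder.CreateLearnableParameter(L"W0", hiddenDim, hiddenDim);
    m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
    auto pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, hiddenDim, /*timeStep=*/1);
    auto output = builder.Plus(builder.Times(w, pastValue), input);
    pastValue->AttachInputs(output); // close the delay loop explicitly
)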
template - ComputationNetworkPtr SimpleNetworkBuilder::BuildSimpleRNN(size_t mbSize) + ComputationNetworkPtr SimpleNetworkBuilder::BuildSimpleRNN() { ComputationNetworkBuilder builder(*m_net); if (m_net->GetTotalNumberOfNodes() < 1) //not built yet @@ -201,7 +199,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNodePtr input, w, b, u, pastValue, output, label, prior; - input = builder.CreateSparseInputNode(L"features", m_layerSizes[0], mbSize); + input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]); m_net->FeatureNodes().push_back(input); if (m_applyMeanVarNorm) @@ -225,7 +223,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { w = builder.CreateLearnableParameter(L"W0", m_layerSizes[1], m_layerSizes[1]); m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); - pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[1], mbSize, 1); + pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[1], 1); /// unless there is a good algorithm to detect loops, use this explicit setup output = ApplyNonlinearFunction( builder.Plus( @@ -255,7 +253,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { w = builder.CreateLearnableParameter(msra::strfun::wstrprintf (L"W%d", i), m_layerSizes[i+1], m_layerSizes[i+1]); m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); - pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t)m_layerSizes[i+1], mbSize, 1); + pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t)m_layerSizes[i+1], 1); /// unless there is a good algorithm to detect loops, use this explicit setup output = ApplyNonlinearFunction( builder.Plus( @@ -279,7 +277,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); /*m_net->MatrixL2Reg(w , L"L1w");*/ - label = builder.CreateInputNode(L"labels", m_layerSizes[numHiddenLayers+1], mbSize); + label = builder.CreateInputNode(L"labels", m_layerSizes[numHiddenLayers+1]); AddTrainAndEvalCriterionNodes(input, label, w, L"criterion", L"eval"); output = builder.Times(w, input, L"outputs"); @@ -294,7 +292,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } template - ComputationNetworkPtr SimpleNetworkBuilder::BuildClassEntropyNetwork(size_t mbSize) + ComputationNetworkPtr SimpleNetworkBuilder::BuildClassEntropyNetwork() { ComputationNetworkBuilder builder(*m_net); @@ -312,7 +310,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (m_vocabSize != m_layerSizes[numHiddenLayers + 1]) RuntimeError("BuildClassEntropyNetwork : vocabulary size should be the same as the output layer size"); - input = builder.CreateSparseInputNode(L"features", m_layerSizes[0], mbSize); + input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]); m_net->FeatureNodes().push_back(input); if (m_applyMeanVarNorm) @@ -335,7 +333,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { w = builder.CreateLearnableParameter(L"W0", m_layerSizes[1], m_layerSizes[1]); m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); - pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[1], mbSize, 1); + pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[1], 1); /// unless there is a good algorithm to detect loops, use this explicit setup output = ApplyNonlinearFunction( builder.Plus( @@ -364,7 +362,7 @@ namespace Microsoft { namespace MSR { namespace 
CNTK { w = builder.CreateLearnableParameter(msra::strfun::wstrprintf (L"W%d", i), m_layerSizes[i+1], m_layerSizes[i+1]); m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); - pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t)m_layerSizes[i+1], mbSize, 1); + pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t)m_layerSizes[i+1], 1); /// unless there is a good algorithm to detect loops, use this explicit setup output = ApplyNonlinearFunction( builder.Plus( @@ -391,7 +389,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); /// the label is a dense matrix. each element is the word index - label = builder.CreateInputNode(L"labels", 4, mbSize); + label = builder.CreateInputNode(L"labels", 4); clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]); m_net->InitLearnableParameters(clsweight, m_uniformInit, randomSeed++, m_initValueScale); @@ -412,7 +410,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } template - ComputationNetworkPtr SimpleNetworkBuilder::BuildConditionalLSTMNetworkFromDescription(size_t mbSize) + ComputationNetworkPtr SimpleNetworkBuilder::BuildConditionalLSTMNetworkFromDescription() { ComputationNetworkBuilder builder(*m_net); if (m_net->GetTotalNumberOfNodes() < 1) //not built yet @@ -428,7 +426,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNodePtr clslogpostprob; ComputationNodePtr clsweight; - input = builder.CreateSparseInputNode(L"features", m_layerSizes[0], mbSize); + input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]); m_net->FeatureNodes().push_back(input); if (m_applyMeanVarNorm) @@ -461,13 +459,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (numHiddenLayers > 0) { // output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input); - output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input); + output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input); /// previously used function. now uses LSTMNode which is correct and fast input = output; for (int i = 1 + offset; i < numHiddenLayers; i++) { // output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input); - output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, i, m_layerSizes[i], m_layerSizes[i + 1], input); + output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input); if (m_addDropoutNodes) input = builder.Dropout(output); @@ -477,7 +475,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } /// serve as a global bias term - gt = builder.CreateInputNode(L"binaryFeature", m_auxFeatDim, 1); + gt = builder.CreateInputNode(L"binaryFeature", m_auxFeatDim); m_net->FeatureNodes().push_back(gt); e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"AuxTrans%d", 0), m_layerSizes[numHiddenLayers], m_auxFeatDim); @@ -493,7 +491,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); /// the label is a dense matrix. 
each element is the word index - label = builder.CreateInputNode(L"labels", 4, mbSize); + label = builder.CreateInputNode(L"labels", 4); clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]); m_net->InitLearnableParameters(clsweight, m_uniformInit, randomSeed++, m_initValueScale); @@ -518,7 +516,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { the alignment node takes a variable length input and relates each element to a variable length output */ template - ComputationNetworkPtr SimpleNetworkBuilder::BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet, size_t mbSize) + ComputationNetworkPtr SimpleNetworkBuilder::BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet) { ComputationNetworkBuilder builder(*m_net); if (m_net->GetTotalNumberOfNodes() < 1) //not built yet @@ -535,7 +533,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNodePtr clsweight; ComputationNodePtr columnStride, rowStride; - input = builder.CreateSparseInputNode(L"features", m_layerSizes[0], mbSize); + input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]); m_net->FeatureNodes().push_back(input); if (m_lookupTableOrder > 0) @@ -577,9 +575,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", i), m_layerSizes[i], m_layerSizes[i]); m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); - pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t)m_layerSizes[i], mbSize, 1); + pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t)m_layerSizes[i], 1); // output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input); - // output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input); + // output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input); /// alignment node to get weights from source to target /// this alignment node computes weights of the current hidden state after special encoder ending symbol to all @@ -607,7 +605,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (; i < numHiddenLayers; i++) { //output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input); - output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, i, m_layerSizes[i], m_layerSizes[i + 1], input); + output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input); if (m_addDropoutNodes) input = builder.Dropout(output); @@ -625,7 +623,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); /// the label is a dense matrix.
each element is the word index - label = builder.CreateInputNode(L"labels", 4, mbSize); + label = builder.CreateInputNode(L"labels", 4); clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]); m_net->InitLearnableParameters(clsweight, m_uniformInit, randomSeed++, m_initValueScale); @@ -645,7 +643,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } template - ComputationNetworkPtr SimpleNetworkBuilder::BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet, size_t mbSize) + ComputationNetworkPtr SimpleNetworkBuilder::BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet) { ComputationNetworkBuilder builder(*m_net); if (m_net->GetTotalNumberOfNodes() < 1) //not built yet @@ -662,7 +660,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNodePtr clsweight; ComputationNodePtr columnStride, rowStride; - input = builder.CreateSparseInputNode(L"features", m_layerSizes[0], mbSize); + input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]); m_net->FeatureNodes().push_back(input); if (m_lookupTableOrder > 0) @@ -704,9 +702,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", i), m_layerSizes[i], m_layerSizes[i]); m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); - pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t)m_layerSizes[i], mbSize, 1); + pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t)m_layerSizes[i], 1); // output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input); - // output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input); + // output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input); /// alignment node to get weights from source to target /// this alignment node computes weights of the current hidden state after special encoder ending symbol to all @@ -734,7 +732,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (; i < numHiddenLayers; i++) { //output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input); - output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, i, m_layerSizes[i], m_layerSizes[i + 1], input); + output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input); if (m_addDropoutNodes) input = builder.Dropout(output); @@ -752,7 +750,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); /// the label is a dense matrix.
each element is the word index - label = builder.CreateInputNode(L"labels", 4, mbSize); + label = builder.CreateInputNode(L"labels", 4); clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]); m_net->InitLearnableParameters(clsweight, m_uniformInit, randomSeed++, m_initValueScale); @@ -775,7 +773,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } template - ComputationNetworkPtr SimpleNetworkBuilder::BuildLogBilinearNetworkFromDescription(size_t mbSize) + ComputationNetworkPtr SimpleNetworkBuilder::BuildLogBilinearNetworkFromDescription() { ComputationNetworkBuilder builder(*m_net); if (m_net->GetTotalNumberOfNodes() < 1) //not built yet @@ -793,8 +791,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNodePtr ot=nullptr, it=nullptr, ft=nullptr, gt=nullptr, ct=nullptr, ht=nullptr; ComputationNodePtr pastValueXI, pastValueXII, pastValueXIII, pastValueXIV; -// input = builder.CreateSparseInputNode(L"features", m_layerSizes[0], mbSize); - input = builder.CreateInputNode(L"features", m_layerSizes[0], mbSize); +// input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]); + input = builder.CreateInputNode(L"features", m_layerSizes[0]); featin = input; m_net->FeatureNodes().push_back(input); @@ -827,7 +825,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { while (ik <= m_maOrder) { pastValueXI = - builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[0], mbSize, ik, msra::strfun::wstrprintf(L"pastValue%d", ik)); + builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[0], ik, msra::strfun::wstrprintf(L"pastValue%d", ik)); pastValueXI->SetParameterUpdateRequired(false); pastValueXI->AttachInputs(input); //TODO: to figure out sparse matrix size @@ -855,7 +853,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { w = builder.CreateLearnableParameter(msra::strfun::wstrprintf (L"R%d", i+1), m_layerSizes[i+1], m_layerSizes[i+1]); m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); - pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[i+1], mbSize, 1); + pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[i+1], 1); output = builder.Plus(builder.Times(w, pastValue), input); pastValue->AttachInputs(output); @@ -875,7 +873,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { w = builder.CreateLearnableParameter(msra::strfun::wstrprintf (L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers+1], m_layerSizes[numHiddenLayers]); m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); - label = builder.CreateInputNode(L"labels", m_layerSizes[numHiddenLayers+1], mbSize); + label = builder.CreateInputNode(L"labels", m_layerSizes[numHiddenLayers+1]); AddTrainAndEvalCriterionNodes(input, label, w); output = builder.Times(w, input, L"outputs"); @@ -892,7 +890,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } template - ComputationNetworkPtr SimpleNetworkBuilder::BuildNeuralProbNetworkFromDescription(size_t mbSize) + ComputationNetworkPtr SimpleNetworkBuilder::BuildNeuralProbNetworkFromDescription() { ComputationNetworkBuilder builder(*m_net); if (m_net->GetTotalNumberOfNodes() < 1) //not built yet @@ -910,7 +908,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNodePtr ot = nullptr, it = nullptr, ft = nullptr, gt = nullptr, ct = nullptr, ht = nullptr; ComputationNodePtr pastValueXI, pastValueXII, pastValueXIII, pastValueXIV; - input = 
builder.CreateSparseInputNode(L"features", m_layerSizes[0], mbSize); + input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]); m_net->FeatureNodes().push_back(input); if (m_applyMeanVarNorm) @@ -927,10 +925,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { { bi = builder.CreateLearnableParameter(L"bi0", m_layerSizes[1], 1); - pastValueXI = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[0], mbSize, 1); - pastValueXII = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[0], mbSize, 2); - pastValueXIII = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[0], mbSize, 3); - pastValueXIV = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[0], mbSize, 4); + pastValueXI = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[0], 1); + pastValueXII = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[0], 2); + pastValueXIII = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[0], 3); + pastValueXIV = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[0], 4); pastValueXI->AttachInputs(input); pastValueXII->AttachInputs(input); pastValueXIII->AttachInputs(input); @@ -996,7 +994,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { w = builder.CreateLearnableParameter(msra::strfun::wstrprintf (L"W%d", i), m_layerSizes[i+1], m_layerSizes[i+1]); m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); std::list recurrent_loop; - pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[i+1], mbSize, 1); + pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, m_layerSizes[i+1], 1); output = SimpleNetworkBuilder::ApplyNonlinearFunction(builder.Plus(builder.Times(u, input), builder.Times(w, pastValue)), i); pastValue->AttachInputs(output); recur_idx++; @@ -1017,7 +1015,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { w = builder.CreateLearnableParameter(msra::strfun::wstrprintf (L"W%d", numHiddenLayers), m_layerSizes[numHiddenLayers+1], m_layerSizes[numHiddenLayers]); m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); // b = builder.CreateLearnableParameter(msra::strfun::wstrprintf (L"B%d", numHiddenLayers), m_layerSizes[numHiddenLayers+1], 1); - label = builder.CreateSparseInputNode(L"labels", m_layerSizes[numHiddenLayers+1], mbSize); + label = builder.CreateSparseInputNode(L"labels", m_layerSizes[numHiddenLayers+1]); AddTrainAndEvalCriterionNodes(input, label, w); output = builder.Times(w, input); @@ -1034,7 +1032,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } template - shared_ptr> /*ComputationNodePtr*/ SimpleNetworkBuilder::BuildDirectConnect(unsigned long &randomSeed, size_t /*mbSize*/, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input, ComputationNodePtr toNode) + shared_ptr> /*ComputationNodePtr*/ SimpleNetworkBuilder::BuildDirectConnect(unsigned long &randomSeed, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input, ComputationNodePtr toNode) { ComputationNetworkBuilder builder(*m_net); @@ -1065,7 +1063,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { template - shared_ptr> /*ComputationNodePtr*/ SimpleNetworkBuilder::BuildLSTMComponent(unsigned long &randomSeed, size_t mbSize, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr inputObs) + shared_ptr> /*ComputationNodePtr*/ SimpleNetworkBuilder::BuildLSTMComponent(unsigned long &randomSeed, size_t iLayer, size_t inputDim, size_t outputDim, 
ComputationNodePtr inputObs) { ComputationNetworkBuilder builder(*m_net); @@ -1121,17 +1119,17 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t layer1 = outputDim; - pastValueHI = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, mbSize, 1); - pastValueHF = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, mbSize, 1); - pastValueHO = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, mbSize, 1); - pastValueHC = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, mbSize, 1); - pastValueCI = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, mbSize, 1); - pastValueCF = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, mbSize, 1); - pastValueCC = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, mbSize, 1); + pastValueHI = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1); + pastValueHF = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1); + pastValueHO = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1); + pastValueHC = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1); + pastValueCI = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1); + pastValueCF = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1); + pastValueCC = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1); if(m_constInputGateValue) { - //it = builder.CreateLearnableParameter(msra::strfun::wstrprintf (L"CONSTIT%d", iLayer), outputDim, mbSize); + //it = builder.CreateLearnableParameter(msra::strfun::wstrprintf (L"CONSTIT%d", iLayer), outputDim); //it->SetParameterUpdateRequired(false); //it->Value().SetValue(m_constInputGateValue); it = nullptr; @@ -1241,7 +1239,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } template - ComputationNetworkPtr SimpleNetworkBuilder::BuildSeqTrnLSTMNetworkFromDescription(size_t mbSize) + ComputationNetworkPtr SimpleNetworkBuilder::BuildSeqTrnLSTMNetworkFromDescription() { ComputationNetworkBuilder builder(*m_net); if (m_net->GetTotalNumberOfNodes() < 1) //not built yet @@ -1261,7 +1259,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNodePtr outputFromEachLayer[MAX_DEPTH] = { nullptr }; ComputationNodePtr trans; - input = builder.CreateInputNode(L"features", m_layerSizes[0], mbSize); + input = builder.CreateInputNode(L"features", m_layerSizes[0]); m_net->FeatureNodes().push_back(input); if (m_applyMeanVarNorm) @@ -1297,7 +1295,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == i+1) { - output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, i, m_layerSizes[i] * (offset ? m_lookupTableOrder : 1), m_layerSizes[i + 1], input); + output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, i, m_layerSizes[i] * (offset ? 
m_lookupTableOrder : 1), m_layerSizes[i + 1], input); input = output; recur_idx++; @@ -1326,7 +1324,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { trans->Value().SetValue((ElemType)1.0 / m_layerSizes[numHiddenLayers + 1]); // m_net->InitLearnableParameters(trans, m_uniformInit, randomSeed++, m_initValueScale); trans->SetParameterUpdateRequired(true); - label = builder.CreateInputNode(L"labels", m_layerSizes[numHiddenLayers + 1], mbSize); + label = builder.CreateInputNode(L"labels", m_layerSizes[numHiddenLayers + 1]); AddTrainAndEvalCriterionNodes(output, label, nullptr, L"CRFTrainCriterion", L"CRFEvalCriterion", nullptr, trans); input = output; @@ -1340,7 +1338,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } template - ComputationNetworkPtr SimpleNetworkBuilder::BuildCLASSLSTMNetworkFromDescription(size_t mbSize) + ComputationNetworkPtr SimpleNetworkBuilder::BuildCLASSLSTMNetworkFromDescription() { ComputationNetworkBuilder builder(*m_net); if (m_net->GetTotalNumberOfNodes() < 1) //not built yet @@ -1356,7 +1354,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNodePtr clslogpostprob; ComputationNodePtr clsweight; - input = builder.CreateSparseInputNode(L"features", m_layerSizes[0], mbSize); + input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]); m_net->FeatureNodes().push_back(input); if (m_applyMeanVarNorm) @@ -1389,13 +1387,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (numHiddenLayers > 0) { // output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input); - output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input); + output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input); /// previously used function. now uses LSTMNode which is correct and fast input = output; for (int i = 1 + offset; i InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); /// the label is a dense matrix. 
each element is the word index - label = builder.CreateInputNode(L"labels", 4, mbSize); + label = builder.CreateInputNode(L"labels", 4); clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]); m_net->InitLearnableParameters(clsweight, m_uniformInit, randomSeed++, m_initValueScale); @@ -1482,7 +1480,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { #endif template - ComputationNetworkPtr SimpleNetworkBuilder::BuildLSTMNetworkFromDescription(size_t mbSize) + ComputationNetworkPtr SimpleNetworkBuilder::BuildLSTMNetworkFromDescription() { ComputationNetworkBuilder builder(*m_net); if (m_net->GetTotalNumberOfNodes() < 1) //not built yet @@ -1502,9 +1500,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNodePtr outputFromEachLayer[MAX_DEPTH] = { nullptr }; if (m_sparse_input) - input = builder.CreateSparseInputNode(L"features", m_layerSizes[0], mbSize); + input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]); else - input = builder.CreateInputNode(L"features", m_layerSizes[0], mbSize); + input = builder.CreateInputNode(L"features", m_layerSizes[0]); m_net->FeatureNodes().push_back(input); @@ -1542,7 +1540,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { //output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input); - output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input); + output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input); /// previously used function. now uses LSTMNode which is correct and fast input = output; outputFromEachLayer[offset + 1] = input; @@ -1553,7 +1551,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { //output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input); - output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, i, m_layerSizes[i], m_layerSizes[i + 1], input); + output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input); // previously used function, now uses LSTMnode, which is fast and correct recur_idx++; @@ -1580,7 +1578,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { #ifdef DEBUG_DECODER w->Value().SetValue((ElemType)0.01); #endif - label = builder.CreateInputNode(L"labels", m_layerSizes[numHiddenLayers + 1], mbSize); + label = builder.CreateInputNode(L"labels", m_layerSizes[numHiddenLayers + 1]); AddTrainAndEvalCriterionNodes(input, label, w); output = builder.Times(w, input, L"outputs"); @@ -1615,7 +1613,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { K. Yao, G. 
Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion, submitted to Interspeech 2015 */ template - ComputationNetworkPtr SimpleNetworkBuilder::BuildLSTMEncoderNetworkFromDescription(size_t mbSize) + ComputationNetworkPtr SimpleNetworkBuilder::BuildLSTMEncoderNetworkFromDescription() { ComputationNetworkBuilder builder(*m_net); @@ -1631,9 +1629,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNodePtr input, w, b, u, e, pastValue, output, label, prior; if (m_sparse_input) - input = builder.CreateSparseInputNode(L"features", m_layerSizes[0], mbSize); + input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]); else - input = builder.CreateInputNode(L"features", m_layerSizes[0], mbSize); + input = builder.CreateInputNode(L"features", m_layerSizes[0]); m_net->FeatureNodes().push_back(input); @@ -1669,14 +1667,14 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (numHiddenLayers > 0) { //output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input); - output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input); + output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input); input = output; i++; for (; i - ComputationNetworkPtr SimpleNetworkBuilder::BuildUnidirectionalLSTMNetworksFromDescription(size_t mbSize) + ComputationNetworkPtr SimpleNetworkBuilder::BuildUnidirectionalLSTMNetworksFromDescription() { ComputationNetworkBuilder builder(*m_net); if (m_net->GetTotalNumberOfNodes() < 1) //not built yet @@ -1726,11 +1724,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { map featDim; assert(m_streamSizes.size() > 0); - inputbackward = builder.CreateInputNode(L"featurepastValueedTarget", m_streamSizes[0], mbSize); + inputbackward = builder.CreateInputNode(L"featurepastValueedTarget", m_streamSizes[0]); m_net->FeatureNodes().push_back(inputbackward); featDim[L"featurepastValueedTarget"] = m_streamSizes[0]; - inputletter = builder.CreateInputNode(L"ltrForward", m_streamSizes[1], mbSize); + inputletter = builder.CreateInputNode(L"ltrForward", m_streamSizes[1]); m_net->FeatureNodes().push_back(inputletter); featDim[L"ltrForward"] = m_streamSizes[1]; @@ -1777,7 +1775,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { switch (m_rnnType){ case UNIDIRECTIONALLSTM: //output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, layerIdx, dims, m_layerSizes[layerIdx + 1], input); - output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, layerIdx, dims, m_layerSizes[layerIdx + 1], input); + output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, layerIdx, dims, m_layerSizes[layerIdx + 1], input); break; default: LogicError("This is for unidorectional LSTM model. Check rnntype to see whether it is UNIDIRECTIONALLSTMWITHPASTPREDICTION or TRANSDUCER"); @@ -1797,7 +1795,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { input = output; /// here uses "labels", so only one label from multiple stream inputs are used. 
- label = builder.CreateInputNode(L"labels", m_layerSizes[numHiddenLayers + 1], mbSize); + label = builder.CreateInputNode(L"labels", m_layerSizes[numHiddenLayers + 1]); AddTrainAndEvalCriterionNodes(input, label, w); @@ -1819,7 +1817,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } template - shared_ptr> /*ComputationNodePtr*/ SimpleNetworkBuilder::BuildLSTMComponentWithMultiInputs(ULONG &randomSeed, size_t mbSize, size_t iLayer, const vector& inputDim, size_t outputDim, const vector& inputObs, bool inputWeightSparse) + shared_ptr> /*ComputationNodePtr*/ SimpleNetworkBuilder::BuildLSTMComponentWithMultiInputs(ULONG &randomSeed, size_t iLayer, const vector& inputDim, size_t outputDim, const vector& inputObs, bool inputWeightSparse) { ComputationNetworkBuilder builder(*m_net); @@ -1896,17 +1894,17 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t layer1 = outputDim; - pastValueHI = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, mbSize, 1); - pastValueHF = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, mbSize, 1); - pastValueHO = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, mbSize, 1); - pastValueHC = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, mbSize, 1); - pastValueCI = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, mbSize, 1); - pastValueCF = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, mbSize, 1); - pastValueCC = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, mbSize, 1); + pastValueHI = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1); + pastValueHF = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1); + pastValueHO = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1); + pastValueHC = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1); + pastValueCI = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1); + pastValueCF = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1); + pastValueCC = builder.PastValue(NULL, m_defaultHiddenActivity, layer1, 1); if (m_constInputGateValue) { - //it = builder.CreateLearnableParameter(msra::strfun::wstrprintf (L"CONSTIT%d", iLayer), outputDim, mbSize); + //it = builder.CreateLearnableParameter(msra::strfun::wstrprintf (L"CONSTIT%d", iLayer), outputDim); //it->SetParameterUpdateRequired(false); //it->Value().SetValue(m_constInputGateValue); it = nullptr; @@ -2026,7 +2024,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { K. Yao, G. 
Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion, submitted to Interspeech 2015 */ template - ComputationNetworkPtr SimpleNetworkBuilder::BuildBiDirectionalLSTMNetworksFromDescription(size_t mbSize) + ComputationNetworkPtr SimpleNetworkBuilder::BuildBiDirectionalLSTMNetworksFromDescription() { ComputationNetworkBuilder builder(*m_net); if (m_net->GetTotalNumberOfNodes() < 1) //not built yet @@ -2049,10 +2047,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { size_t ltrSrcIdx = 1; /// create projections to use pastValue predictions - inputprediction = builder.CreateInputNode(L"featurepastValueedTarget", m_streamSizes[0], mbSize); + inputprediction = builder.CreateInputNode(L"featurepastValueedTarget", m_streamSizes[0]); m_net->FeatureNodes().push_back(inputprediction); - inputletter = builder.CreateInputNode(L"ltrForward", m_streamSizes[1], mbSize); + inputletter = builder.CreateInputNode(L"ltrForward", m_streamSizes[1]); m_net->FeatureNodes().push_back(inputletter); featDim[L"ltrForward"] = m_streamSizes[1]; @@ -2100,12 +2098,12 @@ namespace Microsoft { namespace MSR { namespace CNTK { { /// forward direction //forwardOutput = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, layerIdx + 100, streamdims[0] + streamdims[1], m_layerSizes[layerIdx + 1], forwardInput); - forwardOutput = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, layerIdx + 100, streamdims[0] + streamdims[1], m_layerSizes[layerIdx + 1], forwardInput); + forwardOutput = (ComputationNodePtr)BuildLSTMComponent(randomSeed, layerIdx + 100, streamdims[0] + streamdims[1], m_layerSizes[layerIdx + 1], forwardInput); forwardInput = forwardOutput; backwardInput = (ComputationNodePtr)builder.TimeReverse(ltrSource); //backwardOutput = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, layerIdx + 200, ltrDim, m_layerSizes[layerIdx + 1], backwardInput); - backwardOutput = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, layerIdx + 200, ltrDim, m_layerSizes[layerIdx + 1], backwardInput); + backwardOutput = (ComputationNodePtr)BuildLSTMComponent(randomSeed, layerIdx + 200, ltrDim, m_layerSizes[layerIdx + 1], backwardInput); backwardInput = backwardOutput; layerIdx++; @@ -2113,11 +2111,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { while (layerIdx < numHiddenLayers - 1) { //forwardOutput = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, layerIdx + 100, m_layerSizes[layerIdx], m_layerSizes[layerIdx + 1], forwardInput); - forwardOutput = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, layerIdx + 100, m_layerSizes[layerIdx], m_layerSizes[layerIdx + 1], forwardInput); + forwardOutput = (ComputationNodePtr)BuildLSTMComponent(randomSeed, layerIdx + 100, m_layerSizes[layerIdx], m_layerSizes[layerIdx + 1], forwardInput); forwardInput = forwardOutput; //backwardOutput = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, layerIdx + 200, m_layerSizes[layerIdx], m_layerSizes[layerIdx + 1], backwardInput); - backwardOutput = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, layerIdx + 200, m_layerSizes[layerIdx], m_layerSizes[layerIdx + 1], backwardInput); + backwardOutput = (ComputationNodePtr)BuildLSTMComponent(randomSeed, layerIdx + 200, m_layerSizes[layerIdx], m_layerSizes[layerIdx + 1], backwardInput); backwardInput = backwardOutput; layerIdx++; @@ -2137,7 +2135,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { forwardInput = (ComputationNodePtr)builder.Parallel(streams[0], streams[1], L"Parallel1"); // output = 
(ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, layerIdx, streamdims[0] + streamdims[1], m_layerSizes[layerIdx + 1], forwardInput); - output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, layerIdx, streamdims[0] + streamdims[1], m_layerSizes[layerIdx + 1], forwardInput); + output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, layerIdx, streamdims[0] + streamdims[1], m_layerSizes[layerIdx + 1], forwardInput); input = output; layerIdx++; @@ -2150,7 +2148,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { input = output; /// here uses "labels", so only one label from multiple stream inputs are used. - label = builder.CreateInputNode(L"labels", m_layerSizes[numHiddenLayers + 1], mbSize); + label = builder.CreateInputNode(L"labels", m_layerSizes[numHiddenLayers + 1]); AddTrainAndEvalCriterionNodes(input, label); @@ -2174,7 +2172,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } template - ComputationNetworkPtr SimpleNetworkBuilder::BuildNCELSTMNetworkFromDescription(size_t mbSize) + ComputationNetworkPtr SimpleNetworkBuilder::BuildNCELSTMNetworkFromDescription() { ComputationNetworkBuilder builder(*m_net); if (m_net->GetTotalNumberOfNodes() < 1) //not built yet @@ -2190,7 +2188,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNodePtr bias; ComputationNodePtr outputFromEachLayer[MAX_DEPTH] = { nullptr }; - input = builder.CreateSparseInputNode(L"features", m_layerSizes[0], mbSize); + input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]); m_net->FeatureNodes().push_back(input); if (m_applyMeanVarNorm) @@ -2222,7 +2220,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { int offset = m_lookupTableOrder > 0 ? 1 : 0; if (numHiddenLayers > 0) { - output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input); + output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input); input = output; outputFromEachLayer[offset + 1] = input; @@ -2230,7 +2228,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { { if (m_recurrentLayers.size() > 0 && m_recurrentLayers[recur_idx] == i) { - output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, mbSize, i, m_layerSizes[i], m_layerSizes[i + 1], input); + output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input); recur_idx++; } @@ -2254,7 +2252,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { for (size_t i = offset; i < m_layerSizes.size(); i++) { /// add direct connect from each layers' output to the layer before the output layer - output = BuildDirectConnect(randomSeed, mbSize, i, (i > 1) ? m_layerSizes[i] : ((offset == 0) ? m_layerSizes[i] : m_layerSizes[i] * m_lookupTableOrder), m_layerSizes[numHiddenLayers], outputFromEachLayer[i], input); + output = BuildDirectConnect(randomSeed, i, (i > 1) ? m_layerSizes[i] : ((offset == 0) ? m_layerSizes[i] : m_layerSizes[i] * m_lookupTableOrder), m_layerSizes[numHiddenLayers], outputFromEachLayer[i], input); if (output != nullptr) input = output; } @@ -2266,7 +2264,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); /// the label is a dense matrix. 
each element is the word index - label = builder.CreateInputNode(L"labels", 2 * (this->nce_noises + 1), mbSize); + label = builder.CreateInputNode(L"labels", 2 * (this->nce_noises + 1)); bias = builder.CreateLearnableParameter(L"BiasVector", 1, m_layerSizes[m_layerSizes.size() - 1]); bias->Value().SetValue((ElemType)-std::log(m_layerSizes[m_layerSizes.size() - 1])); @@ -2301,7 +2299,6 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNodePtr input, w, b, output, label, prior, scaledLogLikelihood; shared_ptr> pcNodePtr; - size_t mbSize = 3; //this is not the actual minibatch size. only used in the validation process File fstream(dbnModelFileName, FileOptions::fileOptionsBinary | FileOptions::fileOptionsRead); @@ -2336,7 +2333,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { Matrix A = ReadMatrixFromDbnFile(fstream, std::string("b")); if (i == 0) { - input = builder.Input(wts.GetNumCols(), mbSize, L"features"); + input = builder.CreateInputNode(L"features", wts.GetNumCols()); m_net->FeatureNodes().push_back(input); size_t frameDim = globalMean.GetNumRows(); @@ -2381,10 +2378,10 @@ namespace Microsoft { namespace MSR { namespace CNTK { wstring nameOfPlus = nameOfTimes + L"+" + nameOfB; wstring nameOfH = msra::strfun::wstrprintf(L"H%d", i + 1); - w = builder.Parameter(wts.GetNumRows(), wts.GetNumCols(), nameOfW); + w = builder.CreateLearnableParameter(nameOfW, wts.GetNumRows(), wts.GetNumCols()); w->Value().SetValue(wts); - b = builder.Parameter(bias.GetNumRows(), 1, nameOfB); + b = builder.CreateLearnableParameter(nameOfB, bias.GetNumRows(), 1); b->Value().SetValue(bias); if (layerType == "perceptron") @@ -2412,7 +2409,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { RuntimeError("Error reading DBN file - did not find expected tag ENET\n"); //size_t outputLayerSize = m_layerSizes[m_layerSizes.size()-1]; - label = builder.Input(m_outputLayerSize, mbSize, L"labels"); + label = builder.CreateInputNode(L"labels", m_outputLayerSize); if (layerType == "perceptron") // complete network { @@ -2446,9 +2443,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { wstring nameOfPlus = nameOfTimes + L"+" + nameOfB; wstring nameOfH = msra::strfun::wstrprintf(L"H%d", i + 1); - w = builder.Parameter(outputLayerSize, penultimateSize, nameOfW); + w = builder.CreateLearnableParameter(nameOfW, outputLayerSize, penultimateSize); m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale); - b = builder.Parameter(outputLayerSize, 1, nameOfB); + b = builder.CreateLearnableParameter(nameOfB, outputLayerSize, 1); output = builder.Plus(builder.Times(w, input, nameOfTimes), b, nameOfPlus); m_net->RenameNode(output, L"HLast"); diff --git a/Source/CNTK/SimpleNetworkBuilder.h b/Source/CNTK/SimpleNetworkBuilder.h index 9f9f5fa22f26..14b54d975a55 100644 --- a/Source/CNTK/SimpleNetworkBuilder.h +++ b/Source/CNTK/SimpleNetworkBuilder.h @@ -256,41 +256,41 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNetworkPtr BuildSimpleDNN(); - ComputationNetworkPtr BuildSimpleRNN(size_t mbSize = 1); + ComputationNetworkPtr BuildSimpleRNN(); - ComputationNetworkPtr BuildClassEntropyNetwork(size_t mbSize = 1); + ComputationNetworkPtr BuildClassEntropyNetwork(); - ComputationNodePtr BuildLSTMComponent(unsigned long &randomSeed, size_t mbSize, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input); + ComputationNodePtr BuildLSTMComponent(unsigned long &randomSeed, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr
input);
     ComputationNodePtr BuildLSTMNodeComponent(ULONG &randomSeed, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input);
-    ComputationNodePtr BuildLSTMComponentWithMultiInputs(ULONG &randomSeed, size_t mbSize, size_t iLayer, const vector<size_t>& inputDim, size_t outputDim, const vector<ComputationNodePtr>& inputObs, bool inputWeightSparse = false);
+    ComputationNodePtr BuildLSTMComponentWithMultiInputs(ULONG &randomSeed, size_t iLayer, const vector<size_t>& inputDim, size_t outputDim, const vector<ComputationNodePtr>& inputObs, bool inputWeightSparse = false);
-    ComputationNodePtr BuildDirectConnect(unsigned long &randomSeed, size_t mbSize, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input, ComputationNodePtr toNode);
+    ComputationNodePtr BuildDirectConnect(unsigned long &randomSeed, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input, ComputationNodePtr toNode);
-    ComputationNetworkPtr BuildLogBilinearNetworkFromDescription(size_t mbSize = 1);
+    ComputationNetworkPtr BuildLogBilinearNetworkFromDescription();
-    ComputationNetworkPtr BuildNeuralProbNetworkFromDescription(size_t mbSize = 1);
+    ComputationNetworkPtr BuildNeuralProbNetworkFromDescription();
-    ComputationNetworkPtr BuildLSTMNetworkFromDescription(size_t mbSize = 1);
+    ComputationNetworkPtr BuildLSTMNetworkFromDescription();
-    ComputationNetworkPtr BuildSeqTrnLSTMNetworkFromDescription(size_t mbSize = 1);
+    ComputationNetworkPtr BuildSeqTrnLSTMNetworkFromDescription();
-    ComputationNetworkPtr BuildLSTMEncoderNetworkFromDescription(size_t mbSize = 1);
+    ComputationNetworkPtr BuildLSTMEncoderNetworkFromDescription();
-    ComputationNetworkPtr BuildUnidirectionalLSTMNetworksFromDescription(size_t mbSize = 1);
+    ComputationNetworkPtr BuildUnidirectionalLSTMNetworksFromDescription();
-    ComputationNetworkPtr BuildBiDirectionalLSTMNetworksFromDescription(size_t mbSize = 1);
+    ComputationNetworkPtr BuildBiDirectionalLSTMNetworksFromDescription();
-    ComputationNetworkPtr BuildCLASSLSTMNetworkFromDescription(size_t mbSize = 1);
+    ComputationNetworkPtr BuildCLASSLSTMNetworkFromDescription();
-    ComputationNetworkPtr BuildConditionalLSTMNetworkFromDescription(size_t mbSize = 1);
+    ComputationNetworkPtr BuildConditionalLSTMNetworkFromDescription();
-    ComputationNetworkPtr BuildNCELSTMNetworkFromDescription(size_t mbSize = 1);
+    ComputationNetworkPtr BuildNCELSTMNetworkFromDescription();
-    ComputationNetworkPtr BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet, size_t mbSize = 1);
+    ComputationNetworkPtr BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet);
-    ComputationNetworkPtr BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet, size_t mbSize = 1);
+    ComputationNetworkPtr BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet);

     //layer is 0 based
     ComputationNodePtr ApplyNonlinearFunction(ComputationNodePtr input, const size_t layer, const std::wstring nodeName = L"");
diff --git a/Source/CNTK/SynchronousExecutionEngine.cpp b/Source/CNTK/SynchronousExecutionEngine.cpp
index 30622087a895..06e1c7a4f5b7 100644
--- a/Source/CNTK/SynchronousExecutionEngine.cpp
+++ b/Source/CNTK/SynchronousExecutionEngine.cpp
@@ -15,9 +15,12 @@
 #include "ConvolutionalNodes.h"
 #include "NonlinearityNodes.h"
 #include "ReshapingNodes.h"
+#include "DataTensor.h"

 namespace Microsoft { namespace MSR { namespace CNTK {

+    using namespace std;
+
     template<class ElemType>
     void SynchronousNodeEvaluator<ElemType>::Evaluate(NDLNode<ElemType>* node, const wstring& baseName, const NDLPass pass)
     {
@@ -58,48 +61,34 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             }
         }

-        if (OperationNameOf(InputValue) == cnNodeType)
+        if (OperationNameOf(InputValue) == cnNodeType || OperationNameOf(SparseInputValue) == cnNodeType)
         {
-            if (parameter.size() < 1 || parameter.size() > 2)
-                RuntimeError("%ls should have 1 or 2 parameters[rows, [cols=1]].", cnNodeType.c_str());
+            bool isSparse = (OperationNameOf(SparseInputValue) == cnNodeType);
+            if (parameter.size() < 1)
+                RuntimeError("%ls should have 1 or more parameters (tensor dimensions, e.g. [rows, cols]).", cnNodeType.c_str());

             if (pass == ndlPassInitial)
             {
                 // evaluate only scalar parameters
                 vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
-                size_t rows = ((NDLNode<ElemType>*)params[0])->GetScalar();
-                size_t cols = params.size() > 1 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;
+                size_t i = 0;
+                auto tensorShape = ProcessTensorShapeParameters(node, params, i, /*isImage=*/false, cnNodeType);

                 // first look for this node already existing in the network
+                // BUGBUG: How does this set the dimensions then?
                 if (m_net->NodeNameExists(name))
                     nodePtr = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net->GetNodeFromName(name));
+                else if (isSparse)
+                    nodePtr = builder.CreateSparseInputNode(name, tensorShape);
                 else
-                    nodePtr = builder.CreateInputNode(name, rows, cols);
+                    nodePtr = builder.CreateInputNode (name, tensorShape);
             }
         }
-        else if (OperationNameOf(SparseInputValue) == cnNodeType)
+        else if (cnNodeType == L"ImageInput" || cnNodeType == L"SparseImageInput")
         {
-            if (parameter.size() < 1 || parameter.size() > 2)
-                RuntimeError("%ls should have 1 or 2 parameters[rows, [cols=1]].", cnNodeType.c_str());
-
-            if (pass == ndlPassInitial)
-            {
-                // evaluate only scalar parameters
-                vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
-                size_t rows = ((NDLNode<ElemType>*)params[0])->GetScalar();
-                size_t cols = params.size() > 1 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;
-
-                // first look for this node already existing in the network
-                if (m_net->NodeNameExists(name))
-                    nodePtr = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net->GetNodeFromName(name));
-                else
-                    nodePtr = builder.CreateSparseInputNode(name, rows, cols);
-            }
-        }
-        else if (cnNodeType == L"ImageInput")
-        {
-            if (parameter.size() < 3 || parameter.size() > 4)
-                RuntimeError("%ls should have 3 or 4 parameters[imageWidth, imageHeight, imageChannels, [numImages=1]].", cnNodeType.c_str());
+            bool isSparse = (cnNodeType == L"SparseImageInput");
+            if (parameter.size() < 3 || parameter.size() > 4) // we allow 4 for legacy (numImages, was ignored)
+                RuntimeError("%ls should have 3 parameters[imageWidth, imageHeight, imageChannels].", cnNodeType.c_str());

             if (pass == ndlPassInitial)
             {
@@ -108,46 +97,37 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                 size_t imageWidth = ((NDLNode<ElemType>*)params[0])->GetScalar();
                 size_t imageHeight = ((NDLNode<ElemType>*)params[1])->GetScalar();
                 size_t imageChannels = ((NDLNode<ElemType>*)params[2])->GetScalar();
-                size_t numImages = parameter.size() > 3 ? ((NDLNode<ElemType>*)params[3])->GetScalar() : 1; // BUGBUG: This comes through MBLayout, and should be forbidden.
                 ImageLayoutKind imageLayoutKind = ImageLayoutKindFrom(node->GetOptionalParameter("imageLayout", "HWC"));

-                nodePtr = builder.CreateInputNode(name, ImageDimensions::AsTensorShape(imageWidth, imageHeight, imageChannels, imageLayoutKind), numImages);
+                if (isSparse)
+                    nodePtr = builder.CreateSparseInputNode(name, ImageDimensions::AsTensorShape(imageWidth, imageHeight, imageChannels, imageLayoutKind));
+                else
+                    nodePtr = builder.CreateInputNode (name, ImageDimensions::AsTensorShape(imageWidth, imageHeight, imageChannels, imageLayoutKind));
             }
         }
-        else if (cnNodeType == L"SparseImageInput")
+        else if (OperationNameOf(LearnableParameter) == cnNodeType || cnNodeType == L"ImageParameter")
         {
-            if (parameter.size() < 3 || parameter.size() > 4)
-                RuntimeError("%ls should have 3 or 4 parameters[imageWidth, imageHeight, imageChannels, [numImages=1]].", cnNodeType.c_str());
-
-            if (pass == ndlPassInitial)
+            bool isImage = (cnNodeType == L"ImageParameter");
+            if (!isImage)
             {
-                // evaluate only scalar parameters
-                vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
-                size_t imageWidth = ((NDLNode<ElemType>*)params[0])->GetScalar();
-                size_t imageHeight = ((NDLNode<ElemType>*)params[1])->GetScalar();
-                size_t imageChannels = ((NDLNode<ElemType>*)params[2])->GetScalar();
-                size_t numImages = parameter.size() > 3 ? ((NDLNode<ElemType>*)params[3])->GetScalar() : 1;
-                ImageLayoutKind imageLayoutKind = ImageLayoutKindFrom(node->GetOptionalParameter("imageLayout", "HWC"));
-
-                nodePtr = builder.CreateSparseInputNode(name, ImageDimensions::AsTensorShape(imageWidth, imageHeight, imageChannels, imageLayoutKind), numImages);
+                if (parameter.size() < 1)
+                    RuntimeError("%ls should have 1 or more parameters (tensor dimensions, e.g. [rows, cols]) plus other optional parameters (needGradient=[true|false], init=[uniform|gaussian|fixedvalue], initValueScale=[1|float], value=[0|float]).", cnNodeType.c_str());
+            }
+            else
+            {
+                if (parameter.size() < 3)
+                    RuntimeError("%ls should have 3 parameters [imageWidth, imageHeight, imageChannels] plus other optional parameters (needGradient=[true|false], init=[uniform|gaussian|fixedvalue], initValueScale=[1|float], value=[0|float]).", cnNodeType.c_str());
             }
-        }
-        else if (OperationNameOf(LearnableParameter) == cnNodeType)
-        {
-            if (parameter.size() < 1 || parameter.size() > 2)
-                RuntimeError("%ls should have 1 or 2 parameters[rows, [cols=1]] plus other optional parameters (needGradient=[true|false], init=[uniform|gaussian|fixedvalue], initValueScale=[1|float], value=[0|float]).", cnNodeType.c_str());

             if (pass == ndlPassInitial)
             {
                 // evaluate only scalar parameters
                 vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
-                size_t rows = ((NDLNode<ElemType>*)params[0])->GetScalar();
-                size_t cols = params.size() > 1 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;
-
+                size_t i = 0;
+                auto tensorShape = ProcessTensorShapeParameters(node, params, i, isImage, cnNodeType);
                 bool needGradient = node->GetOptionalParameter("needGradient", "true");
-
-                nodePtr = builder.CreateLearnableParameter(name, rows, cols);
-
+                nodePtr = builder.CreateLearnableParameter(name, tensorShape);
                 nodePtr->SetParameterUpdateRequired(needGradient);
             }
             else if (pass == ndlPassFinal)
@@ -332,11 +312,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         else if (cnNodeType == OperationNameOf(PastValueNode) || cnNodeType == OperationNameOf(FutureValueNode))
         {
-            if (parameter.size() <2 || parameter.size() >3)
-                RuntimeError("PastValue or FutureValue should have two to three fixed parameters. Usage: PastValue(rows, [cols], m, [timeStep=1, defaultPastValue=0.1]).");
+            if (parameter.size() < 2 || parameter.size() > 3) // we allow 3 for legacy (cols parameter which is now unused)
+                RuntimeError("PastValue or FutureValue should have two to three fixed parameters. Usage: PastValue(rows, [timeStep=1, defaultPastValue=0.1]).");
+            // TODO: allow a tensor descriptor

             nodeParamCount = 1;
             nodeParamStart = parameter.size() > 2 ? 2 : 1;
+            // TODO: What are these ^^ for? We are not setting this for InputValue

             if (pass == ndlPassInitial)
             {
@@ -344,24 +326,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                 vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
                 size_t rows = ((NDLNode<ElemType>*)params[0])->GetScalar();
                 // if we have three parameters the second is columns
-                size_t cols = parameter.size() > 2 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;
+                // ignore legacy size_t cols = parameter.size() > 2 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;

-                bool needGradient = node->GetOptionalParameter("needGradient", "false");
+                //bool needGradient = node->GetOptionalParameter("needGradient", "false"); // TODO: what's this for?
                 float defaultHiddenActivity = node->GetOptionalParameter("defaultHiddenActivity", "0.1"); // TODO: parameter should be called 'defaultHiddenActivation'

-                //for backward compatibility we check timeStep first
+                // for backward compatibility we check 'timeStep' first
                 size_t timeStep = node->GetOptionalParameter("timeStep", "1");
                 if (timeStep == 1)
-                {
                     timeStep = node->GetOptionalParameter("delayTime", "1");
-                }

                 if (cnNodeType == OperationNameOf(PastValueNode))
-                    nodePtr = builder.PastValue(NULL, defaultHiddenActivity, rows, cols, timeStep, name);
+                    nodePtr = builder.PastValue(NULL, defaultHiddenActivity, rows, timeStep, name);
                 else
-                    nodePtr = builder.FutureValue(NULL, defaultHiddenActivity, rows, cols, timeStep, name);
+                    nodePtr = builder.FutureValue(NULL, defaultHiddenActivity, rows, timeStep, name);

-                nodePtr->SetParameterUpdateRequired(needGradient); // TODO: what's this for?
+                //nodePtr->SetParameterUpdateRequired(needGradient); // TODO: what's this for?
             }
         }
         else if (cnNodeType == OperationNameOf(ConvolutionNode))
@@ -546,6 +526,32 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             }
         }

+    // ProcessTensorShapeParameters - assume positional parameters starting from position i are tensor dimensions--parse those.
+    // If isImage, then it must be a 3D tensor, which is interpreted as (W,H,C); the optional parameter 'imageLayout' says how.
+    template<class ElemType>
+    TensorShape SynchronousNodeEvaluator<ElemType>::ProcessTensorShapeParameters(const NDLNode<ElemType>* node, const vector<void*> & params, size_t & i, bool isImage, const wstring & cnNodeType/*for error messages only*/)
+    {
+        // gather dims
+        vector<size_t> dims;
+        dims.push_back(((NDLNode<ElemType>*)params[i])->GetScalar()); // first is mandatory
+        for (i++; i < params.size(); i++)
+            dims.push_back(((NDLNode<ElemType>*)params[i])->GetScalar());
+
+        // turn into tensor
+        TensorShape tensorShape(dims);
+
+        // if image then interpret as W, H, C with layout according to optional imageLayout parameter
+        if (isImage)
+        {
+            if (dims.size() != 3)
+                RuntimeError("%ls should have 3 parameters [width, height, numChannels].", cnNodeType.c_str());
+            ImageLayoutKind imageLayoutKind = ImageLayoutKindFrom(node->GetOptionalParameter("imageLayout", "HWC"));
+            tensorShape = ImageDimensions::AsTensorShape(tensorShape[0], tensorShape[1], tensorShape[2], imageLayoutKind);
+        }
+
+        return tensorShape;
+    }
+
     template class SynchronousExecutionEngine<float>;
     template class SynchronousExecutionEngine<double>;
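The new ProcessTensorShapeParameters helper is the core of this change: any run of positional scalar parameters becomes a single TensorShape, and image shapes must supply exactly three dimensions, reinterpreted as (W, H, C) according to the optional imageLayout parameter. A minimal standalone sketch of that parsing logic, with the NDL plumbing (NDLNode, EvaluateParameters, TensorShape) replaced by hypothetical stand-in types:

    #include <cstddef>
    #include <stdexcept>
    #include <vector>

    using DimsLike = std::vector<size_t>; // stand-in for TensorShape

    // Gather all positional dims from index i onward, like the helper above.
    // The caller has already checked that at least one positional parameter exists.
    DimsLike GatherDims(const std::vector<size_t>& positional, size_t& i, bool isImage)
    {
        DimsLike dims;
        dims.push_back(positional[i]);        // first dimension is mandatory
        for (i++; i < positional.size(); i++) // any remaining dims are optional
            dims.push_back(positional[i]);
        if (isImage && dims.size() != 3)      // images must come as [width, height, channels]
            throw std::runtime_error("image shape needs exactly 3 dims [W, H, C]");
        return dims; // e.g. {28, 28, 1}, where the old path could only express {rows, cols}
    }

Under this scheme a one-dimensional Input(784) and a three-dimensional ImageParameter(28, 28, 1) travel the same code path; only the rank of the resulting sample layout differs.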
diff --git a/Source/CNTK/SynchronousExecutionEngine.h b/Source/CNTK/SynchronousExecutionEngine.h
index ecd1dae27bcb..808a7a122039 100644
--- a/Source/CNTK/SynchronousExecutionEngine.h
+++ b/Source/CNTK/SynchronousExecutionEngine.h
@@ -290,7 +290,7 @@ class SynchronousNodeEvaluator : public NDLNodeEvaluator<ElemType>
         {
             fprintf(stderr, "'multiSeq' tag is defunct.\n");
         }
-        else if (!_strnicmp(value.c_str(), "eval", 4)) // only compare the first 4 characters
+        else if (!_strnicmp(value.c_str(), "eval", 4)) // only compare the first 4 characters. Yikes!!
         {
             SetOutputNode(m_net->EvaluationNodes(), compNode);
         }
@@ -326,9 +326,10 @@ class SynchronousNodeEvaluator : public NDLNodeEvaluator<ElemType>
         return nullptr;
     }

-    virtual ~SynchronousNodeEvaluator()
-    {
-    }
+    virtual ~SynchronousNodeEvaluator() { }
+
+protected:
+    TensorShape ProcessTensorShapeParameters(const NDLNode<ElemType>* node, const vector<void*> & params, size_t & i, bool isImage, const wstring & cnNodeType/*for error messages only*/);

 private:
     ComputationNetworkPtr m_net;
diff --git a/Source/ComputationNetworkLib/ComputationNetworkBuilder.cpp b/Source/ComputationNetworkLib/ComputationNetworkBuilder.cpp
index e794d5095eba..b28fbc82b13b 100644
--- a/Source/ComputationNetworkLib/ComputationNetworkBuilder.cpp
+++ b/Source/ComputationNetworkLib/ComputationNetworkBuilder.cpp
@@ -175,6 +175,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         return net.AddNodeToNetWithElemType(New<LearnableParameter<ElemType>>(net.GetDeviceId(), paramName, rows, cols));
     }

+    template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateLearnableParameter(const std::wstring & paramName, const TensorShape & tensorShape)
+    {
+        return net.AddNodeToNetWithElemType(New<LearnableParameter<ElemType>>(net.GetDeviceId(), paramName, tensorShape));
+    }
+
 #if 0 // not functional at present
     //sparse matrix size is optionally specified
     template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateSparseLearnableParameter(const std::wstring & paramName, const size_t rows, const size_t cols, const size_t size)
@@ -183,28 +188,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     }
 #endif

-    template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateInputNode(const std::wstring & inputName, const size_t rows, const size_t cols)
+    template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateInputNode(const std::wstring & inputName, const size_t rows)
     {
-        return net.AddNodeToNetWithElemType(New<InputValue<ElemType>>(net.GetDeviceId(), inputName, rows, cols));
+        return net.AddNodeToNetWithElemType(New<InputValue<ElemType>>(net.GetDeviceId(), inputName, rows));
     }

-    template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateSparseInputNode(const std::wstring & inputName, const size_t rows, const size_t cols)
+    template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateSparseInputNode(const std::wstring & inputName, const size_t rows)
     {
-        return net.AddNodeToNetWithElemType(New<SparseInputValue<ElemType>>(net.GetDeviceId(), inputName, rows, cols));
+        return net.AddNodeToNetWithElemType(New<SparseInputValue<ElemType>>(net.GetDeviceId(), inputName, rows));
     }

-    template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateInputNode(const std::wstring & inputName,
-                                                                                                                         const TensorShape & imageLayout,
-                                                                                                                         const size_t numImages)
+    template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateInputNode(const std::wstring & inputName, const TensorShape & sampleLayout)
     {
-        return net.AddNodeToNetWithElemType(New<InputValue<ElemType>>(net.GetDeviceId(), inputName, imageLayout, numImages));
+        return net.AddNodeToNetWithElemType(New<InputValue<ElemType>>(net.GetDeviceId(), inputName, sampleLayout));
     }

-    template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateSparseInputNode(const std::wstring & inputName,
-                                                                                                                               const TensorShape & imageLayout,
-                                                                                                                               const size_t numImages)
+    template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateSparseInputNode(const std::wstring & inputName, const TensorShape & imageLayout)
     {
-        return net.AddNodeToNetWithElemType(New<SparseInputValue<ElemType>>(net.GetDeviceId(), inputName, imageLayout, numImages));
+        return net.AddNodeToNetWithElemType(New<SparseInputValue<ElemType>>(net.GetDeviceId(), inputName, imageLayout));
     }

     template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreatePairNetworkNode(const std::wstring & inputName, const size_t rows, const size_t cols)
@@ -548,14 +549,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         return net.AddNodeToNetAndAttachInputs(New<DiagonalNode<ElemType>>(net.GetDeviceId(), nodeName), a);
     }

-    template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::PastValue(const ComputationNodePtr a, const float initHiddenActivity, const size_t row_size, const size_t col_size, size_t timeStep, const std::wstring nodeName)
+    template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::PastValue(const ComputationNodePtr a, const float initHiddenActivity, const size_t row_size, size_t timeStep, const std::wstring nodeName)
     {
-        return net.AddNodeToNetAndAttachInputs(New<PastValueNode<ElemType>>(net.GetDeviceId(), nodeName, initHiddenActivity, row_size, col_size, timeStep), a);
+        return net.AddNodeToNetAndAttachInputs(New<PastValueNode<ElemType>>(net.GetDeviceId(), nodeName, initHiddenActivity, row_size, timeStep), a);
     }

-    template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::FutureValue(const ComputationNodePtr a, const float initHiddenActivity, const size_t row_size, const size_t col_size, size_t timeStep, const std::wstring nodeName)
+    template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::FutureValue(const ComputationNodePtr a, const float initHiddenActivity, const size_t row_size, size_t timeStep, const std::wstring nodeName)
     {
-        return net.AddNodeToNetAndAttachInputs(New<FutureValueNode<ElemType>>(net.GetDeviceId(), nodeName, initHiddenActivity, row_size, col_size, timeStep), a);
+        return net.AddNodeToNetAndAttachInputs(New<FutureValueNode<ElemType>>(net.GetDeviceId(), nodeName, initHiddenActivity, row_size, timeStep), a);
     }

     template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Parallel(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName)
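Dropping col_size ripples through every call site of the PastValue/FutureValue builder helpers. A compilable before/after sketch against a mock builder; the names mirror the diff, but the bodies are stand-ins rather than the CNTK implementation:

    #include <cstddef>
    #include <string>

    struct MockBuilder // stand-in for ComputationNetworkBuilder<ElemType>
    {
        // old shape of the call: the minibatch column count was threaded through by hand
        void PastValueOld(void* a, float initHiddenActivity, size_t rows, size_t cols, size_t timeStep, const std::wstring& name) { }
        // new shape of the call: only the per-sample row dimension remains;
        // the column (minibatch) dimension now comes from the MBLayout at run time
        void PastValue(void* a, float initHiddenActivity, size_t rows, size_t timeStep, const std::wstring& name) { }
    };

    int main()
    {
        MockBuilder builder;
        builder.PastValueOld(nullptr, 0.1f, 512, 1, 1, L"dh"); // before: dummy cols = 1
        builder.PastValue(nullptr, 0.1f, 512, 1, L"dh");       // after
    }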
"ScriptableObjects.h" +#include "DataTensor.h" #include namespace Microsoft { namespace MSR { namespace CNTK { @@ -39,12 +40,13 @@ namespace Microsoft { namespace MSR { namespace CNTK { // TODO: separate into nodes that have inputs and those that duplicate functions with input adding except just not adding inputs. Clear? ComputationNodePtr CreateLearnableParameter(const std::wstring & paramName, const size_t rows, const size_t cols); + ComputationNodePtr CreateLearnableParameter(const std::wstring & paramName, const TensorShape & tensorShape); //sparse matrix size is optionally specified //ComputationNodePtr CreateSparseLearnableParameter(const std::wstring & paramName, const size_t rows, const size_t cols, const size_t size = 0); - ComputationNodePtr CreateInputNode(const std::wstring & inputName, const size_t rows, const size_t cols); - ComputationNodePtr CreateSparseInputNode(const std::wstring & inputName, const size_t rows, const size_t cols); - ComputationNodePtr CreateInputNode(const std::wstring & inputName, const TensorShape & imageLayout, const size_t numImages); - ComputationNodePtr CreateSparseInputNode(const std::wstring & inputName, const TensorShape & imageLayout, const size_t numImages); + ComputationNodePtr CreateInputNode(const std::wstring & inputName, const size_t rows); + ComputationNodePtr CreateSparseInputNode(const std::wstring & inputName, const size_t rows); + ComputationNodePtr CreateInputNode(const std::wstring & inputName, const TensorShape & sampleLayout); + ComputationNodePtr CreateSparseInputNode(const std::wstring & inputName, const TensorShape & sampleLayout); ComputationNodePtr CreatePairNetworkNode(const std::wstring & inputName, const size_t rows, const size_t cols); ComputationNodePtr CreateConvolutionNode(const std::wstring & nodeName, const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind, const bool zeroPadding = false, const size_t maxTempMemSizeInSamples = 0); ComputationNodePtr CreateMaxPoolingNode(const std::wstring & nodeName, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind); @@ -52,10 +54,6 @@ namespace Microsoft { namespace MSR { namespace CNTK { // this is the catch-all for all cases not covered as special cases above // Unlike the specialized ones above, this one creates nodes by type given as a string. ComputationNodePtr CreateComputationNode(const std::wstring & nodeType, const std::wstring & nodeName); - // TODO: These next three functions are wrappers around CreateXXXNode(). Remove these. - ComputationNodePtr Parameter(const size_t rows, size_t cols, const std::wstring nodeName = L"") { return CreateLearnableParameter(nodeName, rows, cols); } // TODO: remove - ComputationNodePtr Input(const size_t rows, const size_t cols, const std::wstring nodeName = L"") { return CreateInputNode(nodeName, rows, cols); } // TODO: remove - ComputationNodePtr Input(const TensorShape & imageLayout, const size_t numImages, const std::wstring nodeName = L"") { return CreateInputNode(nodeName, imageLayout, numImages); } // TODO: remove // The following functions create nodes and link them to the network and their inputs. // TODO: Do we need both this set and the one above that does not add inputs? Can they share more code? 
ComputationNodePtr PairNetwork(const ComputationNodePtr & a, const std::wstring nodeName = L""); @@ -122,8 +120,8 @@ namespace Microsoft { namespace MSR { namespace CNTK { ComputationNodePtr Reshape(const ComputationNodePtr a, const size_t num_rows, const TensorShape & imageLayout, const std::wstring nodeName = L""); ComputationNodePtr RowRepeat(const ComputationNodePtr a, const size_t num_repeat, const std::wstring nodeName = L""); ComputationNodePtr Diagonal(const ComputationNodePtr a, const std::wstring nodeName = L""); - ComputationNodePtr PastValue(const ComputationNodePtr a, const float initHiddenActivity, const size_t row_size, const size_t col_size, size_t timeStep, const std::wstring nodeName = L""); - ComputationNodePtr FutureValue(const ComputationNodePtr a, const float initHiddenActivity, const size_t row_size, const size_t col_size, size_t timeStep, const std::wstring nodeName = L""); + ComputationNodePtr PastValue(const ComputationNodePtr a, const float initHiddenActivity, const size_t row_size, size_t timeStep, const std::wstring nodeName = L""); + ComputationNodePtr FutureValue(const ComputationNodePtr a, const float initHiddenActivity, const size_t row_size, size_t timeStep, const std::wstring nodeName = L""); ComputationNodePtr Parallel(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L""); ComputationNodePtr RowSlice(const ComputationNodePtr a, const size_t start_index, const size_t num_rows, const std::wstring nodeName = L""); ComputationNodePtr RowStack(const std::vector pinputs, const std::wstring nodeName = L""); diff --git a/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp b/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp index b55924795fb7..084fe9ce9a69 100644 --- a/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp +++ b/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp @@ -649,7 +649,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { // We do call validate(final) as many times as needed, since stuff may have changed underneath. node->PrintSelfBeforeValidation(); node->Validate(isFinalValidationPass/*final*/); // all nodes have been visited: do verification instead of just inference - fprintf(stderr, " -> [%lu, %s%lu]", node->GetNumRows(), node->HasMBLayout() ? "MBSize " : "", node->GetNumCols()); + fprintf(stderr, " -> [%lu [%s], %s%lu]", node->GetNumRows(), string(node->GetSampleLayout()).c_str(), node->HasMBLayout() ? "MBSize " : "", node->GetNumCols()); node->m_visited = true; // also take the opportunity to propagate m_needsGradient auto needsGradient = node->m_needsGradient; diff --git a/Source/ComputationNetworkLib/ComputationNode.h b/Source/ComputationNetworkLib/ComputationNode.h index d184a41390e6..6ae542997cb5 100644 --- a/Source/ComputationNetworkLib/ComputationNode.h +++ b/Source/ComputationNetworkLib/ComputationNode.h @@ -1399,7 +1399,6 @@ namespace Microsoft { namespace MSR { namespace CNTK { template inline shared_ptr New(_Types&&... 
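The header now offers each input-creation entry point as an overload pair: the legacy (rows) form and a TensorShape form. Overload resolution keeps old call sites compiling while new call sites pass a full sample layout. A small self-contained illustration with hypothetical types, not the CNTK API:

    #include <cstddef>
    #include <iostream>
    #include <vector>

    using ShapeLike = std::vector<size_t>; // stand-in for TensorShape

    size_t CreateInput(size_t rows) { return 1; }                                     // legacy: always a rank-1 sample
    size_t CreateInput(const ShapeLike& sampleLayout) { return sampleLayout.size(); } // tensor form: any rank

    int main()
    {
        std::cout << CreateInput(784) << "\n";         // picks the size_t overload -> 1
        std::cout << CreateInput({28, 28, 1}) << "\n"; // picks the ShapeLike overload -> 3
    }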
diff --git a/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp b/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp
index b55924795fb7..084fe9ce9a69 100644
--- a/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp
+++ b/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp
@@ -649,7 +649,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                 // We do call validate(final) as many times as needed, since stuff may have changed underneath.
                 node->PrintSelfBeforeValidation();
                 node->Validate(isFinalValidationPass/*final*/); // all nodes have been visited: do verification instead of just inference
-                fprintf(stderr, " -> [%lu, %s%lu]", node->GetNumRows(), node->HasMBLayout() ? "MBSize " : "", node->GetNumCols());
+                fprintf(stderr, " -> [%lu [%s], %s%lu]", node->GetNumRows(), string(node->GetSampleLayout()).c_str(), node->HasMBLayout() ? "MBSize " : "", node->GetNumCols());
                 node->m_visited = true;
                 // also take the opportunity to propagate m_needsGradient
                 auto needsGradient = node->m_needsGradient;
diff --git a/Source/ComputationNetworkLib/ComputationNode.h b/Source/ComputationNetworkLib/ComputationNode.h
index d184a41390e6..6ae542997cb5 100644
--- a/Source/ComputationNetworkLib/ComputationNode.h
+++ b/Source/ComputationNetworkLib/ComputationNode.h
@@ -1399,7 +1399,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     template<class C, class... _Types> inline shared_ptr<C> New(_Types&&... _Args)
     {
         return make_shared<C>(forward<_Types>(_Args)...);
-        //return ComputationNode::template New(forward<_Types>(_Args)...);
     }

     // =======================================================================
diff --git a/Source/ComputationNetworkLib/InputAndParamNodes.h b/Source/ComputationNetworkLib/InputAndParamNodes.h
index 47cd02d8f83a..8d9ee85ca599 100644
--- a/Source/ComputationNetworkLib/InputAndParamNodes.h
+++ b/Source/ComputationNetworkLib/InputAndParamNodes.h
@@ -263,23 +263,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             m_parameterUpdateRequired = false;
         }
     protected:
-        InputValueBase(DEVICEID_TYPE deviceId, const wstring & name, bool isSparse) :
-            Base(deviceId, name)
-        {
-            Init(TensorShape(), isSparse);
-        }
-        InputValueBase(DEVICEID_TYPE deviceId, const wstring & name, size_t rows, size_t cols, bool isSparse) :
-            Base(deviceId, name)
-        {
-            cols; // BUGBUG: There should be no 'cols' parameter for InputValues, since they must be minibatches.
-            Init(TensorShape(rows), isSparse);
-        }
-        InputValueBase(DEVICEID_TYPE deviceId, const wstring & name, const TensorShape & imageLayout, size_t numImages, bool isSparse) :
+        InputValueBase(DEVICEID_TYPE deviceId, const wstring & name, const TensorShape & sampleLayout, bool isSparse) :
             Base(deviceId, name)
         {
-            numImages; // BUGBUG: There should be no 'numImages' parameter for InputValues, since they must be minibatches.
-            Init(imageLayout, isSparse);
+            Init(sampleLayout, isSparse);
         }
+        InputValueBase(DEVICEID_TYPE deviceId, const wstring & name, size_t rows, bool isSparse) :
+            InputValueBase(deviceId, name, TensorShape(rows), isSparse)
+        { }
+        InputValueBase(DEVICEID_TYPE deviceId, const wstring & name, bool isSparse) :
+            InputValueBase(deviceId, name, TensorShape(), isSparse)
+        { }
         InputValueBase(const ScriptableObjects::IConfigRecordPtr configp, bool isSparse) :
             Base(configp->Get(L"deviceId"), L"")
         {
@@ -361,11 +355,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         InputValue(DEVICEID_TYPE deviceId, const wstring & name) :
             Base(deviceId, name, false)
         { }
-        InputValue(DEVICEID_TYPE deviceId, const wstring & name, size_t rows, size_t cols) :
-            Base(deviceId, name, rows, cols, false)
+        InputValue(DEVICEID_TYPE deviceId, const wstring & name, size_t rows) :
+            Base(deviceId, name, rows, false)
         { }
-        InputValue(DEVICEID_TYPE deviceId, const wstring & name, const TensorShape & imageLayout, size_t numImages) :
-            Base(deviceId, name, imageLayout, numImages, false)
+        InputValue(DEVICEID_TYPE deviceId, const wstring & name, const TensorShape & sampleLayout) :
+            Base(deviceId, name, sampleLayout, false)
         { }
         InputValue(const ScriptableObjects::IConfigRecordPtr configp) :
             Base(configp, false)
@@ -390,11 +384,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         SparseInputValue(DEVICEID_TYPE deviceId, const wstring & name) :
             Base(deviceId, name, true)
         { }
-        SparseInputValue(DEVICEID_TYPE deviceId, const wstring & name, size_t rows, size_t cols) :
-            Base(deviceId, name, rows, cols, true)
+        SparseInputValue(DEVICEID_TYPE deviceId, const wstring & name, size_t rows) :
+            Base(deviceId, name, rows, true)
         { }
-        SparseInputValue(DEVICEID_TYPE deviceId, const wstring & name, const TensorShape & imageLayout, size_t numImages) :
-            Base(deviceId, name, imageLayout, numImages, true)
+        SparseInputValue(DEVICEID_TYPE deviceId, const wstring & name, const TensorShape & imageLayout) :
+            Base(deviceId, name, imageLayout, true)
         { }
         SparseInputValue(const ScriptableObjects::IConfigRecordPtr configp) :
             Base(configp, true)
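The InputValueBase cleanup above is a straightforward use of C++11 delegating constructors: the TensorShape constructor is now the single real one, and the legacy (rows) and default forms simply forward to it, which is what lets the two BUGBUG'd near-copies of the initialization disappear. A minimal self-contained illustration of the pattern, with a hypothetical class rather than the CNTK type:

    #include <cstddef>
    #include <iostream>
    #include <utility>
    #include <vector>

    class Input // stand-in for InputValueBase
    {
        std::vector<size_t> m_sampleLayout; // stand-in for TensorShape
        bool m_isSparse;
    public:
        // the one real constructor; all initialization lives here
        Input(std::vector<size_t> sampleLayout, bool isSparse)
            : m_sampleLayout(std::move(sampleLayout)), m_isSparse(isSparse) { }
        // legacy rows form: delegate, wrapping the scalar as a rank-1 layout
        Input(size_t rows, bool isSparse) : Input(std::vector<size_t>{ rows }, isSparse) { }
        // default form: delegate with an empty layout, to be determined later
        explicit Input(bool isSparse) : Input(std::vector<size_t>{}, isSparse) { }

        size_t Rank() const { return m_sampleLayout.size(); }
    };

    int main()
    {
        std::cout << Input(512, false).Rank() << "\n";           // 1: legacy rows form
        std::cout << Input({ 32, 32, 3 }, false).Rank() << "\n"; // 3: tensor form
    }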
diff --git a/Source/ComputationNetworkLib/RecurrentNodes.h b/Source/ComputationNetworkLib/RecurrentNodes.h
index 1f522a053e3a..98216f83393c 100644
--- a/Source/ComputationNetworkLib/RecurrentNodes.h
+++ b/Source/ComputationNetworkLib/RecurrentNodes.h
@@ -86,33 +86,31 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         typedef std::shared_ptr<DelayedValueNodeState<ElemType>> DelayedNodeStatePtr;
         static const std::wstring TypeName() { return L"DelayedValue"; }
     private:
-        void Init(size_t row_size, size_t col_size, ElemType initialActivationValue = (ElemType)DEFAULT_HIDDEN_ACTIVATION)
+        void Init(const TensorShape & sampleLayout, ElemType initialActivationValue)
         {
             m_initialActivationValue = initialActivationValue;
             m_timeStep = 1;
             CreateMatrixIfNull(m_value);
-            SetDims(TensorShape(row_size), col_size); // TODO: needed? Can we not infer it? How about setting a sample layout?
-            m_isHistoryCarryOverManagedExternally = false; // used for PairNetworkNode/PastValueNode combination
+            SetDims(sampleLayout, 0); // TODO: needed? Can we not infer it? How about setting a sample layout?
+            m_isHistoryCarryOverManagedExternally = false; // used for PairNetworkNode/PastValueNode combination, which is deprecated
+            m_value->SetValue(m_initialActivationValue); // is this needed?
         }
     protected:
         DelayedValueNodeBase(DEVICEID_TYPE deviceId, const wstring & name) :
             Base(deviceId, name), m_delayedActivation(deviceId)
         {
-            Init(1, 1);
+            Init(TensorShape(), (ElemType)DEFAULT_HIDDEN_ACTIVATION);
         }
-        DelayedValueNodeBase(DEVICEID_TYPE deviceId, const wstring & name, ElemType initialActivationValue, size_t row_size, size_t col_size, size_t timeStep) :
+        DelayedValueNodeBase(DEVICEID_TYPE deviceId, const wstring & name, ElemType initialActivationValue, const TensorShape & sampleLayout, size_t timeStep) :
             Base(deviceId, name), m_delayedActivation(deviceId)
         {
-            Init(row_size, col_size, initialActivationValue);
-
-            m_timeStep = (int)timeStep;
-
-            m_value->SetValue(m_initialActivationValue);
+            Init(sampleLayout, initialActivationValue);
+            m_timeStep = (int)timeStep; // TODO: pass this to Init() instead as well
         }
         DelayedValueNodeBase(const ScriptableObjects::IConfigRecordPtr configp) :
-            DelayedValueNodeBase(configp->Get(L"deviceId"), L"", configp->Get(L"defaultHiddenActivation"), configp->Get(L"rows"), configp->Get(L"cols"), configp->Get(L"timeStep"))
+            DelayedValueNodeBase(configp->Get(L"deviceId"), L"", configp->Get(L"defaultHiddenActivation"), configp->Get(L"shape"), configp->Get(L"timeStep"))
         {
             // We do NOT attach the inputs, as we cannot resolve them without causing a circular reference.
             // Instead, we capture them in a lambda, which will be called by ComputationNetwork during the build process through LateAttachInputs() below.
@@ -593,8 +591,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         PastValueNode(DEVICEID_TYPE deviceId, const wstring & name) :
             Base(deviceId, name)
         { }
-        PastValueNode(DEVICEID_TYPE deviceId, const wstring & name, ElemType initialActivationValue, size_t row_size, size_t col_size, size_t timeStep) :
-            Base(deviceId, name, initialActivationValue, row_size, col_size, timeStep)
+        PastValueNode(DEVICEID_TYPE deviceId, const wstring & name, ElemType initialActivationValue, const TensorShape & sampleLayout, size_t timeStep) :
+            Base(deviceId, name, initialActivationValue, sampleLayout, timeStep)
+        { }
+        PastValueNode(DEVICEID_TYPE deviceId, const wstring & name, ElemType initialActivationValue, size_t numRows, size_t timeStep) :
+            PastValueNode(deviceId, name, initialActivationValue, TensorShape(numRows), timeStep)
         { }
         PastValueNode(const ScriptableObjects::IConfigRecordPtr configp) :
             Base(configp)
@@ -619,8 +620,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         FutureValueNode(DEVICEID_TYPE deviceId, const wstring & name) :
             Base(deviceId, name)
         { }
-        FutureValueNode(DEVICEID_TYPE deviceId, const wstring & name, ElemType initialActivationValue, size_t row_size, size_t col_size, size_t timeStep) :
-            Base(deviceId, name, initialActivationValue, row_size, col_size, timeStep)
+        FutureValueNode(DEVICEID_TYPE deviceId, const wstring & name, ElemType initialActivationValue, const TensorShape & sampleLayout, size_t timeStep) :
+            Base(deviceId, name, initialActivationValue, sampleLayout, timeStep)
+        { }
+        FutureValueNode(DEVICEID_TYPE deviceId, const wstring & name, ElemType initialActivationValue, size_t numRows, size_t timeStep) :
+            FutureValueNode(deviceId, name, initialActivationValue, TensorShape(numRows), timeStep)
         { }
         FutureValueNode(const ScriptableObjects::IConfigRecordPtr configp) :
             Base(configp)
diff --git a/Tests/EndToEndTests/Speech/LSTM/cntk.config b/Tests/EndToEndTests/Speech/LSTM/cntk.config
index c4f15456a2c3..3ee89359bd5c 100644
--- a/Tests/EndToEndTests/Speech/LSTM/cntk.config
+++ b/Tests/EndToEndTests/Speech/LSTM/cntk.config
@@ -66,8 +66,8 @@ speechTrain = [
         C(c) = DiagTimes(WeightParam(cellDim, 1), Stabilize(c)) // cell-to-hidden

         // LSTM cell
-        dh = PastValue(outputDim, 1, output); // hidden state(t-1)
-        dc = PastValue(cellDim, 1, ct);       // cell(t-1)
+        dh = PastValue(outputDim, output); // hidden state(t-1)
+        dc = PastValue(cellDim, ct);       // cell(t-1)

         // note: the W(inputx) here are all different, they all come with their own set of weights; same for H(dh), C(dc), and B()
         it = Sigmoid(W(inputx) + B() + H(dh) + C(dc)) // input gate(t)
diff --git a/Tests/EndToEndTests/Speech/LSTM/lstm.bs b/Tests/EndToEndTests/Speech/LSTM/lstm.bs
index 65fd9974e2c8..ca4dc1dc3801 100644
--- a/Tests/EndToEndTests/Speech/LSTM/lstm.bs
+++ b/Tests/EndToEndTests/Speech/LSTM/lstm.bs
@@ -74,8 +74,8 @@ speechTrain = new TrainAction [
         C(c) = DiagTimes(WeightParam(cellDim, 1), Stabilize(c)) // cell-to-hidden

         // LSTM cell
-        dh = PastValue(outputDim, 1, output); // hidden state(t-1)
-        dc = PastValue(cellDim, 1, ct);       // cell(t-1)
+        dh = PastValue(outputDim, output); // hidden state(t-1)
+        dc = PastValue(cellDim, ct);       // cell(t-1)

         // note: the W(inputx) here are all different, they all come with their own set of weights; same for H(dh), C(dc), and B()
         it = Sigmoid(W(inputx) + B() + H(dh) + C(dc)) // input gate(t)