Past/FutureValue now takes a dimension tensor in BrainScript;
removed cols/numImages parameter from InputValue and Past/FutureValue, since those always process data samples. Lots of little deletions in SimpleNetworkBuilder.cpp
frankseide committed Jan 5, 2016
1 parent 6f9b664 commit 7e780c3
Showing 13 changed files with 290 additions and 290 deletions.
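At the BrainScript surface, the change collapses the old (rows, cols) parameter pair into a single list of sample dimensions; the column/sample axis now always comes from the minibatch layout rather than from the network description. A minimal before/after sketch (the node name h and the dimension 512 are illustrative, not taken from this commit):

    # before: a cols argument had to be threaded through, even though it was really the number of samples
    prev = PastValue(512, 1, h, timeStep = 1)
    # after: only the sample dimensions are given; the time/sample axis comes from the MBLayout
    prev = PastValue(512, h, timeStep = 1)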
8 changes: 4 additions & 4 deletions Source/CNTK/BrainScript/ExperimentalNetworkBuilder.cpp
@@ -39,13 +39,13 @@ using namespace std;
L"Parameter = LearnableParameter // deprecated \n"
L"ParameterTensor(dims, needGradient = true, init = 'uniform'/*|fixedValue|gaussian|fromFile*/, initValueScale = 1, value = 0, initFromFilePath = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
// ^^ already works; vv untested
L"Input(rows, tag='feature') = new ComputationNode [ operation = 'InputValue' ; shape = new TensorShape [ dims = (rows) ] ; isImage = false /*plus the function args*/ ]\n" // note: naming a little inconsistent // TODO: re-test after flag change
L"SparseInput(rows, tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; shape = new TensorShape [ dims = (rows) ] ; isImage = false /*plus the function args*/ ]\n"
L"Input(dims, tag='feature') = new ComputationNode [ operation = 'InputValue' ; shape = new TensorShape [ /*dims*/ ] ; isImage = false /*plus the function args*/ ]\n" // note: naming a little inconsistent // TODO: re-test after flag change
L"SparseInput(dims, tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; shape = new TensorShape [ /*dims*/ ] ; isImage = false /*plus the function args*/ ]\n"
L"ImageInput(imageWidth, imageHeight, imageChannels, imageLayout='CHW', tag='feature') = new ComputationNode [ operation = 'InputValue' ; isImage = true /*plus the function args*/ ]\n"
L"SparseImageInput(imageWidth, imageHeight, imageChannels, imageLayout='CHW', tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; isImage = true /*plus the function args*/ ]\n"
L"Constant(val, rows = 1, cols = 1, tag='') = Parameter(rows, cols, needGradient = false, init = 'fixedValue', value = val) \n"
L"PastValue(rows, cols, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'PastValue' ; inputs = input /*plus the function args*/ ]\n"
L"FutureValue(rows, cols, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'FutureValue' ; inputs = input /*plus the function args*/ ]\n"
L"PastValue(dims, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'PastValue' ; inputs = input ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
L"FutureValue(dims, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'FutureValue' ; inputs = input ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
// TODO: ^^ DelayedValues no longer need to know their dimension. That is inferred in Validation.
L"RowSlice(startIndex, numRows, input, needGradient = false, tag='') = new ComputationNode [ operation = 'RowSlice' ; inputs = input /*plus the function args*/ ]\n"
L"RowRepeat(input, numRepeats, needGradient = false, tag='') = new ComputationNode [ operation = 'RowRepeat' ; inputs = input /*plus the function args*/ ]\n"
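A hedged usage sketch of the updated builtins above (dimension values and the node h are illustrative; the colon-separated form for multi-dimensional dims follows BrainScript's array syntax):

    features = Input(39, tag = 'feature')                                  # vector input: dims only, no cols
    image    = ImageInput(28, 28, 1, imageLayout = 'CHW', tag = 'feature') # image dims unchanged
    W        = ParameterTensor(512 : 39)                                   # a 2D tensor-shaped parameter
    prev     = FutureValue(512, h, timeStep = 1)                           # dims + input; cols removed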
261 changes: 129 additions & 132 deletions Source/CNTK/SimpleNetworkBuilder.cpp

Large diffs are not rendered by default.

34 changes: 17 additions & 17 deletions Source/CNTK/SimpleNetworkBuilder.h
@@ -256,41 +256,41 @@ namespace Microsoft { namespace MSR { namespace CNTK {

 ComputationNetworkPtr BuildSimpleDNN();

-ComputationNetworkPtr BuildSimpleRNN(size_t mbSize = 1);
+ComputationNetworkPtr BuildSimpleRNN();

-ComputationNetworkPtr BuildClassEntropyNetwork(size_t mbSize = 1);
+ComputationNetworkPtr BuildClassEntropyNetwork();

-ComputationNodePtr BuildLSTMComponent(unsigned long &randomSeed, size_t mbSize, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input);
+ComputationNodePtr BuildLSTMComponent(unsigned long &randomSeed, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input);

 ComputationNodePtr BuildLSTMNodeComponent(ULONG &randomSeed, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input);

-ComputationNodePtr BuildLSTMComponentWithMultiInputs(ULONG &randomSeed, size_t mbSize, size_t iLayer, const vector<size_t>& inputDim, size_t outputDim, const vector<ComputationNodePtr>& inputObs, bool inputWeightSparse = false);
+ComputationNodePtr BuildLSTMComponentWithMultiInputs(ULONG &randomSeed, size_t iLayer, const vector<size_t>& inputDim, size_t outputDim, const vector<ComputationNodePtr>& inputObs, bool inputWeightSparse = false);

-ComputationNodePtr BuildDirectConnect(unsigned long &randomSeed, size_t mbSize, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input, ComputationNodePtr toNode);
+ComputationNodePtr BuildDirectConnect(unsigned long &randomSeed, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input, ComputationNodePtr toNode);

-ComputationNetworkPtr BuildLogBilinearNetworkFromDescription(size_t mbSize = 1);
+ComputationNetworkPtr BuildLogBilinearNetworkFromDescription();

-ComputationNetworkPtr BuildNeuralProbNetworkFromDescription(size_t mbSize = 1);
+ComputationNetworkPtr BuildNeuralProbNetworkFromDescription();

-ComputationNetworkPtr BuildLSTMNetworkFromDescription(size_t mbSize = 1);
+ComputationNetworkPtr BuildLSTMNetworkFromDescription();

-ComputationNetworkPtr BuildSeqTrnLSTMNetworkFromDescription(size_t mbSize = 1);
+ComputationNetworkPtr BuildSeqTrnLSTMNetworkFromDescription();

-ComputationNetworkPtr BuildLSTMEncoderNetworkFromDescription(size_t mbSize = 1);
+ComputationNetworkPtr BuildLSTMEncoderNetworkFromDescription();

-ComputationNetworkPtr BuildUnidirectionalLSTMNetworksFromDescription(size_t mbSize = 1);
+ComputationNetworkPtr BuildUnidirectionalLSTMNetworksFromDescription();

-ComputationNetworkPtr BuildBiDirectionalLSTMNetworksFromDescription(size_t mbSize = 1);
+ComputationNetworkPtr BuildBiDirectionalLSTMNetworksFromDescription();

-ComputationNetworkPtr BuildCLASSLSTMNetworkFromDescription(size_t mbSize = 1);
+ComputationNetworkPtr BuildCLASSLSTMNetworkFromDescription();

-ComputationNetworkPtr BuildConditionalLSTMNetworkFromDescription(size_t mbSize = 1);
+ComputationNetworkPtr BuildConditionalLSTMNetworkFromDescription();

-ComputationNetworkPtr BuildNCELSTMNetworkFromDescription(size_t mbSize = 1);
+ComputationNetworkPtr BuildNCELSTMNetworkFromDescription();

-ComputationNetworkPtr BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet, size_t mbSize = 1);
+ComputationNetworkPtr BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet);

-ComputationNetworkPtr BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet, size_t mbSize = 1);
+ComputationNetworkPtr BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet);

 //layer is 0 based
 ComputationNodePtr ApplyNonlinearFunction(ComputationNodePtr input, const size_t layer, const std::wstring nodeName = L"");
134 changes: 70 additions & 64 deletions Source/CNTK/SynchronousExecutionEngine.cpp
@@ -15,9 +15,12 @@
#include "ConvolutionalNodes.h"
#include "NonlinearityNodes.h"
#include "ReshapingNodes.h"
#include "DataTensor.h"

namespace Microsoft { namespace MSR { namespace CNTK {

using namespace std;

template<class ElemType>
void SynchronousNodeEvaluator<ElemType>::Evaluate(NDLNode<ElemType>* node, const wstring& baseName, const NDLPass pass)
{
@@ -58,48 +61,34 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     }
 }

-if (OperationNameOf(InputValue) == cnNodeType)
+if (OperationNameOf(InputValue) == cnNodeType || OperationNameOf(SparseInputValue) == cnNodeType)
 {
-    if (parameter.size() < 1 || parameter.size() > 2)
-        RuntimeError("%ls should have 1 or 2 parameters[rows, [cols=1]].", cnNodeType.c_str());
+    bool isSparse = (OperationNameOf(SparseInputValue) == cnNodeType);
+    if (parameter.size() < 1)
+        RuntimeError("%ls should have 1 or more parameters (tensor dimensions, e.g. [rows, cols]).", cnNodeType.c_str());

     if (pass == ndlPassInitial)
     {
         // evaluate only scalar parameters
         vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
-        size_t rows = ((NDLNode<ElemType>*)params[0])->GetScalar();
-        size_t cols = params.size() > 1 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;
+        size_t i = 0;
+        auto tensorShape = ProcessTensorShapeParameters(node, params, i, /*isImage=*/false, cnNodeType);

         // first look for this node already existing in the network
+        // BUGBUG: How does this set the dimensions then?
         if (m_net->NodeNameExists(name))
             nodePtr = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net->GetNodeFromName(name));
+        else if (isSparse)
+            nodePtr = builder.CreateSparseInputNode(name, tensorShape);
         else
-            nodePtr = builder.CreateInputNode(name, rows, cols);
+            nodePtr = builder.CreateInputNode (name, tensorShape);
     }
 }
-else if (OperationNameOf(SparseInputValue) == cnNodeType)
+else if (cnNodeType == L"ImageInput" || cnNodeType == L"SparseImageInput")
 {
-    if (parameter.size() < 1 || parameter.size() > 2)
-        RuntimeError("%ls should have 1 or 2 parameters[rows, [cols=1]].", cnNodeType.c_str());
-
-    if (pass == ndlPassInitial)
-    {
-        // evaluate only scalar parameters
-        vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
-        size_t rows = ((NDLNode<ElemType>*)params[0])->GetScalar();
-        size_t cols = params.size() > 1 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;
-
-        // first look for this node already existing in the network
-        if (m_net->NodeNameExists(name))
-            nodePtr = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net->GetNodeFromName(name));
-        else
-            nodePtr = builder.CreateSparseInputNode(name, rows, cols);
-    }
-}
-else if (cnNodeType == L"ImageInput")
-{
-    if (parameter.size() < 3 || parameter.size() > 4)
-        RuntimeError("%ls should have 3 or 4 parameters[imageWidth, imageHeight, imageChannels, [numImages=1]].", cnNodeType.c_str());
+    bool isSparse = (cnNodeType == L"SparseImageInput");
+    if (parameter.size() < 3 || parameter.size() > 4) // we allow 4 for legacy (numImages, was ignored)
+        RuntimeError("%ls should have 3 parameters[imageWidth, imageHeight, imageChannels].", cnNodeType.c_str());

     if (pass == ndlPassInitial)
     {
@@ -108,46 +97,37 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         size_t imageWidth = ((NDLNode<ElemType>*)params[0])->GetScalar();
         size_t imageHeight = ((NDLNode<ElemType>*)params[1])->GetScalar();
         size_t imageChannels = ((NDLNode<ElemType>*)params[2])->GetScalar();
-        size_t numImages = parameter.size() > 3 ? ((NDLNode<ElemType>*)params[3])->GetScalar() : 1; // BUGBUG: This comes through MBLayout, and should be forbidden.
         ImageLayoutKind imageLayoutKind = ImageLayoutKindFrom(node->GetOptionalParameter("imageLayout", "HWC"));

-        nodePtr = builder.CreateInputNode(name, ImageDimensions::AsTensorShape(imageWidth, imageHeight, imageChannels, imageLayoutKind), numImages);
+        if (isSparse)
+            nodePtr = builder.CreateSparseInputNode(name, ImageDimensions::AsTensorShape(imageWidth, imageHeight, imageChannels, imageLayoutKind));
+        else
+            nodePtr = builder.CreateInputNode (name, ImageDimensions::AsTensorShape(imageWidth, imageHeight, imageChannels, imageLayoutKind));
     }
 }
-else if (cnNodeType == L"SparseImageInput")
+else if (OperationNameOf(LearnableParameter) == cnNodeType || cnNodeType == L"ImageParameter")
 {
-    if (parameter.size() < 3 || parameter.size() > 4)
-        RuntimeError("%ls should have 3 or 4 parameters[imageWidth, imageHeight, imageChannels, [numImages=1]].", cnNodeType.c_str());
-
-    if (pass == ndlPassInitial)
+    bool isImage = (cnNodeType == L"ImageParameter");
+    if (!isImage)
     {
-        // evaluate only scalar parameters
-        vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
-        size_t imageWidth = ((NDLNode<ElemType>*)params[0])->GetScalar();
-        size_t imageHeight = ((NDLNode<ElemType>*)params[1])->GetScalar();
-        size_t imageChannels = ((NDLNode<ElemType>*)params[2])->GetScalar();
-        size_t numImages = parameter.size() > 3 ? ((NDLNode<ElemType>*)params[3])->GetScalar() : 1;
-        ImageLayoutKind imageLayoutKind = ImageLayoutKindFrom(node->GetOptionalParameter("imageLayout", "HWC"));
-
-        nodePtr = builder.CreateSparseInputNode(name, ImageDimensions::AsTensorShape(imageWidth, imageHeight, imageChannels, imageLayoutKind), numImages);
+        if (parameter.size() < 1)
+            RuntimeError("%ls should have 1 or more parameters (tensor dimensions, e.g. [rows, cols]) plus other optional parameters (needGradient=[true|false], init=[uniform|gaussian|fixedvalue], initValueScale=[1|float], value=[0|float]).", cnNodeType.c_str());
     }
+    else
+    {
+        if (parameter.size() < 3)
+            RuntimeError("%ls should have 3 parameters [imageWidth, imageHeight, imageChannels] plus other optional parameters (needGradient=[true|false], init=[uniform|gaussian|fixedvalue], initValueScale=[1|float], value=[0|float]).", cnNodeType.c_str());
+    }
-}
-else if (OperationNameOf(LearnableParameter) == cnNodeType)
-{
-    if (parameter.size() < 1 || parameter.size() > 2)
-        RuntimeError("%ls should have 1 or 2 parameters[rows, [cols=1]] plus other optional parameters (needGradient=[true|false], init=[uniform|gaussian|fixedvalue], initValueScale=[1|float], value=[0|float]).", cnNodeType.c_str());

     if (pass == ndlPassInitial)
     {
         // evaluate only scalar parameters
         vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
-        size_t rows = ((NDLNode<ElemType>*)params[0])->GetScalar();
-        size_t cols = params.size() > 1 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;
-
+        size_t i = 0;
+        auto tensorShape = ProcessTensorShapeParameters(node, params, i, isImage, cnNodeType);
         bool needGradient = node->GetOptionalParameter("needGradient", "true");

-        nodePtr = builder.CreateLearnableParameter(name, rows, cols);
-
+        nodePtr = builder.CreateLearnableParameter(name, tensorShape);
         nodePtr->SetParameterUpdateRequired(needGradient);
     }
     else if (pass == ndlPassFinal)
@@ -332,36 +312,36 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 else if (cnNodeType == OperationNameOf(PastValueNode) ||
          cnNodeType == OperationNameOf(FutureValueNode))
 {
-    if (parameter.size() <2 || parameter.size() >3)
-        RuntimeError("PastValue or FutureValue should have two to three fixed parameters. Usage: PastValue(rows, [cols], m, [timeStep=1, defaultPastValue=0.1]).");
+    if (parameter.size() < 2 || parameter.size() > 3) // we allow 3 for legacy (cols parameter which is now unused)
+        RuntimeError("PastValue or FutureValue should have two to three fixed parameters. Usage: PastValue(rows, [timeStep=1, defaultPastValue=0.1]).");
+    // TODO: allow a tensor descriptor

     nodeParamCount = 1;
     nodeParamStart = parameter.size() > 2?2:1;
+    // TODO: What are these ^^ for? We are not setting this for InputValue

     if (pass == ndlPassInitial)
     {
         // evaluate only scalar parameters
         vector<void*> params = EvaluateParameters(node, baseName, 0, parameter.size(), pass);
         size_t rows = ((NDLNode<ElemType>*)params[0])->GetScalar();
-        // if we have three parameters the second is columns
-        size_t cols = parameter.size() > 2 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;
+        // ignore legacy size_t cols = parameter.size() > 2 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;

-        bool needGradient = node->GetOptionalParameter("needGradient", "false");
+        //bool needGradient = node->GetOptionalParameter("needGradient", "false"); // TODO: what's this for?
         float defaultHiddenActivity = node->GetOptionalParameter("defaultHiddenActivity", "0.1"); // TODO: parameter should be called 'defaultHiddenActivation'

-        //for backward compatibility we check timeStep first
+        // for backward compatibility we check 'timeStep' first
         size_t timeStep = node->GetOptionalParameter("timeStep", "1");
         if (timeStep == 1)
         {
             timeStep = node->GetOptionalParameter("delayTime", "1");
         }

         if (cnNodeType == OperationNameOf(PastValueNode))
-            nodePtr = builder.PastValue(NULL, defaultHiddenActivity, rows, cols, timeStep, name);
+            nodePtr = builder.PastValue(NULL, defaultHiddenActivity, rows, timeStep, name);
         else
-            nodePtr = builder.FutureValue(NULL, defaultHiddenActivity, rows, cols, timeStep, name);
+            nodePtr = builder.FutureValue(NULL, defaultHiddenActivity, rows, timeStep, name);

-        nodePtr->SetParameterUpdateRequired(needGradient); // TODO: what's this for?
+        //nodePtr->SetParameterUpdateRequired(needGradient); // TODO: what's this for?
     }
 }
 else if (cnNodeType == OperationNameOf(ConvolutionNode))
@@ -546,6 +526,32 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     }
 }

+// ProcessTensorShapeParameters - assume positional parameters starting from position i are tensor dimensions--parse those.
+// If isImage, then it must be a 3D tensor, which is interpreted as (W,H,C), and the optional parameter 'imageLayout' says how.
+template<class ElemType>
+TensorShape SynchronousNodeEvaluator<ElemType>::ProcessTensorShapeParameters(const NDLNode<ElemType>* node, const vector<void*> & params, size_t & i, bool isImage, const wstring & cnNodeType/*for error messages only*/)
+{
+    // gather dims
+    vector<size_t> dims;
+    dims.push_back(((NDLNode<ElemType>*)params[i])->GetScalar()); // first is mandatory
+    for (i++; i < params.size(); i++)
+        dims.push_back(((NDLNode<ElemType>*)params[i])->GetScalar());
+
+    // turn into tensor
+    TensorShape tensorShape(dims);
+
+    // if image then interpret as W, H, C with layout according to optional imageLayout parameter
+    if (isImage)
+    {
+        if (dims.size() != 3)
+            RuntimeError("%ls should have 3 parameters [width, height, numChannels].", cnNodeType.c_str());
+        ImageLayoutKind imageLayoutKind = ImageLayoutKindFrom(node->GetOptionalParameter("imageLayout", "HWC"));
+        tensorShape = ImageDimensions::AsTensorShape(tensorShape[0], tensorShape[1], tensorShape[2], imageLayoutKind);
+    }
+
+    return tensorShape;
+}
+
 template class SynchronousExecutionEngine<float>;
 template class SynchronousExecutionEngine<double>;
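On the NDL side, ProcessTensorShapeParameters means every positional parameter of InputValue, ImageInput, and LearnableParameter is now read as one tensor dimension. A sketch of NDL that would exercise the new paths (dimensions and names are illustrative, not from this commit):

    f = Input(784)                                  # one or more dims; a cols parameter is no longer accepted
    i = ImageInput(32, 32, 3, imageLayout = "CHW")  # exactly 3 dims, mapped via ImageDimensions::AsTensorShape
    W = LearnableParameter(256, 784)                # positional dims parsed into a TensorShape
    p = PastValue(256, 1, f)                        # legacy 3-parameter form still parses; the cols value is ignored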
