removed two more Multinetworks configs from SimpleNetworkBuilder
frankseide committed Jan 22, 2016
1 parent 482a6a4 commit 6d31cda
Showing 4 changed files with 4 additions and 307 deletions.
Makefile (2 changes: 1 addition & 1 deletion)
@@ -462,7 +462,7 @@ CNTK_SRC =\
 	$(SOURCEDIR)/ActionsLib/TrainActions.cpp \
 	$(SOURCEDIR)/ActionsLib/EvalActions.cpp \
 	$(SOURCEDIR)/ActionsLib/OtherActions.cpp \
-	$(SOURCEDIR)/ActionsLib/EsotericActions.cpp \
+	$(SOURCEDIR)/ActionsLib/SpecialPurposeActions.cpp \
 	$(SOURCEDIR)/SequenceTrainingLib/latticeforwardbackward.cpp \
 	$(SOURCEDIR)/SequenceTrainingLib/parallelforwardbackward.cpp \
 	$(SOURCEDIR)/CNTK/BrainScript/BrainScriptEvaluator.cpp \
Source/CNTK/SimpleNetworkBuilder.cpp (287 changes: 0 additions & 287 deletions)
@@ -59,12 +59,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescription
     case RCRF:
         net = BuildSeqTrnLSTMNetworkFromDescription();
         break;
-    case UNIDIRECTIONALLSTM:
-        net = BuildUnidirectionalLSTMNetworksFromDescription();
-        break;
-    case BIDIRECTIONALLSTM:
-        net = BuildBiDirectionalLSTMNetworksFromDescription();
-        break;
    default:
        LogicError("BuildNetworkFromDescription: invalid m_rnnType %d", (int) m_rnnType);
    }
@@ -1323,130 +1317,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMNetworkFromDescription
     return m_net;
 }
 
-/**
-Build unidirectional LSTM p(y_t | y_{t-1}, x_1^t)
-Because the past prediction is used, decoding requires a beam-search decoder
-Developed by Kaisheng Yao
-This is used in the following work:
-K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion," submitted to Interspeech 2015
-*/
-template <class ElemType>
-ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildUnidirectionalLSTMNetworksFromDescription()
-{
-    ComputationNetworkBuilder<ElemType> builder(*m_net);
-    if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
-    {
-        ULONG randomSeed = 1;
-
-        size_t numHiddenLayers = m_layerSizes.size() - 2;
-
-        size_t numRecurrentLayers = m_recurrentLayers.size();
-        size_t dims = 0;
-
-        ComputationNodePtr input, w, b, u, e, Wxo, output, label, prior;
-        vector<ComputationNodePtr> streams;
-        vector<size_t> streamdims;
-        ComputationNodePtr inputforward, inputbackward, inputletter;
-        ComputationNodePtr transcription_prediction;
-
-        map<wstring, size_t> featDim;
-
-        assert(m_streamSizes.size() > 0);
-        inputbackward = builder.CreateInputNode(L"featurepastValueedTarget", m_streamSizes[0]);
-        m_net->FeatureNodes().push_back(inputbackward);
-        featDim[L"featurepastValueedTarget"] = m_streamSizes[0];
-
-        inputletter = builder.CreateInputNode(L"ltrForward", m_streamSizes[1]);
-        m_net->FeatureNodes().push_back(inputletter);
-        featDim[L"ltrForward"] = m_streamSizes[1];
-
-        size_t layerIdx = 0;
-        size_t idx = 0;
-        int recur_idx = 0;
-        for (auto p = m_net->FeatureNodes().begin(); p != m_net->FeatureNodes().end(); p++, idx++)
-        {
-            layerIdx = 0; /// reset layer id because each input stream starts from layer 0
-            input = dynamic_pointer_cast<ComputationNode<ElemType>>(*p);
-            if (m_applyMeanVarNorm)
-            {
-                input = dynamic_pointer_cast<ComputationNode<ElemType>>(*p);
-                w = builder.Mean(input);
-                b = builder.InvStdDev(input);
-                output = builder.PerDimMeanVarNormalization(input, w, b);
-
-                input = output;
-            }
-
-            size_t idim = input->GetSampleMatrixNumRows();
-            assert(m_lookupTabelOrderSizes.size() == m_streamSizes.size());
-
-            e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"Embedding%d", idx), m_layerSizes[1], idim / m_lookupTabelOrderSizes[idx]);
-            m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
-            output = builder.LookupTable(e, input, msra::strfun::wstrprintf(L"LOOKUP%d", idx));
-
-            streamdims.push_back(m_layerSizes[1] * m_lookupTabelOrderSizes[idx]);
-            input = output;
-            streams.push_back(input);
-        }
-
-        layerIdx++;
-
-        output = (ComputationNodePtr) builder.Parallel(streams[0], streams[1], L"Parallel0");
-        input = output;
-        dims = streamdims[0] + streamdims[1];
-
-        /// now merge the streams
-        if (numHiddenLayers > 0)
-        {
-            while (layerIdx < numHiddenLayers)
-            {
-                switch (m_rnnType)
-                {
-                case UNIDIRECTIONALLSTM:
-                    //output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, layerIdx, dims, m_layerSizes[layerIdx + 1], input);
-                    output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, layerIdx, dims, m_layerSizes[layerIdx + 1], input);
-                    break;
-                default:
-                    LogicError("This is for the unidirectional LSTM model. Check rnnType to see whether it is UNIDIRECTIONALLSTMWITHPASTPREDICTION or TRANSDUCER");
-                }
-
-                layerIdx++;
-                dims = m_layerSizes[layerIdx];
-                input = output;
-            }
-        }
-
-        /// directly connect transcription model output/feature to the output layer
-        Wxo = builder.CreateLearnableParameter(L"ConnectToLowerLayers", m_layerSizes[numHiddenLayers + 1], m_layerSizes[layerIdx]);
-        m_net->InitLearnableParameters(Wxo, m_uniformInit, randomSeed++, m_initValueScale);
-
-        output = builder.Times(Wxo, input);
-        input = output;
-
-        /// here uses "labels", so only one label from the multiple stream inputs is used.
-        label = builder.CreateInputNode(L"labels", m_layerSizes[numHiddenLayers + 1]);
-
-        AddTrainAndEvalCriterionNodes(input, label, w);
-
-        //add softmax layer (if prob is needed or KL reg adaptation is needed)
-        output = builder.Softmax(input, L"outputs");
-
-        if (m_needPrior)
-        {
-            prior = builder.Mean(label);
-            input = builder.Log(prior, L"LogOfPrior");
-            ComputationNodePtr scaledLogLikelihood = builder.Minus(output, input, L"ScaledLogLikelihood");
-            m_net->OutputNodes().push_back(scaledLogLikelihood);
-        }
-        else
-            m_net->OutputNodes().push_back(output);
-    }
-
-    return m_net;
-}

 template <class ElemType>
 shared_ptr<ComputationNode<ElemType>> /*ComputationNodePtr*/ SimpleNetworkBuilder<ElemType>::BuildLSTMComponentWithMultiInputs(ULONG& randomSeed, size_t iLayer, const vector<size_t>& inputDim, size_t outputDim, const vector<ComputationNodePtr>& inputObs, bool inputWeightSparse)
 {
@@ -1637,163 +1507,6 @@ shared_ptr<ComputationNode<ElemType>> /*ComputationNodePtr*/ SimpleNetworkBuilder
     return output;
 }
 
-/**
-Build a bi-directional LSTM network to compute the following
-p(y_t | y_1^{t-1}, x_1^T)
-The target side for y_t is an LSTM language model with the past prediction y_{t-1} as its input. This language model also uses
-the outputs from the forward-direction LSTM and the backward-direction LSTM that operate on the source side.
-Developed by Kaisheng Yao.
-This is used in the following works:
-K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion," submitted to Interspeech 2015
-*/
-template <class ElemType>
-ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildBiDirectionalLSTMNetworksFromDescription()
-{
-    ComputationNetworkBuilder<ElemType> builder(*m_net);
-    if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
-    {
-        ULONG randomSeed = 1;
-
-        size_t numHiddenLayers = m_layerSizes.size() - 2;
-
-        size_t numRecurrentLayers = m_recurrentLayers.size();
-
-        ComputationNodePtr input, w, b, u, e, pastValue, output, label, prior, Wxo;
-        ComputationNodePtr forwardInput, forwardOutput, backwardInput, backwardOutput;
-        vector<ComputationNodePtr> streams;
-        vector<size_t> streamdims;
-        ComputationNodePtr inputprediction, inputletter, ngram;
-        ComputationNodePtr ltrSource;
-        size_t ltrDim = 0;
-
-        map<wstring, size_t> featDim;
-
-        size_t ltrSrcIdx = 1;
-        /// create projections to use pastValue predictions
-        inputprediction = builder.CreateInputNode(L"featurepastValueedTarget", m_streamSizes[0]);
-        m_net->FeatureNodes().push_back(inputprediction);
-
-        inputletter = builder.CreateInputNode(L"ltrForward", m_streamSizes[1]);
-        m_net->FeatureNodes().push_back(inputletter);
-        featDim[L"ltrForward"] = m_streamSizes[1];
-
-        size_t layerIdx = 0;
-        size_t idx = 0;
-        int recur_idx = 0;
-        for (auto p = m_net->FeatureNodes().begin(); p != m_net->FeatureNodes().end(); p++, idx++)
-        {
-            layerIdx = 0; /// reset layer id because each input stream starts from layer 0
-            input = dynamic_pointer_cast<ComputationNode<ElemType>>(*p);
-            if (m_applyMeanVarNorm)
-            {
-                input = dynamic_pointer_cast<ComputationNode<ElemType>>(*p);
-                w = builder.Mean(input);
-                b = builder.InvStdDev(input);
-                output = builder.PerDimMeanVarNormalization(input, w, b);
-
-                input = output;
-            }
-
-            size_t idim = input->GetSampleMatrixNumRows();
-            assert(m_lookupTabelOrderSizes.size() == m_streamSizes.size());
-
-            e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"Embedding%d", idx), m_layerSizes[1], idim / m_lookupTabelOrderSizes[idx]);
-            m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
-            output = builder.LookupTable(e, input, msra::strfun::wstrprintf(L"LOOKUP%d", idx));
-
-            streamdims.push_back(m_layerSizes[1] * m_lookupTabelOrderSizes[idx]);
-            input = output;
-            streams.push_back(input);
-
-            if (idx == ltrSrcIdx)
-            {
-                ltrSource = input;
-                ltrDim = m_layerSizes[1] * m_lookupTabelOrderSizes[idx];
-            }
-        }
-
-        layerIdx++;
-
-        /// glue the two streams
-        forwardInput = (ComputationNodePtr) builder.Parallel(streams[0], streams[1], L"Parallel0");
-
-        if (numHiddenLayers > 0)
-        {
-            /// forward direction
-            //forwardOutput = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, layerIdx + 100, streamdims[0] + streamdims[1], m_layerSizes[layerIdx + 1], forwardInput);
-            forwardOutput = (ComputationNodePtr) BuildLSTMComponent(randomSeed, layerIdx + 100, streamdims[0] + streamdims[1], m_layerSizes[layerIdx + 1], forwardInput);
-            forwardInput = forwardOutput;
-
-            backwardInput = (ComputationNodePtr) builder.TimeReverse(ltrSource);
-            //backwardOutput = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, layerIdx + 200, ltrDim, m_layerSizes[layerIdx + 1], backwardInput);
-            backwardOutput = (ComputationNodePtr) BuildLSTMComponent(randomSeed, layerIdx + 200, ltrDim, m_layerSizes[layerIdx + 1], backwardInput);
-            backwardInput = backwardOutput;
-
-            layerIdx++;
-
-            while (layerIdx < numHiddenLayers - 1)
-            {
-                //forwardOutput = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, layerIdx + 100, m_layerSizes[layerIdx], m_layerSizes[layerIdx + 1], forwardInput);
-                forwardOutput = (ComputationNodePtr) BuildLSTMComponent(randomSeed, layerIdx + 100, m_layerSizes[layerIdx], m_layerSizes[layerIdx + 1], forwardInput);
-                forwardInput = forwardOutput;
-
-                //backwardOutput = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, layerIdx + 200, m_layerSizes[layerIdx], m_layerSizes[layerIdx + 1], backwardInput);
-                backwardOutput = (ComputationNodePtr) BuildLSTMComponent(randomSeed, layerIdx + 200, m_layerSizes[layerIdx], m_layerSizes[layerIdx + 1], backwardInput);
-                backwardInput = backwardOutput;
-
-                layerIdx++;
-            }
-
-            backwardOutput = (ComputationNodePtr) builder.TimeReverse(backwardInput);
-        }
-
-        streams.clear();
-        streamdims.clear();
-        streams.push_back(forwardOutput);
-        streamdims.push_back(m_layerSizes[layerIdx]);
-        streams.push_back(backwardOutput);
-        streamdims.push_back(m_layerSizes[layerIdx]);
-
-        /// glue the two streams
-        forwardInput = (ComputationNodePtr) builder.Parallel(streams[0], streams[1], L"Parallel1");
-
-        // output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, layerIdx, streamdims[0] + streamdims[1], m_layerSizes[layerIdx + 1], forwardInput);
-        output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, layerIdx, streamdims[0] + streamdims[1], m_layerSizes[layerIdx + 1], forwardInput);
-
-        input = output;
-        layerIdx++;
-
-        /// directly connect transcription model output/feature to the output layer
-        Wxo = builder.CreateLearnableParameter(L"ConnectToLowerLayers", m_layerSizes[numHiddenLayers + 1], m_layerSizes[layerIdx]);
-        m_net->InitLearnableParameters(Wxo, m_uniformInit, randomSeed++, m_initValueScale);
-
-        output = builder.Times(Wxo, input);
-        input = output;
-
-        /// here uses "labels", so only one label from the multiple stream inputs is used.
-        label = builder.CreateInputNode(L"labels", m_layerSizes[numHiddenLayers + 1]);
-
-        AddTrainAndEvalCriterionNodes(input, label);
-
-        //add softmax layer (if prob is needed or KL reg adaptation is needed)
-        output = builder.Softmax(input, L"outputs");
-
-        if (m_needPrior)
-        {
-            prior = builder.Mean(label);
-            input = builder.Log(prior, L"LogOfPrior");
-            ComputationNodePtr scaledLogLikelihood = builder.Minus(output, input, L"ScaledLogLikelihood");
-            m_net->OutputNodes().push_back(scaledLogLikelihood);
-        }
-        else
-            m_net->OutputNodes().push_back(output);
-    }
-
-    return m_net;
-}

 template <class ElemType>
 ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNCELSTMNetworkFromDescription()
 {
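For context on what was removed: both builders implement the grapheme-to-phoneme models of K. Yao and G. Zweig cited in their doc comments. The unidirectional network models p(y_t | y_{t-1}, x_1^t); the bidirectional network models p(y_t | y_1^{t-1}, x_1^T) by additionally running an LSTM over the time-reversed source (builder.TimeReverse) and gluing the two directions with builder.Parallel. Because each step feeds back the previous prediction rather than a ground-truth label, a greedy decoder can lock in an early mistake, which is why the removed comments call for a beam-search decoder. Below is a minimal, self-contained sketch of that decoding loop — illustrative code only, not part of CNTK: Hyp, StepFn, and BeamSearch are hypothetical names, and the step callback stands in for the network's per-step log-softmax over the output vocabulary.

// Minimal beam-search sketch for a model of the form p(y_t | y_{t-1}, x_1^t).
#include <algorithm>
#include <cstddef>
#include <functional>
#include <vector>

struct Hyp
{
    std::vector<int> tokens; // y_1..y_t chosen so far
    double logProb = 0.0;    // sum of log p(y_i | y_{i-1}, x_1^i)
};

// step(prevToken, t) returns log p(y_t = k | y_{t-1} = prevToken, x_1^t) for every k.
using StepFn = std::function<std::vector<double>(int prevToken, std::size_t t)>;

std::vector<int> BeamSearch(const StepFn& step, std::size_t maxLen, std::size_t beamWidth, int bosToken)
{
    std::vector<Hyp> beam{Hyp{{bosToken}, 0.0}};
    for (std::size_t t = 1; t <= maxLen; t++)
    {
        // expand every surviving hypothesis by every possible next token
        std::vector<Hyp> expanded;
        for (const auto& h : beam)
        {
            std::vector<double> logp = step(h.tokens.back(), t);
            for (int k = 0; k < (int) logp.size(); k++)
            {
                Hyp next = h;
                next.tokens.push_back(k);
                next.logProb += logp[k];
                expanded.push_back(std::move(next));
            }
        }
        // keep only the beamWidth best partial hypotheses
        std::size_t keep = std::min(beamWidth, expanded.size());
        std::partial_sort(expanded.begin(), expanded.begin() + keep, expanded.end(),
                          [](const Hyp& a, const Hyp& b) { return a.logProb > b.logProb; });
        expanded.resize(keep);
        beam = std::move(expanded);
    }
    return beam.front().tokens; // best-scoring hypothesis
}

With beamWidth = 1 this reduces to greedy decoding; larger beams keep alternative choices of y_{t-1} alive until more of the input has been scored.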
Source/CNTK/SimpleNetworkBuilder.h (14 changes: 1 addition & 13 deletions)
@@ -42,9 +42,7 @@ enum RNNTYPE
     CLASSLSTM = 64,
     NCELSTM = 128,
     CLSTM = 256,
-    RCRF = 512,
-    UNIDIRECTIONALLSTM = 19,
-    BIDIRECTIONALLSTM = 20
+    RCRF = 512
 };
 
 enum class TrainingCriterion : int // TODO: camel-case these
@@ -188,12 +186,6 @@ class SimpleNetworkBuilder
             m_rnnType = CLSTM;
         else if (std::find(strType.begin(), strType.end(), L"CRF") != strType.end())
             m_rnnType = RCRF;
-        else if (std::find(strType.begin(), strType.end(), L"TRANSDUCER") != strType.end() ||
-                 std::find(strType.begin(), strType.end(), L"UNIDIRECTIONALLSTMWITHPASTPREDICTION") != strType.end())
-            m_rnnType = UNIDIRECTIONALLSTM;
-        else if (std::find(strType.begin(), strType.end(), L"JOINTCONDITIONALBILSTMSTREAMS") != strType.end() ||
-                 std::find(strType.begin(), strType.end(), L"BIDIRECTIONALLSTMWITHPASTPREDICTION") != strType.end())
-            m_rnnType = BIDIRECTIONALLSTM;
         else
             InvalidArgument("InitRecurrentConfig: unknown value for rnnType parameter '%ls'", strType[0].c_str());
     }
@@ -277,10 +269,6 @@ class SimpleNetworkBuilder
 
     ComputationNetworkPtr BuildSeqTrnLSTMNetworkFromDescription();
 
-    ComputationNetworkPtr BuildUnidirectionalLSTMNetworksFromDescription();
-
-    ComputationNetworkPtr BuildBiDirectionalLSTMNetworksFromDescription();
-
     ComputationNetworkPtr BuildCLASSLSTMNetworkFromDescription();
 
     ComputationNetworkPtr BuildConditionalLSTMNetworkFromDescription();
Source/Common/Include/BestGpu.h (8 changes: 2 additions & 6 deletions)
@@ -9,12 +9,7 @@
#include "CommonMatrix.h"

// define IConfigRecord and ConfigParameters as incomplete types, in order to avoid having to include "ScriptableObjects.h" and "Config.h", as that confuses some .CU code
namespace Microsoft { namespace MSR { namespace ScriptableObjects {

struct IConfigRecord;
}
}
}
namespace Microsoft { namespace MSR { namespace ScriptableObjects { struct IConfigRecord; }}}

namespace Microsoft { namespace MSR { namespace CNTK {

@@ -30,4 +25,5 @@ static inline DEVICEID_TYPE DeviceFromConfig(const ConfigRecordType& /*config*/)
 } // tells runtime system to not try to use GPUs
 // TODO: find a way to use CPUDEVICE without a huge include overhead; OK so far since CPUONLY mode is sorta special...
 #endif
+
 } } }
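The comment retained in this hunk explains the pattern at work here: declaring IConfigRecord as an incomplete (forward-declared) type lets BestGpu.h be included from .cu files without dragging in ScriptableObjects.h and Config.h; only code that actually dereferences the type needs the full definition. A minimal sketch of the idiom, with hypothetical names (Widget, CountParts) rather than the actual CNTK types:

// Header-style half: only a forward declaration is visible.
namespace lib {
struct Widget; // incomplete type: size and members unknown at this point

// Pointers and references to an incomplete type are fine in declarations,
// so any translation unit (including .cu files) can include this cheaply.
int CountParts(const Widget& w);
} // namespace lib

// Source-style half: the full definition lives where members are accessed,
// normally in a .cpp that includes the real header.
namespace lib {
struct Widget
{
    int parts;
};

int CountParts(const Widget& w)
{
    return w.parts;
}
} // namespace lib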
