Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into linux-gcc
Browse files Browse the repository at this point in the history
Conflicts:
	MachineLearning/CNTK/ComputationNetwork.h
	MachineLearning/CNTK/NetworkDescriptionLanguage.cpp
  • Loading branch information
yzhang87 committed Jul 15, 2015
2 parents abc106d + 05d2db2 commit a0ededc
Show file tree
Hide file tree
Showing 4 changed files with 195 additions and 15 deletions.
32 changes: 22 additions & 10 deletions MachineLearning/CNTK/ComputationNetwork.h
Original file line number Diff line number Diff line change
Expand Up @@ -1198,6 +1198,10 @@ class ComputationNetwork
{
newNode = new SumElementsNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
}
else if (nodeType == SumColumnElementsNode<ElemType>::TypeName())
{
newNode = new SumColumnElementsNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
}
else if (nodeType == ScaleNode<ElemType>::TypeName())
{
newNode = new ScaleNode<ElemType>(fstream, modelVersion, m_deviceId, nodeName);
Expand Down Expand Up @@ -1506,6 +1510,10 @@ class ComputationNetwork
{
newNode = new SumElementsNode<ElemType>(m_deviceId, nodeName);
}
else if (nodeType == SumColumnElementsNode<ElemType>::TypeName())
{
newNode = new SumColumnElementsNode<ElemType>(m_deviceId, nodeName);
}
else if (nodeType == ScaleNode<ElemType>::TypeName())
{
newNode = new ScaleNode<ElemType>(m_deviceId, nodeName);
Expand Down Expand Up @@ -2962,7 +2970,7 @@ class ComputationNetwork
{
if (!allowFragment)
{
FormRecurentLoops(node);
FormRecurentLoops(node, true);
}
PrintComputationTree(node, false);
size_t actualMBSize = this->GetActualMBSize();
Expand Down Expand Up @@ -3380,21 +3388,21 @@ class ComputationNetwork
}

// get the strong connected component from the graph
void getStrongSCC(const ComputationNodePtr rootNode)
void getStrongSCC(const ComputationNodePtr rootNode, bool isCriterion)
{
std::unordered_set<ComputationNodePtr> visited;
std::list<ComputationNodePtr> sccStack;
size_t index = 0;
size_t loopId = 0;
if (rootNode->isVisisted() == false)
{
strongSCC(rootNode, sccStack, index, loopId);
strongSCC(rootNode, sccStack, index, loopId, isCriterion);
}
}

void strongSCC(ComputationNodePtr cur,
std::list<ComputationNodePtr>& sccStack,
size_t& index, size_t& loopId)
size_t& index, size_t& loopId, bool isCriterion)
{
cur->SetIndex(index);
cur->Setlowlink(index);
Expand All @@ -3408,7 +3416,7 @@ class ComputationNetwork
{
if (cur->Inputs(i)->isVisisted() == false)
{
strongSCC(cur->Inputs(i), sccStack, index, loopId);
strongSCC(cur->Inputs(i), sccStack, index, loopId, isCriterion);
cur->Setlowlink(min(cur->Getlowlink(), cur->Inputs(i)->Getlowlink()));
}
else if (cur->Inputs(i)->isInStack())
Expand Down Expand Up @@ -3436,7 +3444,7 @@ class ComputationNetwork
}
}
rInfo.Reset();
if (sccSize > 1)
if (sccSize > 1 && isCriterion)
{
loopId++;
m_recurrentInfo.push_back(rInfo);
Expand Down Expand Up @@ -3478,11 +3486,11 @@ class ComputationNetwork
}
}
//must be called before ValidateNetwork
void FormRecurentLoops(const ComputationNodePtr rootNode)
void FormRecurentLoops(const ComputationNodePtr rootNode, bool isCriterion = false)
{
std::vector<ComputationNodePtr> sourceLoopNodes;

getStrongSCC(rootNode);
getStrongSCC(rootNode, isCriterion);
std::list<ComputationNodePtr>& nodes = GetEvalOrder(rootNode, sourceLoopNodes);
std::list<ComputationNodePtr> nodesForGrad;

Expand Down Expand Up @@ -3590,10 +3598,14 @@ class ComputationNetwork
}
#endif
}

DetermineLoopTypes();

for (auto iter = nodes.begin(); iter != nodes.end(); iter++)
{
(*iter)->clearCache();
}
}

void DetermineLoopTypes()
{
for (auto iter = m_recurrentInfo.begin(); iter != m_recurrentInfo.end(); iter++)
Expand Down
174 changes: 170 additions & 4 deletions MachineLearning/CNTK/LinearAlgebraNodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,136 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template class SumElementsNode<float>;
template class SumElementsNode<double>;

template<class ElemType>
class SumColumnElementsNode : public ComputationNode<ElemType>
{
UsingComputationNodeMembers;
public:
SumColumnElementsNode(const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"")
: ComputationNode<ElemType>(deviceId), m_sumValue(deviceId)
{
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
m_deviceId = deviceId;
MoveMatricesToDevice(deviceId);
InitRecurrentNode();
}

SumColumnElementsNode(File& fstream, const size_t modelVersion, const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX, const std::wstring name = L"")
: ComputationNode<ElemType>(deviceId), m_sumValue(deviceId)
{
m_nodeName = (name == L"" ? CreateUniqNodeName() : name);
LoadFromFile(fstream, modelVersion, deviceId);
}

// copy constructor
SumColumnElementsNode(const SumColumnElementsNode<ElemType>* node, const std::wstring& newName, const CopyNodeFlags flags)
: ComputationNode<ElemType>(node->m_deviceId), m_sumValue(node->m_deviceId)
{
node->CopyTo(this, newName, flags);
}

virtual ComputationNodePtr Duplicate(const std::wstring& newName, const CopyNodeFlags flags) const
{
const std::wstring& name = (newName == L"") ? NodeName() : newName;

ComputationNodePtr node = new SumColumnElementsNode<ElemType>(this, name, flags);
return node;
}

virtual const std::wstring OperationName() const { return TypeName(); }
static const std::wstring TypeName() { return L"SumColumnElements"; }

virtual void ComputeInputPartial(const size_t inputIndex)
{
if (inputIndex != 0)
throw std::invalid_argument("SumColumnElements only has one input.");
ComputeInputPartialS(Inputs(0)->GradientValues(), GradientValues());
}

virtual void ComputeInputPartial(const size_t inputIndex, const size_t timeIdxInSeq)
{
if (inputIndex != 0)
throw std::invalid_argument("SumColumnElements only has one input.");

Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
Matrix<ElemType> sliceOutputGrad = GradientValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);

ComputeInputPartialS(sliceInputGrad, sliceOutputGrad);
}

static void WINAPI ComputeInputPartialS(Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
{
inputGradientValues += gradientValues; //here the assumption is that gradientValues is a row vector
}

virtual void EvaluateThisNode()
{
EvaluateThisNodeS(m_functionValues, Inputs(0)->FunctionValues());
}

virtual void EvaluateThisNode(const size_t timeIdxInSeq)
{
Matrix<ElemType> sliceInputValue = Inputs(0)->FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
Matrix<ElemType> sliceOutputValue = m_functionValues.ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);

EvaluateThisNodeS(sliceOutputValue, sliceInputValue);
}

static void WINAPI EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
{
Matrix<ElemType>::VectorSum(inputFunctionValues, functionValues, true);
#if NANCHECK
functionValues.HasNan("SumColumnElements");
#endif
}

virtual void Validate()
{
PrintSelfBeforeValidation();

if (m_children.size() != 1)
throw std::logic_error("SumColumnElements operation should have one input.");

if (Inputs(0)->FunctionValues().GetNumElements() == 0)
throw std::logic_error("SumColumnElements operation: the input node has 0 element.");

FunctionValues().Resize(1, Inputs(0)->FunctionValues().GetNumCols());
InferImageDimsFromInputs();
}

virtual void InferImageDimsFromInputs()
{
InferImageDimsFromInput(0, false);

m_outputWidth = 1;
m_outputHeight = 1;
m_outputChannels = 1;
}

virtual void AttachInputs(const ComputationNodePtr singleInput)
{
m_children.resize(1);
m_children[0] = singleInput;
}

virtual void CopyTo(const ComputationNodePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const
{
ComputationNode<ElemType>::CopyTo(nodeP, newName, flags);
SumColumnElementsNode<ElemType>* node = (SumColumnElementsNode<ElemType>*) nodeP;

if (flags & CopyNodeFlags::copyNodeValue)
{
node->m_sumValue = m_sumValue;
}
}

private:
Matrix<ElemType> m_sumValue;
};

template class SumColumnElementsNode<float>;
template class SumColumnElementsNode<double>;

//this node is used to extract part of the input by rows as the output
//it has to be continuous segments of rows since each column is treated as one sample
template<class ElemType>
Expand Down Expand Up @@ -1136,7 +1266,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// inputIndex == 1 (right) - inputGradientValues[1], inputFunctionValues[0]
static void WINAPI ComputeInputPartialS(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
{
inputGradientValues.AddElementProductOf(gradientValues, inputFunctionValues);
size_t gradCol = gradientValues.GetNumCols();
size_t inputCol = inputFunctionValues.GetNumCols();

if (gradCol != inputCol && inputCol == 1)
{
inputGradientValues.SetValue(gradientValues);
inputGradientValues.ColumnElementMultiplyWith(inputFunctionValues);
}
else
{
inputGradientValues.AddElementProductOf(gradientValues, inputFunctionValues);
}
#if NANCHECK
inputGradientValues.HasNan("ElementTimes");
#endif
Expand All @@ -1159,7 +1300,30 @@ namespace Microsoft { namespace MSR { namespace CNTK {

static void WINAPI EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0, const Matrix<ElemType>& input1)
{
functionValues.AssignElementProductOf(input0, input1);
size_t rows0 = input0.GetNumRows(), cols0 = input0.GetNumCols();
size_t rows1 = input1.GetNumRows(), cols1 = input1.GetNumCols();
if (rows0 == rows1 && cols0 == cols1)
{
functionValues.AssignElementProductOf(input0, input1);
}
else if ((cols0 == 1 || cols1 == 1) && rows1 == rows0) // col vec with matching rows
{
Matrix<ElemType> tmpMat;
if (cols0 == 1)
{
functionValues.SetValue(input1);
functionValues.ColumnElementMultiplyWith(input0);
}
else if (cols1 == 1)
{
functionValues.SetValue(input0);
functionValues.ColumnElementMultiplyWith(input1);
}
}
else
{
throw std::logic_error("The Matrix<ElemType> dimension in the ElementTimes operation does not match.");
}
#if NANCHECK
functionValues.HasNan("ElementTimes");
#endif
Expand Down Expand Up @@ -1191,8 +1355,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (Inputs(0)->FunctionValues().GetNumElements() == 0 || Inputs(1)->FunctionValues().GetNumElements() == 0)
throw std::logic_error("ElementTimes operation: one of the operants has 0 element.");

if (Inputs(1)->FunctionValues().GetNumRows() != Inputs(0)->FunctionValues().GetNumRows() ||
Inputs(1)->FunctionValues().GetNumCols() != Inputs(0)->FunctionValues().GetNumCols())
size_t rows0 = Inputs(0)->FunctionValues().GetNumRows(), cols0 = Inputs(0)->FunctionValues().GetNumCols();
size_t rows1 = Inputs(1)->FunctionValues().GetNumRows(), cols1 = Inputs(1)->FunctionValues().GetNumCols();

if (rows0 != rows1 || (cols0 != cols1 && cols0 != 1 && cols1 != 1))
throw std::logic_error("The Matrix<ElemType> dimension in the ElementTimes operation does not match.");

FunctionValues().Resize(Inputs(0)->FunctionValues().GetNumRows(), Inputs(0)->FunctionValues().GetNumCols());
Expand Down
2 changes: 2 additions & 0 deletions MachineLearning/CNTK/NetworkDescriptionLanguage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
ret = true;
else if (EqualInsensitive(nodeType, SumElementsNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, SumColumnElementsNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, ScaleNode<ElemType>::TypeName()))
ret = true;
else if (EqualInsensitive(nodeType, TransposeNode<ElemType>::TypeName()))
Expand Down
2 changes: 1 addition & 1 deletion Math/Math/GPUMatrix.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2533,7 +2533,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}

if (do_sync) CUDA_CALL(cudaEventCreate(&done));
_vectorSum<ElemType> << <blocksPerGrid, threadsPerBlock, 0, t_stream >> >(a.m_pArray, c.m_pArray, n, m, isColWise);
_vectorSum<ElemType> << <blocksPerGrid, threadsPerBlock, 0, t_stream >> >(c.m_pArray, a.m_pArray, n, m, isColWise);
if (do_sync) CUDA_CALL(cudaEventRecord(done));
if (do_sync) CUDA_CALL(cudaEventSynchronize(done));
if (do_sync) CUDA_CALL(cudaEventDestroy(done));
Expand Down

0 comments on commit a0ededc

Please sign in to comment.