Skip to content

Commit

Permalink
Some minor node value matrix memory sharing fixes
Browse files — browse the repository at this point in the history
  • Loading branch information
amitaga committed Jan 17, 2016
1 parent 1805830 commit 582b03d
Show file tree
Hide file tree
Showing 8 changed files with 53 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -771,7 +771,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {

}


// this function will need to be called before actual validation and execution to
// predetermine how to share matrices to reduce memory usage.
// TODO: find a simple topological order and allocateEvalMatrices on that order directly
Expand Down
22 changes: 7 additions & 15 deletions Source/ComputationNetworkLib/ConvolutionalNodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -554,21 +554,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Base(configp)
{ }

// Memory-sharing hint: returning false tells the network it may release/reuse
// this node's output (value) matrix during backpropagation.
virtual bool OutputUsedInComputingInputNodesGradients() const override
{
// The AveragePoolingNode does not require its output value for computing
// the gradients of its input nodes
return false;
}

// Memory-sharing hint: returning false tells the network it may release/reuse
// the input value matrices during backpropagation.
virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override
{
// The AveragePoolingNode does not require any of its inputs' values for computing
// the gradients of its input nodes
UNREFERENCED_PARAMETER(childIndex);
return false;
}

void Validate(bool isFinalValidationPass) override
{
Base::Validate(isFinalValidationPass);
Expand Down Expand Up @@ -697,6 +682,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// No derivatives with respect to running mean and InvStdDev.
}

// Memory-sharing hint: allows the output value matrix to be released/reused
// once forward propagation of dependents is done.
virtual bool OutputUsedInComputingInputNodesGradients() const override
{
// The BatchNormalizationNode does not require its output value for computing
// the gradients of its input nodes
return false;
}

void ForwardProp(const FrameRange & fr) override
{
Matrix<ElemType> sliceInputValue = Input(0)->ValueFor(fr);
Expand Down
12 changes: 11 additions & 1 deletion Source/ComputationNetworkLib/EsotericNodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -968,6 +968,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}

// BackpropTo of this node does not read Value(); returning false enables the
// memory sharer to reuse the output matrix. NOTE(review): enclosing class is
// outside this view — confirm against the full EsotericNodes.h.
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }

virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
{
Value().VerifySize(1, 1);
Expand Down Expand Up @@ -1065,6 +1067,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
LogicError("SequenceDecoder is used for evaluation only.");
}

// This node is evaluation-only (BackpropTo raises a LogicError), so neither its
// output value nor its inputs' values are needed for gradient computation;
// both matrices can therefore be shared/released by the memory planner.
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }

/// compute posterior probability of label y at position t
virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
{
Expand Down Expand Up @@ -1389,6 +1394,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#endif
}

// BackpropTo of this node does not read Value(); false lets the memory sharer
// reuse the output matrix. NOTE(review): enclosing class not visible here.
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }

virtual void /*ComputationNode::*/ForwardProp(const FrameRange & fr) override
{
size_t rows0 = Input(0)->GetNumRows(), cols1 = Input(1)->GetNumCols();
Expand Down Expand Up @@ -1573,13 +1580,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// Backpropagates this node's gradient to input 'inputIndex' for frame range 'fr'
// by accumulating GradientFor(fr) into the input's gradient slice.
virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & fr) override
{
if (fr.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
// NOTE(review): the two asserts below are the before/after lines of a diff hunk
// (m_value->GetNumRows() was replaced by GetNumRows() to avoid touching the
// possibly-shared m_value matrix); only the second exists in the actual file.
assert(m_value->GetNumRows() == Gradient().GetNumRows()); // original used m_value->GetNumRows() for loop dimension
assert(GetNumRows() == Gradient().GetNumRows()); // original used m_value->GetNumRows() for loop dimension
assert(m_pMBLayout);

// Accumulate (scale 1.0) this node's gradient into the input's gradient.
Matrix<ElemType> mTmp = Input(inputIndex)->GradientFor(fr);
Matrix<ElemType>::ScaleAndAdd(1.0, GradientFor(fr), mTmp);
}

// BackpropTo above reads only Gradient()/GradientFor(), never Value() or the
// inputs' values, so both may be released/shared during backprop.
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }

virtual void /*ComputationNode::*/ForwardProp(const FrameRange & fr) override
{
Matrix<ElemType> mTmp = ValueFor(fr);
Expand Down
3 changes: 3 additions & 0 deletions Source/ComputationNetworkLib/EvaluationCriterionNodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
LogicError("%ls operation is used for evaluation only.", OperationName().c_str());
}

// Evaluation-only criterion node (BackpropTo raises a LogicError above), so no
// value matrices are needed for gradients; both may be shared/released.
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }

virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
{
FrameRange fr(Input(0)->GetMBLayout());
Expand Down
3 changes: 3 additions & 0 deletions Source/ComputationNetworkLib/NonlinearityNodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
LogicError("Hardmax is not differentiable and is used for evaluation only.");
}

// Hardmax is not differentiable and is evaluation-only (see LogicError above),
// so neither output nor input values are needed for gradient computation.
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }

/*virtual*/ void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues) override
{
//TODO: temp solution, we need to write a math function specifically for this
Expand Down
6 changes: 3 additions & 3 deletions Source/ComputationNetworkLib/RecurrentNodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
fstream >> m_fromOffset >> m_boundaryMode >> m_shiftDimParam;
}

// Memory-sharing hints (deleted hunk in this commit: these overrides are being
// moved/removed for this node): gradients here need neither the output value
// nor the inputs' values, so those matrices can be shared.
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override {return false; }

virtual void BeginForwardProp() override // called after last iteration step of ForwardProp()
{
Base::BeginForwardProp();
Expand Down Expand Up @@ -530,6 +527,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}

// Memory-sharing hints: this node's gradient computation reads neither its
// output value nor its inputs' values, so those matrices can be released or
// reused. NOTE(review): enclosing class header is outside this view.
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }

virtual void /*ComputationNodeBase::*/Validate(bool isFinalValidationPass) override
{
assert(m_inputs.size() == 2);
Expand Down
3 changes: 3 additions & 0 deletions Source/ComputationNetworkLib/ReshapingNodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// TODO: Once we do in-place, the above must include a copy-to-self check (pay special attention to adding vs. copying).
}

// Memory-sharing hints: gradient propagation for this reshaping node needs
// neither the output value nor the inputs' values, so both matrices may be
// shared/released during backprop.
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }

virtual void /*ComputationNode::*/ForwardProp(const FrameRange & fr) override
{
// enforce compatibility of 'dataInput' with 'layoutInput'
Expand Down
26 changes: 23 additions & 3 deletions Source/ComputationNetworkLib/TrainingCriterionNodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType>::Multiply1x1AndWeightedAdd(inputIndex == 0 ? 1.0f : -1.0f, Gradient()/*1x1*/, *m_leftMinusRight, 1.0f, gradient);
}

// BackpropTo above uses only Gradient() and the cached *m_leftMinusRight —
// never Value() or the inputs' values — so those matrices can be shared.
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }

virtual void UpdateFunctionMBSize() override
{
m_leftMinusRight->Resize(Input(0)->GetNumRows(), Input(0)->GetNumCols());
Expand Down Expand Up @@ -151,6 +154,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}

// Gradient computation does not read Value(); false lets the memory sharer
// reuse the output matrix. NOTE(review): enclosing class not visible here.
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }

virtual void UpdateFunctionMBSize() override
{
m_logSoftmaxOfRight->Resize(Input(1)->GetNumRows(), Input(1)->GetNumCols());
Expand Down Expand Up @@ -242,7 +247,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}

/*TODO: merge with call site*/void BackpropToLeft(const Matrix<ElemType>& logOfRight, Matrix<ElemType> inputGradientValues,
// Gradient computation does not read Value(); the output matrix may be shared.
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }

/*TODO: merge with call site*/void BackpropToLeft(const Matrix<ElemType>& logOfRight, Matrix<ElemType> inputGradientValues,
const Matrix<ElemType>& gradientValues)
{
Matrix<ElemType>::Multiply1x1AndWeightedAdd(-1.0f, gradientValues/*1x1*/, logOfRight, 1.0f, inputGradientValues);
Expand Down Expand Up @@ -351,7 +358,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
BackpropToS(*m_gradientOfL1Norm, Input(0)->GradientFor(fr), Gradient(), Input(0)->ValueFor(fr));
}

/*TODO: merge with call site*/void BackpropToS(Matrix<ElemType>& gradientOfL1Norm,
// BackpropTo reads the input value (via Input(0)->ValueFor) but not Value();
// only the output matrix is declared shareable here.
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }

/*TODO: merge with call site*/void BackpropToS(Matrix<ElemType>& gradientOfL1Norm,
Matrix<ElemType> inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& inputFunctionValues)
{
gradientOfL1Norm.AssignSignOf(inputFunctionValues);
Expand Down Expand Up @@ -530,6 +539,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Input(inputIndex)->GradientFor(fr).AssignNCEDerivative(m_ncePrediction, Input(0)->ValueFor(fr), Input(1)->ValueFor(fr), Input(2)->Value(), inputIndex);
}

// BackpropTo above uses m_ncePrediction and the inputs' values, not Value(),
// so the output matrix can be released/reused during backprop.
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }

#if 0 // TODO: delete this. Seems copy-paste leftover?
/*TODO: merge with call site*/void BackpropToRight(const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
{
Expand Down Expand Up @@ -727,6 +738,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
sz += nbr_wrd;
}
}

// Gradient computation does not read Value(); the output matrix may be shared.
// NOTE(review): enclosing class (class-based cross entropy node?) not visible.
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }

private:
void ComputeCEPartialToSoftmaxInputs(Matrix<ElemType>& inputGradientValues, Matrix<ElemType>& gradientValues, size_t y_t)
{
Expand Down Expand Up @@ -1024,13 +1038,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
DataWithMBLayoutFor(mBeta, sequenceRange, Input(0)->GetMBLayout()),
Input(2)->ValueFor(fr),
gradient,
mStartLbl, 1);
mStartLbl, 1);
}
}
else
return;
}

// Backprop above works from alpha/beta buffers and input values, not Value(),
// so the output matrix can be shared/released during backprop.
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }

// compute forward backward algorithm
/*TODO: merge with call site*/void ForwardPropS(Matrix<ElemType> postprob, Matrix<ElemType> alpha, Matrix<ElemType> beta, Matrix<ElemType> & functionValues, const Matrix<ElemType> & lbls, const Matrix<ElemType> & pos_scores, const Matrix<ElemType> & pair_scores, int& firstLbl, int& lastLbl, const int iStep = 1)
{
Expand Down Expand Up @@ -1288,6 +1304,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#endif
}

// Gradient computation does not read Value(); the output matrix may be shared.
// NOTE(review): enclosing class header is outside this view.
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }

// -sum(left_i * log(softmax_i(right)))
virtual void ForwardPropNonLooping()
{
Expand Down Expand Up @@ -1499,6 +1517,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType>::Multiply1x1AndWeightedAdd(-1.0f, Gradient()/*1x1*/, *m_temp, 1.0f, gradient);
}

// Backprop above uses Gradient() and the cached *m_temp, never Value(), so the
// output matrix can be released/reused during backprop.
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }

virtual void UpdateFunctionMBSize() override
{
m_classZeroLabels->Resize(Input(0)->GetNumRows(), Input(0)->GetNumCols());
Expand Down

0 comments on commit 582b03d

Please sign in to comment.