diff --git a/Source/CNTK/NetworkDescriptionLanguage.cpp b/Source/CNTK/NetworkDescriptionLanguage.cpp index 37a2acc40d38..06184e60a7ca 100644 --- a/Source/CNTK/NetworkDescriptionLanguage.cpp +++ b/Source/CNTK/NetworkDescriptionLanguage.cpp @@ -155,7 +155,9 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable) bool ret = false; if (EqualInsensitive(nodeType, OperationNameOf(AveragePoolingNode))) ret = true; else if (EqualInsensitive(nodeType, OperationNameOf(BatchNormalizationNode))) ret = true; +#ifdef COMING_SOON else if (EqualInsensitive(nodeType, OperationNameOf(CRFNode), L"CRF")) ret = true; +#endif else if (EqualInsensitive(nodeType, OperationNameOf(ClassBasedCrossEntropyWithSoftmaxNode), L"CBCEWithSM")) ret = true; else if (EqualInsensitive(nodeType, OperationNameOf(ConvolutionNode), L"Convolve")) ret = true; else if (EqualInsensitive(nodeType, OperationNameOf(CosDistanceNode), L"CosDist")) ret = true; diff --git a/Source/CNTK/SimpleNetworkBuilder.cpp b/Source/CNTK/SimpleNetworkBuilder.cpp index 5f5c5380cf19..1908a2ce5e73 100644 --- a/Source/CNTK/SimpleNetworkBuilder.cpp +++ b/Source/CNTK/SimpleNetworkBuilder.cpp @@ -1688,10 +1688,12 @@ shared_ptr> SimpleNetworkBuilder::AddTrainAn tinput = builder.Times(matrix, input); output = builder.Logistic(label, tinput, (trainNodeName == L"") ? L"Logistic" : trainNodeName); break; +#ifdef COMING_SOON case TrainingCriterion::CRF: assert(trans != nullptr); output = builder.CRF(label, input, trans, (trainNodeName == L"") ? L"CRF" : trainNodeName); break; +#endif case TrainingCriterion::ClassCrossEntropyWithSoftmax: output = builder.ClassCrossEntropyWithSoftmax(label, input, matrix, clspostprob, (trainNodeName == L"") ? L"ClassCrossEntropyWithSoftmax" : trainNodeName); break; @@ -1743,12 +1745,14 @@ shared_ptr> SimpleNetworkBuilder::AddTrainAn tinput = builder.Times(matrix, input); output = builder.ErrorPrediction(label, tinput, (evalNodeName == L"") ? L"EvalErrorPrediction" : evalNodeName); break; +#ifdef COMING_SOON case EvalCriterion::CRF: assert(trans != nullptr); if (matrix != nullptr && tinput == input) tinput = builder.Times(matrix, input); output = builder.CRF(label, tinput, trans, (evalNodeName == L"") ? L"EvalCRF" : evalNodeName); break; +#endif default: LogicError("Unsupported training criterion."); } @@ -1769,14 +1773,10 @@ template class SimpleNetworkBuilder; TrainingCriterion ParseTrainingCriterionString(wstring s) { - if (!_wcsicmp(s.c_str(), L"crossEntropyWithSoftmax")) - return TrainingCriterion::CrossEntropyWithSoftmax; - if (!_wcsicmp(s.c_str(), L"sequenceWithSoftmax")) - return TrainingCriterion::SequenceWithSoftmax; - else if (!_wcsicmp(s.c_str(), L"squareError")) - return TrainingCriterion::SquareError; - else if (!_wcsicmp(s.c_str(), L"logistic")) - return TrainingCriterion::Logistic; + if (!_wcsicmp(s.c_str(), L"crossEntropyWithSoftmax")) return TrainingCriterion::CrossEntropyWithSoftmax; + else if (!_wcsicmp(s.c_str(), L"sequenceWithSoftmax")) return TrainingCriterion::SequenceWithSoftmax; + else if (!_wcsicmp(s.c_str(), L"squareError")) return TrainingCriterion::SquareError; + else if (!_wcsicmp(s.c_str(), L"logistic")) return TrainingCriterion::Logistic; else if (!_wcsicmp(s.c_str(), L"noiseContrastiveEstimation") || !_wcsicmp(s.c_str(), L"noiseContrastiveEstimationNode" /*spelling error, deprecated*/)) return TrainingCriterion::NCECrossEntropyWithSoftmax; else if (!!_wcsicmp(s.c_str(), L"classCrossEntropyWithSoftmax")) // (twisted logic to keep compiler happy w.r.t. not returning from LogicError) @@ -1786,20 +1786,16 @@ TrainingCriterion ParseTrainingCriterionString(wstring s) EvalCriterion ParseEvalCriterionString(wstring s) { - if (!_wcsicmp(s.c_str(), L"errorPrediction")) - return EvalCriterion::ErrorPrediction; - else if (!_wcsicmp(s.c_str(), L"crossEntropyWithSoftmax")) - return EvalCriterion::CrossEntropyWithSoftmax; - else if (!_wcsicmp(s.c_str(), L"sequenceWithSoftmax")) - return EvalCriterion::SequenceWithSoftmax; - else if (!_wcsicmp(s.c_str(), L"classCrossEntropyWithSoftmax")) - return EvalCriterion::ClassCrossEntropyWithSoftmax; + if (!_wcsicmp(s.c_str(), L"errorPrediction")) return EvalCriterion::ErrorPrediction; + else if (!_wcsicmp(s.c_str(), L"crossEntropyWithSoftmax")) return EvalCriterion::CrossEntropyWithSoftmax; + else if (!_wcsicmp(s.c_str(), L"sequenceWithSoftmax")) return EvalCriterion::SequenceWithSoftmax; + else if (!_wcsicmp(s.c_str(), L"classCrossEntropyWithSoftmax")) return EvalCriterion::ClassCrossEntropyWithSoftmax; + else if (!_wcsicmp(s.c_str(), L"logistic")) return EvalCriterion::Logistic; else if (!_wcsicmp(s.c_str(), L"noiseContrastiveEstimation") || !_wcsicmp(s.c_str(), L"noiseContrastiveEstimationNode" /*spelling error, deprecated*/)) return EvalCriterion::NCECrossEntropyWithSoftmax; - else if (!_wcsicmp(s.c_str(), L"logistic")) - return EvalCriterion::Logistic; else if (!!_wcsicmp(s.c_str(), L"squareError")) LogicError("evalCriterion: Invalid trainingCriterion value. Valid values are (errorPrediction | crossEntropyWithSoftmax | squareError | logistic | sequenceWithSoftmax)"); return EvalCriterion::SquareError; } + } } } diff --git a/Source/Common/Include/Sequences.h b/Source/Common/Include/Sequences.h index 30937a49e908..58624891780c 100644 --- a/Source/Common/Include/Sequences.h +++ b/Source/Common/Include/Sequences.h @@ -464,10 +464,6 @@ typedef MBLayout::MBLayoutPtr MBLayoutPtr; // TODO: This will in the future be able to hold sub-ranges for nested loops as well. // ----------------------------------------------------------------------- -// TODO: We should also have a FrameRange that selects all frames of a single sequence. Currently now possible since that would require Matrix::RowSlice() -// - likewise, LSTMNode does its own iteration, hence needs access to GetNumParallelSequences() or NumCols() in the whole-batch iterator -// BUGBUG: These nodes are currently broken and will need to be fixed: -// - CRFNode does not support > 1 parallel sequence class FrameRange { public: // TODO: make private (currently used from masking and DataFor) ; TODO: rename all members with m_ prefix diff --git a/Source/ComputationNetworkLib/ComputationNetwork.cpp b/Source/ComputationNetworkLib/ComputationNetwork.cpp index 116566baee3c..c18d46bbe197 100644 --- a/Source/ComputationNetworkLib/ComputationNetwork.cpp +++ b/Source/ComputationNetworkLib/ComputationNetwork.cpp @@ -418,7 +418,9 @@ bool ComputationNetwork::IsTypicalCriterionNode(ComputationNodeBasePtr nodePtr) nodePtr->OperationName() == OperationNameOf(CrossEntropyNode) || nodePtr->OperationName() == OperationNameOf(ClassBasedCrossEntropyWithSoftmaxNode) || nodePtr->OperationName() == OperationNameOf(ErrorPredictionNode) || +#ifdef COMING_SOON nodePtr->OperationName() == OperationNameOf(CRFNode) || +#endif nodePtr->OperationName() == OperationNameOf(DummyCriterionNode)) return true; diff --git a/Source/ComputationNetworkLib/ComputationNetwork.h b/Source/ComputationNetworkLib/ComputationNetwork.h index 28180742ae6f..c5040ada0746 100644 --- a/Source/ComputationNetworkLib/ComputationNetwork.h +++ b/Source/ComputationNetworkLib/ComputationNetwork.h @@ -981,10 +981,5 @@ typedef ComputationNetwork::ComputationNetworkPtr ComputationNetworkPtr; // - code prettification: // - sort all node implementations' methods into the same order; esp, ForwardProp() comes before partial // - sort important nodes first; move unused/experimental nodes into source files named accordingly -// - finish the job: -// - everywhere complete folding ForwardPropS() into ForwardProp(FrameRange()), same for partial -// - revise node constructors, merge by means of default parameters -// - known issues that need actual test cases to be fixed: -// - CRFNode::BackpropTo() fails for >1 parallel sequence due to DataFor() not being able to return whole sequences -// - implement reading of MB Layout in Binary, DSSM, and LivbSVM readers --is DSSM already done? + } } } diff --git a/Source/ComputationNetworkLib/ComputationNetworkBuilder.cpp b/Source/ComputationNetworkLib/ComputationNetworkBuilder.cpp index a531e16e03b6..ea89f48fb572 100644 --- a/Source/ComputationNetworkLib/ComputationNetworkBuilder.cpp +++ b/Source/ComputationNetworkLib/ComputationNetworkBuilder.cpp @@ -33,8 +33,11 @@ template static shared_ptr> CreateStandardNode(const std::wstring& nodeType, _Types&&... _Args) { // please keep this table sorted - if (nodeType == OperationNameOf(CRFNode)) return New>(forward<_Types>(_Args)...); - else if (nodeType == OperationNameOf(ClassBasedCrossEntropyWithSoftmaxNode))return New>(forward<_Types>(_Args)...); +#ifdef COMING_SOON + if (nodeType == OperationNameOf(CRFNode)) return New>(forward<_Types>(_Args)...); + else +#endif + if (nodeType == OperationNameOf(ClassBasedCrossEntropyWithSoftmaxNode))return New>(forward<_Types>(_Args)...); else if (nodeType == OperationNameOf(CosDistanceNode)) return New>(forward<_Types>(_Args)...); else if (nodeType == OperationNameOf(CosDistanceWithNegativeSamplesNode)) return New>(forward<_Types>(_Args)...); else if (nodeType == OperationNameOf(CosineNode)) return New>(forward<_Types>(_Args)...); @@ -346,6 +349,7 @@ shared_ptr> ComputationNetworkBuilder::Class return net.AddNodeToNetAndAttachInputs(New>(net.GetDeviceId(), nodeName), label, prediction, input_weight, cls_log_post_prob); } +#ifdef COMING_SOON template shared_ptr> ComputationNetworkBuilder::CRF(const ComputationNodePtr label, const ComputationNodePtr postDepScore, @@ -354,6 +358,7 @@ shared_ptr> ComputationNetworkBuilder::CRF(c { return net.AddNodeToNetAndAttachInputs(New>(net.GetDeviceId(), nodeName), label, postDepScore, transition_score); } +#endif template shared_ptr> ComputationNetworkBuilder::DummyCriterion(const ComputationNodePtr objectives, const ComputationNodePtr derivatives, const ComputationNodePtr prediction, const std::wstring nodeName) @@ -605,4 +610,5 @@ shared_ptr> ComputationNetworkBuilder::Batch template class ComputationNetworkBuilder; template class ComputationNetworkBuilder; + } } } diff --git a/Source/ComputationNetworkLib/ComputationNetworkBuilder.h b/Source/ComputationNetworkLib/ComputationNetworkBuilder.h index 5929151c54e1..ae67ca7cd3cc 100644 --- a/Source/ComputationNetworkLib/ComputationNetworkBuilder.h +++ b/Source/ComputationNetworkLib/ComputationNetworkBuilder.h @@ -73,7 +73,9 @@ class ComputationNetworkBuilder ComputationNodePtr AveragePooling(const ComputationNodePtr inputValues, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind, const std::wstring nodeName = L""); +#ifdef COMING_SOON ComputationNodePtr CRF(const ComputationNodePtr label, const ComputationNodePtr postDepScore, const ComputationNodePtr transition_score, const std::wstring nodeName = L""); +#endif ComputationNodePtr ClassCrossEntropyWithSoftmax(const ComputationNodePtr label, const ComputationNodePtr prediction, const ComputationNodePtr input_weight, const ComputationNodePtr cls_log_post_prob, const std::wstring nodeName = L""); ComputationNodePtr Cos(const ComputationNodePtr a, const std::wstring nodeName = L""); ComputationNodePtr CosDistance(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L""); diff --git a/Source/ComputationNetworkLib/ComputationNode.h b/Source/ComputationNetworkLib/ComputationNode.h index e7ef547b1066..f42ef2c8380d 100644 --- a/Source/ComputationNetworkLib/ComputationNode.h +++ b/Source/ComputationNetworkLib/ComputationNode.h @@ -1599,7 +1599,7 @@ inline shared_ptr New(_Types&&... _Args) // ======================================================================= // ComputationNodeNonLooping -- abstract base class for computation nodes that do not implement eval/partial for individual frames -// Such as CRFNode, LSTMNode, ParallelNode, SequenceDecoderNode, TimeReverseNode (BatchModeNode), and TransposeNode. +// Such as CRFNode, SequenceDecoderNode, and training criteria. // ======================================================================= // This will provide default implementations for those two functions that will fail at runtime with a meaningful error. diff --git a/Source/ComputationNetworkLib/TrainingNodes.h b/Source/ComputationNetworkLib/TrainingNodes.h index aefe04355c69..65743a25ee87 100644 --- a/Source/ComputationNetworkLib/TrainingNodes.h +++ b/Source/ComputationNetworkLib/TrainingNodes.h @@ -1016,6 +1016,8 @@ class ClassBasedCrossEntropyWithSoftmaxNode : public ComputationNodeNonLooping / template class ClassBasedCrossEntropyWithSoftmaxNode; template class ClassBasedCrossEntropyWithSoftmaxNode; +#ifdef COMING_SOON + // ----------------------------------------------------------------------- // CRFNode (labels, position_dependent_scores, transition_scores) // - labels: output label vector of [0:T-1] @@ -1315,6 +1317,8 @@ class CRFNode : public ComputationNodeNonLooping /*ComputationNode*/, int mEndLbl; }; +#endif + // ----------------------------------------------------------------------- // LogisticNode (labels, prediction, weight) // calculates: -sum(left * log(right) + (1-left)*log(1-right)) (optionally * weight)