diff --git a/Source/1BitSGD b/Source/1BitSGD
index f785679a6bd5..f8013e1415c1 160000
--- a/Source/1BitSGD
+++ b/Source/1BitSGD
@@ -1 +1 @@
-Subproject commit f785679a6bd5cc089b138b3c6bcb68e4b1f345ae
+Subproject commit f8013e1415c129f681eb60b01af6e1485b078caf
diff --git a/Source/Common/Include/Basics.h b/Source/Common/Include/Basics.h
index b5ed7953de45..5daa33c8ef13 100644
--- a/Source/Common/Include/Basics.h
+++ b/Source/Common/Include/Basics.h
@@ -57,10 +57,10 @@ __declspec_noreturn static inline void ThrowFormatted(const char* format, ...)
     va_start(args, format);
     vsprintf(buffer, format, args);
 
-    Microsoft::MSR::CNTK::DebugUtil::PrintCallStack();
 #ifdef _DEBUG // print this to log before throwing, so we can see what the error is
     fprintf(stderr, "About to throw exception '%s'\n", buffer);
 #endif
+    Microsoft::MSR::CNTK::DebugUtil::PrintCallStack();
     throw E(buffer);
 };
 #pragma warning(pop)
diff --git a/Source/Common/Include/TensorShape.h b/Source/Common/Include/TensorShape.h
index ebbb47059fc8..d02a8433d9f9 100644
--- a/Source/Common/Include/TensorShape.h
+++ b/Source/Common/Include/TensorShape.h
@@ -440,14 +440,9 @@ struct TensorShape
     }
 
     // accessors
-    size_t GetDim(size_t k) const
-    {
-        return m_dims[k];
-    }
-    size_t GetRank() const
-    {
-        return m_dims.size();
-    }
+    size_t GetDim(size_t k) const { return m_dims[k]; }
+    size_t GetDimPadded(size_t k) const { return k < GetRank() ? GetDim(k) : 1; } // like GetDim() but return 1 for extra (out of bounds) dimensions
+    size_t GetRank() const { return m_dims.size(); }
     size_t GetNumElements() const
     {
         if (m_dims.empty())
@@ -467,24 +462,20 @@ struct TensorShape
     }
 
     // vector-like accessors
-    size_t operator[](size_t k) const
-    {
-        return GetDim(k);
-    }
-    size_t size() const
-    {
-        return GetRank();
-    }
+    size_t operator[](size_t k) const { return GetDim(k); }
+    size_t size() const { return GetRank(); }
 
-    const SmallVector<size_t>& GetDims() const
-    {
-        return m_dims;
-    } // get all, e.g. for logging or for constructing derived tensors with edited dimensions
-    const SmallVector<ptrdiff_t>& GetStrides() const
+    const SmallVector<size_t>& GetDims() const { return m_dims; } // get all, e.g. for logging or for constructing derived tensors with edited dimensions
+    const SmallVector<ptrdiff_t>& GetStrides() const { return m_strides; }
+
+    // test whether the tensor represents a column vector (but allowing added broadcasting dimensions)
+    bool IsColumnVector() const
     {
-        return m_strides;
+        for (size_t k = 1; k < size(); k++)
+            if (m_dims[k] != 1)
+                return false;
+        return true;
     }
-
     // legacy helper function for RowSliceNode. Will go away.
     bool IsVectorStoredAsImage() const
     {
@@ -580,22 +571,29 @@ struct TensorShape
             m_strides[k] = 0;
         return *this;
     }
-    TensorShape& PadInPlace(size_t numDims) // append singleton dimensions
+    TensorShape& PadRankInPlace(size_t desiredRank) // append singleton dimensions
    {
         VerifyIsDense();
-        if (numDims < GetRank())
+        if (desiredRank < GetRank())
             LogicError("Pad() cannot drop a shorten the dimensions.");
         else
-            while (GetRank() < numDims)
+            while (GetRank() < desiredRank)
             {
                 m_strides.push_back(GetRank() > 0 ? m_strides.back() * (ptrdiff_t) m_dims.back() : 1);
                 m_dims.push_back(1);
             }
         return *this;
     }
+    TensorShape PadRank(size_t desiredRank) const // append singleton dimensions
+    {
+        // TODO: simplify to: return TensorShape(*this).PadRankInPlace(desiredRank);
+        TensorShape result(*this);
+        result.PadRankInPlace(desiredRank);
+        return result;
+    }
     TensorShape& AppendInPlace(size_t rank, size_t newDim) // concatenate one new dimension at position 'rank'
     {
-        PadInPlace(rank);
+        PadRankInPlace(rank);
         // TODO: How to do this right in case of arbitrary strides? Compute the new stride based on m_allocation or something? Is it even possible? Or do we need to guard?
         m_strides.push_back(GetRank() > 0 ? m_strides.back() * (ptrdiff_t) m_dims.back() : 1);
         m_dims.push_back(newDim);
@@ -608,19 +606,25 @@ struct TensorShape
         result.AppendInPlace(rank, newDim);
         return result;
     }
+    // narrow a dimension k to given bounds [begin, end), done in-place
+    TensorShape& NarrowTo(size_t k, size_t begin, size_t end)
+    {
+        if (k >= size())
+            LogicError("NarrowTo: Index out of bounds.");
+        if (end <= begin || end > m_dims[k])
+            LogicError("NarrowTo: Invalid bounds parameter, dimensions must be at least one.");
+        m_offset += m_strides[k] * begin;
+        m_dims[k] = end - begin;
+        return *this;
+    }
+    // narrow all dimensions to two given bounds vectors, done in-place
 
     template <typename DimensionVector>
     TensorShape& NarrowTo(const std::pair<DimensionVector, DimensionVector>& bounds /*begin[], end[]*/)
     {
         if (size() != bounds.first.size() || size() != bounds.second.size())
-            LogicError("NarrowedTo: Bounds parameter must have same rank as tensor.");
+            LogicError("NarrowTo: Bounds parameter must have same rank as tensor.");
         for (size_t k = 0; k < size(); k++)
-            if (bounds.second[k] <= bounds.first[k] || (size_t) bounds.second[k] > m_dims[k])
-                LogicError("NarrowedTo: Invalid bounds parameter, dimensions must be at least one.");
-        for (size_t k = 0; k < size(); k++)
-        {
-            m_offset += m_strides[k] * bounds.first[k];
-            m_dims[k] = bounds.second[k] - bounds.first[k];
-        }
+            NarrowTo(k, (size_t)bounds.first[k], (size_t)bounds.second[k]);
         return *this;
     }
diff --git a/Source/ComputationNetworkLib/ComputationNode.cpp b/Source/ComputationNetworkLib/ComputationNode.cpp
index 5642121f450d..95b7bf26811c 100644
--- a/Source/ComputationNetworkLib/ComputationNode.cpp
+++ b/Source/ComputationNetworkLib/ComputationNode.cpp
@@ -55,10 +55,11 @@ void ComputationNodeBase::ValidateUnaryMap(bool isFinalValidationPass)
     InferMBLayoutFromInputsForStandardCase();
     SetDims(Input(0));
 }
+
 // binary zip operation, e.g. Plus
-// If allowScaling then one can be a sub-dimension of the other (if layout then only for rows, otherwise for cols, too).
+// If allowBroadcast then one can be a sub-dimension of the other (if layout then only for rows, otherwise for cols, too).
 // This also helpfully resizes the children if not yet sized.
-void ComputationNodeBase::ValidateBinaryZip(bool isFinalValidationPass, bool allowMultiples)
+void ComputationNodeBase::ValidateBinaryZip(bool isFinalValidationPass, bool allowBroadcast)
 {
     assert(m_inputs.size() == 2);
     ComputationNodeBase::Validate(isFinalValidationPass);
@@ -84,6 +85,7 @@ void ComputationNodeBase::ValidateBinaryZip(bool isFinalValidationPass, bool all
     for (size_t k = 0; k < shape1.GetRank(); k++)
     {
         size_t dim1 = shape1[k];
+        // BUGBUG: We must consider the allowBroadcast flag here.
         if (dims[k] == 1) // is [0] broadcasting?
             dims[k] = dim1; // then use dimension we broadcast to
         else if (dim1 == 1) // if [1] is broadcasting
diff --git a/Source/ComputationNetworkLib/ComputationNode.h b/Source/ComputationNetworkLib/ComputationNode.h
index 68b7143fc197..5ee5117a04a6 100644
--- a/Source/ComputationNetworkLib/ComputationNode.h
+++ b/Source/ComputationNetworkLib/ComputationNode.h
@@ -620,7 +620,7 @@ protected: public: // ...the following should be protected, but nodes inquire ab
     void ValidateUnaryMap(bool isFinalValidationPass);
     void ValidateUnaryReduce(bool isFinalValidationPass);
     void ValidateInferBinaryInputDims();
-    void ValidateBinaryZip(bool isFinalValidationPass, bool allowMultiples);
+    void ValidateBinaryZip(bool isFinalValidationPass, bool allowBroadcast);
     void ValidateBinaryReduce(bool isFinalValidationPass);
     void InferMBLayoutFromInputsForStandardCase();
     virtual void ValidateInferInputDimsFrom(const TensorShape&) = 0; // (implemented by ComputationNode
@@ -1876,7 +1876,7 @@ class BinaryElementWiseNode : public ComputationNode<ElemType>, public NumInputs
 
     virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
     {
-        ValidateBinaryZip(isFinalValidationPass, true /*allowMultiples*/);
+        ValidateBinaryZip(isFinalValidationPass, true /*allowBroadcast*/);
     }
 };
 
diff --git a/Source/ComputationNetworkLib/ReshapingNodes.h b/Source/ComputationNetworkLib/ReshapingNodes.h
index d5410ca9af3d..10315a9ef027 100644
--- a/Source/ComputationNetworkLib/ReshapingNodes.h
+++ b/Source/ComputationNetworkLib/ReshapingNodes.h
@@ -339,7 +339,8 @@ class RowSliceNode : public ComputationNode<ElemType>, public NumInputs<1>
 
         // RowSlice cannot slice tensors.
         // TODO: Create a TensorSlice operation, or just Slice.
-        if (isFinalValidationPass && Input(0)->HasSampleLayout() && !Input(0)->GetSampleLayout().IsVectorStoredAsImage() // legacy
+        if (isFinalValidationPass && !Input(0)->GetSampleLayout().IsColumnVector()
+            && !Input(0)->GetSampleLayout().IsVectorStoredAsImage() // legacy
             )
             RuntimeError("%ls %ls operation: Input must be a vector, tensor shape [%s] not allowed.", NodeName().c_str(), OperationName().c_str(), string(Input(0)->GetSampleLayout()).c_str());
         SetDims(TensorShape(m_sliceHeight), HasMBLayout());
@@ -355,6 +356,8 @@ template class RowSliceNode<double>;
 // -----------------------------------------------------------------------
 // RowStackNode (input0, input1, ...)
 // stacks multiple inputs on top of each other
+// The inputs will be spliced w.r.t. their first tensor dimension (the "row" dimension).
+// TODO: This is very close to the planned SpliceNode (just make m_spliceDim configurable) except for splicing along time.
 // -----------------------------------------------------------------------
 
 template <class ElemType>
@@ -367,6 +370,8 @@ class RowStackNode : public ComputationNode<ElemType> // note: not deriving from
         return L"RowStack";
     }
 
+    static const size_t m_spliceDim = 0; // tensor dimension according to which to stack --TODO: Make this a parameter.
+
 public:
     DeclareConstructorFromConfig(RowStackNode);
     RowStackNode(DEVICEID_TYPE deviceId, const wstring& name)
@@ -380,36 +385,50 @@ class RowStackNode : public ComputationNode<ElemType> // note: not deriving from
         if (flags & CopyNodeFlags::copyNodeChildren)
         {
             auto node = dynamic_pointer_cast<RowStackNode<ElemType>>(nodeP);
-            node->m_startRowIndices = m_startRowIndices;
+            node->m_firstIndices = m_firstIndices;
         }
     }
 
-    virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
-    {
-        Input(inputIndex)->GradientFor(fr).AddWithRowSliceValuesOf(GradientFor(fr), m_startRowIndices[inputIndex], Input(inputIndex)->GetSampleMatrixNumRows());
-    }
+private:
 
-    virtual bool OutputUsedInComputingInputNodesGradients() const override
+    // changes the result slice (which includes all stacked inputs) to the stripe that matches where one of the inputs goes
+    TensorShape NarrowToStripe(const TensorShape & resultSlice, size_t inputIndex)
     {
-        // The RowStackNode does not require its output value for computing
-        // the gradients of its input nodes
-        return false;
+        auto resultSubSlice = resultSlice;
+        resultSubSlice.NarrowTo(m_spliceDim, m_firstIndices[inputIndex], m_firstIndices[inputIndex + 1]);
+        return resultSubSlice;
     }
 
-    virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override
-    {
-        // The RowStackNode does not require any of it's input's values for computing
-        // the gradients of its input nodes
-        UNREFERENCED_PARAMETER(childIndex);
-        return false;
-    }
+public:
 
     virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
     {
+        size_t rank = DetermineElementwiseTensorRank();
+        let outputSlice = GetTensorSliceFor(rank, fr); // tensor slice that represents the entire output for FrameRange
+
         for (size_t inputIndex = 0; inputIndex < GetNumInputs(); inputIndex++)
-            ValueFor(fr).AssignToRowSliceValuesOf(Input(inputIndex)->ValueFor(fr), m_startRowIndices[inputIndex], Input(inputIndex)->GetSampleMatrixNumRows());
+        {
+            let input = Input(inputIndex)->ValueTensorFor(rank, fr.AllowBroadcast());
+            let outputSubSlice = NarrowToStripe(outputSlice, inputIndex);
+            auto output = TensorView<ElemType>(Value(), outputSubSlice);
+            output.AssignCopyOf(input);
+        }
     }
 
+    virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
+    {
+        size_t rank = DetermineElementwiseTensorRank();
+        let outputSlice = GetTensorSliceFor(rank, fr); // tensor slice that represents the entire output for FrameRange
+
+        auto inputGrad = Input(inputIndex)->GradientTensorFor(rank, fr.AllowBroadcast());
+        let outputSubSlice = NarrowToStripe(outputSlice, inputIndex);
+        let outputGrad = TensorView<ElemType>(Gradient(), outputSubSlice);
+        inputGrad.AddCopyOf(outputGrad);
+    }
+
+    virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
+    virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }
+
     virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
     {
         Base::Validate(isFinalValidationPass);
@@ -417,45 +436,49 @@ class RowStackNode : public ComputationNode<ElemType> // note: not deriving from
 
        // we must fuse all tensor shapes
         // All dimensions but the last must be the same. (In a future version, we should be able to stack along any given dimension.)
-        // Note that trailing ones may be stripped/broadcasting, so we must first pad.
-        SmallVector<size_t> dims = Input(0)->GetSampleLayout().GetDims();
-        size_t maxRank = 0; // TODO: very similar to DetermineElementwiseTensorRank() except that that one also includes the output
+
+        // determine maximum rank (we can stack tensors with lower rank, which will have their dimensions padded to max automatically)
+        size_t maxRank = m_spliceDim + 1; // spliceDim may exceed all of them, which will create a new dimension, e.g. stacking column vectors into a matrix
         for (int i = 0; i < GetNumInputs(); i++)
-            if (maxRank < GetInputSampleLayout(i).GetRank())
-                maxRank = GetInputSampleLayout(i).GetRank();
-        dims.resize(maxRank - 1, 1); // pad and/or strip trailing dimension
-
-        // count totalRows and form m_startRowIndices[] array, which is the cumulative sum of matrix heights
-        m_startRowIndices.resize(GetNumInputs());
-        size_t totalRows = 0;
-        size_t totalTrailingDim = 0; // last tensor dimension is what gets stacked up
+            if (maxRank < Input(i)->GetSampleLayout().GetRank())
+                maxRank = Input(i)->GetSampleLayout().GetRank();
+
+        // the following loop does multiple things:
+        // - count total dimension along m_spliceDim, and form associated m_firstIndices[] array
+        // - verify all other dimension's compatibility (we allow broadcasting)
+        auto dims = Input(0)->GetSampleLayout().PadRank(maxRank).GetDims(); // dimensions padded to max rank; start with dims of first input
+        dims[m_spliceDim] = 0; // this dimension is created, while all others are verified for consistency
+        m_firstIndices.assign(1, 0); // accumulative splice dimension; start with 0
         for (int i = 0; i < GetNumInputs(); i++)
         {
-            m_startRowIndices[i] = totalRows;
-            totalRows += Input(i)->GetSampleMatrixNumRows();
-            SmallVector<size_t> thisDims = Input(i)->GetSampleLayout().GetDims();
-            thisDims.resize(maxRank, 1); // pad and/or strip trailing dimension
-            totalTrailingDim += thisDims.back(); // count total trailing dimensions (that's what we have after stacking)
-            thisDims.resize(maxRank - 1); // verify that dimensions match
-            if (dims != thisDims)
-                InvalidArgument("%ls %ls operation: Incompatible tensor dimension [%s] for input %ls %ls",
-                                NodeName().c_str(), OperationName().c_str(), std::string(Input(i)->GetSampleLayout()).c_str(),
-                                Input(i)->NodeName().c_str(), Input(i)->OperationName().c_str());
+            // check/fuse dims and accumulate the spliced dimension
+            let & shape = Input(i)->GetSampleLayout();
+            for (size_t k = 0; k < maxRank; k++)
+            {
+                size_t dim = shape.GetDimPadded(k);
+                if (k == m_spliceDim)
+                {
+                    // accumulate the spliced dimension
+                    dims[m_spliceDim] += dim;
+                    m_firstIndices.push_back(dims[m_spliceDim]); // and remember it
+                }
+                else
+                {
+                    // check/fuse dimensions
+                    if (isFinalValidationPass && dim != dims[k] && dim != 1 && dims[k] != 1)
+                        InvalidArgument("%ls %ls operation: Conflicting dimension %d between %ls %ls operation (%d) and other(s) (%d)",
+                                        NodeName().c_str(), OperationName().c_str(), (int)k, Input(i)->NodeName().c_str(), Input(i)->OperationName().c_str(), (int)dim, (int)dims[k]);
+                    if (dims[k] == 1) // broadcast
+                        dims[k] = dim;
+                }
+            }
         }
 
-        // warn that this node will destroy the image size information from the child
-        if (Input(0)->HasSampleLayout())
-            fprintf(stderr, "WARNING: RowStack operation cannot inherit image size information from its child. Image size info is lost.\n");
-
-        dims.push_back(totalTrailingDim);
         SetDims(TensorShape(dims), HasMBLayout());
-
-        if (totalRows != GetSampleMatrixNumRows())
-            LogicError("%ls RowStack operation: Tensor shapes of inputs were not compatible after all?", NodeName().c_str());
     }
 
 private:
-    std::vector<size_t> m_startRowIndices; // start row number in the stacked matrix of each input (child) (cumsum of matrix heights)
+    std::vector<size_t> m_firstIndices; // start row number in the stacked matrix of each input (child) (cumsum of matrix heights); plus one final entry that equals the total dimension
 };
 
 template class RowStackNode<float>;
diff --git a/Source/Math/TensorView.cpp b/Source/Math/TensorView.cpp
index e2c46f6dd261..359d786a18c5 100644
--- a/Source/Math/TensorView.cpp
+++ b/Source/Math/TensorView.cpp
@@ -78,7 +78,7 @@ static void PrepareTensorOperands(array<TensorShape, N> shapes, array<size_t, N>
             dims = shapes[i].GetRank();
     for (size_t i = 0; i < N; i++)
         if (shapes[i].GetRank() < dims)
-            shapes[i].PadInPlace(dims);
+            shapes[i].PadRankInPlace(dims);
     // all shapes[] now have the same rank
 
     // determine operation shape (max over all dimensions)