rewrote RowStackNode using the tensor lib
frankseide committed Jan 24, 2016
1 parent 161e6fb commit 2f64031
Showing 7 changed files with 119 additions and 90 deletions.
2 changes: 1 addition & 1 deletion Source/1BitSGD
Submodule 1BitSGD updated from f78567 to f8013e
2 changes: 1 addition & 1 deletion Source/Common/Include/Basics.h
@@ -57,10 +57,10 @@ __declspec_noreturn static inline void ThrowFormatted(const char* format, ...)
 
     va_start(args, format);
     vsprintf(buffer, format, args);
-    Microsoft::MSR::CNTK::DebugUtil::PrintCallStack();
 #ifdef _DEBUG // print this to log before throwing, so we can see what the error is
     fprintf(stderr, "About to throw exception '%s'\n", buffer);
 #endif
+    Microsoft::MSR::CNTK::DebugUtil::PrintCallStack();
     throw E(buffer);
 };
 #pragma warning(pop)
74 changes: 39 additions & 35 deletions Source/Common/Include/TensorShape.h
@@ -440,14 +440,9 @@ struct TensorShape
     }
 
     // accessors
-    size_t GetDim(size_t k) const
-    {
-        return m_dims[k];
-    }
-    size_t GetRank() const
-    {
-        return m_dims.size();
-    }
+    size_t GetDim(size_t k) const { return m_dims[k]; }
+    size_t GetDimPadded(size_t k) const { return k < GetRank() ? GetDim(k) : 1; } // like GetDim() but return 1 for extra (out of bounds) dimensions
+    size_t GetRank() const { return m_dims.size(); }
     size_t GetNumElements() const
     {
         if (m_dims.empty())
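
The new GetDimPadded() is what lets the rewritten RowStack validation below walk shapes of different rank as if they all had the same rank: out-of-range dimensions simply read as 1. A minimal standalone sketch of that contract (ToyShape is a toy stand-in for illustration, not CNTK's actual TensorShape):

#include <cstdio>
#include <vector>

struct ToyShape
{
    std::vector<size_t> dims;
    size_t GetRank() const { return dims.size(); }
    size_t GetDim(size_t k) const { return dims[k]; }
    // out-of-bounds dimensions read as singleton (broadcasting) dimensions
    size_t GetDimPadded(size_t k) const { return k < GetRank() ? GetDim(k) : 1; }
};

int main()
{
    ToyShape s{{512, 3}}; // a [512 x 3] tensor
    for (size_t k = 0; k < 4; k++)
        printf("dim[%zu] = %zu\n", k, s.GetDimPadded(k)); // prints 512, 3, 1, 1
}
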
@@ -467,24 +462,20 @@
     }
 
     // vector-like accessors
-    size_t operator[](size_t k) const
-    {
-        return GetDim(k);
-    }
-    size_t size() const
-    {
-        return GetRank();
-    }
+    size_t operator[](size_t k) const { return GetDim(k); }
+    size_t size() const { return GetRank(); }
 
-    const SmallVector<size_t>& GetDims() const
-    {
-        return m_dims;
-    } // get all, e.g. for logging or for constructing derived tensors with edited dimensions
-    const SmallVector<ptrdiff_t>& GetStrides() const
-    {
-        return m_strides;
-    }
+    const SmallVector<size_t>& GetDims() const { return m_dims; } // get all, e.g. for logging or for constructing derived tensors with edited dimensions
+    const SmallVector<ptrdiff_t>& GetStrides() const { return m_strides; }
+
+    // test whether the tensor represents a column vector (but allowing added broadcasting dimensions)
+    bool IsColumnVector() const
+    {
+        for (size_t k = 1; k < size(); k++)
+            if (m_dims[k] != 1)
+                return false;
+        return true;
+    }
 
     // legacy helper function for RowSliceNode. Will go away.
     bool IsVectorStoredAsImage() const
     {
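
IsColumnVector() treats every dimension past the first as a broadcasting dimension, so [512], [512 x 1], and [512 x 1 x 1] all qualify while [512 x 3] does not. A sketch of the predicate as a free function over a plain dims vector (illustration only, not the class member above):

#include <cstddef>
#include <vector>

bool IsColumnVector(const std::vector<size_t>& dims)
{
    for (size_t k = 1; k < dims.size(); k++) // dimension 0 may be any size
        if (dims[k] != 1)
            return false;
    return true;
}
// IsColumnVector({512})       -> true   (plain column vector)
// IsColumnVector({512, 1, 1}) -> true   (added singleton dimensions are fine)
// IsColumnVector({512, 3})    -> false  (a genuine matrix is rejected)
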
@@ -580,22 +571,29 @@ struct TensorShape
             m_strides[k] = 0;
         return *this;
     }
-    TensorShape& PadInPlace(size_t numDims) // append singleton dimensions
+    TensorShape& PadRankInPlace(size_t desiredRank) // append singleton dimensions
     {
         VerifyIsDense();
-        if (numDims < GetRank())
+        if (desiredRank < GetRank())
             LogicError("Pad() cannot drop or shorten the dimensions.");
         else
-            while (GetRank() < numDims)
+            while (GetRank() < desiredRank)
             {
                 m_strides.push_back(GetRank() > 0 ? m_strides.back() * (ptrdiff_t) m_dims.back() : 1);
                 m_dims.push_back(1);
             }
         return *this;
     }
+    TensorShape PadRank(size_t desiredRank) const // append singleton dimensions
+    {
+        // TODO: simplify to: return TensorShape(*this).PadRankInPlace(desiredRank);
+        TensorShape result(*this);
+        result.PadRankInPlace(desiredRank);
+        return result;
+    }
     TensorShape& AppendInPlace(size_t rank, size_t newDim) // concatenate one new dimension at position 'rank'
     {
-        PadInPlace(rank);
+        PadRankInPlace(rank);
         // TODO: How to do this right in case of arbitrary strides? Compute the new stride based on m_allocation or something? Is it even possible? Or do we need to guard?
         m_strides.push_back(GetRank() > 0 ? m_strides.back() * (ptrdiff_t) m_dims.back() : 1);
         m_dims.push_back(newDim);
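
The stride rule in PadRankInPlace() — each appended stride is the previous stride times the previous dimension — guarantees that padding only relabels the same dense memory. A worked toy example of the arithmetic (a standalone sketch, not the real class):

#include <cstddef>
#include <cstdio>
#include <vector>

int main()
{
    // a dense [3 x 4] column-major shape: strides {1, 3}
    std::vector<size_t> dims       = {3, 4};
    std::vector<ptrdiff_t> strides = {1, 3};
    size_t desiredRank = 4;
    while (dims.size() < desiredRank) // same rule as PadRankInPlace()
    {
        strides.push_back(dims.empty() ? 1 : strides.back() * (ptrdiff_t)dims.back());
        dims.push_back(1);
    }
    // dims = {3, 4, 1, 1}, strides = {1, 3, 12, 12}: element (i, j, 0, 0) still maps
    // to offset i*1 + j*3, so padding never moves data
    for (size_t k = 0; k < dims.size(); k++)
        printf("dim %zu: size %zu, stride %td\n", k, dims[k], strides[k]);
}
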
@@ -608,19 +606,25 @@
         result.AppendInPlace(rank, newDim);
         return result;
     }
+    // narrow a dimension k to given bounds [begin, end), done in-place
+    TensorShape& NarrowTo(size_t k, size_t begin, size_t end)
+    {
+        if (k >= size())
+            LogicError("NarrowTo: Index out of bounds.");
+        if (end <= begin || end > m_dims[k])
+            LogicError("NarrowTo: Invalid bounds parameter, dimensions must be at least one.");
+        m_offset += m_strides[k] * begin;
+        m_dims[k] = end - begin;
+        return *this;
+    }
+    // narrow all dimensions to two given bounds vectors, done in-place
     template <class DimensionVector>
     TensorShape& NarrowTo(const std::pair<DimensionVector, DimensionVector>& bounds /*begin[], end[]*/)
     {
         if (size() != bounds.first.size() || size() != bounds.second.size())
-            LogicError("NarrowedTo: Bounds parameter must have same rank as tensor.");
+            LogicError("NarrowTo: Bounds parameter must have same rank as tensor.");
         for (size_t k = 0; k < size(); k++)
-            if (bounds.second[k] <= bounds.first[k] || (size_t) bounds.second[k] > m_dims[k])
-                LogicError("NarrowedTo: Invalid bounds parameter, dimensions must be at least one.");
-        for (size_t k = 0; k < size(); k++)
-        {
-            m_offset += m_strides[k] * bounds.first[k];
-            m_dims[k] = bounds.second[k] - bounds.first[k];
-        }
+            NarrowTo(k, (size_t)bounds.first[k], (size_t)bounds.second[k]);
         return *this;
     }
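
NarrowTo() slices without copying: it advances m_offset by stride times the begin index and shrinks the dimension, leaving strides untouched. A worked example of the arithmetic on a dense [10 x 5] shape (a toy sketch; the real method also validates its bounds):

#include <cstddef>
#include <cstdio>

int main()
{
    // dense [10 x 5] shape: strides {1, 10}, offset 0
    size_t dims[2]       = {10, 5};
    ptrdiff_t strides[2] = {1, 10};
    size_t offset = 0;

    // NarrowTo(0, 2, 6): keep only rows [2, 6)
    size_t k = 0, begin = 2, end = 6;
    offset += strides[k] * begin; // offset = 2
    dims[k] = end - begin;        // dims = {4, 5}

    // element (i, j) of the slice maps to offset + i*1 + j*10 in the original
    // buffer; this is exactly how RowStackNode addresses each input's stripe below
    printf("offset=%zu, dims=[%zu x %zu]\n", offset, dims[0], dims[1]);
}
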

6 changes: 4 additions & 2 deletions Source/ComputationNetworkLib/ComputationNode.cpp
@@ -55,10 +55,11 @@ void ComputationNodeBase::ValidateUnaryMap(bool isFinalValidationPass)
     InferMBLayoutFromInputsForStandardCase();
     SetDims(Input(0));
 }
+
 // binary zip operation, e.g. Plus
-// If allowScaling then one can be a sub-dimension of the other (if layout then only for rows, otherwise for cols, too).
+// If allowBroadcast then one can be a sub-dimension of the other (if layout then only for rows, otherwise for cols, too).
 // This also helpfully resizes the children if not yet sized.
-void ComputationNodeBase::ValidateBinaryZip(bool isFinalValidationPass, bool allowMultiples)
+void ComputationNodeBase::ValidateBinaryZip(bool isFinalValidationPass, bool allowBroadcast)
 {
     assert(m_inputs.size() == 2);
     ComputationNodeBase::Validate(isFinalValidationPass);
@@ -84,6 +85,7 @@ void ComputationNodeBase::ValidateBinaryZip(bool isFinalValidationPass, bool allowBroadcast)
     for (size_t k = 0; k < shape1.GetRank(); k++)
     {
         size_t dim1 = shape1[k];
+        // BUGBUG: We must consider the allowBroadcast flag here.
         if (dims[k] == 1) // is [0] broadcasting?
             dims[k] = dim1; // then use dimension we broadcast to
         else if (dim1 == 1) // if [1] is broadcasting
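
The fusing rule above pairs dimensions elementwise and lets a dimension of 1 broadcast to the other operand's dimension, e.g. [13 x 1] against [1 x 42] fuses to [13 x 42]. A standalone sketch of just that rule (FuseShapes is a hypothetical helper, not the function above, and it ignores the allowBroadcast flag exactly as the BUGBUG notes):

#include <cstdio>
#include <stdexcept>
#include <vector>

std::vector<size_t> FuseShapes(std::vector<size_t> dims, const std::vector<size_t>& shape1)
{
    for (size_t k = 0; k < shape1.size() && k < dims.size(); k++)
    {
        size_t dim1 = shape1[k];
        if (dims[k] == 1)    // is [0] broadcasting?
            dims[k] = dim1;  // then use dimension we broadcast to
        else if (dim1 != 1 && dim1 != dims[k]) // neither broadcasts: must match
            throw std::invalid_argument("incompatible dimensions");
    }
    return dims;
}

int main()
{
    auto fused = FuseShapes({13, 1}, {1, 42}); // -> [13 x 42]
    printf("[%zu x %zu]\n", fused[0], fused[1]);
}
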
4 changes: 2 additions & 2 deletions Source/ComputationNetworkLib/ComputationNode.h
@@ -620,7 +620,7 @@ protected: public: // ...the following should be protected, but nodes inquire ab
     void ValidateUnaryMap(bool isFinalValidationPass);
     void ValidateUnaryReduce(bool isFinalValidationPass);
     void ValidateInferBinaryInputDims();
-    void ValidateBinaryZip(bool isFinalValidationPass, bool allowMultiples);
+    void ValidateBinaryZip(bool isFinalValidationPass, bool allowBroadcast);
     void ValidateBinaryReduce(bool isFinalValidationPass);
     void InferMBLayoutFromInputsForStandardCase();
     virtual void ValidateInferInputDimsFrom(const TensorShape&) = 0; // (implemented by ComputationNode<ElemType>
@@ -1876,7 +1876,7 @@ class BinaryElementWiseNode : public ComputationNode<ElemType>, public NumInputs<2>
 
     virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
     {
-        ValidateBinaryZip(isFinalValidationPass, true /*allowMultiples*/);
+        ValidateBinaryZip(isFinalValidationPass, true /*allowBroadcast*/);
     }
 };
 
119 changes: 71 additions & 48 deletions Source/ComputationNetworkLib/ReshapingNodes.h
@@ -339,7 +339,8 @@ class RowSliceNode : public ComputationNode<ElemType>, public NumInputs<1>
 
         // RowSlice cannot slice tensors.
         // TODO: Create a TensorSlice operation, or just Slice.
-        if (isFinalValidationPass && Input(0)->HasSampleLayout() && !Input(0)->GetSampleLayout().IsVectorStoredAsImage() // legacy
+        if (isFinalValidationPass && !Input(0)->GetSampleLayout().IsColumnVector()
+            && !Input(0)->GetSampleLayout().IsVectorStoredAsImage() // legacy
             )
             RuntimeError("%ls %ls operation: Input must be a vector, tensor shape [%s] not allowed.", NodeName().c_str(), OperationName().c_str(), string(Input(0)->GetSampleLayout()).c_str());
         SetDims(TensorShape(m_sliceHeight), HasMBLayout());
@@ -355,6 +356,8 @@ template class RowSliceNode<double>;
 // -----------------------------------------------------------------------
 // RowStackNode (input0, input1, ...)
 // stacks multiple inputs on top of each other
+// The inputs will be spliced w.r.t. their first tensor dimension (the "row" dimension).
+// TODO: This is very close to the planned SpliceNode (just make m_spliceDim configurable) except for splicing along time.
 // -----------------------------------------------------------------------
 
 template <class ElemType>
@@ -367,6 +370,8 @@ class RowStackNode : public ComputationNode<ElemType> // note: not deriving from
         return L"RowStack";
     }
 
+    static const size_t m_spliceDim = 0; // tensor dimension according to which to stack --TODO: Make this a parameter.
+
 public:
     DeclareConstructorFromConfig(RowStackNode);
     RowStackNode(DEVICEID_TYPE deviceId, const wstring& name)
@@ -380,82 +385,100 @@ class RowStackNode : public ComputationNode<ElemType> // note: not deriving from
         if (flags & CopyNodeFlags::copyNodeChildren)
         {
             auto node = dynamic_pointer_cast<RowStackNode<ElemType>>(nodeP);
-            node->m_startRowIndices = m_startRowIndices;
+            node->m_firstIndices = m_firstIndices;
         }
     }
 
-    virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
-    {
-        Input(inputIndex)->GradientFor(fr).AddWithRowSliceValuesOf(GradientFor(fr), m_startRowIndices[inputIndex], Input(inputIndex)->GetSampleMatrixNumRows());
-    }
+private:
+    // changes the result slice (which includes all stacked inputs) to the stripe that matches where one of the inputs goes
+    TensorShape NarrowToStripe(const TensorShape & resultSlice, size_t inputIndex)
+    {
+        auto resultSubSlice = resultSlice;
+        resultSubSlice.NarrowTo(m_spliceDim, m_firstIndices[inputIndex], m_firstIndices[inputIndex + 1]);
+        return resultSubSlice;
+    }
 
-    virtual bool OutputUsedInComputingInputNodesGradients() const override
-    {
-        // The RowStackNode does not require its output value for computing
-        // the gradients of its input nodes
-        return false;
-    }
+public:
 
-    virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override
-    {
-        // The RowStackNode does not require any of its inputs' values for computing
-        // the gradients of its input nodes
-        UNREFERENCED_PARAMETER(childIndex);
-        return false;
-    }
-
     virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
     {
+        size_t rank = DetermineElementwiseTensorRank();
+        let outputSlice = GetTensorSliceFor(rank, fr); // tensor slice that represents the entire output for FrameRange
+
         for (size_t inputIndex = 0; inputIndex < GetNumInputs(); inputIndex++)
-            ValueFor(fr).AssignToRowSliceValuesOf(Input(inputIndex)->ValueFor(fr), m_startRowIndices[inputIndex], Input(inputIndex)->GetSampleMatrixNumRows());
+        {
+            let input = Input(inputIndex)->ValueTensorFor(rank, fr.AllowBroadcast());
+            let outputSubSlice = NarrowToStripe(outputSlice, inputIndex);
+            auto output = TensorView<ElemType>(Value(), outputSubSlice);
+            output.AssignCopyOf(input);
+        }
     }
 
+    virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
+    {
+        size_t rank = DetermineElementwiseTensorRank();
+        let outputSlice = GetTensorSliceFor(rank, fr); // tensor slice that represents the entire output for FrameRange
+
+        auto inputGrad = Input(inputIndex)->GradientTensorFor(rank, fr.AllowBroadcast());
+        let outputSubSlice = NarrowToStripe(outputSlice, inputIndex);
+        let outputGrad = TensorView<ElemType>(Gradient(), outputSubSlice);
+        inputGrad.AddCopyOf(outputGrad);
+    }
+
+    virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
+    virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }
+
     virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
     {
         Base::Validate(isFinalValidationPass);
         InferMBLayoutFromInputsForStandardCase();
 
-        // we must fuse all tensor shapes
-        // All dimensions but the last must be the same. (In a future version, we should be able to stack along any given dimension.)
-        // Note that trailing ones may be stripped/broadcasting, so we must first pad.
-        SmallVector<size_t> dims = Input(0)->GetSampleLayout().GetDims();
-        size_t maxRank = 0; // TODO: very similar to DetermineElementwiseTensorRank() except that that one also includes the output
+        // determine maximum rank (we can stack tensors with lower rank, which will have their dimensions padded to max automatically)
+        size_t maxRank = m_spliceDim + 1; // spliceDim may exceed all of them, which will create a new dimension, e.g. stacking column vectors into a matrix
         for (int i = 0; i < GetNumInputs(); i++)
-            if (maxRank < GetInputSampleLayout(i).GetRank())
-                maxRank = GetInputSampleLayout(i).GetRank();
-        dims.resize(maxRank - 1, 1); // pad and/or strip trailing dimension
-
-        // count totalRows and form m_startRowIndices[] array, which is the cumulative sum of matrix heights
-        m_startRowIndices.resize(GetNumInputs());
-        size_t totalRows = 0;
-        size_t totalTrailingDim = 0; // last tensor dimension is what gets stacked up
+            if (maxRank < Input(i)->GetSampleLayout().GetRank())
+                maxRank = Input(i)->GetSampleLayout().GetRank();
+
+        // the following loop does multiple things:
+        //  - count total dimension along m_spliceDim, and form associated m_firstIndices[] array
+        //  - verify all other dimensions' compatibility (we allow broadcasting)
+        auto dims = Input(0)->GetSampleLayout().PadRank(maxRank).GetDims(); // dimensions padded to max rank; start with dims of first input
+        dims[m_spliceDim] = 0;       // this dimension is created, while all others are verified for consistency
+        m_firstIndices.assign(1, 0); // accumulative splice dimension; start with 0
         for (int i = 0; i < GetNumInputs(); i++)
         {
-            m_startRowIndices[i] = totalRows;
-            totalRows += Input(i)->GetSampleMatrixNumRows();
-            SmallVector<size_t> thisDims = Input(i)->GetSampleLayout().GetDims();
-            thisDims.resize(maxRank, 1); // pad and/or strip trailing dimension
-            totalTrailingDim += thisDims.back(); // count total trailing dimensions (that's what we have after stacking)
-            thisDims.resize(maxRank - 1); // verify that dimensions match
-            if (dims != thisDims)
-                InvalidArgument("%ls %ls operation: Incompatible tensor dimension [%s] for input %ls %ls",
-                                NodeName().c_str(), OperationName().c_str(), std::string(Input(i)->GetSampleLayout()).c_str(),
-                                Input(i)->NodeName().c_str(), Input(i)->OperationName().c_str());
+            // check/fuse dims and accumulate the spliced dimension
+            let & shape = Input(i)->GetSampleLayout();
+            for (size_t k = 0; k < maxRank; k++)
+            {
+                size_t dim = shape.GetDimPadded(k);
+                if (k == m_spliceDim)
+                {
+                    // accumulate the spliced dimension
+                    dims[m_spliceDim] += dim;
+                    m_firstIndices.push_back(dims[m_spliceDim]); // and remember it
+                }
+                else
+                {
+                    // check/fuse dimensions
+                    if (isFinalValidationPass && dim != dims[k] && dim != 1 && dims[k] != 1)
+                        InvalidArgument("%ls %ls operation: Conflicting dimension %d between %ls %ls operation (%d) and other(s) (%d)",
+                                        NodeName().c_str(), OperationName().c_str(), (int)k, Input(i)->NodeName().c_str(), Input(i)->OperationName().c_str(), (int)dim, (int)dims[k]);
+                    if (dims[k] == 1) // broadcast
+                        dims[k] = dim;
+                }
+            }
         }
 
-        // warn that this node will destroy the image size information from the child
-        if (Input(0)->HasSampleLayout())
-            fprintf(stderr, "WARNING: RowStack operation cannot inherit image size information from its child. Image size info is lost.\n");
-
-        dims.push_back(totalTrailingDim);
         SetDims(TensorShape(dims), HasMBLayout());
-
-        if (totalRows != GetSampleMatrixNumRows())
-            LogicError("%ls RowStack operation: Tensor shapes of inputs were not compatible after all?", NodeName().c_str());
     }
 
 private:
-    std::vector<size_t> m_startRowIndices; // start row number in the stacked matrix of each input (child) (cumsum of matrix heights)
+    std::vector<size_t> m_firstIndices; // start row number in the stacked matrix of each input (child) (cumsum of matrix heights); plus one final entry that equals the total dimension
 };
 
 template class RowStackNode<float>;
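
Tying the RowStack pieces together: with m_spliceDim = 0 and inputs whose first dimensions are 3, 5, and 2, Validate() leaves m_firstIndices = {0, 3, 8, 10}, and ForwardProp() copies input i into the stripe [m_firstIndices[i], m_firstIndices[i+1]) of the output's dimension 0 that NarrowToStripe() addresses. A standalone sketch of that bookkeeping (plain vectors standing in for TensorView):

#include <cstdio>
#include <vector>

int main()
{
    // first-dimension sizes of the inputs to be stacked
    std::vector<size_t> inputRows = {3, 5, 2};

    // Validate(): cumulative sum, with a final entry holding the total
    std::vector<size_t> firstIndices = {0};
    for (size_t rows : inputRows)
        firstIndices.push_back(firstIndices.back() + rows); // {0, 3, 8, 10}

    // ForwardProp(): each input lands in its own stripe of the output
    std::vector<int> output(firstIndices.back(), 0);
    for (size_t i = 0; i < inputRows.size(); i++)
        for (size_t r = firstIndices[i]; r < firstIndices[i + 1]; r++)
            output[r] = (int)i + 1; // stand-in for copying input i's rows

    for (int v : output)
        printf("%d ", v); // 1 1 1 2 2 2 2 2 3 3
    printf("\n");
}
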
2 changes: 1 addition & 1 deletion Source/Math/TensorView.cpp
@@ -78,7 +78,7 @@ static void PrepareTensorOperands(array<TensorShape, N> shapes, array<size_t, N>
             dims = shapes[i].GetRank();
     for (size_t i = 0; i < N; i++)
         if (shapes[i].GetRank() < dims)
-            shapes[i].PadInPlace(dims);
+            shapes[i].PadRankInPlace(dims);
     // all shapes[] now have the same rank
 
     // determine operation shape (max over all dimensions)
