Skip to content

Commit

Permalink
changed the mapping of tensors onto the Matrix storage objects: In ca…
Browse files Browse the repository at this point in the history
…se of no MBLayout, the Matrix objects now always have column dimension 1. Actual matrices (as used by TimesNode) are now 2D tensors. As part of this, GetNumCols()/-Rows() no longer exist, but were split into GetSampleMatrixCols()/-Rows() (minibatch interpretation) and GetAsMatrixCols()/-Rows() (2D matrix interpretation)
  • Loading branch information
frankseide committed Jan 21, 2016
1 parent 01bdefe commit 342b1ba
Show file tree
Hide file tree
Showing 26 changed files with 887 additions and 1,142 deletions.
4 changes: 2 additions & 2 deletions Source/CNTK/ModelEditLanguage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,12 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
std::wstring modelFormat = GetOptionalModelFormat(params, numFixedParams);

auto cn = make_shared<ComputationNetwork>(CPUDEVICE);
#if 1 // support for a specific kind of legacy format, for the sole purpose of allowing users to convert (=load & save) them
#if 1 // support for a specific kind of legacy format, for the sole purpose of allowing users to convert (=load & save) them
if (modelFormat == L"cntk_legacy_no_tensorlib")
{
cn->Read<ElemType>(params[1]);
for (auto node : cn->FeatureNodes())
node->SetDims(TensorShape(node->GetNumRows()), 0); // pre-tensorlib InputValues had incorrect tensor dimensions
node->SetDims(TensorShape(node->GetSampleMatrixNumRows()), node->HasMBLayout()); // pre-tensorlib InputValues had incorrect tensor dimensions
cn->CompileNetwork();
}
else
Expand Down
9 changes: 0 additions & 9 deletions Source/CNTK/NDLUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,6 @@ class NDLUtil
{
}

// FixupInputMinibatchSize - go through all the inputs and make sure they have a consistent minibatch size
void FixupInputMinibatchSize()
{
m_net->FixupInputMinibatchSize();
}

// ProcessNDLConfig - Process the NDL script from a configuration string value
// config - configuration string containing script
void ProcessNDLConfig(const ConfigValue& config, bool fullValidate = false)
Expand Down Expand Up @@ -105,10 +99,7 @@ class NDLUtil
SynchronousNodeEvaluator<ElemType> ndlEvaluator(m_net);
NDLNode<ElemType>* lastNode = script->Evaluate(ndlEvaluator, L"", ndlPass, skipThrough);
if (ndlPass == ndlPassResolve)
{
SetOutputNodes(script);
FixupInputMinibatchSize();
}
return lastNode;
}

Expand Down
16 changes: 8 additions & 8 deletions Source/CNTK/SimpleNetworkBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1753,7 +1753,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildUnidirectionalLSTMNet
input = output;
}

size_t idim = input->GetNumRows();
size_t idim = input->GetSampleMatrixNumRows();
assert(m_lookupTabelOrderSizes.size() == m_streamSizes.size());

e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"Embedding%d", idx), m_layerSizes[1], idim / m_lookupTabelOrderSizes[idx]);
Expand Down Expand Up @@ -2069,7 +2069,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildBiDirectionalLSTMNetw
input = output;
}

size_t idim = input->GetNumRows();
size_t idim = input->GetSampleMatrixNumRows();
assert(m_lookupTabelOrderSizes.size() == m_streamSizes.size());

e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"Embedding%d", idx), m_layerSizes[1], idim / m_lookupTabelOrderSizes[idx]);
Expand Down Expand Up @@ -2295,7 +2295,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDbnFile(co
unsigned long randomSeed = 1;

ComputationNodePtr input, w, b, output, label, prior, scaledLogLikelihood;
shared_ptr<PreComputedNode<ElemType>> pcNodePtr;
shared_ptr<PreComputedNodeBase<ElemType>> pcNodePtr;

File fstream(dbnModelFileName, FileOptions::fileOptionsBinary | FileOptions::fileOptionsRead);

Expand Down Expand Up @@ -2354,11 +2354,11 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDbnFile(co
contextStdDev.TransferFromDeviceToDevice(CPUDEVICE, m_deviceId, true, false, false);

w = builder.Mean(input, L"MeanOfFeatures");
static_pointer_cast<PreComputedNode<ElemType>>(w)->SideLoadFromMatrix(contextMean);
static_pointer_cast<PreComputedNodeBase<ElemType>>(w)->SideLoadFromMatrix(contextMean);
w->SetParameterUpdateRequired(false);

b = builder.InvStdDev(input, L"InvStdOfFeatures");
static_pointer_cast<PreComputedNode<ElemType>>(b)->SideLoadFromMatrix(contextStdDev);
static_pointer_cast<PreComputedNodeBase<ElemType>>(b)->SideLoadFromMatrix(contextStdDev);
b->SetParameterUpdateRequired(false);

output = builder.PerDimMeanVarNormalization(input, w, b, L"MVNormalizedFeatures");
Expand Down Expand Up @@ -2418,7 +2418,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDbnFile(co
assert(priorVals.GetNumCols() == 1 && priorVals.GetNumRows() == m_outputLayerSize);

prior = builder.Mean(label, L"Prior");
static_pointer_cast<PreComputedNode<ElemType>>(prior)->SideLoadFromMatrix(priorVals);
static_pointer_cast<PreComputedNodeBase<ElemType>>(prior)->SideLoadFromMatrix(priorVals);
prior->SetParameterUpdateRequired(false);
}
else // pretrained network - need to add output layer, initalize
Expand All @@ -2431,7 +2431,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDbnFile(co
else
std::runtime_error("Output layer size must be specified when converting pretrained network, use outputLayerSize=");

size_t penultimateSize = input->GetNumRows();
size_t penultimateSize = input->GetSampleMatrixNumRows();

wstring nameOfW = msra::strfun::wstrprintf(L"W%d", i);
wstring nameOfB = msra::strfun::wstrprintf(L"B%d", i);
Expand All @@ -2450,7 +2450,7 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDbnFile(co
{
Matrix<ElemType> zeros = Matrix<ElemType>::Zeros(outputLayerSize, 1, m_deviceId);
prior = builder.Mean(label, L"Prior");
static_pointer_cast<PreComputedNode<ElemType>>(prior)->MarkComputed(false);
static_pointer_cast<PreComputedNodeBase<ElemType>>(prior)->MarkComputed(false);
prior->Value().SetValue(zeros);
}
}
Expand Down
14 changes: 14 additions & 0 deletions Source/Common/Include/TensorShape.h
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ struct TensorShape
{
return m_dims == other.m_dims;
}
bool operator!=(const TensorShape& other) const { return !(*this == other); } // defined as the negation of operator==

// verify that this refers to a dense matrix (no strides)
void VerifyIsDense() const
Expand Down Expand Up @@ -622,6 +623,19 @@ struct TensorShape
return *this;
}

// compare two TensorShapes, whether they are compatible, considering padding and broadcasting
bool IsElementwiseCompatibleWith(const TensorShape & other) const
{
for (size_t i = 0; i < m_dims.size(); i++)
{
size_t dim = m_dims[i];
size_t otherDim = i < other.size() ? other[i] : 1;
if (dim != otherDim && dim != 1 && otherDim != 1) // dims mismatch, and neither is broadcasting
return false;
}
return true;
}

// pretty-printing. Returns tensor dims in the form "I x J x K".
operator std::string() const
{
Expand Down
Loading

0 comments on commit 342b1ba

Please sign in to comment.