Fixed bug with BN engine param name in BS and fixed CPU build.
Alexey Kamenev committed Feb 12, 2016
1 parent 11fe2c6 commit 820cfc2
Showing 6 changed files with 42 additions and 6 deletions.
Source/CNTK/BrainScript/ExperimentalNetworkBuilder.cpp (2 changes: 1 addition & 1 deletion)
@@ -66,7 +66,7 @@ wstring computationNodes = // TODO: use actual TypeName() here? would first need
L"ColumnwiseCrossProduct = KhatriRaoProduct // deprecated \n" // TODO: should it be deprecated? It is described as easier to understand in the CNTKBook.
L"ClassificationError = ErrorPrediction \n"
L"Delay = PastValue \n" // TODO: should it allow negative offsets and an if test here?
L"BatchNormalization(input, scale, bias, runMean, runInvStdDev, eval, spatial, expAvgFactor, epsilon, engine, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'BatchNormalization' ; inputs = (input : scale : bias : runMean : runInvStdDev) /*plus the function args*/ ]\n"
L"BatchNormalization(input, scale, bias, runMean, runInvStdDev, eval, spatial, expAvgFactor, epsilon, useCntkEngine, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'BatchNormalization' ; inputs = (input : scale : bias : runMean : runInvStdDev) /*plus the function args*/ ]\n"
// standard nodes. We use macros to define these strings.
#define UnaryStandardNode(Op, a) L## #Op L"(" L## #a L", tag='') = new ComputationNode [ operation = '" L## #Op L"' ; inputs = " L## #a L" /*plus the function args*/ ]\n"
#define BinaryStandardNode(Op, a, b) L## #Op L"(" L## #a L", " L## #b L", tag='') = new ComputationNode [ operation = '" L## #Op L"' ; inputs = (" L## #a L" : " L## #b L") /*plus the function args*/ ]\n"
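For context, a minimal C++ sketch (not part of this commit) of how the renamed useCntkEngine flag can be mapped onto the BatchNormImpl enum declared in Source/Math/ConvolutionEngine.h; the "Cntk" member and the helper name are assumptions for illustration, since the diff only shows the CuDnn member.

// Sketch only: how a boolean useCntkEngine read from BrainScript could select
// the batch-norm engine. "Cntk" is assumed; the diff below only shows "CuDnn".
enum class BatchNormImpl
{
    CuDnn,
    Cntk
};

inline BatchNormImpl SelectBatchNormImpl(bool useCntkEngine)
{
    // The parameter was renamed from 'engine' to 'useCntkEngine', so the node
    // only has to translate a boolean into an engine choice.
    return useCntkEngine ? BatchNormImpl::Cntk : BatchNormImpl::CuDnn;
}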
Source/ComputationNetworkLib/TrainingNodes.h (5 changes: 4 additions & 1 deletion)
@@ -1787,7 +1787,10 @@ class BatchNormalizationNode : public ComputationNode<ElemType>, public NumInput
{
if (m_spatial && m_imageLayoutKind != CHW)
{
InvalidArgument("Batch normalization currently supports only cuDNN (CHW) format. Please specify imageLayout=\"cudnn\" in BatchNormalization node in your NDL/BrainScript.");
InvalidArgument(
"Batch normalization currently supports only cuDNN (CHW) data layout."
"Please specify imageLayout=\"cudnn\" in BatchNormalization node in your NDL/BrainScript "
"and make sure your input data layout is CHW");
}

auto shape = GetSampleLayout();
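A small self-contained sketch of what the reworded error guards: spatial batch normalization requires the cuDNN CHW layout, which is what imageLayout="cudnn" selects. The ImageLayoutKind names and the string-to-layout mapping below are assumptions for illustration, not taken from this diff.

// Illustrative sketch, not the CNTK implementation.
#include <stdexcept>
#include <string>

enum class ImageLayoutKind { HWC, CHW }; // legacy layout vs. cuDNN-style CHW

inline ImageLayoutKind LayoutFromString(const std::string& s)
{
    return s == "cudnn" ? ImageLayoutKind::CHW : ImageLayoutKind::HWC;
}

inline void ValidateSpatialBatchNorm(bool spatial, ImageLayoutKind layout)
{
    if (spatial && layout != ImageLayoutKind::CHW)
        throw std::invalid_argument(
            "Batch normalization currently supports only cuDNN (CHW) data layout. "
            "Please specify imageLayout=\"cudnn\" in BatchNormalization node in your NDL/BrainScript "
            "and make sure your input data layout is CHW.");
}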
Source/Math/ConvolutionEngine.h (2 changes: 1 addition & 1 deletion)
@@ -290,7 +290,7 @@ class MATH_API PoolingEngine
PoolingEngine& operator=(PoolingEngine&&) = delete;
};

- // REVIEW alexeyk: this is a temporary hack until we find a better place for poor BatchNorm.
+ // REVIEW alexeyk: this is a temporary hack until we find a better place for the BatchNorm engine(s).
enum class BatchNormImpl
{
CuDnn,
Source/Math/CuDnnConvolutionEngine.cu (19 changes: 16 additions & 3 deletions)
@@ -38,6 +38,13 @@ bool CuDnnConvolutionEngineFactory<ElemType>::IsSupported(DEVICEID_TYPE deviceId
#endif
}

+ CudaTimer::~CudaTimer()
+ {
+ if (m_start != nullptr)
+ CUDA_CALL(cudaEventDestroy(reinterpret_cast<cudaEvent_t>(m_start)));
+ if (m_stop != nullptr)
+ CUDA_CALL(cudaEventDestroy(reinterpret_cast<cudaEvent_t>(m_stop)));
+ }
void CudaTimer::Start()
{
cudaEvent_t start;
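The destructor added here has to release whatever CUDA events Start() creates. Below is a self-contained sketch of that idea using only standard CUDA runtime calls; it is an assumption about how Start/Stop/Elapsed are implemented, not the commit's actual code.

// Illustrative sketch only: a CUDA event based timer whose destructor must
// destroy the events it lazily created, which is what ~CudaTimer() above does.
#include <cuda_runtime.h>

class SimpleCudaTimer
{
public:
    SimpleCudaTimer() : m_start(nullptr), m_stop(nullptr) {}
    ~SimpleCudaTimer()
    {
        if (m_start != nullptr)
            cudaEventDestroy(m_start);
        if (m_stop != nullptr)
            cudaEventDestroy(m_stop);
    }
    void Start()
    {
        if (m_start == nullptr)
        {
            cudaEventCreate(&m_start); // created once, destroyed in the destructor
            cudaEventCreate(&m_stop);
        }
        cudaEventRecord(m_start, 0);
    }
    void Stop()
    {
        cudaEventRecord(m_stop, 0);
        cudaEventSynchronize(m_stop); // wait so Elapsed() reads a finished interval
    }
    float Elapsed()
    {
        float ms = 0;
        cudaEventElapsedTime(&ms, m_start, m_stop);
        return ms;
    }
private:
    cudaEvent_t m_start;
    cudaEvent_t m_stop;
};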
@@ -394,7 +401,9 @@ public:
assert(inT.n() == in.GetNumCols());
assert(saveMean.GetNumElements() >= runMean.GetNumElements());
assert(saveInvStdDev.GetNumElements() >= runInvStdDev.GetNumElements());
- UNUSED(crowIn);
+ #ifndef _DEBUG
+ UNUSED(crowIn); // crowIn used only in asserts.
+ #endif

if (m_bnImpl == BatchNormImpl::CuDnn)
{
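crowIn is read only by the asserts, which are compiled out in release builds (where NDEBUG is defined and _DEBUG is not, in the usual MSVC setup), so the release build would otherwise warn about an unused variable; wrapping UNUSED(crowIn) in #ifndef _DEBUG silences that warning without touching the debug build. A minimal sketch of the pattern follows; the UNUSED definition is an assumption for illustration, since CNTK defines its own.

// Sketch of the warning-suppression pattern used above.
#include <cassert>
#include <cstddef>

#ifndef UNUSED
#define UNUSED(x) (void)(x) // evaluate-and-discard to mark x as deliberately unused
#endif

void CheckShape(size_t crowIn, size_t expectedRows)
{
    assert(crowIn == expectedRows); // compiled out when NDEBUG is defined (release)
#ifndef _DEBUG
    UNUSED(crowIn); // used only in the assert above
    UNUSED(expectedRows);
#endif
    // ... the actual work does not need crowIn ...
}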
@@ -443,7 +452,9 @@ public:
assert(inT.n() == in.GetNumCols());
assert(runMean.GetNumCols() == 1);
assert(runInvStdDev.GetNumCols() == 1);
- UNUSED(crowIn);
+ #ifndef _DEBUG
+ UNUSED(crowIn); // crowIn used only in asserts.
+ #endif

cudnnBatchNormMode_t mode = spatial ? CUDNN_BATCHNORM_SPATIAL : CUDNN_BATCHNORM_PER_ACTIVATION;
CUDNN_CALL(cudnnBatchNormalizationForwardInference(m_cudnn, mode, &C::One, &C::Zero, t(inT), ptr(in), t(inT), ptr(out),
@@ -476,7 +487,9 @@ public:
assert(scaleGrad.GetNumCols() == scale.GetNumCols());
assert(biasGrad.GetNumRows() == scale.GetNumRows());
assert(biasGrad.GetNumCols() == scale.GetNumCols());
- UNUSED(crowIn);
+ #ifndef _DEBUG
+ UNUSED(crowIn); // crowIn used only in asserts.
+ #endif

if (m_bnImpl == BatchNormImpl::CuDnn)
{
Source/Math/CuDnnConvolutionEngine.h (7 changes: 7 additions & 0 deletions)
@@ -39,15 +39,22 @@ class CuDnnConvolutionEngineFactory : public ConvolutionEngineFactory<ElemType>
static bool IsSupported(DEVICEID_TYPE deviceId);
};

// REVIEW alexeyk: wrong place.
class MATH_API CudaTimer
{
public:
CudaTimer(): m_start(nullptr), m_stop(nullptr)
{
}
+ ~CudaTimer();
void Start();
void Stop();
float Elapsed();

+ CudaTimer(const CudaTimer& src) = delete;
+ CudaTimer& operator=(const CudaTimer& src) = delete;
+ CudaTimer(CudaTimer&& src) = delete;
+ CudaTimer& operator=(CudaTimer&& src) = delete;
private:
void* m_start;
void* m_stop;
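A hypothetical usage sketch (the timed operation, the printf, and the Microsoft::MSR::CNTK namespace qualification are assumptions, not from this commit): because the timer now owns two CUDA events and frees them in its destructor, the deleted copy and move operations prevent two timers from ever destroying the same events.

// Usage sketch only; assumes the declarations from CuDnnConvolutionEngine.h.
#include "CuDnnConvolutionEngine.h"
#include <cstdio>

void TimeOneForwardPass()
{
    Microsoft::MSR::CNTK::CudaTimer timer; // no events yet: both members start as nullptr
    timer.Start();
    // ... launch the batch-norm forward pass here ...
    timer.Stop();
    std::printf("forward pass: %.3f ms\n", timer.Elapsed());

    // CudaTimer copy = timer; // does not compile: copy constructor is deleted
}   // destructor releases both CUDA events exactly once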
Source/Math/NoGPU.cpp (13 changes: 13 additions & 0 deletions)
@@ -2173,10 +2173,23 @@ bool CuDnnConvolutionEngineFactory<ElemType>::IsSupported(DEVICEID_TYPE)

template class CuDnnConvolutionEngineFactory<float>;
template class CuDnnConvolutionEngineFactory<double>;

+ CudaTimer::~CudaTimer()
+ {
+ }
+ void CudaTimer::Start()
+ {
+ }
+ void CudaTimer::Stop()
+ {
+ }
+ float CudaTimer::Elapsed()
+ {
+ return 0;
+ }
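These empty definitions are what make the CPU-only build link again (the "fixed CPU build" part of the commit message): callers can use CudaTimer unconditionally and simply get zero timings when CUDA is unavailable. Below is a sketch of the same pattern in isolation; the class name and the CPUONLY guard as used here are illustrative assumptions, not code from this commit.

// Illustrative pattern, not CNTK code: one declaration, two implementations.
class GpuStopwatch
{
public:
    ~GpuStopwatch();
    void Start();
    void Stop();
    float ElapsedMs();
};

#ifdef CPUONLY
// CPU-only translation unit: no CUDA runtime, so every method is a no-op.
GpuStopwatch::~GpuStopwatch() {}
void GpuStopwatch::Start() {}
void GpuStopwatch::Stop() {}
float GpuStopwatch::ElapsedMs() { return 0.0f; }
#endif
// The GPU build implements the same declarations with CUDA events in a .cu file.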

} } }

// define a dummy GPUWatcher class too
#include "GPUWatcher.h"

