changed reduction to 'double' (since we are not 100% identical anyway, we can make this switch); bug fix: an XCOPY command line missed a /D option
nikoma · May 9, 2016 · 6a88bcb · 6a88bcb
1 parent 8f34f54
commit 6a88bcb
Show file tree

Hide file tree

Showing 3 changed files with 7 additions and 5 deletions.
diff --git a/Source/Math/GPUTensor.cu b/Source/Math/GPUTensor.cu
@@ -265,8 +265,8 @@ struct TensorOps
 // function to compute the value for a given output location (including reduction)
 // -----------------------------------------------------------------------
 
-//#define ReduceElemType double
-#define ReduceElemType ElemType
+#define ReduceElemType double
+//#define ReduceElemType ElemType
 
 template <class ElemType, C_size_t N, C_int M, C_int m>
 struct TensorOpReduce
@@ -611,9 +611,11 @@ static void LaunchTensorOpWithReduction(ElemType beta, array<ElemType*, N> point
     GridDim grid(NN);
     let& props = GridDim::GetDeviceProps();
     // === simple case: NN large, one thread per output element
+    bool disableParallelReduction = false;                       // (for debugging)
     if (reductionDim == 1 ||                                     // no reduction
         grid.m_blocksPerGrid >= props.multiProcessorCount ||     // enough output elements to fill all multiprocs
         reductionDim * numElements <= 2 * props.warpSize ||      // trivial operation not worth the trouble (2* because the more complex one also needs 2 kernel launches)
+        disableParallelReduction ||                              // (for debugging)
         reductionDim * numElements <= props.multiProcessorCount) // recursive call from reduction below
     {
         // we got enough elements to generate: do one element per thread, and reduction inside

diff --git a/Source/Math/MathCUDA.vcxproj b/Source/Math/MathCUDA.vcxproj
@@ -86,7 +86,7 @@
     </CudaCompile>
     <PostBuildEvent>
       <Command>for %%l in ($(CudaDlls)) do if exist "$(CudaPath)\bin\%%l" xcopy /D /Y "$(CudaPath)\bin\%%l*" "$(OutputPath)"
-if exist "$(CuDnnDll)" xcopy /Y "$(CuDnnDll)" "$(OutputPath)"
+if exist "$(CuDnnDll)" xcopy /D /Y "$(CuDnnDll)" "$(OutputPath)"
 </Command>
     </PostBuildEvent>
   </ItemDefinitionGroup>
@@ -194,4 +194,4 @@ if exist "$(CuDnnDll)" xcopy /Y "$(CuDnnDll)" "$(OutputPath)"
     <Error Condition="!Exists('$(CUB_PATH)')" Text="CNTK requires the NVIDIA CUB library to build. Please see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#cuda for installation instructions." />
     <Error Condition="!Exists('$(CUDNN_PATH)')" Text="CNTK requires the NVIDIA cuDNN library to build. Please see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#cuda for installation instructions." />
   </Target>
-</Project>
+</Project>
diff --git a/Source/SGDLib/SGD.cpp b/Source/SGDLib/SGD.cpp
@@ -1128,7 +1128,7 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
                     mbProgNumPrecision = max(mbProgNumPrecision - 2, 2);
                 }
             }
-            else // TODO: What's the meaning of this? Some sort of extrapolation?
+            else // estimate epoch size
                 m_maxComputedEpochSize = numMBsRun * trainSamplesSinceLastLogged / m_numMBsToShowResult;
 
             // progress tracing for compute cluster management