Made inclusion of quantized gradient aggregation header compile-time …

…conditional. Removed the dependency on the top-level MatrixQuantizer type from the quantizer unit tests - the tests now work with the underlying MatrixQuantizerImpl type.
roy881020 · Jan 7, 2016 · fe6e622 · fe6e622
1 parent 6ed460b
commit fe6e622
Show file tree

Hide file tree

Showing 4 changed files with 17 additions and 10 deletions.
diff --git a/Source/Math/MatrixQuantizerImpl.cpp b/Source/Math/MatrixQuantizerImpl.cpp
@@ -7,7 +7,7 @@
 namespace Microsoft { namespace MSR { namespace CNTK {
 
     template<class ElemType>
-    /*static*/ MatrixQuantizerImpl<ElemType>* MatrixQuantizerImpl<ElemType>::CreateMatrixQuantizerImpl(int deviceId, bool useAsync)
+    /*static*/ MatrixQuantizerImpl<ElemType>* MatrixQuantizerImpl<ElemType>::Create(int deviceId, bool useAsync)
     {
         if (deviceId >= 0)
         {

diff --git a/Source/Math/MatrixQuantizerImpl.h b/Source/Math/MatrixQuantizerImpl.h
@@ -19,7 +19,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     class MATH_API MatrixQuantizerImpl
     {
     public:
-        static MatrixQuantizerImpl<ElemType>* CreateMatrixQuantizerImpl(int deviceId, bool useAsync);
+        static MatrixQuantizerImpl<ElemType>* Create(int deviceId, bool useAsync);
         virtual ~MatrixQuantizerImpl() 
         {
         }

diff --git a/Source/SGDLib/SGD.cpp b/Source/SGDLib/SGD.cpp
@@ -5,7 +5,13 @@
 #include "Basics.h"
 #include "SGD.h"
 #include "DataReaderHelpers.h"
+
+#include "MatrixQuantizerImpl.h"
+
+#ifdef QUANTIZED_GRADIENT_AGGREGATION
 #include "AllReduceDistGradAggregator.h"
+#endif
+
 #include "SimpleDistGradAggregator.h"
 #include "ProgressTracing.h"
 

diff --git a/Tests/UnitTests/MathTests/MatrixQuantizerTests.cpp b/Tests/UnitTests/MathTests/MatrixQuantizerTests.cpp
@@ -8,7 +8,7 @@
 #include <memory>
 #include <io.h>
 
-#include "../../../Source/Math/MatrixQuantizer.h"
+#include "../../../Source/Math/MatrixQuantizerImpl.h"
 #include "../../../Source/Math/CUDAPageLockedMemAllocator.h"
 #include "../../../Source/Math/ValueQuantizer.h"
 
@@ -256,10 +256,11 @@ namespace Microsoft { namespace MSR { namespace CNTK { namespace Test {
         std::unique_ptr<MemAllocator> allocator(deviceId == CPUDEVICE ? nullptr : new CUDAPageLockedMemAllocator(deviceId));
 
         Matrix<ElemType> inMatrix(numRows, numCols, deviceId);
-        std::unique_ptr<MatrixQuantizer<ElemType>> quantizer(new MatrixQuantizer<ElemType>(numRows, numCols, deviceId, false /*useAsync*/));
+        std::unique_ptr<MatrixQuantizerImpl<ElemType>> quantizer(MatrixQuantizerImpl<ElemType>::Create(deviceId, false /*useAsync*/));
+        Matrix<ElemType> residueMatrix(numRows, numCols, deviceId);
 
         // Verify that the initial residue is comprised of all zeros
-        verifyAllZerosFunc(quantizer->GetResidualMatrix());
+        verifyAllZerosFunc(residueMatrix);
         Matrix<ElemType> outMatrix(numRows, numCols, deviceId);
         // Verify that the outMatrix is initialized with all zeros
         verifyAllZerosFunc(outMatrix);
@@ -269,7 +270,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { namespace Test {
             inMatrix = Matrix<ElemType>::RandomUniform(numRows, numCols, rangeLow, rangeHigh, seed + iterNum, deviceId);
 
             std::unique_ptr<ElemType[]> gpuInMatrix(inMatrix.CopyToArray());
-            std::unique_ptr<ElemType[]> gpuPrevResidualMatrix(quantizer->GetResidualMatrix().CopyToArray());
+            std::unique_ptr<ElemType[]> gpuPrevResidualMatrix(residueMatrix.CopyToArray());
             std::unique_ptr<ElemType[]> gpuPrevOutMatrix(outMatrix.CopyToArray());
 
             size_t numRowsToPrint(0);
@@ -291,18 +292,18 @@ namespace Microsoft { namespace MSR { namespace CNTK { namespace Test {
                 }
 
                 inMatrix.Print("Input Matrix", 0, numRowsToPrint - 1, 0, numColsToPrint - 1);
-                quantizer->GetResidualMatrix().Print("Old Residual Matrix", 0, numRowsToPrint - 1, 0, numColsToPrint - 1);
+                residueMatrix.Print("Old Residual Matrix", 0, numRowsToPrint - 1, 0, numColsToPrint - 1);
                 outMatrix.Print("Old Output Matrix", 0, numRowsToPrint - 1, 0, numColsToPrint - 1);
             }
 
             QuantizedMatrix<ElemType> tempCPUQuantizationBuffer(numRows, numCols, numBits, CPUDEVICE, allocator.get());
-            quantizer->QuantizeAsync(inMatrix, tempCPUQuantizationBuffer, zeroThresholdFor1Bit);
+            quantizer->QuantizeAsync(inMatrix, residueMatrix, tempCPUQuantizationBuffer, residueMatrix, zeroThresholdFor1Bit);
             quantizer->WaitQuantizeAsyncDone();
 
             if (createDebugOut)
             {
                 tempCPUQuantizationBuffer.Print("Quantized Matrix", 0, numRowsToPrint - 1, 0, numColsToPrint - 1);
-                quantizer->GetResidualMatrix().Print("New residual Matrix", 0, numRowsToPrint - 1, 0, numColsToPrint - 1);
+                residueMatrix.Print("New residual Matrix", 0, numRowsToPrint - 1, 0, numColsToPrint - 1);
             }
 
             quantizer->UnquantizeAsync(tempCPUQuantizationBuffer, outMatrix, (iterNum > 0));
@@ -314,7 +315,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { namespace Test {
             }
 
             // Now verify the quantization results
-            std::unique_ptr<ElemType[]> gpuNewResidualMatrix(quantizer->GetResidualMatrix().CopyToArray());
+            std::unique_ptr<ElemType[]> gpuNewResidualMatrix(residueMatrix.CopyToArray());
             std::unique_ptr<ElemType[]> gpuNewOutMatrix(outMatrix.CopyToArray());
 
             ElemType precisionTolerance = (std::is_same<ElemType, double>::value) ? static_cast<ElemType>(c_DoublePrecisionTolerance) : c_SinglePrecisionTolerance;