From a6d0f8176d06f8c4579924a8acd6372a27ba561f Mon Sep 17 00:00:00 2001 From: Dong Yu Date: Wed, 15 Oct 2014 19:00:30 -0700 Subject: [PATCH] fixed a bug in GPUMatrix::RowElementMultiplyWith and RowElementDivideBy. Changed ComputationNetwork.h to support loading nodes with number of children larger than 3. --- .gitignore | 1 + MachineLearning/cn/CompositeComputationNode.h | 1 - MachineLearning/cn/ComputationNetwork.h | 17 ++++++++++++++++- Math/Math/GPUMatrix.cu | 18 +++++++++--------- Math/Math/Matrix.cpp | 4 ++-- 5 files changed, 28 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index f43a2b5af398..0842538b194b 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ *.suo *.user *.sln.docstates +*.orig # Build results diff --git a/MachineLearning/cn/CompositeComputationNode.h b/MachineLearning/cn/CompositeComputationNode.h index 6e576368717b..e4c391ce0e7d 100644 --- a/MachineLearning/cn/CompositeComputationNode.h +++ b/MachineLearning/cn/CompositeComputationNode.h @@ -2135,7 +2135,6 @@ namespace Microsoft { namespace MSR { namespace CNTK { else //all samples share the same prior posterior.ColumnElementMultiplyWith(prior); - //compute GMM log-likelihood Matrix::Multiply(ConstOnes(1, numComponent, posterior.GetDeviceId()), false, posterior, false, functionValues); //functionValues <-- total likelihood posterior.RowElementDivideBy(functionValues); //posterior <-- per-comp likelihood / total likelihood diff --git a/MachineLearning/cn/ComputationNetwork.h b/MachineLearning/cn/ComputationNetwork.h index 376750491b38..f9a34a6061b3 100644 --- a/MachineLearning/cn/ComputationNetwork.h +++ b/MachineLearning/cn/ComputationNetwork.h @@ -336,7 +336,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { } ComputationNodePtr nodePtr = GetNodeFromName(nodeName); - ComputationNodePtr childNodePtr0, childNodePtr1, childNodePtr2; + ComputationNodePtr childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3, childNodePtr4; switch (numChildren) { case 1: @@ -354,6 +354,21 @@ namespace Microsoft { namespace MSR { namespace CNTK { childNodePtr2 = GetNodeFromName(childrenNames[2]); nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2); break; + case 4: + childNodePtr0 = GetNodeFromName(childrenNames[0]); + childNodePtr1 = GetNodeFromName(childrenNames[1]); + childNodePtr2 = GetNodeFromName(childrenNames[2]); + childNodePtr3 = GetNodeFromName(childrenNames[3]); + nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3); + break; + case 5: + childNodePtr0 = GetNodeFromName(childrenNames[0]); + childNodePtr1 = GetNodeFromName(childrenNames[1]); + childNodePtr2 = GetNodeFromName(childrenNames[2]); + childNodePtr3 = GetNodeFromName(childrenNames[3]); + childNodePtr4 = GetNodeFromName(childrenNames[4]); + nodePtr->AttachInputs(childNodePtr0, childNodePtr1, childNodePtr2, childNodePtr3, childNodePtr4); + break; default: throw std::logic_error("Invalid number of children."); } diff --git a/Math/Math/GPUMatrix.cu b/Math/Math/GPUMatrix.cu index adaa7d0637b4..701290eafc6c 100644 --- a/Math/Math/GPUMatrix.cu +++ b/Math/Math/GPUMatrix.cu @@ -1371,9 +1371,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (!(a.GetNumRows() == 1 && a.GetNumCols() == GetNumCols())) throw std::invalid_argument("RowElementMultiplyWith: The input matrix should be a row vector and match [this]'s columns."); - long N=(long)a.GetNumRows(); - long M=(long)this->GetNumCols(); - int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); + long N = (long)this->GetNumRows(); + long M = (long)a.GetNumCols(); + int blocksPerGrid = (int)ceil(1.0*M / threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); @@ -1394,9 +1394,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (!(a.GetNumRows() == 1 && a.GetNumCols() == GetNumCols())) throw std::invalid_argument("RowElementDivideBy: The input matrix should be a row vector and match [this]'s columns."); - long N = (long)a.GetNumRows(); - long M = (long)this->GetNumCols(); - int blocksPerGrid = (int)ceil(1.0*N / threadsPerBlock); + long N = (long)this->GetNumRows(); + long M = (long)a.GetNumCols(); + int blocksPerGrid = (int)ceil(1.0*M / threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); @@ -1417,9 +1417,9 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1)) throw std::invalid_argument("ColumnElementDivideBy: The input matrix should be a col vector and match [this]'s rows."); - long N=(long)a.GetNumRows(); - long M=(long)this->GetNumCols(); - int blocksPerGrid =(int)ceil(1.0*N/threadsPerBlock); + long N = (long)a.GetNumRows(); + long M = (long)this->GetNumCols(); + int blocksPerGrid = (int)ceil(1.0*N / threadsPerBlock); a.PrepareDevice(); cudaEvent_t done = nullptr; if (do_sync) CUDA_CALL(cudaEventCreate(&done)); diff --git a/Math/Math/Matrix.cpp b/Math/Math/Matrix.cpp index 692836ff603e..93d6bbfcc91a 100644 --- a/Math/Math/Matrix.cpp +++ b/Math/Math/Matrix.cpp @@ -1737,7 +1737,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (a.IsEmpty() || IsEmpty()) throw std::logic_error("RowElementMultiplyWith: Matrix is empty."); - if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1)) + if (!(a.GetNumCols() == GetNumCols() && a.GetNumRows() == 1)) throw std::invalid_argument("RowElementMultiplyWith: The input matrix should be a row vector and match [this]'s columns."); //WARNING: a and this must have same type @@ -1763,7 +1763,7 @@ namespace Microsoft { namespace MSR { namespace CNTK { if (a.IsEmpty() || IsEmpty()) throw std::logic_error("RowElementDivideBy: Matrix is empty."); - if (!(a.GetNumRows() == GetNumRows() && a.GetNumCols() == 1)) + if (!(a.GetNumCols() == GetNumCols() && a.GetNumRows() == 1)) throw std::invalid_argument("RowElementDivideBy: The input matrix should be a row vector and match [this]'s columns."); //WARNING: a and this must have same type