From 9d750a855df9c277c39022cf2a06872ef7be4f53 Mon Sep 17 00:00:00 2001
From: Amit Agarwal
Date: Thu, 17 Mar 2016 17:07:49 -0700
Subject: [PATCH] Fixed a memshare bug that incorrectly reallocated network
 matrices during cross-validation even though they had already been allocated
 before training started

---
 Source/ComputationNetworkLib/ComputationNetwork.h          | 3 +++
 .../ComputationNetworkLib/ComputationNetworkEvaluation.cpp | 5 +++++
 .../Speech/DNN/ParallelCrossValidation/run-test            | 2 +-
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/Source/ComputationNetworkLib/ComputationNetwork.h b/Source/ComputationNetworkLib/ComputationNetwork.h
index a5cf2dd93e75..a7802f8658d7 100644
--- a/Source/ComputationNetworkLib/ComputationNetwork.h
+++ b/Source/ComputationNetworkLib/ComputationNetwork.h
@@ -46,6 +46,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb
     ComputationNetwork() :
         m_randomSeedOffset(0),
         m_isCompiled(false),
+        m_areMatricesAllocated(false),
         m_pMBLayout(make_shared<MBLayout>())
     {
     }
@@ -169,6 +170,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb
     void CollectInputAndLearnableParameters(const ComputationNodeBasePtr& rootNode);
     void CollectInputAndLearnableParametersRec(const ComputationNodeBasePtr& node, set<ComputationNodeBasePtr>& visited, list<ComputationNodeBasePtr>& inputs, list<ComputationNodeBasePtr>& learnableParameters);
     bool IsCompiled() const { return m_isCompiled; }
+    bool AreMatricesAllocated() const { return m_areMatricesAllocated; }
     void VerifyIsCompiled(const char* where) const;
 public:
     void AllocateAllMatrices(const std::vector<ComputationNodeBasePtr>& evalRootNodes, const std::vector<ComputationNodeBasePtr>& outValueRootNodes, ComputationNodeBasePtr trainRootNode);
@@ -884,6 +886,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb
 
     // cache for evaluation ordering:
     bool m_isCompiled; // CompileNetwork has been called
+    bool m_areMatricesAllocated; // AllocateAllMatrices has been called
 
     // cached network iterations
     std::map<const ComputationNodeBasePtr, std::list<ComputationNodeBasePtr>> m_evalOrders; // [out node] flat depth-first traversal starting from out node
diff --git a/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp b/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp
index dc2b224ba356..97e472e2fe58 100644
--- a/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp
+++ b/Source/ComputationNetworkLib/ComputationNetworkEvaluation.cpp
@@ -701,6 +701,9 @@ void ComputationNetwork::AllocateAllMatrices(const std::vector<ComputationNodeBasePtr>& evalRootNodes, const std::vector<ComputationNodeBasePtr>& outValueRootNodes, ComputationNodeBasePtr trainRootNode)
 {
+    if (AreMatricesAllocated())
+        return;
+
     // Allocate memory for forward/backward computation
     fprintf(stderr, "\n\nAllocating matrices for forward and/or backward propagation.\n");
@@ -833,6 +836,8 @@ void ComputationNetwork::AllocateAllMatrices(const std::vector<ComputationNodeBasePtr>& evalRootNodes, const std::vector<ComputationNodeBasePtr>& outValueRootNodes, ComputationNodeBasePtr trainRootNode)
+
+    m_areMatricesAllocated = true;
 }
 
 void ComputationNetwork::ReleaseMatricesAfterEvalForChildren(ComputationNodeBasePtr n, std::unordered_map<ComputationNodeBasePtr, int>& parentCount)
diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/run-test b/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/run-test
index 7cf51f442e93..5bf6a25b2829 100755
--- a/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/run-test
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/run-test
@@ -8,7 +8,7 @@ Instances=2
 NumCPUThreads=$(threadsPerInstance $Instances)
 
 # cntkmpirun
-cntkmpirun "-n $Instances" cntkcv.cntk "numCPUThreads=$NumCPUThreads"
+cntkmpirun "-n $Instances" cntkcv.cntk "numCPUThreads=$NumCPUThreads shareNodeValueMatrices=true"
 ExitCode=$?
 sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank0
 sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank1
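
The core of this change is an idempotence guard: AllocateAllMatrices now records that it has already run, so a later call from the cross-validation pass returns immediately instead of redoing the matrix allocation and sharing plan. Below is a minimal, self-contained sketch of that pattern; the DemoNetwork class and its members are hypothetical stand-ins for illustration only, not the actual ComputationNetwork implementation.

    #include <cstdio>

    // Hypothetical stand-in for ComputationNetwork, reduced to the guard pattern.
    class DemoNetwork
    {
    public:
        DemoNetwork() : m_areMatricesAllocated(false) {}

        bool AreMatricesAllocated() const { return m_areMatricesAllocated; }

        // Idempotent: the expensive allocation/sharing pass runs at most once.
        void AllocateAllMatrices()
        {
            if (AreMatricesAllocated())
                return; // matrices were already allocated before training started

            fprintf(stderr, "Allocating matrices for forward and/or backward propagation.\n");
            // ... build the memory-sharing plan and allocate node matrices here ...

            m_areMatricesAllocated = true; // remember, so cross-validation does not reallocate
        }

    private:
        bool m_areMatricesAllocated; // AllocateAllMatrices has been called
    };

    int main()
    {
        DemoNetwork net;
        net.AllocateAllMatrices(); // first call (training setup): allocates
        net.AllocateAllMatrices(); // second call (cross-validation): no-op
        return 0;
    }

The run-test change adds shareNodeValueMatrices=true, presumably so the parallel cross-validation end-to-end test exercises memory sharing and covers the code path fixed here.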