
Commit

Apply code review feedback
Wayne-Xiong committed Mar 3, 2016
1 parent 9c4ed05 commit b2b6691
Showing 8 changed files with 49 additions and 49 deletions.
10 changes: 6 additions & 4 deletions Source/ActionsLib/EvalActions.cpp
@@ -59,7 +59,8 @@ static void DoEvalBase(const ConfigParameters& config, IDataReader& reader)
size_t numMBsToShowResult = config(L"numMBsToShowResult", "100");
size_t maxSamplesInRAM = config(L"maxSamplesInRAM", (size_t)SIZE_MAX);
size_t numSubminiBatches = config(L"numSubminibatches", (size_t)1);
bool paralleltrain = config(L"parallelTrain", false);
//TODO: switch to a global parallel setting for both training and evaluation.
bool useParallel = config(L"parallelTrain", false);

ConfigArray evalNodeNames = config(L"evalNodeNames", "");
vector<wstring> evalNodeNamesVector;
@@ -70,7 +71,7 @@ static void DoEvalBase(const ConfigParameters& config, IDataReader& reader)

auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelPath);

SimpleEvaluator<ElemType> eval(net, numMBsToShowResult, traceLevel, maxSamplesInRAM, numSubminiBatches, paralleltrain);
SimpleEvaluator<ElemType> eval(net, numMBsToShowResult, traceLevel, maxSamplesInRAM, numSubminiBatches, useParallel);
eval.Evaluate(&reader, evalNodeNamesVector, mbSize[0], epochSize);
}

@@ -119,7 +120,8 @@ void DoCrossValidate(const ConfigParameters& config)
size_t numMBsToShowResult = config(L"numMBsToShowResult", "100");
size_t maxSamplesInRAM = config(L"maxSamplesInRAM", (size_t)SIZE_MAX);
size_t numSubminiBatches = config(L"numSubminibatches", (size_t)1);
bool paralleltrain = config(L"parallelTrain", false);
//TODO: switch to a global parallel setting for both training and evaluation.
bool useParallel = config(L"parallelTrain", false);

ConfigArray evalNodeNames = config(L"evalNodeNames", "");
vector<wstring> evalNodeNamesVector;
@@ -153,7 +155,7 @@ void DoCrossValidate(const ConfigParameters& config)
cvModels.push_back(cvModelPath);
auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, cvModelPath);

SimpleEvaluator<ElemType> eval(net, numMBsToShowResult, traceLevel, maxSamplesInRAM, numSubminiBatches, paralleltrain);
SimpleEvaluator<ElemType> eval(net, numMBsToShowResult, traceLevel, maxSamplesInRAM, numSubminiBatches, useParallel);

fprintf(stderr, "model %ls --> \n", cvModelPath.c_str());
auto evalErrors = eval.Evaluate(&cvDataReader, evalNodeNamesVector, mbSize[0], epochSize);
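
The TODO in both functions points at replacing the per-action parallelTrain reads with one setting shared by training and evaluation. A minimal sketch of the idea, using a generic map-based config stand-in; the helper names and the lookup code below are hypothetical and not part of this commit:

    #include <map>
    #include <string>

    // Hypothetical stand-in for a parsed config section; CNTK's ConfigParameters
    // lookup with a default (config(L"parallelTrain", false)) plays the same role above.
    using Config = std::map<std::wstring, std::wstring>;

    static bool ReadBool(const Config& config, const std::wstring& key, bool defaultValue)
    {
        auto it = config.find(key);
        return it == config.end() ? defaultValue : (it->second == L"true");
    }

    // Read the parallelism switch once; both DoEvalBase/DoCrossValidate and the trainer
    // would receive this value instead of re-reading "parallelTrain" themselves.
    static bool ReadGlobalParallelSetting(const Config& config)
    {
        return ReadBool(config, L"parallelTrain", false);
    }
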
2 changes: 1 addition & 1 deletion Source/CNTK/ModelEditLanguage.cpp
@@ -596,7 +596,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
case melPropBatchNormMode:
{
bool evalMode = params[2];
netNdl->cn->SetBatchNormlizationNodesBelowEvalMode(evalMode, node);
netNdl->cn->SetBatchNormalizationNodesBelowEvalMode(evalMode, node);
break;
}
default:
2 changes: 1 addition & 1 deletion Source/ComputationNetworkLib/ComputationNetwork.h
@@ -328,7 +328,7 @@ class ComputationNetwork : public ScriptableObjects::Object, public ScriptableOb
void AddFeatureNode(ComputationNodeBasePtr featureNode);
void RemoveFeatureNode(ComputationNodeBasePtr featureNode);
void SetLearnableNodesBelowLearningRateMultiplier(const float learningRateMultiplier, const ComputationNodeBasePtr& rootNode = nullptr);
void SetBatchNormlizationNodesBelowEvalMode(const bool evalMode, const ComputationNodeBasePtr& rootNode = nullptr);
void SetBatchNormalizationNodesBelowEvalMode(const bool evalMode, const ComputationNodeBasePtr& rootNode = nullptr);

// -----------------------------------------------------------------------
// node access
2 changes: 1 addition & 1 deletion Source/ComputationNetworkLib/ComputationNetworkEditing.cpp
@@ -323,7 +323,7 @@ void ComputationNetwork::SetLearnableNodesBelowLearningRateMultiplier(const floa
}
}

void ComputationNetwork::SetBatchNormlizationNodesBelowEvalMode(const bool evalMode, const ComputationNodeBasePtr& rootNode /* = nullptr */)
void ComputationNetwork::SetBatchNormalizationNodesBelowEvalMode(const bool evalMode, const ComputationNodeBasePtr& rootNode /* = nullptr */)
{
vector<ComputationNodeBasePtr> nodes;
if (rootNode == nullptr)
14 changes: 9 additions & 5 deletions Source/SGDLib/DataReaderHelpers.h
@@ -186,23 +186,27 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}

template<class ElemType>
static size_t GetNumSubminibatchNeeded(IDataReader<ElemType>* dataReader,
static size_t GetNumSubminibatchesNeeded(IDataReader<ElemType>* dataReader,
size_t maxSamplesInRAM,
size_t numSubminiBatches,
size_t numSubminibatches,
size_t tunedMBSize)
{
if (numSubminiBatches > 1)
return numSubminiBatches;
if (numSubminibatches > 1) // the user explicitly specified the number of subminibatches
return numSubminibatches;

if (maxSamplesInRAM < SIZE_MAX)
{
// into how many pieces would we need to break the minibatch?
// TODO: The following calculation relies on the ill-devised definition of "minibatch" of the current truncated BPTT implementation. Adapt this once fixed.
size_t numParallelSequences = dataReader->GetNumParallelSequences();
size_t estimatedMBSize = tunedMBSize * numParallelSequences;
return (size_t)std::ceil((float)estimatedMBSize / maxSamplesInRAM);
return (estimatedMBSize + maxSamplesInRAM - 1) / maxSamplesInRAM;
}

// The return value of this method decides how many subminibatches are needed for the training or
// eval process. The caller only starts the subminibatch loop when the calculated number is larger
// than 1, so returning 0 or 1 here yields the same behavior. The default should still be 0,
// which means no subminibatches are needed in this case.
return 0;
}

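
For context (not part of the commit): the new return statement replaces the float-based std::ceil with exact integer ceiling division, and the 0/1 return values are interchangeable because the caller only dispatches subminibatches when the count is greater than 1. A minimal standalone sketch of the arithmetic, with hypothetical names:

    #include <cassert>
    #include <cstddef>

    // Integer ceiling division: number of chunks of size `chunk` needed to cover `total`.
    // Equivalent to ceil(total / (double)chunk) but exact, assuming total + chunk - 1 does not overflow.
    static size_t CeilDiv(size_t total, size_t chunk)
    {
        return (total + chunk - 1) / chunk;
    }

    int main()
    {
        assert(CeilDiv(1000, 250) == 4); // exact fit
        assert(CeilDiv(1001, 250) == 5); // the remainder needs one extra chunk
        assert(CeilDiv(100, 250) == 1);  // one chunk is enough; the caller treats 0 and 1 identically
        return 0;
    }
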
6 changes: 3 additions & 3 deletions Source/SGDLib/SGD.cpp
@@ -335,7 +335,7 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,

// set dropout rate for this epoch
ComputationNetwork::SetDropoutRate<ElemType>(net, criterionNodes[0], m_dropoutRates[i], prevDropoutRate, dropOutSeed);
net->SetBatchNormlizationNodesBelowEvalMode(false, criterionNodes[0]);
net->SetBatchNormalizationNodesBelowEvalMode(false, criterionNodes[0]);

// learning rate adjustment
if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::None || i < m_learningRatesParam.size())
@@ -438,7 +438,7 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
timer.Stop();
double epochTime = timer.ElapsedSeconds();

net->SetBatchNormlizationNodesBelowEvalMode(true, criterionNodes[0]);
net->SetBatchNormalizationNodesBelowEvalMode(true, criterionNodes[0]);

if (m_useEvalCriterionControlLR && epochEvalErrors.size() > 0)
{
@@ -781,7 +781,7 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
// prepare for sub-minibatching
// Sub-minibatching is used if a single minibatch is too large to fit into GPU RAM.
DataReaderHelpers::SubminibatchDispatcher<ElemType> smbDispatcher;
size_t numSubminibatchesNeeded = DataReaderHelpers::GetNumSubminibatchNeeded(trainSetDataReader, m_maxSamplesInRAM, m_numSubminiBatches, tunedMBSize);
size_t numSubminibatchesNeeded = DataReaderHelpers::GetNumSubminibatchesNeeded(trainSetDataReader, m_maxSamplesInRAM, m_numSubminiBatches, tunedMBSize);
// this is non-trivial, we need a manager object to handle this
if (numSubminibatchesNeeded > 1)
smbDispatcher.Init(net, learnableNodes, criterionNodes, evaluationNodes);
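
Side note, not part of the commit: the epoch loop above switches batch-normalization nodes into training mode before each epoch and back into eval mode afterwards with two explicit calls. A minimal sketch of an exception-safe alternative, using a hypothetical RAII guard (illustrative only):

    #include <functional>
    #include <utility>

    // Hypothetical scope guard: puts batch-norm nodes into training mode on entry and
    // restores eval mode on exit, even if the training epoch throws.
    class ScopedBatchNormTrainingMode
    {
    public:
        explicit ScopedBatchNormTrainingMode(std::function<void(bool)> setEvalMode)
            : m_setEvalMode(std::move(setEvalMode))
        {
            m_setEvalMode(false); // enter training mode
        }
        ~ScopedBatchNormTrainingMode()
        {
            m_setEvalMode(true); // restore eval mode
        }

    private:
        std::function<void(bool)> m_setEvalMode;
    };

    // Usage sketch inside the epoch loop:
    // ScopedBatchNormTrainingMode bnGuard([&](bool evalMode) {
    //     net->SetBatchNormalizationNodesBelowEvalMode(evalMode, criterionNodes[0]);
    // });
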
1 change: 1 addition & 0 deletions Source/SGDLib/SimpleDistGradAggregator.h
@@ -5,6 +5,7 @@
#include <future>
#include "GPUDataTransferer.h"
#include "TimerUtility.h"
#include "MatrixQuantizerImpl.h"

namespace Microsoft { namespace MSR { namespace CNTK {

61 changes: 27 additions & 34 deletions Source/SGDLib/SimpleEvaluator.h
@@ -13,7 +13,6 @@
#include "ProgressTracing.h"
#include "DistGradHeader.h"
#include "IDistGradAggregator.h"
#include "MatrixQuantizerImpl.h"
#include "SimpleDistGradAggregator.h"

#include <vector>
@@ -108,78 +107,72 @@ class SimpleEvaluator
for (int i = 0; i < evalResults.size(); i++)
evalResultsLastMBs.push_back((ElemType) 0);

//TODO: we should add support for distributed reading
dataReader->StartMinibatchLoop(mbSize, 0, testSize);
m_net->StartEvaluateMinibatchLoop(evalNodes);

std::vector<Matrix<ElemType>*> learnParamsGradients;
DataReaderHelpers::SubminibatchDispatcher<ElemType> smbDispatcher;
size_t numSubminibatchesNeeded = DataReaderHelpers::GetNumSubminibatchNeeded(dataReader, m_maxSamplesInRAM, m_numSubminiBatches, mbSize);
size_t numSubminibatchesNeeded = DataReaderHelpers::GetNumSubminibatchesNeeded(dataReader, m_maxSamplesInRAM, m_numSubminiBatches, mbSize);

// Passing in two empty node lists so the dispatcher can work for the evalNodes.
std::list<ComputationNodeBasePtr> learnableNodes;
std::vector<ComputationNodeBasePtr> criterionNodes;
if (numSubminibatchesNeeded > 1)
smbDispatcher.Init(m_net, learnableNodes, criterionNodes, evalNodes);

bool noMoreSamplesToProcess = false;
for (;;)
while (DataReaderHelpers::GetMinibatchIntoNetwork(*dataReader, m_net, nullptr, false, m_parallelRun, inputMatrices, actualMBSize))
{
bool wasDataRead = DataReaderHelpers::GetMinibatchIntoNetwork(*dataReader, m_net, nullptr, false, m_parallelRun, inputMatrices, actualMBSize);
if (!wasDataRead && (!m_parallelRun || noMoreSamplesToProcess))
break;

if (!wasDataRead)
actualMBSize = 0;

if (actualMBSize > 0)
size_t actualNumSubminibatches = numSubminibatchesNeeded <= 1 ? 1 : smbDispatcher.GetMinibatchIntoCache(*dataReader, *m_net, inputMatrices, numSubminibatchesNeeded);
for (size_t ismb = 0; ismb < actualNumSubminibatches; ismb++)
{
size_t actualNumSubminibatches = numSubminibatchesNeeded <= 1 ? 1 : smbDispatcher.GetMinibatchIntoCache(*dataReader, *m_net, inputMatrices, numSubminibatchesNeeded);
for (size_t ismb = 0; ismb < actualNumSubminibatches; ismb++)
if (actualNumSubminibatches > 1)
{
if (actualNumSubminibatches > 1)
{
smbDispatcher.GetSubMinibatchToNet(ismb); // get sub-minibatch from full-size one
}

ComputationNetwork::BumpEvalTimeStamp(featureNodes);
ComputationNetwork::BumpEvalTimeStamp(labelNodes);
smbDispatcher.GetSubMinibatchToNet(ismb); // get sub-minibatch from full-size one
}

m_net->ForwardProp(evalNodes);
ComputationNetwork::BumpEvalTimeStamp(featureNodes);
ComputationNetwork::BumpEvalTimeStamp(labelNodes);

// house-keeping for sub-minibatching
if (actualNumSubminibatches > 1)
smbDispatcher.DoneWithCurrentSubMinibatch(ismb); // page state out
} // end sub-minibatch loop
m_net->ForwardProp(evalNodes);

// house-keeping for sub-minibatching
if (actualNumSubminibatches > 1)
smbDispatcher.DoneWithCurrentMinibatch();
} // if (actualMBSize > 0)
smbDispatcher.DoneWithCurrentSubMinibatch(ismb); // page state out
} // end sub-minibatch loop

if (actualNumSubminibatches > 1)
smbDispatcher.DoneWithCurrentMinibatch();

size_t numSamplesWithLabel = wasDataRead ? m_net->GetNumSamplesWithLabel(actualMBSize) : 0;
size_t numSamplesWithLabel = m_net->GetNumSamplesWithLabel(actualMBSize);
size_t aggregateNumSamplesWithLabel = numSamplesWithLabel;
if (m_parallelRun)
{
if (m_gradHeader == nullptr)
{
m_gradHeader = DistGradHeader::Create(evalNodes.size());
m_distGradAgg = new SimpleDistGradAggregator<ElemType>(g_mpi, false, m_traceLevel);
m_distGradAgg = make_shared<SimpleDistGradAggregator<ElemType>>(g_mpi, false, m_traceLevel);
}

m_gradHeader->numEvalNode = evalNodes.size();
m_gradHeader->numSamples = actualMBSize;
m_gradHeader->numSamplesWithLabel = numSamplesWithLabel;
m_gradHeader->criterion = 0.0;
for (size_t i = 0; i < evalNodes.size(); i++)
m_gradHeader->evalErrors[i] = actualMBSize > 0 ? evalNodes[i]->Get00Element() : 0.0;
m_gradHeader->evalErrors[i] = evalNodes[i]->Get00Element();

// TODO: We are reusing the aggregation logic inside SimpleDistGradAggregator, which has a heavy dependency
// on the gradient matrix. At some point we should refactor the aggregator class so that it can compute
// eval results only, and then remove this hack.
if (learnParamsGradients.size() == 0)
{
Matrix<ElemType>* matrix = new Matrix<ElemType>((DEVICEID_TYPE)m_net->GetDeviceId());
learnParamsGradients.push_back(matrix);
}

bool samplesProcessed = m_distGradAgg->AggregateGradients(learnParamsGradients, m_gradHeader, 0);
noMoreSamplesToProcess = !samplesProcessed;
// Using SimpleDistGradAggregator for aggregating eval results only. At some point we should rename the classes
// to just IDistAggregator and SimpleDistAggregator.
m_distGradAgg->AggregateGradients(learnParamsGradients, m_gradHeader, 0);
aggregateNumSamplesWithLabel = m_gradHeader->numSamplesWithLabel;
for (size_t i = 0; i < evalResults.size(); i++)
evalResults[i] += m_gradHeader->evalErrors[i];
@@ -294,7 +287,7 @@
size_t m_numSubminiBatches;
bool m_parallelRun;

IDistGradAggregator<ElemType>* m_distGradAgg;
shared_ptr<IDistGradAggregator<ElemType>> m_distGradAgg;
struct DistGradHeader* m_gradHeader;
int m_traceLevel;
void operator=(const SimpleEvaluator&); // (not assignable)
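
For context (not part of the commit): the aggregator member changes from a raw IDistGradAggregator<ElemType>* to a shared_ptr, and the matching new becomes make_shared, so the evaluator no longer needs to delete the aggregator manually. A minimal sketch of the ownership pattern with stand-in types (not the CNTK classes):

    #include <memory>

    struct IAggregator { virtual ~IAggregator() = default; };

    struct SimpleAggregator final : IAggregator
    {
        SimpleAggregator(bool /*useAsyncAggregation*/, int /*traceLevel*/) {}
    };

    class Evaluator
    {
    public:
        void EnsureAggregator()
        {
            // Lazily create the aggregator on first use, as SimpleEvaluator does;
            // shared_ptr now owns it, so no explicit delete is needed in the destructor.
            if (m_agg == nullptr)
                m_agg = std::make_shared<SimpleAggregator>(/*useAsyncAggregation=*/false, /*traceLevel=*/0);
        }

    private:
        std::shared_ptr<IAggregator> m_agg; // was: IAggregator* m_agg;
    };
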
