minor: swapped order of two functions in DataReaderHelpers.h
frankseide committed Nov 30, 2015
1 parent 98d5016 commit 1065cdc
Showing 1 changed file with 62 additions and 62 deletions.
124 changes: 62 additions & 62 deletions MachineLearning/CNTKSGDLib/DataReaderHelpers.h
@@ -14,68 +14,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {

/*static*/ struct DataReaderHelpers
{
// -------------------------------------------------------------------
// DecimateMinibatch - decimate minibatch for parallelization
// -------------------------------------------------------------------

// We sub-sample the parallel utterances.
template<class ElemType>
static void DecimateMinibatch(std::map<std::wstring, Matrix<ElemType>*> &mb, // matrix to be decimated
int numprocs, int rank, // rank info
MBLayoutPtr pMBLayout) // gets decimated as well
{
if (numprocs == 1)
return;

// For RNN, an input Matrix is organized in the following way:
// | x_t^1 x_t^2 ... x_t^N | ... | x_{t+T-1}^1 ... x_{t+T-1}^N |
// |<--- micro-batch 1 --->| ... |<------ micro-batch T ------>|
// N is the number of parallel sequences.
// The decimation here splits each block across the individual GPUs.
// Decimation will select a sub-range st..en of parallel sequences from each micro-batch:
// | x_t^{st} ... x_t^{en}| .... | x_{t+T-1}^{st} ... x_{t+T-1}^{en} |
// This function will update the MB data and also the MBLayout.

size_t nOrigParallelUtts = pMBLayout->GetNumParallelSequences();
size_t T = pMBLayout->GetNumTimeSteps();

// decide new parallel utterances
size_t sent_start = nOrigParallelUtts * (size_t)rank / numprocs;
size_t sent_end = nOrigParallelUtts * (size_t)(rank + 1) / numprocs;
size_t newNumParallelSequences = sent_end - sent_start;

// decimate data
size_t rv = 0;
for (auto & it : mb)
{
MSR::CNTK::Matrix<ElemType> &mat = *it.second;
size_t nCols = mat.GetNumCols();

// check that the number of columns is consistent across all input nodes
if (rv == 0)
rv = nCols;
else if (rv != nCols)
LogicError("DecimateMinibatch: Inconsistent number of columns among inputs (found %d and %d).", (int)rv, (int)nCols);

if (T != nCols / nOrigParallelUtts)
LogicError("ERROR: MBLayout borked, GetNumTimeSteps() mismatches minibatch number of columns\n");
if (T * nOrigParallelUtts != nCols) // (should really not happen)
LogicError("ERROR: minibatch size %d, but with %d parallel utterances --layout information borked\n", (int)nCols, (int)nOrigParallelUtts);

// copy the respective columns
// TODO: not efficient. We should use a special row-slice assignment function that allows overlapping input/output.
MSR::CNTK::Matrix<ElemType> tmp(mat.GetNumRows(), newNumParallelSequences*T, mat.GetPreferredDeviceId(), mat.GetMatrixType());
for (size_t t = 0; t < T; t++)
tmp.SetColumnSlice(mat.ColumnSlice(t*nOrigParallelUtts + sent_start, newNumParallelSequences), t*newNumParallelSequences, newNumParallelSequences);
mat.SetValue(tmp); // update matrix in-place (new matrix has fewer parallel streams)
}
// decimate layout
auto pNewMBLayout = make_shared<MBLayout>(newNumParallelSequences, T, true);
for (size_t t = 0; t < T; t++) for (size_t id = 0; id < newNumParallelSequences; id++)
pNewMBLayout->Set(id, t, pMBLayout->Get(id + sent_start, t));
pMBLayout->MoveFrom(pNewMBLayout); // update layout in-place
}

// -------------------------------------------------------------------
// GetMinibatchIntoNetwork() -- get one minibatch from Reader (this->trainSetDataReader) into Network (this->net)
// Returns false if end of epoch has been reached.
@@ -164,6 +102,68 @@ namespace Microsoft { namespace MSR { namespace CNTK {

return true;
}

// -------------------------------------------------------------------
// DecimateMinibatch - decimate minibatch for parallelization
// -------------------------------------------------------------------

// We sub-sample the parallel utterances.
template<class ElemType>
static void DecimateMinibatch(std::map<std::wstring, Matrix<ElemType>*> &mb, // matrix to be decimated
int numprocs, int rank, // rank info
MBLayoutPtr pMBLayout) // gets decimated as well
{
if (numprocs == 1)
return;

// For RNN, an input Matrix is organized in the following way:
// | x_t^1 x_t^2 ... x_t^N | ... | x_{t+T-1}^1 ... x_{t+T-1}^N |
// |<--- micro-batch 1 --->| ... |<------ micro-batch T ------>|
// N is the number of parallel sequences.
// The decimation here splits each block across the individual GPUs.
// Decimation will select a sub-range st..en of parallel sequences from each micro-batch:
// | x_t^{st} ... x_t^{en}| .... | x_{t+T-1}^{st} ... x_{t+T-1}^{en} |
// This function will update the MB data and also the MBLayout.

size_t nOrigParallelUtts = pMBLayout->GetNumParallelSequences();
size_t T = pMBLayout->GetNumTimeSteps();

// decide new parallel utterances
size_t sent_start = nOrigParallelUtts * (size_t)rank / numprocs;
size_t sent_end = nOrigParallelUtts * (size_t)(rank + 1) / numprocs;
size_t newNumParallelSequences = sent_end - sent_start;

// decimate data
size_t rv = 0;
for (auto & it : mb)
{
MSR::CNTK::Matrix<ElemType> &mat = *it.second;
size_t nCols = mat.GetNumCols();

// check that the number of columns is consistent across all input nodes
if (rv == 0)
rv = nCols;
else if (rv != nCols)
LogicError("DecimateMinibatch: Inconsistent number of columns among inputs (found %d and %d).", (int)rv, (int)nCols);

if (T != nCols / nOrigParallelUtts)
LogicError("ERROR: MBLayout borked, GetNumTimeSteps() mismatches minibatch number of columns\n");
if (T * nOrigParallelUtts != nCols) // (should really not happen)
LogicError("ERROR: minibatch size %d, but with %d parallel utterances --layout information borked\n", (int)nCols, (int)nOrigParallelUtts);

// copy the respective columns
// TODO: not efficient. We should use a special row-slice assignment function that allows overlapping input/output.
MSR::CNTK::Matrix<ElemType> tmp(mat.GetNumRows(), newNumParallelSequences*T, mat.GetPreferredDeviceId(), mat.GetMatrixType());
for (size_t t = 0; t < T; t++)
tmp.SetColumnSlice(mat.ColumnSlice(t*nOrigParallelUtts + sent_start, newNumParallelSequences), t*newNumParallelSequences, newNumParallelSequences);
mat.SetValue(tmp); // update matrix in-place (new matrix has fewer parallel streams)
}
// decimate layout
auto pNewMBLayout = make_shared<MBLayout>(newNumParallelSequences, T, true);
for (size_t t = 0; t < T; t++) for (size_t id = 0; id < newNumParallelSequences; id++)
pNewMBLayout->Set(id, t, pMBLayout->Get(id + sent_start, t));
pMBLayout->MoveFrom(pNewMBLayout); // update layout in-place
}
};

}}}
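
For reference, below is a small standalone sketch (not part of the commit) of the column-slicing arithmetic that DecimateMinibatch() performs: each rank keeps a contiguous sub-range [sent_start, sent_end) of the N parallel sequences, and for every time step the matching source columns map to a contiguous destination range. It uses plain printf in place of the CNTK Matrix/ColumnSlice calls, and the values of N, T, and numprocs are made-up illustration inputs.

// Standalone sketch of the decimation index arithmetic (assumed inputs, not CNTK code).
#include <cstdio>
#include <cstddef>

int main()
{
    const std::size_t N = 8;  // original number of parallel sequences (illustrative)
    const std::size_t T = 3;  // number of time steps in the minibatch (illustrative)
    const int numprocs = 3;   // number of ranks sharing the minibatch (illustrative)

    for (int rank = 0; rank < numprocs; rank++)
    {
        // same integer arithmetic as DecimateMinibatch(): each rank gets a
        // contiguous sub-range [sent_start, sent_end) of the N parallel sequences
        std::size_t sent_start = N * (std::size_t)rank / numprocs;
        std::size_t sent_end   = N * (std::size_t)(rank + 1) / numprocs;
        std::size_t newN       = sent_end - sent_start;
        std::printf("rank %d keeps sequences [%zu, %zu) -> %zu of %zu\n",
                    rank, sent_start, sent_end, newN, N);

        // per time step, source columns t*N + sent_start .. t*N + sent_end - 1
        // land in destination columns t*newN .. (t+1)*newN - 1
        for (std::size_t t = 0; t < T; t++)
            std::printf("  t=%zu: src cols [%zu, %zu) -> dst cols [%zu, %zu)\n",
                        t, t * N + sent_start, t * N + sent_end,
                        t * newN, (t + 1) * newN);
    }
    return 0;
}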
