Skip to content

Commit

Permalink
Merge branch 'master' of https://git01.codeplex.com/cntk
Browse files: browse the repository at this point in the history
  • Loading branch information
kaisheng committed Jul 28, 2015
2 parents 7bfcb0d + 779c6d8 commit 4ce1621
Show file tree
Hide file tree
Showing 23 changed files with 443 additions and 286 deletions.
2 changes: 1 addition & 1 deletion CNTK.sln
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CheckInSuites", "CheckInSui
{E6F26F9A-FF64-4F0A-B749-CD309EE357EE} = {E6F26F9A-FF64-4F0A-B749-CD309EE357EE}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NDRMReader", "DataReader\NDRMReader\NDRMReader.vcxproj", "{CE429AA2-3778-4619-8FD1-49BA3B81197B}"
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SparsePCReader", "DataReader\SparsePCReader\SparsePCReader.vcxproj", "{CE429AA2-3778-4619-8FD1-49BA3B81197B}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand Down
4 changes: 2 additions & 2 deletions Common/DataReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,11 @@ void DataReader<ElemType>::GetDataReader(const ConfigParameters& config)
string randomizeString = config("randomize");
if (randomizeString == "None")
{
mDoRandomize = false;
this->mDoRandomize = false;
}
else if (randomizeString == "Auto")
{
mDoRandomize = true;
this->mDoRandomize = true;
}
}

Expand Down
7 changes: 5 additions & 2 deletions DataReader/Kaldi2Reader/HTKMLFReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1771,8 +1771,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
void HTKMLFReader<ElemType>::SetSentenceSegBatch(Matrix<ElemType> &sentenceBegin, vector<MinibatchPackingFlag>& minibatchPackingFlag)
{
sentenceBegin.SetValue(m_sentenceBegin);
minibatchPackingFlag = m_minibatchPackingFlag;
if (!m_framemode)
{
sentenceBegin.SetValue(m_sentenceBegin);
minibatchPackingFlag = m_minibatchPackingFlag;
}
}

// For Kaldi2Reader, we now make the following assumptions
Expand Down
2 changes: 1 addition & 1 deletion DataReader/Kaldi2Reader/rollingwindowsource.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ namespace msra { namespace dbn {
{
msra::util::attempt (5, [&]()
{
reader.readNoAlloc (ppath, featkind, sampperiod, feat); // whole file read as columns of feature vectors
reader.readAlloc (ppath, featkind, sampperiod, feat); // whole file read as columns of feature vectors
});
if (featdim == 0) // first time
featdim = feat.rows();
Expand Down
28 changes: 20 additions & 8 deletions DataReader/Kaldi2Reader/utterancesourcemulti.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ class minibatchutterancesourcemulti : public minibatchsource
}
// page in data for this chunk
// We pass in the feature info variables by ref which will be filled lazily upon first read
void requiredata (string & featkind, size_t & featdim, unsigned int & sampperiod, const latticesource & latticesource) const
void requiredata (string & featkind, size_t & featdim, unsigned int & sampperiod, const latticesource & latticesource, int verbosity=0) const
{

if (numutterances() == 0)
Expand Down Expand Up @@ -142,7 +142,10 @@ class minibatchutterancesourcemulti : public minibatchsource
latticesource.getlattices (utteranceset[i].key(), lattices[i], uttframes.cols());
}
//fprintf (stderr, "\n");
fprintf (stderr, "requiredata: %zu utterances read\n", utteranceset.size());
if (verbosity)
{
fprintf (stderr, "requiredata: %zu utterances read\n", utteranceset.size());
}
}
catch (...)
{
Expand Down Expand Up @@ -360,12 +363,17 @@ class minibatchutterancesourcemulti : public minibatchsource
throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances");

foreach_index(i, infiles[m]){
utterancedesc utterance(msra::asr::htkfeatreader::parsedpath(infiles[m][i],featuresections[m]), 0); //mseltzer - is this foolproof for multiio? is classids always non-empty?
utterancedesc utterance(msra::asr::htkfeatreader::parsedpath(infiles[m][i],featuresections[m]), 0); //mseltzer - is this foolproof for multiio? is classids always non-empty?
const size_t uttframes = utterance.numframes(); // will throw if frame bounds not given --required to be given in this mode
// we need at least 2 frames for boundary markers to work
if (uttframes < 2)
throw std::runtime_error("minibatchutterancesource: utterances < 2 frames not supported");
if (uttframes > frameref::maxframesperutterance)
{
//throw std::runtime_error("minibatchutterancesource: utterances < 2 frames not supported");
fprintf(stderr, "minibatchutterancesource: skipping %d-th file (%d frames) because it less than. frames (%d) for frameref bit field: %S", i, uttframes, 2, key.c_str());
uttduration[i] = 0;
uttisvalid[i] = false;
}
if (uttframes > frameref::maxframesperutterance || uttframes <2)
{
fprintf(stderr, "minibatchutterancesource: skipping %d-th file (%d frames) because it exceeds max. frames (%d) for frameref bit field: %S", i, uttframes, frameref::maxframesperutterance, key.c_str());
uttduration[i] = 0;
Expand Down Expand Up @@ -672,7 +680,8 @@ class minibatchutterancesourcemulti : public minibatchsource
return sweep;

currentsweep = sweep;
fprintf (stderr, "lazyrandomization: re-randomizing for sweep %zu in %s mode\n", currentsweep, framemode ? "frame" : "utterance");
if (verbosity>0)
fprintf (stderr, "lazyrandomization: re-randomizing for sweep %zu in %s mode\n", currentsweep, framemode ? "frame" : "utterance");

const size_t sweepts = sweep * _totalframes; // first global frame index for this sweep

Expand Down Expand Up @@ -984,6 +993,7 @@ class minibatchutterancesourcemulti : public minibatchsource
auto & chunkdata = randomizedchunks[m][k].getchunkdata();
if (chunkdata.isinram())
{
if (verbosity)
fprintf (stderr, "releaserandomizedchunk: paging out randomized chunk %zu (frame range [%zu..%zu]), %zu resident in RAM\n",
k, randomizedchunks[m][k].globalts, randomizedchunks[m][k].globalte()-1, chunksinram-1);
chunkdata.releasedata();
Expand Down Expand Up @@ -1029,10 +1039,11 @@ class minibatchutterancesourcemulti : public minibatchsource
{
auto & chunk = randomizedchunks[m][chunkindex];
auto & chunkdata = chunk.getchunkdata();
if (verbosity)
fprintf (stderr, "feature set %d: requirerandomizedchunk: paging in randomized chunk %zu (frame range [%zu..%zu]), %zu resident in RAM\n", m, chunkindex, chunk.globalts, chunk.globalte()-1, chunksinram+1);
msra::util::attempt (5, [&]() // (reading from network)
{
chunkdata.requiredata (featkind[m], featdim[m], sampperiod[m], this->lattices);
chunkdata.requiredata (featkind[m], featdim[m], sampperiod[m], this->lattices, verbosity);
});
}
chunksinram++;
Expand Down Expand Up @@ -1228,7 +1239,8 @@ class minibatchutterancesourcemulti : public minibatchsource
const size_t lastchunk = chunkforframepos (globalte-1);
const size_t windowbegin = randomizedchunks[0][firstchunk].windowbegin;
const size_t windowend = randomizedchunks[0][lastchunk].windowend;
fprintf (stderr, "getbatch: getting randomized frames [%zu..%zu] (%zu frames out of %zu requested) in sweep %zu; chunks [%zu..%zu] -> chunk window [%zu..%zu)\n",
if (verbosity)
fprintf (stderr, "getbatch: getting randomized frames [%zu..%zu] (%zu frames out of %zu requested) in sweep %zu; chunks [%zu..%zu] -> chunk window [%zu..%zu)\n",
globalts, globalte, mbframes, framesrequested, sweep, firstchunk, lastchunk, windowbegin, windowend);
// release all data outside, and page in all data inside
for (size_t k = 0; k < windowbegin; k++)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@
#include "stdafx.h"
#define DATAREADER_EXPORTS
#include "DataReader.h"
#include "NDRMReader.h"
#include "SparsePCReader.h"

namespace Microsoft { namespace MSR { namespace CNTK {

template<class ElemType>
void DATAREADER_API GetReader(IDataReader<ElemType>** preader)
{
*preader = new NDRMReader<ElemType>();
*preader = new SparsePCReader<ElemType>();
}

extern "C" DATAREADER_API void GetReaderF(IDataReader<float>** preader)
Expand Down
Loading

0 comments on commit 4ce1621

Please sign in to comment.