Skip to content

Commit

Permalink
minor changes based on code review
Browse files Browse the repository at this point in the history
  • Loading branch information
mlseltzer committed Jul 20, 2015
1 parent 45e79ad commit 9bb8d4d
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 61 deletions.
44 changes: 21 additions & 23 deletions DataReader/HTKMLFReader/utterancesourcemulti.h
Original file line number Diff line number Diff line change
Expand Up @@ -313,30 +313,28 @@ class minibatchutterancesourcemulti : public minibatchsource
LogicError("lattices not supported in utterancereadermulti");
}

allchunks = std::vector<std::vector<utterancechunkdata>>(infiles.size(), std::vector<utterancechunkdata>());
featdim = std::vector<size_t>(infiles.size(), 0);
sampperiod = std::vector<unsigned int>(infiles.size(), 0);
featkind = std::vector<string>(infiles.size(), "");

numclasses = std::vector<size_t>(labels.size(), 0);
counts = std::vector<std::vector<size_t>>(labels.size(), std::vector<size_t>());

foreach_index (i, labels)
{
//classids.push_back(biggrowablevector<CLASSIDTYPE>());
classids.push_back(unique_ptr<biggrowablevector<CLASSIDTYPE>>(new biggrowablevector<CLASSIDTYPE>()));
numclasses.push_back(0);
counts.push_back(std::vector<size_t>());
//std::pair<std::vector<wstring>,std::vector<wstring>> latticetocs;
//std::unordered_map<std::string,size_t> modelsymmap;
//lattices.push_back(shared_ptr<latticesource>(new latticesource(latticetocs, modelsymmap)));

}

// m is index for feature stream
// i is index for files within a stream (items in SCP file)
foreach_index(m, infiles){
allchunks.push_back(std::vector<utterancechunkdata>());
featdim.push_back(0); // initialize
sampperiod.push_back(0);
featkind.push_back("");
}


// first check consistency across feature streams
// We'll go through the SCP files for each stream to make sure the duration is consistent
// If not, we'll plan to ignore the utterance, and inform the user
// m indexes the feature stream
// i indexes the files within a stream (i.e. the items in the SCP file)
foreach_index(m, infiles){
if (m == 0){
numutts = infiles[m].size();
Expand All @@ -354,7 +352,7 @@ class minibatchutterancesourcemulti : public minibatchsource
throw std::runtime_error("minibatchutterancesource: utterances < 2 frames not supported");
if (uttframes > frameref::maxframesperutterance)
{
fprintf(stderr, "minibatchutterancesource: skipping %d-th file (%d frames) because it exceeds max. frames (%d) for frameref bit field: %S", i, uttframes, frameref::maxframesperutterance, key.c_str());
fprintf(stderr, "minibatchutterancesource: skipping %d-th file (%d frames) because it exceeds max. frames (%d) for frameref bit field: %S\n", i, uttframes, frameref::maxframesperutterance, key.c_str());
uttduration[i] = 0;
uttisvalid[i] = false;
}
Expand All @@ -364,7 +362,7 @@ class minibatchutterancesourcemulti : public minibatchsource
uttisvalid[i] = true;
}
else if (uttduration[i] != uttframes){
fprintf(stderr, "minibatchutterancesource: skipping %d-th file due to inconsistency in duration in different feature streams (%d vs %d frames)", i, uttduration[i], uttframes);
fprintf(stderr, "minibatchutterancesource: skipping %d-th file due to inconsistency in duration in different feature streams (%d vs %d frames)\n", i, uttduration[i], uttframes);
uttduration[i] = 0;
uttisvalid[i] = false;
}
Expand All @@ -377,21 +375,21 @@ class minibatchutterancesourcemulti : public minibatchsource
invalidutts++;
}
if (invalidutts > uttisvalid.size() / 2)
throw std::runtime_error("minibatchutterancesource: too many files not found in with inconsistent durations, assuming broken configuration\n");
throw std::runtime_error("minibatchutterancesource: too many files with inconsistent durations, assuming broken configuration\n");
else if (invalidutts>0)
fprintf(stderr, "Found inconsistent durations across feature streams in %d out of %d files.", invalidutts, uttisvalid.size());
fprintf(stderr, "Found inconsistent durations across feature streams in %d out of %d files\n", invalidutts, uttisvalid.size());


// now process the features and labels
size_t utterancesetsize = 0;
foreach_index (m, infiles)
{
utteranceset.clear();
if (m==0)
numutts = infiles[m].size();
else
if (infiles[m].size()!=numutts)
throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances");
//if (m==0)
// numutts = infiles[m].size();
//else
// if (infiles[m].size()!=numutts)
// throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances\n");
if (m==0)
classidsbegin.clear();

Expand Down Expand Up @@ -505,7 +503,7 @@ class minibatchutterancesourcemulti : public minibatchsource
_totalframes += uttframes;
}
}
else if (uttisvalid[i])
else
{
utteranceset.push_back(std::move(utterance));
}
Expand Down
37 changes: 16 additions & 21 deletions DataReader/HTKMLFReader_linux/utterancesourcemulti.h
Original file line number Diff line number Diff line change
Expand Up @@ -335,26 +335,21 @@ class minibatchutterancesourcemulti : public minibatchsource
LogicError("lattices not supported in utterancereadermulti");
}

allchunks = std::vector<std::vector<utterancechunkdata>>(infiles.size(), std::vector<utterancechunkdata>());
featdim = std::vector<size_t>(infiles.size(), 0);
sampperiod = std::vector<unsigned int>(infiles.size(), 0);
featkind = std::vector<string>(infiles.size(), "");
numclasses = std::vector<size_t>(labels.size(), 0);
counts = std::vector<std::vector<size_t>>(labels.size(), std::vector<size_t>());
foreach_index (i, labels)
{
//classids.push_back(biggrowablevector<CLASSIDTYPE>());
classids.push_back(unique_ptr<biggrowablevector<CLASSIDTYPE>>(new biggrowablevector<CLASSIDTYPE>()));
numclasses.push_back(0);
counts.push_back(std::vector<size_t>());
//std::pair<std::vector<wstring>,std::vector<wstring>> latticetocs;
//std::unordered_map<std::string,size_t> modelsymmap;
//lattices.push_back(shared_ptr<latticesource>(new latticesource(latticetocs, modelsymmap)));

}

// m is index for feature stream
// i is index for files within a stream (items in SCP file)
foreach_index(m, infiles){
allchunks.push_back(std::vector<utterancechunkdata>());
featdim.push_back(0); // initialize
sampperiod.push_back(0);
featkind.push_back("");
}

// first check consistency across feature streams
// We'll go through the SCP files for each stream to make sure the duration is consistent
Expand All @@ -376,7 +371,7 @@ class minibatchutterancesourcemulti : public minibatchsource
throw std::runtime_error("minibatchutterancesource: utterances < 2 frames not supported");
if (uttframes > frameref::maxframesperutterance)
{
fprintf(stderr, "minibatchutterancesource: skipping %d-th file (%d frames) because it exceeds max. frames (%d) for frameref bit field: %S", i, uttframes, frameref::maxframesperutterance, key.c_str());
fprintf(stderr, "minibatchutterancesource: skipping %d-th file (%d frames) because it exceeds max. frames (%d) for frameref bit field: %S\n", i, uttframes, frameref::maxframesperutterance, key.c_str());
uttduration[i] = 0;
uttisvalid[i] = false;
}
Expand All @@ -386,7 +381,7 @@ class minibatchutterancesourcemulti : public minibatchsource
uttisvalid[i] = true;
}
else if (uttduration[i] != uttframes){
fprintf(stderr, "minibatchutterancesource: skipping %d-th file due to inconsistency in duration in different feature streams (%d vs %d frames)", i, uttduration[i], uttframes);
fprintf(stderr, "minibatchutterancesource: skipping %d-th file due to inconsistency in duration in different feature streams (%d vs %d frames)\n", i, uttduration[i], uttframes);
uttduration[i] = 0;
uttisvalid[i] = false;
}
Expand All @@ -399,21 +394,21 @@ class minibatchutterancesourcemulti : public minibatchsource
invalidutts++;
}
if (invalidutts > uttisvalid.size() / 2)
throw std::runtime_error("minibatchutterancesource: too many files not found in with inconsistent durations, assuming broken configuration\n");
throw std::runtime_error("minibatchutterancesource: too many files with inconsistent durations, assuming broken configuration\n");
else if (invalidutts>0)
fprintf(stderr, "Found inconsistent durations across feature streams in %d out of %d files.", invalidutts, uttisvalid.size());
fprintf(stderr, "Found inconsistent durations across feature streams in %d out of %d files\n", invalidutts, uttisvalid.size());


// now process the features and labels
size_t utterancesetsize = 0;
foreach_index (m, infiles)
{
utteranceset.clear();
if (m==0)
numutts = infiles[m].size();
else
if (infiles[m].size()!=numutts)
throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances");
//if (m==0)
// numutts = infiles[m].size();
//else
// if (infiles[m].size()!=numutts)
// throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances\n");
if (m==0)
classidsbegin.clear();

Expand Down Expand Up @@ -528,7 +523,7 @@ class minibatchutterancesourcemulti : public minibatchsource
_totalframes += uttframes;
}
}
else if (uttisvalid[i])
else
{
utteranceset.push_back(std::move(utterance));
}
Expand Down
32 changes: 15 additions & 17 deletions DataReader/Kaldi2Reader/utterancesourcemulti.h
Original file line number Diff line number Diff line change
Expand Up @@ -328,30 +328,28 @@ class minibatchutterancesourcemulti : public minibatchsource
LogicError("lattices not supported in utterancereadermulti");
}

allchunks = std::vector<std::vector<utterancechunkdata>>(infiles.size(), std::vector<utterancechunkdata>());
featdim = std::vector<size_t>(infiles.size(), 0);
sampperiod = std::vector<unsigned int>(infiles.size(), 0);
featkind = std::vector<string>(infiles.size(), "");

numclasses = std::vector<size_t>(labels.size(), 0);
counts = std::vector<std::vector<size_t>>(labels.size(), std::vector<size_t>());
foreach_index (i, labels)
{
//classids.push_back(biggrowablevector<CLASSIDTYPE>());
classids.push_back(unique_ptr<biggrowablevector<CLASSIDTYPE>>(new biggrowablevector<CLASSIDTYPE>()));
numclasses.push_back(0);
counts.push_back(std::vector<size_t>());
//std::pair<std::vector<wstring>,std::vector<wstring>> latticetocs;
//std::unordered_map<std::string,size_t> modelsymmap;
//lattices.push_back(shared_ptr<latticesource>(new latticesource(latticetocs, modelsymmap)));

}

// m is index for feature stream
// i is index for files within a stream (items in SCP file)
foreach_index(m, infiles){
allchunks.push_back(std::vector<utterancechunkdata>());
featdim.push_back(0); // initialize
sampperiod.push_back(0);
featkind.push_back("");
}

// first check consistency across feature streams
// We'll go through the SCP files for each stream to make sure the duration is consistent
// If not, we'll plan to ignore the utterance, and inform the user
// m indexes the feature stream
// i indexes the files within a stream (i.e. the items in the SCP file)
foreach_index(m, infiles){
if (m == 0){
numutts = infiles[m].size();
Expand Down Expand Up @@ -402,11 +400,11 @@ class minibatchutterancesourcemulti : public minibatchsource
foreach_index (m, infiles)
{
utteranceset.clear();
if (m==0)
numutts = infiles[m].size();
else
if (infiles[m].size()!=numutts)
throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances");
//if (m==0)
// numutts = infiles[m].size();
//else
// if (infiles[m].size()!=numutts)
// throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances\n");
if (m==0)
classidsbegin.clear();

Expand Down Expand Up @@ -522,7 +520,7 @@ class minibatchutterancesourcemulti : public minibatchsource
_totalframes += uttframes;
}
}
else if (uttisvalid[i])
else
{
utteranceset.push_back(std::move(utterance));
}
Expand Down

0 comments on commit 9bb8d4d

Please sign in to comment.