diff --git a/DataReader/HTKMLFReader/utterancesourcemulti.h b/DataReader/HTKMLFReader/utterancesourcemulti.h index f0bfa54907bc..96717db1625c 100644 --- a/DataReader/HTKMLFReader/utterancesourcemulti.h +++ b/DataReader/HTKMLFReader/utterancesourcemulti.h @@ -313,30 +313,28 @@ class minibatchutterancesourcemulti : public minibatchsource LogicError("lattices not supported in utterancereadermulti"); } + allchunks = std::vector>(infiles.size(), std::vector()); + featdim = std::vector(infiles.size(), 0); + sampperiod = std::vector(infiles.size(), 0); + featkind = std::vector(infiles.size(), ""); + + numclasses = std::vector(labels.size(), 0); + counts = std::vector>(labels.size(), std::vector()); + foreach_index (i, labels) { - //classids.push_back(biggrowablevector()); classids.push_back(unique_ptr>(new biggrowablevector())); - numclasses.push_back(0); - counts.push_back(std::vector()); //std::pair,std::vector> latticetocs; //std::unordered_map modelsymmap; //lattices.push_back(shared_ptr(new latticesource(latticetocs, modelsymmap))); - - } - - // m is index for feature stream - // i is index for files within a stream (items in SCP file) - foreach_index(m, infiles){ - allchunks.push_back(std::vector()); - featdim.push_back(0); // initialize - sampperiod.push_back(0); - featkind.push_back(""); } + // first check consistency across feature streams // We'll go through the SCP files for each stream to make sure the duration is consistent // If not, we'll plan to ignore the utterance, and inform the user + // m indexes the feature stream + // i indexes the files within a stream, i.e. in the SCP file) foreach_index(m, infiles){ if (m == 0){ numutts = infiles[m].size(); @@ -354,7 +352,7 @@ class minibatchutterancesourcemulti : public minibatchsource throw std::runtime_error("minibatchutterancesource: utterances < 2 frames not supported"); if (uttframes > frameref::maxframesperutterance) { - fprintf(stderr, "minibatchutterancesource: skipping %d-th file (%d frames) because it exceeds max. frames (%d) for frameref bit field: %S", i, uttframes, frameref::maxframesperutterance, key.c_str()); + fprintf(stderr, "minibatchutterancesource: skipping %d-th file (%d frames) because it exceeds max. frames (%d) for frameref bit field: %S\n", i, uttframes, frameref::maxframesperutterance, key.c_str()); uttduration[i] = 0; uttisvalid[i] = false; } @@ -364,7 +362,7 @@ class minibatchutterancesourcemulti : public minibatchsource uttisvalid[i] = true; } else if (uttduration[i] != uttframes){ - fprintf(stderr, "minibatchutterancesource: skipping %d-th file due to inconsistency in duration in different feature streams (%d vs %d frames)", i, uttduration[i], uttframes); + fprintf(stderr, "minibatchutterancesource: skipping %d-th file due to inconsistency in duration in different feature streams (%d vs %d frames)\n", i, uttduration[i], uttframes); uttduration[i] = 0; uttisvalid[i] = false; } @@ -377,9 +375,9 @@ class minibatchutterancesourcemulti : public minibatchsource invalidutts++; } if (invalidutts > uttisvalid.size() / 2) - throw std::runtime_error("minibatchutterancesource: too many files not found in with inconsistent durations, assuming broken configuration\n"); + throw std::runtime_error("minibatchutterancesource: too many files with inconsistent durations, assuming broken configuration\n"); else if (invalidutts>0) - fprintf(stderr, "Found inconsistent durations across feature streams in %d out of %d files.", invalidutts, uttisvalid.size()); + fprintf(stderr, "Found inconsistent durations across feature streams in %d out of %d files\n", invalidutts, uttisvalid.size()); // now process the features and labels @@ -387,11 +385,11 @@ class minibatchutterancesourcemulti : public minibatchsource foreach_index (m, infiles) { utteranceset.clear(); - if (m==0) - numutts = infiles[m].size(); - else - if (infiles[m].size()!=numutts) - throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances"); + //if (m==0) + // numutts = infiles[m].size(); + //else + // if (infiles[m].size()!=numutts) + // throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances\n"); if (m==0) classidsbegin.clear(); @@ -505,7 +503,7 @@ class minibatchutterancesourcemulti : public minibatchsource _totalframes += uttframes; } } - else if (uttisvalid[i]) + else { utteranceset.push_back(std::move(utterance)); } diff --git a/DataReader/HTKMLFReader_linux/utterancesourcemulti.h b/DataReader/HTKMLFReader_linux/utterancesourcemulti.h index 677730d28827..133c3ace4708 100644 --- a/DataReader/HTKMLFReader_linux/utterancesourcemulti.h +++ b/DataReader/HTKMLFReader_linux/utterancesourcemulti.h @@ -335,26 +335,21 @@ class minibatchutterancesourcemulti : public minibatchsource LogicError("lattices not supported in utterancereadermulti"); } + allchunks = std::vector>(infiles.size(), std::vector()); + featdim = std::vector(infiles.size(), 0); + sampperiod = std::vector(infiles.size(), 0); + featkind = std::vector(infiles.size(), ""); + numclasses = std::vector(labels.size(), 0); + counts = std::vector>(labels.size(), std::vector()); foreach_index (i, labels) { - //classids.push_back(biggrowablevector()); classids.push_back(unique_ptr>(new biggrowablevector())); - numclasses.push_back(0); - counts.push_back(std::vector()); //std::pair,std::vector> latticetocs; //std::unordered_map modelsymmap; //lattices.push_back(shared_ptr(new latticesource(latticetocs, modelsymmap))); } - // m is index for feature stream - // i is index for files within a stream (items in SCP file) - foreach_index(m, infiles){ - allchunks.push_back(std::vector()); - featdim.push_back(0); // initialize - sampperiod.push_back(0); - featkind.push_back(""); - } // first check consistency across feature streams // We'll go through the SCP files for each stream to make sure the duration is consistent @@ -376,7 +371,7 @@ class minibatchutterancesourcemulti : public minibatchsource throw std::runtime_error("minibatchutterancesource: utterances < 2 frames not supported"); if (uttframes > frameref::maxframesperutterance) { - fprintf(stderr, "minibatchutterancesource: skipping %d-th file (%d frames) because it exceeds max. frames (%d) for frameref bit field: %S", i, uttframes, frameref::maxframesperutterance, key.c_str()); + fprintf(stderr, "minibatchutterancesource: skipping %d-th file (%d frames) because it exceeds max. frames (%d) for frameref bit field: %S\n", i, uttframes, frameref::maxframesperutterance, key.c_str()); uttduration[i] = 0; uttisvalid[i] = false; } @@ -386,7 +381,7 @@ class minibatchutterancesourcemulti : public minibatchsource uttisvalid[i] = true; } else if (uttduration[i] != uttframes){ - fprintf(stderr, "minibatchutterancesource: skipping %d-th file due to inconsistency in duration in different feature streams (%d vs %d frames)", i, uttduration[i], uttframes); + fprintf(stderr, "minibatchutterancesource: skipping %d-th file due to inconsistency in duration in different feature streams (%d vs %d frames)\n", i, uttduration[i], uttframes); uttduration[i] = 0; uttisvalid[i] = false; } @@ -399,9 +394,9 @@ class minibatchutterancesourcemulti : public minibatchsource invalidutts++; } if (invalidutts > uttisvalid.size() / 2) - throw std::runtime_error("minibatchutterancesource: too many files not found in with inconsistent durations, assuming broken configuration\n"); + throw std::runtime_error("minibatchutterancesource: too many files with inconsistent durations, assuming broken configuration\n"); else if (invalidutts>0) - fprintf(stderr, "Found inconsistent durations across feature streams in %d out of %d files.", invalidutts, uttisvalid.size()); + fprintf(stderr, "Found inconsistent durations across feature streams in %d out of %d files\n", invalidutts, uttisvalid.size()); // now process the features and labels @@ -409,11 +404,11 @@ class minibatchutterancesourcemulti : public minibatchsource foreach_index (m, infiles) { utteranceset.clear(); - if (m==0) - numutts = infiles[m].size(); - else - if (infiles[m].size()!=numutts) - throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances"); + //if (m==0) + // numutts = infiles[m].size(); + //else + // if (infiles[m].size()!=numutts) + // throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances\n"); if (m==0) classidsbegin.clear(); @@ -528,7 +523,7 @@ class minibatchutterancesourcemulti : public minibatchsource _totalframes += uttframes; } } - else if (uttisvalid[i]) + else { utteranceset.push_back(std::move(utterance)); } diff --git a/DataReader/Kaldi2Reader/utterancesourcemulti.h b/DataReader/Kaldi2Reader/utterancesourcemulti.h index 1652290be980..45d00d39aef9 100644 --- a/DataReader/Kaldi2Reader/utterancesourcemulti.h +++ b/DataReader/Kaldi2Reader/utterancesourcemulti.h @@ -328,30 +328,28 @@ class minibatchutterancesourcemulti : public minibatchsource LogicError("lattices not supported in utterancereadermulti"); } + allchunks = std::vector>(infiles.size(), std::vector()); + featdim = std::vector(infiles.size(), 0); + sampperiod = std::vector(infiles.size(), 0); + featkind = std::vector(infiles.size(), ""); + + numclasses = std::vector(labels.size(), 0); + counts = std::vector>(labels.size(), std::vector()); foreach_index (i, labels) { - //classids.push_back(biggrowablevector()); classids.push_back(unique_ptr>(new biggrowablevector())); - numclasses.push_back(0); - counts.push_back(std::vector()); //std::pair,std::vector> latticetocs; //std::unordered_map modelsymmap; //lattices.push_back(shared_ptr(new latticesource(latticetocs, modelsymmap))); } - // m is index for feature stream - // i is index for files within a stream (items in SCP file) - foreach_index(m, infiles){ - allchunks.push_back(std::vector()); - featdim.push_back(0); // initialize - sampperiod.push_back(0); - featkind.push_back(""); - } // first check consistency across feature streams // We'll go through the SCP files for each stream to make sure the duration is consistent // If not, we'll plan to ignore the utterance, and inform the user + // m indexes the feature stream + // i indexes the files within a stream, i.e. in the SCP file) foreach_index(m, infiles){ if (m == 0){ numutts = infiles[m].size(); @@ -402,11 +400,11 @@ class minibatchutterancesourcemulti : public minibatchsource foreach_index (m, infiles) { utteranceset.clear(); - if (m==0) - numutts = infiles[m].size(); - else - if (infiles[m].size()!=numutts) - throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances"); + //if (m==0) + // numutts = infiles[m].size(); + //else + // if (infiles[m].size()!=numutts) + // throw std::runtime_error("minibatchutterancesourcemulti: all feature files must have same number of utterances\n"); if (m==0) classidsbegin.clear(); @@ -522,7 +520,7 @@ class minibatchutterancesourcemulti : public minibatchsource _totalframes += uttframes; } } - else if (uttisvalid[i]) + else { utteranceset.push_back(std::move(utterance)); }