Skip to content

Commit

Permalink
Addressing code review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
eldakms committed Mar 21, 2016
1 parent 23a59b4 commit c1154fb
Show file tree
Hide file tree
Showing 16 changed files with 79 additions and 78 deletions.
8 changes: 4 additions & 4 deletions Source/Readers/CNTKTextFormatReader/Indexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,12 @@ void Indexer::AddSequence(SequenceDescriptor& sd)
{
m_chunks.push_back({});
chunk = &m_chunks.back();
chunk->id = m_chunks.size() - 1;
chunk->m_id = m_chunks.size() - 1;
}
chunk->m_byteSize += sd.m_byteSize;
chunk->numberOfSequences++;
chunk->numberOfSamples += sd.m_numberOfSamples;
sd.m_chunkId = chunk->id;
chunk->m_numberOfSequences++;
chunk->m_numberOfSamples += sd.m_numberOfSamples;
sd.m_chunkId = chunk->m_id;
chunk->m_sequences.push_back(sd);
}

Expand Down
10 changes: 5 additions & 5 deletions Source/Readers/CNTKTextFormatReader/TextParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,9 @@ ChunkDescriptions TextParser<ElemType>::GetChunkDescriptions()
{
result.push_back(shared_ptr<ChunkDescription>(
new ChunkDescription {
chunk.id,
chunk.numberOfSamples,
chunk.numberOfSequences
chunk.m_id,
chunk.m_numberOfSamples,
chunk.m_numberOfSequences
}));
}

Expand Down Expand Up @@ -207,9 +207,9 @@ void TextParser<ElemType>::GetSequencesForChunk(size_t chunkId, std::vector<Sequ
template <class ElemType>
TextParser<ElemType>::TextDataChunk::TextDataChunk(const ChunkDescriptor& descriptor)
{
m_id = descriptor.id;
m_id = descriptor.m_id;
m_sequenceRequestCount = 0;
m_sequences.reserve(descriptor.numberOfSequences);
m_sequences.reserve(descriptor.m_numberOfSequences);
}

template <class ElemType>
Expand Down
21 changes: 11 additions & 10 deletions Source/Readers/ExperimentalHTKMLFReader/HTKChunkDescription.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// Class represents a description of an HTK chunk.
// It is only used internally by the HTK deserializer.
// Can exist without associated data and provides methods for requiring/releasing chunk data.
// TODO: We should consider splitting data load from the description in the future versions.
class HTKChunkDescription
{
// All utterances in the chunk.
std::vector<UtteranceDescription> m_utteranceSet;
std::vector<UtteranceDescription> m_utterances;

// Stores all frames of the chunk consecutively (mutable since this is a cache).
mutable msra::dbn::matrix m_frames;
Expand All @@ -38,7 +39,7 @@ class HTKChunkDescription
// Gets number of utterances in the chunk.
size_t GetNumberOfUtterances() const
{
return m_utteranceSet.size();
return m_utterances.size();
}

// Adds an utterance to the chunk.
Expand All @@ -51,7 +52,7 @@ class HTKChunkDescription

m_firstFrames.push_back(m_totalFrames);
m_totalFrames += utterance.GetNumberOfFrames();
m_utteranceSet.push_back(std::move(utterance));
m_utterances.push_back(std::move(utterance));
}

// Gets total number of frames in the chunk.
Expand All @@ -63,19 +64,19 @@ class HTKChunkDescription
// Get utterance description by its index.
const UtteranceDescription* GetUtterance(size_t index) const
{
return &m_utteranceSet[index];
return &m_utterances[index];
}

// Get utterance by the absolute frame index in chunk.
// Uses the upper bound to do the binary search among sequences of the chunk.
size_t GetUtteranceForChunkFrameIndex(size_t frameIndex) const
{
auto result = std::upper_bound(
m_utteranceSet.begin(),
m_utteranceSet.end(),
m_utterances.begin(),
m_utterances.end(),
frameIndex,
[](size_t fi, const UtteranceDescription& a) { return fi < a.GetStartFrameIndexInsideChunk(); });
return result - 1 - m_utteranceSet.begin();
return result - 1 - m_utterances.begin();
}

// Returns all frames of a given utterance.
Expand Down Expand Up @@ -114,16 +115,16 @@ class HTKChunkDescription

// read all utterances; if they are in the same archive, htkfeatreader will be efficient in not closing the file
m_frames.resize(featureDimension, m_totalFrames);
foreach_index(i, m_utteranceSet)
foreach_index(i, m_utterances)
{
// read features for this file
auto framesWrapper = GetUtteranceFrames(i);
reader.read(m_utteranceSet[i].GetPath(), featureKind, samplePeriod, framesWrapper);
reader.read(m_utterances[i].GetPath(), featureKind, samplePeriod, framesWrapper);
}

if (verbosity)
{
fprintf(stderr, "RequireData: %d utterances read\n", (int)m_utteranceSet.size());
fprintf(stderr, "RequireData: %d utterances read\n", (int)m_utterances.size());
}
}
catch (...)
Expand Down
10 changes: 5 additions & 5 deletions Source/Readers/ExperimentalHTKMLFReader/HTKDataDeserializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,9 @@ ChunkDescriptions HTKDataDeserializer::GetChunkDescriptions()
for (size_t i = 0; i < m_chunks.size(); ++i)
{
auto cd = make_shared<ChunkDescription>();
cd->id = i;
cd->numberOfSamples = m_chunks[i].GetTotalFrames();
cd->numberOfSequences = m_chunks[i].GetTotalFrames();
cd->m_id = i;
cd->m_numberOfSamples = m_chunks[i].GetTotalFrames();
cd->m_numberOfSequences = m_chunks[i].GetTotalFrames();
chunks.push_back(cd);
}
return chunks;
Expand All @@ -192,8 +192,8 @@ void HTKDataDeserializer::GetSequencesForChunk(size_t chunkId, vector<SequenceDe
{
SequenceDescription f;
f.m_chunkId = chunkId;
f.m_key.major = major;
f.m_key.minor = k;
f.m_key.m_major = major;
f.m_key.m_minor = k;
f.m_id = offsetInChunk++;
f.m_isValid = true;
f.m_numberOfSamples = 1;
Expand Down
22 changes: 11 additions & 11 deletions Source/Readers/ExperimentalHTKMLFReader/MLFDataDeserializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ MLFDataDeserializer::MLFDataDeserializer(CorpusDescriptorPtr corpus, const Confi
if (!stringRegistry.Contains(l.first))
continue;

description.m_key.major = stringRegistry[l.first];
description.m_key.m_major = stringRegistry[l.first];

const auto& utterance = l.second;
description.m_sequenceStart = m_classIds.size();
Expand Down Expand Up @@ -120,18 +120,18 @@ MLFDataDeserializer::MLFDataDeserializer(CorpusDescriptorPtr corpus, const Confi
description.m_numberOfSamples = numberOfFrames;
totalFrames += numberOfFrames;
m_utteranceIndex.push_back(m_frames.size());
m_keyToSequence[description.m_key.major] = m_utteranceIndex.size() - 1;
m_keyToSequence[description.m_key.m_major] = m_utteranceIndex.size() - 1;

// TODO: Should be created by chunks only.
MLFFrame f;
f.m_chunkId = 0;
f.m_numberOfSamples = 1;
f.m_key.major = description.m_key.major;
f.m_key.m_major = description.m_key.m_major;
f.m_isValid = description.m_isValid;
for (size_t k = 0; k < description.m_numberOfSamples; ++k)
{
f.m_id = m_frames.size();
f.m_key.minor = k;
f.m_key.m_minor = k;
f.m_index = description.m_sequenceStart + k;
m_frames.push_back(f);
}
Expand Down Expand Up @@ -176,9 +176,9 @@ MLFDataDeserializer::MLFDataDeserializer(CorpusDescriptorPtr corpus, const Confi
ChunkDescriptions MLFDataDeserializer::GetChunkDescriptions()
{
auto cd = std::make_shared<ChunkDescription>();
cd->id = 0;
cd->numberOfSequences = m_frames.size();
cd->numberOfSamples = m_frames.size();
cd->m_id = 0;
cd->m_numberOfSequences = m_frames.size();
cd->m_numberOfSamples = m_frames.size();
return ChunkDescriptions{cd};
}

Expand All @@ -189,8 +189,8 @@ void MLFDataDeserializer::GetSequencesForChunk(size_t, std::vector<SequenceDescr
for (size_t i = 0; i < m_frames.size(); ++i)
{
SequenceDescription f;
f.m_key.major = m_frames[i].m_key.major;
f.m_key.minor = m_frames[i].m_key.minor;
f.m_key.m_major = m_frames[i].m_key.m_major;
f.m_key.m_minor = m_frames[i].m_key.m_minor;
f.m_id = m_frames[i].m_id;
f.m_chunkId = m_frames[i].m_chunkId;
f.m_numberOfSamples = 1;
Expand All @@ -217,14 +217,14 @@ static SequenceDescription s_InvalidSequence { 0, 0, 0, false };

void MLFDataDeserializer::GetSequenceDescriptionByKey(const KeyType& key, SequenceDescription& result)
{
auto sequenceId = m_keyToSequence.find(key.major);
auto sequenceId = m_keyToSequence.find(key.m_major);
if (sequenceId == m_keyToSequence.end())
{
result = s_InvalidSequence;
return;
}

size_t index = m_utteranceIndex[sequenceId->second] + key.minor;
size_t index = m_utteranceIndex[sequenceId->second] + key.m_minor;
result = m_frames[index];
}

Expand Down
10 changes: 5 additions & 5 deletions Source/Readers/ImageReader/ImageDataDeserializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,9 @@ ChunkDescriptions ImageDataDeserializer::GetChunkDescriptions()
for (auto const& s : m_imageSequences)
{
auto chunk = std::make_shared<ChunkDescription>();
chunk->id = s.m_chunkId;
chunk->numberOfSamples = 1;
chunk->numberOfSequences = 1;
chunk->m_id = s.m_chunkId;
chunk->m_numberOfSamples = 1;
chunk->m_numberOfSequences = 1;
result.push_back(chunk);
}

Expand Down Expand Up @@ -189,8 +189,8 @@ void ImageDataDeserializer::CreateSequenceDescriptions(std::string mapPath, size
description.m_chunkId = lineIndex;
description.m_path = imagePath;
description.m_classId = std::stoi(classId);
description.m_key.major = description.m_id;
description.m_key.minor = 0;
description.m_key.m_major = description.m_id;
description.m_key.m_minor = 0;

if (description.m_classId >= labelDimension)
{
Expand Down
2 changes: 1 addition & 1 deletion Source/Readers/ImageReader/ImageDataDeserializer.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class ImageDataDeserializer : public DataDeserializerBase
virtual ChunkDescriptions GetChunkDescriptions() override;

// Gets sequence descriptions for the chunk.
virtual void GetSequencesForChunk(size_t, std::vector<SequenceDescription>&);
virtual void GetSequencesForChunk(size_t, std::vector<SequenceDescription>&) override;

private:
// Creates a set of sequence descriptions.
Expand Down
6 changes: 3 additions & 3 deletions Source/Readers/ReaderLib/BlockRandomizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ BlockRandomizer::BlockRandomizer(
m_sweepTotalNumberOfSamples = 0;
for (auto const & chunk : m_deserializer->GetChunkDescriptions())
{
m_sweepTotalNumberOfSamples += chunk->numberOfSamples;
m_sweepTotalNumberOfSamples += chunk->m_numberOfSamples;
}
}

Expand Down Expand Up @@ -224,14 +224,14 @@ void BlockRandomizer::RetrieveDataChunks()
continue;
}

auto it = m_chunks.find(chunk.m_original->id);
auto it = m_chunks.find(chunk.m_original->m_id);
if (it != m_chunks.end())
{
chunks[chunk.m_chunkId] = it->second;
}
else
{
chunks[chunk.m_chunkId] = m_deserializer->GetChunk(chunk.m_original->id);
chunks[chunk.m_chunkId] = m_deserializer->GetChunk(chunk.m_original->m_id);
}
}

Expand Down
24 changes: 12 additions & 12 deletions Source/Readers/ReaderLib/Bundler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@ void Bundler::CreateChunkDescriptions()
for (const auto& c : chunks)
{
auto cd = std::make_shared<BundlerChunkDescription>();
cd->numberOfSamples = c->numberOfSamples;
cd->numberOfSequences = c->numberOfSequences;
cd->id = m_chunks.size();
cd->m_numberOfSamples = c->m_numberOfSamples;
cd->m_numberOfSequences = c->m_numberOfSequences;
cd->m_id = m_chunks.size();
cd->m_original = c;
m_chunks.push_back(cd);
}
Expand All @@ -69,7 +69,7 @@ void Bundler::CreateChunkDescriptions()

// Otherwise build bundling chunks using underlying deserializers.
std::vector<SequenceDescription> sequenceDescriptions;
sequenceDescriptions.reserve(chunks.front()->numberOfSequences);
sequenceDescriptions.reserve(chunks.front()->m_numberOfSequences);
SequenceDescription s;
for (size_t chunkIndex = 0; chunkIndex < chunks.size(); ++chunkIndex)
{
Expand All @@ -78,7 +78,7 @@ void Bundler::CreateChunkDescriptions()
sequenceDescriptions.clear();

// Iterating thru all sequences and identifying whether they are valid among all deserializers.
m_driver->GetSequencesForChunk(chunks[chunkIndex]->id, sequenceDescriptions);
m_driver->GetSequencesForChunk(chunks[chunkIndex]->m_id, sequenceDescriptions);
std::set<size_t> invalid;
for (size_t sequenceIndex = 0; sequenceIndex < sequenceDescriptions.size(); ++sequenceIndex)
{
Expand Down Expand Up @@ -106,9 +106,9 @@ void Bundler::CreateChunkDescriptions()
if (numberOfSamples > 0)
{
auto cd = std::make_shared<BundlerChunkDescription>();
cd->numberOfSamples = numberOfSamples;
cd->numberOfSequences = numberOfSequences;
cd->id = m_chunks.size();
cd->m_numberOfSamples = numberOfSamples;
cd->m_numberOfSequences = numberOfSequences;
cd->m_id = m_chunks.size();
cd->m_original = chunks[chunkIndex];
m_chunks.push_back(cd);
cd->m_invalid = std::move(invalid);
Expand All @@ -127,7 +127,7 @@ void Bundler::GetSequencesForChunk(size_t chunkId, std::vector<SequenceDescripti
{
BundlerChunkDescriptionPtr chunk = m_chunks[chunkId];
ChunkDescriptionPtr original = chunk->m_original;
m_driver->GetSequencesForChunk(original->id, sequences);
m_driver->GetSequencesForChunk(original->m_id, sequences);

// Can return because all sequences are clean.
if (chunk->m_invalid.empty())
Expand Down Expand Up @@ -174,11 +174,11 @@ class Bundler::BundlingChunk : public Chunk
auto& deserializers = m_parent->m_deserializers;
assert(numberOfInputs == deserializers.size());
std::vector<SequenceDescription> sequences;
sequences.reserve(original->numberOfSequences);
sequences.reserve(original->m_numberOfSequences);

// Creating chunk mapping.
m_parent->m_driver->GetSequencesForChunk(original->id, sequences);
ChunkPtr drivingChunk = m_parent->m_driver->GetChunk(original->id);
m_parent->m_driver->GetSequencesForChunk(original->m_id, sequences);
ChunkPtr drivingChunk = m_parent->m_driver->GetChunk(original->m_id);
m_sequenceToSequence.resize(m_numberOfInputs * sequences.size());
m_innerChunks.resize(m_numberOfInputs * sequences.size());
for (size_t sequenceIndex = 0; sequenceIndex < sequences.size(); ++sequenceIndex)
Expand Down
4 changes: 2 additions & 2 deletions Source/Readers/ReaderLib/ChunkRandomizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (size_t chunkIndex = 0; chunkIndex < m_originalChunks.size(); chunkIndex++)
{
const size_t originalChunkIndex = randomizedChunkIndices[chunkIndex];
const size_t numberOfSamples = m_originalChunks[originalChunkIndex]->numberOfSamples;
const size_t numberOfSequences = m_originalChunks[originalChunkIndex]->numberOfSequences;
const size_t numberOfSamples = m_originalChunks[originalChunkIndex]->m_numberOfSamples;
const size_t numberOfSequences = m_originalChunks[originalChunkIndex]->m_numberOfSequences;

RandomizedChunk randomizedChunk;
randomizedChunk.m_chunkId = chunkIndex;
Expand Down
4 changes: 2 additions & 2 deletions Source/Readers/ReaderLib/ChunkRandomizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// Position of the last sample of the chunk in the input.
size_t SampleEndPosition() const
{
return m_original->numberOfSamples + m_samplePositionStart;
return m_original->m_numberOfSamples + m_samplePositionStart;
}

// Position of the last sequence of the chunk in the input.
size_t SequenceEndPosition() const
{
return m_original->numberOfSequences + m_sequencePositionStart;
return m_original->m_numberOfSequences + m_sequencePositionStart;
}
};

Expand Down
Loading

0 comments on commit c1154fb

Please sign in to comment.