Skip to content

Commit

Permalink
ReleaseChunks() in SequenceRandomizer::Seek
Browse files Browse the repository at this point in the history
More logging

Only log release chunks if > 0

Fix log message

diagnostics for ChunkRandomizer

Remove excessive logging
  • Loading branch information
mahilleb-msft authored and eldakms committed May 31, 2016
1 parent b7a8bf9 commit c511e04
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 3 deletions.
2 changes: 1 addition & 1 deletion Source/Readers/ReaderLib/BlockRandomizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ BlockRandomizer::BlockRandomizer(
assert(deserializer != nullptr);

m_streams = m_deserializer->GetStreamDescriptions();
m_sequenceRandomizer = std::make_shared<SequenceRandomizer>(m_deserializer, m_chunkRandomizer);
m_sequenceRandomizer = std::make_shared<SequenceRandomizer>(verbosity, m_deserializer, m_chunkRandomizer);

// Calculate total number of samples.
m_sweepTotalNumberOfSamples = 0;
Expand Down
16 changes: 15 additions & 1 deletion Source/Readers/ReaderLib/Bundler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#define _CRT_SECURE_NO_WARNINGS

#include "Bundler.h"
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
#include <set>

namespace Microsoft { namespace MSR { namespace CNTK {
Expand All @@ -25,7 +27,7 @@ Bundler::Bundler(
bool cleanse)
: m_deserializers(deserializers), m_driver(driver)
{
UNUSED(readerConfig);
m_verbosity = readerConfig(L"verbosity", 0);

// Combines streams of underlying deserializers.
for (auto d : deserializers)
Expand All @@ -45,6 +47,9 @@ Bundler::Bundler(
// Creates chunk descriptions based on chunks of underlying deserializers.
void Bundler::CreateChunkDescriptions()
{
if (m_verbosity)
fprintf(stderr, "Bundler::CreateChunkDescriptions(): started\n");

auto chunks = m_driver->GetChunkDescriptions();
if (chunks.size() < 1)
{
Expand All @@ -57,6 +62,9 @@ void Bundler::CreateChunkDescriptions()

m_chunks.reserve(chunks.size());

if (m_verbosity)
fprintf(stderr, "Bundler::CreateChunkDescriptions(): creating descriptions for %" PRIu64 " chunks\n", m_chunks.size());

// If no cleaning is required, simply build chunks based on the chunk descriptions of the primary deserializer.
if (!m_cleanse)
{
Expand All @@ -72,6 +80,9 @@ void Bundler::CreateChunkDescriptions()
return;
}

if (m_verbosity)
fprintf(stderr, "Bundler::CreateChunkDescriptions(): starting to clean chunks\n");

m_takePrimarySequenceLength = true;

// Otherwise build bundling chunks using underlying deserializers.
Expand Down Expand Up @@ -127,6 +138,9 @@ void Bundler::CreateChunkDescriptions()
cd->m_invalid = std::move(invalid);
}
}

if (m_verbosity)
fprintf(stderr, "Bundler::CreateChunkDescriptions(): finished cleaning of %" PRIu64 " chunks\n", m_chunks.size());
}

// Gets chunk descriptions.
Expand Down
3 changes: 3 additions & 0 deletions Source/Readers/ReaderLib/Bundler.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ class Bundler : public DataDeserializerBase
// Used for optimization when sequences between different deserializers are of the same length
// (i.e. often in speech)
bool m_takePrimarySequenceLength;

// General configuration
int m_verbosity;
};

}}}
3 changes: 3 additions & 0 deletions Source/Readers/ReaderLib/ChunkRandomizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// got more space, move window to the right.
chunk.m_randomizationWindow.m_end++;
}

// TODO: either gate this diagnostic on a verbosity setting or remove it.
// fprintf(stderr, "chunk %u randomizationWindow [%u..%u)\n", chunkId, chunk.m_randomizationWindow.m_begin, chunk.m_randomizationWindow.m_end);
}
}
}}}
49 changes: 48 additions & 1 deletion Source/Readers/ReaderLib/SequenceRandomizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

#define _CRT_SECURE_NO_WARNINGS

#define __STDC_FORMAT_MACROS
#include <inttypes.h>
#include "SequenceRandomizer.h"
#include <algorithm>
#include <utility>
Expand All @@ -22,9 +24,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}

SequenceRandomizer::SequenceRandomizer(
int verbosity,
IDataDeserializerPtr deserializer,
ChunkRandomizerPtr chunkRandomizer)
: m_randomizedChunks(chunkRandomizer->GetRandomizedChunks()),
: m_verbosity(verbosity),
m_randomizedChunks(chunkRandomizer->GetRandomizedChunks()),
m_chunkWindowBegin(0),
m_randomizedWindowEnd(0),
m_randomizationCursor(0),
Expand Down Expand Up @@ -117,6 +121,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// That means the sequence description that we have got from the previous call can still be in the BlockRandomizer.
size_t currentChunk = std::min(m_currentChunkCursor, m_randomizedChunks.size() - 1);
size_t candidateToUnload = m_chunkWindowBegin;
size_t releasedChunks = 0;
while (candidateToUnload < m_randomizedChunks.size() &&
candidateToUnload < m_randomizedChunks[currentChunk].m_randomizationWindow.m_begin &&
m_randomizedChunks[candidateToUnload].m_randomizationWindow.m_end <= m_currentChunkCursor)
Expand All @@ -126,7 +131,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_randomizedChunkInfo.pop_front();
m_chunkWindowBegin++;
candidateToUnload++;
releasedChunks++;
}

if (m_verbosity && 0 < releasedChunks)
fprintf(stderr,
"SequenceRandomizer::ReleaseChunks(): "
"released %" PRIu64 "chunks, now "
"chunk window [%" PRIu64 "..%u), cursor %" PRIu64 ", "
"randomized window [%" PRIu64 "..%" PRIu64 "), randomization cursor %" PRIu64 "\n",
releasedChunks,
m_chunkWindowBegin, m_chunkWindowEnd,
m_currentChunkCursor,
m_chunkWindowBegin, m_randomizedWindowEnd,
m_randomizationCursor);
}

// Randomize one more chunk if needed after the chunk cursor has been incremented.
Expand Down Expand Up @@ -228,6 +246,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_randomizedWindowEnd++;
m_randomizationCursor = nextRandomizationCursor;
m_chunkWindowEnd = nextChunkWindowEnd;

if (m_verbosity)
fprintf(stderr,
"SequenceRandomizer::RandomizeNextChunkIfNeeded(): "
"chunk window [%" PRIu64 "..%u), cursor %" PRIu64 ", "
"randomized window [%" PRIu64 "..%" PRIu64 "), randomization cursor %" PRIu64 "\n",
m_chunkWindowBegin, m_chunkWindowEnd,
m_currentChunkCursor,
m_chunkWindowBegin, m_randomizedWindowEnd,
m_randomizationCursor);
}

// Sets current cursor to the given sample offset.
Expand All @@ -244,16 +272,26 @@ namespace Microsoft { namespace MSR { namespace CNTK {
randomizedWindowEndInSamples = m_randomizedChunkInfo.back().start + m_randomizedChunkInfo.back().numberOfSamples;
}

if (m_verbosity)
fprintf(stderr, "SequenceRandomizer::Seek(): seeking offset %" PRIu64 "in sweep %" PRIu64 "\n",
sweepSampleOffset,
sweep);

if (sweepSampleOffset < randomizeWindowBeginInSamples)
{
// The requested offset is before the earliest randomized sequences we still have.
// Need to start over.
if (m_verbosity)
fprintf(stderr, "SequenceRandomizer::Seek(): starting over \n");

Reset(sweep + 1);
}
else if (sweepSampleOffset < randomizedWindowEndInSamples)
{
// The requested offset is within the randomized window.
// We change the current chunk cursor to contain the requested offset.
if (m_verbosity)
fprintf(stderr, "SequenceRandomizer::Seek(): offset is within randomized window\n");
size_t index;
for (index = 0; index < m_randomizedChunkInfo.size(); index++)
{
Expand All @@ -274,10 +312,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}

// Advance sequence by sequence until the desired offset is reached.
if (m_verbosity)
fprintf(stderr, "SequenceRandomizer::Seek(): advancing cursor from %" PRIu64 " to %" PRIu64 "\n",
m_currentSampleCursor,
sweepSampleOffset);

// TODO perhaps optimize this
while (m_currentSampleCursor < sweepSampleOffset)
{
GetNextSequenceDescriptions(1);
if (m_chunkWindowBegin < m_currentChunkCursor)
{
ReleaseChunks();
}
}

return m_currentSampleCursor;
Expand Down
5 changes: 5 additions & 0 deletions Source/Readers/ReaderLib/SequenceRandomizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class SequenceRandomizer
{
public:
SequenceRandomizer(
int verbosity,
IDataDeserializerPtr deserializer,
ChunkRandomizerPtr chunkRandomizer);

Expand Down Expand Up @@ -141,6 +142,7 @@ class SequenceRandomizer
// sequenced randomized.
std::deque<ChunkInfo> m_randomizedChunkInfo;

// TODO: consider changing to ChunkIdType where appropriate.
// Index of the first chunk in the window (inclusive).
size_t m_chunkWindowBegin;

Expand All @@ -157,6 +159,9 @@ class SequenceRandomizer

// Index of the last chunk in the window (exclusive).
ChunkIdType m_chunkWindowEnd;

// General configuration
int m_verbosity;
};

typedef std::shared_ptr<SequenceRandomizer> SequenceRandomizerPtr;
Expand Down

0 comments on commit c511e04

Please sign in to comment.