Skip to content

Commit

Permalink
Merge branch 'master' of https://git01.codeplex.com/cntk
Browse files Browse the repository at this point in the history
  • Loading branch information
zhaoyukoon committed Jul 7, 2015
2 parents 54c1ac4 + cac3156 commit b5038c7
Show file tree
Hide file tree
Showing 9 changed files with 559 additions and 142 deletions.
23 changes: 15 additions & 8 deletions Common/Include/DataReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,16 @@

namespace Microsoft { namespace MSR { namespace CNTK {

const size_t randomizeAuto = ((size_t)-1)>>2; // randomize range set automatically, parameter value for Init()
const size_t randomizeNone = 0; // don't randomize, parameter value for Init()
const size_t requestDataSize = randomizeAuto; // StartMinibatchLoop default parameter, sets number of requested frames equal to the number of frames in the dataset
// randomize range set automatically, parameter value for Init()
const size_t randomizeAuto = ((size_t) -1) >> 2;

// don't randomize, parameter value for Init()
const size_t randomizeNone = 0;

// StartMinibatchLoop default parameter: sets the number of requested
// frames to the constant computed by ((size_t) -1) >> 2 above
// (0x3fffffffffffffff when size_t is 64 bits wide).
// We use this constant as a stand-in for the total number of frames in the dataset.
const size_t requestDataSize = randomizeAuto;

enum EndDataType
{
Expand All @@ -52,7 +59,7 @@ class DATAREADER_API IDataReader

virtual void Init(const ConfigParameters& config) = 0;
virtual void Destroy() = 0;
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize) = 0;
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize) = 0;
virtual bool GetMinibatch(std::map<std::wstring, Matrix<ElemType>*>& matrices) = 0;
virtual size_t NumberSlicesInEachRecurrentIter() = 0;
virtual void SetNbrSlicesEachRecurrentIter(const size_t) = 0;
Expand Down Expand Up @@ -80,7 +87,7 @@ class DataReader : public IDataReader<ElemType>, protected Plugin
typedef typename IDataReader<ElemType>::LabelType LabelType;
typedef typename IDataReader<ElemType>::LabelIdType LabelIdType;
private:
IDataReader<ElemType> *m_dataReader; // reader
IDataReader<ElemType>* m_dataReader; // reader

// Init - Reader Initialize for multiple data sets
// config - [in] configuration parameters for the datareader
Expand Down Expand Up @@ -123,7 +130,7 @@ class DataReader : public IDataReader<ElemType>, protected Plugin
// mbSize - [in] size of the minibatch (number of frames, etc.)
// epoch - [in] epoch number for this loop
// requestedEpochSamples - [in] number of samples to randomize, defaults to requestDataSize which uses the number of samples there are in the dataset
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples=requestDataSize);
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize);

// GetMinibatch - Get the next minibatch (features and labels)
// matrices - [in] a map with named matrix types (i.e. 'features', 'labels') mapped to the corresponding matrix,
Expand Down Expand Up @@ -152,10 +159,10 @@ class DataReader : public IDataReader<ElemType>, protected Plugin
// [out] size of buffer filled with data
// recordStart - record to start reading from, defaults to zero (start of data)
// returns: true if data remains to be read, false if the end of data was reached
virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart=0);
virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart = 0);

virtual bool DataEnd(EndDataType endDataType);
void SetSentenceEndInBatch(std::vector<size_t> &sentenceEnd);
void SetSentenceEndInBatch(std::vector<size_t>& sentenceEnd);
};

}}}
46 changes: 24 additions & 22 deletions Common/Include/commandArgUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,7 @@ class ConfigParser
// pop out of content level
contentLevel = false;
}

if (quoteFound)
{
// skip the closing quote
Expand Down Expand Up @@ -660,7 +661,7 @@ class ConfigParser
std::string ReadConfigFiles(const std::string& filePaths);
std::string ReadConfigFiles(const std::wstring& filePaths);
std::string ResolveIncludeStatements(const std::string& configString, std::vector<std::string>& resolvedConfigFiles);
void LoadConfigFile(const std::wstring & filePath);
void LoadConfigFile(const std::wstring& filePath);
void LoadConfigFileAndResolveVariables(const std::wstring& filePath, const ConfigParameters& config);
void LoadConfigFiles(const std::wstring& filePaths, const std::string* configStringToAppend = nullptr);

Expand Down Expand Up @@ -873,17 +874,17 @@ class ConfigParameters: public ConfigParser, public ConfigDictionary
}

// Insert - insert a 'name=value' string into the dictionary
void Insert(const std::string &str)
void Insert(const std::string& str)
{
ParseValue(str, 0, str.length());
}

bool Exists(const std::wstring & name) const
bool Exists(const std::wstring& name) const
{
return Exists(msra::strfun::utf8(name));
}

bool Exists(const std::string & name) const
bool Exists(const std::string& name) const
{
if (find(name) != end())
{
Expand All @@ -899,42 +900,42 @@ class ConfigParameters: public ConfigParser, public ConfigDictionary
}

// ExistsCurrent - check to see if a key exists in THIS config, don't check parent
bool ExistsCurrent(const std::string & name) const
bool ExistsCurrent(const std::string& name) const
{
return (find(name) != end());
}

// dict(name, default) for strings
ConfigValue operator()(const std::wstring & name,
const wchar_t *defaultvalue) const
ConfigValue operator()(const std::wstring& name,
const wchar_t* defaultvalue) const
{
return operator()(msra::strfun::utf8(name), defaultvalue);
}

// dict(name, default) for strings
ConfigValue operator()(const std::string & name,
const wchar_t *defaultvalue) const
ConfigValue operator()(const std::string& name,
const wchar_t* defaultvalue) const
{
return operator()(name, msra::strfun::utf8(defaultvalue).c_str());
}

// dict(name, default) for strings
ConfigValue operator()(const std::wstring & name,
const char *defaultvalue) const
ConfigValue operator()(const std::wstring& name,
const char* defaultvalue) const
{
return operator()(msra::strfun::utf8(name), defaultvalue);
}

// dict(name, default) for strings
ConfigValue operator()(const std::string & name,
const char *defaultvalue) const
ConfigValue operator()(const std::string& name,
const char* defaultvalue) const
{
ConfigValue value = Find(name, defaultvalue);
return value;
}

ConfigValue Find(const std::string & name,
const char *defaultvalue = NULL) const
ConfigValue Find(const std::string& name,
const char* defaultvalue = NULL) const
{
auto iter = find(name);
ConfigValue result;
Expand Down Expand Up @@ -975,10 +976,11 @@ class ConfigParameters: public ConfigParser, public ConfigDictionary
// any whitespace characters. If an opening "$" is found without a closing "$", an exception is thrown.
// configString - the string that you would like to resolve variables in.
// returns: A copy of 'configString' with all the variables resolved.
std::string ResolveVariablesInSingleLine(const std::string &configLine) const
std::string ResolveVariablesInSingleLine(const std::string& configLine) const
{
// ensure that this method was called on a single line (eg, no newline characters exist in 'configLine').
if (configLine.find_first_of("\n") != std::string::npos) {
if (configLine.find_first_of("\n") != std::string::npos)
{
throw std::logic_error(
"\"ResolveVariablesInSingleLine\" shouldn't be called with a string containing a newline character");
}
Expand Down Expand Up @@ -1053,7 +1055,7 @@ class ConfigParameters: public ConfigParser, public ConfigDictionary
// we shouldn't insert newlines where they didn't already exist.
// configString - the string that you would like to resolve variables in.
// returns: A copy of 'configString' with all the variables resolved.
std::string ResolveVariables(const std::string &configString) const
std::string ResolveVariables(const std::string& configString) const
{
std::string newConfigString;
if (configString.find_first_of("\n") != std::string::npos)
Expand Down Expand Up @@ -1347,14 +1349,14 @@ class argvector: public std::vector<T>
RuntimeError("argvector: invalid arg value");
}
}
static void parse(const std::wstring & in, std::wstring & val)
static void parse(const std::wstring& in, std::wstring& val)
{
val = in;
}

public:
// constructor --construct empty, then assign a wstring from command-line argument
void operator=(const std::wstring & arg)
void operator=(const std::wstring& arg)
{
clear();
// separate the arguments
Expand Down Expand Up @@ -1387,7 +1389,7 @@ class argvector: public std::vector<T>
}

// constructor --use this for setting default values
argvector(const std::wstring & arg)
argvector(const std::wstring& arg)
{
*this = arg;
}
Expand Down Expand Up @@ -1438,7 +1440,7 @@ class argvector: public std::vector<T>
}

// we give full read access to the vector, so we can use it bounded as well
const std::vector<T> & tovector() const
const std::vector<T>& tovector() const
{
return *this;
}
Expand Down
96 changes: 96 additions & 0 deletions Documentation/CNTK-TechReport/lyx/CNTKBook_CNTK_Chapter.lyx
Original file line number Diff line number Diff line change
Expand Up @@ -1725,6 +1725,102 @@ numBestSearchEpoch

\end_layout

\begin_layout Standard
Used in the Adaptive Minibatch Sizing mode.
\end_layout

\begin_layout Itemize

\emph on
numMiniBatch4LRSearch
\emph default

\begin_inset Index idx
status open

\begin_layout Plain Layout
numMiniBatch4LRSearch
\end_layout

\end_inset

: the number of minibatches used to search the minibatch size when
in adaptive minibatch size mode.
Default value is 500.
It's typically set to 10-20% of the total minibatches in an epoch.
This setting is shared with the search for learning rate in
SearchBeforeEpoch mode.

\end_layout

\begin_layout Itemize

\emph on
autoAdjustMinibatch
\emph default

\begin_inset Index idx
status open

\begin_layout Plain Layout
autoAdjustMinibatch
\end_layout

\end_inset

: enable or disable whether minibatch size is adaptively adjusted.
Default value is false.
Adaptive minibatch sizing will begin on
epochs starting after the minibatch sizes explicitly
specified by the user are complete. For example, if the user
specified minibatchSize=256:1024, then 256 and 1024
are used in the first 2 epochs and adaptive minibatch
sizing is used afterwards.

\end_layout

\begin_layout Itemize

\emph on
minibatchSizeTuningFrequency
\emph default

\begin_inset Index idx
status open

\begin_layout Plain Layout
minibatchSizeTuningFrequency
\end_layout

\end_inset

: The number of epochs to skip, on a periodic basis, before
dynamically adjusting the minibatch size.
Default value is 1.

\end_layout

\begin_layout Itemize

\emph on
minibatchSizeTuningMax
\emph default

\begin_inset Index idx
status open

\begin_layout Plain Layout
minibatchSizeTuningMax
\end_layout

\end_inset

: The maximum size allowed for an
adaptively adjusted minibatch size.
Default value is 1048576.

\end_layout

\end_deeper
\begin_layout Subsubsection
Gradient control
Expand Down
Loading

0 comments on commit b5038c7

Please sign in to comment.