forked from microsoft/CNTK
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
50 changed files
with
17,562,962 additions
and
275 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,243 @@ | ||
// | ||
// <copyright file="ReaderTestHelper.h" company="Microsoft"> | ||
// Copyright (c) Microsoft Corporation. All rights reserved. | ||
// </copyright> | ||
// | ||
#pragma once | ||
|
||
#include "DataReader.h" | ||
#include "boost/filesystem.hpp" | ||
|
||
using namespace Microsoft::MSR::CNTK; | ||
|
||
namespace Microsoft { namespace MSR { namespace CNTK | ||
{ | ||
namespace Test | ||
{ | ||
struct ReaderFixture | ||
{ | ||
// This fixture sets up paths so the tests can assume the right location for finding the configuration | ||
// file as well as the input data and control data. | ||
// subPath : an optional sub path (or full path) for the location of data. | ||
ReaderFixture(string subPath = "") | ||
{ | ||
BOOST_TEST_MESSAGE("Setup fixture"); | ||
m_initialWorkingPath = boost::filesystem::current_path().generic_string(); | ||
BOOST_TEST_MESSAGE("Current working directory: " + m_initialWorkingPath); | ||
|
||
boost::filesystem::path path(boost::unit_test::framework::master_test_suite().argv[0]); | ||
m_parentPath = path.parent_path().generic_string(); | ||
m_testDataPath = m_parentPath + "/../../../Tests/UnitTests/ReaderTests"; | ||
|
||
BOOST_TEST_MESSAGE("Setting test data path to: " + m_testDataPath); | ||
|
||
string newCurrentPath; | ||
|
||
// Determine if a subpath has been specified and it is not a relative path | ||
if (subPath.length()) | ||
{ | ||
// Retrieve the full path from the environment variable (if any) | ||
// Currently limited to a single expansion of an environment variable at the beginning of the string. | ||
if (subPath[0] == '%') | ||
{ | ||
auto end = subPath.find_last_of(subPath[0]); | ||
string environmentVariable = subPath.substr(1, end - 1); | ||
|
||
BOOST_TEST_MESSAGE("Retrieving environment variable: " + environmentVariable); | ||
|
||
const char* p = std::getenv(environmentVariable.c_str()); | ||
if (p) | ||
{ | ||
newCurrentPath = p + subPath.substr(end + 1); | ||
} | ||
else | ||
{ | ||
BOOST_TEST_MESSAGE("Invalid environment variable: " + subPath); | ||
newCurrentPath = m_testDataPath; | ||
} | ||
} | ||
else if ((subPath[0] == '/' && subPath[1] == '//') || (subPath[0] == '\\' && subPath[1] == '\\')) | ||
{ | ||
newCurrentPath = subPath; | ||
} | ||
else | ||
{ | ||
newCurrentPath = m_testDataPath + subPath; | ||
} | ||
} | ||
|
||
BOOST_TEST_MESSAGE("Setting current path to: " + newCurrentPath); | ||
|
||
boost::filesystem::current_path(newCurrentPath); | ||
|
||
BOOST_TEST_MESSAGE("Current working directory is now: " + boost::filesystem::current_path().generic_string()); | ||
} | ||
|
||
~ReaderFixture() | ||
{ | ||
BOOST_TEST_MESSAGE("Teardown fixture"); | ||
} | ||
|
||
// Limits the number of minibatches to read, to reduce time and data file size | ||
size_t m_maxMiniBatchCount = 10; | ||
|
||
string m_initialWorkingPath; | ||
string m_testDataPath; | ||
string m_parentPath; | ||
|
||
string initialPath() { return m_initialWorkingPath; } | ||
string testDataPath() { return m_testDataPath; } | ||
string currentPath() { return boost::filesystem::current_path().generic_string(); } | ||
|
||
// Helper function to write a matrix (feature or label) to a file | ||
// matrix : the matrix to output | ||
// outputFile : the output stream to write to | ||
template<class ElemType> | ||
void OutputMatrix(Matrix<ElemType>& matrix, ofstream& outputFile) | ||
{ | ||
size_t numRows = matrix.GetNumRows(); | ||
|
||
std::unique_ptr<ElemType[]> pItem{ matrix.CopyToArray() }; | ||
size_t numItems = numRows * matrix.GetNumCols(); | ||
|
||
for (auto j = 0; j < numItems; j++) | ||
{ | ||
outputFile << pItem[j] << (j % numRows ? "\n" : " "); | ||
} | ||
} | ||
|
||
// Helper function to write the Reader's content to a file. | ||
// outputFile : the file stream to output to. | ||
// dataReader : the DataReader to get minibatches from | ||
// map : the map containing the feature and label matrices | ||
// epochs : the number of epochs to read | ||
// mbSize : the minibatch size | ||
// epochSize : the epoch size | ||
// numFeatureFiles : the number of feature files used (multi IO) | ||
// numLabelFiles : the number of label files used (multi IO) | ||
// subsetNum : the subset number for parallel trainings | ||
// numSubsets : the number of parallel trainings (set to 1 for single) | ||
template <class ElemType> | ||
void HelperWriteReaderContentToFile( | ||
ofstream& outputFile, | ||
DataReader<ElemType>& dataReader, | ||
std::map<std::wstring, Matrix<ElemType>*>& map, | ||
size_t epochs, | ||
size_t mbSize, | ||
size_t epochSize, | ||
size_t numFeatureFiles, | ||
size_t numLabelFiles, | ||
size_t subsetNum, | ||
size_t numSubsets) | ||
{ | ||
for (auto epoch = 0; epoch < epochs; epoch++) | ||
{ | ||
if (numSubsets == 1) | ||
{ | ||
dataReader.StartMinibatchLoop(mbSize, epoch, epochSize); | ||
} | ||
else | ||
{ | ||
dataReader.StartDistributedMinibatchLoop(mbSize, epoch, subsetNum, numSubsets, epochSize); | ||
} | ||
|
||
for (auto cnt = 0; dataReader.GetMinibatch(map) && cnt < m_maxMiniBatchCount; cnt++) | ||
{ | ||
// Process the Feature Matri(x|ces) | ||
for (auto i = 0; i < numFeatureFiles; i++) | ||
{ | ||
wstring name = numFeatureFiles > 1 ? L"features" + std::to_wstring(i + 1) : L"features"; | ||
OutputMatrix(*map.at(name), outputFile); | ||
} | ||
|
||
// Process the Label Matri(x|ces) | ||
for (auto i = 0; i < numLabelFiles; i++) | ||
{ | ||
wstring name = numLabelFiles > 1 ? L"labels" + std::to_wstring(i + 1) : L"labels"; | ||
OutputMatrix(*map.at(name), outputFile); | ||
} | ||
} | ||
} | ||
} | ||
|
||
// Helper function to run a Reader test. | ||
// configFileName : the file name for the config file | ||
// controlDataFilePath : the file path for the control data to verify against | ||
// testDataFilePath : the file path for writing the minibatch data (used for comparing against control data) | ||
// testSectionName : the section name for the test inside the config file | ||
// readerSectionName : the reader field name in the test section | ||
// epochSize : the epoch size | ||
// mbSize : the minibatch size | ||
// epochs : the number of epochs to read | ||
// numFeatureFiles : the number of feature files used (multi IO) | ||
// numLabelFiles : the number of label files used (multi IO) | ||
// subsetNum : the subset number for parallel trainings | ||
// numSubsets : the number of parallel trainings (set to 1 for single) | ||
template<class ElemType> | ||
void HelperRunReaderTest( | ||
string configFileName, | ||
const string controlDataFilePath, | ||
const string testDataFilePath, | ||
string testSectionName, | ||
string readerSectionName, | ||
size_t epochSize, | ||
size_t mbSize, | ||
size_t epochs, | ||
size_t numFeatureFiles, | ||
size_t numLabelFiles, | ||
size_t subsetNum, | ||
size_t numSubsets) | ||
{ | ||
std::wstring configFN(configFileName.begin(), configFileName.end()); | ||
std::wstring configFileCommand(L"configFile=" + configFN); | ||
|
||
wchar_t* arg[2] { L"CNTK", &configFileCommand[0] }; | ||
ConfigParameters config; | ||
const std::string rawConfigString = ConfigParameters::ParseCommandLine(2, arg, config); | ||
|
||
config.ResolveVariables(rawConfigString); | ||
const ConfigParameters simpleDemoConfig = config(testSectionName); | ||
const ConfigParameters readerConfig = simpleDemoConfig(readerSectionName); | ||
|
||
DataReader<ElemType> dataReader(readerConfig); | ||
|
||
std::map<std::wstring, Matrix<ElemType>*> map; | ||
std::vector<Matrix<ElemType>*> features; | ||
std::vector<Matrix<ElemType>*> labels; | ||
|
||
for (auto i = 0; i < numFeatureFiles; i++) | ||
{ | ||
features.push_back(new Matrix<ElemType>()); | ||
wstring name = numFeatureFiles > 1 ? L"features" + std::to_wstring(i + 1) : L"features"; | ||
map.insert(std::pair<wstring, Matrix<ElemType>*>(name, features[i])); | ||
} | ||
|
||
for (auto i = 0; i < numLabelFiles; i++) | ||
{ | ||
labels.push_back(new Matrix<ElemType>()); | ||
wstring name = numLabelFiles > 1 ? L"labels" + std::to_wstring(i + 1) : L"labels"; | ||
map.insert(std::pair<wstring, Matrix<ElemType>*>(name, labels[i])); | ||
} | ||
|
||
// Setup output file | ||
boost::filesystem::remove(testDataFilePath); | ||
ofstream outputFile(testDataFilePath, ios::out); | ||
|
||
// Perform the data reading | ||
HelperWriteReaderContentToFile(outputFile, dataReader, map, epochs, mbSize, epochSize, numFeatureFiles, numLabelFiles, subsetNum, numSubsets); | ||
|
||
outputFile.close(); | ||
|
||
std::ifstream ifstream1(controlDataFilePath); | ||
std::ifstream ifstream2(testDataFilePath); | ||
|
||
std::istream_iterator<char> beginStream1(ifstream1); | ||
std::istream_iterator<char> endStream1; | ||
std::istream_iterator<char> beginStream2(ifstream2); | ||
std::istream_iterator<char> endStream2; | ||
|
||
BOOST_CHECK_EQUAL_COLLECTIONS(beginStream1, endStream1, beginStream2, endStream2); | ||
} | ||
}; | ||
} | ||
}}} |
35 changes: 35 additions & 0 deletions
35
Tests/UnitTests/ReaderTests/Config/HTKMLFReaderSimpleDataLoop10_Config.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
RootDir=. | ||
DataDir=$RootDir$ | ||
|
||
# deviceId=-1 for CPU, >=0 for GPU devices | ||
deviceId=-1 | ||
|
||
precision=float | ||
|
||
Simple_Test = [ | ||
reader = [ | ||
readerType = "HTKMLFReader" | ||
readMethod = "rollingWindow" | ||
miniBatchMode = "partial" | ||
randomize = "auto" | ||
verbosity = 1 | ||
framemode = true | ||
|
||
features1 = [ | ||
dim = 792 | ||
scpFile = "$DataDir$/TIMIT.train.scp.fbank.fullpath" | ||
type = "real" | ||
] | ||
features2 = [ | ||
dim = 39 | ||
scpFile = "$DataDir$/TIMIT.train.scp.mfcc.fullpath" | ||
type = "real" | ||
] | ||
labels = [ | ||
mlfFile = "$DataDir$/TIMIT.train.align_cistate.mlf.cntk" | ||
labelMappingFile = "$DataDir$/TIMIT.statelist" | ||
labelDim = 183 | ||
labelType = "category" | ||
] | ||
] | ||
] |
31 changes: 31 additions & 0 deletions
31
Tests/UnitTests/ReaderTests/Config/HTKMLFReaderSimpleDataLoop11_Config.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
RootDir=. | ||
DataDir=$RootDir$ | ||
|
||
# deviceId=-1 for CPU, >=0 for GPU devices | ||
deviceId=-1 | ||
|
||
precision=double | ||
|
||
Simple_Test = [ | ||
reader = [ | ||
readerType = "HTKMLFReader" | ||
readMethod = "blockRandomize" | ||
miniBatchMode = "partial" | ||
randomize = "auto" | ||
verbosity = 0 | ||
frameMode = true | ||
|
||
features = [ | ||
dim = 363 | ||
type = "real" | ||
scpFile = "$DataDir$/glob_0000.scp" | ||
] | ||
|
||
labels = [ | ||
mlfFile = "$DataDir$/glob_0000.mlf" | ||
labelMappingFile = "$DataDir$/state.list" | ||
labelDim = 132 | ||
labelType = "category" | ||
] | ||
] | ||
] |
Oops, something went wrong.