Skip to content

Commit

Permalink
Added ReaderTests project
Browse files Browse the repository at this point in the history
  • Loading branch information
gaizkan committed Dec 11, 2015
1 parent f5b6f0e commit 565dc49
Show file tree
Hide file tree
Showing 50 changed files with 17,562,962 additions and 275 deletions.
35 changes: 18 additions & 17 deletions CNTK.sln
Original file line number Diff line number Diff line change
Expand Up @@ -398,12 +398,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTKActionsLib", "MachineLe
{DE3C54E5-D7D0-47AF-A783-DFDCE59E7937} = {DE3C54E5-D7D0-47AF-A783-DFDCE59E7937}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "UCIFastReaderTests", "Tests\UnitTests\UCIFastReaderTests\UCIFastReaderTests.vcxproj", "{B97BDF88-F6B5-4F3A-BD8E-45F787D0C3C3}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{E6646FFE-3588-4276-8A15-8D65C22711C1} = {E6646FFE-3588-4276-8A15-8D65C22711C1}
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SequenceTraining", "SequenceTraining", "{BB8B9FC5-C4B3-477F-80E2-665DC8E431BD}"
ProjectSection(SolutionItems) = preProject
Tests\Speech\DNN\SequenceTraining\add_layer.mel = Tests\Speech\DNN\SequenceTraining\add_layer.mel
Expand Down Expand Up @@ -541,6 +535,13 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "AdditionalFiles", "Addition
Demos\Text\AdditionalFiles\perplexity.nce100.lr0.1.txt = Demos\Text\AdditionalFiles\perplexity.nce100.lr0.1.txt
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ReaderTests", "Tests\UnitTests\ReaderTests\ReaderTests.vcxproj", "{A4FC3467-4787-43E8-BBC0-D79AE56B468D}"
ProjectSection(ProjectDependencies) = postProject
{33D2FD22-DEF2-4507-A58A-368F641AEBE5} = {33D2FD22-DEF2-4507-A58A-368F641AEBE5}
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{E6646FFE-3588-4276-8A15-8D65C22711C1} = {E6646FFE-3588-4276-8A15-8D65C22711C1}
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Mixed Platforms = Debug|Mixed Platforms
Expand Down Expand Up @@ -752,16 +753,16 @@ Global
{EB2BE26F-6BD4-4274-971F-86D080779DD1}.Release|Win32.ActiveCfg = Release|x64
{EB2BE26F-6BD4-4274-971F-86D080779DD1}.Release|x64.ActiveCfg = Release|x64
{EB2BE26F-6BD4-4274-971F-86D080779DD1}.Release|x64.Build.0 = Release|x64
{B97BDF88-F6B5-4F3A-BD8E-45F787D0C3C3}.Debug|Mixed Platforms.ActiveCfg = Debug|x64
{B97BDF88-F6B5-4F3A-BD8E-45F787D0C3C3}.Debug|Mixed Platforms.Build.0 = Debug|x64
{B97BDF88-F6B5-4F3A-BD8E-45F787D0C3C3}.Debug|Win32.ActiveCfg = Debug|x64
{B97BDF88-F6B5-4F3A-BD8E-45F787D0C3C3}.Debug|x64.ActiveCfg = Debug|x64
{B97BDF88-F6B5-4F3A-BD8E-45F787D0C3C3}.Debug|x64.Build.0 = Debug|x64
{B97BDF88-F6B5-4F3A-BD8E-45F787D0C3C3}.Release|Mixed Platforms.ActiveCfg = Release|x64
{B97BDF88-F6B5-4F3A-BD8E-45F787D0C3C3}.Release|Mixed Platforms.Build.0 = Release|x64
{B97BDF88-F6B5-4F3A-BD8E-45F787D0C3C3}.Release|Win32.ActiveCfg = Release|x64
{B97BDF88-F6B5-4F3A-BD8E-45F787D0C3C3}.Release|x64.ActiveCfg = Release|x64
{B97BDF88-F6B5-4F3A-BD8E-45F787D0C3C3}.Release|x64.Build.0 = Release|x64
{A4FC3467-4787-43E8-BBC0-D79AE56B468D}.Debug|Mixed Platforms.ActiveCfg = Debug|x64
{A4FC3467-4787-43E8-BBC0-D79AE56B468D}.Debug|Mixed Platforms.Build.0 = Debug|x64
{A4FC3467-4787-43E8-BBC0-D79AE56B468D}.Debug|Win32.ActiveCfg = Debug|x64
{A4FC3467-4787-43E8-BBC0-D79AE56B468D}.Debug|x64.ActiveCfg = Debug|x64
{A4FC3467-4787-43E8-BBC0-D79AE56B468D}.Debug|x64.Build.0 = Debug|x64
{A4FC3467-4787-43E8-BBC0-D79AE56B468D}.Release|Mixed Platforms.ActiveCfg = Release|x64
{A4FC3467-4787-43E8-BBC0-D79AE56B468D}.Release|Mixed Platforms.Build.0 = Release|x64
{A4FC3467-4787-43E8-BBC0-D79AE56B468D}.Release|Win32.ActiveCfg = Release|x64
{A4FC3467-4787-43E8-BBC0-D79AE56B468D}.Release|x64.ActiveCfg = Release|x64
{A4FC3467-4787-43E8-BBC0-D79AE56B468D}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -814,7 +815,6 @@ Global
{6F19321A-65E7-4829-B00C-3886CD6C6EDE} = {D45DF403-6781-444E-B654-A96868C5BE68}
{4701E678-5E6F-470D-B348-9CD1A2C095D1} = {6F19321A-65E7-4829-B00C-3886CD6C6EDE}
{EB2BE26F-6BD4-4274-971F-86D080779DD1} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{B97BDF88-F6B5-4F3A-BD8E-45F787D0C3C3} = {6F19321A-65E7-4829-B00C-3886CD6C6EDE}
{BB8B9FC5-C4B3-477F-80E2-665DC8E431BD} = {6994C86D-A672-4254-824A-51F4DFEB807F}
{8071EF60-30F7-4A77-81AA-ADCA0E18B1E3} = {D45DF403-6781-444E-B654-A96868C5BE68}
{76F9323D-34A1-43A5-A594-C4798931FF21} = {8071EF60-30F7-4A77-81AA-ADCA0E18B1E3}
Expand All @@ -834,5 +834,6 @@ Global
{EDE295D1-37F6-48C2-A5AF-8FC66BF20E68} = {3CE841C0-02E5-46DB-B401-6F8784880173}
{A07A002C-A05C-477C-9DED-F01272724C6E} = {97AAB0C8-D553-49CB-A539-004FCD7FD59F}
{8EBD7E6F-415C-4B6A-927C-1AF82905B16C} = {97AAB0C8-D553-49CB-A539-004FCD7FD59F}
{A4FC3467-4787-43E8-BBC0-D79AE56B468D} = {6F19321A-65E7-4829-B00C-3886CD6C6EDE}
EndGlobalSection
EndGlobal
243 changes: 243 additions & 0 deletions Tests/UnitTests/ReaderTests/Common/ReaderTestHelper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
//
// <copyright file="ReaderTestHelper.h" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#pragma once

#include "DataReader.h"
#include "boost/filesystem.hpp"

// Standard library facilities used directly by this header.
#include <cstdlib>
#include <fstream>
#include <iterator>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

using namespace Microsoft::MSR::CNTK;

namespace Microsoft { namespace MSR { namespace CNTK
{
namespace Test
{
struct ReaderFixture
{
// This fixture sets up paths so the tests can assume the right location for finding the configuration
// file as well as the input data and control data.
// subPath : an optional sub path (or full path) for the location of data.
ReaderFixture(string subPath = "")
{
BOOST_TEST_MESSAGE("Setup fixture");
m_initialWorkingPath = boost::filesystem::current_path().generic_string();
BOOST_TEST_MESSAGE("Current working directory: " + m_initialWorkingPath);

boost::filesystem::path path(boost::unit_test::framework::master_test_suite().argv[0]);
m_parentPath = path.parent_path().generic_string();
m_testDataPath = m_parentPath + "/../../../Tests/UnitTests/ReaderTests";

BOOST_TEST_MESSAGE("Setting test data path to: " + m_testDataPath);

string newCurrentPath;

// Determine if a subpath has been specified and it is not a relative path
if (subPath.length())
{
// Retrieve the full path from the environment variable (if any)
// Currently limited to a single expansion of an environment variable at the beginning of the string.
if (subPath[0] == '%')
{
auto end = subPath.find_last_of(subPath[0]);
string environmentVariable = subPath.substr(1, end - 1);

BOOST_TEST_MESSAGE("Retrieving environment variable: " + environmentVariable);

const char* p = std::getenv(environmentVariable.c_str());
if (p)
{
newCurrentPath = p + subPath.substr(end + 1);
}
else
{
BOOST_TEST_MESSAGE("Invalid environment variable: " + subPath);
newCurrentPath = m_testDataPath;
}
}
else if ((subPath[0] == '/' && subPath[1] == '//') || (subPath[0] == '\\' && subPath[1] == '\\'))
{
newCurrentPath = subPath;
}
else
{
newCurrentPath = m_testDataPath + subPath;
}
}

BOOST_TEST_MESSAGE("Setting current path to: " + newCurrentPath);

boost::filesystem::current_path(newCurrentPath);

BOOST_TEST_MESSAGE("Current working directory is now: " + boost::filesystem::current_path().generic_string());
}

~ReaderFixture()
{
BOOST_TEST_MESSAGE("Teardown fixture");
}

// Limits the number of minibatches to read, to reduce time and data file size
size_t m_maxMiniBatchCount = 10;

string m_initialWorkingPath;
string m_testDataPath;
string m_parentPath;

string initialPath() { return m_initialWorkingPath; }
string testDataPath() { return m_testDataPath; }
string currentPath() { return boost::filesystem::current_path().generic_string(); }

// Helper function to write a matrix (feature or label) to a file
// matrix : the matrix to output
// outputFile : the output stream to write to
template<class ElemType>
void OutputMatrix(Matrix<ElemType>& matrix, ofstream& outputFile)
{
size_t numRows = matrix.GetNumRows();

std::unique_ptr<ElemType[]> pItem{ matrix.CopyToArray() };
size_t numItems = numRows * matrix.GetNumCols();

for (auto j = 0; j < numItems; j++)
{
outputFile << pItem[j] << (j % numRows ? "\n" : " ");
}
}

// Helper function to write the Reader's content to a file.
// outputFile : the file stream to output to.
// dataReader : the DataReader to get minibatches from
// map : the map containing the feature and label matrices
// epochs : the number of epochs to read
// mbSize : the minibatch size
// epochSize : the epoch size
// numFeatureFiles : the number of feature files used (multi IO)
// numLabelFiles : the number of label files used (multi IO)
// subsetNum : the subset number for parallel trainings
// numSubsets : the number of parallel trainings (set to 1 for single)
template <class ElemType>
void HelperWriteReaderContentToFile(
ofstream& outputFile,
DataReader<ElemType>& dataReader,
std::map<std::wstring, Matrix<ElemType>*>& map,
size_t epochs,
size_t mbSize,
size_t epochSize,
size_t numFeatureFiles,
size_t numLabelFiles,
size_t subsetNum,
size_t numSubsets)
{
for (auto epoch = 0; epoch < epochs; epoch++)
{
if (numSubsets == 1)
{
dataReader.StartMinibatchLoop(mbSize, epoch, epochSize);
}
else
{
dataReader.StartDistributedMinibatchLoop(mbSize, epoch, subsetNum, numSubsets, epochSize);
}

for (auto cnt = 0; dataReader.GetMinibatch(map) && cnt < m_maxMiniBatchCount; cnt++)
{
// Process the Feature Matri(x|ces)
for (auto i = 0; i < numFeatureFiles; i++)
{
wstring name = numFeatureFiles > 1 ? L"features" + std::to_wstring(i + 1) : L"features";
OutputMatrix(*map.at(name), outputFile);
}

// Process the Label Matri(x|ces)
for (auto i = 0; i < numLabelFiles; i++)
{
wstring name = numLabelFiles > 1 ? L"labels" + std::to_wstring(i + 1) : L"labels";
OutputMatrix(*map.at(name), outputFile);
}
}
}
}

// Helper function to run a Reader test.
// configFileName : the file name for the config file
// controlDataFilePath : the file path for the control data to verify against
// testDataFilePath : the file path for writing the minibatch data (used for comparing against control data)
// testSectionName : the section name for the test inside the config file
// readerSectionName : the reader field name in the test section
// epochSize : the epoch size
// mbSize : the minibatch size
// epochs : the number of epochs to read
// numFeatureFiles : the number of feature files used (multi IO)
// numLabelFiles : the number of label files used (multi IO)
// subsetNum : the subset number for parallel trainings
// numSubsets : the number of parallel trainings (set to 1 for single)
template<class ElemType>
void HelperRunReaderTest(
string configFileName,
const string controlDataFilePath,
const string testDataFilePath,
string testSectionName,
string readerSectionName,
size_t epochSize,
size_t mbSize,
size_t epochs,
size_t numFeatureFiles,
size_t numLabelFiles,
size_t subsetNum,
size_t numSubsets)
{
std::wstring configFN(configFileName.begin(), configFileName.end());
std::wstring configFileCommand(L"configFile=" + configFN);

wchar_t* arg[2] { L"CNTK", &configFileCommand[0] };
ConfigParameters config;
const std::string rawConfigString = ConfigParameters::ParseCommandLine(2, arg, config);

config.ResolveVariables(rawConfigString);
const ConfigParameters simpleDemoConfig = config(testSectionName);
const ConfigParameters readerConfig = simpleDemoConfig(readerSectionName);

DataReader<ElemType> dataReader(readerConfig);

std::map<std::wstring, Matrix<ElemType>*> map;
std::vector<Matrix<ElemType>*> features;
std::vector<Matrix<ElemType>*> labels;

for (auto i = 0; i < numFeatureFiles; i++)
{
features.push_back(new Matrix<ElemType>());
wstring name = numFeatureFiles > 1 ? L"features" + std::to_wstring(i + 1) : L"features";
map.insert(std::pair<wstring, Matrix<ElemType>*>(name, features[i]));
}

for (auto i = 0; i < numLabelFiles; i++)
{
labels.push_back(new Matrix<ElemType>());
wstring name = numLabelFiles > 1 ? L"labels" + std::to_wstring(i + 1) : L"labels";
map.insert(std::pair<wstring, Matrix<ElemType>*>(name, labels[i]));
}

// Setup output file
boost::filesystem::remove(testDataFilePath);
ofstream outputFile(testDataFilePath, ios::out);

// Perform the data reading
HelperWriteReaderContentToFile(outputFile, dataReader, map, epochs, mbSize, epochSize, numFeatureFiles, numLabelFiles, subsetNum, numSubsets);

outputFile.close();

std::ifstream ifstream1(controlDataFilePath);
std::ifstream ifstream2(testDataFilePath);

std::istream_iterator<char> beginStream1(ifstream1);
std::istream_iterator<char> endStream1;
std::istream_iterator<char> beginStream2(ifstream2);
std::istream_iterator<char> endStream2;

BOOST_CHECK_EQUAL_COLLECTIONS(beginStream1, endStream1, beginStream2, endStream2);
}
};
}
}}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Reader test configuration: the Simple_Test section holds an HTKMLFReader setup
# with two feature inputs (features1, features2) and one label input.
# DataDir is defined in terms of RootDir; every data file below is located through DataDir.
RootDir=.
DataDir=$RootDir$

# deviceId=-1 for CPU, >=0 for GPU devices
deviceId=-1

precision=float

Simple_Test = [
reader = [
readerType = "HTKMLFReader"
readMethod = "rollingWindow"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 1
framemode = true

# First feature input: dimension 792, listed in the fbank script file
features1 = [
dim = 792
scpFile = "$DataDir$/TIMIT.train.scp.fbank.fullpath"
type = "real"
]
# Second feature input: dimension 39, listed in the mfcc script file
features2 = [
dim = 39
scpFile = "$DataDir$/TIMIT.train.scp.mfcc.fullpath"
type = "real"
]
# Label input: category labels with dimension 183, with a separate label mapping file
labels = [
mlfFile = "$DataDir$/TIMIT.train.align_cistate.mlf.cntk"
labelMappingFile = "$DataDir$/TIMIT.statelist"
labelDim = 183
labelType = "category"
]
]
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Reader test configuration: the Simple_Test section holds an HTKMLFReader setup
# with a single feature input and a single label input, using double precision.
# DataDir is defined in terms of RootDir; every data file below is located through DataDir.
RootDir=.
DataDir=$RootDir$

# deviceId=-1 for CPU, >=0 for GPU devices
deviceId=-1

precision=double

Simple_Test = [
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
frameMode = true

# Feature input: dimension 363, listed in the script file
features = [
dim = 363
type = "real"
scpFile = "$DataDir$/glob_0000.scp"
]

# Label input: category labels with dimension 132, with a separate label mapping file
labels = [
mlfFile = "$DataDir$/glob_0000.mlf"
labelMappingFile = "$DataDir$/state.list"
labelDim = 132
labelType = "category"
]
]
]
Loading

0 comments on commit 565dc49

Please sign in to comment.