Skip to content

Commit

Permalink
Refactor to use conc_stack.
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexey Kamenev committed Oct 14, 2015
1 parent 98324b8 commit a5acf9d
Show file tree
Hide file tree
Showing 8 changed files with 91 additions and 26 deletions.
47 changes: 46 additions & 1 deletion Common/Include/basetypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,12 @@ OACR_WARNING_DISABLE(POTENTIAL_ARGUMENT_TYPE_MISMATCH, "Not level1 or level2_sec
#include <locale> // std::wstring_convert
#include <string>
#include <algorithm> // for transform()
#include <mutex>
#include <unordered_map>
#include <chrono>
#include <thread>
#include <stack>
#include <mutex>
#include <memory>
#ifdef _MSC_VER
#include <codecvt> // std::codecvt_utf8
#endif
Expand Down Expand Up @@ -1004,4 +1006,47 @@ static inline std::wstring FormatWin32Error(DWORD error)
return res;
}
#endif // _WIN32

// Minimal thread-safe LIFO container. Add other functions as needed.
//
// Every operation takes the internal mutex for the duration of the stack access,
// so a single instance may be shared between threads. Typical use is as a pool of
// expensive-to-create objects: pop_or_create() hands out a pooled instance (or makes
// a fresh one), and push() returns it to the pool when the caller is done.
//
// T may be a move-only type (e.g. std::unique_ptr); in that case only the
// rvalue overload of push() can be used.
template<typename T>
class conc_stack
{
public:
    using value_type = typename std::stack<T>::value_type;

    conc_stack() = default;

    // The stack owns a mutex and is meant to be shared, not copied or moved.
    conc_stack(const conc_stack&) = delete;
    conc_stack& operator=(const conc_stack&) = delete;
    conc_stack(conc_stack&&) = delete;
    conc_stack& operator=(conc_stack&&) = delete;

    // Removes and returns the top element. If the stack is empty, returns the
    // result of calling 'factory' instead.
    //
    // The factory runs only after the lock has been released: object creation may be
    // expensive and must not stall other threads, and a factory that touches
    // this same stack would otherwise try to re-lock a non-recursive mutex.
    value_type pop_or_create(std::function<value_type()> factory)
    {
        {
            std::lock_guard<std::mutex> g(m_locker);
            if (!m_stack.empty())
            {
                value_type res = std::move(m_stack.top());
                m_stack.pop();
                return res;
            }
        }
        return factory();
    }

    // Pushes a copy of 'item' onto the stack.
    void push(const value_type& item)
    {
        std::lock_guard<std::mutex> g(m_locker);
        m_stack.push(item);
    }

    // Moves 'item' onto the stack; 'item' is left in a moved-from state.
    void push(value_type&& item)
    {
        std::lock_guard<std::mutex> g(m_locker);
        // 'item' is a plain rvalue reference (not a forwarding reference), so std::move is the right cast.
        m_stack.push(std::move(item));
    }

private:
    std::stack<value_type> m_stack;
    std::mutex m_locker; // guards every access to m_stack
};

#endif // _BASETYPES_
37 changes: 24 additions & 13 deletions DataReader/ImageReader/ImageReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class ITransform
class CropTransform : public ITransform
{
public:
CropTransform(unsigned int seed) : m_rng(seed)
CropTransform(unsigned int seed) : m_seed(seed)
{
}

Expand Down Expand Up @@ -70,22 +70,27 @@ class CropTransform : public ITransform

void Apply(cv::Mat& mat)
{
auto seed = m_seed;
auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique<std::mt19937>(seed); });

double ratio = 1;
switch (m_jitterType)
{
case RatioJitterType::None:
ratio = m_cropRatioMin;
break;
case RatioJitterType::UniRatio:
ratio = UniRealT(m_cropRatioMin, m_cropRatioMax)(m_rng);
ratio = UniRealT(m_cropRatioMin, m_cropRatioMax)(*rng);
assert(m_cropRatioMin <= ratio && ratio < m_cropRatioMax);
break;
default:
RuntimeError("Jitter type currently not implemented.");
}
mat = mat(GetCropRect(m_cropType, mat.rows, mat.cols, ratio));
if (m_hFlip && std::bernoulli_distribution()(m_rng))
mat = mat(GetCropRect(m_cropType, mat.rows, mat.cols, ratio, *rng));
if (m_hFlip && std::bernoulli_distribution()(*rng))
cv::flip(mat, mat, 1);

m_rngs.push(std::move(rng));
}

private:
Expand Down Expand Up @@ -130,7 +135,7 @@ class CropTransform : public ITransform
RuntimeError("Invalid jitter type: %s.", src.c_str());
}

cv::Rect GetCropRect(CropType type, int crow, int ccol, double cropRatio)
cv::Rect GetCropRect(CropType type, int crow, int ccol, double cropRatio, std::mt19937& rng)
{
assert(crow > 0);
assert(ccol > 0);
Expand All @@ -146,8 +151,8 @@ class CropTransform : public ITransform
yOff = (crow - cropSize) / 2;
break;
case CropType::Random:
xOff = UniIntT(0, ccol - cropSize)(m_rng);
yOff = UniIntT(0, crow - cropSize)(m_rng);
xOff = UniIntT(0, ccol - cropSize)(rng);
yOff = UniIntT(0, crow - cropSize)(rng);
break;
default:
assert(false);
Expand All @@ -159,8 +164,8 @@ class CropTransform : public ITransform
}

private:
// REVIEW alexeyk: currently not thread safe. Engines are expensive to create.
std::mt19937 m_rng;
unsigned int m_seed;
conc_stack<std::unique_ptr<std::mt19937>> m_rngs;

CropType m_cropType;
double m_cropRatioMin;
Expand All @@ -172,7 +177,7 @@ class CropTransform : public ITransform
class ScaleTransform : public ITransform
{
public:
ScaleTransform(int dataType, unsigned int seed) : m_dataType(dataType), m_rng(seed)
ScaleTransform(int dataType, unsigned int seed) : m_dataType(dataType), m_seed(seed)
{
assert(m_dataType == CV_32F || m_dataType == CV_64F);

Expand Down Expand Up @@ -211,15 +216,21 @@ class ScaleTransform : public ITransform
if (mat.type() != CV_MAKETYPE(m_dataType, m_imgChannels))
mat.convertTo(mat, m_dataType);

auto seed = m_seed;
auto rng = m_rngs.pop_or_create([seed]() { return std::make_unique<std::mt19937>(seed); });

assert(m_interp.size() > 0);
cv::resize(mat, mat, cv::Size(static_cast<int>(m_imgWidth), static_cast<int>(m_imgHeight)), 0, 0,
m_interp[UniIntT(0, static_cast<int>(m_interp.size()) - 1)(m_rng)]);
m_interp[UniIntT(0, static_cast<int>(m_interp.size()) - 1)(*rng)]);

m_rngs.push(std::move(rng));
}

private:
using UniIntT = std::uniform_int_distribution<int>;
// REVIEW alexeyk: currently not thread safe. Engines are expensive to create.
std::mt19937 m_rng;

unsigned int m_seed;
conc_stack<std::unique_ptr<std::mt19937>> m_rngs;

int m_dataType;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {

virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
{
EvaluateThisNodeS(m_functionValues, Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), m_maxIndexes0, m_maxIndexes1, m_maxValues, m_topK, m_workspace, shared_from_this());
EvaluateThisNodeS(m_functionValues, Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), m_maxIndexes0, m_maxIndexes1, m_maxValues, m_topK, shared_from_this());
}

void EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues0, const Matrix<ElemType>& inputFunctionValues1, Matrix<ElemType>& maxIndexes0, Matrix<ElemType>& maxIndexes1, Matrix<ElemType>& maxValues, ComputationNodePtr curNode)
Expand Down
3 changes: 1 addition & 2 deletions Math/CNTKMathTest/MatrixUnitTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -845,10 +845,9 @@ namespace CNTKMathTest
Matrix<float> actual(3, 2, src, matrixFlagNormal, deviceId);
Matrix<float> actualIdx(deviceId);
Matrix<float> actualVal(deviceId);
Matrix<float> temp(deviceId);

int topK = 2;
actual.VectorMax(actualIdx, actualVal, true, topK, temp);
actual.VectorMax(actualIdx, actualVal, true, topK);
Assert::IsTrue(actualIdx.IsEqualTo(expIdx));
Assert::IsTrue(actualVal.IsEqualTo(expVal));
}
Expand Down
15 changes: 11 additions & 4 deletions Math/Math/GPUMatrix.cu
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
GPUMatrix<ElemType>::~GPUMatrix(void)
{
Clear();
if (m_workspace != nullptr)
delete m_workspace;
}

template<class ElemType>
Expand Down Expand Up @@ -2950,7 +2952,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}

template<class ElemType>
void GPUMatrix<ElemType>::VectorMax(GPUMatrix<ElemType>& maxIndexes, GPUMatrix<ElemType>& maxValues, const bool isColWise, int topK, GPUMatrix<ElemType>& workspace) const
void GPUMatrix<ElemType>::VectorMax(GPUMatrix<ElemType>& maxIndexes, GPUMatrix<ElemType>& maxValues, const bool isColWise, int topK) const
{
if (IsEmpty())
throw std::logic_error("VectorMax: Matrix is empty.");
Expand Down Expand Up @@ -3005,9 +3007,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
cbtemp = ctemp * sizeof(ElemType);
// ElemType count needed to store indices, accounting for natural alignment for uint64_t type.
size_t cidx = ((celt + 1) * sizeof(uint64_t) - 1 + sizeof(ElemType) - 1) / sizeof(ElemType);
// Prepare temp workspace.
auto deviceId = m_computeDevice;
assert(m_workspace != nullptr);
auto workspace = m_workspace->pop_or_create([deviceId]() { return std::make_unique<GPUMatrix<ElemType>>(deviceId); });
// Resize to store: output values for the 1st and 2nd passes, input indices, output indices, and temp storage.
workspace.Resize(m, 2 * n + (2 * cidx + ctemp + m - 1) / m);
outVal1 = workspace.m_pArray;
workspace->Resize(m, 2 * n + (2 * cidx + ctemp + m - 1) / m);
outVal1 = workspace->m_pArray;
outVal2 = outVal1 + celt;
inIdx = reinterpret_cast<uint64_t*>(outVal2 + celt);
// Align indices pointer if needed.
Expand All @@ -3016,7 +3022,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
reinterpret_cast<uint8_t*&>(inIdx) += sizeof(uint64_t) - cbAlign;
outIdx = inIdx + celt;
void* ptmp = outIdx + celt;
assert(reinterpret_cast<ElemType*>(reinterpret_cast<uint8_t*>(ptmp) + cbtemp) <= workspace.m_pArray + workspace.GetNumElements());
assert(reinterpret_cast<ElemType*>(reinterpret_cast<uint8_t*>(ptmp) + cbtemp) <= workspace->m_pArray + workspace->GetNumElements());

// Initialize indices.
const int ThreadsPerBlock = 128;
Expand All @@ -3032,6 +3038,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
cblock = (topK * n + ThreadsPerBlock - 1) / ThreadsPerBlock;
_copyTopKResults<<<cblock, ThreadsPerBlock, 0, t_stream>>>(inIdx, outVal2, maxIndexes.m_pArray, maxValues.m_pArray, m, n, topK);

m_workspace->push(std::move(workspace));
#ifndef _DEBUG
UNUSED(err);
#endif
Expand Down
6 changes: 5 additions & 1 deletion Math/Math/GPUMatrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
static cublasHandle_t s_cuHandle[MaxGpus];
static void *s_curandGenerator;

// Pool of scratch matrices, held through a raw owning pointer (released in ~GPUMatrix)
// to avoid issues with __declspec(dllexport) on Windows.
// REVIEW alexeyk: could be allocated lazily, but the current footprint is small anyway.
mutable conc_stack<std::unique_ptr<GPUMatrix<ElemType>>>* m_workspace = new conc_stack<std::unique_ptr<GPUMatrix<ElemType>>>;

private:
void performInplaceFunction(int kind);
size_t LocateElement (const size_t i, const size_t j) const;
Expand Down Expand Up @@ -295,7 +299,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
GPUMatrix<ElemType>& AddFoldedPositiveAndShiftedNegSample(const GPUMatrix<ElemType>& a, const size_t posNumber, const size_t negNumber, const size_t shiftNumber);

void VectorMax(GPUMatrix<ElemType>& maxIndexes, GPUMatrix<ElemType>& maxValues, const bool isColWise) const;
void VectorMax(GPUMatrix<ElemType>& maxIndexes, GPUMatrix<ElemType>& maxValues, const bool isColWise, int topK, GPUMatrix<ElemType>& workspace) const;
void VectorMax(GPUMatrix<ElemType>& maxIndexes, GPUMatrix<ElemType>& maxValues, const bool isColWise, int topK) const;
void VectorMin(GPUMatrix<ElemType>& minIndexes, GPUMatrix<ElemType>& minValues, const bool isColWise) const;

GPUMatrix<ElemType>& AssignNumOfDiff(const GPUMatrix<ElemType>& a, const GPUMatrix<ElemType>& b, bool searchInCol = false);
Expand Down
5 changes: 2 additions & 3 deletions Math/Math/Matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3400,20 +3400,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}

template<class ElemType>
void Matrix<ElemType>::VectorMax(Matrix<ElemType>& maxIndexes, Matrix<ElemType>& maxValues, const bool isColWise, int topK, Matrix<ElemType>& workspace) const
void Matrix<ElemType>::VectorMax(Matrix<ElemType>& maxIndexes, Matrix<ElemType>& maxValues, const bool isColWise, int topK) const
{
if (IsEmpty())
throw std::logic_error("VectorMax: Matrix is empty.");

DecideAndMoveToRightDevice(*this, maxIndexes, maxValues);
maxIndexes.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
maxValues.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);
workspace.SwitchToMatrixType(GetMatrixType(), GetFormat(), false);

DISPATCH_MATRIX_ON_FLAG(this,
&maxValues,
this->m_CPUMatrix->VectorMax(*maxIndexes.m_CPUMatrix, *maxValues.m_CPUMatrix, isColWise, topK); maxIndexes.SetDataLocation(CPU, DENSE),
this->m_GPUMatrix->VectorMax(*maxIndexes.m_GPUMatrix, *maxValues.m_GPUMatrix, isColWise, topK, *workspace.m_GPUMatrix); maxIndexes.SetDataLocation(GPU, DENSE),
this->m_GPUMatrix->VectorMax(*maxIndexes.m_GPUMatrix, *maxValues.m_GPUMatrix, isColWise, topK); maxIndexes.SetDataLocation(GPU, DENSE),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED
);
Expand Down
2 changes: 1 addition & 1 deletion Math/Math/Matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
Matrix<ElemType>& AssignSignOf(const Matrix<ElemType>& a);
Matrix<ElemType>& AddSignOf(const Matrix<ElemType>& a);
void VectorMax(Matrix<ElemType>& maxIndexes, Matrix<ElemType>& maxValues, const bool isColWise) const;
void VectorMax(Matrix<ElemType>& maxIndexes, Matrix<ElemType>& maxValues, const bool isColWise, int topK, Matrix<ElemType>& workspace) const;
void VectorMax(Matrix<ElemType>& maxIndexes, Matrix<ElemType>& maxValues, const bool isColWise, int topK) const;
void VectorMin(Matrix<ElemType>& minIndexes, Matrix<ElemType>& minValues, const bool isColWise) const;

Matrix<ElemType>& AssignNumOfDiff(const Matrix<ElemType>& a, const Matrix<ElemType>& b, bool searchInCol = false);
Expand Down

0 comments on commit a5acf9d

Please sign in to comment.