
Commit 032c6b7

Updated conv auto-tuner to prevent issues with distributed reading.
Alexey Kamenev committed Feb 24, 2016
1 parent e2ded37 commit 032c6b7
Showing 1 changed file with 13 additions and 6 deletions.
19 changes: 13 additions & 6 deletions Source/Math/CuDnnConvolutionEngine.cu
@@ -519,10 +519,7 @@ public:
 private:
     void FindBestForwardAlgo(const CuDnnTensor4D& inT, const CuDnnFilter& filtT, const CuDnnConvolutionDescriptor& convDesc, const CuDnnTensor4D& outT)
     {
-        // Need to re-run auto-tuner in case batch size has been changed.
-        // We assume no other dimensions of tensors can change so we don't check it.
-        // REVIEW alexeyk: is this a safe assumption? Can convolution configuration change in runtime?
-        if (m_fwdAlgo.Algo.status == CUDNN_STATUS_SUCCESS && inT.n() == m_fwdAlgo.CurMBSize && outT.n() == m_fwdAlgo.CurMBSize)
+        if (!m_fwdAlgo.NeedAutotuning(inT, outT))
             return;
         const int MaxAlgoCount = 10;
         int calgo = 0;
@@ -543,7 +540,7 @@ private:
 
     void FindBestBackwardDataAlgo(const CuDnnFilter& filtT, const CuDnnTensor4D& srcGradT, const CuDnnConvolutionDescriptor& convDesc, const CuDnnTensor4D& gradT)
     {
-        if (m_backDataAlgo.Algo.status == CUDNN_STATUS_SUCCESS && srcGradT.n() == m_backDataAlgo.CurMBSize && gradT.n() == m_backDataAlgo.CurMBSize)
+        if (!m_backDataAlgo.NeedAutotuning(srcGradT, gradT))
             return;
         const int MaxAlgoCount = 10;
         int calgo = 0;
@@ -564,7 +561,7 @@ private:
 
     void FindBestBackwardFilterAlgo(const CuDnnTensor4D& inT, const CuDnnTensor4D& srcGradT, const CuDnnConvolutionDescriptor& convDesc, const CuDnnFilter& filtT)
     {
-        if (m_backFiltAlgo.Algo.status == CUDNN_STATUS_SUCCESS && inT.n() == m_backFiltAlgo.CurMBSize && srcGradT.n() == m_backFiltAlgo.CurMBSize)
+        if (!m_backFiltAlgo.NeedAutotuning(inT, srcGradT))
             return;
         const int MaxAlgoCount = 10;
         int calgo = 0;
@@ -595,6 +592,16 @@ private:
         // Current mini-batch size, needed for re-computing statistics in auto-tuner.
         size_t CurMBSize;
         T Algo;
+
+        bool NeedAutotuning(const CuDnnTensor4D& t1, const CuDnnTensor4D& t2)
+        {
+            // Need to re-run the auto-tuner in case the minibatch size has increased.
+            // If the minibatch size has decreased, we assume the previously selected algorithm requires the same or less workspace.
+            // This avoids re-running the auto-tuner every time the minibatch size changes, which can happen frequently (e.g. when distributed reading is enabled).
+            // REVIEW alexeyk: potentially, this might cause some perf issues if a better (faster) algo could be selected for a smaller minibatch.
+            // We assume no other dimensions of the tensors can change, so we don't check them.
+            return (Algo.status != CUDNN_STATUS_SUCCESS || t1.n() > CurMBSize || t2.n() > CurMBSize);
+        }
     };
 
     using C = Consts<ElemType>;
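For context, the sketch below shows the overall pattern this diff relies on: run the cuDNN algorithm search once, cache the fastest result that fits in the available workspace together with the minibatch size it was tuned for, and skip re-tuning for any later minibatch that is not larger than the cached one. It is not taken from the CNTK sources; TunedFwdAlgo, PickBestForward, and maxWorkspace are illustrative names, and only the cuDNN calls (cudnnFindConvolutionForwardAlgorithm, cudnnConvolutionFwdAlgoPerf_t) are real API.

// Minimal, self-contained sketch of the auto-tuner caching pattern used above.
// Illustrative only: TunedFwdAlgo, PickBestForward, and maxWorkspace are
// hypothetical names, not members of the CNTK CuDnnConvolutionEngine class.
#include <cudnn.h>
#include <cstddef>
#include <stdexcept>

struct TunedFwdAlgo
{
    TunedFwdAlgo() : CurMBSize(0) { Algo.status = CUDNN_STATUS_NOT_INITIALIZED; }

    // Re-tune only if nothing valid is cached or the current minibatch is larger
    // than the one the cached algorithm was tuned for.
    bool NeedAutotuning(size_t curMBSize) const
    {
        return Algo.status != CUDNN_STATUS_SUCCESS || curMBSize > CurMBSize;
    }

    size_t CurMBSize;                      // minibatch size the cached algo was tuned for
    cudnnConvolutionFwdAlgoPerf_t Algo;    // cached perf result (algo, status, time, memory)
};

void PickBestForward(cudnnHandle_t handle,
                     cudnnTensorDescriptor_t xDesc, cudnnFilterDescriptor_t wDesc,
                     cudnnConvolutionDescriptor_t convDesc, cudnnTensorDescriptor_t yDesc,
                     size_t curMBSize, size_t maxWorkspace, TunedFwdAlgo& cache)
{
    if (!cache.NeedAutotuning(curMBSize))
        return;                            // reuse the previously selected algorithm

    const int MaxAlgoCount = 10;           // same request count as in the diff
    cudnnConvolutionFwdAlgoPerf_t perf[MaxAlgoCount];
    int calgo = 0;
    cudnnStatus_t err = cudnnFindConvolutionForwardAlgorithm(
        handle, xDesc, wDesc, convDesc, yDesc, MaxAlgoCount, &calgo, perf);
    if (err != CUDNN_STATUS_SUCCESS)
        throw std::runtime_error("cuDNN forward algorithm search failed");

    // Results are sorted fastest-first; take the first one whose workspace fits.
    for (int i = 0; i < calgo; i++)
    {
        if (perf[i].status == CUDNN_STATUS_SUCCESS && perf[i].memory <= maxWorkspace)
        {
            cache.Algo = perf[i];
            cache.CurMBSize = curMBSize;   // remember the size we tuned for
            return;
        }
    }
    throw std::runtime_error("no cuDNN forward algorithm fits the available workspace");
}

The grow-only check mirrors the comment in NeedAutotuning: a smaller minibatch can safely reuse the cached algorithm because its workspace requirement does not grow, so the varying minibatch sizes produced by distributed reading do not trigger constant re-tuning.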
