Skip to content

Commit

Permalink
cudnn: enabled build on Linux with cuDNN.
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexey Kamenev committed Dec 12, 2015
1 parent 895c10a commit 3023f11
Show file tree
Hide file tree
Showing 5 changed files with 174 additions and 65 deletions.
11 changes: 11 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
# If not specified, GPU will not be enabled
# CUB_PATH= path to NVIDIA CUB installation, so $(CUB_PATH)/cub/cub.cuh exists
# defaults to /usr/local/cub-1.4.1
# CUDNN_PATH= path to NVIDIA cuDNN installation, so $(CUDNN_PATH)/cuda/include/cudnn.h exists
# If not specified, CNTK will be built without cuDNN.
# KALDI_PATH= Path to Kaldi
# If not specified, Kaldi plugins will not be built
# OPENCV_PATH= path to OpenCV 3.0.0 installation, so $(OPENCV_PATH) exists
Expand Down Expand Up @@ -102,6 +104,13 @@ ifdef CUDA_PATH
LIBPATH += $(CUDA_PATH)/lib64
LIBS += -lcublas -lcudart -lcuda -lcurand -lcusparse -lnvidia-ml
# Set up cuDNN if needed
ifdef CUDNN_PATH
INCLUDEPATH += $(CUDNN_PATH)/cuda/include
LIBPATH += $(CUDNN_PATH)/cuda/lib64
LIBS += -lcudnn
CPPFLAGS +=-DUSE_CUDNN
endif
else
DEVICE = cpu
Expand Down Expand Up @@ -218,13 +227,15 @@ MATH_SRC =\
Math/Math/QuantizedMatrix.cpp \
Math/Math/Matrix.cpp \
Math/Math/CUDAPageLockedMemAllocator.cpp \
Math/Math/ConvolutionEngine.cpp \
ifdef CUDA_PATH
MATH_SRC +=\
Math/Math/GPUMatrix.cu \
Math/Math/GPUSparseMatrix.cu \
Math/Math/GPUWatcher.cu \
Math/Math/MatrixQuantizerGPU.cu \
Math/Math/CuDnnConvolutionEngine.cpp \
else
MATH_SRC +=\
Expand Down
136 changes: 83 additions & 53 deletions Math/Math/ConvolutionEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,62 +11,15 @@
namespace Microsoft { namespace MSR { namespace CNTK {

template<class ElemType>
class DefaultConvolutionEngineFactory : public ConvolutionEngineFactory<ElemType>
class DefaultConvolutionEngine : public ConvolutionEngine<ElemType>
{
public:
DefaultConvolutionEngineFactory(DEVICEID_TYPE deviceId)
: ConvolutionEngineFactory<ElemType>(deviceId)
{
}

public:
Tensor4DPtr CreateTensor(size_t w, size_t h, size_t c, size_t n) override
{
return std::make_unique<ConvolutionTensor4D>(w, h, c, n);
}

FilterPtr CreateFilter(size_t w, size_t h, size_t c, size_t k) override
{
return std::make_unique<Filter>(w, h, c, k);
}

ConvDescPtr CreateConvDescriptor(const Tensor4D& /*inT*/, const Filter& /*filterT*/,
size_t wStride, size_t hStride, bool padding) override
{
return std::make_unique<ConvDesc>(wStride, hStride, padding);
}

PoolDescPtr CreatePoolDescriptor(PoolDesc::PoolKind kind, size_t w, size_t h, size_t wStride, size_t hStride, size_t wPad, size_t hPad) override
{
return std::make_unique<PoolDesc>(kind, w, h, wStride, hStride, wPad, hPad);
}

ConvEnginePtr CreateConvEngine(size_t maxTempMemSizeInSamples) override
{
return std::make_unique<DefaultConvolutionEngine<ElemType>>(m_deviceId, maxTempMemSizeInSamples);
}

PoolEnginePtr CreatePoolEngine() override
{
return std::make_unique<DefaultPoolingEngine<ElemType>>();
}
};
using Base = ConvolutionEngine<ElemType>;
using typename Base::Mat;
using typename Base::Tensor4D;
using typename Base::Filter;
using typename Base::ConvDesc;

template<class ElemType>
std::unique_ptr<ConvolutionEngineFactory<ElemType>> ConvolutionEngineFactory<ElemType>::Create(DEVICEID_TYPE deviceId)
{
// REVIEW alexeyk: make cuDNN default when running on GPU and compiled with cuDNN, add config parameter to enable runtime switch between implementations.
if (deviceId >= 0 && CuDnnConvolutionEngineFactory<ElemType>::IsSupported())
return std::make_unique<CuDnnConvolutionEngineFactory<ElemType>>(deviceId);
return std::make_unique<DefaultConvolutionEngineFactory<ElemType>>(deviceId);
}

template class ConvolutionEngineFactory<float>;
template class ConvolutionEngineFactory<double>;

template<class ElemType>
class DefaultConvolutionEngine : public ConvolutionEngine<ElemType>
{
public:
DefaultConvolutionEngine(DEVICEID_TYPE deviceId, size_t maxTempMemSizeInSamples)
: m_tempMatrix(deviceId), m_maxTempMemSizeInSamples(maxTempMemSizeInSamples)
Expand Down Expand Up @@ -294,6 +247,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
class DefaultPoolingEngine : public PoolingEngine<ElemType>
{
public:
using Base = PoolingEngine<ElemType>;
using typename Base::Tensor4D;
using typename Base::PoolDesc;
using typename Base::Mat;

public:
void Forward(const Tensor4D& inT, const Mat& in, const PoolDesc& poolDesc, const Tensor4D& outT, Mat& out) override
{
Expand Down Expand Up @@ -349,4 +308,75 @@ namespace Microsoft { namespace MSR { namespace CNTK {

template class PoolingEngine<float>;
template class PoolingEngine<double>;

// Factory for the default (non-cuDNN) convolution and pooling primitives.
// Serves as the fallback implementation when cuDNN is not available.
template<class ElemType>
class DefaultConvolutionEngineFactory : public ConvolutionEngineFactory<ElemType>
{
public:
    using Base = ConvolutionEngineFactory<ElemType>;
    // Re-export dependent type aliases from the base template so they can be
    // referred to without qualification inside this class.
    using typename Base::Tensor4D;
    using typename Base::Tensor4DPtr;
    using typename Base::Filter;
    using typename Base::FilterPtr;
    using typename Base::ConvDesc;
    using typename Base::ConvDescPtr;
    using typename Base::PoolDesc;
    using typename Base::PoolDescPtr;
    using typename Base::ConvEnginePtr;
    using typename Base::PoolEnginePtr;
    using Base::m_deviceId;

public:
    DefaultConvolutionEngineFactory(DEVICEID_TYPE deviceId)
        : ConvolutionEngineFactory<ElemType>(deviceId)
    {
    }

public:
    // Engine creation: both engines come from the default implementations in
    // this translation unit.
    ConvEnginePtr CreateConvEngine(size_t maxTempMemSizeInSamples) override
    {
        return std::make_unique<DefaultConvolutionEngine<ElemType>>(m_deviceId, maxTempMemSizeInSamples);
    }

    PoolEnginePtr CreatePoolEngine() override
    {
        return std::make_unique<DefaultPoolingEngine<ElemType>>();
    }

    // Descriptor creation: the default descriptors are plain data holders with
    // no device-specific state.
    Tensor4DPtr CreateTensor(size_t w, size_t h, size_t c, size_t n) override
    {
        return std::make_unique<ConvolutionTensor4D>(w, h, c, n);
    }

    FilterPtr CreateFilter(size_t w, size_t h, size_t c, size_t k) override
    {
        return std::make_unique<Filter>(w, h, c, k);
    }

    // inT/filterT are unused here: the default descriptor stores only strides
    // and the padding flag.
    ConvDescPtr CreateConvDescriptor(const Tensor4D& /*inT*/, const Filter& /*filterT*/,
                                     size_t wStride, size_t hStride, bool padding) override
    {
        return std::make_unique<ConvDesc>(wStride, hStride, padding);
    }

    PoolDescPtr CreatePoolDescriptor(typename PoolDesc::PoolKind kind, size_t w, size_t h, size_t wStride, size_t hStride, size_t wPad, size_t hPad) override
    {
        return std::make_unique<PoolDesc>(kind, w, h, wStride, hStride, wPad, hPad);
    }
};

// Creates the convolution engine factory appropriate for the given device.
// Uses the cuDNN implementation when running on a GPU and the build supports
// cuDNN; otherwise falls back to the default engine. Returning the cuDNN
// factory unconditionally (as the commented-out variant did) would break CPU
// devices (deviceId < 0) and builds compiled without USE_CUDNN.
// REVIEW alexeyk: add config parameter to enable runtime switch between implementations.
template<class ElemType>
std::unique_ptr<ConvolutionEngineFactory<ElemType>> ConvolutionEngineFactory<ElemType>::Create(DEVICEID_TYPE deviceId)
{
    // deviceId >= 0 denotes a GPU device; IsSupported() reports whether this
    // build actually has a usable cuDNN backend.
    if (deviceId >= 0 && CuDnnConvolutionEngineFactory<ElemType>::IsSupported())
        return std::make_unique<CuDnnConvolutionEngineFactory<ElemType>>(deviceId);
    return std::make_unique<DefaultConvolutionEngineFactory<ElemType>>(deviceId);
}

template class ConvolutionEngineFactory<float>;
template class ConvolutionEngineFactory<double>;

}}}
27 changes: 17 additions & 10 deletions Math/Math/CuDnnConvolutionEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#ifdef USE_CUDNN
#include <cudnn.h>

template<> static const char* CudaErrString(cudnnStatus_t x)
// Maps a cuDNN status code to its human-readable message; this specialization
// lets the shared CUDA error-reporting helpers format cudnnStatus_t failures.
template<> const char* CudaErrString(cudnnStatus_t x)
{
    return cudnnGetErrorString(x);
}
Expand Down Expand Up @@ -202,12 +202,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
class CuDnnConvolutionEngine : public ConvolutionEngine<ElemType>
{
public:
using Tensor4D = ConvolutionTensor4D;
using Tensor4DPtr = std::unique_ptr<Tensor4D>;
using Filter = ConvolutionFilter;
using FilterPtr = std::unique_ptr<ConvolutionFilter>;
using ConvDesc = ConvolutionDescriptor;
using ConvDescPtr = std::unique_ptr<ConvolutionDescriptor>;
using Base = ConvolutionEngine<ElemType>;
using typename Base::Mat;
using typename Base::Tensor4D;
using typename Base::Filter;
using typename Base::ConvDesc;

CuDnnConvolutionEngine(DEVICEID_TYPE deviceId, size_t maxTempMemSizeInSamples)
: m_maxTempMemSizeInSamples(maxTempMemSizeInSamples), m_cudnn(nullptr), m_curMBSize(0), m_tempC(deviceId)
Expand Down Expand Up @@ -495,6 +494,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
class CuDnnPoolingEngine : public PoolingEngine<ElemType>
{
public:
using Base = PoolingEngine<ElemType>;
using typename Base::Tensor4D;
using typename Base::PoolDesc;
using typename Base::Mat;

public:
CuDnnPoolingEngine()
: m_cudnn(nullptr)
Expand Down Expand Up @@ -546,7 +551,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
typename CuDnnConvolutionEngineFactory<ElemType>::Tensor4DPtr CuDnnConvolutionEngineFactory<ElemType>::CreateTensor(size_t w, size_t h, size_t c, size_t n)
{
static_assert(false, "cuDNN engine currently supports only single and double precision tensors.");
// REVIEW alexeyk: assert fires in GCC but not in VC++.
//static_assert(false, "cuDNN engine currently supports only single and double precision tensors.");
}
template<>
typename CuDnnConvolutionEngineFactory<float>::Tensor4DPtr CuDnnConvolutionEngineFactory<float>::CreateTensor(size_t w, size_t h, size_t c, size_t n)
Expand All @@ -562,7 +568,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
typename CuDnnConvolutionEngineFactory<ElemType>::FilterPtr CuDnnConvolutionEngineFactory<ElemType>::CreateFilter(size_t w, size_t h, size_t c, size_t k)
{
static_assert(false, "cuDNN engine currently supports only single and double precision filters.");
// REVIEW alexeyk: assert fires in GCC but not in VC++.
//static_assert(false, "cuDNN engine currently supports only single and double precision filters.");
}
template<>
typename CuDnnConvolutionEngineFactory<float>::FilterPtr CuDnnConvolutionEngineFactory<float>::CreateFilter(size_t w, size_t h, size_t c, size_t k)
Expand All @@ -586,7 +593,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {

template<class ElemType>
typename CuDnnConvolutionEngineFactory<ElemType>::PoolDescPtr CuDnnConvolutionEngineFactory<ElemType>::CreatePoolDescriptor(
PoolDesc::PoolKind kind, size_t w, size_t h, size_t wStride, size_t hStride, size_t wPad, size_t hPad)
typename PoolDesc::PoolKind kind, size_t w, size_t h, size_t wStride, size_t hStride, size_t wPad, size_t hPad)
{
return std::make_unique<CuDnnPoolingDescriptor>(kind, w, h, wStride, hStride, wPad, hPad);
}
Expand Down
20 changes: 18 additions & 2 deletions Math/Math/CuDnnConvolutionEngine.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
class CuDnnConvolutionEngineFactory : public ConvolutionEngineFactory<ElemType>
{
public:
using Base = ConvolutionEngineFactory<ElemType>;
using typename Base::Tensor4D;
using typename Base::Tensor4DPtr;
using typename Base::Filter;
using typename Base::FilterPtr;
using typename Base::ConvDesc;
using typename Base::ConvDescPtr;
using typename Base::PoolDesc;
using typename Base::PoolDescPtr;

using typename Base::ConvEnginePtr;
using typename Base::PoolEnginePtr;

using Base::m_deviceId;

public:
CuDnnConvolutionEngineFactory(DEVICEID_TYPE deviceId)
: ConvolutionEngineFactory<ElemType>(deviceId)
Expand All @@ -22,9 +38,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
public:
Tensor4DPtr CreateTensor(size_t w, size_t h, size_t c, size_t n) override;
FilterPtr CreateFilter(size_t w, size_t h, size_t c, size_t k) override;
ConvDescPtr CreateConvDescriptor(const Tensor4D& inT, const Filter& filterT,
ConvDescPtr CreateConvDescriptor(const Tensor4D& inT, const Filter& filterT,
size_t wStride, size_t hStride, bool padding) override;
PoolDescPtr CreatePoolDescriptor(PoolDesc::PoolKind kind, size_t w, size_t h, size_t wStride, size_t hStride, size_t wPad, size_t hPad) override;
PoolDescPtr CreatePoolDescriptor(typename PoolDesc::PoolKind kind, size_t w, size_t h, size_t wStride, size_t hStride, size_t wPad, size_t hPad) override;

ConvEnginePtr CreateConvEngine(size_t maxTempMemSizeInSamples) override;
PoolEnginePtr CreatePoolEngine() override;
Expand Down
45 changes: 45 additions & 0 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ have_cub=no
cub_path=
cub_check=cub/cub.cuh

have_cudnn=no
cudnn_path=
cudnn_check=cuda/include/cudnn.h

have_opencv=no
opencv_path=
opencv_check=include/opencv2/opencv.hpp
Expand All @@ -49,6 +53,7 @@ default_cudas="cuda-7.5 cuda-7.0 cuda-6.5"
default_kaldis="kaldi-trunk"
default_gdks=". gdk/usr"
default_cubs="cub-1.4.1"
default_cudnns="cudnn-4.0"
default_opencvs="opencv-3.0.0"

function default_paths ()
Expand Down Expand Up @@ -115,6 +120,11 @@ function find_cub ()
find_dir "$default_cubs" "$cub_check"
}

function find_cudnn ()
{
find_dir "$default_cudnns" "$cudnn_check"
}

function find_opencv ()
{
find_dir "$default_opencvs" "$opencv_check"
Expand Down Expand Up @@ -162,6 +172,7 @@ function show_help ()
echo " --with-cuda[=directory] $(show_default $(find_cuda))"
echo " --with-cub[=directory] $(show_default $(find_cub))"
echo " --with-gdk[=directory] $(show_default $(find_gdk))"
echo " --with-cudnn[=directory] $(show_default $(find_cudnn))"
echo " --with-acml[=directory] $(show_default $(find_acml))"
echo " --with-mkl[=directory] $(show_default $(find_mkl))"
echo " --with-buildtype=(debug|release) $(show_default $default_buildtype)"
Expand Down Expand Up @@ -278,6 +289,27 @@ do
fi
fi
;;
--with-cudnn*)
have_cudnn=yes
if test x$optarg = x
then
cudnn_path=$(find_cudnn)
if test x$cudnn_path = x
then
echo "Cannot find NVIDIA cuDNN directory."
echo "Please specify a value for --with-cudnn"
exit 1
fi
else
if test $(check_dir $optarg $cudnn_check) = yes
then
cudnn_path=$optarg
else
echo "Invalid cuDNN directory $optarg"
exit 1
fi
fi
;;
--with-acml*)
have_acml=yes
mathlib=acml
Expand Down Expand Up @@ -448,6 +480,18 @@ then
echo Found CUB at $cub_path
fi
fi

if test $enable_cuda = yes && test x$cudnn_path = x
then
cudnn_path=$(find_cudnn)
if test x$cudnn_path = x ; then
echo Cannot locate NVIDIA cuDNN directory
echo CNTK will use default convolution engine.
else
echo Found cuDNN at $cudnn_path
fi
fi

config=$build_top/Config.make
echo Generating $config
echo "#Configuration file for cntk" > $config
Expand All @@ -465,6 +509,7 @@ if test $enable_cuda = yes ; then
echo CUDA_PATH=$cuda_path >> $config
echo GDK_PATH=$gdk_path >> $config
echo CUB_PATH=$cub_path >> $config
echo CUDNN_PATH=$cudnn_path >> $config
fi
if test x$kaldi_path != x ; then
echo KALDI_PATH=$kaldi_path >> $config
Expand Down

0 comments on commit 3023f11

Please sign in to comment.