CuDNN memory limit: switches to a default of 512 MB
vedaldi committed Dec 2, 2015
1 parent 712f6ec commit 9cb380a
Showing 7 changed files with 97 additions and 21 deletions.
1 change: 1 addition & 0 deletions examples/cnn_train.m
@@ -113,6 +113,7 @@
if start >= 1
fprintf('%s: resuming by loading epoch %d\n', mfilename, start) ;
load(modelPath(start), 'net', 'info') ;
net = vl_simplenn_tidy(net) ; % just in case MatConvNet was updated
end

for epoch=start+1:opts.numEpochs
22 changes: 22 additions & 0 deletions examples/imagenet/cnn_imagenet_deploy.m
@@ -28,6 +28,28 @@
net = simpleRemoveMomentum(net) ;
end

% Switch to use MatConvNet default memory limit for CuDNN (512 MB)
if ~isDag
for l = simpleFindLayersOfType(net, 'conv')
net.layers{l}.opts = removeCuDNNMemoryLimit(net.layers{l}.opts) ;
end
else
for l = dagFindLayersOfType(net, 'dagnn.Conv')
net.layers(l).block.opts = removeCuDNNMemoryLimit(net.layers(l).block.opts) ;
end
end

% -------------------------------------------------------------------------
function opts = removeCuDNNMemoryLimit(opts)
% -------------------------------------------------------------------------
remove = false(1, numel(opts)) ;
for i = 1:numel(opts)
if isstr(opts{i}) && strcmpi(opts{i}, 'CudnnWorkSpaceLimit')
remove([i i+1]) = true ;
end
end
opts = opts(~remove) ;
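
For context, a hedged sketch of the transformation this helper applies to a layer's opts cell array (the values below are hypothetical, and the function is a local helper of cnn_imagenet_deploy.m, so this is illustrative only):

% Hypothetical conv-layer options saved during training with an explicit
% workspace cap; removeCuDNNMemoryLimit strips the name/value pair so the
% deployed model falls back to MatConvNet's 512 MB default.
opts = {'CudnnWorkSpaceLimit', 1024*1024*1024, 'NoCuDNN'} ;
opts = removeCuDNNMemoryLimit(opts) ;   % now {'NoCuDNN'}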

% -------------------------------------------------------------------------
function net = simpleRemoveMomentum(net)
% -------------------------------------------------------------------------
12 changes: 6 additions & 6 deletions matlab/src/bits/datacu.cu
@@ -137,21 +137,21 @@ void
vl::CudaHelper::resetCudnnConvolutionSettings()
{
cudnnConvolutionFwdSpecificAlgo = false ;
cudnnConvolutionFwdPreference = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST ;
cudnnConvolutionFwdPreference = CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT ;
cudnnConvolutionFwdAlgo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM ;
cudnnConvolutionFwdWorkSpaceLimit = 256 * 1024 * 1024 ; // 256MB
cudnnConvolutionFwdWorkSpaceLimit = 512 * 1024 * 1024 ; // 512MB
cudnnConvolutionFwdWorkSpaceUsed = 0 ;

cudnnConvolutionBwdFilterSpecificAlgo = false ;
cudnnConvolutionBwdFilterPreference = CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST ;
cudnnConvolutionBwdFilterPreference = CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT ;
cudnnConvolutionBwdFilterAlgo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 ;
cudnnConvolutionBwdFilterWorkSpaceLimit = 256 * 1024 * 1024 ; // 256MB
cudnnConvolutionBwdFilterWorkSpaceLimit = 512 * 1024 * 1024 ; // 512MB
cudnnConvolutionBwdFilterWorkSpaceUsed = 0 ;

cudnnConvolutionBwdDataSpecificAlgo = false ;
cudnnConvolutionBwdDataPreference = CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST ;
cudnnConvolutionBwdDataPreference = CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT ;
cudnnConvolutionBwdDataAlgo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 ;
cudnnConvolutionBwdDataWorkSpaceLimit = 256 * 1024 * 1024 ; // 256MB
cudnnConvolutionBwdDataWorkSpaceLimit = 512 * 1024 * 1024 ; // 512MB
cudnnConvolutionBwdDataWorkSpaceUsed = 0 ;
}
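
In this hunk the algorithm-selection preference for each convolution pass changes from *_PREFER_FASTEST to *_SPECIFY_WORKSPACE_LIMIT and the per-pass cap grows from 256 MB to 512 MB. A minimal MATLAB sketch of the new default, just to spell the value out in bytes:

% Default cuDNN workspace cap installed by resetCudnnConvolutionSettings();
% the same value is set independently for the forward, backward-filter and
% backward-data convolution algorithms.
defaultCudnnWorkspaceLimit = 512 * 1024 * 1024 ;   % 536870912 bytes = 512 MB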

63 changes: 52 additions & 11 deletions matlab/src/vl_nnconvt.cu
@@ -35,21 +35,22 @@ enum {
opt_no_der_biases,
opt_cudnn,
opt_no_cudnn,
opt_transpose
opt_cudnn_workspace_limit,
} ;

/* options */
vlmxOption options [] = {
{"Upsample", 1, opt_upsample },
{"Crop", 1, opt_crop },
{"Verbose", 0, opt_verbose },
{"NumGroups", 1, opt_num_groups },
{"NoDerData", 0, opt_no_der_data },
{"NoDerFilters", 0, opt_no_der_filters },
{"NoderBiases", 0, opt_no_der_biases },
{"CUDNN", 0, opt_cudnn },
{"NoCUDNN", 0, opt_no_cudnn },
{0, 0, 0 }
{"Upsample", 1, opt_upsample },
{"Crop", 1, opt_crop },
{"Verbose", 0, opt_verbose },
{"NumGroups", 1, opt_num_groups },
{"NoDerData", 0, opt_no_der_data },
{"NoDerFilters", 0, opt_no_der_filters },
{"NoDerBiases", 0, opt_no_der_biases },
{"CUDNN", 0, opt_cudnn },
{"NoCUDNN", 0, opt_no_cudnn },
{"CudnnWorkSpaceLimit", 1, opt_cudnn_workspace_limit },
{0, 0, 0 }
} ;

/* ---------------------------------------------------------------- */
@@ -196,6 +197,32 @@ void mexFunction(int nout, mxArray *out[],
#endif
break ;

case opt_cudnn_workspace_limit :
{
#if ENABLE_CUDNN
double x ;
if (!mxIsScalar(optarg) || (x = mxGetScalar(optarg)) < 0) {
mexErrMsgTxt("CudnnWorkSpaceLimit is not a non-negative scalar.") ;
}
context.getCudaHelper().setCudnnConvolutionFwdPreference
((std::isinf(x) ?
CUDNN_CONVOLUTION_FWD_PREFER_FASTEST :
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT),
(size_t)x) ;
context.getCudaHelper().setCudnnConvolutionBwdFilterPreference
((std::isinf(x) ?
CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST :
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT),
(size_t)x) ;
context.getCudaHelper().setCudnnConvolutionBwdDataPreference
((std::isinf(x) ?
CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST :
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT),
(size_t)x) ;
break ;
#endif
}

default: break ;
}
}
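
In MATLAB terms, the new case above maps the option value onto cuDNN's algorithm-selection preferences. A hedged usage sketch (array sizes and the workspace value are made up; a CUDA GPU and a cuDNN-enabled MatConvNet build are assumed):

% Hypothetical inputs for a transposed convolution.
x = gpuArray.randn(32, 32, 64, 16, 'single') ;   % H x W x D x N input stack
f = gpuArray.randn(4, 4, 32, 64, 'single') ;     % 4x4 filters, 32 output channels, 64 input channels
b = gpuArray.zeros(32, 1, 'single') ;            % one bias per output channel

% A finite, non-negative scalar becomes the cuDNN workspace cap in bytes
% (the *_SPECIFY_WORKSPACE_LIMIT preferences above); +inf maps back to
% *_PREFER_FASTEST, and a negative or non-scalar value trips the
% mexErrMsgTxt check.
y = vl_nnconvt(x, f, b, 'Upsample', 2, 'CudnnWorkSpaceLimit', 64*1024*1024) ;
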
@@ -350,6 +377,20 @@ void mexFunction(int nout, mxArray *out[],
cropTop, cropBottom, cropLeft, cropRight) ;
}

if (verbosity > 0) {
#if ENABLE_CUDNN
if (context.getCudaHelper().getCudnnEnabled()) {
mexPrintf("vl_nnconvt: cuDNN workspace used: "
"fwd %.2f MB"
", bwd filter %.2f MB"
", bwd data %.2f MB\n",
(double)context.getCudaHelper().getCudnnConvolutionFwdWorkSpaceUsed() / (1024*1024),
(double)context.getCudaHelper().getCudnnConvolutionBwdFilterWorkSpaceUsed() / (1024*1024),
(double)context.getCudaHelper().getCudnnConvolutionBwdDataWorkSpaceUsed() / (1024*1024)) ;
}
#endif
}

/* -------------------------------------------------------------- */
/* Cleanup */
/* -------------------------------------------------------------- */
9 changes: 5 additions & 4 deletions matlab/vl_nnconv.m
@@ -67,10 +67,11 @@
% sticks until MATLAB purges the MEX files for any reason).
%
% Some CuDNN algorithms may use a very large amount of memory on the
% GPU (workspace). MatConvNet chooses always the fastest, which
% might not be the most economical. To change this behaviour, use
% the `CudnnWorskpaceLimit` option to specify the maximum size of
% the workspace in bytes (set to +inf to remove the limit).
% GPU (workspace). MatConvNet requests CuDNN to use at most 512 MB of
% GPU memory for the workspace. To change this behaviour, use the
% `CudnnWorkSpaceLimit` option to specify the maximum size of the
% workspace in bytes. Set this parameter to +inf to remove the limit
% and use the `Verbose` flag to check how much memory is being used.

% Copyright (C) 2014 Andrea Vedaldi and Max Jaderberg.
% Copyright (C) 2015 Andrea Vedaldi.
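
A hedged usage sketch of the behaviour documented above (sizes are hypothetical; a CUDA GPU and a cuDNN build of MatConvNet are assumed):

% Cap the cuDNN workspace for this call at 128 MB instead of the 512 MB default.
x = gpuArray.randn(64, 64, 3, 8, 'single') ;   % H x W x D x N images
w = gpuArray.randn(7, 7, 3, 96, 'single') ;    % FH x FW x D x K filter bank
b = gpuArray.zeros(96, 1, 'single') ;          % one bias per filter
y = vl_nnconv(x, w, b, 'Stride', 2, 'Pad', 3, 'CudnnWorkSpaceLimit', 128*1024*1024) ;
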
8 changes: 8 additions & 0 deletions matlab/vl_nnconvt.m
@@ -71,6 +71,14 @@
% trigger current bugs in cuDNN). You can use the 'NoCuDNN' option
% to disable cuDNN or 'cuDNN' to activate it back again (the choice
% sticks until MATLAB purges the MEX files for any reason).
%
% Some CuDNN algorithms may use a very large amount of memory on the
% GPU (workspace). MatConvNet requests CuDNN to use at most 512 MB of
% GPU memory for the workspace. To change this behaviour, use the
% `CudnnWorkSpaceLimit` option to specify the maximum size of the
% workspace in bytes. Set this parameter to +inf to remove the limit
% and use the `Verbose` flag to check how much memory is being used.


% Copyright (C) 2015 Andrea Vedaldi.
% All rights reserved.
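
And a hedged sketch of removing the cap and checking actual usage, as suggested in the help text above (hypothetical sizes; CUDA GPU and cuDNN build assumed):

% +inf removes the workspace limit, so cuDNN simply prefers the fastest
% algorithm; 'Verbose' prints how much workspace each pass actually used.
x = gpuArray.randn(16, 16, 128, 4, 'single') ;
f = gpuArray.randn(4, 4, 64, 128, 'single') ;   % 64 output channels, 128 input channels
b = gpuArray.zeros(64, 1, 'single') ;
y = vl_nnconvt(x, f, b, 'Upsample', 2, 'CudnnWorkSpaceLimit', inf, 'Verbose') ;
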
3 changes: 3 additions & 0 deletions utils/evaluate_ref_models.m
@@ -4,6 +4,9 @@ function evaluate_ref_models()
addpath(fullfile(fileparts(mfilename('fullpath')), '..','examples', 'imagenet')) ;

models = {...
'matconvnet-alex', ...
'matconvnet-vgg-f', ...
'matconvnet-vgg-m', ...
'caffe-ref', ...
'caffe-alex', ...
'vgg-s', ...
