CuDNN memory limit: switches to a default of 512 MB
vedaldi committed Dec 2, 2015
1 parent 712f6ec commit 9cb380a
Showing 7 changed files with 97 additions and 21 deletions.
1 change: 1 addition & 0 deletions examples/cnn_train.m
@@ -113,6 +113,7 @@
if start >= 1
fprintf('%s: resuming by loading epoch %d\n', mfilename, start) ;
load(modelPath(start), 'net', 'info') ;
net = vl_simplenn_tidy(net) ; % just in case MatConvNet was updated
end

for epoch=start+1:opts.numEpochs
22 changes: 22 additions & 0 deletions examples/imagenet/cnn_imagenet_deploy.m
@@ -28,6 +28,28 @@
net = simpleRemoveMomentum(net) ;
end

% Switch to use MatConvNet default memory limit for CuDNN (512 MB)
if ~isDag
for l = simpleFindLayersOfType(net, 'conv')
net.layers{l}.opts = removeCuDNNMemoryLimit(net.layers{l}.opts) ;
end
else
for l = dagFindLayersOfType(net, 'dagnn.Conv')
net.layers(l).block.opts = removeCuDNNMemoryLimit(net.layers(l).block.opts) ;
end
end

% -------------------------------------------------------------------------
function opts = removeCuDNNMemoryLimit(opts)
% -------------------------------------------------------------------------
remove = false(1, numel(opts)) ;
for i = 1:numel(opts)
if isstr(opts{i}) && strcmpi(opts{i}, 'CudnnWorkSpaceLimit')
remove([i i+1]) = true ;
end
end
opts = opts(~remove) ;
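
For context, a hedged sketch of the transformation this helper applies to a layer's opts cell array (the values below are hypothetical, and the function is a local helper of cnn_imagenet_deploy.m, so this is illustrative only):

% Hypothetical conv-layer options saved during training with an explicit
% workspace cap; removeCuDNNMemoryLimit strips the name/value pair so the
% deployed model falls back to MatConvNet's 512 MB default.
opts = {'CudnnWorkSpaceLimit', 1024*1024*1024, 'NoCuDNN'} ;
opts = removeCuDNNMemoryLimit(opts) ;   % now {'NoCuDNN'}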

% -------------------------------------------------------------------------
function net = simpleRemoveMomentum(net)
% -------------------------------------------------------------------------
12 changes: 6 additions & 6 deletions matlab/src/bits/datacu.cu
@@ -137,21 +137,21 @@ void
vl::CudaHelper::resetCudnnConvolutionSettings()
{
cudnnConvolutionFwdSpecificAlgo = false ;
cudnnConvolutionFwdPreference = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST ;
cudnnConvolutionFwdPreference = CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT ;
cudnnConvolutionFwdAlgo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM ;
cudnnConvolutionFwdWorkSpaceLimit = 256 * 1024 * 1024 ; // 256MB
cudnnConvolutionFwdWorkSpaceLimit = 512 * 1024 * 1024 ; // 512MB
cudnnConvolutionFwdWorkSpaceUsed = 0 ;

cudnnConvolutionBwdFilterSpecificAlgo = false ;
cudnnConvolutionBwdFilterPreference = CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST ;
cudnnConvolutionBwdFilterPreference = CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT ;
cudnnConvolutionBwdFilterAlgo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 ;
cudnnConvolutionBwdFilterWorkSpaceLimit = 256 * 1024 * 1024 ; // 256MB
cudnnConvolutionBwdFilterWorkSpaceLimit = 512 * 1024 * 1024 ; // 512MB
cudnnConvolutionBwdFilterWorkSpaceUsed = 0 ;

cudnnConvolutionBwdDataSpecificAlgo = false ;
cudnnConvolutionBwdDataPreference = CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST ;
cudnnConvolutionBwdDataPreference = CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT ;
cudnnConvolutionBwdDataAlgo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 ;
cudnnConvolutionBwdDataWorkSpaceLimit = 256 * 1024 * 1024 ; // 256MB
cudnnConvolutionBwdDataWorkSpaceLimit = 512 * 1024 * 1024 ; // 512MB
cudnnConvolutionBwdDataWorkSpaceUsed = 0 ;
}
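
In this hunk the algorithm-selection preference for each convolution pass changes from *_PREFER_FASTEST to *_SPECIFY_WORKSPACE_LIMIT and the per-pass cap grows from 256 MB to 512 MB. A minimal MATLAB sketch of the new default, just to spell the value out in bytes:

% Default cuDNN workspace cap installed by resetCudnnConvolutionSettings();
% the same value is set independently for the forward, backward-filter and
% backward-data convolution algorithms.
defaultCudnnWorkspaceLimit = 512 * 1024 * 1024 ;   % 536870912 bytes = 512 MB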

63 changes: 52 additions & 11 deletions matlab/src/vl_nnconvt.cu
@@ -35,21 +35,22 @@ enum {
opt_no_der_biases,
opt_cudnn,
opt_no_cudnn,
opt_transpose
opt_cudnn_workspace_limit,
} ;

/* options */
vlmxOption options [] = {
{"Upsample", 1, opt_upsample },
{"Crop", 1, opt_crop },
{"Verbose", 0, opt_verbose },
{"NumGroups", 1, opt_num_groups },
{"NoDerData", 0, opt_no_der_data },
{"NoDerFilters", 0, opt_no_der_filters },
{"NoderBiases", 0, opt_no_der_biases },
{"CUDNN", 0, opt_cudnn },
{"NoCUDNN", 0, opt_no_cudnn },
{0, 0, 0 }
{"Upsample", 1, opt_upsample },
{"Crop", 1, opt_crop },
{"Verbose", 0, opt_verbose },
{"NumGroups", 1, opt_num_groups },
{"NoDerData", 0, opt_no_der_data },
{"NoDerFilters", 0, opt_no_der_filters },
{"NoDerBiases", 0, opt_no_der_biases },
{"CUDNN", 0, opt_cudnn },
{"NoCUDNN", 0, opt_no_cudnn },
{"CudnnWorkSpaceLimit", 1, opt_cudnn_workspace_limit },
{0, 0, 0 }
} ;

/* ---------------------------------------------------------------- */
@@ -196,6 +197,32 @@ void mexFunction(int nout, mxArray *out[],
#endif
break ;

case opt_cudnn_workspace_limit :
{
#if ENABLE_CUDNN
double x ;
if (!mxIsScalar(optarg) || (x = mxGetScalar(optarg)) < 0) {
mexErrMsgTxt("CudnnWorkSpaceLimit is not a non-negative scalar.") ;
}
context.getCudaHelper().setCudnnConvolutionFwdPreference
((std::isinf(x) ?
CUDNN_CONVOLUTION_FWD_PREFER_FASTEST :
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT),
(size_t)x) ;
context.getCudaHelper().setCudnnConvolutionBwdFilterPreference
((std::isinf(x) ?
CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST :
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT),
(size_t)x) ;
context.getCudaHelper().setCudnnConvolutionBwdDataPreference
((std::isinf(x) ?
CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST :
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT),
(size_t)x) ;
break ;
#endif
}

default: break ;
}
}
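
In MATLAB terms, the new case above maps the option value onto cuDNN's algorithm-selection preferences. A hedged usage sketch (array sizes and the workspace value are made up; a CUDA GPU and a cuDNN-enabled MatConvNet build are assumed):

% Hypothetical inputs for a transposed convolution.
x = gpuArray.randn(32, 32, 64, 16, 'single') ;   % H x W x D x N input stack
f = gpuArray.randn(4, 4, 32, 64, 'single') ;     % 4x4 filters, 32 output channels, 64 input channels
b = gpuArray.zeros(32, 1, 'single') ;            % one bias per output channel

% A finite, non-negative scalar becomes the cuDNN workspace cap in bytes
% (the *_SPECIFY_WORKSPACE_LIMIT preferences above); +inf maps back to
% *_PREFER_FASTEST, and a negative or non-scalar value trips the
% mexErrMsgTxt check.
y = vl_nnconvt(x, f, b, 'Upsample', 2, 'CudnnWorkSpaceLimit', 64*1024*1024) ;
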
@@ -350,6 +377,20 @@ void mexFunction(int nout, mxArray *out[],
cropTop, cropBottom, cropLeft, cropRight) ;
}

if (verbosity > 0) {
#if ENABLE_CUDNN
if (context.getCudaHelper().getCudnnEnabled()) {
mexPrintf("vl_nnconvt: cuDNN workspace used: "
"fwd %.2f MB"
", bwd filter %.2f MB"
", bwd data %.2f MB\n",
(double)context.getCudaHelper().getCudnnConvolutionFwdWorkSpaceUsed() / (1024*1024),
(double)context.getCudaHelper().getCudnnConvolutionBwdFilterWorkSpaceUsed() / (1024*1024),
(double)context.getCudaHelper().getCudnnConvolutionBwdDataWorkSpaceUsed() / (1024*1024)) ;
}
#endif
}

/* -------------------------------------------------------------- */
/* Cleanup */
/* -------------------------------------------------------------- */
9 changes: 5 additions & 4 deletions matlab/vl_nnconv.m
@@ -67,10 +67,11 @@
% sticks until MATLAB purges the MEX files for any reason).
%
% Some CuDNN algorithms may use a very large amount of memory on the
% GPU (workspace). MatConvNet chooses always the fastest, which
% might not be the most economical. To change this behaviour, use
% the `CudnnWorskpaceLimit` option to specify the maximum size of
% the workspace in bytes (set to +inf to remove the limit).
% GPU (workspace). MatConvNet requests CuDNN to use at most 512 MB of
% GPU memory for the workspace. To change this behaviour, use the
% `CudnnWorkSpaceLimit` option to specify the maximum size of the
% workspace in bytes. Set this parameter to +inf to remove the limit
% and use the `Verbose` flag to check how much memory is being used.

% Copyright (C) 2014 Andrea Vedaldi and Max Jaderberg.
% Copyright (C) 2015 Andrea Vedaldi.
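
A hedged usage sketch of the behaviour documented above (sizes are hypothetical; a CUDA GPU and a cuDNN build of MatConvNet are assumed):

% Cap the cuDNN workspace for this call at 128 MB instead of the 512 MB default.
x = gpuArray.randn(64, 64, 3, 8, 'single') ;   % H x W x D x N images
w = gpuArray.randn(7, 7, 3, 96, 'single') ;    % FH x FW x D x K filter bank
b = gpuArray.zeros(96, 1, 'single') ;          % one bias per filter
y = vl_nnconv(x, w, b, 'Stride', 2, 'Pad', 3, 'CudnnWorkSpaceLimit', 128*1024*1024) ;
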
8 changes: 8 additions & 0 deletions matlab/vl_nnconvt.m
@@ -71,6 +71,14 @@
% trigger current bugs in cuDNN). You can use the 'NoCuDNN' option
% to disable cuDNN or 'cuDNN' to activate it back again (the choice
% sticks until MATLAB purges the MEX files for any reason).
%
% Some CuDNN algorithms may use a very large amount of memory on the
% GPU (workspace). MatConvNet requests CuDNN to use at most 512 MB of
% GPU memory for the workspace. To change this behaviour, use the
% `CudnnWorkSpaceLimit` option to specify the maximum size of the
% workspace in bytes. Set this parameter to +inf to remove the limit
% and use the `Verbose` flag to check how much memory is being used.


% Copyright (C) 2015 Andrea Vedaldi.
% All rights reserved.
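
And a hedged sketch of removing the cap and checking actual usage, as suggested in the help text above (hypothetical sizes; CUDA GPU and cuDNN build assumed):

% +inf removes the workspace limit, so cuDNN simply prefers the fastest
% algorithm; 'Verbose' prints how much workspace each pass actually used.
x = gpuArray.randn(16, 16, 128, 4, 'single') ;
f = gpuArray.randn(4, 4, 64, 128, 'single') ;   % 64 output channels, 128 input channels
b = gpuArray.zeros(64, 1, 'single') ;
y = vl_nnconvt(x, f, b, 'Upsample', 2, 'CudnnWorkSpaceLimit', inf, 'Verbose') ;
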
3 changes: 3 additions & 0 deletions utils/evaluate_ref_models.m
@@ -4,6 +4,9 @@ function evaluate_ref_models()
addpath(fullfile(fileparts(mfilename('fullpath')), '..','examples', 'imagenet')) ;

models = {...
'matconvnet-alex', ...
'matconvnet-vgg-f', ...
'matconvnet-vgg-m', ...
'caffe-ref', ...
'caffe-alex', ...
'vgg-s', ...
