Merge branch 'master' of https://github.com/liuzhuang13/DenseNet
liuzhuang13 committed Aug 8, 2017
2 parents 630b3bf + ea07d06 commit d1a7ce2
Showing 4 changed files with 30 additions and 8 deletions.
7 changes: 4 additions & 3 deletions README.md
@@ -20,9 +20,10 @@ If you find DenseNet useful in your research, please consider citing:

## Other Implementations
0. Our [Caffe Implementation](https://github.com/liuzhuang13/DenseNetCaffe)
-0. Our space-efficient [Caffe Implementation](https://github.com/Tongcheng/DN_CaffeScript).
-0. [PyTorch Implementation (with BC structure)](https://github.com/andreasveit/densenet-pytorch) by Andreas Veit.
-0. [PyTorch Implementation (with BC structure)](https://github.com/bamos/densenet.pytorch) by Brandon Amos.
+0. Our memory-efficient [Caffe Implementation](https://github.com/Tongcheng/DN_CaffeScript).
+0. Our memory-efficient [PyTorch Implementation](https://github.com/gpleiss/efficient_densenet_pytorch).
+0. [PyTorch Implementation](https://github.com/andreasveit/densenet-pytorch) by Andreas Veit.
+0. [PyTorch Implementation](https://github.com/bamos/densenet.pytorch) by Brandon Amos.
0. [MXNet Implementation](https://github.com/Nicatio/Densenet/tree/master/mxnet) by Nicatio.
0. [MXNet Implementation (supporting ImageNet)](https://github.com/bruinxiong/densenet.mxnet) by Xiong Lin.
0. [Tensorflow Implementation](https://github.com/YixuanLi/densenet-tensorflow) by Yixuan Li.
3 changes: 1 addition & 2 deletions models/DenseConnectLayer.lua
@@ -1,7 +1,6 @@
require 'nn'
require 'cudnn'
require 'cunn'
-local nninit = require 'nninit'


local function ShareGradInput(module, key)
@@ -147,4 +146,4 @@ function DenseConnectLayerCustom:__tostring__()
str = str .. line .. tab .. last .. '{output}'
str = str .. line .. '}'
return str
-end
+end
1 change: 1 addition & 0 deletions opts.lua
@@ -39,6 +39,7 @@ function M.parse(arg)
cmd:option('-LR', 0.1, 'initial learning rate')
cmd:option('-momentum', 0.9, 'momentum')
cmd:option('-weightDecay', 1e-4, 'weight decay')
+cmd:option('-lrShape', 'multistep', 'Learning rate: multistep|cosine')
---------- Model options ----------------------------------
cmd:option('-netType', 'resnet', 'Options: resnet | preresnet')
cmd:option('-depth', 20, 'ResNet depth: 18 | 34 | 50 | 101 | ...', 'number')
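The new `-lrShape` flag defaults to `multistep`, so existing training commands keep the step-wise schedule unless `cosine` is requested explicitly. Below is a minimal sketch of how such a flag is parsed, assuming the standard `torch.CmdLine` interface that opts.lua builds on; the `-LR` and `-nEpochs` options shown are illustrative stand-ins for the full option list.

```lua
-- Sketch: parsing -lrShape with torch.CmdLine, mirroring the option added above.
require 'torch'

local cmd = torch.CmdLine()
cmd:option('-LR',      0.1,         'initial learning rate')
cmd:option('-nEpochs', 300,         'number of epochs (illustrative default)')
cmd:option('-lrShape', 'multistep', 'Learning rate: multistep|cosine')
local opt = cmd:parse(arg or {})

-- e.g. `th main.lua -lrShape cosine` yields opt.lrShape == 'cosine';
-- omitting the flag leaves the multistep schedule in place.
print(opt.lrShape)
```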
27 changes: 24 additions & 3 deletions train.lua
@@ -31,7 +31,12 @@ end

function Trainer:train(epoch, dataloader)
-- Trains the model for a single epoch
-self.optimState.learningRate = self:learningRate(epoch)

+------for LR------
+if self.opt.lrShape == 'multistep' then
+self.optimState.learningRate = self:learningRate(epoch)
+end
+------for LR------

local timer = torch.Timer()
local dataTimer = torch.Timer()
@@ -48,6 +53,13 @@ function Trainer:train(epoch, dataloader)
-- set the batch norm to training mode
self.model:training()
for n, sample in dataloader:run() do

+------for LR------
+if self.opt.lrShape == 'cosine' then
+self.optimState.learningRate = self:learningRateCosine(epoch, n, trainSize)
+end
+------for LR------

local dataTime = dataTimer:time().real

-- Copy input and target to the GPU
@@ -69,8 +81,8 @@
lossSum = lossSum + loss*batchSize
N = N + batchSize

-print((' | Epoch: [%d][%d/%d] Time %.3f Data %.3f Err %1.4f top1 %7.3f top5 %7.3f'):format(
-epoch, n, trainSize, timer:time().real, dataTime, loss, top1, top5))
+print((' | Epoch: [%d][%d/%d] Time %.3f Data %.3f Err %1.3f top1 %7.2f top5 %7.2f lr %.4f'):format(
+epoch, n, trainSize, timer:time().real, dataTime, loss, top1, top5, self.optimState.learningRate))

-- check that the storage didn't get changed due to an unfortunate getParameters call
assert(self.params:storage() == self.model:parameters()[1]:storage())
@@ -184,4 +196,13 @@ function Trainer:learningRate(epoch)
return self.opt.LR * math.pow(0.1, decay)
end

+------for LR------
+function Trainer:learningRateCosine(epoch, iter, nBatches)
+local nEpochs_cur = self.opt.nEpochs
+local T_total = nEpochs_cur * nBatches
+local T_cur = ((epoch-1) % nEpochs_cur) * nBatches + iter
+return 0.5 * self.opt.LR * (1 + torch.cos(math.pi * T_cur / T_total))
+end
+------for LR------

return M.Trainer
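Together with the `-lrShape` option above, train.lua now updates the learning rate once per epoch for `multistep` but once per mini-batch for `cosine`, annealing it from `opt.LR` down to 0 over the run as 0.5 * LR * (1 + cos(pi * T_cur / T_total)) (cosine annealing in the style of SGDR, without restarts). A minimal sketch of the resulting schedule in plain Lua, using `math.cos` instead of `torch.cos` and purely illustrative values for the learning rate, epoch count, and batches per epoch:

```lua
-- Sketch of the cosine schedule added in learningRateCosine; the constants
-- below are illustrative only, not taken from the commit.
local LR, nEpochs, nBatches = 0.1, 300, 391

local function learningRateCosine(epoch, iter)
   local T_total = nEpochs * nBatches
   local T_cur   = ((epoch - 1) % nEpochs) * nBatches + iter
   return 0.5 * LR * (1 + math.cos(math.pi * T_cur / T_total))
end

print(learningRateCosine(1, 1))               -- ~0.100 (start of training)
print(learningRateCosine(nEpochs / 2, 1))     -- ~0.050 (halfway through)
print(learningRateCosine(nEpochs, nBatches))  -- 0.000  (final iteration)
```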
