Skip to content

Commit

Permalink
Merge pull request karpathy#73 from guillitte/master
Browse files · Browse the repository at this point in the history
GRU and plain RNN support
  • Loading branch information
karpathy committed Aug 1, 2015
2 parents ef0373f + e555fa9 commit 5a1793b
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 15 deletions.
23 changes: 17 additions & 6 deletions model/GRU.lua
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ local GRU = {}
Creates one timestep of one GRU
Paper reference: http://arxiv.org/pdf/1412.3555v1.pdf
]]--
function GRU.gru(input_size, rnn_size, n)

function GRU.gru(input_size, rnn_size, n, dropout)
dropout = dropout or 0
-- there are n+1 inputs (hiddens on each layer and x)
local inputs = {}
table.insert(inputs, nn.Identity()()) -- x
Expand All @@ -25,9 +25,15 @@ function GRU.gru(input_size, rnn_size, n)
for L = 1,n do

local prev_h = inputs[L+1]
if L == 1 then x = inputs[1] else x = outputs[L-1] end
if L == 1 then input_size_L = input_size else input_size_L = rnn_size end

-- the input to this layer
if L == 1 then
x = OneHot(input_size)(inputs[1])
input_size_L = input_size
else
x = outputs[(L-1)]
if dropout > 0 then x = nn.Dropout(dropout)(x) end -- apply dropout, if any
input_size_L = rnn_size
end
-- GRU tick
-- forward the update and reset gates
local update_gate = nn.Sigmoid()(new_input_sum(input_size_L, x, prev_h))
Expand All @@ -44,9 +50,14 @@ function GRU.gru(input_size, rnn_size, n)

table.insert(outputs, next_h)
end
-- set up the decoder
local top_h = outputs[#outputs]
if dropout > 0 then top_h = nn.Dropout(dropout)(top_h) end
local proj = nn.Linear(rnn_size, input_size)(top_h)
local logsoft = nn.LogSoftMax()(proj)
table.insert(outputs, logsoft)

return nn.gModule(inputs, outputs)
end

return GRU

19 changes: 16 additions & 3 deletions model/RNN.lua
Original file line number Diff line number Diff line change
@@ -1,21 +1,28 @@
local RNN = {}

function RNN.rnn(input_size, rnn_size, n)
function RNN.rnn(input_size, rnn_size, n, dropout)

-- there are n+1 inputs (hiddens on each layer and x)
local inputs = {}
table.insert(inputs, nn.Identity()()) -- x
for L = 1,n do
table.insert(inputs, nn.Identity()()) -- prev_h[L]

end

local x, input_size_L
local outputs = {}
for L = 1,n do

local prev_h = inputs[L+1]
if L == 1 then x = inputs[1] else x = outputs[L-1] end
if L == 1 then input_size_L = input_size else input_size_L = rnn_size end
if L == 1 then
x = OneHot(input_size)(inputs[1])
input_size_L = input_size
else
x = outputs[(L-1)]
if dropout > 0 then x = nn.Dropout(dropout)(x) end -- apply dropout, if any
input_size_L = rnn_size
end

-- RNN tick
local i2h = nn.Linear(input_size_L, rnn_size)(x)
Expand All @@ -24,6 +31,12 @@ function RNN.rnn(input_size, rnn_size, n)

table.insert(outputs, next_h)
end
-- set up the decoder
local top_h = outputs[#outputs]
if dropout > 0 then top_h = nn.Dropout(dropout)(top_h) end
local proj = nn.Linear(rnn_size, input_size)(top_h)
local logsoft = nn.LogSoftMax()(proj)
table.insert(outputs, logsoft)

return nn.gModule(inputs, outputs)
end
Expand Down
6 changes: 4 additions & 2 deletions sample.lua
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ local ivocab = {}
for c,i in pairs(vocab) do ivocab[i] = c end

-- initialize the rnn state to all zeros
gprint('creating an LSTM...')
gprint('creating an ' .. checkpoint.opt.model .. '...')
local current_state
local num_layers = checkpoint.opt.num_layers
current_state = {}
Expand All @@ -101,7 +101,9 @@ for L = 1,checkpoint.opt.num_layers do
if opt.gpuid >= 0 and opt.opencl == 0 then h_init = h_init:cuda() end
if opt.gpuid >= 0 and opt.opencl == 1 then h_init = h_init:cl() end
table.insert(current_state, h_init:clone())
table.insert(current_state, h_init:clone())
if checkpoint.opt.model == 'lstm' then
table.insert(current_state, h_init:clone())
end
end
state_size = #current_state

Expand Down
18 changes: 14 additions & 4 deletions train.lua
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ require 'util.misc'
local CharSplitLMMinibatchLoader = require 'util.CharSplitLMMinibatchLoader'
local model_utils = require 'util.model_utils'
local LSTM = require 'model.LSTM'
local GRU = require 'model.GRU'
local RNN = require 'model.RNN'

cmd = torch.CmdLine()
cmd:text()
Expand All @@ -35,7 +37,7 @@ cmd:option('-data_dir','data/tinyshakespeare','data directory. Should contain th
-- model params
cmd:option('-rnn_size', 128, 'size of LSTM internal state')
cmd:option('-num_layers', 2, 'number of layers in the LSTM')
cmd:option('-model', 'lstm', 'for now only lstm is supported. keep fixed')
cmd:option('-model', 'lstm', 'lstm,gru or rnn')
-- optimization
cmd:option('-learning_rate',2e-3,'learning rate')
cmd:option('-learning_rate_decay',0.97,'learning rate decay')
Expand Down Expand Up @@ -132,9 +134,15 @@ if string.len(opt.init_from) > 0 then
opt.num_layers = checkpoint.opt.num_layers
do_random_init = false
else
print('creating an LSTM with ' .. opt.num_layers .. ' layers')
print('creating an ' .. opt.model .. ' with ' .. opt.num_layers .. ' layers')
protos = {}
protos.rnn = LSTM.lstm(vocab_size, opt.rnn_size, opt.num_layers, opt.dropout)
if opt.model == 'lstm' then
protos.rnn = LSTM.lstm(vocab_size, opt.rnn_size, opt.num_layers, opt.dropout)
elseif opt.model == 'gru' then
protos.rnn = GRU.gru(vocab_size, opt.rnn_size, opt.num_layers, opt.dropout)
elseif opt.model == 'rnn' then
protos.rnn = RNN.rnn(vocab_size, opt.rnn_size, opt.num_layers, opt.dropout)
end
protos.criterion = nn.ClassNLLCriterion()
end

Expand All @@ -145,7 +153,9 @@ for L=1,opt.num_layers do
if opt.gpuid >=0 and opt.opencl == 0 then h_init = h_init:cuda() end
if opt.gpuid >=0 and opt.opencl == 1 then h_init = h_init:cl() end
table.insert(init_state, h_init:clone())
table.insert(init_state, h_init:clone())
if opt.model == 'lstm' then
table.insert(init_state, h_init:clone())
end
end

-- ship the model to the GPU if desired
Expand Down

0 comments on commit 5a1793b

Please sign in to comment.