Sensible activation for RNNs
Have you noticed how GRUs with the default activation usually work worse than LSTMs? It seems that "tanh" is a more sensible activation choice. Also, for GRUs tanh appears to be the standard default; see http://arxiv.org/pdf/1412.3555v1.pdf, Section 3.2.
EderSantana committed Feb 22, 2016
1 parent f10c430 commit c7f7ffe
Showing 1 changed file with 2 additions and 2 deletions.
keras/layers/recurrent.py: 2 additions, 2 deletions

@@ -207,7 +207,7 @@ class SimpleRNN(Recurrent):
     '''
     def __init__(self, output_dim,
                  init='glorot_uniform', inner_init='orthogonal',
-                 activation='sigmoid',
+                 activation='tanh',
                  W_regularizer=None, U_regularizer=None, b_regularizer=None,
                  dropout_W=0., dropout_U=0., **kwargs):
         self.output_dim = output_dim
@@ -333,7 +333,7 @@ class GRU(Recurrent):
     '''
     def __init__(self, output_dim,
                  init='glorot_uniform', inner_init='orthogonal',
-                 activation='sigmoid', inner_activation='hard_sigmoid',
+                 activation='tanh', inner_activation='hard_sigmoid',
                  W_regularizer=None, U_regularizer=None, b_regularizer=None,
                  dropout_W=0., dropout_U=0., **kwargs):
         self.output_dim = output_dim
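
For context, a minimal sketch of how the new defaults surface to a user, assuming the Keras recurrent-layer API at the time of this commit (only the class names and keyword arguments visible in the diff above are taken as given):

from keras.layers.recurrent import SimpleRNN, GRU

# With this change, both layers default to a tanh output activation.
rnn = SimpleRNN(output_dim=64)
gru = GRU(output_dim=64)   # the gate activation stays 'hard_sigmoid'

# The old behaviour remains available by passing the activation explicitly.
legacy_gru = GRU(output_dim=64, activation='sigmoid')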
