diff --git a/keras/layers/recurrent.py b/keras/layers/recurrent.py
index 2522102f4e5..7fbefbe686f 100644
--- a/keras/layers/recurrent.py
+++ b/keras/layers/recurrent.py
@@ -9,6 +9,7 @@
 from .. import constraints
 from ..engine import Layer
 from ..engine import InputSpec
+from ..legacy import interfaces
 
 
 def _time_distributed_dense(x, w, b=None, dropout=None,
@@ -317,6 +318,7 @@ class SimpleRNN(Recurrent):
         - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)
     """
 
+    @interfaces.legacy_recurrent_support
     def __init__(self, units,
                  activation='tanh',
                  use_bias=True,
@@ -552,6 +554,7 @@ class GRU(Recurrent):
         - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)
     """
 
+    @interfaces.legacy_recurrent_support
     def __init__(self, units,
                  activation='tanh',
                  recurrent_activation='hard_sigmoid',
@@ -840,7 +843,7 @@ class LSTM(Recurrent):
         - [Supervised sequence labeling with recurrent neural networks](http://www.cs.toronto.edu/~graves/preprint.pdf)
         - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)
     """
-
+    @interfaces.legacy_recurrent_support
     def __init__(self, units,
                  activation='tanh',
                  recurrent_activation='hard_sigmoid',
diff --git a/keras/legacy/interfaces.py b/keras/legacy/interfaces.py
index 8b2e6827302..acfc070739e 100644
--- a/keras/legacy/interfaces.py
+++ b/keras/legacy/interfaces.py
@@ -5,7 +5,7 @@
 
 
 def generate_legacy_interface(allowed_positional_args=None,
-                              conversions=None):
+                              conversions=None, preprocessor=None):
     allowed_positional_args = allowed_positional_args or []
     conversions = conversions or []
 
@@ -13,7 +13,10 @@ def legacy_support(func):
         @six.wraps(func)
         def wrapper(*args, **kwargs):
             layer_name = args[0].__class__.__name__
-            converted = []
+            if preprocessor:
+                args, kwargs, converted = preprocessor(args, kwargs)
+            else:
+                converted = []
             if len(args) > len(allowed_positional_args) + 1:
                 raise TypeError('Layer `' + layer_name +
                                 '` can accept only ' +
@@ -61,6 +64,7 @@ def raise_duplicate_arg_error(old_arg, new_arg):
                     '`' + old_arg + '` and the Keras 2 keyword argument '
                     '`' + new_arg + '`. Stick to the latter!')
 
+
 legacy_dense_support = generate_legacy_interface(
     allowed_positional_args=['units'],
     conversions=[('output_dim', 'units'),
@@ -89,6 +93,34 @@
     allowed_positional_args=['stddev'],
     conversions=[('sigma', 'stddev')])
 
+
+def lstm_args_preprocessor(args, kwargs):
+    converted = []
+    if 'forget_bias_init' in kwargs:
+        if kwargs['forget_bias_init'] == 'one':
+            kwargs.pop('forget_bias_init')
+            kwargs['unit_forget_bias'] = True
+            converted.append(('forget_bias_init', 'unit_forget_bias'))
+        else:
+            kwargs.pop('forget_bias_init')
+            warnings.warn('The `forget_bias_init` argument '
+                          'has been ignored. Use `unit_forget_bias=True` '
+                          'instead to initialize with ones.')
+    return args, kwargs, converted
+
+legacy_recurrent_support = generate_legacy_interface(
+    allowed_positional_args=['units'],
+    conversions=[('output_dim', 'units'),
+                 ('init', 'kernel_initializer'),
+                 ('inner_init', 'recurrent_initializer'),
+                 ('inner_activation', 'recurrent_activation'),
+                 ('W_regularizer', 'kernel_regularizer'),
+                 ('b_regularizer', 'bias_regularizer'),
+                 ('U_regularizer', 'recurrent_regularizer'),
+                 ('dropout_W', 'dropout'),
+                 ('dropout_U', 'recurrent_dropout')],
+    preprocessor=lstm_args_preprocessor)
+
 legacy_gaussiandropout_support = generate_legacy_interface(
     allowed_positional_args=['rate'],
     conversions=[('p', 'rate')])
diff --git a/tests/keras/legacy/interface_test.py b/tests/keras/legacy/interface_test.py
index a5b567aa249..40ea492bfdb 100644
--- a/tests/keras/legacy/interface_test.py
+++ b/tests/keras/legacy/interface_test.py
@@ -63,6 +63,7 @@ def test_avgpooling1d_legacy_interface():
 def test_prelu_legacy_interface():
     old_layer = keras.layers.PReLU(init='zero', name='p')
     new_layer = keras.layers.PReLU('zero', name='p')
+    assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())
 
 
@@ -73,6 +74,112 @@ def test_gaussiannoise_legacy_interface():
     assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())
 
 
+@keras_test
+def test_lstm_legacy_interface():
+    old_layer = keras.layers.LSTM(input_shape=[3, 5], output_dim=2, name='d')
+    new_layer = keras.layers.LSTM(2, input_shape=[3, 5], name='d')
+    assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())
+
+    old_layer = keras.layers.LSTM(2, init='normal',
+                                  inner_init='glorot_uniform',
+                                  forget_bias_init='one',
+                                  inner_activation='hard_sigmoid',
+                                  W_regularizer='l1',
+                                  U_regularizer='l1',
+                                  b_regularizer='l1',
+                                  dropout_W=0.1,
+                                  dropout_U=0.1,
+                                  name='LSTM')
+
+    new_layer = keras.layers.LSTM(2, kernel_initializer='normal',
+                                  recurrent_initializer='glorot_uniform',
+                                  unit_forget_bias=True,
+                                  recurrent_activation='hard_sigmoid',
+                                  kernel_regularizer='l1',
+                                  recurrent_regularizer='l1',
+                                  bias_regularizer='l1',
+                                  dropout=0.1,
+                                  recurrent_dropout=0.1,
+                                  name='LSTM')
+
+    assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())
+
+    old_layer = keras.layers.LSTM(2, init='normal',
+                                  inner_init='glorot_uniform',
+                                  forget_bias_init='zero',
+                                  inner_activation='hard_sigmoid',
+                                  W_regularizer='l1',
+                                  U_regularizer='l1',
+                                  b_regularizer='l1',
+                                  dropout_W=0.1,
+                                  dropout_U=0.1,
+                                  name='LSTM')
+
+    new_layer = keras.layers.LSTM(2, kernel_initializer='normal',
+                                  recurrent_initializer='glorot_uniform',
+                                  unit_forget_bias=True,
+                                  recurrent_activation='hard_sigmoid',
+                                  kernel_regularizer='l1',
+                                  recurrent_regularizer='l1',
+                                  bias_regularizer='l1',
+                                  dropout=0.1,
+                                  recurrent_dropout=0.1,
+                                  name='LSTM')
+
+    assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())
+
+
+@keras_test
+def test_simplernn_legacy_interface():
+    old_layer = keras.layers.SimpleRNN(input_shape=[3, 5], output_dim=2, name='d')
+    new_layer = keras.layers.SimpleRNN(2, input_shape=[3, 5], name='d')
+    assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())
+
+    old_layer = keras.layers.SimpleRNN(2, init='normal',
+                                       inner_init='glorot_uniform',
+                                       W_regularizer='l1',
+                                       U_regularizer='l1',
+                                       b_regularizer='l1',
+                                       dropout_W=0.1,
+                                       dropout_U=0.1,
+                                       name='SimpleRNN')
+    new_layer = keras.layers.SimpleRNN(2, kernel_initializer='normal',
+                                       recurrent_initializer='glorot_uniform',
+                                       kernel_regularizer='l1',
+                                       recurrent_regularizer='l1',
+                                       bias_regularizer='l1',
+                                       dropout=0.1,
+                                       recurrent_dropout=0.1,
+                                       name='SimpleRNN')
+    assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())
+
+
+@keras_test
+def test_gru_legacy_interface():
+    old_layer = keras.layers.GRU(input_shape=[3, 5], output_dim=2, name='d')
+    new_layer = keras.layers.GRU(2, input_shape=[3, 5], name='d')
+    assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())
+
+    old_layer = keras.layers.GRU(2, init='normal',
+                                 inner_init='glorot_uniform',
+                                 inner_activation='hard_sigmoid',
+                                 W_regularizer='l1',
+                                 U_regularizer='l1',
+                                 b_regularizer='l1',
+                                 dropout_W=0.1,
+                                 dropout_U=0.1,
+                                 name='GRU')
+    new_layer = keras.layers.GRU(2, kernel_initializer='normal',
+                                 recurrent_initializer='glorot_uniform',
+                                 recurrent_activation='hard_sigmoid',
+                                 kernel_regularizer='l1',
+                                 recurrent_regularizer='l1',
+                                 bias_regularizer='l1',
+                                 dropout=0.1,
+                                 recurrent_dropout=0.1,
+                                 name='GRU')
+    assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())
+
+
 @keras_test
 def test_gaussiandropout_legacy_interface():
     old_layer = keras.layers.GaussianDropout(p=0.6, name='drop')
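
Note: a minimal sketch of how the new `preprocessor` hook behaves once this patch is applied. It calls `lstm_args_preprocessor` directly with hand-built kwargs for illustration only; in normal use the `legacy_recurrent_support` decorator invokes it inside `wrapper` before the keyword conversions run.

# Illustrative sketch, assuming the patched keras is importable.
# The preprocessor handles the one legacy argument with no one-to-one
# mapping, `forget_bias_init`, before the generic conversions apply.
import warnings
from keras.legacy.interfaces import lstm_args_preprocessor

# `forget_bias_init='one'` is rewritten to `unit_forget_bias=True`.
args, kwargs, converted = lstm_args_preprocessor((), {'forget_bias_init': 'one'})
assert kwargs == {'unit_forget_bias': True}
assert converted == [('forget_bias_init', 'unit_forget_bias')]

# Any other value is dropped with a warning; `unit_forget_bias` then keeps
# its default (True), which is why the 'zero' case in the test still matches.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    args, kwargs, converted = lstm_args_preprocessor((), {'forget_bias_init': 'zero'})
assert kwargs == {} and converted == []
assert len(caught) == 1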