Legacy API support for LSTM, GRU, SimpleRNN (keras-team#5688)
* Legacy API interface for SimpleRNN, GRU, LSTM

* Test for recurrent layer legacy interfaces

* Fixed import

* Fixed issues with test and recurrent layers

* Preprocessor argument for legacy generator + LSTM kwarg conversion fix

* Warning message fix
abhaikollara authored and fchollet committed Mar 10, 2017
1 parent 152d896 commit 7e6ccb8
Showing 3 changed files with 145 additions and 3 deletions.
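
In short: the three recurrent layers now accept their Keras 1 constructor arguments and translate them to the Keras 2 names. A minimal before/after sketch, mirroring the LSTM test case added below (the two configs are asserted equal exactly as in those tests):

import json
import keras

# Keras 1 style call using the legacy argument names ...
old_layer = keras.layers.LSTM(2, init='normal',
                              forget_bias_init='one',
                              inner_activation='hard_sigmoid',
                              name='lstm')
# ... and the equivalent Keras 2 style call it is converted to.
new_layer = keras.layers.LSTM(2, kernel_initializer='normal',
                              unit_forget_bias=True,
                              recurrent_activation='hard_sigmoid',
                              name='lstm')
assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())
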
5 changes: 4 additions & 1 deletion keras/layers/recurrent.py
@@ -9,6 +9,7 @@
from .. import constraints
from ..engine import Layer
from ..engine import InputSpec
from ..legacy import interfaces


def _time_distributed_dense(x, w, b=None, dropout=None,
@@ -317,6 +318,7 @@ class SimpleRNN(Recurrent):
- [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)
"""

@interfaces.legacy_recurrent_support
def __init__(self, units,
activation='tanh',
use_bias=True,
@@ -552,6 +554,7 @@ class GRU(Recurrent):
- [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)
"""

@interfaces.legacy_recurrent_support
def __init__(self, units,
activation='tanh',
recurrent_activation='hard_sigmoid',
@@ -840,7 +843,7 @@ class LSTM(Recurrent):
- [Supervised sequence labeling with recurrent neural networks](http://www.cs.toronto.edu/~graves/preprint.pdf)
- [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)
"""

@interfaces.legacy_recurrent_support
def __init__(self, units,
activation='tanh',
recurrent_activation='hard_sigmoid',
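
The changes to recurrent.py are limited to importing the legacy interfaces module and decorating each layer's __init__ with legacy_recurrent_support. As a hedged illustration (not part of the commit), a decorated layer called with Keras 1 keywords converts them and emits a UserWarning pointing at the Keras 2 names:

import warnings
import keras

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    # Keras 1 keywords: output_dim -> units, inner_activation -> recurrent_activation
    layer = keras.layers.GRU(output_dim=3, inner_activation='hard_sigmoid')

assert layer.units == 3
assert any(issubclass(w.category, UserWarning) for w in caught)
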
36 changes: 34 additions & 2 deletions keras/legacy/interfaces.py
@@ -5,15 +5,18 @@


def generate_legacy_interface(allowed_positional_args=None,
conversions=None):
conversions=None, preprocessor=None):
allowed_positional_args = allowed_positional_args or []
conversions = conversions or []

def legacy_support(func):
@six.wraps(func)
def wrapper(*args, **kwargs):
layer_name = args[0].__class__.__name__
converted = []
if preprocessor:
args, kwargs, converted = preprocessor(args, kwargs)
else:
converted = []
if len(args) > len(allowed_positional_args) + 1:
raise TypeError('Layer `' + layer_name +
'` can accept only ' +
@@ -61,6 +64,7 @@ def raise_duplicate_arg_error(old_arg, new_arg):
'`' + old_arg + '` and the Keras 2 keyword argument '
'`' + new_arg + '`. Stick to the latter!')


legacy_dense_support = generate_legacy_interface(
allowed_positional_args=['units'],
conversions=[('output_dim', 'units'),
@@ -89,6 +93,34 @@ def raise_duplicate_arg_error(old_arg, new_arg):
allowed_positional_args=['stddev'],
conversions=[('sigma', 'stddev')])


def lstm_args_preprocessor(args, kwargs):
converted = []
if 'forget_bias_init' in kwargs:
if kwargs['forget_bias_init'] == 'one':
kwargs.pop('forget_bias_init')
kwargs['unit_forget_bias'] = True
converted.append(('forget_bias_init', 'unit_forget_bias'))
else:
kwargs.pop('forget_bias_init')
warnings.warn('The `forget_bias_init` argument '
'has been ignored. Use `unit_forget_bias=True` '
'instead to initialize with ones.')
return args, kwargs, converted

legacy_recurrent_support = generate_legacy_interface(
allowed_positional_args=['units'],
conversions=[('output_dim', 'units'),
('init', 'kernel_initializer'),
('inner_init', 'recurrent_initializer'),
('inner_activation', 'recurrent_activation'),
('W_regularizer', 'kernel_regularizer'),
('b_regularizer', 'bias_regularizer'),
('U_regularizer', 'recurrent_regularizer'),
('dropout_W', 'dropout'),
('dropout_U', 'recurrent_dropout')],
preprocessor=lstm_args_preprocessor)

legacy_gaussiandropout_support = generate_legacy_interface(
allowed_positional_args=['rate'],
conversions=[('p', 'rate')])
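
As a quick sanity sketch (not part of the commit), lstm_args_preprocessor rewrites the legacy forget_bias_init keyword before the generic conversion table runs; the literal argument values below are invented for illustration:

from keras.legacy.interfaces import lstm_args_preprocessor

# (None,) stands in for the (self,) positional args seen by the wrapper.
args, kwargs, converted = lstm_args_preprocessor(
    (None,), {'forget_bias_init': 'one', 'output_dim': 2})

# forget_bias_init='one' becomes unit_forget_bias=True; output_dim is left
# for the conversion table above to rename to units.
assert kwargs == {'unit_forget_bias': True, 'output_dim': 2}
assert converted == [('forget_bias_init', 'unit_forget_bias')]
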
107 changes: 107 additions & 0 deletions tests/keras/legacy/interface_test.py
@@ -63,6 +63,7 @@ def test_avgpooling1d_legacy_interface():
def test_prelu_legacy_interface():
old_layer = keras.layers.PReLU(init='zero', name='p')
new_layer = keras.layers.PReLU('zero', name='p')

assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())


@@ -73,6 +74,112 @@ def test_gaussiannoise_legacy_interface():
assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())


@keras_test
def test_lstm_legacy_interface():
old_layer = keras.layers.LSTM(input_shape=[3, 5], output_dim=2, name='d')
new_layer = keras.layers.LSTM(2, input_shape=[3, 5], name='d')
assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())

old_layer = keras.layers.LSTM(2, init='normal',
inner_init='glorot_uniform',
forget_bias_init='one',
inner_activation='hard_sigmoid',
W_regularizer='l1',
U_regularizer='l1',
b_regularizer='l1',
dropout_W=0.1,
dropout_U=0.1,
name='LSTM')

new_layer = keras.layers.LSTM(2, kernel_initializer='normal',
recurrent_initializer='glorot_uniform',
unit_forget_bias=True,
recurrent_activation='hard_sigmoid',
kernel_regularizer='l1',
recurrent_regularizer='l1',
bias_regularizer='l1',
dropout=0.1,
recurrent_dropout=0.1,
name='LSTM')

assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())

old_layer = keras.layers.LSTM(2, init='normal',
inner_init='glorot_uniform',
forget_bias_init='zero',
inner_activation='hard_sigmoid',
W_regularizer='l1',
U_regularizer='l1',
b_regularizer='l1',
dropout_W=0.1,
dropout_U=0.1,
name='LSTM')

new_layer = keras.layers.LSTM(2, kernel_initializer='normal',
recurrent_initializer='glorot_uniform',
unit_forget_bias=True,
recurrent_activation='hard_sigmoid',
kernel_regularizer='l1',
recurrent_regularizer='l1',
bias_regularizer='l1',
dropout=0.1,
recurrent_dropout=0.1,
name='LSTM')

assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())


@keras_test
def test_simplernn_legacy_interface():
old_layer = keras.layers.SimpleRNN(input_shape=[3, 5], output_dim=2, name='d')
new_layer = keras.layers.SimpleRNN(2, input_shape=[3, 5], name='d')
assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())

old_layer = keras.layers.SimpleRNN(2, init='normal',
inner_init='glorot_uniform',
W_regularizer='l1',
U_regularizer='l1',
b_regularizer='l1',
dropout_W=0.1,
dropout_U=0.1,
name='SimpleRNN')
new_layer = keras.layers.SimpleRNN(2, kernel_initializer='normal',
recurrent_initializer='glorot_uniform',
kernel_regularizer='l1',
recurrent_regularizer='l1',
bias_regularizer='l1',
dropout=0.1,
recurrent_dropout=0.1,
name='SimpleRNN')
assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())


@keras_test
def test_gru_legacy_interface():
old_layer = keras.layers.GRU(input_shape=[3, 5], output_dim=2, name='d')
new_layer = keras.layers.GRU(2, input_shape=[3, 5], name='d')
assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())

old_layer = keras.layers.GRU(2, init='normal',
inner_init='glorot_uniform',
inner_activation='hard_sigmoid',
W_regularizer='l1',
U_regularizer='l1',
b_regularizer='l1',
dropout_W=0.1,
dropout_U=0.1,
name='GRU')
new_layer = keras.layers.GRU(2, kernel_initializer='normal',
recurrent_initializer='glorot_uniform',
recurrent_activation='hard_sigmoid',
kernel_regularizer='l1',
recurrent_regularizer='l1',
bias_regularizer='l1',
dropout=0.1,
recurrent_dropout=0.1,
name='GRU')
assert json.dumps(old_layer.get_config()) == json.dumps(new_layer.get_config())

@keras_test
def test_gaussiandropout_legacy_interface():
old_layer = keras.layers.GaussianDropout(p=0.6, name='drop')
