diff --git a/keras/layers/embeddings.py b/keras/layers/embeddings.py
index c61a2d0dd2c1..4c543d3976f6 100644
--- a/keras/layers/embeddings.py
+++ b/keras/layers/embeddings.py
@@ -105,7 +105,7 @@ def get_output(self, train=False):
         X = self.get_input(train)
         retain_p = 1. - self.p
         if train and self.p > 0:
-            B = K.random_binomial((self.input_dim), p=retain_p)
+            B = K.random_binomial((self.input_dim,), p=retain_p)
         else:
             B = K.ones((self.input_dim)) * retain_p
         out = K.gather(self.W * K.expand_dims(B), X)  # we zero-out rows of W at random
diff --git a/keras/layers/recurrent.py b/keras/layers/recurrent.py
index d289a8116454..f36de0fecce8 100644
--- a/keras/layers/recurrent.py
+++ b/keras/layers/recurrent.py
@@ -78,6 +78,10 @@ class Recurrent(MaskedLayer):
 
         To reset the states of your model, call `.reset_states()` on either
         a specific layer, or on your entire model.
+
+    # Note on using dropout with TensorFlow
+        When using the TensorFlow backend, specify a fixed batch size for your model
+        following the notes on stateful RNNs.
     '''
     input_ndim = 3
 
@@ -252,8 +256,7 @@ def reset_states(self):
         input_shape = self.input_shape
         if not input_shape[0]:
             raise Exception('If a RNN is stateful, a complete ' +
-                            'input_shape must be provided ' +
-                            '(including batch size).')
+                            'input_shape must be provided (including batch size).')
         if hasattr(self, 'states'):
             K.set_value(self.states[0],
                         np.zeros((input_shape[0], self.output_dim)))
@@ -272,8 +275,11 @@ def step(self, x, states):
 
     def get_constants(self, X, train=False):
         nb_samples = K.shape(X)[0]
-        if K._BACKEND == 'tensorflow':
-            nb_samples = int(nb_samples)
+        if K._BACKEND == 'tensorflow' and train and self.p_W > 0 and self.p_U > 0:
+            if not self.input_shape[0]:
+                raise Exception('For RNN dropout in tensorflow, a complete ' +
+                                'input_shape must be provided (including batch size).')
+            nb_samples = self.input_shape[0]
         retain_p_W = 1. - self.p_W
         retain_p_U = 1. - self.p_U
         if train and self.p_W > 0 and self.p_U > 0:
@@ -391,8 +397,7 @@ def reset_states(self):
         input_shape = self.input_shape
         if not input_shape[0]:
             raise Exception('If a RNN is stateful, a complete ' +
-                            'input_shape must be provided ' +
-                            '(including batch size).')
+                            'input_shape must be provided (including batch size).')
         if hasattr(self, 'states'):
             K.set_value(self.states[0],
                         np.zeros((input_shape[0], self.output_dim)))
@@ -418,8 +423,11 @@ def step(self, x, states):
 
     def get_constants(self, X, train=False):
         nb_samples = K.shape(X)[0]
-        if K._BACKEND == 'tensorflow':
-            nb_samples = int(nb_samples)
+        if K._BACKEND == 'tensorflow' and train and self.p_W > 0 and self.p_U > 0:
+            if not self.input_shape[0]:
+                raise Exception('For RNN dropout in tensorflow, a complete ' +
+                                'input_shape must be provided (including batch size).')
+            nb_samples = self.input_shape[0]
         retain_p_W = 1. - self.p_W
         retain_p_U = 1. - self.p_U
         if train and self.p_W > 0 and self.p_U > 0:
@@ -553,8 +561,7 @@ def reset_states(self):
         input_shape = self.input_shape
         if not input_shape[0]:
             raise Exception('If a RNN is stateful, a complete ' +
-                            'input_shape must be provided ' +
-                            '(including batch size).')
+                            'input_shape must be provided (including batch size).')
         if hasattr(self, 'states'):
             K.set_value(self.states[0],
                         np.zeros((input_shape[0], self.output_dim)))
@@ -585,8 +592,11 @@ def step(self, x, states):
 
     def get_constants(self, X, train=False):
         nb_samples = K.shape(X)[0]
-        if K._BACKEND == 'tensorflow':
-            nb_samples = int(nb_samples)
+        if K._BACKEND == 'tensorflow' and train and self.p_W > 0 and self.p_U > 0:
+            if not self.input_shape[0]:
+                raise Exception('For RNN dropout in tensorflow, a complete ' +
+                                'input_shape must be provided (including batch size).')
+            nb_samples = self.input_shape[0]
         retain_p_W = 1. - self.p_W
         retain_p_U = 1. - self.p_U
         if train and self.p_W > 0 and self.p_U > 0:
diff --git a/tests/keras/layers/test_recurrent.py b/tests/keras/layers/test_recurrent.py
index 255ff2af203f..b95cd0e3b916 100644
--- a/tests/keras/layers/test_recurrent.py
+++ b/tests/keras/layers/test_recurrent.py
@@ -18,23 +18,39 @@ def _runner(layer_class):
     All the recurrent layers share the same interface,
     so we can run through them with a single function.
     """
-    for p in [0., 0.5]:
-        for ret_seq in [True, False]:
-            layer = layer_class(output_dim, return_sequences=ret_seq,
-                                weights=None, input_shape=(timesteps, embedding_dim),
-                                p_W=p, p_U=p)
-            layer.input = K.variable(np.ones((nb_samples, timesteps, embedding_dim)))
-            layer.get_config()
-
-            for train in [True, False]:
-                out = K.eval(layer.get_output(train))
-                # Make sure the output has the desired shape
-                if ret_seq:
-                    assert(out.shape == (nb_samples, timesteps, output_dim))
-                else:
-                    assert(out.shape == (nb_samples, output_dim))
-
-                mask = layer.get_output_mask(train)
+    for ret_seq in [True, False]:
+        layer = layer_class(output_dim, return_sequences=ret_seq,
+                            weights=None, input_shape=(timesteps, embedding_dim))
+        layer.input = K.variable(np.ones((nb_samples, timesteps, embedding_dim)))
+        layer.get_config()
+
+        for train in [True, False]:
+            out = K.eval(layer.get_output(train))
+            # Make sure the output has the desired shape
+            if ret_seq:
+                assert(out.shape == (nb_samples, timesteps, output_dim))
+            else:
+                assert(out.shape == (nb_samples, output_dim))
+
+            mask = layer.get_output_mask(train)
+
+    # check dropout
+    for ret_seq in [True, False]:
+        layer = layer_class(output_dim, return_sequences=ret_seq, weights=None,
+                            batch_input_shape=(nb_samples, timesteps, embedding_dim),
+                            p_W=0.5, p_U=0.5)
+        layer.input = K.variable(np.ones((nb_samples, timesteps, embedding_dim)))
+        layer.get_config()
+
+        for train in [True, False]:
+            out = K.eval(layer.get_output(train))
+            # Make sure the output has the desired shape
+            if ret_seq:
+                assert(out.shape == (nb_samples, timesteps, output_dim))
+            else:
+                assert(out.shape == (nb_samples, output_dim))
+
+            mask = layer.get_output_mask(train)
 
     # check statefulness
     model = Sequential()