diff --git a/.travis.yml b/.travis.yml index 6e8a227172a..57d0b033652 100644 --- a/.travis.yml +++ b/.travis.yml @@ -60,9 +60,9 @@ install: # install cntk - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then - pip install https://cntk.ai/PythonWheel/CPU-Only/cntk-2.2-cp27-cp27mu-linux_x86_64.whl; + pip install https://cntk.ai/PythonWheel/CPU-Only/cntk-2.3.1-cp27-cp27mu-linux_x86_64.whl; elif [[ "$TRAVIS_PYTHON_VERSION" == "3.6" ]]; then - pip install https://cntk.ai/PythonWheel/CPU-Only/cntk-2.2-cp36-cp36m-linux_x86_64.whl; + pip install https://cntk.ai/PythonWheel/CPU-Only/cntk-2.3.1-cp36-cp36m-linux_x86_64.whl; fi # install pydot for visualization tests diff --git a/keras/backend/cntk_backend.py b/keras/backend/cntk_backend.py index 4cf01af0a43..5b575de8acc 100644 --- a/keras/backend/cntk_backend.py +++ b/keras/backend/cntk_backend.py @@ -2330,6 +2330,9 @@ def _get_cntk_version(): version = C.__version__ if version.endswith('+'): version = version[:-1] + # for hot fix, ignore all the . except the first one. 
+ if len(version) > 2 and version[1] == '.': + version = version[:2] + version[2:].replace('.', '') try: return float(version) except: diff --git a/keras/layers/recurrent.py b/keras/layers/recurrent.py index eb6ad5cd5f5..ba9eb5bc279 100644 --- a/keras/layers/recurrent.py +++ b/keras/layers/recurrent.py @@ -864,13 +864,14 @@ def build(self, input_shape): def call(self, inputs, states, training=None): prev_output = states[0] if 0 < self.dropout < 1 and self._dropout_mask is None: - self._dropout_mask = _generate_dropout_mask(K.shape(inputs), - self.dropout, - training=training) + self._dropout_mask = _generate_dropout_mask( + _generate_dropout_ones(inputs, K.shape(inputs)[-1]), + self.dropout, + training=training) if (0 < self.recurrent_dropout < 1 and self._recurrent_dropout_mask is None): self._recurrent_dropout_mask = _generate_dropout_mask( - [K.shape(inputs)[0], self.units], + _generate_dropout_ones(inputs, self.units), self.recurrent_dropout, training=training) @@ -986,16 +987,6 @@ def __init__(self, units, warnings.warn('The `implementation` argument ' 'in `SimpleRNN` has been deprecated. ' 'Please remove it from your layer call.') - if K.backend() == 'cntk': - if not kwargs.get('unroll') and (dropout > 0 or recurrent_dropout > 0): - warnings.warn( - 'RNN dropout is not supported with the CNTK backend ' - 'when using dynamic RNNs (i.e. non-unrolled). ' - 'You can either set `unroll=True`, ' - 'set `dropout` and `recurrent_dropout` to 0, ' - 'or use the TensorFlow backend.') - dropout = 0. - recurrent_dropout = 0. 
if K.backend() == 'theano': warnings.warn( 'RNN dropout is no longer supported with the Theano backend ' @@ -1257,14 +1248,15 @@ def call(self, inputs, states, training=None): h_tm1 = states[0] # previous memory if 0 < self.dropout < 1 and self._dropout_mask is None: - self._dropout_mask = _generate_dropout_mask(K.shape(inputs), - self.dropout, - training=training, - count=3) + self._dropout_mask = _generate_dropout_mask( + _generate_dropout_ones(inputs, K.shape(inputs)[-1]), + self.dropout, + training=training, + count=3) if (0 < self.recurrent_dropout < 1 and self._recurrent_dropout_mask is None): self._recurrent_dropout_mask = _generate_dropout_mask( - [K.shape(inputs)[0], self.units], + _generate_dropout_ones(inputs, self.units), self.recurrent_dropout, training=training, count=3) @@ -1440,16 +1432,6 @@ def __init__(self, units, warnings.warn('`implementation=0` has been deprecated, ' 'and now defaults to `implementation=1`.' 'Please update your layer call.') - if K.backend() == 'cntk': - if not kwargs.get('unroll') and (dropout > 0 or recurrent_dropout > 0): - warnings.warn( - 'RNN dropout is not supported with the CNTK backend ' - 'when using dynamic RNNs (i.e. non-unrolled). ' - 'You can either set `unroll=True`, ' - 'set `dropout` and `recurrent_dropout` to 0, ' - 'or use a different backend.') - dropout = 0. - recurrent_dropout = 0. 
if K.backend() == 'theano': warnings.warn( 'RNN dropout is no longer supported with the Theano backend ' @@ -1739,14 +1721,15 @@ def bias_initializer(shape, *args, **kwargs): def call(self, inputs, states, training=None): if 0 < self.dropout < 1 and self._dropout_mask is None: - self._dropout_mask = _generate_dropout_mask(K.shape(inputs), - self.dropout, - training=training, - count=4) + self._dropout_mask = _generate_dropout_mask( + _generate_dropout_ones(inputs, K.shape(inputs)[-1]), + self.dropout, + training=training, + count=4) if (0 < self.recurrent_dropout < 1 and self._recurrent_dropout_mask is None): self._recurrent_dropout_mask = _generate_dropout_mask( - [K.shape(inputs)[0], self.units], + _generate_dropout_ones(inputs, self.units), self.recurrent_dropout, training=training, count=4) @@ -1935,16 +1918,6 @@ def __init__(self, units, warnings.warn('`implementation=0` has been deprecated, ' 'and now defaults to `implementation=1`.' 'Please update your layer call.') - if K.backend() == 'cntk': - if not kwargs.get('unroll') and (dropout > 0 or recurrent_dropout > 0): - warnings.warn( - 'RNN dropout is not supported with the CNTK backend ' - 'when using dynamic RNNs (i.e. non-unrolled). ' - 'You can either set `unroll=True`, ' - 'set `dropout` and `recurrent_dropout` to 0, ' - 'or use a different backend.') - dropout = 0. - recurrent_dropout = 0. if K.backend() == 'theano': warnings.warn( 'RNN dropout is no longer supported with the Theano backend ' @@ -2084,9 +2057,18 @@ def from_config(cls, config): return cls(**config) -def _generate_dropout_mask(shape, rate, training=None, count=1): - ones = K.ones(shape) +def _generate_dropout_ones(inputs, dims): + # Currently cntk can't perform `ones` with dynamic batch axis. + # So use `ones_like` instead; it will have a potential perf issue. + # Will update it once cntk supports generating ones with a batch axis.
+ if K.backend() == 'cntk': + ones = K.ones_like(K.reshape(inputs[:, 0], (-1, 1))) + return K.tile(ones, (1, dims)) + else: + return K.ones((K.shape(inputs)[0], dims)) + +def _generate_dropout_mask(ones, rate, training=None, count=1): def dropped_inputs(): return K.dropout(ones, rate) diff --git a/tests/keras/layers/recurrent_test.py b/tests/keras/layers/recurrent_test.py index 0eb41c78f15..31335963b3a 100644 --- a/tests/keras/layers/recurrent_test.py +++ b/tests/keras/layers/recurrent_test.py @@ -70,7 +70,7 @@ def test_stateful_invalid_use(layer_class): @rnn_test -@pytest.mark.skipif((K.backend() in ['cntk', 'theano']), +@pytest.mark.skipif((K.backend() in ['theano']), reason='Not supported.') def test_dropout(layer_class): for unroll in [True, False]: