
Merge pull request #2 from allenai/master
Merge from upstream
alvinhom authored Apr 27, 2017
2 parents c78d397 + 00d3630 commit 72b634b
Showing 13 changed files with 155 additions and 69 deletions.
4 changes: 3 additions & 1 deletion deep_qa/layers/attention/masked_softmax.py
@@ -48,12 +48,14 @@ def call(self, inputs, mask=None):
             inputs = K.squeeze(inputs, axis=-1)
             input_shape = input_shape[:-1]
         if len(input_shape) > 2:
+            original_inputs = inputs
             inputs = last_dim_flatten(inputs)
             if mask is not None:
                 mask = last_dim_flatten(mask)
         # Now we have both inputs and mask with shape (?, num_options), and can do a softmax.
         softmax_result = masked_softmax(inputs, mask)
         if len(input_shape) > 2:
-            input_shape = (-1,) + input_shape[1:]
+            original_shape = K.shape(original_inputs)
+            input_shape = K.concatenate([[-1], original_shape[1:]], 0)
             softmax_result = K.reshape(softmax_result, input_shape)
         return softmax_result
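For context, a minimal sketch of why the reshape now has to be built from ``K.shape()`` rather than the static ``K.int_shape()``. This is not part of the diff; it assumes a TensorFlow-backed Keras of the version this repository targets, and the placeholder shape is made up for illustration:

from keras import backend as K

inputs = K.placeholder(shape=(None, None, 7))   # (batch_size, num_rows, num_options), num_rows unknown
flat = K.reshape(inputs, (-1, 7))               # collapse everything except the last dimension
softmax_result = K.softmax(flat)
# K.int_shape(inputs)[1:] is (None, 7), which K.reshape cannot consume, so the old static
# reshape broke whenever a middle dimension was None.  Building the target shape as a
# tensor from K.shape() resolves the unknown dimension at run time instead:
new_shape = K.concatenate([[-1], K.shape(inputs)[1:]], 0)
restored = K.reshape(softmax_result, new_shape)  # back to (batch_size, num_rows, num_options)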
23 changes: 6 additions & 17 deletions deep_qa/layers/attention/matrix_attention.py
@@ -80,24 +80,13 @@ def compute_output_shape(self, input_shape):

     @overrides
     def call(self, inputs, mask=None):
-        """
-        NOTE: This does not work if ``num_rows_1`` or ``num_rows_2`` is ``None``! I tried to get
-        it to work, but ``K.dot()`` breaks.
-        """
         matrix_1, matrix_2 = inputs
-        matrix_1_shape = K.int_shape(matrix_1)
-        matrix_2_shape = K.int_shape(matrix_2)
-        num_rows_1 = matrix_1_shape[1]
-        num_rows_2 = matrix_2_shape[1]
-        tiled_matrix_1 = K.repeat_elements(K.expand_dims(matrix_1, axis=2), num_rows_2, axis=2)
-        tiled_matrix_2 = K.repeat_elements(K.expand_dims(matrix_2, axis=1), num_rows_1, axis=1)
-
-        # We need to be able to access K.int_shape() in compute_similarity() below, but in theano,
-        # calling a backend function makes it so you can't use K.int_shape() anymore. Setting
-        # tensor._keras_shape here fixes that.
-        # pylint: disable=protected-access
-        tiled_matrix_1._keras_shape = matrix_1_shape[:2] + (num_rows_2,) + matrix_1_shape[2:]
-        tiled_matrix_2._keras_shape = matrix_2_shape[:1] + (num_rows_1,) + matrix_2_shape[1:]
+        num_rows_1 = K.shape(matrix_1)[1]
+        num_rows_2 = K.shape(matrix_2)[1]
+        tile_dims_1 = K.concatenate([[1, 1], [num_rows_2], [1]], 0)
+        tile_dims_2 = K.concatenate([[1], [num_rows_1], [1, 1]], 0)
+        tiled_matrix_1 = K.tile(K.expand_dims(matrix_1, axis=2), tile_dims_1)
+        tiled_matrix_2 = K.tile(K.expand_dims(matrix_2, axis=1), tile_dims_2)
         return self.similarity_function.compute_similarity(tiled_matrix_1, tiled_matrix_2)

     @overrides
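A small numpy sketch of the tiling the new ``call`` performs (not part of the diff; the shapes are made up for illustration). ``K.tile`` replaces ``K.repeat_elements`` because the tile dimensions can now be run-time tensors, so the row counts no longer need to be known when the graph is built:

import numpy as np

matrix_1 = np.random.rand(2, 3, 8)   # (batch_size, num_rows_1, embedding_dim)
matrix_2 = np.random.rand(2, 4, 8)   # (batch_size, num_rows_2, embedding_dim)
num_rows_1, num_rows_2 = matrix_1.shape[1], matrix_2.shape[1]

# Expand, then tile, so every row of matrix_1 is paired with every row of matrix_2.
tiled_matrix_1 = np.tile(np.expand_dims(matrix_1, axis=2), (1, 1, num_rows_2, 1))
tiled_matrix_2 = np.tile(np.expand_dims(matrix_2, axis=1), (1, num_rows_1, 1, 1))
print(tiled_matrix_1.shape, tiled_matrix_2.shape)   # (2, 3, 4, 8) (2, 3, 4, 8)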
33 changes: 7 additions & 26 deletions deep_qa/layers/attention/weighted_sum.py
@@ -71,41 +71,22 @@ def compute_output_shape(self, input_shapes):

     @overrides
     def call(self, inputs, mask=None):
-        # pylint: disable=redefined-variable-type
         matrix, attention_vector = inputs
-        matrix_shape = K.int_shape(matrix)
-        matrix = self._expand_matrix_if_necessary(matrix, matrix_shape[:-1], attention_vector)
+        num_attention_dims = K.ndim(attention_vector)
+        num_matrix_dims = K.ndim(matrix) - 1
+        for _ in range(num_attention_dims - num_matrix_dims):
+            matrix = K.expand_dims(matrix, axis=1)
         if mask is None:
             matrix_mask = None
         else:
             matrix_mask = mask[0]
         if self.use_masking and matrix_mask is not None:
-            matrix_mask = self._expand_matrix_if_necessary(matrix_mask, matrix_shape[:-1], attention_vector)
+            # Doing a multiply here instead of a `switch` to avoid allocating another large tensor.
+            for _ in range(num_attention_dims - num_matrix_dims):
+                matrix_mask = K.expand_dims(matrix_mask, axis=1)
             matrix = K.cast(K.expand_dims(matrix_mask), 'float32') * matrix
         return K.sum(K.expand_dims(attention_vector, axis=-1) * matrix, -2)

-    @staticmethod
-    def _expand_matrix_if_necessary(matrix, matrix_shape, attention_vector):
-        """
-        This function gets the tiles the matrix to have the same shape as the attention vector,
-        ignoring the embedding dimension. We take the shape as input (where the shape already has
-        the embedding dimension removed) so we can call this on the mask as well as the input
-        matrix.
-        """
-        attention_shape = K.int_shape(attention_vector)
-        if matrix_shape != attention_shape:
-            # We'll take care of the batch size first. After this, the matrix_shape should match
-            # the end of the attention_shape exactly.
-            assert matrix_shape[0] == attention_shape[0], "somehow batch sizes don't match"
-            matrix_shape = matrix_shape[1:]
-            attention_shape = attention_shape[1:]
-            assert attention_shape[-len(matrix_shape):] == matrix_shape, ("matrix_shape must be "
-                                                                          "subset of attention_shape")
-            for i in range(len(attention_shape) - len(matrix_shape)):
-                matrix = K.expand_dims(matrix, axis=i+1)  # +1 to account for batch_size
-                matrix = K.repeat_elements(matrix, attention_shape[i], axis=i+1)
-        return matrix
-
     @overrides
     def get_config(self):
         base_config = super(WeightedSum, self).get_config()
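A numpy sketch of the new ``call`` logic (not part of the diff; shapes are illustrative): the matrix is padded with singleton dimensions until broadcasting lines it up with the attention vector, instead of being explicitly tiled by the removed helper:

import numpy as np

matrix = np.random.rand(2, 5, 8)       # (batch_size, num_words, embedding_dim)
attention = np.random.rand(2, 3, 5)    # (batch_size, num_queries, num_words)

num_attention_dims = attention.ndim     # 3
num_matrix_dims = matrix.ndim - 1       # 2, ignoring the embedding dimension
for _ in range(num_attention_dims - num_matrix_dims):
    matrix = np.expand_dims(matrix, axis=1)          # (2, 1, 5, 8), broadcasts over num_queries

weighted_sum = np.sum(np.expand_dims(attention, axis=-1) * matrix, axis=-2)
print(weighted_sum.shape)               # (2, 3, 8): one weighted average per query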
1 change: 1 addition & 0 deletions deep_qa/layers/backend/__init__.py
@@ -7,5 +7,6 @@
 from .permute import Permute
 from .multiply import Multiply
 from .repeat import Repeat
+from .repeat_like import RepeatLike
 from .squeeze import Squeeze
 from .add_mask import AddMask
21 changes: 16 additions & 5 deletions deep_qa/layers/backend/repeat.py
@@ -6,17 +6,25 @@

 class Repeat(MaskedLayer):
     """
-    This `Layer` calls `K.repeat_elements` on both the input and the mask, after calling
-    `K.expand_dims`.
+    This ``Layer`` calls ``K.repeat_elements`` on both the input and the mask, after calling
+    ``K.expand_dims``.

-    If the mask is not `None`, we must be able to call `K.expand_dims` using the same axis
+    If the mask is not ``None``, we must be able to call ``K.expand_dims`` using the same axis
     parameter as we do for the input.

     Input:
         - A tensor of arbitrary shape.

     Output:
         - The input tensor repeated along one of the dimensions.
+
+    Parameters
+    ----------
+    axis: int
+        We will add a dimension to the input tensor at this axis.
+    repetitions: int
+        The new dimension will have this size to it, with each slice being identical to the
+        original input tensor.
     """
     def __init__(self, axis: int, repetitions: int, **kwargs):
         self.axis = axis
@@ -28,15 +36,18 @@ def compute_mask(self, inputs, mask=None):
         # pylint: disable=unused-argument
         if mask is None:
             return None
-        return K.repeat_elements(K.expand_dims(mask, self.axis), self.repetitions, self.axis)
+        return self.__repeat_tensor(mask)

     @overrides
     def compute_output_shape(self, input_shape):
         return input_shape[:self.axis] + (self.repetitions,) + input_shape[self.axis:]

     @overrides
     def call(self, inputs, mask=None):
-        return K.repeat_elements(K.expand_dims(inputs, self.axis), self.repetitions, self.axis)
+        return self.__repeat_tensor(inputs)
+
+    def __repeat_tensor(self, tensor):
+        return K.repeat_elements(K.expand_dims(tensor, self.axis), self.repetitions, self.axis)

     @overrides
     def get_config(self):
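For reference, a tiny numpy sketch of what ``Repeat(axis=1, repetitions=4)`` does to its input (not part of the diff; the values are arbitrary):

import numpy as np

tensor = np.asarray([[2, 5, 3], [-1, -4, -2]])                   # (batch_size, length) == (2, 3)
repeated = np.repeat(np.expand_dims(tensor, axis=1), 4, axis=1)  # (2, 4, 3)
# Every slice repeated[:, i, :] is identical to the original tensor.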
63 changes: 63 additions & 0 deletions deep_qa/layers/backend/repeat_like.py
@@ -0,0 +1,63 @@
from keras import backend as K
from overrides import overrides

from ..masked_layer import MaskedLayer


class RepeatLike(MaskedLayer):
    """
    This ``Layer`` is like :class:`~.repeat.Repeat`, but gets the number of repetitions to use from
    a second input tensor.  This allows doing a number of repetitions that is unknown at graph
    compilation time, and is necessary when the ``repetitions`` argument to ``Repeat`` would be
    ``None``.

    If the mask is not ``None``, we must be able to call ``K.expand_dims`` using the same axis
    parameter as we do for the input.

    Input:
        - A tensor of arbitrary shape, which we will expand and tile.
        - A second tensor whose shape along one dimension we will copy

    Output:
        - The input tensor repeated along one of the dimensions.

    Parameters
    ----------
    axis: int
        We will add a dimension to the input tensor at this axis.
    copy_from_axis: int
        We will copy the dimension from the second tensor at this axis.
    """
    def __init__(self, axis: int, copy_from_axis: int, **kwargs):
        self.axis = axis
        self.copy_from_axis = copy_from_axis
        super(RepeatLike, self).__init__(**kwargs)

    @overrides
    def compute_mask(self, inputs, mask=None):
        # pylint: disable=unused-argument
        if mask is None or mask[0] is None:
            return None
        return self.__repeat_tensor(mask[0], inputs[1])

    @overrides
    def compute_output_shape(self, input_shape):
        return input_shape[0][:self.axis] + (input_shape[1][self.copy_from_axis],) + input_shape[0][self.axis:]

    @overrides
    def call(self, inputs, mask=None):
        return self.__repeat_tensor(inputs[0], inputs[1])

    def __repeat_tensor(self, to_repeat, to_copy):
        expanded = K.expand_dims(to_repeat, self.axis)
        ones = [1] * K.ndim(expanded)
        num_repetitions = K.shape(to_copy)[self.copy_from_axis]
        tile_shape = K.concatenate([ones[:self.axis], [num_repetitions], ones[self.axis+1:]], 0)
        return K.tile(expanded, tile_shape)

    @overrides
    def get_config(self):
        base_config = super(RepeatLike, self).get_config()
        config = {'axis': self.axis, 'copy_from_axis': self.copy_from_axis}
        config.update(base_config)
        return config
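A numpy sketch of ``__repeat_tensor`` above (not part of the diff; the values are arbitrary): the number of repetitions is read off the second input's shape rather than fixed in the constructor, which is what the Keras-level test at the bottom of this commit exercises:

import numpy as np

to_repeat = np.asarray([[2, 5, 3], [-1, -4, -2]])   # (batch_size, length) == (2, 3)
to_copy = np.ones((2, 4))                           # axis 1 has size 4, so repeat 4 times

expanded = np.expand_dims(to_repeat, axis=1)        # (2, 1, 3)
tile_shape = [1, to_copy.shape[1], 1]               # built from K.shape(to_copy) in the layer
print(np.tile(expanded, tile_shape).shape)          # (2, 4, 3)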
5 changes: 4 additions & 1 deletion deep_qa/layers/complex_concat.py
@@ -81,7 +81,10 @@ def _get_combination(self, combination: str, tensors: List['Tensor']):
             first_tensor = self._get_combination(combination[0], tensors)
             second_tensor = self._get_combination(combination[2], tensors)
             if K.int_shape(first_tensor) != K.int_shape(second_tensor):
-                raise ConfigurationError("Cannot combine two tensors with different shapes!")
+                shapes_message = "Shapes were: {} and {}".format(K.int_shape(first_tensor),
+                                                                 K.int_shape(second_tensor))
+                raise ConfigurationError("Cannot combine two tensors with different shapes! " +
+                                         shapes_message)
             operation = combination[1]
             if operation == '*':
                 return first_tensor * second_tensor
26 changes: 14 additions & 12 deletions deep_qa/models/reading_comprehension/bidirectional_attention.py
@@ -1,12 +1,12 @@
-from typing import Dict
+from typing import Dict, List

 from keras.layers import Dense, Input, Concatenate, TimeDistributed
 from overrides import overrides

 from ...data.instances.reading_comprehension import CharacterSpanInstance
 from ...layers import ComplexConcat, Highway
 from ...layers.attention import MatrixAttention, MaskedSoftmax, WeightedSum
-from ...layers.backend import Max, Repeat
+from ...layers.backend import Max, RepeatLike, Repeat
 from ...training import TextTrainer
 from ...training.models import DeepQaModel
 from ...common.params import Params
@@ -139,9 +139,9 @@ def _build_model(self):

         # Then he repeats this question/passage vector for every word in the passage, and uses it
         # as an additional input to the hidden layers above.
-        repeat_layer = Repeat(axis=1, repetitions=self.num_passage_words)
+        repeat_layer = RepeatLike(axis=1, copy_from_axis=1)
         # Shape: (batch_size, num_passage_words, embedding_dim * 2)
-        tiled_question_passage_vector = repeat_layer(question_passage_vector)
+        tiled_question_passage_vector = repeat_layer([question_passage_vector, encoded_passage])

         # Shape: (batch_size, num_passage_words, embedding_dim * 8)
         complex_concat_layer = ComplexConcat(combination='1,2,1*2,1*3', name='final_merged_passage')
@@ -176,9 +176,10 @@ def _build_model(self):
         # his figure makes it clear this is what he intended; he just wrote the equations wrong).
         # Shape: (batch_size, num_passage_words, embedding_dim * 2)
         sum_layer = WeightedSum(name="passage_weighted_by_predicted_span", use_masking=False)
-        repeat_layer = Repeat(axis=1, repetitions=self.num_passage_words)
-        passage_weighted_by_predicted_span = repeat_layer(sum_layer([modeled_passage,
-                                                                     span_begin_probabilities]))
+        repeat_layer = RepeatLike(axis=1, copy_from_axis=1)
+        passage_weighted_by_predicted_span = repeat_layer([sum_layer([modeled_passage,
+                                                                      span_begin_probabilities]),
+                                                           encoded_passage])
         span_end_representation = ComplexConcat(combination="1,2,3,2*3")([final_merged_passage,
                                                                           modeled_passage,
                                                                           passage_weighted_by_predicted_span])
@@ -204,13 +205,10 @@ def _get_padding_lengths(self) -> Dict[str, int]:

     @overrides
     def _set_padding_lengths(self, padding_lengths: Dict[str, int]):
-        # Adding this because we're bypassing num_sentence_words in our model, but TextTrainer
-        # expects it.
-        padding_lengths['num_sentence_words'] = None
         super(BidirectionalAttentionFlow, self)._set_padding_lengths(padding_lengths)
-        if self.num_passage_words is None:
+        if not self.use_dynamic_padding and self.num_passage_words is None:
             self.num_passage_words = padding_lengths['num_passage_words']
-        if self.num_question_words is None:
+        if not self.use_dynamic_padding and self.num_question_words is None:
             self.num_question_words = padding_lengths['num_question_words']

     @overrides
@@ -224,6 +222,9 @@ def _set_padding_lengths_from_model(self):
         # self.num_sentence_words.
         self._set_text_lengths_from_model_input(self.model.get_input_shape_at(0)[1][1:])

+    def _get_instance_sorting_keys(self) -> List[str]:  # pylint: disable=no-self-use
+        return ['num_passage_words', 'num_question_words']
+
     @classmethod
     def _get_custom_objects(cls):
         custom_objects = super(BidirectionalAttentionFlow, cls)._get_custom_objects()
@@ -232,6 +233,7 @@ def _get_custom_objects(cls):
         custom_objects["MatrixAttention"] = MatrixAttention
         custom_objects["Max"] = Max
         custom_objects["Repeat"] = Repeat
+        custom_objects["RepeatLike"] = RepeatLike
         custom_objects["WeightedSum"] = WeightedSum
         return custom_objects

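In short, the rewiring in this file swaps a repetition count fixed at graph-build time for one copied from the passage tensor at run time, which is what makes dynamic padding possible. A condensed before/after sketch using the names from the diff above:

# Before: repetitions fixed when the graph is built, so num_passage_words could not be None.
tiled_question_passage_vector = Repeat(axis=1, repetitions=self.num_passage_words)(question_passage_vector)

# After: the passage length is copied from encoded_passage at run time.
tiled_question_passage_vector = RepeatLike(axis=1, copy_from_axis=1)([question_passage_vector, encoded_passage])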
4 changes: 2 additions & 2 deletions deep_qa/training/text_trainer.py
@@ -535,7 +535,7 @@ def _get_padding_lengths(self) -> Dict[str, int]:

     def _set_padding_lengths(self, dataset_padding_lengths: Dict[str, int]):
         """
-        This is about padding.  Any solver will have some number of things that need padding in
+        This is about padding.  Any model will have some number of things that need padding in
         order to make a consistent set of input arrays, like the length of a sentence.  This method
         sets those variables given a dictionary of lengths from a dataset.

@@ -544,7 +544,7 @@ def _set_padding_lengths(self, dataset_padding_lengths: Dict[str, int]):
         set a hard limit in the class parameters and don't want to change it.
         """
         if not self.use_dynamic_padding and self.num_sentence_words is None:
-            self.num_sentence_words = dataset_padding_lengths['num_sentence_words']
+            self.num_sentence_words = dataset_padding_lengths.get('num_sentence_words', None)
         if not self.use_dynamic_padding and self.num_word_characters is None:
             self.num_word_characters = dataset_padding_lengths.get('num_word_characters', None)

6 changes: 4 additions & 2 deletions deep_qa/training/trainer.py
@@ -272,9 +272,11 @@ def train(self):
         self.training_arrays = self.create_data_arrays(indexed_training_dataset)

         if self.validation_files:
-            self.validation_dataset, self.validation_arrays = self.load_data_arrays(self.validation_files)
+            self.validation_dataset, self.validation_arrays = self.load_data_arrays(self.validation_files,
+                                                                                    self.max_validation_instances)
         if self.test_files:
-            self.test_dataset, self.test_arrays = self.load_data_arrays(self.test_files)
+            self.test_dataset, self.test_arrays = self.load_data_arrays(self.test_files,
+                                                                        self.max_test_instances)

         # Then we build the model and compile it.
         logger.info("Building the model")
8 changes: 8 additions & 0 deletions doc/layers/backend.rst
@@ -67,3 +67,11 @@ Repeat
     :members:
     :undoc-members:
     :show-inheritance:
+
+RepeatLike
+----------
+
+.. automodule:: deep_qa.layers.backend.repeat_like
+    :members:
+    :undoc-members:
+    :show-inheritance:
7 changes: 4 additions & 3 deletions example_experiments/reading_comprehension/bidaf_squad.json
@@ -27,7 +27,8 @@
         "type": "adadelta",
         "lr": 0.5
     },
-    "max_training_instances": 100,
-    "validation_files": ["/efs/data/dlfa/sciq_da/processed/dev.tsv"],
-    "train_files": ["/efs/data/dlfa/sciq_da/processed/train.tsv"]
+    "use_data_generator": true,
+    "use_dynamic_padding": true,
+    "validation_files": ["/net/efs/aristo/dlfa/squad/processed/dev.tsv"],
+    "train_files": ["/net/efs/aristo/dlfa/squad/processed/train.tsv"]
 }
23 changes: 23 additions & 0 deletions tests/layers/backend/repeat_like_test.py
@@ -0,0 +1,23 @@
# pylint: disable=no-self-use,invalid-name

import numpy
from keras.layers import Input
from keras.models import Model

from deep_qa.layers.backend import RepeatLike

class TestRepeatLikeLayer:
    def test_call_works_on_simple_input(self):
        batch_size = 2
        input_length = 3
        repetitions = 4
        input_layer = Input(shape=(input_length,), dtype='float32')
        input_layer_2 = Input(shape=(None,), dtype='float32')
        repeat_output = RepeatLike(axis=1, copy_from_axis=1)([input_layer, input_layer_2])
        model = Model(inputs=[input_layer, input_layer_2], outputs=[repeat_output])
        input_tensor = numpy.asarray([[2, 5, 3], [-1, -4, -2]])
        input_tensor_2 = numpy.ones((batch_size, repetitions))
        repeat_tensor = model.predict([input_tensor, input_tensor_2])
        assert repeat_tensor.shape == (batch_size, repetitions, input_length)
        for i in range(repetitions):
            numpy.testing.assert_almost_equal(repeat_tensor[:, i, :], [[2, 5, 3], [-1, -4, -2]])