Skip to content

Commit

Permalink
split dotmul_projection and dotmul_operator (PaddlePaddle#87)
Browse files Browse the repository at this point in the history
* split dotmul_projection and dotmul_operator
* bug fix in outputsize checking for mixed layer
  • Loading branch information
Haichao-Zhang authored and emailweixu committed Sep 19, 2016
1 parent 90b9cba commit 159dd83
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 43 deletions.
19 changes: 14 additions & 5 deletions python/paddle/trainer/config_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2430,7 +2430,6 @@ def __init__(
config_assert(inputs, 'inputs cannot be empty')
super(MixedLayer, self).__init__(
name, 'mixed', size, inputs=inputs, **xargs)

operator_input_index = []
for operator in self.operators:
operator_conf = operator.operator_conf
Expand All @@ -2445,21 +2444,31 @@ def __init__(
input_layer = self.get_input_layer(input_index)
operator_conf.input_sizes.append(input_layer.size)
operator_input_index.append(input_index)
if self.config.size == 0:
if self.config.size == 0:
size = operator.calc_output_size(operator_conf.input_sizes)
if size != 0:
self.set_layer_size(size)

else:
size = operator.calc_output_size(operator_conf.input_sizes)
if size != 0:
config_assert(size == self.config.size,
"different inputs have different size: %s vs. %s" %
(size, self.config.size))
for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index)
input = self.inputs[input_index]
if input_index not in operator_input_index:
config_assert(isinstance(input, Projection), "input should be projection or operation")
if self.config.size == 0 and isinstance(input, Projection):
if self.config.size == 0 and isinstance(input, Projection):
size = input.calc_output_size(input_layer)
if size != 0:
self.set_layer_size(size)

elif isinstance(input, Projection):
sz = input.calc_output_size(input_layer)
if sz != 0:
config_assert(sz == self.config.size,
"different inputs have different size: %s vs. %s" %
(sz, self.config.size))
config_assert(size != 0, "size is not set")

for input_index in xrange(len(self.inputs)):
Expand Down
74 changes: 37 additions & 37 deletions python/paddle/trainer_config_helpers/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import copy

__all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
"identity_projection", "dotmul_projection",
"identity_projection", "dotmul_projection", "dotmul_operator",
"table_projection", "mixed_layer", "data_layer",
"embedding_layer", "fc_layer", "grumemory",
"pooling_layer", "lstmemory", "last_seq", "first_seq",
Expand Down Expand Up @@ -389,7 +389,7 @@ def identity_projection(input, offset=None):
@wrap_param_attr_default()
def dotmul_projection(input, param_attr=None, scale=1):
"""
1. DotMulProjection if input is a layer.
DotMulProjection with a layer as input.
It performs element-wise multiplication with weight.
.. math::
Expand All @@ -403,48 +403,45 @@ def dotmul_projection(input, param_attr=None, scale=1):
proj = dotmul_projection(input=layer)
2. DotMulOperator if input is a list or tuple.
It takes two inputs, performs element-wise multiplication:
.. math::
out.row[i] += scale * (in1.row[i] .* in2.row[i])
where :math:`.*` means element-wise multiplication, and
scale is a config scalar, its default value is one.
The example usage is:
.. code-block:: python
op = dotmul_projection(input=[layer1, layer2],
scale=2.0)
:param input: Input layer.
:type input: LayerOutput|list|tuple
:type input: LayerOutput
:param param_attr: Parameter config, None if use default.
:type param_attr: ParameterAttribute
:param scale: config scalar, default value is one.
:type scale: float
:return: A DotMulProjection or DotMulOperator Object.
:rtype: DotMulProjection or DotMulOperator
:return: A DotMulProjection Object.
:rtype: DotMulProjection
"""
if isinstance(input, LayerOutput):
proj = DotMulProjection(input_layer_name=input.name,
proj = DotMulProjection(input_layer_name=input.name,
size=input.size,
**param_attr.attr)
proj.origin = input
proj.origin.projection = "dot_mul"
return proj
else:
assert isinstance(input, list) or isinstance(input, tuple)
assert len(input) == 2
assert param_attr is None
op = DotMulOperator(input_layer_name=[x.name for x in input],
scale=scale)
op.origin = input
op.origin.operator = "dot_mul"
return op
proj.origin = input
return proj

def dotmul_operator(x, y, scale=1):
    """
    DotMulOperator takes two inputs and performs element-wise multiplication:

    .. math::
       out.row[i] += scale * (x.row[i] .* y.row[i])

    where :math:`.*` means element-wise multiplication, and
    scale is a config scalar, its default value is one.

    The example usage is:

    .. code-block:: python

       op = dotmul_operator(x, y,
                            scale=1)

    :param x: The first input layer.
    :type x: LayerOutput
    :param y: The second input layer.
    :type y: LayerOutput
    :param scale: config scalar, default value is one.
    :type scale: float
    :return: A DotMulOperator Object.
    :rtype: DotMulOperator
    :raise AssertionError: If x or y is not a LayerOutput.
    """
    assert isinstance(x, LayerOutput)
    assert isinstance(y, LayerOutput)
    op = DotMulOperator(input_layer_names=[x.name, y.name],
                        scale=scale)
    # An operator has two source layers, so ``origin`` is a list here
    # (a projection stores a single layer); the mixed layer's ``__add__``
    # extends its parents with this list.
    op.origin = [x, y]
    return op

@wrap_bias_attr_default(['padding_attr'])
def context_projection(input, context_len, context_start=None,
Expand Down Expand Up @@ -539,7 +536,10 @@ def __add__(self, other):
if not self.finalized:
assert isinstance(other, Projection) or isinstance(other, Operator)
self.inputs.append(other)
self.parents.append(other.origin)
if isinstance(other, Projection):
self.parents.append(other.origin)
else:
self.parents.extend(other.origin)
return self
else:
raise MixedLayerType.AddToSealedMixedLayerException()
Expand All @@ -565,7 +565,7 @@ def __exit__(self, *args, **kwargs):
@wrap_act_default(act=LinearActivation())
@wrap_bias_attr_default(has_bias=False)
@layer_support(ERROR_CLIPPING, DROPOUT)
def mixed_layer(size, input=None, name=None, act=None, bias_attr=False,
def mixed_layer(size=0, input=None, name=None, act=None, bias_attr=False,
layer_attr=None):
"""
Mixed Layer. A mixed layer will add all inputs together, then activate.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,11 @@

outputs(classification_cost(out, data_layer(name="label", size=num_classes)))

dotmul = mixed_layer(input=[dotmul_operator(x=x1, y=y1),
dotmul_projection(input=y1)])

# for ctc
tmp = fc_layer(input=x1,
tmp = fc_layer(input=[x1, dotmul],
size=num_classes + 1,
act=SoftmaxActivation())
ctc = ctc_layer(input=tmp,
Expand Down

0 comments on commit 159dd83

Please sign in to comment.