From 55b96287aeb08305b90d3351c602c207d9c38290 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 23 Feb 2017 22:45:51 +0800 Subject: [PATCH 01/24] support rnn --- python/paddle/v2/layer.py | 87 +++++++++++++++++++++++++++++++++++---- 1 file changed, 79 insertions(+), 8 deletions(-) diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 507725ee4ff712..bebe7c6690ca71 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -73,6 +73,7 @@ class for each layer creation function in paddle.trainer_config_helpers.layers. parse_network_config as __parse__ from paddle.trainer_config_helpers.default_decorators import wrap_name_default +import activation import data_type __all__ = [ @@ -97,10 +98,11 @@ def __real_func__(): class Layer(object): - def __init__(self, name, parent_layers): + def __init__(self, name, parent_layers, step_input=None): assert isinstance(parent_layers, dict) assert isinstance(name, basestring) self.name = name + self.step_input = step_input self.__parent_layers__ = parent_layers def to_proto(self, context): @@ -116,8 +118,14 @@ def to_proto(self, context): else: v1_layer = map(lambda x: x.to_proto(context=context), self.__parent_layers__[layer_name]) + if layer_name == "input" and self.step_input is not None: + v1_layer.insert(0, self.step_input) kwargs[layer_name] = v1_layer + # memory may have the same name with some layer + if isinstance(self, MemoryV2): + return self.to_proto_impl(**kwargs) + if self.name not in context: context[self.name] = self.to_proto_impl(**kwargs) return context[self.name] @@ -133,7 +141,7 @@ def __convert_to_v2__(method_name, name_prefix, parent_names): wrapper = None class V2LayerImpl(Layer): - def __init__(self, name=None, **kwargs): + def __init__(self, name=None, step_input=None, **kwargs): parent_layers = dict() other_kwargs = dict() for pname in parent_names: @@ -143,7 +151,7 @@ def __init__(self, name=None, **kwargs): if key not in parent_names: other_kwargs[key] = kwargs[key] - super(V2LayerImpl, self).__init__(name, parent_layers) + super(V2LayerImpl, self).__init__(name, parent_layers, step_input) self.__other_kwargs__ = other_kwargs if wrapper is not None: @@ -186,6 +194,22 @@ def to_proto_impl(self, **kwargs): return getattr(conf_helps, self.__method_name__)(name=self.name, **args) +class MemoryV2(Layer): + def __init__(self, name, size, **kwargs): + self.name = name + self.size = size + self.__kwargs__ = kwargs + super(MemoryV2, self).__init__(name=name, parent_layers=dict()) + + def to_proto_impl(self, **kwargs): + args = dict() + for each in kwargs: + args[each] = kwargs[each] + for each in self.__kwargs__: + args[each] = self.__kwargs__[each] + return conf_helps.memory(name=self.name, size=self.size, **args) + + data = DataLayerV2 fc = __convert_to_v2__('fc_layer', name_prefix='fc', parent_names=['input']) max_id = __convert_to_v2__( @@ -198,6 +222,13 @@ def to_proto_impl(self, **kwargs): 'cross_entropy', name_prefix='cross_entropy', parent_names=['input', 'label']) +embedding = __convert_to_v2__( + 'embedding_layer', name_prefix='embedding', parent_names=['input']) +last_seq = __convert_to_v2__( + 'last_seq', name_prefix='last_seq', parent_names=['input']) +recurrent_group = __convert_to_v2__( + 'recurrent_group', name_prefix='recurrent_layer', parent_names=['input']) +memory = MemoryV2 if __name__ == '__main__': pixel = data(name='pixel', type=data_type.dense_vector(784)) @@ -208,8 +239,48 @@ def to_proto_impl(self, **kwargs): cost1 = classification_cost(input=inference, label=label) cost2 = cross_entropy_cost(input=inference, label=label) - print parse_network(cost1) - print parse_network(cost2) - print parse_network(cost1, cost2) - print parse_network(cost2) - print parse_network(inference, maxid) + mem = memory(name="rnn_state", size=10) + + # print parse_network(cost1) + # print parse_network(cost2) + # print parse_network(cost1, cost2) + # print parse_network(cost2) + # print parse_network(inference, maxid) + + dict_dim = 10 + word_dim = 8 + hidden_dim = 8 + label_dim = 3 + + def step(y): + mem = conf_helps.memory(name="rnn_state", size=hidden_dim) + out = conf_helps.fc_layer( + input=[y, mem], + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out + + def test(): + data1 = conf_helps.data_layer(name="word", size=dict_dim) + embd = conf_helps.embedding_layer(input=data1, size=word_dim) + conf_helps.recurrent_group(name="rnn", step=step, input=embd) + + # print __parse__(test) + + # yyyyyyyy + def new_step(y): + mem = memory(name="rnn_state", size=hidden_dim) + out = fc(input=[mem], + step_input=y, + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out.to_proto(dict()) + + data1 = data(name="word", type=data_type.integer_value(dict_dim)) + embd = embedding(input=data1, size=word_dim) + aaa = recurrent_group(name="rnn", step=new_step, input=embd) + print parse_network(aaa) From 92f52e3bb7a1a203a01d3641887c6bdfd03dce67 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Fri, 24 Feb 2017 13:46:59 +0800 Subject: [PATCH 02/24] add rnn test --- demo/mnist/api_train_v2.py | 3 ++ python/paddle/v2/layer.py | 43 +---------------- python/paddle/v2/tests/layer_test.py | 72 ++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 41 deletions(-) create mode 100644 python/paddle/v2/tests/layer_test.py diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py index 6fc01ce58be57c..5e66b7399ad52f 100644 --- a/demo/mnist/api_train_v2.py +++ b/demo/mnist/api_train_v2.py @@ -3,6 +3,9 @@ import mnist_util +import pudb +pudb.set_trace() + def train_reader(): train_file = './data/raw_data/train' diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 44ebebcaeab538..e1952ce747d103 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -73,16 +73,15 @@ class for each layer creation function in paddle.trainer_config_helpers.layers. parse_network_config as __parse__ from paddle.trainer_config_helpers.default_decorators import wrap_name_default -import activation -import data_type import activation import attr +import data_type __all__ = [ 'parse_network', 'data', 'fc', 'max_id', 'classification_cost', 'cross_entropy_cost', 'cross_entropy_with_selfnorm_cost', 'regression_cost', 'multi_binary_label_cross_entropy_cost', 'rank_cost', 'lambda_cost', - 'sum_cost', 'huber_cost' + 'sum_cost', 'huber_cost', 'memory', 'embedding', 'recurrent_group' ] @@ -294,41 +293,3 @@ def to_proto_impl(self, **kwargs): print parse_network(cost5, cost6) print parse_network(cost7, cost8, cost9, cost10, cost11) print parse_network(inference, maxid) - - dict_dim = 10 - word_dim = 8 - hidden_dim = 8 - label_dim = 3 - - def step(y): - mem = conf_helps.memory(name="rnn_state", size=hidden_dim) - out = conf_helps.fc_layer( - input=[y, mem], - size=hidden_dim, - act=activation.Tanh(), - bias_attr=True, - name="rnn_state") - return out - - def test(): - data1 = conf_helps.data_layer(name="word", size=dict_dim) - embd = conf_helps.embedding_layer(input=data1, size=word_dim) - conf_helps.recurrent_group(name="rnn", step=step, input=embd) - - # print __parse__(test) - - # yyyyyyyy - def new_step(y): - mem = memory(name="rnn_state", size=hidden_dim) - out = fc(input=[mem], - step_input=y, - size=hidden_dim, - act=activation.Tanh(), - bias_attr=True, - name="rnn_state") - return out.to_proto(dict()) - - data1 = data(name="word", type=data_type.integer_value(dict_dim)) - embd = embedding(input=data1, size=word_dim) - aaa = recurrent_group(name="rnn", step=new_step, input=embd) - print parse_network(aaa) diff --git a/python/paddle/v2/tests/layer_test.py b/python/paddle/v2/tests/layer_test.py new file mode 100644 index 00000000000000..87e601a60abc57 --- /dev/null +++ b/python/paddle/v2/tests/layer_test.py @@ -0,0 +1,72 @@ +# Copyright PaddlePaddle contributors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import difflib +import unittest + +import paddle.trainer_config_helpers as conf_helps +import paddle.v2.activation as activation +import paddle.v2.data_type as data_type +import paddle.v2.layer as layer +from paddle.trainer_config_helpers.config_parser_utils import \ + parse_network_config as parse_network + + +class RNNTest(unittest.TestCase): + def test_simple_rnn(self): + dict_dim = 10 + word_dim = 8 + hidden_dim = 8 + + def test_old_rnn(): + def step(y): + mem = conf_helps.memory(name="rnn_state", size=hidden_dim) + out = conf_helps.fc_layer( + input=[y, mem], + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out + + def test(): + data1 = conf_helps.data_layer(name="word", size=dict_dim) + embd = conf_helps.embedding_layer(input=data1, size=word_dim) + conf_helps.recurrent_group(name="rnn", step=step, input=embd) + + return str(parse_network(test)) + + def test_new_rnn(): + def new_step(y): + mem = layer.memory(name="rnn_state", size=hidden_dim) + out = layer.fc(input=[mem], + step_input=y, + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out.to_proto(dict()) + + data1 = layer.data( + name="word", type=data_type.integer_value(dict_dim)) + embd = layer.embedding(input=data1, size=word_dim) + aaa = layer.recurrent_group(name="rnn", step=new_step, input=embd) + return str(layer.parse_network(aaa)) + + diff = difflib.unified_diff(test_old_rnn().splitlines(1), + test_new_rnn().splitlines(1)) + print ''.join(diff) + + +if __name__ == '__main__': + unittest.main() From 6b80c2b4f9a626efa911f715dcb45bee99d80729 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Fri, 24 Feb 2017 15:29:11 +0800 Subject: [PATCH 03/24] add cost test --- python/paddle/v2/layer.py | 2 -- python/paddle/v2/tests/layer_test.py | 35 ++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index e1952ce747d103..f333c0af964bf5 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -281,8 +281,6 @@ def to_proto_impl(self, **kwargs): cost10 = sum_cost(input=inference) cost11 = huber_cost(input=score, label=label) - mem = memory(name="rnn_state", size=10) - # print parse_network(cost1) # print parse_network(cost2) # print parse_network(cost1, cost2) diff --git a/python/paddle/v2/tests/layer_test.py b/python/paddle/v2/tests/layer_test.py index 87e601a60abc57..6c4b458914701e 100644 --- a/python/paddle/v2/tests/layer_test.py +++ b/python/paddle/v2/tests/layer_test.py @@ -18,10 +18,45 @@ import paddle.v2.activation as activation import paddle.v2.data_type as data_type import paddle.v2.layer as layer +import paddle.v2.attr as attr from paddle.trainer_config_helpers.config_parser_utils import \ parse_network_config as parse_network +class CostLyaerTest(unittest.TestCase): + def test_cost_layer(self): + pixel = layer.data(name='pixel', type=data_type.dense_vector(784)) + label = layer.data(name='label', type=data_type.integer_value(10)) + weight = layer.data(name='weight', type=data_type.dense_vector(10)) + score = layer.data(name='score', type=data_type.dense_vector(1)) + hidden = layer.fc(input=pixel, + size=100, + act=activation.Sigmoid(), + param_attr=attr.Param(name='hidden')) + inference = layer.fc(input=hidden, size=10, act=activation.Softmax()) + + cost1 = layer.classification_cost(input=inference, label=label) + cost2 = layer.classification_cost( + input=inference, label=label, weight=weight) + cost3 = layer.cross_entropy_cost(input=inference, label=label) + cost4 = layer.cross_entropy_with_selfnorm_cost( + input=inference, label=label) + cost5 = layer.regression_cost(input=inference, label=label) + cost6 = layer.regression_cost( + input=inference, label=label, weight=weight) + cost7 = layer.multi_binary_label_cross_entropy_cost( + input=inference, label=label) + cost8 = layer.rank_cost(left=score, right=score, label=score) + cost9 = layer.lambda_cost(input=inference, score=score) + cost10 = layer.sum_cost(input=inference) + cost11 = layer.huber_cost(input=score, label=label) + + print layer.parse_network(cost1, cost2) + print layer.parse_network(cost3, cost4) + print layer.parse_network(cost5, cost6) + print layer.parse_network(cost7, cost8, cost9, cost10, cost11) + + class RNNTest(unittest.TestCase): def test_simple_rnn(self): dict_dim = 10 From db92e3c884a586d0f28dcc7c7e3be99c1e6203f6 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Fri, 24 Feb 2017 16:04:59 +0800 Subject: [PATCH 04/24] refine code --- python/paddle/v2/layer.py | 35 ---------------------------- python/paddle/v2/tests/layer_test.py | 7 +++--- 2 files changed, 4 insertions(+), 38 deletions(-) diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index f333c0af964bf5..5ecc96c6856508 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -256,38 +256,3 @@ def to_proto_impl(self, **kwargs): 'sum_cost', name_prefix='sum_cost', parent_names=['input']) huber_cost = __convert_to_v2__( 'huber_cost', name_prefix='huber_cost', parent_names=['input', 'label']) - -if __name__ == '__main__': - pixel = data(name='pixel', type=data_type.dense_vector(784)) - label = data(name='label', type=data_type.integer_value(10)) - weight = data(name='weight', type=data_type.dense_vector(10)) - score = data(name='score', type=data_type.dense_vector(1)) - - hidden = fc(input=pixel, - size=100, - act=activation.Sigmoid(), - param_attr=attr.Param(name='hidden')) - inference = fc(input=hidden, size=10, act=activation.Softmax()) - maxid = max_id(input=inference) - cost1 = classification_cost(input=inference, label=label) - cost2 = classification_cost(input=inference, label=label, weight=weight) - cost3 = cross_entropy_cost(input=inference, label=label) - cost4 = cross_entropy_with_selfnorm_cost(input=inference, label=label) - cost5 = regression_cost(input=inference, label=label) - cost6 = regression_cost(input=inference, label=label, weight=weight) - cost7 = multi_binary_label_cross_entropy_cost(input=inference, label=label) - cost8 = rank_cost(left=score, right=score, label=score) - cost9 = lambda_cost(input=inference, score=score) - cost10 = sum_cost(input=inference) - cost11 = huber_cost(input=score, label=label) - - # print parse_network(cost1) - # print parse_network(cost2) - # print parse_network(cost1, cost2) - # print parse_network(cost2) - # print parse_network(inference, maxid) - print parse_network(cost1, cost2) - print parse_network(cost3, cost4) - print parse_network(cost5, cost6) - print parse_network(cost7, cost8, cost9, cost10, cost11) - print parse_network(inference, maxid) diff --git a/python/paddle/v2/tests/layer_test.py b/python/paddle/v2/tests/layer_test.py index 6c4b458914701e..2958cbd9ebd0c3 100644 --- a/python/paddle/v2/tests/layer_test.py +++ b/python/paddle/v2/tests/layer_test.py @@ -16,9 +16,9 @@ import paddle.trainer_config_helpers as conf_helps import paddle.v2.activation as activation +import paddle.v2.attr as attr import paddle.v2.data_type as data_type import paddle.v2.layer as layer -import paddle.v2.attr as attr from paddle.trainer_config_helpers.config_parser_utils import \ parse_network_config as parse_network @@ -95,8 +95,9 @@ def new_step(y): data1 = layer.data( name="word", type=data_type.integer_value(dict_dim)) embd = layer.embedding(input=data1, size=word_dim) - aaa = layer.recurrent_group(name="rnn", step=new_step, input=embd) - return str(layer.parse_network(aaa)) + rnn_layer = layer.recurrent_group( + name="rnn", step=new_step, input=embd) + return str(layer.parse_network(rnn_layer)) diff = difflib.unified_diff(test_old_rnn().splitlines(1), test_new_rnn().splitlines(1)) From e4327a7cd9408839900c0f82b4aedf2ce6672cbd Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Fri, 24 Feb 2017 16:11:02 +0800 Subject: [PATCH 05/24] add CMakeLists.txt --- python/paddle/v2/tests/CMakeLists.txt | 4 ++++ python/paddle/v2/tests/layer_test.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 python/paddle/v2/tests/CMakeLists.txt diff --git a/python/paddle/v2/tests/CMakeLists.txt b/python/paddle/v2/tests/CMakeLists.txt new file mode 100644 index 00000000000000..dc5efdab6a9973 --- /dev/null +++ b/python/paddle/v2/tests/CMakeLists.txt @@ -0,0 +1,4 @@ +add_test(NAME layer_test + COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/layer_test.py + WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) diff --git a/python/paddle/v2/tests/layer_test.py b/python/paddle/v2/tests/layer_test.py index 2958cbd9ebd0c3..83c8c26d6b6546 100644 --- a/python/paddle/v2/tests/layer_test.py +++ b/python/paddle/v2/tests/layer_test.py @@ -23,7 +23,7 @@ parse_network_config as parse_network -class CostLyaerTest(unittest.TestCase): +class CostLayerTest(unittest.TestCase): def test_cost_layer(self): pixel = layer.data(name='pixel', type=data_type.dense_vector(784)) label = layer.data(name='label', type=data_type.integer_value(10)) From f13f1f1ce5cfe428c272e90f85dc9a9c1ed55f6b Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Fri, 24 Feb 2017 17:37:38 +0800 Subject: [PATCH 06/24] use test_layer instead of layer_test --- python/paddle/v2/tests/test_layer.py | 57 +++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py index b600e8cf765122..73d769a3582441 100644 --- a/python/paddle/v2/tests/test_layer.py +++ b/python/paddle/v2/tests/test_layer.py @@ -51,12 +51,57 @@ def test_cost_layer(self): cost10 = layer.sum_cost(input=inference) cost11 = layer.huber_cost(input=score, label=label) - print dir(layer) - layer.parse_network(cost1, cost2) - print dir(layer) - #print layer.parse_network(cost3, cost4) - #print layer.parse_network(cost5, cost6) - #print layer.parse_network(cost7, cost8, cost9, cost10, cost11) + print layer.parse_network(cost1, cost2) + print layer.parse_network(cost3, cost4) + print layer.parse_network(cost5, cost6) + print layer.parse_network(cost7, cost8, cost9, cost10, cost11) + + +class RNNTest(unittest.TestCase): + def test_simple_rnn(self): + dict_dim = 10 + word_dim = 8 + hidden_dim = 8 + + def test_old_rnn(): + def step(y): + mem = conf_helps.memory(name="rnn_state", size=hidden_dim) + out = conf_helps.fc_layer( + input=[y, mem], + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out + + def test(): + data1 = conf_helps.data_layer(name="word", size=dict_dim) + embd = conf_helps.embedding_layer(input=data1, size=word_dim) + conf_helps.recurrent_group(name="rnn", step=step, input=embd) + + return str(parse_network(test)) + + def test_new_rnn(): + def new_step(y): + mem = layer.memory(name="rnn_state", size=hidden_dim) + out = layer.fc(input=[mem], + step_input=y, + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out.to_proto(dict()) + + data1 = layer.data( + name="word", type=data_type.integer_value(dict_dim)) + embd = layer.embedding(input=data1, size=word_dim) + rnn_layer = layer.recurrent_group( + name="rnn", step=new_step, input=embd) + return str(layer.parse_network(rnn_layer)) + + diff = difflib.unified_diff(test_old_rnn().splitlines(1), + test_new_rnn().splitlines(1)) + print ''.join(diff) if __name__ == '__main__': From ad4ab5ac811d90dd2bbb661ad34ba5ee3aa510a1 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sun, 26 Feb 2017 16:29:02 +0800 Subject: [PATCH 07/24] remove step_input in recurrent_group step_input --- .../paddle/trainer_config_helpers/layers.py | 8 ++- python/paddle/v2/layer.py | 61 +++++++++++++++---- python/paddle/v2/tests/test_layer.py | 13 ++-- 3 files changed, 62 insertions(+), 20 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 00aef80691fba0..4e200517fc4bc6 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -3042,7 +3042,8 @@ def recurrent_group(step, reverse=False, name=None, targetInlink=None, - is_generating=False): + is_generating=False, + in_args_converter=None): """ Recurrent layer group is an extremely flexible recurrent unit in PaddlePaddle. As long as the user defines the calculation done within a @@ -3185,7 +3186,10 @@ def map_in_links(x): assert (is_generating != has_LayerOutput) - layer_outs = step(*in_args) + if in_args_converter is None: + layer_outs = step(*in_args) + else: + layer_outs = step(*in_args_converter(*in_args)).to_proto(dict()) if isinstance(layer_outs, LayerOutput): layer_outs = [layer_outs] diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 5ecc96c6856508..44c7661b2463f0 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -73,8 +73,6 @@ class for each layer creation function in paddle.trainer_config_helpers.layers. parse_network_config as __parse__ from paddle.trainer_config_helpers.default_decorators import wrap_name_default -import activation -import attr import data_type __all__ = [ @@ -101,11 +99,10 @@ def __real_func__(): class Layer(object): - def __init__(self, name, parent_layers, step_input=None): + def __init__(self, name, parent_layers): assert isinstance(parent_layers, dict) assert isinstance(name, basestring) self.name = name - self.step_input = step_input self.__parent_layers__ = parent_layers def to_proto(self, context): @@ -121,12 +118,13 @@ def to_proto(self, context): else: v1_layer = map(lambda x: x.to_proto(context=context), self.__parent_layers__[layer_name]) - if layer_name == "input" and self.step_input is not None: - v1_layer.insert(0, self.step_input) kwargs[layer_name] = v1_layer + if self.name is None: + return self.to_proto_impl(**kwargs) + # memory may have the same name with some layer - if isinstance(self, MemoryV2): + if isinstance(self, MemoryV2) or isinstance(self, LayerOutputV2): return self.to_proto_impl(**kwargs) if self.name not in context: @@ -144,7 +142,7 @@ def __convert_to_v2__(method_name, name_prefix, parent_names): wrapper = None class V2LayerImpl(Layer): - def __init__(self, name=None, step_input=None, **kwargs): + def __init__(self, name=None, **kwargs): parent_layers = dict() other_kwargs = dict() for pname in parent_names: @@ -155,7 +153,7 @@ def __init__(self, name=None, step_input=None, **kwargs): if key not in parent_names: other_kwargs[key] = kwargs[key] - super(V2LayerImpl, self).__init__(name, parent_layers, step_input) + super(V2LayerImpl, self).__init__(name, parent_layers) self.__other_kwargs__ = other_kwargs if wrapper is not None: @@ -214,6 +212,48 @@ def to_proto_impl(self, **kwargs): return conf_helps.memory(name=self.name, size=self.size, **args) +class LayerOutputV2(Layer): + def __init__(self, layer_output): + assert isinstance(layer_output, conf_helps.LayerOutput) + self.layer_output = layer_output + super(LayerOutputV2, self).__init__( + name=layer_output.name, parent_layers=dict()) + + def to_proto_impl(self): + return self.layer_output + + +class RecurrentGroupV2(Layer): + def __init__(self, name, **kwargs): + self.__parent_names__ = ['input'] + other_kwargs = dict() + parent_layers = dict() + for pname in self.__parent_names__: + if kwargs.has_key(pname): + parent_layers[pname] = kwargs[pname] + for key in kwargs.keys(): + if key not in self.__parent_names__: + other_kwargs[key] = kwargs[key] + self.__kwargs__ = other_kwargs + + super(RecurrentGroupV2, self).__init__( + name=name, parent_layers=parent_layers) + + def to_proto_impl(self, **kwargs): + def in_args_converter(in_args): + if not isinstance(in_args, collections.Sequence): + in_args = [in_args] + return [LayerOutputV2(input) for input in in_args] + + args = dict() + for each in kwargs: + args[each] = kwargs[each] + for each in self.__kwargs__: + args[each] = self.__kwargs__[each] + return conf_helps.recurrent_group( + name=self.name, in_args_converter=in_args_converter, **args) + + data = DataLayerV2 fc = __convert_to_v2__('fc_layer', name_prefix='fc', parent_names=['input']) max_id = __convert_to_v2__( @@ -234,8 +274,7 @@ def to_proto_impl(self, **kwargs): 'embedding_layer', name_prefix='embedding', parent_names=['input']) last_seq = __convert_to_v2__( 'last_seq', name_prefix='last_seq', parent_names=['input']) -recurrent_group = __convert_to_v2__( - 'recurrent_group', name_prefix='recurrent_layer', parent_names=['input']) +recurrent_group = RecurrentGroupV2 memory = MemoryV2 cross_entropy_with_selfnorm_cost = __convert_to_v2__( diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py index 73d769a3582441..04c0fc7cb0bf12 100644 --- a/python/paddle/v2/tests/test_layer.py +++ b/python/paddle/v2/tests/test_layer.py @@ -63,7 +63,7 @@ def test_simple_rnn(self): word_dim = 8 hidden_dim = 8 - def test_old_rnn(): + def parse_old_rnn(): def step(y): mem = conf_helps.memory(name="rnn_state", size=hidden_dim) out = conf_helps.fc_layer( @@ -81,16 +81,15 @@ def test(): return str(parse_network(test)) - def test_new_rnn(): + def parse_new_rnn(): def new_step(y): mem = layer.memory(name="rnn_state", size=hidden_dim) - out = layer.fc(input=[mem], - step_input=y, + out = layer.fc(input=[y, mem], size=hidden_dim, act=activation.Tanh(), bias_attr=True, name="rnn_state") - return out.to_proto(dict()) + return out data1 = layer.data( name="word", type=data_type.integer_value(dict_dim)) @@ -99,8 +98,8 @@ def new_step(y): name="rnn", step=new_step, input=embd) return str(layer.parse_network(rnn_layer)) - diff = difflib.unified_diff(test_old_rnn().splitlines(1), - test_new_rnn().splitlines(1)) + diff = difflib.unified_diff(parse_old_rnn().splitlines(1), + parse_new_rnn().splitlines(1)) print ''.join(diff) From 632ad5c9e25c906b0189be308ecf22c2409abb2c Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 27 Feb 2017 19:59:38 +0800 Subject: [PATCH 08/24] support sequence_rnn_multi_input --- demo/mnist/api_train_v2.py | 3 - python/paddle/trainer/config_parser.py | 6 +- python/paddle/v2/layer.py | 30 ++++- python/paddle/v2/tests/CMakeLists.txt | 6 +- python/paddle/v2/tests/test_layer.py | 50 -------- python/paddle/v2/tests/test_rnn_layer.py | 143 +++++++++++++++++++++++ 6 files changed, 178 insertions(+), 60 deletions(-) create mode 100644 python/paddle/v2/tests/test_rnn_layer.py diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py index 5e66b7399ad52f..6fc01ce58be57c 100644 --- a/demo/mnist/api_train_v2.py +++ b/demo/mnist/api_train_v2.py @@ -3,9 +3,6 @@ import mnist_util -import pudb -pudb.set_trace() - def train_reader(): train_file = './data/raw_data/train' diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index da937152ee0ce7..487d4dfd5b1b81 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -3474,6 +3474,8 @@ def update_g_config(): for name in g_config.model_config.output_layer_names: assert name in g_layer_map, \ 'input name "%s" does not correspond to a layer name' % name + for hook in _parse_config_hooks: + hook() return g_config @@ -3485,8 +3487,8 @@ def parse_config(trainer_config, config_arg_str): passed to config script as a dictionary CONFIG_ARGS ''' init_config_environment() - for hook in _parse_config_hooks: - hook() + # for hook in _parse_config_hooks: + # hook() config_args = {} diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 44c7661b2463f0..5328070456ef1e 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -124,11 +124,13 @@ def to_proto(self, context): return self.to_proto_impl(**kwargs) # memory may have the same name with some layer - if isinstance(self, MemoryV2) or isinstance(self, LayerOutputV2): + if isinstance(self, MemoryV2): return self.to_proto_impl(**kwargs) + # store v1 API's layer_output in context with the key of it's name. if self.name not in context: context[self.name] = self.to_proto_impl(**kwargs) + return context[self.name] def to_proto_impl(self, **kwargs): @@ -200,8 +202,19 @@ class MemoryV2(Layer): def __init__(self, name, size, **kwargs): self.name = name self.size = size - self.__kwargs__ = kwargs - super(MemoryV2, self).__init__(name=name, parent_layers=dict()) + + parent_names = ['boot_layer'] + parent_layers = dict() + other_kwargs = dict() + for pname in parent_names: + if kwargs.has_key(pname): + parent_layers[pname] = kwargs[pname] + + for key in kwargs.keys(): + if key not in parent_names: + other_kwargs[key] = kwargs[key] + super(MemoryV2, self).__init__(name=name, parent_layers=parent_layers) + self.__kwargs__ = other_kwargs def to_proto_impl(self, **kwargs): args = dict() @@ -209,10 +222,16 @@ def to_proto_impl(self, **kwargs): args[each] = kwargs[each] for each in self.__kwargs__: args[each] = self.__kwargs__[each] + return conf_helps.memory(name=self.name, size=self.size, **args) class LayerOutputV2(Layer): + """ + LayerOutputV2 is used to store the result of LayerOutput in v1 api. + It will not store it's parents because layer_output has been parsed already. + """ + def __init__(self, layer_output): assert isinstance(layer_output, conf_helps.LayerOutput) self.layer_output = layer_output @@ -239,8 +258,11 @@ def __init__(self, name, **kwargs): super(RecurrentGroupV2, self).__init__( name=name, parent_layers=parent_layers) + wrapper = wrap_name_default(name_prefix='recurrent_group') + __init__ = wrapper(__init__) + def to_proto_impl(self, **kwargs): - def in_args_converter(in_args): + def in_args_converter(*in_args): if not isinstance(in_args, collections.Sequence): in_args = [in_args] return [LayerOutputV2(input) for input in in_args] diff --git a/python/paddle/v2/tests/CMakeLists.txt b/python/paddle/v2/tests/CMakeLists.txt index ceb71c1454b2ba..bc0f24792796b8 100644 --- a/python/paddle/v2/tests/CMakeLists.txt +++ b/python/paddle/v2/tests/CMakeLists.txt @@ -1,3 +1,7 @@ add_test(NAME test_v2_layer COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ - ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py + ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py) + +add_test(NAME test_v2_rnn_layer + COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_rnn_layer.py) diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py index 04c0fc7cb0bf12..41d9683464d29f 100644 --- a/python/paddle/v2/tests/test_layer.py +++ b/python/paddle/v2/tests/test_layer.py @@ -11,16 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import difflib import unittest -import paddle.trainer_config_helpers as conf_helps import paddle.v2.activation as activation import paddle.v2.attr as attr import paddle.v2.data_type as data_type import paddle.v2.layer as layer -from paddle.trainer_config_helpers.config_parser_utils import \ - parse_network_config as parse_network pixel = layer.data(name='pixel', type=data_type.dense_vector(784)) label = layer.data(name='label', type=data_type.integer_value(10)) @@ -57,51 +53,5 @@ def test_cost_layer(self): print layer.parse_network(cost7, cost8, cost9, cost10, cost11) -class RNNTest(unittest.TestCase): - def test_simple_rnn(self): - dict_dim = 10 - word_dim = 8 - hidden_dim = 8 - - def parse_old_rnn(): - def step(y): - mem = conf_helps.memory(name="rnn_state", size=hidden_dim) - out = conf_helps.fc_layer( - input=[y, mem], - size=hidden_dim, - act=activation.Tanh(), - bias_attr=True, - name="rnn_state") - return out - - def test(): - data1 = conf_helps.data_layer(name="word", size=dict_dim) - embd = conf_helps.embedding_layer(input=data1, size=word_dim) - conf_helps.recurrent_group(name="rnn", step=step, input=embd) - - return str(parse_network(test)) - - def parse_new_rnn(): - def new_step(y): - mem = layer.memory(name="rnn_state", size=hidden_dim) - out = layer.fc(input=[y, mem], - size=hidden_dim, - act=activation.Tanh(), - bias_attr=True, - name="rnn_state") - return out - - data1 = layer.data( - name="word", type=data_type.integer_value(dict_dim)) - embd = layer.embedding(input=data1, size=word_dim) - rnn_layer = layer.recurrent_group( - name="rnn", step=new_step, input=embd) - return str(layer.parse_network(rnn_layer)) - - diff = difflib.unified_diff(parse_old_rnn().splitlines(1), - parse_new_rnn().splitlines(1)) - print ''.join(diff) - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/tests/test_rnn_layer.py b/python/paddle/v2/tests/test_rnn_layer.py new file mode 100644 index 00000000000000..bf2c4db61aa502 --- /dev/null +++ b/python/paddle/v2/tests/test_rnn_layer.py @@ -0,0 +1,143 @@ +# Copyright PaddlePaddle contributors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import difflib +import unittest + +import paddle.trainer_config_helpers as conf_helps +import paddle.v2.activation as activation +import paddle.v2.data_type as data_type +import paddle.v2.layer as layer +from paddle.trainer_config_helpers.config_parser_utils import \ + parse_network_config as parse_network + + +class RNNTest(unittest.TestCase): + def test_simple_rnn(self): + dict_dim = 10 + word_dim = 8 + hidden_dim = 8 + + def parse_old_rnn(): + def step(y): + mem = conf_helps.memory(name="rnn_state", size=hidden_dim) + out = conf_helps.fc_layer( + input=[y, mem], + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out + + def test(): + data = conf_helps.data_layer(name="word", size=dict_dim) + embd = conf_helps.embedding_layer(input=data, size=word_dim) + conf_helps.recurrent_group(name="rnn", step=step, input=embd) + + return str(parse_network(test)) + + def parse_new_rnn(): + def new_step(y): + mem = layer.memory(name="rnn_state", size=hidden_dim) + out = layer.fc(input=[y, mem], + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out + + data = layer.data( + name="word", type=data_type.integer_value(dict_dim)) + embd = layer.embedding(input=data, size=word_dim) + rnn_layer = layer.recurrent_group( + name="rnn", step=new_step, input=embd) + return str(layer.parse_network(rnn_layer)) + + diff = difflib.unified_diff(parse_old_rnn().splitlines(1), + parse_new_rnn().splitlines(1)) + print ''.join(diff) + + def test_sequence_rnn_multi_input(self): + dict_dim = 10 + word_dim = 8 + hidden_dim = 8 + label_dim = 3 + + def parse_old_rnn(): + def step(y, wid): + z = conf_helps.embedding_layer(input=wid, size=word_dim) + mem = conf_helps.memory(name="rnn_state", size=hidden_dim) + out = conf_helps.fc_layer( + input=[y, z, mem], + size=hidden_dim, + act=conf_helps.TanhActivation(), + bias_attr=True, + name="rnn_state") + return out + + def test(): + data = conf_helps.data_layer(name="word", size=dict_dim) + label = conf_helps.data_layer(name="label", size=label_dim) + emb = conf_helps.embedding_layer(input=data, size=word_dim) + out = conf_helps.recurrent_group( + name="rnn", step=step, input=[emb, data]) + + rep = conf_helps.last_seq(input=out) + prob = conf_helps.fc_layer( + size=label_dim, + input=rep, + act=conf_helps.SoftmaxActivation(), + bias_attr=True) + + conf_helps.outputs( + conf_helps.classification_cost( + input=prob, label=label)) + + return str(parse_network(test)) + + def parse_new_rnn(): + def step(y, wid): + z = layer.embedding(input=wid, size=word_dim) + mem = layer.memory(name="rnn_state", size=hidden_dim) + out = layer.fc(input=[y, z, mem], + size=hidden_dim, + act=activation.Tanh(), + bias_attr=True, + name="rnn_state") + return out + + data = layer.data( + name="word", type=data_type.dense_vector(dict_dim)) + label = layer.data( + name="label", type=data_type.dense_vector(label_dim)) + emb = layer.embedding(input=data, size=word_dim) + out = layer.recurrent_group( + name="rnn", step=step, input=[emb, data]) + + rep = layer.last_seq(input=out) + prob = layer.fc(size=label_dim, + input=rep, + act=activation.Softmax(), + bias_attr=True) + + cost = layer.classification_cost(input=prob, label=label) + + return str(layer.parse_network(cost)) + + diff = difflib.unified_diff(parse_old_rnn().splitlines(1), + parse_new_rnn().splitlines(1)) + print ''.join(diff) + + +if __name__ == '__main__': + unittest.main() From 876d597495015ba416639af7426258d32587986e Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 27 Feb 2017 20:41:00 +0800 Subject: [PATCH 09/24] memory have same name with some layer --- python/paddle/v2/layer.py | 2 ++ python/paddle/v2/tests/CMakeLists.txt | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 6dea3afbcf407f..dbd802bee8270d 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -139,6 +139,8 @@ def to_proto(self, context): if self.name is None: return self.to_proto_impl(**kwargs) + elif isinstance(self, MemoryV2): + return self.to_proto_impl(**kwargs) elif self.name not in context: context[self.name] = self.to_proto_impl(**kwargs) diff --git a/python/paddle/v2/tests/CMakeLists.txt b/python/paddle/v2/tests/CMakeLists.txt index bc0f24792796b8..b2f43c42de8ebf 100644 --- a/python/paddle/v2/tests/CMakeLists.txt +++ b/python/paddle/v2/tests/CMakeLists.txt @@ -1,7 +1,11 @@ add_test(NAME test_v2_layer COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ - ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py) + ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_layer.py + WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) add_test(NAME test_v2_rnn_layer COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_rnn_layer.py) + +add_test(NAME test_v2_api + COMMAND bash ${PROJ_ROOT}/python/paddle/v2/tests/run_tests.sh ${PYTHON_EXECUTABLE}) From 7ad8363036af9d8ae91e6698ff09804023602bdf Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Tue, 28 Feb 2017 19:44:56 +0800 Subject: [PATCH 10/24] support boot_layer --- .../paddle/trainer_config_helpers/layers.py | 6 ++- python/paddle/v2/layer.py | 37 ++++++++++++++++--- python/paddle/v2/tests/CMakeLists.txt | 1 - 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index a8b536dda4f249..4f7a2735e296d2 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -3110,7 +3110,8 @@ def recurrent_group(step, name=None, targetInlink=None, is_generating=False, - in_args_converter=None): + in_args_converter=None, + boot_layer=None): """ Recurrent layer group is an extremely flexible recurrent unit in PaddlePaddle. As long as the user defines the calculation done within a @@ -3256,6 +3257,9 @@ def map_in_links(x): if in_args_converter is None: layer_outs = step(*in_args) else: + # append boot_layer to the last of in_args + if boot_layer is not None: + in_args.append(boot_layer) layer_outs = step(*in_args_converter(*in_args)).to_proto(dict()) if isinstance(layer_outs, LayerOutput): diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 1155eca9c815b8..542d5a515c5619 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -140,10 +140,13 @@ def to_proto(self, context): if self.name is None: return self.to_proto_impl(**kwargs) elif isinstance(self, MemoryV2): - return self.to_proto_impl(**kwargs) - elif self.name not in context: - context[self.name] = self.to_proto_impl(**kwargs) + name = self.name + "#__memory__" + if name not in context: + context[name] = self.to_proto_impl(**kwargs) + return context[name] + if self.name not in context: + context[self.name] = self.to_proto_impl(**kwargs) return context[self.name] def to_proto_impl(self, **kwargs): @@ -256,9 +259,32 @@ def to_proto_impl(self): return self.layer_output +class StaticInputV2(Layer): + def __init__(self, **kwargs): + self.__parent_names__ = ['input'] + other_kwargs = dict() + parent_layers = dict() + for pname in self.__parent_names__: + if kwargs.has_key(pname): + parent_layers[pname] = kwargs[pname] + for key in kwargs.keys(): + if key not in self.__parent_names__: + other_kwargs[key] = kwargs[key] + self.__kwargs__ = other_kwargs + super(StaticInputV2, self).__init__(parent_layers=parent_layers) + + def to_proto_impl(self, **kwargs): + args = dict() + for each in kwargs: + args[each] = kwargs[each] + for each in self.__kwargs__: + args[each] = self.__kwargs__[each] + return conf_helps.StaticInput(**args) + + class RecurrentGroupV2(Layer): def __init__(self, name, **kwargs): - self.__parent_names__ = ['input'] + self.__parent_names__ = ['input', 'boot_layer'] other_kwargs = dict() parent_layers = dict() for pname in self.__parent_names__: @@ -443,7 +469,8 @@ def mixed(size=0, ['nce', 'nce_layer', ['input', 'label']], ['hsigmoid', 'hsigmoid', ['input', 'label']], # check layers - ['eos', 'eos_layer', ['input']] + ['eos', 'eos_layer', ['input']], + ['gru_step_layer', 'gru_step_layer', ['input', 'output_mem']] ] for l in layer_list: globals()[l[0]] = __convert_to_v2__(l[1], l[2]) diff --git a/python/paddle/v2/tests/CMakeLists.txt b/python/paddle/v2/tests/CMakeLists.txt index 948cebdf727c69..572deaff356712 100644 --- a/python/paddle/v2/tests/CMakeLists.txt +++ b/python/paddle/v2/tests/CMakeLists.txt @@ -10,7 +10,6 @@ add_test(NAME test_v2_rnn_layer COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_rnn_layer.py) - add_test(NAME test_topology COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/v2/tests/test_topology.py From 73af1942c8fcf89fffa0a13b7d8fdc6cdcdcb815 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 1 Mar 2017 15:54:46 +0800 Subject: [PATCH 11/24] add the implementation of rnn by yuyang --- .../paddle/trainer_config_helpers/layers.py | 2 +- python/paddle/v2/layer.py | 151 +++++++++++------- 2 files changed, 97 insertions(+), 56 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 250878cbe11595..dcb39784a54120 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -822,7 +822,7 @@ def data_layer(name, size, height=None, width=None, layer_attr=None): return LayerOutput(name, LayerType.DATA, size=size) -@wrap_name_default("embedding") +@wrap_name_default("embedding_layer") @wrap_param_attr_default() @layer_support(ERROR_CLIPPING) def embedding_layer(input, size, name=None, param_attr=None, layer_attr=None): diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 4f6c71664a8bee..71d0e54c0a6f85 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -76,6 +76,9 @@ class for each layer creation function in paddle.trainer_config_helpers.layers. wrap_bias_attr_default from paddle.trainer_config_helpers.default_decorators import wrap_name_default from paddle.trainer_config_helpers.layers import layer_support +from paddle.trainer.config_parser import \ + RecurrentLayerGroupWithoutOutLinksBegin, RecurrentLayerGroupSetOutLink, \ + RecurrentLayerGroupEnd, model_type import activation import data_type @@ -126,21 +129,28 @@ def to_proto(self, context): self.__parent_layers__[layer_name]) kwargs[layer_name] = v1_layer - if self.name is None: + if self.context_name() is None: return self.to_proto_impl(**kwargs) elif isinstance(self, MemoryV2): name = self.name + "#__memory__" if name not in context: context[name] = self.to_proto_impl(**kwargs) return context[name] - - if self.name not in context: - context[self.name] = self.to_proto_impl(**kwargs) + elif self.context_name() not in context: + context[self.context_name()] = self.to_proto_impl(**kwargs) return context[self.name] def to_proto_impl(self, **kwargs): raise NotImplementedError() + def context_name(self): + """ + Context name means the context which stores `to_proto_impl` result. + If multiple layer share same context_name, the `to_proto_impl` of them + will be invoked only once. + """ + return self.name + def __convert_to_v2__(method_name, parent_names, is_default_name=True): if is_default_name: @@ -231,6 +241,9 @@ def to_proto_impl(self, **kwargs): return conf_helps.memory(name=self.name, size=self.size, **args) + def context_name(self): + return self.name + "#memory" + class LayerOutputV2(Layer): """ @@ -249,60 +262,20 @@ def to_proto_impl(self): class StaticInputV2(Layer): - def __init__(self, **kwargs): - self.__parent_names__ = ['input'] - other_kwargs = dict() - parent_layers = dict() - for pname in self.__parent_names__: - if kwargs.has_key(pname): - parent_layers[pname] = kwargs[pname] - for key in kwargs.keys(): - if key not in self.__parent_names__: - other_kwargs[key] = kwargs[key] - self.__kwargs__ = other_kwargs - super(StaticInputV2, self).__init__(parent_layers=parent_layers) - - def to_proto_impl(self, **kwargs): - args = dict() - for each in kwargs: - args[each] = kwargs[each] - for each in self.__kwargs__: - args[each] = self.__kwargs__[each] - return conf_helps.StaticInput(**args) - - -class RecurrentGroupV2(Layer): - def __init__(self, name, **kwargs): - self.__parent_names__ = ['input', 'boot_layer'] - other_kwargs = dict() - parent_layers = dict() - for pname in self.__parent_names__: - if kwargs.has_key(pname): - parent_layers[pname] = kwargs[pname] - for key in kwargs.keys(): - if key not in self.__parent_names__: - other_kwargs[key] = kwargs[key] - self.__kwargs__ = other_kwargs - - super(RecurrentGroupV2, self).__init__( - name=name, parent_layers=parent_layers) + def __init__(self, input=None, **kwargs): + assert input is not None + self.__kwargs__ = kwargs + super(StaticInputV2, self).__init__( + name=input.name, parent_layers={'input': input}) - wrapper = wrap_name_default(name_prefix='recurrent_group') - __init__ = wrapper(__init__) + def context_name(self): + return self.name + "#static_input" def to_proto_impl(self, **kwargs): - def in_args_converter(*in_args): - if not isinstance(in_args, collections.Sequence): - in_args = [in_args] - return [LayerOutputV2(input) for input in in_args] - args = dict() - for each in kwargs: - args[each] = kwargs[each] - for each in self.__kwargs__: - args[each] = self.__kwargs__[each] - return conf_helps.recurrent_group( - name=self.name, in_args_converter=in_args_converter, **args) + args.update(kwargs) + args.update(self.__kwargs__) + return conf_helps.StaticInput(**args) class MixedLayerV2(Layer): @@ -377,11 +350,79 @@ def mixed(size=0, return MixedLayerV2(size, input, name, act, bias_attr, layer_attr) +class RecurrentLayerInput(Layer): + def __init__(self, recurrent_name, index, parent_layers): + assert len(parent_layers) == 1 + self.__parents__ = parent_layers.values()[0] + print self.__parents__, parent_layers + super(RecurrentLayerInput, self).__init__( + name=self.__parents__[index].name, parent_layers=parent_layers) + self.__recurrent_name__ = recurrent_name + + def context_name(self): + return self.__recurrent_name__ + ".begin" + + def to_proto_impl(self, **kwargs): + model_type('recurrent_nn') + RecurrentLayerGroupWithoutOutLinksBegin( + name=self.__recurrent_name__, + in_links=map(lambda x: x.name, self.__parents__)) + return self + + +class RecurrentLayerOutput(Layer): + def __init__(self, recurrent_name, index, parent_layers): + assert len(parent_layers) == 1 + self.__parents__ = parent_layers.values()[0] + super(RecurrentLayerOutput, self).__init__( + name=self.__parents__[index].name, parent_layers=parent_layers) + self.__recurrent_name__ = recurrent_name + + def context_name(self): + return self.__recurrent_name__ + ".end" + + def to_proto_impl(self, **kwargs): + for l in self.__parents__: + RecurrentLayerGroupSetOutLink(l.name) + RecurrentLayerGroupEnd(name=self.__recurrent_name__) + + +@wrap_name_default() +def recurrent_group(step, input, name=None): + if not isinstance(input, collections.Sequence): + input = [input] + + actual_input = [ + RecurrentLayerInput( + recurrent_name=name, + index=i, + parent_layers={'recurrent_inputs': input}) + for i in xrange(len(input)) + ] + + actual_output = step(*actual_input) + + if not isinstance(actual_output, collections.Sequence): + actual_output = [actual_output] + + retv = [ + RecurrentLayerOutput( + recurrent_name=name, + index=i, + parent_layers={'recurrent_outputs': actual_output}) + for i in xrange(len(actual_output)) + ] + if len(retv) == 1: + return retv[0] + else: + return retv + + LayerV2 = Layer data = DataLayerV2 AggregateLevel = conf_helps.layers.AggregateLevel ExpandLevel = conf_helps.layers.ExpandLevel -recurrent_group = RecurrentGroupV2 +recurrent_group = recurrent_group memory = MemoryV2 From fd3be087d603bc1ea399769b79c1b0f9e1758161 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 1 Mar 2017 16:01:42 +0800 Subject: [PATCH 12/24] restore recurrent_group in v1 --- python/paddle/trainer_config_helpers/layers.py | 12 ++---------- python/paddle/v2/layer.py | 5 ----- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index dcb39784a54120..2b95c2ed0f3566 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -3115,9 +3115,7 @@ def recurrent_group(step, reverse=False, name=None, targetInlink=None, - is_generating=False, - in_args_converter=None, - boot_layer=None): + is_generating=False): """ Recurrent layer group is an extremely flexible recurrent unit in PaddlePaddle. As long as the user defines the calculation done within a @@ -3260,13 +3258,7 @@ def map_in_links(x): assert (is_generating != has_LayerOutput) - if in_args_converter is None: - layer_outs = step(*in_args) - else: - # append boot_layer to the last of in_args - if boot_layer is not None: - in_args.append(boot_layer) - layer_outs = step(*in_args_converter(*in_args)).to_proto(dict()) + layer_outs = step(*in_args) if isinstance(layer_outs, LayerOutput): layer_outs = [layer_outs] diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 71d0e54c0a6f85..f1ca0b46ebc42e 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -131,11 +131,6 @@ def to_proto(self, context): if self.context_name() is None: return self.to_proto_impl(**kwargs) - elif isinstance(self, MemoryV2): - name = self.name + "#__memory__" - if name not in context: - context[name] = self.to_proto_impl(**kwargs) - return context[name] elif self.context_name() not in context: context[self.context_name()] = self.to_proto_impl(**kwargs) return context[self.name] From 6b199367e0339119a699292ff488976bdb22554f Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 1 Mar 2017 16:27:55 +0800 Subject: [PATCH 13/24] handle memory layer --- python/paddle/v2/layer.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index f1ca0b46ebc42e..bdb0c29a47db00 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -133,7 +133,11 @@ def to_proto(self, context): return self.to_proto_impl(**kwargs) elif self.context_name() not in context: context[self.context_name()] = self.to_proto_impl(**kwargs) - return context[self.name] + + if self.use_context_name(): + return context[self.context_name()] + else: + return context[self.name] def to_proto_impl(self, **kwargs): raise NotImplementedError() @@ -146,6 +150,9 @@ def context_name(self): """ return self.name + def use_context_name(self): + return False + def __convert_to_v2__(method_name, parent_names, is_default_name=True): if is_default_name: @@ -239,6 +246,13 @@ def to_proto_impl(self, **kwargs): def context_name(self): return self.name + "#memory" + def use_context_name(self): + """ + memory layer will have the same name with some layer + :return: + """ + return True + class LayerOutputV2(Layer): """ From 5fc572c29459faf0fbc342e3582ec8b6ee6f02ac Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 1 Mar 2017 20:28:00 +0800 Subject: [PATCH 14/24] Complete Memory --- python/paddle/trainer/config_parser.py | 6 +- python/paddle/v2/layer.py | 99 ++++++++++++++++++------ python/paddle/v2/tests/test_rnn_layer.py | 27 ++++--- 3 files changed, 96 insertions(+), 36 deletions(-) diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 487d4dfd5b1b81..da937152ee0ce7 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -3474,8 +3474,6 @@ def update_g_config(): for name in g_config.model_config.output_layer_names: assert name in g_layer_map, \ 'input name "%s" does not correspond to a layer name' % name - for hook in _parse_config_hooks: - hook() return g_config @@ -3487,8 +3485,8 @@ def parse_config(trainer_config, config_arg_str): passed to config script as a dictionary CONFIG_ARGS ''' init_config_environment() - # for hook in _parse_config_hooks: - # hook() + for hook in _parse_config_hooks: + hook() config_args = {} diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index bdb0c29a47db00..bf5d653e8ae3e4 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -67,7 +67,7 @@ class for each layer creation function in paddle.trainer_config_helpers.layers. """ import collections - +import inspect import paddle.trainer_config_helpers as conf_helps from paddle.trainer_config_helpers.config_parser_utils import \ parse_network_config as __parse__ @@ -216,31 +216,83 @@ def to_proto_impl(self, **kwargs): return getattr(conf_helps, self.__method_name__)(name=self.name, **args) -class MemoryV2(Layer): - def __init__(self, name, size, **kwargs): - self.name = name - self.size = size +class WithExtraParent(Layer): + def extra_parent(self): + return self.__extra_parent__ - parent_names = ['boot_layer'] - parent_layers = dict() - other_kwargs = dict() - for pname in parent_names: - if kwargs.has_key(pname): - parent_layers[pname] = kwargs[pname] + def __init__(self, name=None, parent_layers=None): + self.__extra_parent__ = [] + super(WithExtraParent, self).__init__(name, parent_layers) - for key in kwargs.keys(): - if key not in parent_names: - other_kwargs[key] = kwargs[key] - super(MemoryV2, self).__init__(name=name, parent_layers=parent_layers) - self.__kwargs__ = other_kwargs + def append_extra_parent(self, parent): + self.__extra_parent__.append(parent) - def to_proto_impl(self, **kwargs): + def to_proto(self, context): + """ + function to set proto attribute + """ + kwargs = dict() + for p in self.__extra_parent__: + p.to_proto(context=context) + + for layer_name in self.__parent_layers__: + if not isinstance(self.__parent_layers__[layer_name], + collections.Sequence): + v1_layer = self.__parent_layers__[layer_name].to_proto( + context=context) + else: + v1_layer = map(lambda x: x.to_proto(context=context), + self.__parent_layers__[layer_name]) + kwargs[layer_name] = v1_layer + + if self.context_name() is None: + return self.to_proto_impl(context=context, **kwargs) + elif self.context_name() not in context: + context[self.context_name()] = self.to_proto_impl( + context=context, **kwargs) + + if self.use_context_name(): + return context[self.context_name()] + else: + return context[self.name] + + +class MemoryV2(WithExtraParent): + def __init__(self, name, size, **kwargs): + self.name = name + self.size = size + super(MemoryV2, self).__init__(name=name, parent_layers=dict()) + self.__kwargs__ = kwargs + self.__boot_layer_name__ = None + if 'boot_layer' in kwargs: + begin_of_current_rnn = [] + # TODO(yuyang18): Fix inspect, it could be wrong when user invoke a + # function inside step. + st = inspect.stack() + for i in xrange(len(st)): + locs = inspect.stack()[i][0].f_locals + for val in locs.viewvalues(): + if isinstance(val, RecurrentLayerInput): + begin_of_current_rnn.append(val) + + if begin_of_current_rnn: + break + assert begin_of_current_rnn is not None + for extra in begin_of_current_rnn: + self.append_extra_parent(extra) + assert isinstance(extra, WithExtraParent) + extra.append_extra_parent(kwargs['boot_layer']) + self.__boot_layer_name__ = kwargs['boot_layer'].name + + def to_proto_impl(self, context, **kwargs): args = dict() for each in kwargs: args[each] = kwargs[each] for each in self.__kwargs__: args[each] = self.__kwargs__[each] + if self.__boot_layer_name__ is not None: + args['boot_layer'] = context[self.__boot_layer_name__] return conf_helps.memory(name=self.name, size=self.size, **args) def context_name(self): @@ -328,7 +380,7 @@ def __iadd__(self, other): self.__inputs__.append(other) return self else: - raise MixedLayerTypeV2.AddToSealedMixedLayerExceptionV2() + raise MixedLayerV2.AddToSealedMixedLayerExceptionV2() def __enter__(self): assert len(self.__inputs__) == 0 @@ -359,11 +411,10 @@ def mixed(size=0, return MixedLayerV2(size, input, name, act, bias_attr, layer_attr) -class RecurrentLayerInput(Layer): +class RecurrentLayerInput(WithExtraParent): def __init__(self, recurrent_name, index, parent_layers): assert len(parent_layers) == 1 self.__parents__ = parent_layers.values()[0] - print self.__parents__, parent_layers super(RecurrentLayerInput, self).__init__( name=self.__parents__[index].name, parent_layers=parent_layers) self.__recurrent_name__ = recurrent_name @@ -371,7 +422,7 @@ def __init__(self, recurrent_name, index, parent_layers): def context_name(self): return self.__recurrent_name__ + ".begin" - def to_proto_impl(self, **kwargs): + def to_proto_impl(self, context, **kwargs): model_type('recurrent_nn') RecurrentLayerGroupWithoutOutLinksBegin( name=self.__recurrent_name__, @@ -458,8 +509,10 @@ def __layer_name_mapping__(inname): def __layer_name_mapping_parent_names__(inname): all_args = getattr(conf_helps, inname).argspec.args return filter( - lambda x: x in ['input1', 'input2','label', 'input', 'a', 'b', 'expand_as', - 'weights', 'vectors', 'weight', 'score', 'left', 'right'], + lambda x: x in ['input1', 'input2', 'label', 'input', 'a', 'b', + 'expand_as', + 'weights', 'vectors', 'weight', 'score', 'left', + 'right'], all_args) diff --git a/python/paddle/v2/tests/test_rnn_layer.py b/python/paddle/v2/tests/test_rnn_layer.py index bf2c4db61aa502..48aeb42391576f 100644 --- a/python/paddle/v2/tests/test_rnn_layer.py +++ b/python/paddle/v2/tests/test_rnn_layer.py @@ -106,9 +106,21 @@ def test(): return str(parse_network(test)) def parse_new_rnn(): + data = layer.data( + name="word", type=data_type.dense_vector(dict_dim)) + label = layer.data( + name="label", type=data_type.dense_vector(label_dim)) + emb = layer.embedding(input=data, size=word_dim) + + boot_layer = layer.data( + name="boot", type=data_type.dense_vector(10)) + + boot_layer = layer.fc(name='wtf', input=boot_layer, size=10) + def step(y, wid): z = layer.embedding(input=wid, size=word_dim) - mem = layer.memory(name="rnn_state", size=hidden_dim) + mem = layer.memory( + name="rnn_state", size=hidden_dim, boot_layer=boot_layer) out = layer.fc(input=[y, z, mem], size=hidden_dim, act=activation.Tanh(), @@ -116,11 +128,6 @@ def step(y, wid): name="rnn_state") return out - data = layer.data( - name="word", type=data_type.dense_vector(dict_dim)) - label = layer.data( - name="label", type=data_type.dense_vector(label_dim)) - emb = layer.embedding(input=data, size=word_dim) out = layer.recurrent_group( name="rnn", step=step, input=[emb, data]) @@ -134,9 +141,11 @@ def step(y, wid): return str(layer.parse_network(cost)) - diff = difflib.unified_diff(parse_old_rnn().splitlines(1), - parse_new_rnn().splitlines(1)) - print ''.join(diff) + with open("/Users/baidu/old.out", 'w') as f: + print >> f, parse_old_rnn() + with open("/Users/baidu/new.out", "w") as f: + print >> f, parse_new_rnn() + # print ''.join(diff) if __name__ == '__main__': From 06cbd81eecf40c6b90b72da01126c176e4fc0ebf Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Thu, 2 Mar 2017 02:23:02 +0800 Subject: [PATCH 15/24] CONLL05 dataset for SRL --- python/paddle/v2/dataset/conll05.py | 188 ++++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100644 python/paddle/v2/dataset/conll05.py diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py new file mode 100644 index 00000000000000..e9029063710f20 --- /dev/null +++ b/python/paddle/v2/dataset/conll05.py @@ -0,0 +1,188 @@ +import paddle.v2.dataset.common +import tarfile +import gzip +import itertools + +__all__ = ['test, get_dict', 'get_embedding'] +""" +Conll 2005 dataset. Paddle semantic role labeling Book and demo use this +dataset as an example. Because Conll 2005 is not free in public, the default +downloaded URL is test set of Conll 2005 (which is public). Users can change +URL and MD5 to their Conll dataset. +""" + +DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz' +DATA_MD5 = '387719152ae52d60422c016e92a742fc' +WORDDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/wordDict.txt' +WORDDICT_MD5 = 'ea7fb7d4c75cc6254716f0177a506baa' +VERBDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/verbDict.txt' +VERBDICT_MD5 = '0d2977293bbb6cbefab5b0f97db1e77c' +TRGDICT_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/targetDict.txt' +TRGDICT_MD5 = 'd8c7f03ceb5fc2e5a0fa7503a4353751' +EMB_URL = 'http://paddlepaddle.bj.bcebos.com/demo/srl_dict_and_embedding/emb' +EMB_MD5 = 'bf436eb0faa1f6f9103017f8be57cdb7' + +UNK_IDX = 0 + + +def load_dict(filename): + d = dict() + with open(filename, 'r') as f: + for i, line in enumerate(f): + d[line.strip()] = i + return d + + +def corpus_reader(data_path, words_name, props_name): + """ + Read one corpus by corpus name. It returns an iterator. Each element of + this iterator is a tuple including sentence and labels. The sentence is + consist of a list of word IDs. The labels include a list of label IDs. + :param name: corpus name. + :type name: basestring + :return: a iterator of data. + :rtype: iterator + """ + + def reader(): + tf = tarfile.open(data_path) + wf = tf.extractfile(words_name) + pf = tf.extractfile(props_name) + with gzip.GzipFile(fileobj=wf) as words_file, gzip.GzipFile( + fileobj=pf) as props_file: + sentences = [] + labels = [] + one_seg = [] + for word, label in itertools.izip(words_file, props_file): + word = word.strip() + label = label.strip().split() + + if len(label) == 0: # end of sentence + for i in xrange(len(one_seg[0])): + a_kind_lable = [x[i] for x in one_seg] + labels.append(a_kind_lable) + + if len(labels) >= 1: + verb_list = [] + for x in labels[0]: + if x != '-': + verb_list.append(x) + + for i, lbl in enumerate(labels[1:]): + cur_tag = 'O' + is_in_bracket = False + lbl_seq = [] + verb_word = '' + for l in lbl: + if l == '*' and is_in_bracket == False: + lbl_seq.append('O') + elif l == '*' and is_in_bracket == True: + lbl_seq.append('I-' + cur_tag) + elif l == '*)': + lbl_seq.append('I-' + cur_tag) + is_in_bracket = False + elif l.find('(') != -1 and l.find(')') != -1: + cur_tag = l[1:l.find('*')] + lbl_seq.append('B-' + cur_tag) + is_in_bracket = False + elif l.find('(') != -1 and l.find(')') == -1: + cur_tag = l[1:l.find('*')] + lbl_seq.append('B-' + cur_tag) + is_in_bracket = True + else: + print 'error:', l + + yield sentences, verb_list[i], lbl_seq + + sentences = [] + labels = [] + one_seg = [] + else: + sentences.append(word) + one_seg.append(label) + + return reader + + +def reader_creator(corpus_reader, + word_dict=None, + predicate_dict=None, + label_dict=None): + def reader(): + for sentence, predicate, labels in corpus_reader(): + + sen_len = len(sentence) + + verb_index = labels.index('B-V') + mark = [0] * len(labels) + if verb_index > 0: + mark[verb_index - 1] = 1 + ctx_n1 = sentence[verb_index - 1] + else: + ctx_n1 = 'bos' + + if verb_index > 1: + mark[verb_index - 2] = 1 + ctx_n2 = sentence[verb_index - 2] + else: + ctx_n2 = 'bos' + + mark[verb_index] = 1 + ctx_0 = sentence[verb_index] + + if verb_index < len(labels) - 1: + mark[verb_index + 1] = 1 + ctx_p1 = sentence[verb_index + 1] + else: + ctx_p1 = 'eos' + + if verb_index < len(labels) - 2: + mark[verb_index + 2] = 1 + ctx_p2 = sentence[verb_index + 2] + else: + ctx_p2 = 'eos' + + word_idx = [word_dict.get(w, UNK_IDX) for w in sentence] + pred_idx = [predicate_dict.get(predicate)] * sen_len + + ctx_n2_idx = [word_dict.get(ctx_n2, UNK_IDX)] * sen_len + ctx_n1_idx = [word_dict.get(ctx_n1, UNK_IDX)] * sen_len + ctx_0_idx = [word_dict.get(ctx_0, UNK_IDX)] * sen_len + ctx_p1_idx = [word_dict.get(ctx_p1, UNK_IDX)] * sen_len + ctx_p2_idx = [word_dict.get(ctx_p2, UNK_IDX)] * sen_len + + label_idx = [label_dict.get(w) for w in labels] + + yield word_idx, pred_idx, ctx_n2_idx, ctx_n1_idx, \ + ctx_0_idx, ctx_p1_idx, ctx_p2_idx, mark, label_idx + + return reader() + + +def get_dict(): + word_dict = load_dict( + common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5)) + verb_dict = load_dict( + common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5)) + label_dict = load_dict( + common.download(TRGDICT_URL, 'conll05st', TRGDICT_MD5)) + return word_dict, verb_dict, label_dict + + +def get_embedding(): + return common.download(EMB_URL, 'conll05st', EMB_MD5) + + +def test(): + word_dict, verb_dict, label_dict = get_dict() + reader = corpus_reader( + common.download(DATA_URL, 'conll05st', DATA_MD5), + words_name='conll05st-release/test.wsj/words/test.wsj.words.gz', + props_name='conll05st-release/test.wsj/props/test.wsj.props.gz') + return reader_creator(reader, word_dict, verb_dict, label_dict) + + +if __name__ == '__main__': + print get_embedding() + for f in test(): + print f From 0dd53294caaefdee1d7809ef08cd64db3ba8561d Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Thu, 2 Mar 2017 02:32:51 +0800 Subject: [PATCH 16/24] add copyright --- python/paddle/v2/dataset/__init__.py | 14 ++++++++++++++ python/paddle/v2/dataset/cifar.py | 14 ++++++++++++++ python/paddle/v2/dataset/common.py | 14 ++++++++++++++ python/paddle/v2/dataset/conll05.py | 14 ++++++++++++++ python/paddle/v2/dataset/imdb.py | 4 +--- python/paddle/v2/dataset/imikolov.py | 13 +++++++++++++ python/paddle/v2/dataset/mnist.py | 13 +++++++++++++ python/paddle/v2/dataset/movielens.py | 14 ++++++++++++++ python/paddle/v2/dataset/tests/cifar_test.py | 14 ++++++++++++++ python/paddle/v2/dataset/tests/common_test.py | 14 ++++++++++++++ python/paddle/v2/dataset/tests/imdb_test.py | 14 ++++++++++++++ python/paddle/v2/dataset/tests/mnist_test.py | 14 ++++++++++++++ 12 files changed, 153 insertions(+), 3 deletions(-) diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py index 9647e9850332b7..15460b820d86d2 100644 --- a/python/paddle/v2/dataset/__init__.py +++ b/python/paddle/v2/dataset/__init__.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import mnist __all__ = ['mnist'] diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py index 77c54bd268b5d9..5c6f5d85567fa1 100644 --- a/python/paddle/v2/dataset/cifar.py +++ b/python/paddle/v2/dataset/cifar.py @@ -1,6 +1,20 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ CIFAR dataset: https://www.cs.toronto.edu/~kriz/cifar.html """ + import cPickle import itertools import numpy diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/v2/dataset/common.py index fcf4437ffaf329..397c9e66d49543 100644 --- a/python/paddle/v2/dataset/common.py +++ b/python/paddle/v2/dataset/common.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import requests import hashlib import os diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py index e9029063710f20..7c43c7c6340c20 100644 --- a/python/paddle/v2/dataset/conll05.py +++ b/python/paddle/v2/dataset/conll05.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import paddle.v2.dataset.common import tarfile import gzip diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py index 433e37380f840f..ffd7d89049358e 100644 --- a/python/paddle/v2/dataset/imdb.py +++ b/python/paddle/v2/dataset/imdb.py @@ -1,6 +1,3 @@ -# /usr/bin/env python -# -*- coding:utf-8 -*- - # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,6 +14,7 @@ """ IMDB dataset: http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz """ + import paddle.v2.dataset.common import tarfile import Queue diff --git a/python/paddle/v2/dataset/imikolov.py b/python/paddle/v2/dataset/imikolov.py index b3791ddad66e58..285d3eaca8317c 100644 --- a/python/paddle/v2/dataset/imikolov.py +++ b/python/paddle/v2/dataset/imikolov.py @@ -1,3 +1,16 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/ """ diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py index 1512a3c3189de4..7cecb341647d1b 100644 --- a/python/paddle/v2/dataset/mnist.py +++ b/python/paddle/v2/dataset/mnist.py @@ -1,3 +1,16 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ MNIST dataset. """ diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py index dcffcff2f58c63..c22bcfa38b5f50 100644 --- a/python/paddle/v2/dataset/movielens.py +++ b/python/paddle/v2/dataset/movielens.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import zipfile from common import download import re diff --git a/python/paddle/v2/dataset/tests/cifar_test.py b/python/paddle/v2/dataset/tests/cifar_test.py index a2af45ecf50846..e0e18229da7818 100644 --- a/python/paddle/v2/dataset/tests/cifar_test.py +++ b/python/paddle/v2/dataset/tests/cifar_test.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import paddle.v2.dataset.cifar import unittest diff --git a/python/paddle/v2/dataset/tests/common_test.py b/python/paddle/v2/dataset/tests/common_test.py index 7d8406171b8478..5babcef0eb4345 100644 --- a/python/paddle/v2/dataset/tests/common_test.py +++ b/python/paddle/v2/dataset/tests/common_test.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import paddle.v2.dataset.common import unittest import tempfile diff --git a/python/paddle/v2/dataset/tests/imdb_test.py b/python/paddle/v2/dataset/tests/imdb_test.py index e887af16634d2d..c4d82f26895d77 100644 --- a/python/paddle/v2/dataset/tests/imdb_test.py +++ b/python/paddle/v2/dataset/tests/imdb_test.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import paddle.v2.dataset.imdb import unittest import re diff --git a/python/paddle/v2/dataset/tests/mnist_test.py b/python/paddle/v2/dataset/tests/mnist_test.py index b4408cc2f590d4..1d344cac3e7483 100644 --- a/python/paddle/v2/dataset/tests/mnist_test.py +++ b/python/paddle/v2/dataset/tests/mnist_test.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import paddle.v2.dataset.mnist import unittest From 35ec5f0f1a5b497c0e927c98df882a1e9ab40d16 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 2 Mar 2017 09:51:46 +0800 Subject: [PATCH 17/24] Support StaticInput --- python/paddle/v2/__init__.py | 3 +- python/paddle/v2/layer.py | 53 +++++++++++++----------- python/paddle/v2/networks.py | 19 +++++++++ python/paddle/v2/tests/test_rnn_layer.py | 41 +++++++++--------- 4 files changed, 72 insertions(+), 44 deletions(-) create mode 100644 python/paddle/v2/networks.py diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py index b31efe170dbf11..4dbcd3bb6b8b4c 100644 --- a/python/paddle/v2/__init__.py +++ b/python/paddle/v2/__init__.py @@ -20,6 +20,7 @@ import data_type import topology import data_feeder +import networks from . import dataset from . import reader import attr @@ -29,7 +30,7 @@ __all__ = [ 'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer', 'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'dataset', 'reader', - 'topology' + 'topology', 'networks' ] diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index bf5d653e8ae3e4..82ccd8498a3dbb 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -109,9 +109,10 @@ def __real_func__(): class Layer(object): - def __init__(self, name=None, parent_layers=None): + def __init__(self, name=None, size=None, parent_layers=None): assert isinstance(parent_layers, dict) self.name = name + self.size = size self.__parent_layers__ = parent_layers def to_proto(self, context): @@ -173,7 +174,8 @@ def __init__(self, **kwargs): other_kwargs[key] = kwargs[key] name = kwargs.get('name', None) - super(V2LayerImpl, self).__init__(name, parent_layers) + size = kwargs.get('size', None) + super(V2LayerImpl, self).__init__(name, size, parent_layers) self.__other_kwargs__ = other_kwargs if wrapper is not None: @@ -220,9 +222,10 @@ class WithExtraParent(Layer): def extra_parent(self): return self.__extra_parent__ - def __init__(self, name=None, parent_layers=None): + def __init__(self, name=None, size=None, parent_layers=None): self.__extra_parent__ = [] - super(WithExtraParent, self).__init__(name, parent_layers) + super(WithExtraParent, self).__init__( + name=name, size=size, parent_layers=parent_layers) def append_extra_parent(self, parent): self.__extra_parent__.append(parent) @@ -261,7 +264,8 @@ class MemoryV2(WithExtraParent): def __init__(self, name, size, **kwargs): self.name = name self.size = size - super(MemoryV2, self).__init__(name=name, parent_layers=dict()) + super(MemoryV2, self).__init__( + name=name, size=size, parent_layers=dict()) self.__kwargs__ = kwargs self.__boot_layer_name__ = None if 'boot_layer' in kwargs: @@ -271,7 +275,9 @@ def __init__(self, name, size, **kwargs): st = inspect.stack() for i in xrange(len(st)): locs = inspect.stack()[i][0].f_locals - for val in locs.viewvalues(): + keys = locs.keys() + for key in keys: + val = locs[key] if isinstance(val, RecurrentLayerInput): begin_of_current_rnn.append(val) @@ -322,21 +328,15 @@ def to_proto_impl(self): return self.layer_output -class StaticInputV2(Layer): - def __init__(self, input=None, **kwargs): - assert input is not None - self.__kwargs__ = kwargs - super(StaticInputV2, self).__init__( - name=input.name, parent_layers={'input': input}) - - def context_name(self): - return self.name + "#static_input" - - def to_proto_impl(self, **kwargs): - args = dict() - args.update(kwargs) - args.update(self.__kwargs__) - return conf_helps.StaticInput(**args) +class StaticInputV2(object): + def __init__(self, input, is_seq=False, size=None): + assert isinstance(input, LayerV2) + self.name = input.name + self.input = input + self.is_seq = is_seq + self.size = size + # TODO(qiaolongfei): add size + # assert input.size is not None or size is not None class MixedLayerV2(Layer): @@ -370,9 +370,8 @@ def __init__(self, other_kwargs['act'] = act other_kwargs['bias_attr'] = bias_attr other_kwargs['layer_attr'] = layer_attr - parent_layers = {"input": self.__inputs__} - super(MixedLayerV2, self).__init__(name, parent_layers) + super(MixedLayerV2, self).__init__(name, size, parent_layers) self.__other_kwargs__ = other_kwargs def __iadd__(self, other): @@ -452,6 +451,12 @@ def recurrent_group(step, input, name=None): if not isinstance(input, collections.Sequence): input = [input] + # TODO(qiaolongfei) convert StaticInput to memory according to v2 recurrent_group + for i in xrange(len(input)): + cur_input = input[i] + if isinstance(cur_input, StaticInputV2): + input[i] = cur_input.input + actual_input = [ RecurrentLayerInput( recurrent_name=name, @@ -512,7 +517,7 @@ def __layer_name_mapping_parent_names__(inname): lambda x: x in ['input1', 'input2', 'label', 'input', 'a', 'b', 'expand_as', 'weights', 'vectors', 'weight', 'score', 'left', - 'right'], + 'right', 'output_mem'], all_args) diff --git a/python/paddle/v2/networks.py b/python/paddle/v2/networks.py new file mode 100644 index 00000000000000..2877b56b18dc9f --- /dev/null +++ b/python/paddle/v2/networks.py @@ -0,0 +1,19 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from layer import __convert_to_v2__ + +simple_gru = __convert_to_v2__('simple_gru', ['input']) +simple_attention = __convert_to_v2__( + 'simple_attention', ['encoded_sequence', 'encoded_proj', 'decoder_state']) diff --git a/python/paddle/v2/tests/test_rnn_layer.py b/python/paddle/v2/tests/test_rnn_layer.py index 48aeb42391576f..5fbbd20eb76bb9 100644 --- a/python/paddle/v2/tests/test_rnn_layer.py +++ b/python/paddle/v2/tests/test_rnn_layer.py @@ -74,21 +74,28 @@ def test_sequence_rnn_multi_input(self): label_dim = 3 def parse_old_rnn(): - def step(y, wid): - z = conf_helps.embedding_layer(input=wid, size=word_dim) - mem = conf_helps.memory(name="rnn_state", size=hidden_dim) - out = conf_helps.fc_layer( - input=[y, z, mem], - size=hidden_dim, - act=conf_helps.TanhActivation(), - bias_attr=True, - name="rnn_state") - return out - def test(): data = conf_helps.data_layer(name="word", size=dict_dim) label = conf_helps.data_layer(name="label", size=label_dim) emb = conf_helps.embedding_layer(input=data, size=word_dim) + boot_layer = conf_helps.data_layer(name="boot", size=10) + boot_layer = conf_helps.fc_layer( + name='boot_fc', input=boot_layer, size=10) + + def step(y, wid): + z = conf_helps.embedding_layer(input=wid, size=word_dim) + mem = conf_helps.memory( + name="rnn_state", + size=hidden_dim, + boot_layer=boot_layer) + out = conf_helps.fc_layer( + input=[y, z, mem], + size=hidden_dim, + act=conf_helps.TanhActivation(), + bias_attr=True, + name="rnn_state") + return out + out = conf_helps.recurrent_group( name="rnn", step=step, input=[emb, data]) @@ -111,11 +118,9 @@ def parse_new_rnn(): label = layer.data( name="label", type=data_type.dense_vector(label_dim)) emb = layer.embedding(input=data, size=word_dim) - boot_layer = layer.data( name="boot", type=data_type.dense_vector(10)) - - boot_layer = layer.fc(name='wtf', input=boot_layer, size=10) + boot_layer = layer.fc(name='boot_fc', input=boot_layer, size=10) def step(y, wid): z = layer.embedding(input=wid, size=word_dim) @@ -141,11 +146,9 @@ def step(y, wid): return str(layer.parse_network(cost)) - with open("/Users/baidu/old.out", 'w') as f: - print >> f, parse_old_rnn() - with open("/Users/baidu/new.out", "w") as f: - print >> f, parse_new_rnn() - # print ''.join(diff) + diff = difflib.unified_diff(parse_old_rnn().splitlines(1), + parse_new_rnn().splitlines(1)) + print ''.join(diff) if __name__ == '__main__': From b400c8f02c76ce74828cc999d6bef335cca18a57 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 2 Mar 2017 11:47:33 +0800 Subject: [PATCH 18/24] update to latest --- python/paddle/v2/config_base.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/python/paddle/v2/config_base.py b/python/paddle/v2/config_base.py index 035f96b0f2e978..be3e39a06ef9d3 100644 --- a/python/paddle/v2/config_base.py +++ b/python/paddle/v2/config_base.py @@ -19,9 +19,10 @@ class Layer(object): - def __init__(self, name=None, parent_layers=None): + def __init__(self, name=None, size=None, parent_layers=None): assert isinstance(parent_layers, dict) self.name = name + self.size = size self.__parent_layers__ = parent_layers def to_proto(self, context): @@ -39,16 +40,30 @@ def to_proto(self, context): self.__parent_layers__[layer_name]) kwargs[layer_name] = v1_layer - if self.name is None: + if self.context_name() is None: return self.to_proto_impl(**kwargs) - elif self.name not in context: - context[self.name] = self.to_proto_impl(**kwargs) + elif self.context_name() not in context: + context[self.context_name()] = self.to_proto_impl(**kwargs) - return context[self.name] + if self.use_context_name(): + return context[self.context_name()] + else: + return context[self.name] def to_proto_impl(self, **kwargs): raise NotImplementedError() + def context_name(self): + """ + Context name means the context which stores `to_proto_impl` result. + If multiple layer share same context_name, the `to_proto_impl` of them + will be invoked only once. + """ + return self.name + + def use_context_name(self): + return False + def __convert_to_v2__(method_name, parent_names, is_default_name=True): if is_default_name: @@ -69,7 +84,8 @@ def __init__(self, **kwargs): other_kwargs[key] = kwargs[key] name = kwargs.get('name', None) - super(V2LayerImpl, self).__init__(name, parent_layers) + size = kwargs.get('size', None) + super(V2LayerImpl, self).__init__(name, size, parent_layers) self.__other_kwargs__ = other_kwargs if wrapper is not None: From ba1c978514c05500a858644a09033e2afc5e1f7a Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Thu, 2 Mar 2017 13:41:41 +0800 Subject: [PATCH 19/24] close file and compare the result with the old way. --- python/paddle/v2/dataset/conll05.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py index 7c43c7c6340c20..b6a4c252d12d7a 100644 --- a/python/paddle/v2/dataset/conll05.py +++ b/python/paddle/v2/dataset/conll05.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.v2.dataset.common +#import paddle.v2.dataset.common +import common import tarfile import gzip import itertools @@ -49,11 +50,9 @@ def load_dict(filename): def corpus_reader(data_path, words_name, props_name): """ - Read one corpus by corpus name. It returns an iterator. Each element of + Read one corpus. It returns an iterator. Each element of this iterator is a tuple including sentence and labels. The sentence is consist of a list of word IDs. The labels include a list of label IDs. - :param name: corpus name. - :type name: basestring :return: a iterator of data. :rtype: iterator """ @@ -104,7 +103,8 @@ def reader(): lbl_seq.append('B-' + cur_tag) is_in_bracket = True else: - print 'error:', l + raise RuntimeError('Unexpected label: %s' % + l) yield sentences, verb_list[i], lbl_seq @@ -115,6 +115,10 @@ def reader(): sentences.append(word) one_seg.append(label) + pf.close() + wf.close() + tf.close() + return reader From 4dd2e40bd07c0ff1123d35328687f53682e67a62 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Thu, 2 Mar 2017 13:55:41 +0800 Subject: [PATCH 20/24] remove comments --- python/paddle/v2/dataset/conll05.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py index b6a4c252d12d7a..7874161a059689 100644 --- a/python/paddle/v2/dataset/conll05.py +++ b/python/paddle/v2/dataset/conll05.py @@ -12,8 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -#import paddle.v2.dataset.common -import common +import paddle.v2.dataset.common import tarfile import gzip import itertools From c9bb48b308807f80b3ba238cafb97ba4b0eda983 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 2 Mar 2017 15:09:26 +0800 Subject: [PATCH 21/24] support calculate size --- python/paddle/v2/config_base.py | 7 +- python/paddle/v2/layer.py | 110 ++++++++++++++++++++------------ 2 files changed, 75 insertions(+), 42 deletions(-) diff --git a/python/paddle/v2/config_base.py b/python/paddle/v2/config_base.py index be3e39a06ef9d3..573539a30cc1d9 100644 --- a/python/paddle/v2/config_base.py +++ b/python/paddle/v2/config_base.py @@ -22,7 +22,7 @@ class Layer(object): def __init__(self, name=None, size=None, parent_layers=None): assert isinstance(parent_layers, dict) self.name = name - self.size = size + self.__contex__ = {} self.__parent_layers__ = parent_layers def to_proto(self, context): @@ -44,7 +44,7 @@ def to_proto(self, context): return self.to_proto_impl(**kwargs) elif self.context_name() not in context: context[self.context_name()] = self.to_proto_impl(**kwargs) - + self.__contex__ = context if self.use_context_name(): return context[self.context_name()] else: @@ -64,6 +64,9 @@ def context_name(self): def use_context_name(self): return False + def calcalted_size(self): + return self.__contex__[self.context_name()].size + def __convert_to_v2__(method_name, parent_names, is_default_name=True): if is_default_name: diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index e24244a48c96dc..a97518ed52562f 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -197,6 +197,10 @@ def __init__(self, name, size, **kwargs): val = locs[key] if isinstance(val, RecurrentLayerInput): begin_of_current_rnn.append(val) + elif isinstance(val, collections.Sequence): + for v in val: + if isinstance(v, RecurrentLayerInput): + begin_of_current_rnn.append(v) if begin_of_current_rnn: break @@ -216,7 +220,13 @@ def to_proto_impl(self, context, **kwargs): if self.__boot_layer_name__ is not None: args['boot_layer'] = context[self.__boot_layer_name__] - return conf_helps.memory(name=self.name, size=self.size, **args) + + if callable(self.size): + real_size = self.size() + else: + real_size = self.size + args['size'] = real_size + return conf_helps.memory(name=self.name, **args) def context_name(self): return self.name + "#memory" @@ -311,6 +321,12 @@ def to_proto_impl(self, **kwargs): args[each] = kwargs[each] for each in self.__other_kwargs__: args[each] = self.__other_kwargs__[each] + size = args.get('size', None) + if callable(size): + real_size = size() + else: + real_size = size + args['size'] = real_size return getattr(conf_helps, self.__method_name__)(**args) @@ -363,53 +379,15 @@ def to_proto_impl(self, **kwargs): RecurrentLayerGroupEnd(name=self.__recurrent_name__) -@wrap_name_default() -def recurrent_group(step, input, name=None): - if not isinstance(input, collections.Sequence): - input = [input] - - # TODO(qiaolongfei) convert StaticInput to memory according to v2 recurrent_group - for i in xrange(len(input)): - cur_input = input[i] - if isinstance(cur_input, StaticInputV2): - input[i] = cur_input.input - - actual_input = [ - RecurrentLayerInput( - recurrent_name=name, - index=i, - parent_layers={'recurrent_inputs': input}) - for i in xrange(len(input)) - ] - - actual_output = step(*actual_input) - - if not isinstance(actual_output, collections.Sequence): - actual_output = [actual_output] - - retv = [ - RecurrentLayerOutput( - recurrent_name=name, - index=i, - parent_layers={'recurrent_outputs': actual_output}) - for i in xrange(len(actual_output)) - ] - if len(retv) == 1: - return retv[0] - else: - return retv - - LayerV2 = Layer data = DataLayerV2 AggregateLevel = conf_helps.layers.AggregateLevel ExpandLevel = conf_helps.layers.ExpandLevel -recurrent_group = recurrent_group memory = MemoryV2 def __layer_name_mapping__(inname): - if inname in ['data_layer', 'memory', 'mixed_layer']: + if inname in ['data_layer', 'memory', 'mixed_layer', 'recurrent_group']: # Do Not handle these layers return elif inname == 'maxid_layer': @@ -469,3 +447,55 @@ def __convert_layer__(_new_name_, _old_name_, _parent_names_): for op in operator_list: globals()[op[0]] = __convert_to_v2__( op[0], parent_names=op[1], is_default_name=False) + + +@wrap_name_default() +def recurrent_group(step, input, name=None): + if not isinstance(input, collections.Sequence): + input = [input] + + non_static_inputs = filter(lambda x: not isinstance(x, StaticInputV2), + input) + actual_input = [ + RecurrentLayerInput( + recurrent_name=name, + index=i, + parent_layers={'recurrent_inputs': non_static_inputs}) + for i in xrange(len(non_static_inputs)) + ] + + def __real_step__(*args): + rnn_input = list(args) + static_inputs = filter(lambda x: isinstance(x, StaticInputV2), input) + for static_input in static_inputs: + mem_name = "__%s_memory__" % static_input.input.name + print memory + mem = memory( + name=mem_name, + is_seq=static_input.is_seq, + size=static_input.input.calcalted_size, + boot_layer=static_input.input) + with mixed( + name=mem_name, + size=static_input.input.calcalted_size, + act=activation.Identity()) as mix: + mix += identity_projection(input=mem) + rnn_input.insert(input.index(static_input), mix) + return step(*rnn_input) + + actual_output = __real_step__(*actual_input) + + if not isinstance(actual_output, collections.Sequence): + actual_output = [actual_output] + + retv = [ + RecurrentLayerOutput( + recurrent_name=name, + index=i, + parent_layers={'recurrent_outputs': actual_output}) + for i in xrange(len(actual_output)) + ] + if len(retv) == 1: + return retv[0] + else: + return retv From f9e6aa2c31aa6bc5269cd66eaa8705b0b98af989 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 2 Mar 2017 15:23:19 +0800 Subject: [PATCH 22/24] refine code --- python/paddle/v2/config_base.py | 12 ++++++---- python/paddle/v2/layer.py | 40 +++++++++++++++++---------------- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/python/paddle/v2/config_base.py b/python/paddle/v2/config_base.py index 573539a30cc1d9..fa2ccec6c32705 100644 --- a/python/paddle/v2/config_base.py +++ b/python/paddle/v2/config_base.py @@ -19,7 +19,7 @@ class Layer(object): - def __init__(self, name=None, size=None, parent_layers=None): + def __init__(self, name=None, parent_layers=None): assert isinstance(parent_layers, dict) self.name = name self.__contex__ = {} @@ -64,7 +64,12 @@ def context_name(self): def use_context_name(self): return False - def calcalted_size(self): + def calculate_size(self): + """ + lazy calculate size of the layer, should be called when to_proto_impl of + this layer is called. + :return: + """ return self.__contex__[self.context_name()].size @@ -87,8 +92,7 @@ def __init__(self, **kwargs): other_kwargs[key] = kwargs[key] name = kwargs.get('name', None) - size = kwargs.get('size', None) - super(V2LayerImpl, self).__init__(name, size, parent_layers) + super(V2LayerImpl, self).__init__(name, parent_layers) self.__other_kwargs__ = other_kwargs if wrapper is not None: diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index a97518ed52562f..0d8b59cfd29c9c 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -139,10 +139,10 @@ class WithExtraParent(Layer): def extra_parent(self): return self.__extra_parent__ - def __init__(self, name=None, size=None, parent_layers=None): + def __init__(self, name=None, parent_layers=None): self.__extra_parent__ = [] super(WithExtraParent, self).__init__( - name=name, size=size, parent_layers=parent_layers) + name=name, parent_layers=parent_layers) def append_extra_parent(self, parent): self.__extra_parent__.append(parent) @@ -178,11 +178,9 @@ def to_proto(self, context): class MemoryV2(WithExtraParent): - def __init__(self, name, size, **kwargs): + def __init__(self, name, **kwargs): self.name = name - self.size = size - super(MemoryV2, self).__init__( - name=name, size=size, parent_layers=dict()) + super(MemoryV2, self).__init__(name=name, parent_layers=dict()) self.__kwargs__ = kwargs self.__boot_layer_name__ = None if 'boot_layer' in kwargs: @@ -221,11 +219,14 @@ def to_proto_impl(self, context, **kwargs): if self.__boot_layer_name__ is not None: args['boot_layer'] = context[self.__boot_layer_name__] - if callable(self.size): - real_size = self.size() - else: - real_size = self.size - args['size'] = real_size + size = args.get('size', None) + if size is not None: + if callable(size): + real_size = size() + else: + real_size = size + print(real_size) + args['size'] = real_size return conf_helps.memory(name=self.name, **args) def context_name(self): @@ -298,7 +299,7 @@ def __init__(self, other_kwargs['bias_attr'] = bias_attr other_kwargs['layer_attr'] = layer_attr parent_layers = {"input": self.__inputs__} - super(MixedLayerV2, self).__init__(name, size, parent_layers) + super(MixedLayerV2, self).__init__(name, parent_layers) self.__other_kwargs__ = other_kwargs def __iadd__(self, other): @@ -322,11 +323,12 @@ def to_proto_impl(self, **kwargs): for each in self.__other_kwargs__: args[each] = self.__other_kwargs__[each] size = args.get('size', None) - if callable(size): - real_size = size() - else: - real_size = size - args['size'] = real_size + if size is not None: + if callable(size): + real_size = size() + else: + real_size = size + args['size'] = real_size return getattr(conf_helps, self.__method_name__)(**args) @@ -473,11 +475,11 @@ def __real_step__(*args): mem = memory( name=mem_name, is_seq=static_input.is_seq, - size=static_input.input.calcalted_size, + size=static_input.input.calculate_size, boot_layer=static_input.input) with mixed( name=mem_name, - size=static_input.input.calcalted_size, + size=static_input.input.calculate_size, act=activation.Identity()) as mix: mix += identity_projection(input=mem) rnn_input.insert(input.index(static_input), mix) From bb66f24334eff70a045c75ef9ff5a22b77c27e81 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 2 Mar 2017 15:32:01 +0800 Subject: [PATCH 23/24] remove debug code --- python/paddle/v2/layer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index 0d8b59cfd29c9c..2f55611aaa1d3a 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -225,7 +225,6 @@ def to_proto_impl(self, context, **kwargs): real_size = size() else: real_size = size - print(real_size) args['size'] = real_size return conf_helps.memory(name=self.name, **args) @@ -471,7 +470,6 @@ def __real_step__(*args): static_inputs = filter(lambda x: isinstance(x, StaticInputV2), input) for static_input in static_inputs: mem_name = "__%s_memory__" % static_input.input.name - print memory mem = memory( name=mem_name, is_seq=static_input.is_seq, From edce6c8b6ab23c9c7fea1dee75d46fb2bb0f3e31 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 2 Mar 2017 17:51:53 +0800 Subject: [PATCH 24/24] restore embedding_layer name to embedding --- python/paddle/trainer_config_helpers/layers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 2b95c2ed0f3566..b68460b6a3ab62 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -822,7 +822,7 @@ def data_layer(name, size, height=None, width=None, layer_attr=None): return LayerOutput(name, LayerType.DATA, size=size) -@wrap_name_default("embedding_layer") +@wrap_name_default("embedding") @wrap_param_attr_default() @layer_support(ERROR_CLIPPING) def embedding_layer(input, size, name=None, param_attr=None, layer_attr=None):