Commit
add comments for networks.py
ISSUE=4611081


git-svn-id: https://svn.baidu.com/idl/trunk/paddle@1475 1ad973e4-5ce8-4261-8a94-b56d1f490c56
caoying03 committed Sep 1, 2016
1 parent 200dfa1 commit af0bbfa
Showing 2 changed files with 243 additions and 67 deletions.
python/paddle/trainer_config_helpers/layers.py (96 changes: 55 additions & 41 deletions)
@@ -743,7 +743,8 @@ def pooling_layer(input, pooling_type=None, name=None, bias_attr=None,
pooling_type=AvgPooling(),
agg_level=AggregateLevel.EACH_SEQUENCE)
- :param agg_level: AggregateLevel.EACH_TIMESTEP or AggregateLevel.EACH_SEQUENCE
+ :param agg_level: AggregateLevel.EACH_TIMESTEP or
+                   AggregateLevel.EACH_SEQUENCE
:type agg_level: AggregateLevel
:param name: layer name.
:type name: basestring
@@ -806,21 +807,24 @@ def lstmemory(input, name=None, reverse=False, act=None,
h_t & = o_t tanh(c_t)
- NOTE: In paddle's implementation, the multiply operation
+ NOTE: In PaddlePaddle's implementation, the multiplications
:math:`W_{xi}x_{t}` , :math:`W_{xf}x_{t}`,
- :math:`W_{xc}x_t`, :math:`W_{xo}x_{t}` is not done by
- lstmemory layer, so it must use a mixed_layer do this full_matrix_projection
- before lstm is used.
+ :math:`W_{xc}x_t`, :math:`W_{xo}x_{t}` are not done in the lstmemory layer,
+ so an additional mixed_layer with full_matrix_projection or a fc_layer must
+ be included in the configuration file to complete the input-to-hidden
+ mappings before lstmemory is called.
- NOTE: This is a low level user interface. You may use network.simple_lstm
+ NOTE: This is a low level user interface. You can use network.simple_lstm
to config a simple plain lstm layer.
- Please refer **Generating Sequences With Recurrent Neural Networks** if you
- want to know what lstm is. Link_ is here.
+ Please refer to **Generating Sequences With Recurrent Neural Networks** for
+ more details about LSTM.
+ Link_ goes as below.
.. _Link: http://arxiv.org/abs/1308.0850
- TODO(yuyang18): Check lstm can input multiple values or not?
+ TODO(yuyang18): Check lstm can take multiple input values or not?
:param name: The lstmemory layer name.
:type name: basestring
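A minimal sketch of such a configuration follows. It assumes the projected width must be four times the desired LSTM size, uses fc_layer in place of the mixed_layer/full_matrix_projection combination, and all names and sizes are illustrative:

.. code-block:: python

    from paddle.trainer_config_helpers import *

    # hypothetical input; 128 is an assumed feature width
    data = data_layer(name="features", size=128)

    # the input-to-hidden mapping is done outside lstmemory: project to
    # 4 * lstm_size so each of the four gates gets a slice (assumption)
    hidden = fc_layer(input=data, size=256 * 4, act=LinearActivation())

    # lstmemory itself only performs the recurrent part
    lstm = lstmemory(input=hidden)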
@@ -894,28 +898,30 @@ def grumemory(input, name=None, reverse=False, act=None,
r_t = \\sigma(W_{r}x_{t} + U_{r}h_{t-1} + b_r)
- 3. The candidate activation :math:`\\tilde{h_t}` is computed similarly to that
- of the traditional recurrent unit:
+ 3. The candidate activation :math:`\\tilde{h_t}` is computed similarly to
+ that of the traditional recurrent unit:
.. math::
{\\tilde{h_t}} = tanh(W x_{t} + U (r_{t} \odot h_{t-1}) + b)
- 4. The hidden activation :math:`h_t` of the GRU at time t is a linear interpolation
- between the previous activation :math:`h_{t-1}` and the candidate activation
- :math:`\\tilde{h_t}`:
+ 4. The hidden activation :math:`h_t` of the GRU at time t is a linear
+ interpolation between the previous activation :math:`h_{t-1}` and the
+ candidate activation :math:`\\tilde{h_t}`:
.. math::
h_t = (1 - z_t) h_{t-1} + z_t {\\tilde{h_t}}
- NOTE: In paddle's implementation, the multiply operation
+ NOTE: In PaddlePaddle's implementation, the multiplication operations
:math:`W_{r}x_{t}`, :math:`W_{z}x_{t}` and :math:`W x_t` are not computed in
- gate_recurrent layer. So it must use a mixed_layer with full_matrix_projection
- or fc_layer to compute them before GRU.
+ gate_recurrent layer. Consequently, an additional mixed_layer with
+ full_matrix_projection or a fc_layer must be included before grumemory
+ is called.
- The details can refer to `Empirical Evaluation of Gated Recurrent
- Neural Networks on Sequence Modeling. <https://arxiv.org/abs/1412.3555>`_
+ More details can be found by referring to `Empirical Evaluation of Gated
+ Recurrent Neural Networks on Sequence Modeling.
+ <https://arxiv.org/abs/1412.3555>`_
The simple usage is:
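A minimal sketch under the same assumptions as for lstmemory above, i.e. that the projection feeding grumemory must be three times the desired GRU size; all names and sizes are illustrative:

.. code-block:: python

    from paddle.trainer_config_helpers import *

    data = data_layer(name="features", size=128)   # assumed input width

    # the input-to-hidden multiplications happen here, not in grumemory:
    # project to 3 * gru_size for the update gate, reset gate and candidate
    hidden = fc_layer(input=data, size=256 * 3, act=LinearActivation())

    gru = grumemory(input=hidden)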
@@ -1279,7 +1285,8 @@ def cos_sim(a, b, scale=5, size=1, name=None, layer_attr=None):
@wrap_name_default()
@wrap_bias_attr_default(has_bias=True)
@layer_support()
- def hsigmoid(input, label, num_classes, name=None, bias_attr=None, layer_attr=None):
+ def hsigmoid(input, label, num_classes, name=None, bias_attr=None,
+              layer_attr=None):
"""
Organize the classes into a binary tree. At each node, a sigmoid function
is used to calculate the probability of belonging to the right branch.
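A short sketch of how this layer might be wired up, using only the parameters shown in the signature above; the layer names and sizes are illustrative:

.. code-block:: python

    from paddle.trainer_config_helpers import *

    feature = data_layer(name="feature", size=128)   # assumed feature width
    label = data_layer(name="label", size=1024)      # assumed integer label input

    # hierarchical sigmoid over 1024 classes arranged as a binary tree
    cost = hsigmoid(input=feature, label=label, num_classes=1024)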
@@ -1358,22 +1365,22 @@ def img_conv_layer(input, filter_size, num_filters,
input is raw pixels of image(mono or RGB), or it may be the previous layer's
num_filters * num_group.
- There are several group of filter in paddle
- implementation. Each group will process some channel of inputs. For example,
- if input num_channel = 256, group = 4, num_filter=32, the paddle will create
+ There are several groups of filters in the PaddlePaddle implementation.
+ Each group will process some channels of the inputs. For example, if an input
+ has num_channel = 256, group = 4, num_filter = 32, PaddlePaddle will create
32*4 = 128 filters to process inputs. The channels will be split into 4
- pieces. First 256/4 = 64 channels will process by first 32 filters. The rest
- channels will be processed by rest group of filters.
+ pieces. The first 256/4 = 64 channels will be processed by the first 32
+ filters; the rest will be processed by the remaining groups of filters.
:param name: Layer name.
:type name: basestring
:param input: Layer Input.
:type input: LayerOutput
:param filter_size: The x dimension of a filter kernel.
:type filter_size: int
- :param filter_size_y: The y dimension of a filter kernel. Since paddle now
- support rectangular filters, the filter's shape
- will be (filter_size, filter_size_y).
+ :param filter_size_y: The y dimension of a filter kernel. Since PaddlePaddle
+ currently supports rectangular filters, the filter's
+ shape will be (filter_size, filter_size_y).
:type filter_size_y: int
:param num_filters: Each filter group's number of filter
:param act: Activation type. Default is tanh
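A sketch of the grouped-convolution example from the paragraph above. The num_channels and groups keyword names, and feeding a data_layer directly into the convolution, are assumptions here rather than facts taken from this diff:

.. code-block:: python

    from paddle.trainer_config_helpers import *

    # assumed: a previous layer that produced 256 channels
    prev = data_layer(name="image_feature", size=256 * 14 * 14)

    # group = 4, num_filter = 32 as in the example above: 32 * 4 = 128 filters
    # in total, and each group of 32 filters sees 256 / 4 = 64 input channels
    conv = img_conv_layer(input=prev, filter_size=3, num_filters=32,
                          num_channels=256, groups=4, stride=1, padding=1)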
@@ -1744,11 +1751,13 @@ def addto_layer(input, act=None, name=None, bias_attr=None,
inputs. Each input of this layer should be the same size, which is also the
output size of this layer.
- There is no weight matrix for each input, because it just a simple add operation.
- If you want to a complicated operation before add, please use mixed_layer.
+ There is no weight matrix for each input, because it is just a simple add
+ operation. If you want a complicated operation before the add, please use
+ mixed_layer.
It is a very good way to set dropout outside the layers. Since not all
- paddle layer support dropout, you can add an add_to layer, set dropout here.
+ PaddlePaddle layers support dropout, you can add an add_to layer and set
+ dropout here.
Please refer to dropout_layer for details.
:param name: Layer name.
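A sketch of the dropout-outside-the-layer pattern described above. Attaching dropout through ExtraAttr(drop_rate=...) is an assumption here, and the two branch layers are purely illustrative:

.. code-block:: python

    from paddle.trainer_config_helpers import *

    data = data_layer(name="features", size=128)
    branch_a = fc_layer(input=data, size=256)   # two illustrative branches
    branch_b = fc_layer(input=data, size=256)   # of the same output size

    # simple element-wise add of the branches; dropout is attached to this
    # layer rather than to the branches (drop_rate/ExtraAttr are assumptions)
    merged = addto_layer(input=[branch_a, branch_b],
                         layer_attr=ExtraAttr(drop_rate=0.5))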
@@ -2063,9 +2072,10 @@ def gru_step_layer(input, output_mem, size=None, act=None,
@layer_support()
def get_output_layer(input, arg_name, name=None, layer_attr=None):
"""
- Get layer's output by name. In paddle, a layer might return multiple value,
- but return one layer output. If user want to reference another output beside
- default output, use get_output_layer first to get another output from input.
+ Get layer's output by name. In PaddlePaddle, a layer might return multiple
+ values, but returns one layer's output. If the user wants to use another
+ output besides the default one, please use get_output_layer first to get
+ the output from input.
:param name: Layer's name.
:type name: basestring
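A sketch of the intended usage, assuming lstmemory is one such multi-output layer and that its cell state is exposed under the argument name "state" (that name is an assumption, not stated in this diff):

.. code-block:: python

    from paddle.trainer_config_helpers import *

    data = data_layer(name="features", size=128)
    hidden = fc_layer(input=data, size=256 * 4, act=LinearActivation())
    lstm = lstmemory(input=hidden)

    # lstm is the default output (the hidden state); the cell state is
    # fetched explicitly by name ("state" is an assumed arg_name)
    cell = get_output_layer(input=lstm, arg_name="state")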
@@ -2155,7 +2165,11 @@ def __init__(self, input):
@wrap_name_default("recurrent_group")
def recurrent_group(step, input, reverse=False, name=None):
"""
- Recurrent Group. It supports time steps and sequence steps mechanisms.
+ Recurrent layer group is an extremely flexible recurrent unit in
+ PaddlePaddle. As long as the user defines the calculation done within a
+ time step, PaddlePaddle will iterate such a recurrent calculation over
+ a sequence input. This is extremely useful for attention-based models or
+ Neural Turing Machine like models.
The basic usage (time steps) is:
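A minimal sketch of a plain RNN written as a recurrent group; the memory/step wiring pattern and all names and sizes are illustrative rather than taken from this diff:

.. code-block:: python

    from paddle.trainer_config_helpers import *

    emb = data_layer(name="word_vectors", size=128)   # assumed sequence input

    def rnn_step(current_input):
        # memory() reads what the layer named "rnn_state" produced at the
        # previous time step
        prev = memory(name="rnn_state", size=256)
        return fc_layer(input=[current_input, prev], size=256,
                        act=TanhActivation(), name="rnn_state")

    rnn = recurrent_group(step=rnn_step, input=emb)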
@@ -2603,9 +2617,9 @@ def conv_operator(input, filter_size, num_filters,
:type input: LayerOutput|list|tuple
:param filter_size: The x dimension of a filter kernel.
:type filter_size: int
- :param filter_size_y: The y dimension of a filter kernel. Since paddle now
- support rectangular filters, the filter's shape
- will be (filter_size, filter_size_y).
+ :param filter_size_y: The y dimension of a filter kernel. Since
+ PaddlePaddle now supports rectangular filters,
+ the filter's shape can be (filter_size, filter_size_y).
:type filter_size_y: int
:param num_filter: channel of output data.
:type num_filter: int
@@ -3264,9 +3278,9 @@ def lambda_cost(input, score, NDCG_num=5, max_sort_size=-1, coeff=1.0):
If max_sort_size = -1, then for each list, the
algorithm will sort the entire list to get gradient.
In other cases, max_sort_size must be greater than or
- equal to NDCG_num. And if max_sort_size is greater than
- the size of a list, the algorithm will sort the entire
- list of get gradient.
+ equal to NDCG_num. And if max_sort_size is greater
+ than the size of a list, the algorithm will sort the
+ entire list to get the gradient.
:type max_sort_size: int
:param name: The name of this layers. It is not necessary.
:type name: None|basestring
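A brief sketch using only the parameters from the signature above; the input and score layers are illustrative:

.. code-block:: python

    from paddle.trainer_config_helpers import *

    doc_feature = data_layer(name="doc_feature", size=128)   # assumed features
    relevance = data_layer(name="relevance_score", size=1)   # ground-truth score

    output = fc_layer(input=doc_feature, size=1)

    # NDCG is truncated at 5; max_sort_size=-1 sorts each whole list to get
    # the gradient, as described above
    cost = lambda_cost(input=output, score=relevance,
                       NDCG_num=5, max_sort_size=-1)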
