
Commit 260c734
fix bug in trainer_config_helpers
ISSUE=4592807 

git-svn-id: https://svn.baidu.com/idl/trunk/paddle@1423 1ad973e4-5ce8-4261-8a94-b56d1f490c56
luotao02 committed Aug 30, 2016
1 parent 9c0895e commit 260c734
Showing 9 changed files with 171 additions and 108 deletions.
2 changes: 1 addition & 1 deletion doc/ui/api/trainer_config_helpers/activations.rst
@@ -51,7 +51,7 @@ SequenceSoftmaxActivation
 =========================

 .. automodule:: paddle.trainer_config_helpers.activations
-    :members: SequenceSoftmax
+    :members: SequenceSoftmaxActivation
     :noindex:

 ReluActivation
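For orientation only (not part of this diff): an activation documented in this file is normally handed to a layer through its act argument. A minimal, hypothetical sketch, with made-up layer names and sizes:

    # Hypothetical sketch, not from this commit; input layer and sizes are illustrative.
    from paddle.trainer_config_helpers import *

    prev_layer = data_layer(name="scores_in", size=1)
    scores = fc_layer(input=prev_layer, size=1,
                      act=SequenceSoftmaxActivation())  # the class the corrected entry now points to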
12 changes: 12 additions & 0 deletions doc/ui/api/trainer_config_helpers/layers.rst
@@ -136,6 +136,18 @@ gru_step_layer
 Recurrent Layer Group
 =====================

+recurrent_group
+---------------
+.. automodule:: paddle.trainer_config_helpers.layers
+    :members: recurrent_group
+    :noindex:
+
+beam_search
+------------
+.. automodule:: paddle.trainer_config_helpers.layers
+    :members: beam_search
+    :noindex:
+
 get_output_layer
 -----------------
 .. automodule:: paddle.trainer_config_helpers.layers
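For orientation only (not part of this diff): recurrent_group, one of the two newly documented entries, wraps a per-timestep step function and applies it to each element of a sequence. A minimal sketch, assuming the usual layer helpers and made-up names and sizes:

    # Hypothetical sketch, not from this commit; vocabulary and hidden sizes are made up.
    from paddle.trainer_config_helpers import *

    words = data_layer(name="word_ids", size=10000)
    emb = embedding_layer(input=words, size=128)

    def step(word_vec):
        prev = memory(name="rnn_state", size=128)          # this step's output at t-1
        return fc_layer(input=[word_vec, prev], size=128,
                        act=TanhActivation(), name="rnn_state")

    rnn = recurrent_group(name="rnn", step=step, input=emb)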
36 changes: 27 additions & 9 deletions doc/ui/api/trainer_config_helpers/networks.rst
@@ -43,34 +43,52 @@ vgg_16_network
 Recurrent
 =========

+LSTM
+----
+
 lstmemory_unit
---------------
+``````````````
 .. automodule:: paddle.trainer_config_helpers.networks
     :members: lstmemory_unit
     :noindex:

 lstmemory_group
----------------
+```````````````
 .. automodule:: paddle.trainer_config_helpers.networks
     :members: lstmemory_group
     :noindex:

+simple_lstm
+```````````
+.. automodule:: paddle.trainer_config_helpers.networks
+    :members: simple_lstm
+    :noindex:
+
+bidirectional_lstm
+``````````````````
+.. automodule:: paddle.trainer_config_helpers.networks
+    :members: bidirectional_lstm
+    :noindex:
+
+GRU
+---
+
 gru_unit
---------
+````````
 .. automodule:: paddle.trainer_config_helpers.networks
     :members: gru_unit
     :noindex:

-simple_lstm
------------
+gru_group
+`````````
 .. automodule:: paddle.trainer_config_helpers.networks
-    :members: simple_lstm
+    :members: gru_group
     :noindex:

-bidirectional_lstm
-------------------
+simple_gru
+``````````
 .. automodule:: paddle.trainer_config_helpers.networks
-    :members: bidirectional_lstm
+    :members: simple_gru
     :noindex:

 simple_attention
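For orientation only (not part of this diff): the helpers regrouped under the new LSTM and GRU headings are called like ordinary layers. A minimal sketch with assumed names and sizes:

    # Hypothetical sketch, not from this commit; vocabulary and layer sizes are made up.
    from paddle.trainer_config_helpers import *

    words = data_layer(name="word_ids", size=10000)
    emb = embedding_layer(input=words, size=256)
    lstm = simple_lstm(input=emb, size=512)   # listed under "LSTM" above
    gru = simple_gru(input=emb, size=512)     # listed under "GRU" above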
4 changes: 2 additions & 2 deletions doc/ui/api/trainer_config_helpers/optimizers.rst
@@ -10,10 +10,10 @@ AdamOptimizer
     :members: AdamOptimizer
     :noindex:

-AdamxOptimizer
+AdamaxOptimizer
 ================
 .. automodule:: paddle.trainer_config_helpers.optimizers
-    :members: AdamxOptimizer
+    :members: AdamaxOptimizer
     :noindex:

 AdaGradOptimizer
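For orientation only (not part of this diff): the renamed entry points at the AdamaxOptimizer class, which a trainer config selects roughly as sketched below; the exact settings() keyword arguments and values are assumptions here, not verified against this revision.

    # Hypothetical sketch, not from this commit; batch size and learning rate are made up.
    from paddle.trainer_config_helpers import *

    settings(batch_size=128,
             learning_rate=1e-3,
             learning_method=AdamaxOptimizer())  # the docs previously spelled this AdamxOptimizer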
3 changes: 1 addition & 2 deletions paddle/.gitignore
@@ -28,9 +28,8 @@ ld-linux-x86-64.so.2
 x86_64-scm-linux-gnu/
 .lint.*.md5

-examples/crf/*.bin
-
+.idea/
 .test_env
 Paddle_wrap.cxx
 Paddle_wrap.h
 paddle.py
23 changes: 12 additions & 11 deletions paddle/gserver/layers/LstmLayer.h
@@ -97,13 +97,13 @@ class LstmLayer : public Layer, public LstmCompute {
    * @param starts Each start position of each samples.
    * @param inputValue The input values.
    */
-  void forwardSequence(int batchSize, size_t numSequences,
-                       const int *starts, MatrixPtr inputValue);
+  void forwardSequence(int batchSize, size_t numSequences, const int *starts,
+                       MatrixPtr inputValue);
   /**
    * Compute lstm backward one sequence by one sequence.
    */
-  void backwardSequence(int batchSize, size_t numSequences,
-                        const int *starts, MatrixPtr inputGrad);
+  void backwardSequence(int batchSize, size_t numSequences, const int *starts,
+                        MatrixPtr inputGrad);

   /**
    * Compute lstm forward one batch by one batch. The batch value is
@@ -121,21 +121,21 @@ class LstmLayer : public Layer, public LstmCompute {
    * }
    * @endcode
    */
-  void forwardBatch(int batchSize, size_t numSequences,
-                    const int *starts, MatrixPtr inputValue);
+  void forwardBatch(int batchSize, size_t numSequences, const int *starts,
+                    MatrixPtr inputValue);
   /**
    * Compute lstm backward one batch by one batch.
    */
-  void backwardBatch(int batchSize, size_t numSequences,
-                     const int *starts, MatrixPtr inputGrad);
+  void backwardBatch(int batchSize, size_t numSequences, const int *starts,
+                     MatrixPtr inputGrad);

   /**
    * This function only supports GPU. It not need to reorganize input into
    * batch value. It will launch one kernel to parallelly compute forward
    * propagation in sequence level.
    */
-  void forwardSeqParallel(int batchSize, size_t numSequences,
-                          const int *starts, MatrixPtr inputValue);
+  void forwardSeqParallel(int batchSize, size_t numSequences, const int *starts,
+                          MatrixPtr inputValue);
   /**
    * Backward propagation corresponding to forwardSeqParallel.
    */
@@ -157,7 +157,8 @@ class LstmLayer : public Layer, public LstmCompute {
   /// The weight ([size, 4*size]) contains \f$W_{hi}, W_{hf}, W_{hc}, W_{ho}\f$.
   std::unique_ptr<Weight> weight_;
   /// Learned bias parameter, shape: (1, 7 * size).
-  /// The bias contains \f$b_i, b_f, b_c, b_o\f$ and \f$W_{ci}, W_{cf}, W_{co}\f$.
+  /// The bias contains \f$b_i, b_f, b_c, b_o\f$ and \f$W_{ci}, W_{cf},
+  /// W_{co}\f$.
   std::unique_ptr<Weight> bias_;
   /// The reeal bias, point to \f$b_i, b_f, b_c, b_o\f$.
   MatrixPtr localBias_;
52 changes: 26 additions & 26 deletions python/paddle/trainer_config_helpers/layers.py
@@ -669,7 +669,7 @@ def fc_layer(input, size, act=None, name=None,
                    act=LinearActivation(),
                    bias_attr=False)

-    which is equal to:
+    which is equal to:

     .. code-block:: python
@@ -795,15 +795,15 @@ def lstmemory(input, name=None, reverse=False, act=None,
     .. math::

-        i_t = \\sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i)
+        i_t & = \\sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i)

-        f_t = \\sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f)
+        f_t & = \\sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f)

-        c_t = f_tc_{t-1} + i_t tanh (W_{xc}x_t+W_{hc}h_{t-1} + b_c)
+        c_t & = f_tc_{t-1} + i_t tanh (W_{xc}x_t+W_{hc}h_{t-1} + b_c)

-        o_t = \\sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o)
+        o_t & = \\sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o)

-        h_t = o_t tanh(c_t)
+        h_t & = o_t tanh(c_t)

     NOTE: In paddle's implementation, the multiply operation
@@ -1294,15 +1294,15 @@ def hsigmoid(input, label, num_classes, name=None, bias_attr=None, layer_attr=No
                           label=data_layer,
                           num_classes=3)

-    :param name: layer name
-    :type name: basestring
     :param input: Input layers. It could be a LayerOutput or list/tuple of
                   LayerOutput.
     :type input: LayerOutput|list|tuple
     :param label: Label layer.
     :type label: LayerOutput
     :param num_classes: number of classes.
     :type num_classes: int
+    :param name: layer name
+    :type name: basestring
     :param bias_attr: Bias attribute. None means default bias.
                       False means no bias.
     :type bias_attr: ParameterAttribute|False
@@ -1943,18 +1943,18 @@ def lstm_step_layer(input, state, size, act=None,
     .. math::

-        i_t = \\sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i)
+        i_t & = \\sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i)

-        f_t = \\sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f)
+        f_t & = \\sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f)

-        c_t = f_tc_{t-1} + i_t tanh (W_{xc}x_t+W_{hc}h_{t-1} + b_c)
+        c_t & = f_tc_{t-1} + i_t tanh (W_{xc}x_t+W_{hc}h_{t-1} + b_c)

-        o_t = \\sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o)
+        o_t & = \\sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o)

-        h_t = o_t tanh(c_t)
+        h_t & = o_t tanh(c_t)

-    The input\_ of lstm step is :math:`Wx_t + Wh_{t-1}`, and user should use
+    The input of lstm step is :math:`Wx_t + Wh_{t-1}`, and user should use
     :code:`mixed_layer` and :code:`full_matrix_projection` to calculate these
     input vector.
@@ -2347,12 +2347,12 @@ def eos_layer(input, eos_id, name=None, layer_attr=None):
     eos = eos_layer(input=layer, eos_id=id)

-    :param name: Layer name.
-    :type name: basestring
     :param input: Input layer name.
     :type input: LayerOutput
     :param eos_id: end id of sequence
     :type eos_id: int
+    :param name: Layer name.
+    :type name: basestring
     :param layer_attr: extra layer attributes.
     :type layer_attr: ExtraLayerAttribute.
     :return: layer name.
@@ -2529,11 +2529,11 @@ def conv_operator(input, filter_size, num_filters,
     :param num_filter: channel of output data.
     :type num_filter: int
     :param num_channel: channel of input data.
-    :rtype num_channel: int
+    :type num_channel: int
     :param stride: The x dimension of the stride.
-    :rtype stride: int
+    :type stride: int
     :param stride_y: The y dimension of the stride.
-    :rtype stride_y: int
+    :type stride_y: int
     :param padding: The x dimension of padding.
     :type padding: int
     :param padding_y: The y dimension of padding.
@@ -2632,7 +2632,7 @@ def tensor_layer(input, size, act=None, name=None,
     :param input: Input layer.
     :type input: LayerOutput|list|tuple.
     :param size: the layer dimension.
-    :rtype: int.
+    :type size: int.
     :param act: Activation Type. Default is tanh.
     :type act: BaseActivation
     :param param_attr: The Parameter Attribute.
@@ -2840,7 +2840,7 @@ def convex_comb_layer(input, size, name=None):
     """
     A layer for convex weighted average of vectors takes two inputs.
       - Input: a vector containing the convex weights (batchSize x weightdim),
-               and a matrix in a vector form (batchSize x (weightdim*datadim)).
+               and a matrix in a vector form (batchSize x (weightdim * datadim)).
       - Output: a vector (batchSize * datadim).

     .. math::
@@ -2893,8 +2893,8 @@ def block_expand_layer(input,
                        name=None):
     """
     Expand feature map to minibatch matrix.
-    - matrix width is: block_y * block_x * channel
-    - matirx height is: outputH * outputW
+    - matrix width is: block_y * block_x * channel
+    - matirx height is: outputH * outputW

     .. math::
@@ -3100,11 +3100,11 @@ def rank_cost(left, right, lable, weight=None, name=None, coeff=1.0):
     .. math::

-        C_{i,j} = -\\tilde{P_{ij}} * o_{i,j} + log(1 + e^{o_{i,j}})
+        C_{i,j} & = -\\tilde{P_{ij}} * o_{i,j} + log(1 + e^{o_{i,j}})

-        o_{i,j} = o_i - o_j
+        o_{i,j} & = o_i - o_j

-        \\tilde{P_{i,j}} = \\{0, 0.5, 1\\} \ or \ \\{0, 1\\}
+        \\tilde{P_{i,j}} & = \\{0, 0.5, 1\\} \ or \ \\{0, 1\\}

     In this formula:
       - :math:`C_{i,j}` is the cross entropy cost.
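For reference (not part of this diff): the `&` markers added to the lstmemory and lstm_step_layer docstrings mark the alignment point, so the LSTM equations render as one aligned block, roughly:

    \begin{aligned}
    i_t &= \sigma(W_{xi}x_t + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i)\\
    f_t &= \sigma(W_{xf}x_t + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f)\\
    c_t &= f_t c_{t-1} + i_t \tanh(W_{xc}x_t + W_{hc}h_{t-1} + b_c)\\
    o_t &= \sigma(W_{xo}x_t + W_{ho}h_{t-1} + W_{co}c_t + b_o)\\
    h_t &= o_t \tanh(c_t)
    \end{aligned}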
