
Commit 260c734
fix bug in trainer_config_helpers
ISSUE=4592807 

git-svn-id: https://svn.baidu.com/idl/trunk/paddle@1423 1ad973e4-5ce8-4261-8a94-b56d1f490c56
luotao02 committed Aug 30, 2016
1 parent 9c0895e commit 260c734
Showing 9 changed files with 171 additions and 108 deletions.
2 changes: 1 addition & 1 deletion doc/ui/api/trainer_config_helpers/activations.rst
@@ -51,7 +51,7 @@ SequenceSoftmaxActivation
 =========================

 .. automodule:: paddle.trainer_config_helpers.activations
-    :members: SequenceSoftmax
+    :members: SequenceSoftmaxActivation
     :noindex:

 ReluActivation
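For orientation only (not part of this diff): an activation documented in this file is normally handed to a layer through its act argument. A minimal, hypothetical sketch, with made-up layer names and sizes:

    # Hypothetical sketch, not from this commit; input layer and sizes are illustrative.
    from paddle.trainer_config_helpers import *

    prev_layer = data_layer(name="scores_in", size=1)
    scores = fc_layer(input=prev_layer, size=1,
                      act=SequenceSoftmaxActivation())  # the class the corrected entry now points to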
12 changes: 12 additions & 0 deletions doc/ui/api/trainer_config_helpers/layers.rst
@@ -136,6 +136,18 @@ gru_step_layer
 Recurrent Layer Group
 =====================

+recurrent_group
+---------------
+.. automodule:: paddle.trainer_config_helpers.layers
+    :members: recurrent_group
+    :noindex:
+
+beam_search
+------------
+.. automodule:: paddle.trainer_config_helpers.layers
+    :members: beam_search
+    :noindex:
+
 get_output_layer
 -----------------
 .. automodule:: paddle.trainer_config_helpers.layers
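For orientation only (not part of this diff): recurrent_group, one of the two newly documented entries, wraps a per-timestep step function and applies it to each element of a sequence. A minimal sketch, assuming the usual layer helpers and made-up names and sizes:

    # Hypothetical sketch, not from this commit; vocabulary and hidden sizes are made up.
    from paddle.trainer_config_helpers import *

    words = data_layer(name="word_ids", size=10000)
    emb = embedding_layer(input=words, size=128)

    def step(word_vec):
        prev = memory(name="rnn_state", size=128)          # this step's output at t-1
        return fc_layer(input=[word_vec, prev], size=128,
                        act=TanhActivation(), name="rnn_state")

    rnn = recurrent_group(name="rnn", step=step, input=emb)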
36 changes: 27 additions & 9 deletions doc/ui/api/trainer_config_helpers/networks.rst
@@ -43,34 +43,52 @@ vgg_16_network
 Recurrent
 =========

+LSTM
+----
+
 lstmemory_unit
---------------
+``````````````
 .. automodule:: paddle.trainer_config_helpers.networks
     :members: lstmemory_unit
     :noindex:

 lstmemory_group
----------------
+```````````````
 .. automodule:: paddle.trainer_config_helpers.networks
     :members: lstmemory_group
     :noindex:

+simple_lstm
+```````````
+.. automodule:: paddle.trainer_config_helpers.networks
+    :members: simple_lstm
+    :noindex:
+
+bidirectional_lstm
+``````````````````
+.. automodule:: paddle.trainer_config_helpers.networks
+    :members: bidirectional_lstm
+    :noindex:
+
+GRU
+---
+
 gru_unit
---------
+````````
 .. automodule:: paddle.trainer_config_helpers.networks
     :members: gru_unit
     :noindex:

-simple_lstm
------------
+gru_group
+`````````
 .. automodule:: paddle.trainer_config_helpers.networks
-    :members: simple_lstm
+    :members: gru_group
     :noindex:

-bidirectional_lstm
-------------------
+simple_gru
+``````````
 .. automodule:: paddle.trainer_config_helpers.networks
-    :members: bidirectional_lstm
+    :members: simple_gru
     :noindex:

 simple_attention
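For orientation only (not part of this diff): the helpers regrouped under the new LSTM and GRU headings are called like ordinary layers. A minimal sketch with assumed names and sizes:

    # Hypothetical sketch, not from this commit; vocabulary and layer sizes are made up.
    from paddle.trainer_config_helpers import *

    words = data_layer(name="word_ids", size=10000)
    emb = embedding_layer(input=words, size=256)
    lstm = simple_lstm(input=emb, size=512)   # listed under "LSTM" above
    gru = simple_gru(input=emb, size=512)     # listed under "GRU" above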
4 changes: 2 additions & 2 deletions doc/ui/api/trainer_config_helpers/optimizers.rst
@@ -10,10 +10,10 @@ AdamOptimizer
     :members: AdamOptimizer
     :noindex:

-AdamxOptimizer
+AdamaxOptimizer
 ================
 .. automodule:: paddle.trainer_config_helpers.optimizers
-    :members: AdamxOptimizer
+    :members: AdamaxOptimizer
     :noindex:

 AdaGradOptimizer
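For orientation only (not part of this diff): the renamed entry points at the AdamaxOptimizer class, which a trainer config selects roughly as sketched below; the exact settings() keyword arguments and values are assumptions here, not verified against this revision.

    # Hypothetical sketch, not from this commit; batch size and learning rate are made up.
    from paddle.trainer_config_helpers import *

    settings(batch_size=128,
             learning_rate=1e-3,
             learning_method=AdamaxOptimizer())  # the docs previously spelled this AdamxOptimizer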
3 changes: 1 addition & 2 deletions paddle/.gitignore
@@ -28,9 +28,8 @@ ld-linux-x86-64.so.2
 x86_64-scm-linux-gnu/
 .lint.*.md5

-examples/crf/*.bin
-
+.idea/
 .test_env
 Paddle_wrap.cxx
 Paddle_wrap.h
 paddle.py
23 changes: 12 additions & 11 deletions paddle/gserver/layers/LstmLayer.h
@@ -97,13 +97,13 @@ class LstmLayer : public Layer, public LstmCompute {
    * @param starts Each start position of each samples.
    * @param inputValue The input values.
    */
-  void forwardSequence(int batchSize, size_t numSequences,
-                       const int *starts, MatrixPtr inputValue);
+  void forwardSequence(int batchSize, size_t numSequences, const int *starts,
+                       MatrixPtr inputValue);
   /**
    * Compute lstm backward one sequence by one sequence.
    */
-  void backwardSequence(int batchSize, size_t numSequences,
-                        const int *starts, MatrixPtr inputGrad);
+  void backwardSequence(int batchSize, size_t numSequences, const int *starts,
+                        MatrixPtr inputGrad);

   /**
    * Compute lstm forward one batch by one batch. The batch value is
@@ -121,21 +121,21 @@ class LstmLayer : public Layer, public LstmCompute {
    * }
    * @endcode
    */
-  void forwardBatch(int batchSize, size_t numSequences,
-                    const int *starts, MatrixPtr inputValue);
+  void forwardBatch(int batchSize, size_t numSequences, const int *starts,
+                    MatrixPtr inputValue);
   /**
    * Compute lstm backward one batch by one batch.
    */
-  void backwardBatch(int batchSize, size_t numSequences,
-                     const int *starts, MatrixPtr inputGrad);
+  void backwardBatch(int batchSize, size_t numSequences, const int *starts,
+                     MatrixPtr inputGrad);

   /**
    * This function only supports GPU. It not need to reorganize input into
    * batch value. It will launch one kernel to parallelly compute forward
    * propagation in sequence level.
    */
-  void forwardSeqParallel(int batchSize, size_t numSequences,
-                          const int *starts, MatrixPtr inputValue);
+  void forwardSeqParallel(int batchSize, size_t numSequences, const int *starts,
+                          MatrixPtr inputValue);
   /**
    * Backward propagation corresponding to forwardSeqParallel.
    */
@@ -157,7 +157,8 @@ class LstmLayer : public Layer, public LstmCompute {
   /// The weight ([size, 4*size]) contains \f$W_{hi}, W_{hf}, W_{hc}, W_{ho}\f$.
   std::unique_ptr<Weight> weight_;
   /// Learned bias parameter, shape: (1, 7 * size).
-  /// The bias contains \f$b_i, b_f, b_c, b_o\f$ and \f$W_{ci}, W_{cf}, W_{co}\f$.
+  /// The bias contains \f$b_i, b_f, b_c, b_o\f$ and \f$W_{ci}, W_{cf},
+  /// W_{co}\f$.
   std::unique_ptr<Weight> bias_;
   /// The reeal bias, point to \f$b_i, b_f, b_c, b_o\f$.
   MatrixPtr localBias_;
52 changes: 26 additions & 26 deletions python/paddle/trainer_config_helpers/layers.py
@@ -669,7 +669,7 @@ def fc_layer(input, size, act=None, name=None,
                    act=LinearActivation(),
                    bias_attr=False)

-    which is equal to:
+    which is equal to:

     .. code-block:: python
@@ -795,15 +795,15 @@ def lstmemory(input, name=None, reverse=False, act=None,
     .. math::

-        i_t = \\sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i)
+        i_t & = \\sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i)

-        f_t = \\sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f)
+        f_t & = \\sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f)

-        c_t = f_tc_{t-1} + i_t tanh (W_{xc}x_t+W_{hc}h_{t-1} + b_c)
+        c_t & = f_tc_{t-1} + i_t tanh (W_{xc}x_t+W_{hc}h_{t-1} + b_c)

-        o_t = \\sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o)
+        o_t & = \\sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o)

-        h_t = o_t tanh(c_t)
+        h_t & = o_t tanh(c_t)

     NOTE: In paddle's implementation, the multiply operation
@@ -1294,15 +1294,15 @@ def hsigmoid(input, label, num_classes, name=None, bias_attr=None, layer_attr=No
                           label=data_layer,
                           num_classes=3)

-    :param name: layer name
-    :type name: basestring
     :param input: Input layers. It could be a LayerOutput or list/tuple of
                   LayerOutput.
     :type input: LayerOutput|list|tuple
     :param label: Label layer.
     :type label: LayerOutput
     :param num_classes: number of classes.
     :type num_classes: int
+    :param name: layer name
+    :type name: basestring
     :param bias_attr: Bias attribute. None means default bias.
                       False means no bias.
     :type bias_attr: ParameterAttribute|False
@@ -1943,18 +1943,18 @@ def lstm_step_layer(input, state, size, act=None,
     .. math::

-        i_t = \\sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i)
+        i_t & = \\sigma(W_{xi}x_{t} + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i)

-        f_t = \\sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f)
+        f_t & = \\sigma(W_{xf}x_{t} + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f)

-        c_t = f_tc_{t-1} + i_t tanh (W_{xc}x_t+W_{hc}h_{t-1} + b_c)
+        c_t & = f_tc_{t-1} + i_t tanh (W_{xc}x_t+W_{hc}h_{t-1} + b_c)

-        o_t = \\sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o)
+        o_t & = \\sigma(W_{xo}x_{t} + W_{ho}h_{t-1} + W_{co}c_t + b_o)

-        h_t = o_t tanh(c_t)
+        h_t & = o_t tanh(c_t)

-    The input\_ of lstm step is :math:`Wx_t + Wh_{t-1}`, and user should use
+    The input of lstm step is :math:`Wx_t + Wh_{t-1}`, and user should use
     :code:`mixed_layer` and :code:`full_matrix_projection` to calculate these
     input vector.
@@ -2347,12 +2347,12 @@ def eos_layer(input, eos_id, name=None, layer_attr=None):
     eos = eos_layer(input=layer, eos_id=id)

-    :param name: Layer name.
-    :type name: basestring
     :param input: Input layer name.
     :type input: LayerOutput
     :param eos_id: end id of sequence
     :type eos_id: int
+    :param name: Layer name.
+    :type name: basestring
     :param layer_attr: extra layer attributes.
     :type layer_attr: ExtraLayerAttribute.
     :return: layer name.
@@ -2529,11 +2529,11 @@ def conv_operator(input, filter_size, num_filters,
     :param num_filter: channel of output data.
     :type num_filter: int
     :param num_channel: channel of input data.
-    :rtype num_channel: int
+    :type num_channel: int
     :param stride: The x dimension of the stride.
-    :rtype stride: int
+    :type stride: int
     :param stride_y: The y dimension of the stride.
-    :rtype stride_y: int
+    :type stride_y: int
     :param padding: The x dimension of padding.
     :type padding: int
     :param padding_y: The y dimension of padding.
@@ -2632,7 +2632,7 @@ def tensor_layer(input, size, act=None, name=None,
     :param input: Input layer.
     :type input: LayerOutput|list|tuple.
     :param size: the layer dimension.
-    :rtype: int.
+    :type size: int.
     :param act: Activation Type. Default is tanh.
     :type act: BaseActivation
     :param param_attr: The Parameter Attribute.
@@ -2840,7 +2840,7 @@ def convex_comb_layer(input, size, name=None):
     """
     A layer for convex weighted average of vectors takes two inputs.
       - Input: a vector containing the convex weights (batchSize x weightdim),
-               and a matrix in a vector form (batchSize x (weightdim*datadim)).
+               and a matrix in a vector form (batchSize x (weightdim * datadim)).
       - Output: a vector (batchSize * datadim).

     .. math::
@@ -2893,8 +2893,8 @@ def block_expand_layer(input,
                        name=None):
     """
     Expand feature map to minibatch matrix.
-    - matrix width is: block_y * block_x * channel
-    - matirx height is: outputH * outputW
+    - matrix width is: block_y * block_x * channel
+    - matirx height is: outputH * outputW

     .. math::
@@ -3100,11 +3100,11 @@ def rank_cost(left, right, lable, weight=None, name=None, coeff=1.0):
     .. math::

-        C_{i,j} = -\\tilde{P_{ij}} * o_{i,j} + log(1 + e^{o_{i,j}})
+        C_{i,j} & = -\\tilde{P_{ij}} * o_{i,j} + log(1 + e^{o_{i,j}})

-        o_{i,j} = o_i - o_j
+        o_{i,j} & = o_i - o_j

-        \\tilde{P_{i,j}} = \\{0, 0.5, 1\\} \ or \ \\{0, 1\\}
+        \\tilde{P_{i,j}} & = \\{0, 0.5, 1\\} \ or \ \\{0, 1\\}

     In this formula:
       - :math:`C_{i,j}` is the cross entropy cost.
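For reference (not part of this diff): the `&` markers added to the lstmemory and lstm_step_layer docstrings mark the alignment point, so the LSTM equations render as one aligned block, roughly:

    \begin{aligned}
    i_t &= \sigma(W_{xi}x_t + W_{hi}h_{t-1} + W_{ci}c_{t-1} + b_i)\\
    f_t &= \sigma(W_{xf}x_t + W_{hf}h_{t-1} + W_{cf}c_{t-1} + b_f)\\
    c_t &= f_t c_{t-1} + i_t \tanh(W_{xc}x_t + W_{hc}h_{t-1} + b_c)\\
    o_t &= \sigma(W_{xo}x_t + W_{ho}h_{t-1} + W_{co}c_t + b_o)\\
    h_t &= o_t \tanh(c_t)
    \end{aligned}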
