Skip to content

Commit

Permalink
Correctly handle memory in RecurrentGradientMachine for hierarchical RNN
Browse files Browse the repository at this point in the history
Change-Id: I8e0a8ea6fc2760652d9c76440a539c90860062d3
  • Loading branch information
xuwei06 authored and reyoung committed Sep 14, 2016
1 parent 699d5f2 commit 9a9de92
Show file tree
Hide file tree
Showing 9 changed files with 207 additions and 9 deletions.
11 changes: 9 additions & 2 deletions paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,6 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
dynamic_cast<GatherAgentLayer*>(outFrameLine.agentLayer.get());
gatherAgent->addRealLayer(outFrameLine.frames[i]);
}

// connect memory links
// Adopt info_[0].idIndex because seq which has_subseq=True
// doesn't support Memory with !hasSubseq bootlayer;
Expand All @@ -529,7 +528,7 @@ void RecurrentGradientMachine::forward(const std::vector<Argument>& inArgs,
NeuralNetwork::connect(
memoryFrameLine.agents[i],
i == 0 ? memoryFrameLine.bootLayer : memoryFrameLine.frames[i - 1],
idSize /*height of agent*/);
numSeqs_[i] /*height of agent*/);
}
}

Expand Down Expand Up @@ -622,6 +621,8 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
// numSequences: # samples(sequences) in a batch
size_t numSequences = input.getNumSequences();
std::vector<int> allIds;

numSeqs_.clear();
Info* inlink_info = &info_[inlinks_id];
inlink_info->idIndex.clear();
inlink_info->idIndex.push_back(0); // first idIndex = 0
Expand All @@ -634,10 +635,12 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
// maxSequenceLength_: max number of sentences(subseq) in all samples
for (int i = 0; i < maxSequenceLength_; ++i) {
sequenceStartPositions.push_back(0); // first element = 0
int numSeqs = 0;
for (size_t j = 0; j < numSubSequences; ++j) { // for each sentence
// seqLengthAndStart_[inlinks_id][j]:
// a 4-tuple including <subseqlen, subseqstart, seqid, subseqid>
if (std::get<3>(seqLengthAndStart_[inlinks_id][j]) == i) {
++numSeqs;
// subseqstart: the cpuSubSequenceStartPositions of this subseq
int subSeqStart = std::get<1>(seqLengthAndStart_[inlinks_id][j]);
int subSeqLength = std::get<0>(seqLengthAndStart_[inlinks_id][j]);
Expand All @@ -650,6 +653,7 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
}
inlink_info->idIndex.push_back(allIds.size());
inlink_info->seqStartPosIndex.push_back(sequenceStartPositions.size());
numSeqs_.push_back(numSeqs);
}
// inFrameLine create sequenceStartPositions one time
CHECK_EQ(sequenceStartPositions.size(),
Expand All @@ -659,16 +663,19 @@ void RecurrentGradientMachine::createInFrameInfo(int inlinks_id,
createSeqPos(sequenceStartPositions, &inlink_info->sequenceStartPositions);
} else { // for scatterAgentLayer
for (int i = 0; i < maxSequenceLength_; ++i) {
int numSeqs = 0;
for (size_t j = 0; j < numSequences; ++j) {
int seqLength = std::get<0>(seqLengthAndStart_[inlinks_id][j]);
if (i >= seqLength) {
break;
}
++numSeqs;
int seqStart = std::get<1>(seqLengthAndStart_[inlinks_id][j]);
allIds.push_back(reversed_ ? (seqStart + seqLength - 1 - i)
: (seqStart + i));
}
inlink_info->idIndex.push_back(allIds.size());
numSeqs_.push_back(numSeqs);
}
}

Expand Down
4 changes: 4 additions & 0 deletions paddle/gserver/gradientmachines/RecurrentGradientMachine.h
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,10 @@ class RecurrentGradientMachine : public NeuralNetwork {
};
std::vector<Info> info_;

// numSeqs_[i] is the number of sequences which are longer than i (for sequence
// data) or have more than i subsequences (for subsequence data)
std::vector<int> numSeqs_;

// each inlinks has a "std::vector<std::tuple<int, int, int, int>>" denotes
// its sequence info:
// if hasSubSeq, tuple of (subSeqLength, subSeqStart, seqIndex, subSeqIndex)
Expand Down
1 change: 1 addition & 0 deletions paddle/gserver/tests/Sequence/dummy.list
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
dummy_file_no_use
35 changes: 35 additions & 0 deletions paddle/gserver/tests/rnn_data_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer.PyDataProvider2 import *

data = [
[[[1, 3, 2], [4, 5, 2]], 0],
[[[0, 2], [2, 5], [0, 1, 2]], 1],
]

@provider(input_types=[integer_value_sub_sequence(10), integer_value(2)])
def process_subseq(settings, file_name):
    """Yield every entry of the module-level ``data`` list unchanged.

    Each entry is a two-element list: a list of sub-sequences of word ids,
    followed by an integer label. ``settings`` and ``file_name`` are accepted
    but not used by this function.
    """
    for sample in data:
        yield sample

@provider(input_types=[integer_value_sequence(10), integer_value(2)])
def process_seq(settings, file_name):
    """Yield every entry of ``data`` with its sub-sequences flattened.

    For each entry, the word ids of all sub-sequences are concatenated in
    order into one flat list, which is yielded together with the entry's
    label. ``settings`` and ``file_name`` are accepted but not used.
    """
    for entry in data:
        # Concatenate the sub-sequences, preserving their original order.
        flat_words = [word for subseq in entry[0] for word in subseq]
        yield flat_words, entry[1]
3 changes: 0 additions & 3 deletions paddle/gserver/tests/sequenceGen.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
#!/usr/bin/env python
#coding=utf-8

# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down
75 changes: 75 additions & 0 deletions paddle/gserver/tests/sequence_nest_rnn.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer_config_helpers import *

######################## data source ################################
# Training samples are produced by process_subseq in the rnn_data_provider
# module; no test list is configured.
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_subseq')


settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
# Sizes used by the layers defined below.
dict_dim = 10
word_dim = 8
hidden_dim = 8
# NOTE(review): rnn_data_provider declares the label as integer_value(2)
# while label_dim is 3 -- confirm this mismatch is intended.
label_dim = 3

# "word" data layer of size dict_dim, fed into an embedding of size word_dim.
data = data_layer(name="word", size=dict_dim)

emb = embedding_layer(input=data, size=word_dim)

# This hierarchical RNN is designed to be equivalent to the simple RNN in
# sequence_rnn.conf

def outer_step(x):
    """Step function of the outer recurrent group.

    Declares an outer memory named "outer_rnn_state", runs an inner recurrent
    group over ``x`` whose own memory ("inner_rnn_state") is booted from that
    outer memory, and registers the last element of the inner output under
    the name "outer_rnn_state". Returns the full inner group output.
    """
    outer_state = memory(name="outer_rnn_state", size=hidden_dim)

    def inner_step(y):
        # The inner memory shares its name with the fc_layer created below and
        # starts from the outer memory.
        inner_state = memory(name="inner_rnn_state",
                             size=hidden_dim,
                             boot_layer=outer_state)
        hidden = fc_layer(input=[y, inner_state],
                          size=hidden_dim,
                          act=TanhActivation(),
                          bias_attr=True,
                          name="inner_rnn_state")
        return hidden

    inner_out = recurrent_group(step=inner_step, input=x)

    # This layer is created for its name, which matches the outer memory; the
    # variable itself is not returned (see the note below).
    last_state = last_seq(input=inner_out, name="outer_rnn_state")

    # "return last_state" should also work. But currently
    # RecurrentGradientMachine does not handle it correctly. The current
    # implementation requires that all the out links are from sequences.
    # However, it does not report an error when the out links are not
    # sequences.
    return inner_out

# Outer recurrent group: outer_step is the step function and the embedding,
# wrapped in SubsequenceInput, is its input.
out = recurrent_group(
step=outer_step,
input=SubsequenceInput(emb))

value_printer_evaluator(input=out)

# last_seq of the group output feeds a softmax fc_layer of size label_dim.
rep = last_seq(input=out)
prob = fc_layer(size=label_dim,
input=rep,
act=SoftmaxActivation(),
bias_attr=True)

# The config output is the classification cost of prob against the "label"
# data layer.
outputs(classification_cost(input=prob,
label=data_layer(name="label", size=label_dim)))
57 changes: 57 additions & 0 deletions paddle/gserver/tests/sequence_rnn.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer_config_helpers import *

######################## data source ################################
# Training samples are produced by process_seq in the rnn_data_provider
# module; no test list is configured.
define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list',
test_list=None,
module='rnn_data_provider',
obj='process_seq')


settings(batch_size=2, learning_rate=0.01)
######################## network configure ################################
# Sizes used by the layers defined below.
dict_dim = 10
word_dim = 8
hidden_dim = 8
# NOTE(review): rnn_data_provider declares the label as integer_value(2)
# while label_dim is 3 -- confirm this mismatch is intended.
label_dim = 3

# "word" data layer of size dict_dim, fed into an embedding of size word_dim.
data = data_layer(name="word", size=dict_dim)

emb = embedding_layer(input=data, size=word_dim)

def step(y):
    """Step function of the simple RNN.

    Declares a memory named "rnn_state" and returns a tanh fc_layer of size
    ``hidden_dim`` over ``y`` and that memory. The fc_layer carries the same
    name as the memory.
    """
    prev_state = memory(name="rnn_state", size=hidden_dim)
    new_state = fc_layer(input=[y, prev_state],
                         size=hidden_dim,
                         act=TanhActivation(),
                         bias_attr=True,
                         name="rnn_state")
    return new_state

# Recurrent group applying step to the embedding.
out = recurrent_group(
step=step,
input=emb)

value_printer_evaluator(input=out)

# last_seq of the group output feeds a softmax fc_layer of size label_dim.
rep = last_seq(input=out)
prob = fc_layer(size=label_dim,
input=rep,
act=SoftmaxActivation(),
bias_attr=True)

# The config output is the classification cost of prob against the "label"
# data layer.
outputs(classification_cost(input=prob,
label=data_layer(name="label", size=label_dim)))
21 changes: 17 additions & 4 deletions paddle/gserver/tests/test_RecurrentGradientMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ limitations under the License. */
#include <paddle/trainer/TrainerInternal.h>
#include <paddle/gserver/gradientmachines/GradientMachine.h>

P_DECLARE_int32(seed);

using namespace paddle; // NOLINT
using namespace std; // NOLINT
class TrainerForTest : public paddle::Trainer {
Expand Down Expand Up @@ -68,7 +70,9 @@ void CalCost(const string& conf, const string& dir, real* cost,
CpuVector vecMomentum(dim);

// vecW needs to be assigned, otherwise the variable is an uncertain value.
vecW.zeroMem();

*ThreadLocalRand::getSeed() = FLAGS_seed;
vecW.randnorm(0, 0.1);

trainer.startTrain();
for (int i = 0; i < num_passes; ++i) {
Expand All @@ -88,15 +92,13 @@ void CalCost(const string& conf, const string& dir, real* cost,
rmDir(dir.c_str());
}

TEST(RecurrentGradientMachine, HasSubSequence) {
void test(const string& conf1, const string& conf2) {
int num_passes = 5;
real* cost1 = new real[num_passes];
const string conf1 = "gserver/tests/sequence_layer_group.conf";
const string dir1 = "gserver/tests/t1";
CalCost(conf1, dir1, cost1, num_passes);

real* cost2 = new real[num_passes];
const string conf2 = "gserver/tests/sequence_nest_layer_group.conf";
const string dir2 = "gserver/tests/t2";
CalCost(conf2, dir2, cost2, num_passes);

Expand All @@ -109,6 +111,17 @@ TEST(RecurrentGradientMachine, HasSubSequence) {
delete[] cost2;
}

// Passes the flat and nested layer-group configs to test(), which computes
// per-pass costs for each config via CalCost.
// NOTE(review): how test() compares the two cost arrays is not visible in this
// hunk -- presumably they are expected to match; confirm.
TEST(RecurrentGradientMachine, HasSubSequence) {
test("gserver/tests/sequence_layer_group.conf",
"gserver/tests/sequence_nest_layer_group.conf");
}

// Passes the simple RNN config and the nested (hierarchical) RNN config to
// test(), which computes per-pass costs for each config via CalCost.
// NOTE(review): how test() compares the two cost arrays is not visible in this
// hunk -- presumably they are expected to match; confirm.
TEST(RecurrentGradientMachine, rnn) {
test("gserver/tests/sequence_rnn.conf",
"gserver/tests/sequence_nest_rnn.conf");
}


int main(int argc, char** argv) {
if (paddle::version::isWithPyDataProvider()) {
if (!paddle::version::isWithGpu()) {
Expand Down
9 changes: 9 additions & 0 deletions paddle/parameter/Argument.h
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,15 @@ struct Argument {

/*
Get Sequence Length, startPositions and max Length according to input
1. For sequence data:
Each tuple is (seq_length, seq_start, seq_id, seq_id)
The tuples are sorted according to seq_length or subseq_length
*maxSequenceLength is the maximal sequence length
2. For subsequence data:
Each tuple is (subseq_length, subseq_start, seq_id, subseq_id)
The tuples are not sorted. They are in the original order.
*maxSequenceLength is the maximal number of subsequences in each sequence.
*/
void getSeqLengthAndStart(
std::vector<std::tuple<int, int, int, int>>* seqLengthAndStart,
Expand Down

0 comments on commit 9a9de92

Please sign in to comment.