remove some copyfrom in AgentLayer and ExpandLayer, fix warning in seq2seq config (PaddlePaddle#183)
luotao1 authored and emailweixu committed Oct 14, 2016
1 parent cebdb66 commit 91df606
Showing 10 changed files with 39 additions and 64 deletions.
8 changes: 4 additions & 4 deletions demo/seqToseq/seqToseq_net.py
@@ -96,12 +96,12 @@ def gru_encoder_decoder(data_conf,
encoded_vector = concat_layer(input=[src_forward, src_backward])

with mixed_layer(size=decoder_size) as encoded_proj:
-encoded_proj += full_matrix_projection(encoded_vector)
+encoded_proj += full_matrix_projection(input=encoded_vector)

backward_first = first_seq(input=src_backward)
with mixed_layer(size=decoder_size,
act=TanhActivation(), ) as decoder_boot:
-decoder_boot += full_matrix_projection(backward_first)
+decoder_boot += full_matrix_projection(input=backward_first)

def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
decoder_mem = memory(name='gru_decoder',
@@ -113,8 +113,8 @@ def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
decoder_state=decoder_mem, )

with mixed_layer(size=decoder_size * 3) as decoder_inputs:
-decoder_inputs += full_matrix_projection(context)
-decoder_inputs += full_matrix_projection(current_word)
+decoder_inputs += full_matrix_projection(input=context)
+decoder_inputs += full_matrix_projection(input=current_word)

gru_step = gru_step_layer(name='gru_decoder',
input=decoder_inputs,
2 changes: 1 addition & 1 deletion paddle/cuda/include/hl_sequence.h
@@ -143,7 +143,7 @@ extern void hl_context_projection_backward_weight(real* outputGrad,
*/
extern void hl_sequence2batch_copy(real *batch,
real *sequence,
-int *batchIndex,
+const int *batchIndex,
int seqWidth,
int batchCount,
bool seq2batch);
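The `const` added to `batchIndex` here (and mirrored in the stub and the CUDA kernel below, and in `copyByRowIndex` in Matrix.h/Matrix.cpp) is what lets `copyByRowIndex` accept a `const IVector&` and pass `rowIndex.getData()` straight through. A minimal sketch of the underlying rule, using hypothetical `copyRows`/`gather` helpers rather than the real Paddle signatures:

```cpp
#include <vector>

// A minimal sketch (not PaddlePaddle code) of why the const qualifier above
// matters: data obtained from a const container comes back as `const int*`,
// which a plain `int*` parameter rejects.
static void copyRows(float* dst, const float* src, const int* rowIndex,
                     int rows, int width) {
  // Gather: row i of dst is row rowIndex[i] of src.
  for (int i = 0; i < rows; ++i)
    for (int j = 0; j < width; ++j)
      dst[i * width + j] = src[rowIndex[i] * width + j];
}

static void gather(std::vector<float>& dst, const std::vector<float>& src,
                   const std::vector<int>& index, int width) {
  // index.data() is `const int*` because `index` is a const reference;
  // with an `int*` parameter this call would need a cast or trigger the
  // qualifier warning that this commit removes.
  copyRows(dst.data(), src.data(), index.data(),
           static_cast<int>(index.size()), width);
}
```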
2 changes: 1 addition & 1 deletion paddle/cuda/include/stub/hl_sequence_stub.h
@@ -62,7 +62,7 @@ inline void hl_context_projection_backward_weight(real* outputGrad,

inline void hl_sequence2batch_copy(real *batch,
real *sequence,
-int *batchIndex,
+const int *batchIndex,
int seqWidth,
int batchCount,
bool seq2batch) {}
4 changes: 2 additions & 2 deletions paddle/cuda/src/hl_cuda_sequence.cu
@@ -374,7 +374,7 @@ template<int blockDimX, int blockDimY, int gridDimX, bool seq2batch, bool isAdd>
__global__
void KeSequence2Batch(real *batch,
real *sequence,
-int *batchIndex,
+const int *batchIndex,
int seqWidth,
int batchCount) {
int idx = threadIdx.x;
@@ -405,7 +405,7 @@ void KeSequence2Batch(real *batch,

void hl_sequence2batch_copy(real *batch,
real *sequence,
-int *batchIndex,
+const int *batchIndex,
int seqWidth,
int batchCount,
bool seq2batch) {
29 changes: 12 additions & 17 deletions paddle/gserver/layers/AgentLayer.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */


#include "AgentLayer.h"

#include "paddle/utils/Logging.h"
@@ -62,8 +61,8 @@ void SequenceAgentLayer::forward(PassType passType) {

// get Arguments from real layers
if (numSamples_ > 0 && numSamples_ < realNumSequences) {
-int numRows = realOutput.sequenceStartPositions->
-    getData(false)[numSamples_];
+int numRows =
+    realOutput.sequenceStartPositions->getData(false)[numSamples_];
CHECK(!realOutput.ids) << "Not supported";
output_.subArgFrom(realOutput, /* offset */ 0, numRows, getSize(), useGpu_,
/* trans */ false, /* seqFlag */ true,
@@ -141,8 +140,8 @@ void ScatterAgentLayer::forward(PassType passType) {

int width = this->getSize();
if (realOutArg_.value || realOutArg_.ids) {
-output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_,
-                   width, useGpu_);
+output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_, width,
+                   useGpu_);
} else { // used in generation
if (realLayer_->getOutput().ids) {
IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_);
@@ -224,8 +223,8 @@ void SequenceScatterAgentLayer::forward(PassType passType) {

if (realOutArg_.value || realOutArg_.ids) {
CHECK(realOutArg_.sequenceStartPositions);
-output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_,
-                   width, useGpu_, /* trans */ false, /* seqFlag */ true,
+output_.subArgFrom(realOutArg_, /* offset */ idIndex_, idSize_, width,
+                   useGpu_, /* trans */ false, /* seqFlag */ true,
/* seqStart */ seqStartPosIndex_,
/* seqSize */ numSequences_);
} else {
@@ -249,11 +248,12 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
CHECK_NE(input.sequenceStartPositions.get(),
output_.sequenceStartPositions.get());
ICpuGpuVector::resizeOrCreate(output_.sequenceStartPositions,
-                                  numSequences + 1, false);
+                              numSequences + 1, false);
int* outStarts = output_.sequenceStartPositions->getMutableData(false);

-IVector::resizeOrCreate(cpuInputStartPos_, height, false);
-int* inStarts = cpuInputStartPos_->getData();
+ICpuGpuVector::resizeOrCreate(inputStartPos_, height, false);
+int* inStarts = inputStartPos_->getMutableData(false);

size_t offsetOut = 0;
for (size_t i = 0; i < numSequences; ++i) {
outStarts[i] = offsetOut;
@@ -266,13 +266,8 @@ void SequenceScatterAgentLayer::forward(PassType passType) {
}
outStarts[numSequences] = offsetOut;

-if (useGpu_) {
-  IVector::resizeOrCreate(inputStartPos_, height, true);
-  inputStartPos_->copyFrom(*cpuInputStartPos_, HPPL_STREAM_DEFAULT);
-} else {
-  inputStartPos_ = cpuInputStartPos_;
-}
-outputValue->copyByRowIndex(*input.value, *inputStartPos_);
+outputValue->copyByRowIndex(*input.value,
+                            *inputStartPos_->getVector(useGpu_));
}
}

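This hunk is the core of the "remove some copyfrom" change: instead of filling a CPU `IVectorPtr` and explicitly copying it into a second GPU vector, the layer now writes once through `ICpuGpuVector::getMutableData(false)` and hands consumers `getVector(useGpu_)`, leaving the host-to-device transfer to the vector itself. A minimal sketch of that dual-buffer idea, using a simplified `CpuGpuVector` stand-in rather than Paddle's real `ICpuGpuVector`:

```cpp
#include <cstddef>
#include <vector>

// Simplified stand-in for ICpuGpuVector: one logical vector backed by a CPU
// buffer and a lazily synchronized "device" buffer (modeled here as a second
// std::vector).
class CpuGpuVector {
 public:
  explicit CpuGpuVector(std::size_t n) : cpu_(n), gpuStale_(true) {}

  // Writers fill the CPU side and implicitly invalidate the device copy.
  int* getMutableData() {
    gpuStale_ = true;
    return cpu_.data();
  }

  // Readers ask for the view they need; the transfer happens here, once,
  // instead of at every call site (the deleted useGpu_/copyFrom branch).
  const std::vector<int>& getVector(bool useGpu) {
    if (useGpu && gpuStale_) {
      gpu_ = cpu_;  // stands in for a host-to-device copy
      gpuStale_ = false;
    }
    return useGpu ? gpu_ : cpu_;
  }

 private:
  std::vector<int> cpu_, gpu_;
  bool gpuStale_;
};
```

The same swap is applied to ExpandLayer below, whose header had carried a TODO asking for exactly this merge.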
6 changes: 1 addition & 5 deletions paddle/gserver/layers/AgentLayer.h
@@ -191,11 +191,7 @@ class SequenceScatterAgentLayer : public ScatterAgentLayer {
protected:
// use to store expanded cpuStartPositions or subSequenceStartPositions
// of real layer.
-IVectorPtr cpuInputStartPos_;
-
-// point to cpuInputStartPos_ when useGpu_ is false
-// copy from cpuInputStartPos_ when useGpu_ is true
-IVectorPtr inputStartPos_;
+ICpuGpuVectorPtr inputStartPos_;

public:
explicit SequenceScatterAgentLayer(const LayerConfig& config)
31 changes: 10 additions & 21 deletions paddle/gserver/layers/ExpandLayer.cpp
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */


#include "ExpandLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
@@ -53,9 +52,8 @@ void ExpandLayer::forward(PassType passType) {
const Argument& shapeInput = getInput(1);
const Argument& dataInput = getInput(0);
size_t outputBatchSize = shapeInput.getBatchSize();
-auto startPositions =
-    type_ ? shapeInput.subSequenceStartPositions
-          : shapeInput.sequenceStartPositions;
+auto startPositions = type_ ? shapeInput.subSequenceStartPositions
+                            : shapeInput.sequenceStartPositions;
size_t numSequences = startPositions->getSize() - 1;
const int* starts = startPositions->getData(false);

@@ -71,8 +69,7 @@ void ExpandLayer::forward(PassType passType) {
// set output sequence info as shape sequence
output_.sequenceStartPositions = shapeInput.sequenceStartPositions;
if (shapeInput.hasSubseq()) {
-output_.subSequenceStartPositions =
-    shapeInput.subSequenceStartPositions;
+output_.subSequenceStartPositions = shapeInput.subSequenceStartPositions;
}

// reserve output: Expand output to batchsize of sequence data.
Expand All @@ -81,24 +78,17 @@ void ExpandLayer::forward(PassType passType) {
MatrixPtr inputValue = getInputValue(0);
MatrixPtr outputValue = getOutputValue();

-IVector::resizeOrCreate(cpuExpandStartsPos_, outputBatchSize, false);
-int* expandStarts = cpuExpandStartsPos_->getData();
+ICpuGpuVector::resizeOrCreate(expandStartsPos_, outputBatchSize, false);
+int* expandStarts = expandStartsPos_->getMutableData(false);
for (size_t sequenceId = 0; sequenceId < numSequences; ++sequenceId) {
int sequenceLength = starts[sequenceId + 1] - starts[sequenceId];
for (int j = 0; j < sequenceLength; j++) {
expandStarts[starts[sequenceId] + j] = sequenceId;
}
}

-if (useGpu_) {
-  // TODO(Dangqingqing) move copyFrom
-  IVector::resizeOrCreate(expandStartsPos_, outputBatchSize, true);
-  expandStartsPos_->copyFrom(*cpuExpandStartsPos_, HPPL_STREAM_DEFAULT);
-} else {
-  expandStartsPos_ = cpuExpandStartsPos_;
-}
-
-outputValue->copyByRowIndex(*inputValue, *expandStartsPos_);
+outputValue->copyByRowIndex(*inputValue,
+                            *expandStartsPos_->getVector(useGpu_));

if (biases_.get() != NULL) {
outputValue->addBias(*(biases_->getW()), 1);
@@ -108,16 +98,15 @@ void ExpandLayer::backward(const UpdateCallback& callback) {
void ExpandLayer::backward(const UpdateCallback& callback) {
if (biases_ && biases_->getWGrad()) {
biases_->getWGrad()->collectBias(*getOutputGrad(), 1);
-  /* Increasing the number of gradient */
+    /* Increasing the number of gradient */
biases_->getParameterPtr()->incUpdate(callback);
}

if (!getInputGrad(0)) return;
MatrixPtr inputGrad = getInputGrad(0);
MatrixPtr outputGrad = getOutputGrad();
-auto cpuSeqStartPos =
-    type_ ? getInput(1).subSequenceStartPositions
-          : getInput(1).sequenceStartPositions;
+auto cpuSeqStartPos = type_ ? getInput(1).subSequenceStartPositions
+                            : getInput(1).sequenceStartPositions;
size_t numSequences = cpuSeqStartPos->getSize() - 1;
const int* starts = cpuSeqStartPos->getData(false);

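For context on the loop this diff keeps: ExpandLayer turns one row per sequence into one row per timestep by building a row index that names, for each output position, the sequence it belongs to; `copyByRowIndex` then gathers input rows through that index. A standalone sketch of the index construction (plain containers, not Paddle types):

```cpp
#include <cstddef>
#include <vector>

// Mirror of the expandStarts loop in ExpandLayer::forward: `starts` holds
// numSequences + 1 offsets (the layer's sequenceStartPositions), and the
// result maps every output row to the sequence whose value it repeats.
static std::vector<int> buildExpandIndex(const std::vector<int>& starts) {
  std::vector<int> rowIndex(starts.back());
  for (std::size_t seq = 0; seq + 1 < starts.size(); ++seq)
    for (int row = starts[seq]; row < starts[seq + 1]; ++row)
      rowIndex[row] = static_cast<int>(seq);
  return rowIndex;
}
// With starts = {0, 3, 5}: rowIndex == {0, 0, 0, 1, 1}, so sequence 0's
// single input row is copied three times and sequence 1's twice.
```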
7 changes: 1 addition & 6 deletions paddle/gserver/layers/ExpandLayer.h
@@ -44,14 +44,9 @@ class ExpandLayer : public Layer {
enum ExpandLevel { kNonSeq = 0, kSeq = 1 };
/// store the ExpandLevel
int type_;
-// TODO(luotao) use ICpuGpuVectorPtr to merge cpuExpandStartsPos_
-// and expandStartsPos_
/// expanded sequenceStartPositions or subSequenceStartPositions
/// of input[1]
-IVectorPtr cpuExpandStartsPos_;
-/// point to cpuExpandStartsPos_ when useGpu_ is false,
-/// copy from cpuExpandStartsPos_ when useGpu_ is true
-IVectorPtr expandStartsPos_;
+ICpuGpuVectorPtr expandStartsPos_;

public:
explicit ExpandLayer(const LayerConfig& config) : Layer(config) {}
8 changes: 4 additions & 4 deletions paddle/math/Matrix.cpp
@@ -282,13 +282,13 @@ void GpuMatrix::copyFrom(const IVector& src) {
copyFrom(matrix);
}

-void GpuMatrix::copyByRowIndex(Matrix& b, IVector& rowIndex) {
+void GpuMatrix::copyByRowIndex(Matrix& b, const IVector& rowIndex) {
size_t height = getHeight();
size_t width = getWidth();
CHECK_EQ(b.getWidth(), width);
real* dst = getData();
real* src = b.getData();
-int* index = rowIndex.getData();
+const int* index = rowIndex.getData();
hl_sequence2batch_copy(dst, src, index, width, height, true);
}

@@ -1278,11 +1278,11 @@ void CpuMatrix::copyFrom(const IVector& src) {
}
}

-void CpuMatrix::copyByRowIndex(Matrix& b, IVector& rowIndex) {
+void CpuMatrix::copyByRowIndex(Matrix& b, const IVector& rowIndex) {
size_t height = getHeight();
size_t width = getWidth();
CHECK_EQ(b.getWidth(), width);
-int* index = rowIndex.getData();
+const int* index = rowIndex.getData();
for (size_t i = 0; i < height; i++) {
CHECK_LT(static_cast<size_t>(index[i]), b.getHeight());
real* src = b.getData() + index[i] * width;
6 changes: 3 additions & 3 deletions paddle/math/Matrix.h
@@ -253,7 +253,7 @@ class Matrix : public BaseMatrix {
LOG(FATAL) << "copy data from int vector only available on CpuMatrix.";
}

-virtual void copyByRowIndex(Matrix& b, IVector& rowIndex) {
+virtual void copyByRowIndex(Matrix& b, const IVector& rowIndex) {
LOG(FATAL) << "Not implemented";
}

@@ -979,7 +979,7 @@ class GpuMatrix : public Matrix {

void copyFrom(const IVector& src);

-void copyByRowIndex(Matrix& b, IVector& rowIndex);
+void copyByRowIndex(Matrix& b, const IVector& rowIndex);

MatrixPtr clone(size_t height, size_t width, bool useGpu = false);

@@ -1241,7 +1241,7 @@ class CpuMatrix : public Matrix {

void copyFrom(CpuSparseMatrix& src);

-void copyByRowIndex(Matrix& b, IVector& rowIndex);
+void copyByRowIndex(Matrix& b, const IVector& rowIndex);

MatrixPtr clone(size_t height, size_t width, bool useGpu = false);

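The three Matrix.h hunks show why the const change has to touch the header and both implementations at once: the base class declares `copyByRowIndex` with a fail-fast default, and `GpuMatrix`/`CpuMatrix` override it, so all three signatures must stay in lockstep. A stripped-down sketch of that shape (hypothetical `Mat` types, not the real hierarchy):

```cpp
#include <cstdio>
#include <cstdlib>
#include <vector>

struct Mat {
  virtual ~Mat() = default;
  // Fail-fast default, in the spirit of LOG(FATAL) << "Not implemented":
  // matrix kinds that support the operation must override it.
  virtual void copyByRowIndex(Mat& b, const std::vector<int>& rowIndex) {
    (void)b;
    (void)rowIndex;
    std::fprintf(stderr, "Not implemented\n");
    std::abort();
  }
};

struct CpuMat : Mat {
  // The override must repeat the exact (now const-qualified) signature;
  // without the `override` keyword, a mismatch would silently hide the
  // base function instead of overriding it.
  void copyByRowIndex(Mat& b, const std::vector<int>& rowIndex) override {
    (void)b;
    (void)rowIndex;  // the real implementation gathers rows of b here
  }
};
```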
