merge Daoyuan's FuncArgs, pass the ContextProjection test.
xutianbing committed Jan 12, 2017
1 parent 1482ec4 commit 23ac0b7
Showing 7 changed files with 101 additions and 247 deletions.
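The diffs below move ContextProjection from the old `calc(inputs, outputs, inouts)` Arguments interface (still visible in the `#if 0` block this commit deletes) to the BufferArgs-based one. For orientation, a minimal sketch of the shape of that interface, with stand-in types: only the `init`/`calc` signatures and the `ADD_TO` tag are taken from the diffs; every other name and definition here is an assumption for illustration, not the real paddle headers.

    #include <cstddef>
    #include <vector>

    // Stand-in types; illustrative only.
    enum ArgType { UNSPECIFIED = 0, ASSIGN_TO = 1, ADD_TO = 2 };

    struct BufferArg {
      void* buf = nullptr;          // buffer pointer; shape/dtype omitted here
      ArgType argType = UNSPECIFIED;
    };

    // An ordered argument list; calc() receives one for inputs, one for outputs.
    class BufferArgs {
    public:
      void addArg(BufferArg arg) { args_.push_back(arg); }
      std::size_t size() const { return args_.size(); }
      const BufferArg& operator[](std::size_t i) const { return args_[i]; }

    private:
      std::vector<BufferArg> args_;
    };

    struct FuncConfig {};  // key/value hyper-parameters such as "context_length"

    class FunctionBase {
    public:
      virtual ~FunctionBase() = default;
      virtual void init(const FuncConfig& /*config*/) {}
      // New-style entry point: read-only tensors in `inputs`, written tensors in
      // `outputs`; each output carries an ArgType saying whether the kernel
      // overwrites it (ASSIGN_TO) or accumulates into it (ADD_TO).
      virtual void calc(const BufferArgs& inputs, const BufferArgs& outputs) = 0;
    };

The key design point is that the old read-write `inouts` bucket disappears: every written tensor becomes an output, and its ArgType says whether the kernel assigns or accumulates.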
2 changes: 1 addition & 1 deletion paddle/function/CMakeLists.txt
@@ -24,7 +24,7 @@ if(WITH_TESTING)
     add_simple_unittest(TensorTypeTest)
     add_simple_unittest(BufferArgTest)
     add_simple_unittest(FunctionTest)
-#    add_simple_unittest(ContextProjectionOpTest)
+    add_simple_unittest(ContextProjectionOpTest)
   endif()
 endif()

181 changes: 35 additions & 146 deletions paddle/function/ContextProjectionOp.cpp
@@ -125,11 +125,11 @@ class ContextProjectionForwardFunc : public FunctionBase {
 
     CHECK_EQ(outputs[0].getArgType(), ADD_TO);
     auto out_mat = outputs[0].matrix<Device>();
-    auto in_mat = inputs[0].matrix<Device>();
-    auto w_mat = !inputs[1].data()
-                     ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
-                     : inputs[1].matrix<Device>();
-    auto seq_vec = inputs[2].vector<int, Device>();
+    const auto in_mat = inputs[0].matrix<Device>();
+    const auto w_mat =
+        !inputs[1].data() ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
+                          : inputs[1].matrix<Device>();
+    const auto seq_vec = inputs[2].vector<int, Device>();
     ContextProjectionForward<Device>(out_mat,
                                      in_mat,
                                      w_mat,
@@ -150,7 +150,6 @@ class ContextProjectionForwardFunc : public FunctionBase {
  *
  */
 template <>
-<<<<<<< HEAD
 void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
                                                 CpuMatrix& in_grad_mat,
                                                 CpuMatrix& w_grad_mat,
@@ -174,7 +173,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
         int64_t pad_size =
             std::min(starts[i] - begin, starts[i + 1] - starts[i]);
         if (is_padding && w_grad_mat) {
-          MatrixPtr mat = out_grad_mat.subMatrix(starts[i], pad_size);
+          MatrixPtr mat = const_cast<CpuMatrix&>(out_grad_mat)
+                              .subMatrix(starts[i], pad_size);
           MatrixPtr sub = w_grad_mat.subMatrix(j, pad_size);
           sub->addAtOffset(*mat, j * input_dim);
         }
@@ -185,8 +185,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
         int64_t pad_size =
             std::min(end - starts[i + 1], starts[i + 1] - starts[i]);
         if (is_padding && w_grad_mat) {
-          MatrixPtr mat =
-              out_grad_mat.subMatrix(starts[i + 1] - pad_size, pad_size);
+          MatrixPtr mat = const_cast<CpuMatrix&>(out_grad_mat)
+                              .subMatrix(starts[i + 1] - pad_size, pad_size);
           MatrixPtr sub = w_grad_mat.subMatrix(
               begin_pad + context_start + j - pad_size, pad_size);
           sub->addAtOffset(*mat, j * input_dim);
@@ -197,7 +197,8 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
       if (end <= begin) continue;
       if (!in_grad_mat) continue;
       MatrixPtr src = in_grad_mat.subMatrix(begin, end - begin);
-      MatrixPtr dst = out_grad_mat.subMatrix(dst_begin, dst_end - dst_begin);
+      MatrixPtr dst = const_cast<CpuMatrix&>(out_grad_mat)
+                          .subMatrix(dst_begin, dst_end - dst_begin);
       src->addAtOffset(*dst, j * input_dim);
     }
   }
@@ -207,10 +208,10 @@ void ContextProjectionBackward<DEVICE_TYPE_CPU>(const CpuMatrix& out_grad_mat,
  * Context Projection Backward Function.
  * Update the weight gradient and input layer gradient with backprop
  *
- * \param inputs[0] input sequence.
- * \param inputs[1] output grad.
- * \param inouts[0] input grad.
- * \param inouts[1] weight grad.
+ * \param inputs[0]  input sequence.
+ * \param inputs[1]  output layer grad.
+ * \param outputs[0] input layer grad.
+ * \param outputs[1] weight grad.
  */
 template <DeviceType Device>
 class ContextProjectionBackwardFunc : public FunctionBase {
@@ -224,32 +225,34 @@ class ContextProjectionBackwardFunc : public FunctionBase {
   }
 
   void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
-    CHECK_EQ((size_t)3, inputs.size());
-    CHECK_EQ((size_t)1, outputs.size());
+    CHECK_EQ((size_t)2, inputs.size());
+    CHECK_EQ((size_t)2, outputs.size());
 
-    CHECK(outputs[0].data() && inputs[2].data());
-    CHECK_EQ(outputs[0].shape().ndims(), (size_t)2);
-    CHECK_EQ(inputs[0].shape().ndims(), (size_t)2);
+    CHECK(inputs[0].data() && inputs[1].data());
+    CHECK_EQ(inputs[0].shape().ndims(), (size_t)1);
     CHECK_EQ(inputs[1].shape().ndims(), (size_t)2);
-    CHECK_EQ(inputs[2].shape().ndims(), (size_t)1);
+    CHECK_EQ(outputs[0].shape().ndims(), (size_t)2);
+    CHECK_EQ(outputs[1].shape().ndims(), (size_t)2);
 
-    /// dim of input == dim of weight
-    CHECK_EQ(inputs[0].shape()[1], inputs[1].shape()[1]);
-    /// input and output has the same batch_size
-    CHECK_EQ(inputs[0].shape()[0], outputs[0].shape()[0]);
-    /// dim of output = dim of input * context_length
-    CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_);
+    /// dim of input grad == dim of weight
+    CHECK_EQ(outputs[0].shape()[1], outputs[1].shape()[1]);
+    /// input and output grad has the same batch_size
+    CHECK_EQ(outputs[0].shape()[0], inputs[1].shape()[0]);
+    /// dim of output val = dim of input grad * context_length
+    CHECK_EQ(inputs[1].shape()[1], outputs[0].shape()[1] * context_length_);
 
     CHECK_EQ(outputs[0].getArgType(), ADD_TO);
+    CHECK_EQ(outputs[1].getArgType(), ADD_TO);
 
-    auto out_grad_mat = outputs[0].matrix<Device>();
+    const auto seq_vec = inputs[0].vector<int, Device>();
+    const auto out_grad_mat = inputs[1].matrix<Device>();
     auto in_grad_mat =
-        !inputs[0].data() ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
-                          : inputs[0].matrix<Device>();
-    auto w_grad_mat = !inputs[1].data()
+        !outputs[0].data()
+            ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
+            : outputs[0].matrix<Device>();
+    auto w_grad_mat = !outputs[1].data()
                           ? typename Tensor<real, Device>::Matrix(nullptr, 0, 0)
-                          : inputs[1].matrix<Device>();
-    auto seq_vec = inputs[2].vector<int, Device>();
+                          : outputs[1].matrix<Device>();
     ContextProjectionBackward<Device>(out_grad_mat,
                                       in_grad_mat,
                                       w_grad_mat,
@@ -269,112 +272,6 @@ class ContextProjectionBackwardFunc : public FunctionBase {
   size_t total_pad_;
 };
 
-#if 0
-/**
- * Context Projection Backward Data Function.
- * Update gradient of the input layer with backprop.
- *
- * \param inouts[0] input grad.
- * \param inputs[0] input sequence.
- * \param inputs[1] output grad.
- */
-template <DeviceType Device>
-class ContextProjectionBackwardDataFunc : public FunctionBase {
-public:
-  void init(const FuncConfig& config) override {
-    context_length_ = config.get<size_t>("context_length");
-    context_start_ = config.get<int>("context_start");
-  }
-
-  void calc(const Arguments& inputs,
-            const Arguments& outputs,
-            const Arguments& inouts) override {
-    CHECK_EQ(2, inputs.size());
-    CHECK_EQ(0, outputs.size());
-    CHECK_EQ(1, inouts.size());
-
-    CHECK(inouts[0].getData() && inputs[0].getData() && inputs[1].getData());
-    CHECK_EQ(inputs[0].dims_.size(), 1);
-    CHECK_EQ(inputs[1].dims_.size(), 2);
-    CHECK_EQ(inouts[0].dims_.size(), 2);
-    CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_);
-    /// input and output grad have the same batch_size
-    CHECK_EQ(inouts[0].dims_[0], inputs[1].dims_[0]);
-
-    typename SequenceT<Device>::type seq_vec(
-        inputs[0].dims_[0], reinterpret_cast<int*>(inputs[0].getData()));
-    const auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
-        inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
-    auto in_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
-        inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]);
-
-    ContextProjectionBackwardData<Device>(out_grad_mat.get(),
-                                          in_grad_mat.get(),
-                                          seq_vec,
-                                          context_length_,
-                                          context_start_);
-  }
-
-private:
-  size_t context_length_;
-  int context_start_;
-};
-
-/**
- * Context Projection Backward Weight Function.
- * Update weight gradient with backprop.
- *
- * \param inouts[0] weight grad.
- * \param inputs[0] input sequence.
- * \param inputs[1] output grad.
- */
-template <DeviceType Device>
-class ContextProjectionBackwardWeightFunc : public FunctionBase {
-public:
-  void init(const FuncConfig& config) override {
-    context_length_ = config.get<size_t>("context_length");
-    context_start_ = config.get<int>("context_start");
-    begin_pad_ = config.get<size_t>("begin_pad");
-    total_pad_ = config.get<size_t>("total_pad");
-  }
-
-  void calc(const Arguments& inputs,
-            const Arguments& outputs,
-            const Arguments& inouts) override {
-    CHECK_EQ(2, inputs.size());
-    CHECK_EQ(0, outputs.size());
-    CHECK_EQ(1, inouts.size());
-
-    CHECK(inouts[0].getData() && inputs[0].getData() && inputs[1].getData());
-    CHECK_EQ(inputs[0].dims_.size(), 1);
-    CHECK_EQ(inputs[1].dims_.size(), 2);
-    CHECK_EQ(inouts[0].dims_.size(), 2);
-    CHECK_EQ(inputs[1].dims_[1], inouts[0].dims_[1] * context_length_);
-
-    typename SequenceT<Device>::type seq_vec(
-        inputs[0].dims_[0], reinterpret_cast<int*>(inputs[0].getData()));
-    const auto out_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
-        inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]);
-    auto w_grad_mat = std::make_shared<typename MatrixT<Device>::type>(
-        inouts[0].getData(), inouts[0].dims_[0], inouts[0].dims_[1]);
-
-    ContextProjectionBackwardWeight<Device>(out_grad_mat.get(),
-                                            w_grad_mat.get(),
-                                            seq_vec,
-                                            context_length_,
-                                            context_start_,
-                                            total_pad_,
-                                            begin_pad_);
-  }
-
-private:
-  size_t context_length_;
-  int context_start_;
-  size_t begin_pad_;
-  size_t total_pad_;
-};
-#endif
-
 REGISTER_TYPED_FUNC(ContextProjectionForward,
                     CPU,
                     ContextProjectionForwardFunc);
@@ -388,13 +285,5 @@ REGISTER_TYPED_FUNC(ContextProjectionForward,
 REGISTER_TYPED_FUNC(ContextProjectionBackward,
                     GPU,
                     ContextProjectionBackwardFunc);
-#if 0
-REGISTER_TYPED_FUNC(ContextProjectionBackwardData,
-                    GPU,
-                    ContextProjectionBackwardDataFunc);
-REGISTER_TYPED_FUNC(ContextProjectionBackwardWeight,
-                    GPU,
-                    ContextProjectionBackwardWeightFunc);
-#endif
 #endif
 }  // namespace paddle
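Note how the merge collapses the two `#if 0` classes (BackwardData and BackwardWeight) into the single ContextProjectionBackwardFunc, which takes two inputs and two ADD_TO outputs in the order its doc comment gives. A hypothetical caller, reusing the stand-in types from the sketch near the top of this page (the helper itself is illustrative, not part of the commit):

    // Hypothetical helper showing the argument order expected by
    // ContextProjectionBackwardFunc::calc() after this commit:
    //   inputs[0]  = input sequence,   inputs[1]  = output layer grad,
    //   outputs[0] = input layer grad, outputs[1] = weight grad (both ADD_TO).
    void callContextProjectionBackward(FunctionBase& backward,
                                       BufferArg seq,
                                       BufferArg out_grad,
                                       BufferArg in_grad,
                                       BufferArg w_grad) {
      BufferArgs inputs;
      BufferArgs outputs;
      inputs.addArg(seq);
      inputs.addArg(out_grad);
      in_grad.argType = ADD_TO;  // gradients are accumulated, not overwritten
      w_grad.argType = ADD_TO;
      outputs.addArg(in_grad);
      outputs.addArg(w_grad);
      backward.calc(inputs, outputs);
    }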
2 changes: 1 addition & 1 deletion paddle/function/ContextProjectionOp.h
@@ -56,7 +56,7 @@ void ContextProjectionForward(
  */
 template <DeviceType DType>
 void ContextProjectionBackward(
-    typename Tensor<real, DType>::Matrix& out_grad,
+    const typename Tensor<real, DType>::Matrix& out_grad,
     typename Tensor<real, DType>::Matrix& in_grad,
     typename Tensor<real, DType>::Matrix& w_grad,
     const typename Tensor<int, DType>::Vector& seq_vec,
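This const qualification on `out_grad` is what forces the `const_cast<CpuMatrix&>(out_grad_mat)` calls in ContextProjectionOp.cpp above: the output gradient is logically read-only in the backward pass, but `subMatrix()` is apparently a non-const member (an assumption inferred from the casts in the diff, not from the real Matrix header). The pattern in isolation, with stand-in types:

    #include <memory>

    // Stand-in with a non-const subMatrix(), mirroring what the casts imply.
    struct CpuMatrix {
      std::shared_ptr<CpuMatrix> subMatrix(int startRow, int numRows) {
        // The real method returns a view over rows [startRow, startRow + numRows).
        return std::make_shared<CpuMatrix>();
      }
    };

    // out_grad is logically read-only, so the new signature const-qualifies it;
    // the const_cast only unlocks the non-const accessor, the data is not written.
    void readOutGradSlice(const CpuMatrix& out_grad) {
      auto slice = const_cast<CpuMatrix&>(out_grad).subMatrix(0, 1);
      // ... read from *slice ...
    }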
1 change: 0 additions & 1 deletion paddle/function/ContextProjectionOpGpu.cu
@@ -217,7 +217,6 @@ void hl_context_projection_backward_data(const real* out_grad,
 }
 
 template <>
-<<<<<<< HEAD
 void ContextProjectionBackwardData<DEVICE_TYPE_GPU>(const GpuMatrix& out_grad,
                                                     GpuMatrix& in_grad,
                                                     const GpuIVector& sequence,
75 changes: 38 additions & 37 deletions paddle/function/ContextProjectionOpTest.cpp
@@ -56,24 +56,25 @@ void testMatrixProjectionForward(int context_start,
   cpu_out.randomizeUniform();
   gpu_out.copyFrom(cpu_out);
 
-  compare.getCpuFunction()->calc(
-      {Tensor(cpu_in.getData(), Dims{batch_size, input_dim}),
-       Tensor(cpu_weight ? cpu_weight->getData() : nullptr,
-              Dims{pad, input_dim}),
-       Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
-              Dims{cpu_seq->getSize()})},
-      {},
-      {Tensor(cpu_out.getData(),
-              Dims{batch_size, input_dim * context_length})});
-  compare.getGpuFunction()->calc(
-      {Tensor(gpu_in.getData(), Dims{batch_size, input_dim}),
-       Tensor(gpu_weight ? gpu_weight->getData() : nullptr,
-              Dims{pad, input_dim}),
-       Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
-              Dims{gpu_seq->getSize()})},
-      {},
-      {Tensor(gpu_out.getData(),
-              Dims{batch_size, input_dim * context_length})});
+  BufferArgs cpu_inputs;
+  BufferArgs cpu_outputs;
+  cpu_inputs.addArg(cpu_in);
+  cpu_inputs.addArg(cpu_weight ? *cpu_weight
+                               : CpuMatrix(nullptr, 0, input_dim));
+  cpu_inputs.addArg(*cpu_seq);
+  cpu_outputs.addArg(cpu_out, ADD_TO);
+
+  compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
+
+  BufferArgs gpu_inputs;
+  BufferArgs gpu_outputs;
+  gpu_inputs.addArg(gpu_in);
+  gpu_inputs.addArg(gpu_weight ? *gpu_weight
+                               : GpuMatrix(nullptr, 0, input_dim));
+  gpu_inputs.addArg(*gpu_seq);
+  gpu_outputs.addArg(gpu_out, ADD_TO);
+
+  compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);
 
   autotest::TensorCheckEqual(cpu_out, gpu_out);
 }
@@ -119,25 +120,25 @@ void testMatrixProjectionBackward(int context_start,
     gpu_w_grad->copyFrom(*cpu_w_grad);
   }
 
-  compare.getCpuFunction()->calc(
-      {Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
-              Dims{cpu_seq->getSize()}),
-       Tensor(cpu_out_grad.getData(),
-              Dims{batch_size, input_dim * context_length})},
-      {},
-      {Tensor(cpu_in_grad.getData(), Dims{batch_size, input_dim}),
-       Tensor(cpu_w_grad ? cpu_w_grad->getData() : nullptr,
-              Dims{pad, input_dim})});
-
-  compare.getGpuFunction()->calc(
-      {Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
-              Dims{gpu_seq->getSize()}),
-       Tensor(gpu_out_grad.getData(),
-              Dims{batch_size, input_dim * context_length})},
-      {},
-      {Tensor(gpu_in_grad.getData(), Dims{batch_size, input_dim}),
-       Tensor(gpu_w_grad ? gpu_w_grad->getData() : nullptr,
-              Dims{pad, input_dim})});
+  BufferArgs cpu_inputs;
+  BufferArgs cpu_outputs;
+  cpu_inputs.addArg(*cpu_seq);
+  cpu_inputs.addArg(cpu_out_grad);
+  cpu_outputs.addArg(cpu_in_grad, ADD_TO);
+  cpu_outputs.addArg(
+      cpu_w_grad ? *cpu_w_grad : CpuMatrix(nullptr, 0, input_dim), ADD_TO);
+
+  compare.getCpuFunction()->calc(cpu_inputs, cpu_outputs);
+
+  BufferArgs gpu_inputs;
+  BufferArgs gpu_outputs;
+  gpu_inputs.addArg(*gpu_seq);
+  gpu_inputs.addArg(gpu_out_grad);
+  gpu_outputs.addArg(gpu_in_grad, ADD_TO);
+  gpu_outputs.addArg(
+      gpu_w_grad ? *gpu_w_grad : GpuMatrix(nullptr, 0, input_dim), ADD_TO);
+
+  compare.getGpuFunction()->calc(gpu_inputs, gpu_outputs);
 
   autotest::TensorCheckErr(cpu_in_grad, gpu_in_grad);
   if (is_padding) {
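Both tests mark an absent weight by passing a matrix built on a null pointer, e.g. `CpuMatrix(nullptr, 0, input_dim)`; inside the op, `!inputs[1].data()` / `!outputs[1].data()` detects the sentinel and the padding branch is skipped. A sketch of that convention; the `Matrix` type and helper here are illustrative stand-ins, not the real paddle classes:

    #include <cstddef>

    // Stand-in matrix: a null data pointer marks an absent optional argument.
    struct Matrix {
      float* data;
      std::size_t height, width;
      Matrix(float* d, std::size_t h, std::size_t w)
          : data(d), height(h), width(w) {}
      explicit operator bool() const { return data != nullptr; }
    };

    // Mirrors the is_padding logic: the weight gradient is only touched when a
    // real buffer was supplied.
    void accumulateWeightGrad(const Matrix& out_grad, Matrix& w_grad) {
      if (!w_grad) return;  // Matrix(nullptr, 0, dim) => nothing to update
      // ... add the padding rows of out_grad into w_grad ...
      (void)out_grad;
    }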