Skip to content

Commit

Permalink
Support rectangle input for CNN
Browse files Browse the repository at this point in the history
  • Loading branch information
luotao1 committed Nov 17, 2016
1 parent 6561242 commit 496d64e
Show file tree
Hide file tree
Showing 29 changed files with 360 additions and 269 deletions.
13 changes: 4 additions & 9 deletions paddle/gserver/layers/BatchNormBaseLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,10 @@ bool BatchNormBaseLayer::init(const LayerMap& layerMap,

void BatchNormBaseLayer::calFeatureMapSize() {
const ImageConfig& conf = config_.inputs(0).image_conf();
if (inputLayers_[0]->getOutput().getFrameHeight() == 0 &&
inputLayers_[0]->getOutput().getFrameWidth() == 0) {
imgSize_ = conf.img_size();
imageH_ = imgSize_;
imageW_ = imgSize_;
} else {
imageH_ = inputLayers_[0]->getOutput().getFrameHeight();
imageW_ = inputLayers_[0]->getOutput().getFrameWidth();
}
imageH_ = inputLayers_[0]->getOutput().getFrameHeight();
imageW_ = inputLayers_[0]->getOutput().getFrameWidth();
if (imageH_ == 0) imageH_ = conf.img_size_y();
if (imageW_ == 0) imageW_ = conf.img_size();
imgPixels_ = imageH_ * imageW_;
getOutput().setFrameHeight(imageH_);
getOutput().setFrameWidth(imageW_);
Expand Down
5 changes: 2 additions & 3 deletions paddle/gserver/layers/BatchNormBaseLayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,8 @@ class BatchNormBaseLayer : public Layer {
MatrixPtr savedMean_;
MatrixPtr savedInvVar_;

/// Height or width of input image feature, now height is equal to width.
/// imgSize is 1 if the input is fully-connected layer.
int imgSize_;
/// Height and width of the input image feature.
/// Both are 1 if the input is a fully-connected layer.
int imageH_;
int imageW_;
/// Height * Width.
Expand Down
6 changes: 3 additions & 3 deletions paddle/gserver/layers/BilinearInterpLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,15 @@ size_t BilinearInterpLayer::getSize() {

const BilinearInterpConfig& conf = config_.inputs(0).bilinear_interp_conf();
if (inImgH_ == 0) {
inImgH_ = conf.img_size_y();
inImgH_ = conf.image_conf().img_size_y();
}
if (inImgW_ == 0) {
inImgW_ = conf.img_size_x();
inImgW_ = conf.image_conf().img_size();
}

outImgH_ = conf.out_size_y();
outImgW_ = conf.out_size_x();
numChannels_ = conf.num_channels();
numChannels_ = conf.image_conf().channels();

CHECK(outImgH_ > 0 && outImgW_ > 0);
CHECK(inImgH_ > 0 && inImgW_ > 0);
Expand Down
15 changes: 9 additions & 6 deletions paddle/gserver/layers/ConvBaseLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,13 @@ bool ConvBaseLayer::init(const LayerMap& layerMap,
filterSizeY_.push_back(conf.filter_size_y());
filterPixels_.push_back(filterSize_.back() * filterSizeY_.back());
channels_.push_back(conf.channels());
imgSizeH_.push_back(conf.img_size());
imgSizeH_.push_back(conf.has_img_size_y() ? conf.img_size_y() :
conf.img_size());
imgSizeW_.push_back(conf.img_size());
groups_.push_back(conf.groups());
filterChannels_.push_back(conf.filter_channels());
outputH_.push_back(conf.output_x());
outputH_.push_back(conf.has_output_y() ? conf.output_y() :
conf.output_x());
outputW_.push_back(conf.output_x());
}

Expand Down Expand Up @@ -90,11 +92,12 @@ size_t ConvBaseLayer::calOutputSize() {
for (size_t i = 0; i < inputLayers_.size(); i++) {
inH.push_back(inputLayers_[i]->getOutput().getFrameHeight());
inW.push_back(inputLayers_[i]->getOutput().getFrameWidth());
const ConvConfig& conf = config_.inputs(i).conv_conf();
if (isDeconv_) {
if (inH[i] == 0)
inH[i] = config_.inputs(i).conv_conf().output_x();
inH[i] = conf.has_output_y() ? conf.output_y() : conf.output_x();
if (inW[i] == 0)
inW[i] = config_.inputs(i).conv_conf().output_x();
inW[i] = conf.output_x();
outH.push_back(
imageSize(inH[i], filterSizeY_[i], paddingY_[i], strideY_[i],
caffeMode_));
Expand All @@ -103,9 +106,9 @@ size_t ConvBaseLayer::calOutputSize() {
caffeMode_));
} else {
if (inH[i] == 0)
inH[i] = config_.inputs(i).conv_conf().img_size();
inH[i] = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
if (inW[i] == 0)
inW[i] = config_.inputs(i).conv_conf().img_size();
inW[i] = conf.img_size();
outH.push_back(
outputSize(inH[i], filterSizeY_[i], paddingY_[i], strideY_[i],
caffeMode_));
Expand Down
13 changes: 9 additions & 4 deletions paddle/gserver/layers/ConvOperator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ class ConvOperator : public Operator {
bool caffeMode_;
int inputOffset_, outputOffset_, weightOffset_;
int numFilters_;
int padding_, stride_, filterSize_, channels_, imgSize_;
int padding_, stride_, filterSize_, channels_, imgSize_, imgSizeY_;
int paddingY_, strideY_, filterSizeY_;
int imgPixels_, filterPixels_, filterChannels_, outputX_, outputs_;
int imgPixels_, filterPixels_, filterChannels_, outputX_, outputY_, outputs_;

/// The following member variables are the same as in CudnnConvLayer.
/// They are not explained here.
Expand Down Expand Up @@ -144,7 +144,7 @@ void ConvOperator::allocConvWorkSpace(size_t maxWorkSpace) {
void ConvOperator::reshape(int batchSize) {
imageH_ = ins_[0]->getFrameHeight();
imageW_ = ins_[0]->getFrameWidth();
if (imageH_ == 0) imageH_ = imgSize_;
if (imageH_ == 0) imageH_ = imgSizeY_;
if (imageW_ == 0) imageW_ = imgSize_;
outputH_ = outputSize(imageH_, filterSizeY_, paddingY_, strideY_, caffeMode_);
outputW_ = outputSize(imageW_, filterSize_, padding_, stride_, caffeMode_);
Expand Down Expand Up @@ -176,7 +176,10 @@ void ConvOperator::computeConvSizes() {
hl_create_tensor_descriptor(&inputDesc_);
int outputX =
outputSize(imgSize_, filterSize_, padding_, stride_, caffeMode_);
int outputY =
outputSize(imgSizeY_, filterSizeY_, paddingY_, strideY_, caffeMode_);
CHECK_EQ(outputX, outputX_);
CHECK_EQ(outputY, outputY_);
hl_create_tensor_descriptor(&outputDesc_);
hl_create_convolution_descriptor(&convDesc_, inputDesc_, filterDesc_,
paddingY_, padding_, strideY_, stride_);
Expand Down Expand Up @@ -208,10 +211,12 @@ void ConvOperator::getConvParams() {
filterPixels_ = filterSize_ * filterSizeY_;
channels_ = conf.channels();
imgSize_ = conf.img_size();
imgPixels_ = imgSize_ * imgSize_;
imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
imgPixels_ = imgSize_ * imgSizeY_;
CHECK_EQ(conf.groups(), 1U);
filterChannels_ = conf.filter_channels();
outputX_ = conf.output_x();
outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
outputs_ = outputX_ * outputX_;
}

Expand Down
2 changes: 1 addition & 1 deletion paddle/gserver/layers/ConvProjection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ void ConvProjection::getConvParams() {
filterH_ = conf.filter_size_y();
filterW_ = conf.filter_size();

configImgH_ = conf.img_size();
configImgH_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
configImgW_ = conf.img_size();

channels_ = conf.channels();
Expand Down
4 changes: 2 additions & 2 deletions paddle/gserver/layers/DataLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ void DataLayer::copyDataToOutput(Argument& output) {
output.ids->copyFrom(*data_.ids);
}
}
output.setFrameHeight(data_.getFrameHeight());
output.setFrameWidth(data_.getFrameWidth());
output.setFrameHeight(config_.height());
output.setFrameWidth(config_.width());
output.cpuSequenceDims = data_.cpuSequenceDims;
output.sequenceStartPositions = data_.sequenceStartPositions;
output.subSequenceStartPositions = data_.subSequenceStartPositions;
Expand Down
30 changes: 16 additions & 14 deletions paddle/gserver/layers/ExpandConvBaseLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,19 @@ bool ExpandConvBaseLayer::init(const LayerMap &layerMap,
* meaning as in conv, we need to swap channels_ and numFilters here for
* convTrans, and in other functions too.
* */
int channel;
int numFilters;

/* Initialize the projection */
for (auto &inputConfig : config_.inputs()) {
const ConvConfig &conf = inputConfig.conv_conf();
numFilters = isDeconv_ ? conf.channels() : numFilters_;
int numFilters = isDeconv_ ? conf.channels() : numFilters_;
subM_.push_back(numFilters / conf.groups());
subN_.push_back(conf.output_x() * conf.output_x());
channel = isDeconv_ ? numFilters_ : conf.channels();
subK_.push_back(channel * conf.filter_size() * conf.filter_size() /
conf.groups());
subN_.push_back(conf.output_x() *
(conf.has_output_y() ? conf.output_y() : conf.output_x()));
int channel = isDeconv_ ? numFilters_ : conf.channels();
subK_.push_back(
channel * conf.filter_size() *
(conf.has_filter_size_y() ? conf.filter_size_y() : conf.filter_size()) /
conf.groups());
/* Consistent caffe mode for multiple input */
caffeMode_ = conf.caffe_mode();
}
Expand Down Expand Up @@ -107,9 +109,9 @@ void ExpandConvBaseLayer::expandOneFrame(MatrixPtr image, size_t startIdx,
imgData, 1, imgSizeH_[inIdx] * imgSizeW_[inIdx] * channel, false,
useGpu_);
expandInput_->convExpand(*imageTmp, imgSizeH_[inIdx], imgSizeW_[inIdx],
channel, filterSize_[inIdx],
filterSize_[inIdx], stride_[inIdx], stride_[inIdx],
padding_[inIdx], padding_[inIdx],
channel, filterSizeY_[inIdx],
filterSize_[inIdx], strideY_[inIdx], stride_[inIdx],
paddingY_[inIdx], padding_[inIdx],
outputH_[inIdx], outputW_[inIdx]);
imageTmp->clear();
}
Expand Down Expand Up @@ -188,10 +190,10 @@ void ExpandConvBaseLayer::bpropActs(MatrixPtr out, MatrixPtr image,
imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channel, false,
useGpu_);
vTmp->convShrink(*oneGradTmp, imgSizeH_[inpIdx], imgSizeW_[inpIdx],
channel, filterSize_[inpIdx],
filterSize_[inpIdx], stride_[inpIdx], stride_[inpIdx],
padding_[inpIdx], padding_[inpIdx],
outputH_[inpIdx], outputW_[inpIdx], 1.0f, 1.0f);
channel, filterSizeY_[inpIdx],
filterSize_[inpIdx], strideY_[inpIdx], stride_[inpIdx],
paddingY_[inpIdx], padding_[inpIdx], outputH_[inpIdx],
outputW_[inpIdx], 1.0f, 1.0f);
vTmp->clear();
oneGradTmp->clear();

Expand Down
6 changes: 3 additions & 3 deletions paddle/gserver/layers/MaxOutLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ size_t MaxOutLayer::getSize() {
imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight();
imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth();
if (imgSizeH_ == 0) {
imgSizeH_ = maxoutConf.img_size_y();
imgSizeH_ = maxoutConf.image_conf().img_size_y();
}
if (imgSizeW_ == 0) {
imgSizeW_ = maxoutConf.img_size_x();
imgSizeW_ = maxoutConf.image_conf().img_size();
}

featLen_ = imgSizeH_ * imgSizeW_;
Expand All @@ -50,7 +50,7 @@ bool MaxOutLayer::init(const LayerMap& layerMap,

const MaxOutConfig& conf = config_.inputs(0).maxout_conf();
groups_ = conf.groups();
channels_ = conf.channels();
channels_ = conf.image_conf().channels();
CHECK_EQ(channels_ % groups_, 0UL);
outputChannels_ = channels_ / groups_;

Expand Down
3 changes: 3 additions & 0 deletions paddle/gserver/layers/NormLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ bool ResponseNormLayer::init(const LayerMap& layerMap,
outputX_ = conf.output_x();
imgSize_ = conf.img_size();
denoms_ = NULL;

outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
return true;
}

Expand Down
2 changes: 1 addition & 1 deletion paddle/gserver/layers/NormLayer.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class NormLayer : public Layer {
*/
class ResponseNormLayer : public NormLayer {
protected:
size_t channels_, size_, outputX_, imgSize_;
size_t channels_, size_, outputX_, imgSize_, outputY_, imgSizeY_;
float scale_, pow_;
MatrixPtr denoms_;

Expand Down
2 changes: 1 addition & 1 deletion paddle/gserver/layers/NormProjectionLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ size_t CMRProjectionNormLayer::getSize() {
imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight();
imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth();
if (imgSizeH_ == 0) {
imgSizeH_ = imgSize_;
imgSizeH_ = imgSizeY_;
}
if (imgSizeW_ == 0) {
imgSizeW_ = imgSize_;
Expand Down
13 changes: 7 additions & 6 deletions paddle/gserver/layers/SpatialPyramidPoolLayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,14 @@ ProjectionConfig SpatialPyramidPoolLayer::getConfig(size_t imgSizeW,
size_t SpatialPyramidPoolLayer::getSize() {
CHECK_EQ(inputLayers_.size(), 1UL);
size_t layerSize = 0;
const SppConfig& sppConf = config_.inputs(0).spp_conf();
const ImageConfig& conf = config_.inputs(0).spp_conf().image_conf();
imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight();
imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth();
if (imgSizeH_ == 0) {
imgSizeH_ = sppConf.has_img_size_y() ? sppConf.img_size_y() : imgSizeW_;
imgSizeH_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
}
if (imgSizeW_ == 0) {
imgSizeW_ = sppConf.img_size();
imgSizeW_ = conf.img_size();
}

size_t outputH = 1;
Expand All @@ -82,9 +82,10 @@ bool SpatialPyramidPoolLayer::init(const LayerMap& layerMap,
pyramidHeight_ = sppConf.pyramid_height();
poolType_ = sppConf.pool_type();

channels_ = sppConf.channels();
imgSizeW_ = sppConf.img_size();
imgSizeH_ = sppConf.has_img_size_y() ? sppConf.img_size_y() : imgSizeW_;
const ImageConfig& imageConf = sppConf.image_conf();
channels_ = imageConf.channels();
imgSizeW_ = imageConf.img_size();
imgSizeH_ = imageConf.has_img_size_y() ? imageConf.img_size_y() : imgSizeW_;
poolProjections_.reserve(pyramidHeight_);
projCol_.reserve(pyramidHeight_);
projOutput_.resize(pyramidHeight_);
Expand Down
2 changes: 0 additions & 2 deletions paddle/gserver/tests/img_pool_a.conf
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ maxpool = img_pool_layer(input=conv,
stride_y=2,
padding=1,
padding_y=2,
img_width=16,
pool_type=MaxPooling(),
)
avgpool = img_pool_layer(input=conv,
Expand All @@ -39,7 +38,6 @@ avgpool = img_pool_layer(input=conv,
stride_y=2,
padding=1,
padding_y=2,
img_width=16,
pool_type=AvgPooling(),
)

Expand Down
Loading

0 comments on commit 496d64e

Please sign in to comment.