Added a Reshape layer for copy-free modification of blob dimensions.
ssafar authored and jeffdonahue committed May 15, 2015
1 parent 35a5df5 commit 4fb3c9e
Showing 6 changed files with 348 additions and 1 deletion.
42 changes: 42 additions & 0 deletions docs/tutorial/layers.md
@@ -419,6 +419,48 @@ The `SPLIT` layer is a utility layer that splits an input blob to multiple outpu

The `FLATTEN` layer is a utility layer that flattens an input of shape `n * c * h * w` to a simple vector output of shape `n * (c*h*w) * 1 * 1`.

#### Reshape

* LayerType: `RESHAPE`
* CPU implementation: `./src/caffe/layers/reshape_layer.cpp`
* CUDA GPU implementation: `./src/caffe/layers/reshape_layer.cu`
* Parameters (`ReshapeParameter reshape_param`)
    - Optional (also see detailed description below)
        - `num` [default 0]
        - `channels` [default 0]
        - `height` [default 0]
        - `width` [default 0]

* Input
- a single blob with arbitrary dimensions
* Output
    - a single output blob sharing the input's data, with dimensions as specified by `reshape_param`

* Sample

      layers {
        name: "reshape"
        type: RESHAPE
        bottom: "input"
        top: "output"

        reshape_param {
          num: 0  # copy the dimension from below
          channels: 2
          width: 3
          height: -1  # infer it from the other dimensions
        }
      }

The `RESHAPE` layer can be used to change the dimensions of its input without changing its data. Just like the `FLATTEN` layer, only the dimensions are changed; no data is copied in the process.

Output dimensions are specified by the `ReshapeParameter` proto. Positive numbers are used directly, setting the corresponding dimension of the output blob. In addition, two special values are accepted for any of the target dimensions:

* **0** means "copy the respective dimension of the bottom blob". That is, given `channels: 0`, the top blob has the same number of channels as the bottom blob. Since the default value of every target dimension is 0, omitting a target dimension also causes it to be copied.
* **-1** stands for "infer this from the other dimensions". This behaves like -1 in *numpy*'s reshape or `[]` in *MATLAB*'s: the dimension is calculated to keep the overall element count the same as in the bottom blob. If this is not possible, an error is raised. At most one -1 can be used in a reshape operation (see the worked example after this list).
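
For instance, reshaping a `2 * 3 * 6 * 5` bottom blob (180 elements in total) with `channels: 3, height: 10, width: -1` (leaving `num` at 0, so it copies 2) yields `width = 180 / (2*3*10) = 3`; this is exactly the case exercised by the `TestInferenceOfUnspecified` test in this commit.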

As another example, specifying `num: 0, channels: -1, height: 1, width: 1` makes the layer behave in exactly the same way as the `FLATTEN` layer, as in the sketch below.
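
A minimal sketch of that flattening configuration, written in the same format as the sample above (the layer and blob names are placeholders):

    layers {
      name: "flatten_via_reshape"
      type: RESHAPE
      bottom: "input"
      top: "output"

      reshape_param {
        num: 0        # keep the batch size
        channels: -1  # collapse c*h*w into the channel dimension
        height: 1
        width: 1
      }
    }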

#### Concatenation

* LayerType: `CONCAT`
35 changes: 35 additions & 0 deletions include/caffe/common_layers.hpp
@@ -297,6 +297,41 @@ class MVNLayer : public Layer<Dtype> {
Blob<Dtype> sum_multiplier_;
};

/**
* @brief Reshapes the input Blob into an arbitrary-sized output Blob.
*
* Note: similarly to FlattenLayer, this layer does not change the input values
* (see FlattenLayer, Blob::ShareData and Blob::ShareDiff).
*/
template <typename Dtype>
class ReshapeLayer : public Layer<Dtype> {
public:
explicit ReshapeLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

void FillInSingleUnspecifiedDimension(int bottom_count);

  // Target output dimensions; 0 and -1 values are resolved in LayerSetUp.
  int num_out_;
  int channels_out_;
  int height_out_;
  int width_out_;
};

/**
* @brief Ignores bottom blobs while producing no top blobs. (This is useful
* to suppress outputs during testing.)
113 changes: 113 additions & 0 deletions src/caffe/layers/reshape_layer.cpp
@@ -0,0 +1,113 @@
#include <vector>

#include "caffe/common_layers.hpp"
#include "caffe/layer.hpp"

namespace caffe {

template <typename Dtype>
void ReshapeLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
CHECK_EQ(bottom.size(), 1) << "Reshape Layer takes a single blob as input.";
CHECK_EQ(top.size(), 1) << "Reshape Layer takes a single blob as output.";

  num_out_ = this->layer_param_.reshape_param().num();
  // Dimensions set to 0 (either by default or explicitly) are copied from
  // the bottom blob.
  if (num_out_ == 0) {
    num_out_ = bottom[0]->num();
  }

  channels_out_ = this->layer_param_.reshape_param().channels();
  if (channels_out_ == 0) {
    channels_out_ = bottom[0]->channels();
  }

  height_out_ = this->layer_param_.reshape_param().height();
  if (height_out_ == 0) {
    height_out_ = bottom[0]->height();
  }

  width_out_ = this->layer_param_.reshape_param().width();
  if (width_out_ == 0) {
    width_out_ = bottom[0]->width();
  }

FillInSingleUnspecifiedDimension(bottom[0]->count());
}

template <typename Dtype>
void ReshapeLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
  top[0]->Reshape(num_out_, channels_out_, height_out_, width_out_);

  const int out_count = num_out_ * channels_out_ * height_out_ * width_out_;
  CHECK_EQ(out_count, bottom[0]->count()) <<
      "Requested output count does not match bottom blob count; requested "
      "shape is " << num_out_ << "x" << channels_out_ << "x" <<
      height_out_ << "x" << width_out_;
}

template <typename Dtype>
void ReshapeLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
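  // Reshaping is free: simply share the bottom blob's data with the top blob.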
top[0]->ShareData(*bottom[0]);
}

template <typename Dtype>
void ReshapeLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
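  // Likewise, share the top blob's diff with the bottom blob rather than
  // copying the gradients.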
bottom[0]->ShareDiff(*top[0]);
}

/**
* @brief Fill in a single dimension left unspecified.
*
 * If a dimension is set to -1, it will be filled in with a value inferred from
 * the count of the bottom blob (provided the product of the remaining
 * dimensions divides that count).
 *
 * @param bottom_count Total element count of the bottom blob.
*/
template <typename Dtype>
void ReshapeLayer<Dtype>::FillInSingleUnspecifiedDimension(int bottom_count) {
  int* const dimensions[] = {&num_out_, &channels_out_, &height_out_,
      &width_out_};
const size_t N_DIMENSIONS = 4;

  // Number of dimensions set to -1.
int n_unspecified = 0;
// Product of the remaining dimensions.
int product_without_unspecified_dim = 1;

for (size_t i = 0; i < N_DIMENSIONS; i++) {
if (*(dimensions[i]) == -1) {
n_unspecified++;
} else {
product_without_unspecified_dim *= *(dimensions[i]);
}
}

if (n_unspecified == 0) {
// Everything is filled out, nothing to do.
return;
}

  CHECK_EQ(n_unspecified, 1) << "Only one dimension can be set to -1.";
CHECK_EQ(bottom_count % product_without_unspecified_dim, 0) <<
"Bottom layer count " << bottom_count << " not divisible by product " <<
product_without_unspecified_dim;

  // Fill in the single -1 dimension.
for (size_t i = 0; i < N_DIMENSIONS; i++) {
if (*(dimensions[i]) == -1) {
*(dimensions[i]) = bottom_count / product_without_unspecified_dim;
}
}
}

#ifdef CPU_ONLY
STUB_GPU(ReshapeLayer);
#endif

INSTANTIATE_CLASS(ReshapeLayer);
REGISTER_LAYER_CLASS(RESHAPE, ReshapeLayer);
} // namespace caffe
23 changes: 23 additions & 0 deletions src/caffe/layers/reshape_layer.cu
@@ -0,0 +1,23 @@
#include <vector>

#include "caffe/common_layers.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void ReshapeLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
top[0]->ShareData(*bottom[0]);
}

template <typename Dtype>
void ReshapeLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
bottom[0]->ShareDiff(*top[0]);
}

INSTANTIATE_LAYER_GPU_FUNCS(ReshapeLayer);

} // namespace caffe
16 changes: 15 additions & 1 deletion src/caffe/proto/caffe.proto
@@ -259,7 +259,7 @@ message ParamSpec {
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 133 (last added: spp_param)
// LayerParameter next available layer-specific ID: 134 (last added: reshape_param)
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
@@ -326,6 +326,7 @@ message LayerParameter {
optional PReLUParameter prelu_param = 131;
optional PythonParameter python_param = 130;
optional ReLUParameter relu_param = 123;
optional ReshapeParameter reshape_param = 133;
optional SigmoidParameter sigmoid_param = 124;
optional SoftmaxParameter softmax_param = 125;
optional SPPParameter spp_param = 132;
@@ -690,6 +691,19 @@ message ReLUParameter {
optional Engine engine = 2 [default = DEFAULT];
}

// Message that stores parameters used by ReshapeLayer
message ReshapeParameter {
// Specify the output dimensions. If some of the following parameters are
// omitted or set to 0 explicitly, the corresponding dimension from the bottom
// layer is used (unchanged). Also, if exactly one of them is set to -1, its
// value is calculated from the count of the bottom layer and the remaining
// dimensions, if possible.
optional int32 num = 1 [default = 0];
optional int32 channels = 2 [default = 0];
optional int32 height = 3 [default = 0];
optional int32 width = 4 [default = 0];
}

// Message that stores parameters used by SigmoidLayer
message SigmoidParameter {
enum Engine {
120 changes: 120 additions & 0 deletions src/caffe/test/test_reshape_layer.cpp
@@ -0,0 +1,120 @@
#include <cstring>
#include <vector>

#include "gtest/gtest.h"

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/common_layers.hpp"
#include "caffe/filler.hpp"

#include "caffe/test/test_caffe_main.hpp"
#include "caffe/test/test_gradient_check_util.hpp"

namespace caffe {

template <typename TypeParam>
class ReshapeLayerTest : public MultiDeviceTest<TypeParam> {
typedef typename TypeParam::Dtype Dtype;
protected:
ReshapeLayerTest()
: blob_bottom_(new Blob<Dtype>(2, 3, 6, 5)),
blob_top_(new Blob<Dtype>()) {
// fill the values
FillerParameter filler_param;
GaussianFiller<Dtype> filler(filler_param);
filler.Fill(this->blob_bottom_);
blob_bottom_vec_.push_back(blob_bottom_);
blob_top_vec_.push_back(blob_top_);
}

virtual ~ReshapeLayerTest() { delete blob_bottom_; delete blob_top_; }

Blob<Dtype>* const blob_bottom_;
Blob<Dtype>* const blob_top_;
vector<Blob<Dtype>*> blob_bottom_vec_;
vector<Blob<Dtype>*> blob_top_vec_;
};

TYPED_TEST_CASE(ReshapeLayerTest, TestDtypesAndDevices);

TYPED_TEST(ReshapeLayerTest, TestFlattenOutputSizes) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
ReshapeParameter* reshape_param =
layer_param.mutable_reshape_param();
reshape_param->set_channels(-1);
reshape_param->set_height(1);
reshape_param->set_width(1);

ReshapeLayer<Dtype> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
EXPECT_EQ(this->blob_top_->num(), 2);
EXPECT_EQ(this->blob_top_->channels(), 3 * 6 * 5);
EXPECT_EQ(this->blob_top_->height(), 1);
EXPECT_EQ(this->blob_top_->width(), 1);
}

TYPED_TEST(ReshapeLayerTest, TestFlattenValues) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
ReshapeParameter* reshape_param =
layer_param.mutable_reshape_param();
reshape_param->set_channels(-1);
reshape_param->set_height(1);
reshape_param->set_width(1);
ReshapeLayer<Dtype> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
for (int c = 0; c < 3 * 6 * 5; ++c) {
EXPECT_EQ(this->blob_top_->data_at(0, c, 0, 0),
this->blob_bottom_->data_at(0, c / (6 * 5), (c / 5) % 6, c % 5));
EXPECT_EQ(this->blob_top_->data_at(1, c, 0, 0),
this->blob_bottom_->data_at(1, c / (6 * 5), (c / 5) % 6, c % 5));
}
}

// Test whether setting output dimensions to 0 either explicitly or implicitly
// copies the respective dimension of the input layer.
TYPED_TEST(ReshapeLayerTest, TestCopyDimensions) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
ReshapeParameter* reshape_param =
layer_param.mutable_reshape_param();
// Omitting num to test implicit zeroes.
reshape_param->set_channels(0);
reshape_param->set_height(0);
reshape_param->set_width(0);
ReshapeLayer<Dtype> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);

EXPECT_EQ(this->blob_top_->num(), 2);
EXPECT_EQ(this->blob_top_->channels(), 3);
EXPECT_EQ(this->blob_top_->height(), 6);
EXPECT_EQ(this->blob_top_->width(), 5);
}

// When a dimension is set to -1, we should infer its value from the other
// dimensions (including those that get copied from below).
TYPED_TEST(ReshapeLayerTest, TestInferenceOfUnspecified) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
ReshapeParameter* reshape_param =
layer_param.mutable_reshape_param();
  // num is omitted, so it is implicitly 0 and copies the bottom's num of 2.
reshape_param->set_channels(3);
reshape_param->set_height(10);
reshape_param->set_width(-1);

  // The count is 2*3*6*5 = 180, so the inferred width should be
  // 180 / (2*3*10) = 3.

ReshapeLayer<Dtype> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);

EXPECT_EQ(this->blob_top_->num(), 2);
EXPECT_EQ(this->blob_top_->channels(), 3);
EXPECT_EQ(this->blob_top_->height(), 10);
EXPECT_EQ(this->blob_top_->width(), 3);
}

} // namespace caffe
