Merge pull request #2089 from jeffdonahue/reduction-layer
ReductionLayer
jeffdonahue committed Jun 3, 2015
2 parents f3eabad + 823d055 commit 0cc7e18
Showing 5 changed files with 599 additions and 1 deletion.
45 changes: 45 additions & 0 deletions include/caffe/common_layers.hpp
@@ -399,6 +399,51 @@ class ReshapeLayer : public Layer<Dtype> {
int constant_count_;
};

/**
* @brief Compute "reductions" -- operations that return a scalar output Blob
* for an input Blob of arbitrary size, such as the sum, absolute sum,
* and sum of squares.
*
* TODO(dox): thorough documentation for Forward, Backward, and proto params.
*/
template <typename Dtype>
class ReductionLayer : public Layer<Dtype> {
public:
explicit ReductionLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "Reduction"; }
virtual inline int ExactNumBottomBlobs() const { return 1; }
virtual inline int ExactNumTopBlobs() const { return 1; }

protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

/// @brief the reduction operation performed by the layer
ReductionParameter_ReductionOp op_;
/// @brief a scalar coefficient applied to all outputs
Dtype coeff_;
/// @brief the index of the first input axis to reduce
int axis_;
/// @brief the number of reductions performed
int num_;
/// @brief the input size of each reduction
int dim_;
/// @brief a helper Blob used for summation (op_ == SUM)
Blob<Dtype> sum_multiplier_;
};

/**
* @brief Ignores bottom blobs while producing no top blobs. (This is useful
* to suppress outputs during testing.)
132 changes: 132 additions & 0 deletions src/caffe/layers/reduction_layer.cpp
@@ -0,0 +1,132 @@
#include <algorithm>
#include <cfloat>
#include <vector>

#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

template <typename Dtype>
void ReductionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
op_ = this->layer_param_.reduction_param().operation();
}

template <typename Dtype>
void ReductionLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
axis_ = bottom[0]->CanonicalAxisIndex(
this->layer_param_.reduction_param().axis());
// In the output, we'll keep all axes up to the reduction axis, but
// throw away any after that.
// Note: currently reducing along non-tail axes is not supported; otherwise,
// we'd need to also copy any axes following an "end_axis".
vector<int> top_shape(bottom[0]->shape().begin(),
bottom[0]->shape().begin() + axis_);
top[0]->Reshape(top_shape);
num_ = bottom[0]->count(0, axis_);
dim_ = bottom[0]->count(axis_);
CHECK_EQ(num_, top[0]->count());
if (op_ == ReductionParameter_ReductionOp_SUM ||
op_ == ReductionParameter_ReductionOp_MEAN) {
vector<int> sum_mult_shape(1, dim_);
sum_multiplier_.Reshape(sum_mult_shape);
caffe_set(dim_, Dtype(1), sum_multiplier_.mutable_cpu_data());
}
coeff_ = this->layer_param().reduction_param().coeff();
if (op_ == ReductionParameter_ReductionOp_MEAN) {
coeff_ /= dim_;
}
}
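
A quick worked example of the bookkeeping above (the shape is hypothetical, not taken from this commit): for a bottom Blob of shape (32, 3, 16, 16) and axis = 1,

  top shape = (32)                              // axes before axis_ are kept
  num_      = count(0, 1) = 32                  // number of reductions performed
  dim_      = count(1)    = 3 * 16 * 16 = 768   // elements per reduction

so the layer performs 32 reductions of 768 values each.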

template <typename Dtype>
void ReductionLayer<Dtype>::Forward_cpu(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
const Dtype* bottom_data = bottom[0]->cpu_data();
const Dtype* mult_data = NULL;
if (sum_multiplier_.count() > 0) {
mult_data = sum_multiplier_.cpu_data();
}
Dtype* top_data = top[0]->mutable_cpu_data();
for (int i = 0; i < num_; ++i) {
switch (op_) {
case ReductionParameter_ReductionOp_SUM:
case ReductionParameter_ReductionOp_MEAN:
*top_data = caffe_cpu_dot(dim_, mult_data, bottom_data);
break;
case ReductionParameter_ReductionOp_ASUM:
*top_data = caffe_cpu_asum(dim_, bottom_data);
break;
case ReductionParameter_ReductionOp_SUMSQ:
*top_data = caffe_cpu_dot(dim_, bottom_data, bottom_data);
break;
default:
LOG(FATAL) << "Unknown reduction op: "
<< ReductionParameter_ReductionOp_Name(op_);
}
bottom_data += dim_;
++top_data;
}
if (coeff_ != Dtype(1)) {
// Reset the top_data pointer.
top_data = top[0]->mutable_cpu_data();
caffe_scal(num_, coeff_, top_data);
}
}
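
Restating the loop above as formulas (no new behavior; x[i][j] denotes the j-th value of the i-th slice of dim_ elements):

  SUM, MEAN:  top[i] = coeff_ * sum_j x[i][j]      // computed as a dot product with the all-ones sum_multiplier_;
                                                   // for MEAN, coeff_ was already divided by dim_ in Reshape
  ASUM:       top[i] = coeff_ * sum_j |x[i][j]|
  SUMSQ:      top[i] = coeff_ * sum_j x[i][j]^2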

template <typename Dtype>
void ReductionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
if (!propagate_down[0]) { return; }
// Get bottom_data, if needed.
const Dtype* bottom_data = NULL;
switch (op_) {
// Operations that don't need bottom_data
case ReductionParameter_ReductionOp_SUM:
case ReductionParameter_ReductionOp_MEAN:
break;
// Operations that need bottom_data
case ReductionParameter_ReductionOp_ASUM:
case ReductionParameter_ReductionOp_SUMSQ:
bottom_data = bottom[0]->cpu_data();
break;
default:
LOG(FATAL) << "Unknown reduction op: "
<< ReductionParameter_ReductionOp_Name(op_);
}
const Dtype* top_diff = top[0]->cpu_diff();
Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
for (int i = 0; i < num_; ++i) {
const Dtype bottom_coeff = (*top_diff) * coeff_;
switch (op_) {
case ReductionParameter_ReductionOp_SUM:
case ReductionParameter_ReductionOp_MEAN:
caffe_set(dim_, bottom_coeff, bottom_diff);
break;
case ReductionParameter_ReductionOp_ASUM:
caffe_cpu_sign(dim_, bottom_data, bottom_diff);
caffe_scal(dim_, bottom_coeff, bottom_diff);
break;
case ReductionParameter_ReductionOp_SUMSQ:
caffe_cpu_scale(dim_, 2 * bottom_coeff, bottom_data, bottom_diff);
break;
default:
LOG(FATAL) << "Unknown reduction op: "
<< ReductionParameter_ReductionOp_Name(op_);
}
bottom_data += dim_;
bottom_diff += dim_;
++top_diff;
}
}
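
The loop above spreads each top_diff[i] back over its slice using the local derivative of the chosen reduction (a restatement, with bottom_coeff = top_diff[i] * coeff_):

  SUM, MEAN:  bottom_diff[i][j] = bottom_coeff                    // d(sum)/dx = 1; MEAN's 1/dim_ is folded into coeff_
  ASUM:       bottom_diff[i][j] = bottom_coeff * sign(x[i][j])
  SUMSQ:      bottom_diff[i][j] = bottom_coeff * 2 * x[i][j]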

#ifdef CPU_ONLY
STUB_GPU(ReductionLayer);
#endif

INSTANTIATE_CLASS(ReductionLayer);
REGISTER_LAYER_CLASS(Reduction);

} // namespace caffe
93 changes: 93 additions & 0 deletions src/caffe/layers/reduction_layer.cu
@@ -0,0 +1,93 @@
#include <cfloat>
#include <vector>

#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

template <typename Dtype>
void ReductionLayer<Dtype>::Forward_gpu(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
const Dtype* bottom_data = bottom[0]->gpu_data();
const Dtype* mult_data = NULL;
if (sum_multiplier_.count() > 0) {
mult_data = sum_multiplier_.gpu_data();
}
Dtype* top_data = top[0]->mutable_cpu_data();
for (int i = 0; i < num_; ++i) {
switch (op_) {
case ReductionParameter_ReductionOp_SUM:
case ReductionParameter_ReductionOp_MEAN:
caffe_gpu_dot(dim_, mult_data, bottom_data, top_data);
break;
case ReductionParameter_ReductionOp_ASUM:
caffe_gpu_asum(dim_, bottom_data, top_data);
break;
case ReductionParameter_ReductionOp_SUMSQ:
caffe_gpu_dot(dim_, bottom_data, bottom_data, top_data);
break;
default:
LOG(FATAL) << "Unknown reduction op: "
<< ReductionParameter_ReductionOp_Name(op_);
}
bottom_data += dim_;
++top_data;
}
if (coeff_ != Dtype(1)) {
// Reset the top_data pointer.
top_data = top[0]->mutable_gpu_data();
caffe_gpu_scal(num_, coeff_, top_data);
}
}

template <typename Dtype>
void ReductionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
if (!propagate_down[0]) { return; }
// Get bottom_data, if needed.
const Dtype* bottom_data = NULL;
switch (op_) {
// Operations that don't need bottom_data
case ReductionParameter_ReductionOp_SUM:
case ReductionParameter_ReductionOp_MEAN:
break;
// Operations that need bottom_data
case ReductionParameter_ReductionOp_ASUM:
case ReductionParameter_ReductionOp_SUMSQ:
bottom_data = bottom[0]->gpu_data();
break;
default:
LOG(FATAL) << "Unknown reduction op: "
<< ReductionParameter_ReductionOp_Name(op_);
}
const Dtype* top_diff = top[0]->cpu_diff();
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
for (int i = 0; i < num_; ++i) {
const Dtype bottom_coeff = (*top_diff) * coeff_;
switch (op_) {
case ReductionParameter_ReductionOp_SUM:
case ReductionParameter_ReductionOp_MEAN:
caffe_gpu_set(dim_, bottom_coeff, bottom_diff);
break;
case ReductionParameter_ReductionOp_ASUM:
caffe_gpu_sign(dim_, bottom_data, bottom_diff);
caffe_gpu_scal(dim_, bottom_coeff, bottom_diff);
break;
case ReductionParameter_ReductionOp_SUMSQ:
caffe_gpu_scale(dim_, 2 * bottom_coeff, bottom_data, bottom_diff);
break;
default:
LOG(FATAL) << "Unknown reduction op: "
<< ReductionParameter_ReductionOp_Name(op_);
}
bottom_data += dim_;
bottom_diff += dim_;
++top_diff;
}
}

INSTANTIATE_LAYER_GPU_FUNCS(ReductionLayer);

} // namespace caffe
33 changes: 32 additions & 1 deletion src/caffe/proto/caffe.proto
@@ -269,7 +269,7 @@ message ParamSpec {
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 136 (last added: flatten_param)
// LayerParameter next available layer-specific ID: 137 (last added: reduction_param)
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
@@ -341,6 +341,7 @@ message LayerParameter {
optional PowerParameter power_param = 122;
optional PReLUParameter prelu_param = 131;
optional PythonParameter python_param = 130;
optional ReductionParameter reduction_param = 136;
optional ReLUParameter relu_param = 123;
optional ReshapeParameter reshape_param = 133;
optional SigmoidParameter sigmoid_param = 124;
@@ -704,6 +705,36 @@ message PythonParameter {
optional string layer = 2;
}

// Message that stores parameters used by ReductionLayer
message ReductionParameter {
enum ReductionOp {
SUM = 1;
ASUM = 2;
SUMSQ = 3;
MEAN = 4;
}

optional ReductionOp operation = 1 [default = SUM]; // reduction operation

// The first axis to reduce to a scalar -- may be negative to index from the
// end (e.g., -1 for the last axis).
// (Currently, only reduction along ALL "tail" axes is supported; reduction
// of axis M through N, where N < num_axes - 1, is unsupported.)
// Suppose we have an n-axis bottom Blob with shape:
// (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)).
// If axis == m, the output Blob will have shape
// (d0, d1, d2, ..., d(m-1)),
// and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1))
// times, each over (dm * d(m+1) * ... * d(n-1)) individual values.
// If axis == 0 (the default), the output Blob always has the empty shape
// (count 1), performing reduction across the entire input --
// often useful for creating new loss functions.
optional int32 axis = 2 [default = 0];

optional float coeff = 3 [default = 1.0]; // coefficient for output
}
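
A minimal prototxt sketch of how these parameters might be wired up (the layer and blob names here are hypothetical, not taken from this commit):

  layer {
    name: "reduce"
    type: "Reduction"
    bottom: "in"
    top: "out"
    reduction_param {
      operation: SUMSQ
      axis: 1      # keep axis 0, reduce across all remaining axes
      coeff: 0.5   # scale every output by 0.5
    }
  }

With axis: 1 and a bottom of shape (N, C, H, W), the top would have shape (N): one reduced value per index along the first axis.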

// Message that stores parameters used by ReLULayer
message ReLUParameter {
// Allow non-zero slope for negative inputs to speed up optimization
// Described in: