Skip to content

Commit

Permalink
[Functions] Add +-*/ operators and reshape for FDTensor (PaddlePaddle…
Browse files Browse the repository at this point in the history
…#655)

* Add +-*/ functions

* Add same dims test case for operations

* add broadcast 0

* Add broadcast dim2 testcase

* Add broadcast dim3 and dim4 testcase

* Add +-*/ operators

* Add mixed operation

* refresh code style

* Add reshape op

* update code style
  • Loading branch information
joey12300 authored Nov 23, 2022
1 parent c11bfb8 commit de98163
Show file tree
Hide file tree
Showing 12 changed files with 1,163 additions and 126 deletions.
2 changes: 1 addition & 1 deletion .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@
# SpaceBeforeRangeBasedForLoopColon: true
# SpaceInEmptyBlock: false
# SpaceInEmptyParentheses: false
# SpacesBeforeTrailingComments: 1
# SpacesBeforeTrailingComments: 2
# SpacesInAngles: Never
# SpacesInConditionalStatement: false
# SpacesInContainerLiterals: true
Expand Down
95 changes: 71 additions & 24 deletions fastdeploy/core/fd_tensor.cc
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstring>

#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/core/float16.h"
#include "fastdeploy/utils/utils.h"
#include <algorithm>
#include <cstring>
#ifdef WITH_GPU
#include <cuda_runtime_api.h>
#endif
Expand Down Expand Up @@ -151,9 +151,63 @@ void FDTensor::Resize(const std::vector<int64_t>& new_shape,
shape.assign(new_shape.begin(), new_shape.end());
}

bool FDTensor::Reshape(const std::vector<int64_t>& new_shape) {
  // Reinterpret this tensor's shape in place; the data buffer is untouched.
  // Entry semantics follow the ReshapeOp convention:
  //   -1 : at most one entry; its extent is inferred so the total element
  //        count stays equal to Numel().
  //    0 : copy the extent of the same-index dimension from the current shape.
  //   >0 : use the value as-is.
  // Returns true on success; invalid shapes abort via FDASSERT.
  const int64_t numel = Numel();  // int64_t: `int` could overflow big tensors
  const int64_t unk_dim_val = -1;
  const int64_t copy_dim_val = 0;

  std::vector<int64_t> output_shape(new_shape.size(), 0);
  int64_t capacity = 1;
  int unk_dim_idx = -1;
  for (size_t i = 0; i < new_shape.size(); ++i) {
    if (new_shape[i] == unk_dim_val) {
      FDASSERT(unk_dim_idx == -1,
               "Only one dimension value of 'shape' in ReshapeOp can "
               "be -1. But received shape = [%s], shape[%d] is also -1.",
               Str(new_shape).c_str(), static_cast<int>(i));
      unk_dim_idx = static_cast<int>(i);
    } else if (new_shape[i] == copy_dim_val) {
      FDASSERT(i < shape.size(),
               "The index of 0 in `shape` must be less than "
               "the input tensor X's dimensions. "
               "But received shape = [%s], shape[%d] = 0, X's shape = [%s], "
               "X's dimensions = %d.",
               Str(new_shape).c_str(), static_cast<int>(i), Str(shape).c_str(),
               static_cast<int>(shape.size()));
    } else {
      FDASSERT(new_shape[i] > 0,
               "Each dimension value of 'shape' in ReshapeOp must not "
               "be negative except one unknown dimension. "
               "But received shape = [%s], shape[%d] = %lld.",
               Str(new_shape).c_str(), static_cast<int>(i),
               static_cast<long long>(new_shape[i]));
    }
    // 0 means "keep the old extent"; otherwise take the requested one.
    // A -1 entry multiplies capacity by -1, which the inference below
    // (-numel / capacity) relies on.
    capacity *= (new_shape[i] ? new_shape[i] : shape[i]);
    output_shape[i] = (new_shape[i] ? new_shape[i] : shape[i]);
  }
  if (unk_dim_idx != -1) {
    // capacity carries exactly one -1 factor here, so -numel / capacity is
    // the positive inferred extent when numel divides the known product.
    output_shape[unk_dim_idx] = -numel / capacity;
    FDASSERT(output_shape[unk_dim_idx] * capacity == -numel,
             "The 'shape' attribute in ReshapeOp is invalid. "
             "The input tensor X'size must be divisible by known "
             "capacity of 'shape'. "
             "But received X's shape = [%s], X's size = %lld, "
             "'shape' is [%s], known capacity of 'shape' is %lld.",
             Str(shape).c_str(), static_cast<long long>(numel),
             Str(new_shape).c_str(), static_cast<long long>(capacity));
  } else {
    // Bug fix: the second shape in this message previously printed the old
    // shape (Str(shape)) instead of the requested new_shape.
    FDASSERT(numel == capacity,
             "The 'shape' in ReshapeOp is invalid. "
             "The input tensor X'size must be equal to the capacity of "
             "'shape'. "
             "But received X's shape = [%s], X's size = %lld, 'shape' is "
             "[%s], the capacity of 'shape' is %lld.",
             Str(shape).c_str(), static_cast<long long>(numel),
             Str(new_shape).c_str(), static_cast<long long>(capacity));
  }
  shape = output_shape;
  return true;
}

template <typename T>
void CalculateStatisInfo(const void* src_ptr, int size, double* mean, double* max,
double* min) {
void CalculateStatisInfo(const void* src_ptr, int size, double* mean,
double* max, double* min) {
const T* ptr = static_cast<const T*>(src_ptr);
*mean = 0;
*max = -99999999;
Expand Down Expand Up @@ -213,10 +267,9 @@ bool FDTensor::ReallocFn(size_t nbytes) {
}
return buffer_ != nullptr;
#else
FDASSERT(false,
"The FastDeploy FDTensor allocator didn't compile under "
"-DWITH_GPU=ON,"
"so this is an unexpected problem happend.");
FDASSERT(false, "The FastDeploy FDTensor allocator didn't compile under "
"-DWITH_GPU=ON,"
"so this is an unexpected problem happend.");
#endif
} else {
if (is_pinned_memory) {
Expand All @@ -230,10 +283,9 @@ bool FDTensor::ReallocFn(size_t nbytes) {
}
return buffer_ != nullptr;
#else
FDASSERT(false,
"The FastDeploy FDTensor allocator didn't compile under "
"-DWITH_GPU=ON,"
"so this is an unexpected problem happend.");
FDASSERT(false, "The FastDeploy FDTensor allocator didn't compile under "
"-DWITH_GPU=ON,"
"so this is an unexpected problem happend.");
#endif
}
buffer_ = realloc(buffer_, nbytes);
Expand All @@ -242,7 +294,8 @@ bool FDTensor::ReallocFn(size_t nbytes) {
}

void FDTensor::FreeFn() {
if (external_data_ptr != nullptr) external_data_ptr = nullptr;
if (external_data_ptr != nullptr)
external_data_ptr = nullptr;
if (buffer_ != nullptr) {
if (device == Device::GPU) {
#ifdef WITH_GPU
Expand Down Expand Up @@ -293,11 +346,8 @@ void FDTensor::CopyBuffer(void* dst, const void* src, size_t nbytes,
FDTensor::FDTensor(const std::string& tensor_name) { name = tensor_name; }

FDTensor::FDTensor(const FDTensor& other)
: shape(other.shape),
name(other.name),
dtype(other.dtype),
device(other.device),
external_data_ptr(other.external_data_ptr) {
: shape(other.shape), name(other.name), dtype(other.dtype),
device(other.device), external_data_ptr(other.external_data_ptr) {
// Copy buffer
if (other.buffer_ == nullptr) {
buffer_ = nullptr;
Expand All @@ -310,12 +360,9 @@ FDTensor::FDTensor(const FDTensor& other)
}

FDTensor::FDTensor(FDTensor&& other)
: buffer_(other.buffer_),
shape(std::move(other.shape)),
name(std::move(other.name)),
dtype(other.dtype),
external_data_ptr(other.external_data_ptr),
device(other.device) {
: buffer_(other.buffer_), shape(std::move(other.shape)),
name(std::move(other.name)), dtype(other.dtype),
external_data_ptr(other.external_data_ptr), device(other.device) {
other.name = "";
// Note(zhoushunjie): Avoid double free.
other.buffer_ = nullptr;
Expand Down
7 changes: 3 additions & 4 deletions fastdeploy/core/fd_tensor.h
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,7 @@ struct FASTDEPLOY_DECL FDTensor {

void* Data();

bool IsShared() {
return external_data_ptr != nullptr;
}
bool IsShared() { return external_data_ptr != nullptr; }

void StopSharing();

Expand Down Expand Up @@ -116,6 +114,7 @@ struct FASTDEPLOY_DECL FDTensor {
const FDDataType& data_type, const std::string& tensor_name = "",
const Device& new_device = Device::CPU);

bool Reshape(const std::vector<int64_t>& new_shape);
// Debug function
// Use this function to print shape, dtype, mean, max, min
// prefix will also be printed as tag
Expand All @@ -141,7 +140,7 @@ struct FASTDEPLOY_DECL FDTensor {

static void CopyBuffer(void* dst, const void* src, size_t nbytes,
const Device& device = Device::CPU,
bool is_pinned_memory = false);
bool is_pinned_memory = false);
};

} // namespace fastdeploy
18 changes: 4 additions & 14 deletions fastdeploy/function/concat.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,17 @@

#include "fastdeploy/function/concat.h"

#include "fastdeploy/utils/utils.h"
#include <cstring>
#include <limits>
#include <set>
#include <sstream>
#include "fastdeploy/utils/utils.h"

namespace fastdeploy {
namespace function {
std::string Str(const std::vector<int64_t>& shape) {
std::ostringstream oss;
oss << "[ " << shape[0];
for (int i = 1; i < shape.size(); ++i) {
oss << " ," << shape[i];
}
oss << " ]";
return oss.str();
}

std::vector<int64_t> ComputeAndCheckConcatOutputShape(
const std::vector<FDTensor>& input, int axis) {
std::vector<int64_t>
ComputeAndCheckConcatOutputShape(const std::vector<FDTensor>& input, int axis) {
const size_t n = input.size();
auto out_dims = input[0].shape;
size_t in_zero_dims_size = out_dims.size();
Expand All @@ -58,8 +49,7 @@ std::vector<int64_t> ComputeAndCheckConcatOutputShape(
return out_dims;
}

template <typename T>
struct ConcatFunctor {
template <typename T> struct ConcatFunctor {
void operator()(const std::vector<FDTensor>& input, int axis,
FDTensor* output) {
size_t num = input.size();
Expand Down
75 changes: 75 additions & 0 deletions fastdeploy/function/elementwise.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/function/elementwise.h"
#include "fastdeploy/function/eigen.h"
#include "fastdeploy/function/elementwise_base.h"
#include "fastdeploy/function/elementwise_functor.h"
#include "fastdeploy/utils/utils.h"
#include <algorithm>

namespace fastdeploy {
namespace function {

// Instantiate the per-dtype raw kernel functors (AddRawKernel,
// MultiplyRawKernel, ...) used by the wrappers below. The macro is
// presumably declared in elementwise_base.h / elementwise_functor.h —
// confirm there for the exact expansion.
DEFINE_ELEMENTWISE_OP(Add);
DEFINE_ELEMENTWISE_OP(Multiply);
DEFINE_ELEMENTWISE_OP(Subtract);
DEFINE_ELEMENTWISE_OP(Divide);

// Elementwise addition: *out = x + y. Dispatches on x's dtype via
// FD_VISIT_ALL_TYPES and forwards to AddRawKernel with axis = -1
// (the kernel's default broadcast axis — see elementwise_base.h).
void Add(const FDTensor& x, const FDTensor& y, FDTensor* out) {
  FD_VISIT_ALL_TYPES(x.dtype, "AddRawKernel",
                     ([&] { AddRawKernel<data_t>()(x, y, -1, out); }));
}

// Operator sugar for Add(): returns the elementwise sum of x and y.
FDTensor operator+(const FDTensor& x, const FDTensor& y) {
  FDTensor result;
  Add(x, y, &result);
  return result;
}

// Elementwise subtraction: *out = x - y. Dispatches on x's dtype via
// FD_VISIT_ALL_TYPES and forwards to SubtractRawKernel with axis = -1
// (the kernel's default broadcast axis — see elementwise_base.h).
void Subtract(const FDTensor& x, const FDTensor& y, FDTensor* out) {
  FD_VISIT_ALL_TYPES(x.dtype, "SubtractRawKernel",
                     ([&] { SubtractRawKernel<data_t>()(x, y, -1, out); }));
}

// Operator sugar for Subtract(): returns the elementwise difference x - y.
FDTensor operator-(const FDTensor& x, const FDTensor& y) {
  FDTensor result;
  Subtract(x, y, &result);
  return result;
}

// Elementwise multiplication: *out = x * y. Dispatches on x's dtype via
// FD_VISIT_ALL_TYPES and forwards to MultiplyRawKernel with axis = -1
// (the kernel's default broadcast axis — see elementwise_base.h).
void Multiply(const FDTensor& x, const FDTensor& y, FDTensor* out) {
  FD_VISIT_ALL_TYPES(x.dtype, "MultiplyRawKernel",
                     ([&] { MultiplyRawKernel<data_t>()(x, y, -1, out); }));
}

// Operator sugar for Multiply(): returns the elementwise product of x and y.
FDTensor operator*(const FDTensor& x, const FDTensor& y) {
  FDTensor result;
  Multiply(x, y, &result);
  return result;
}

// Elementwise division: *out = x / y. Dispatches on x's dtype via
// FD_VISIT_ALL_TYPES and forwards to DivideRawKernel with axis = -1
// (the kernel's default broadcast axis — see elementwise_base.h).
// NOTE(review): division-by-zero behavior is delegated to the kernel —
// confirm it matches expectations for integer dtypes.
void Divide(const FDTensor& x, const FDTensor& y, FDTensor* out) {
  FD_VISIT_ALL_TYPES(x.dtype, "DivideRawKernel",
                     ([&] { DivideRawKernel<data_t>()(x, y, -1, out); }));
}

// Operator sugar for Divide(): returns the elementwise quotient x / y.
FDTensor operator/(const FDTensor& x, const FDTensor& y) {
  FDTensor result;
  Divide(x, y, &result);
  return result;
}

} // namespace function
} // namespace fastdeploy
60 changes: 60 additions & 0 deletions fastdeploy/function/elementwise.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "fastdeploy/core/fd_tensor.h"

namespace fastdeploy {
namespace function {

/** Execute the add operation on the input FDTensors. *out = x + y.
@param x The first input tensor.
@param y The second input tensor.
@param out The output tensor which stores the result.
*/
FASTDEPLOY_DECL void Add(const FDTensor& x, const FDTensor& y, FDTensor* out);

FASTDEPLOY_DECL FDTensor operator+(const FDTensor& x, const FDTensor& y);

/** Execute the subtract operation on the input FDTensors. *out = x - y.
@param x The first input tensor.
@param y The second input tensor.
@param out The output tensor which stores the result.
*/
FASTDEPLOY_DECL void Subtract(const FDTensor& x, const FDTensor& y,
FDTensor* out);

FASTDEPLOY_DECL FDTensor operator-(const FDTensor& x, const FDTensor& y);

/** Execute the multiply operation on the input FDTensors. *out = x * y.
@param x The first input tensor.
@param y The second input tensor.
@param out The output tensor which stores the result.
*/
FASTDEPLOY_DECL void Multiply(const FDTensor& x, const FDTensor& y,
FDTensor* out);

FASTDEPLOY_DECL FDTensor operator*(const FDTensor& x, const FDTensor& y);
/** Execute the divide operation on the input FDTensors. *out = x / y.
@param x The first input tensor.
@param y The second input tensor.
@param out The output tensor which stores the result.
*/
FASTDEPLOY_DECL void Divide(const FDTensor& x, const FDTensor& y,
FDTensor* out);
FASTDEPLOY_DECL FDTensor operator/(const FDTensor& x, const FDTensor& y);

} // namespace function
} // namespace fastdeploy
Loading

0 comments on commit de98163

Please sign in to comment.