Skip to content

Commit

Permalink
[onert] Add FullyConnected Op on xnnpack (Samsung#5230)
Browse files Browse the repository at this point in the history
Add FullyConnected Op on xnnpack for float32

ONE-DCO-1.0-Signed-off-by: Yongseop Kim <[email protected]>
  • Loading branch information
YongseopKim authored Dec 2, 2020
1 parent b425875 commit d262184
Show file tree
Hide file tree
Showing 8 changed files with 242 additions and 3 deletions.
14 changes: 14 additions & 0 deletions runtime/onert/backend/xnnpack/ConstantInitializer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,20 @@ void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
registerExternalInitializer(bias_index, bias_obj);
}

// Register constant operands of FullyConnected (weights, optional bias) as
// external initializers so xnnpack can read their buffers directly.
void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
{
  using FC = ir::operation::FullyConnected;
  const auto &inputs = node.getInputs();

  const auto &weight_index = inputs.at(FC::WEIGHT);
  registerExternalInitializer(weight_index, _operands.at(weight_index));

  // Bias is optional for FullyConnected; register it only when present.
  const auto &bias_index = inputs.at(FC::BIAS);
  if (!bias_index.undefined())
  {
    registerExternalInitializer(bias_index, _operands.at(bias_index));
  }
}

} // namespace xnnpack
} // namespace backend
} // namespace onert
1 change: 1 addition & 0 deletions runtime/onert/backend/xnnpack/ConstantInitializer.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class ConstantInitializer : public IConstantInitializer
public:
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
void visit(const ir::operation::FullyConnected &) override;

private:
std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
Expand Down
23 changes: 23 additions & 0 deletions runtime/onert/backend/xnnpack/KernelGenerator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include "ops/ConvolutionLayer.h"
#include "ops/DepthwiseConvolutionLayer.h"
#include "ops/FullyConnectedLayer.h"

#include <backend/Backend.h>
#include <backend/IConfig.h>
Expand Down Expand Up @@ -169,6 +170,28 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
_return_fn = std::move(fn);
}

// Build the xnnpack FullyConnected kernel function for the given IR node.
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
  using FC = ir::operation::FullyConnected;

  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(FC::Input::INPUT)};
  const auto weight_index{node.getInputs().at(FC::Input::WEIGHT)};
  const auto bias_index{node.getInputs().at(FC::Input::BIAS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
  // Bias is optional; hand the layer a nullptr when the operand is undefined.
  auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);

  auto fn = std::make_unique<ops::FullyConnectedLayer>(_external_context);
  fn->configure(input_tensor, weight_tensor, bias_tensor, node.param().activation, output_tensor);

  _return_fn = std::move(fn);
}

} // namespace xnnpack
} // namespace backend
} // namespace onert
1 change: 1 addition & 0 deletions runtime/onert/backend/xnnpack/KernelGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class KernelGenerator : public IKernelGenerator
void visit(const ir::OpSequence &) override;
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
void visit(const ir::operation::FullyConnected &) override;

private:
const ir::Operands &_ctx;
Expand Down
138 changes: 138 additions & 0 deletions runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
/*
* Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "FullyConnectedLayer.h"

#include "ir/Padding.h"

namespace onert
{
namespace backend
{
namespace xnnpack
{
namespace ops
{

// Constructs an FC layer bound to the shared xnnpack external context.
// All tensor pointers start null and are filled in later by configure().
FullyConnectedLayer::FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context)
  : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
    _activation(ir::Activation::NONE)
{
  // DO NOTHING
}

// Stores tensor pointers and the fused activation. The xnnpack operator itself
// is created later in create(); this only records state and checks invariants.
void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
                                    const IPortableTensor *bias, ir::Activation activation,
                                    IPortableTensor *output)
{
  _input = input;
  _kernel = weights;
  _bias = bias;
  _activation = activation;
  _output = output;

  // TODO Support not nhwc layer
  assert(_input->layout() == ir::Layout::NHWC);

  // Only activations expressible as an output clamp are supported by xnnpack FC.
  const bool activation_supported =
    _activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
    _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6;
  assert(activation_supported);
  (void)activation_supported; // silence unused warning when NDEBUG is defined
}

// Executes the prepared xnnpack operator on the external context's thread pool.
// Throws std::runtime_error on unsupported data type or operator failure.
void FullyConnectedLayer::run()
{
  assert(_external_context && _external_context->getThreadPool());

  // Buffers may not have been available at prepare time; bind them lazily.
  if (!_setup)
  {
    _setup = setup();
    assert(_setup);
  }

  // Only float32 is implemented for this backend.
  if (_input->data_type() != OperandType::FLOAT32)
  {
    throw std::runtime_error{"XNNPACK FC: unsupported data type"};
  }

  const enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
  if (status != xnn_status_success)
  {
    throw std::runtime_error{"failed to run FP32 FullyConnected operator"};
  }
}

// Creates the xnnpack FP32 fully-connected operator from the constant
// weights/bias and the activation's clamp range. Returns true on success;
// throws std::runtime_error if xnnpack rejects the configuration.
bool FullyConnectedLayer::create()
{
  // Translate the fused activation into xnnpack's output clamp interval.
  float output_activation_min = 0.f, output_activation_max = 0.f;
  CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);

  // Weights are a 2-D tensor laid out as [output_channels, input_channels].
  const auto &kernel_shape = _kernel->getShape();
  assert(kernel_shape.rank() == 2);
  const uint32_t output_channels = kernel_shape.dim(0);
  const uint32_t input_channels = kernel_shape.dim(1);

  const auto &input_shape = _input->getShape();
  const auto &output_shape = _output->getShape();
  uint32_t flag = 0;
  if (input_shape.rank() == output_shape.rank())
  {
    // Ranks agree: the innermost input dimension must equal the channel count.
    assert(static_cast<uint32_t>(input_shape.dim(input_shape.rank() - 1)) == input_channels);
  }
  else
  {
    // Rank mismatch: have xnnpack flatten the input TensorFlow-style.
    flag |= XNN_FLAG_TENSORFLOW_RESHAPE_2D;
    assert(input_shape.num_elements() % input_channels == 0);
  }

  assert(_kernel && _kernel->buffer());
  const auto *kernel_buffer = reinterpret_cast<const float *>(_kernel->buffer());
  const auto *bias_buffer =
    (_bias != nullptr) ? reinterpret_cast<const float *>(_bias->buffer()) : nullptr;

  const enum xnn_status status = xnn_create_fully_connected_nc_f32(
    input_channels, output_channels, input_channels /* input stride */,
    output_channels /* output stride */, kernel_buffer, bias_buffer, output_activation_min,
    output_activation_max, flag, &_kernel_op);
  if (status != xnn_status_success)
  {
    throw std::runtime_error{"failed to create FP32 FullyConnected operator"};
  }
  assert(_kernel_op != nullptr);
  return true;
}

// Binds the input/output buffers to the previously created xnnpack operator.
// Returns false (deferring setup to a later run()) when buffers are not yet
// allocated; throws std::runtime_error if xnnpack setup fails.
bool FullyConnectedLayer::setup()
{
  if (_input->buffer() == nullptr || _output->buffer() == nullptr)
  {
    // it could be model's input or output whose buffer is allocated later
    return false;
  }

  // Batch size is the flattened input divided by input_channels (kernel dim(1)).
  uint32_t batch_size = _input->getShape().num_elements() / _kernel->getShape().dim(1);
  enum xnn_status status = xnn_setup_fully_connected_nc_f32(
    _kernel_op, batch_size, reinterpret_cast<const float *>(_input->buffer()),
    reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
  if (status != xnn_status_success)
  {
    // Fixed copy-paste error: this is the setup step, not create.
    throw std::runtime_error{"failed to setup FP32 FullyConnected operator"};
  }
  return true;
}

} // namespace ops
} // namespace xnnpack
} // namespace backend
} // namespace onert
61 changes: 61 additions & 0 deletions runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
#define __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__

#include "Layer.h"

#include <xnnpack.h>

namespace onert
{
namespace backend
{
namespace xnnpack
{
namespace ops
{

// Fully-connected (dense) layer for the xnnpack backend, FP32 only.
// Lifecycle: configure() records tensors and the fused activation,
// create() builds the xnnpack operator from constant weights/bias,
// setup() binds input/output buffers, run() executes the operator.
class FullyConnectedLayer : public Layer
{
public:
  FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context);

public:
  // Records tensor pointers and activation; bias may be nullptr (optional).
  // NOTE Parameter renamed from "_kernel" to "weights" to match the definition
  //      and avoid clashing with the member-naming convention.
  void configure(const IPortableTensor *input, const IPortableTensor *weights,
                 const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output);

  // Runs the operator; throws std::runtime_error on failure or non-FP32 input.
  void run() override;

  // Creates the xnnpack FP32 fully-connected operator.
  bool create() override;
  // Binds input/output buffers; returns false if they are not allocated yet.
  bool setup() override;

private:
  const IPortableTensor *_input;
  const IPortableTensor *_kernel; // weights, 2-D: [output_channels, input_channels]
  const IPortableTensor *_bias;   // optional; may be nullptr
  IPortableTensor *_output;

  ir::Activation _activation;
};

} // namespace ops
} // namespace xnnpack
} // namespace backend
} // namespace onert

#endif // __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
1 change: 1 addition & 0 deletions runtime/onert/backend/xnnpack/ops/Layer.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <backend/IPortableTensor.h>
#include "OperationUtils.h"
#include "../ExternalContext.h"
#include "../Tensor.h"

#include <cassert>
#include <memory>
Expand Down
6 changes: 3 additions & 3 deletions tests/nnfw_api/src/one_op_tests/FullyConnected.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ TEST_F(GenModelTest, OneOp_FullyConnected)
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
uniformTCD<float>({{1, 3, 2, 1}}, {{2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 6}}));
_context->setBackends({"cpu", "acl_neon"});
_context->setBackends({"cpu", "acl_neon", "xnnpack"});

SUCCEED();
}
Expand Down Expand Up @@ -173,7 +173,7 @@ TEST_F(GenModelTest, OneOp_FullyConnected_OptionalBias)
_context->addTestCase(
uniformTCD<float>({{3, -1, -1, 1, -2, 0, -2, 1}},
{{-4, -2, 9, -6, 8, 13, 5, 18, 5, -3, -7, -2, -16, -5, -1, -1}}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
_context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});

SUCCEED();
}
Expand Down Expand Up @@ -202,7 +202,7 @@ TEST_F(GenModelTest, neg_OneOp_FullyConnected_NoBias)
_context->addTestCase(
uniformTCD<float>({{3, -1, -1, 1, -2, 0, -2, 1}},
{{-4, -2, 9, -6, 8, 13, 5, 18, 5, -3, -7, -2, -16, -5, -1, -1}}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
_context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
_context->expectFailCompile();

SUCCEED();
Expand Down

0 comments on commit d262184

Please sign in to comment.