Skip to content

Commit

Permalink
[onert] Add FullyConnected Op on xnnpack (Samsung#5230)
Browse files Browse the repository at this point in the history
Add FullyConnected Op on xnnpack for float32

ONE-DCO-1.0-Signed-off-by: Yongseop Kim <[email protected]>
  • Loading branch information
YongseopKim authored Dec 2, 2020
1 parent b425875 commit d262184
Show file tree
Hide file tree
Showing 8 changed files with 242 additions and 3 deletions.
14 changes: 14 additions & 0 deletions runtime/onert/backend/xnnpack/ConstantInitializer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,20 @@ void ConstantInitializer::visit(const ir::operation::DepthwiseConv2D &node)
registerExternalInitializer(bias_index, bias_obj);
}

// Register constant operands of FullyConnected (weights, optional bias) as
// external initializers so xnnpack can read their buffers directly.
void ConstantInitializer::visit(const ir::operation::FullyConnected &node)
{
  using FC = ir::operation::FullyConnected;
  const auto &inputs = node.getInputs();

  const auto &weight_index = inputs.at(FC::WEIGHT);
  registerExternalInitializer(weight_index, _operands.at(weight_index));

  // Bias is optional for FullyConnected; register it only when present.
  const auto &bias_index = inputs.at(FC::BIAS);
  if (!bias_index.undefined())
  {
    registerExternalInitializer(bias_index, _operands.at(bias_index));
  }
}

} // namespace xnnpack
} // namespace backend
} // namespace onert
1 change: 1 addition & 0 deletions runtime/onert/backend/xnnpack/ConstantInitializer.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class ConstantInitializer : public IConstantInitializer
public:
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
void visit(const ir::operation::FullyConnected &) override;

private:
std::shared_ptr<ITensorRegistry> tensor_registry() const override { return _tensor_reg; }
Expand Down
23 changes: 23 additions & 0 deletions runtime/onert/backend/xnnpack/KernelGenerator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include "ops/ConvolutionLayer.h"
#include "ops/DepthwiseConvolutionLayer.h"
#include "ops/FullyConnectedLayer.h"

#include <backend/Backend.h>
#include <backend/IConfig.h>
Expand Down Expand Up @@ -169,6 +170,28 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
_return_fn = std::move(fn);
}

// Build the xnnpack FullyConnected kernel function for the given IR node.
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
  using FC = ir::operation::FullyConnected;

  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(FC::Input::INPUT)};
  const auto weight_index{node.getInputs().at(FC::Input::WEIGHT)};
  const auto bias_index{node.getInputs().at(FC::Input::BIAS)};

  auto output_tensor = _tensor_reg->getPortableTensor(output_index);
  auto input_tensor = _tensor_reg->getPortableTensor(input_index);
  auto weight_tensor = _tensor_reg->getPortableTensor(weight_index);
  // Bias is optional; hand the layer a nullptr when the operand is undefined.
  auto bias_tensor = bias_index.undefined() ? nullptr : _tensor_reg->getPortableTensor(bias_index);

  auto fn = std::make_unique<ops::FullyConnectedLayer>(_external_context);
  fn->configure(input_tensor, weight_tensor, bias_tensor, node.param().activation, output_tensor);

  _return_fn = std::move(fn);
}

} // namespace xnnpack
} // namespace backend
} // namespace onert
1 change: 1 addition & 0 deletions runtime/onert/backend/xnnpack/KernelGenerator.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class KernelGenerator : public IKernelGenerator
void visit(const ir::OpSequence &) override;
void visit(const ir::operation::Conv2D &) override;
void visit(const ir::operation::DepthwiseConv2D &) override;
void visit(const ir::operation::FullyConnected &) override;

private:
const ir::Operands &_ctx;
Expand Down
138 changes: 138 additions & 0 deletions runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
/*
* Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "FullyConnectedLayer.h"

#include "ir/Padding.h"

namespace onert
{
namespace backend
{
namespace xnnpack
{
namespace ops
{

// Constructs an FC layer bound to the shared xnnpack external context.
// All tensor pointers start null and are filled in later by configure().
FullyConnectedLayer::FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context)
  : Layer(external_context), _input(nullptr), _kernel(nullptr), _bias(nullptr), _output(nullptr),
    _activation(ir::Activation::NONE)
{
  // DO NOTHING
}

// Stores tensor pointers and the fused activation. The xnnpack operator itself
// is created later in create(); this only records state and checks invariants.
void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
                                    const IPortableTensor *bias, ir::Activation activation,
                                    IPortableTensor *output)
{
  _input = input;
  _kernel = weights;
  _bias = bias;
  _activation = activation;
  _output = output;

  // TODO Support not nhwc layer
  assert(_input->layout() == ir::Layout::NHWC);

  // Only activations expressible as an output clamp are supported by xnnpack FC.
  const bool activation_supported =
    _activation == ir::Activation::NONE || _activation == ir::Activation::RELU ||
    _activation == ir::Activation::RELU1 || _activation == ir::Activation::RELU6;
  assert(activation_supported);
  (void)activation_supported; // silence unused warning when NDEBUG is defined
}

// Executes the prepared xnnpack operator on the external context's thread pool.
// Throws std::runtime_error on unsupported data type or operator failure.
void FullyConnectedLayer::run()
{
  assert(_external_context && _external_context->getThreadPool());

  // Buffers may not have been available at prepare time; bind them lazily.
  if (!_setup)
  {
    _setup = setup();
    assert(_setup);
  }

  // Only float32 is implemented for this backend.
  if (_input->data_type() != OperandType::FLOAT32)
  {
    throw std::runtime_error{"XNNPACK FC: unsupported data type"};
  }

  const enum xnn_status status = xnn_run_operator(_kernel_op, _external_context->getThreadPool());
  if (status != xnn_status_success)
  {
    throw std::runtime_error{"failed to run FP32 FullyConnected operator"};
  }
}

// Creates the xnnpack FP32 fully-connected operator from the constant
// weights/bias and the activation's clamp range. Returns true on success;
// throws std::runtime_error if xnnpack rejects the configuration.
bool FullyConnectedLayer::create()
{
  // Translate the fused activation into xnnpack's output clamp interval.
  float output_activation_min = 0.f, output_activation_max = 0.f;
  CalculateActivationRange<float>(_activation, &output_activation_min, &output_activation_max);

  // Weights are a 2-D tensor laid out as [output_channels, input_channels].
  const auto &kernel_shape = _kernel->getShape();
  assert(kernel_shape.rank() == 2);
  const uint32_t output_channels = kernel_shape.dim(0);
  const uint32_t input_channels = kernel_shape.dim(1);

  const auto &input_shape = _input->getShape();
  const auto &output_shape = _output->getShape();
  uint32_t flag = 0;
  if (input_shape.rank() == output_shape.rank())
  {
    // Ranks agree: the innermost input dimension must equal the channel count.
    assert(static_cast<uint32_t>(input_shape.dim(input_shape.rank() - 1)) == input_channels);
  }
  else
  {
    // Rank mismatch: have xnnpack flatten the input TensorFlow-style.
    flag |= XNN_FLAG_TENSORFLOW_RESHAPE_2D;
    assert(input_shape.num_elements() % input_channels == 0);
  }

  assert(_kernel && _kernel->buffer());
  const auto *kernel_buffer = reinterpret_cast<const float *>(_kernel->buffer());
  const auto *bias_buffer =
    (_bias != nullptr) ? reinterpret_cast<const float *>(_bias->buffer()) : nullptr;

  const enum xnn_status status = xnn_create_fully_connected_nc_f32(
    input_channels, output_channels, input_channels /* input stride */,
    output_channels /* output stride */, kernel_buffer, bias_buffer, output_activation_min,
    output_activation_max, flag, &_kernel_op);
  if (status != xnn_status_success)
  {
    throw std::runtime_error{"failed to create FP32 FullyConnected operator"};
  }
  assert(_kernel_op != nullptr);
  return true;
}

// Binds the input/output buffers to the previously created xnnpack operator.
// Returns false (deferring setup to a later run()) when buffers are not yet
// allocated; throws std::runtime_error if xnnpack setup fails.
bool FullyConnectedLayer::setup()
{
  if (_input->buffer() == nullptr || _output->buffer() == nullptr)
  {
    // it could be model's input or output whose buffer is allocated later
    return false;
  }

  // Batch size is the flattened input divided by input_channels (kernel dim(1)).
  uint32_t batch_size = _input->getShape().num_elements() / _kernel->getShape().dim(1);
  enum xnn_status status = xnn_setup_fully_connected_nc_f32(
    _kernel_op, batch_size, reinterpret_cast<const float *>(_input->buffer()),
    reinterpret_cast<float *>(_output->buffer()), _external_context->getThreadPool());
  if (status != xnn_status_success)
  {
    // Fixed copy-paste error: this is the setup step, not create.
    throw std::runtime_error{"failed to setup FP32 FullyConnected operator"};
  }
  return true;
}

} // namespace ops
} // namespace xnnpack
} // namespace backend
} // namespace onert
61 changes: 61 additions & 0 deletions runtime/onert/backend/xnnpack/ops/FullyConnectedLayer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
#define __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__

#include "Layer.h"

#include <xnnpack.h>

namespace onert
{
namespace backend
{
namespace xnnpack
{
namespace ops
{

// Fully-connected (dense) layer for the xnnpack backend, FP32 only.
// Lifecycle: configure() records tensors and the fused activation,
// create() builds the xnnpack operator from constant weights/bias,
// setup() binds input/output buffers, run() executes the operator.
class FullyConnectedLayer : public Layer
{
public:
  FullyConnectedLayer(const std::shared_ptr<ExternalContext> external_context);

public:
  // Records tensor pointers and activation; bias may be nullptr (optional).
  // NOTE Parameter renamed from "_kernel" to "weights" to match the definition
  //      and avoid clashing with the member-naming convention.
  void configure(const IPortableTensor *input, const IPortableTensor *weights,
                 const IPortableTensor *bias, ir::Activation activation, IPortableTensor *output);

  // Runs the operator; throws std::runtime_error on failure or non-FP32 input.
  void run() override;

  // Creates the xnnpack FP32 fully-connected operator.
  bool create() override;
  // Binds input/output buffers; returns false if they are not allocated yet.
  bool setup() override;

private:
  const IPortableTensor *_input;
  const IPortableTensor *_kernel; // weights, 2-D: [output_channels, input_channels]
  const IPortableTensor *_bias;   // optional; may be nullptr
  IPortableTensor *_output;

  ir::Activation _activation;
};

} // namespace ops
} // namespace xnnpack
} // namespace backend
} // namespace onert

#endif // __ONERT_BACKEND_XNNPACK_OPS_FULLY_CONNECTED_LAYER_H__
1 change: 1 addition & 0 deletions runtime/onert/backend/xnnpack/ops/Layer.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <backend/IPortableTensor.h>
#include "OperationUtils.h"
#include "../ExternalContext.h"
#include "../Tensor.h"

#include <cassert>
#include <memory>
Expand Down
6 changes: 3 additions & 3 deletions tests/nnfw_api/src/one_op_tests/FullyConnected.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ TEST_F(GenModelTest, OneOp_FullyConnected)
_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(
uniformTCD<float>({{1, 3, 2, 1}}, {{2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 5, 2, 1, 5, 6}}));
_context->setBackends({"cpu", "acl_neon"});
_context->setBackends({"cpu", "acl_neon", "xnnpack"});

SUCCEED();
}
Expand Down Expand Up @@ -173,7 +173,7 @@ TEST_F(GenModelTest, OneOp_FullyConnected_OptionalBias)
_context->addTestCase(
uniformTCD<float>({{3, -1, -1, 1, -2, 0, -2, 1}},
{{-4, -2, 9, -6, 8, 13, 5, 18, 5, -3, -7, -2, -16, -5, -1, -1}}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
_context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});

SUCCEED();
}
Expand Down Expand Up @@ -202,7 +202,7 @@ TEST_F(GenModelTest, neg_OneOp_FullyConnected_NoBias)
_context->addTestCase(
uniformTCD<float>({{3, -1, -1, 1, -2, 0, -2, 1}},
{{-4, -2, 9, -6, 8, 13, 5, 18, 5, -3, -7, -2, -16, -5, -1, -1}}));
_context->setBackends({"acl_cl", "acl_neon", "cpu"});
_context->setBackends({"acl_cl", "acl_neon", "cpu", "xnnpack"});
_context->expectFailCompile();

SUCCEED();
Expand Down

0 comments on commit d262184

Please sign in to comment.