[onert] Support Conv2D int8 channel-wise quantization (Samsung#5619)
: Introduce convQuant8PerChannel
: Add two positive and two negative test cases for int8

ONE-DCO-1.0-Signed-off-by: Sanggyu Lee <[email protected]>
glistening authored Jan 12, 2021
1 parent a9c9c9b commit 207f8ee
Showing 6 changed files with 297 additions and 0 deletions.
39 changes: 39 additions & 0 deletions compute/cker/include/cker/operation/Conv.h
@@ -83,6 +83,33 @@ class Conv
}
}

void getQuantizedConvolutionMultipliersAndShifts(float input_scale, float output_scale,
const float *filter_scales,
size_t filter_scales_size, int num_channels)
{
// Originates from tflite's PopulateConvolutionQuantizationParams()
_per_channel_output_multiplier.resize(num_channels);
_per_channel_output_shift.resize(num_channels);

const bool is_per_channel = filter_scales_size > 1;
auto per_channel_multiplier = _per_channel_output_multiplier.data();
auto per_channel_shift = _per_channel_output_shift.data();
for (int i = 0; i < num_channels; ++i)
{
// If a per-tensor quantization parameter is specified, broadcast it along the
// quantization dimension (channels_out).
const float scale = is_per_channel ? filter_scales[i] : filter_scales[0];
const double filter_scale = static_cast<double>(scale);
const double effective_output_scale =
static_cast<double>(input_scale) * filter_scale / static_cast<double>(output_scale);
int32_t significand;
int channel_shift;
QuantizeMultiplier(effective_output_scale, &significand, &channel_shift);
per_channel_multiplier[i] = significand;
per_channel_shift[i] = channel_shift;
}
}
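
For context, QuantizeMultiplier turns each effective scale into a fixed-point significand plus a power-of-two shift, so the kernel can rescale using integer arithmetic only. Below is a minimal sketch of the usual frexp-based formulation; the name and edge-case handling are illustrative, not the exact cker implementation.

// Sketch only (needs <cmath>, <cassert>, <cstdint>): decompose a positive
// double into significand * 2^shift, with the significand stored as a
// 32-bit value in [2^30, 2^31).
inline void QuantizeMultiplierSketch(double double_multiplier, int32_t *quantized_multiplier,
                                     int *shift)
{
  if (double_multiplier == 0.)
  {
    *quantized_multiplier = 0;
    *shift = 0;
    return;
  }
  // frexp returns q in [0.5, 1) such that double_multiplier == q * 2^(*shift).
  const double q = std::frexp(double_multiplier, shift);
  auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
  assert(q_fixed <= (1ll << 31));
  if (q_fixed == (1ll << 31))
  {
    // Rounding pushed q up to exactly 1.0; renormalize.
    q_fixed /= 2;
    ++*shift;
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}

For example, an effective scale of 0.25 yields q = 0.5 and shift = -1, i.e. a significand of 2^30 with one extra right shift.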

void operator()(const ConvParams &params, const Shape &input_shape, const float *input_data,
const Shape &filter_shape, const float *filter_data, const Shape &bias_shape,
const float *bias_data, const Shape &output_shape, float *output_data)
@@ -138,6 +165,15 @@
}
}

void operator()(const ConvParams &params, const Shape &input_shape, const int8_t *input_data,
const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape,
const int32_t *bias_data, const Shape &output_shape, int8_t *output_data)
{
reference::Conv(params, _per_channel_output_multiplier.data(), _per_channel_output_shift.data(),
input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data,
output_shape, output_data);
}
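
Note the call-order contract: getQuantizedConvolutionMultipliersAndShifts must run before this int8 operator(), since the reference kernel reads the precomputed per-channel arrays. A sketch of the intended usage, mirroring what the CPU backend below does in prepare() and run():

nnfw::cker::Conv conv;
// Once, at prepare time (constant weights, static shapes):
conv.getQuantizedConvolutionMultipliersAndShifts(input_scale, output_scale,
                                                 filter_scales.data(),
                                                 filter_scales.size(), num_channels);
// Per inference:
conv(params, input_shape, input_data, filter_shape, filter_data, bias_shape,
     bias_data, output_shape, output_data);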

private:
bool usableMultiThreaded(PaddingType padding_type, uint32_t dilation_width_factor,
int32_t dilation_height_factor)
@@ -180,6 +216,9 @@ class Conv
Shape _im2col_shape;
bool _need_im2col;
bool _prepared;
// Per-channel output multiplier and shift.
std::vector<int32_t> _per_channel_output_multiplier;
std::vector<int> _per_channel_output_shift;
};
} // namespace cker
} // namespace nnfw
110 changes: 110 additions & 0 deletions compute/cker/include/cker/operation/reference/Conv.h
@@ -190,6 +190,116 @@ inline void Conv(const ConvParams &params, const Shape &input_shape, const uint8
}
}

inline void Conv(const ConvParams &params, const int32_t *output_multiplier,
const int32_t *output_shift, const Shape &input_shape, const int8_t *input_data,
const Shape &filter_shape, const int8_t *filter_data, const Shape &bias_shape,
const int32_t *bias_data, const Shape &output_shape, int8_t *output_data)
{
UNUSED_RELEASE(bias_shape);
// Get parameters.
const int32_t input_offset = params.input_offset; // r = s(q - Z)
const int stride_width = params.stride_width;
const int stride_height = params.stride_height;
const int dilation_width_factor = params.dilation_width_factor;
const int dilation_height_factor = params.dilation_height_factor;
const int pad_width = params.padding_values.width;
const int pad_height = params.padding_values.height;
const int32_t output_offset = params.output_offset;

// Set min and max value of the output.
const int32_t output_activation_min = params.quantized_activation_min;
const int32_t output_activation_max = params.quantized_activation_max;

// Consistency check.
assert(output_activation_min < output_activation_max);
assert(input_shape.DimensionsCount() == 4);
assert(filter_shape.DimensionsCount() == 4);
assert(output_shape.DimensionsCount() == 4);
const int batches = MatchingDim(input_shape, 0, output_shape, 0);
const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
if (bias_data)
{
assert(bias_shape.FlatSize() == output_depth);
}

// Check dimensions of the tensors.
const int input_height = input_shape.Dims(1);
const int input_width = input_shape.Dims(2);
const int filter_height = filter_shape.Dims(1);
const int filter_width = filter_shape.Dims(2);
const int output_height = output_shape.Dims(1);
const int output_width = output_shape.Dims(2);
for (int batch = 0; batch < batches; ++batch)
{
for (int out_y = 0; out_y < output_height; ++out_y)
{
const int in_y_origin = (out_y * stride_height) - pad_height;
for (int out_x = 0; out_x < output_width; ++out_x)
{
const int in_x_origin = (out_x * stride_width) - pad_width;
for (int out_channel = 0; out_channel < output_depth; ++out_channel)
{
int32_t acc = 0;
for (int filter_y = 0; filter_y < filter_height; ++filter_y)
{
const int in_y = in_y_origin + dilation_height_factor * filter_y;
for (int filter_x = 0; filter_x < filter_width; ++filter_x)
{
const int in_x = in_x_origin + dilation_width_factor * filter_x;

// Zero padding by omitting the areas outside the image.
const bool is_point_inside_image =
(in_x >= 0) && (in_x < input_width) && (in_y >= 0) && (in_y < input_height);

if (!is_point_inside_image)
{
continue;
}

for (int in_channel = 0; in_channel < input_depth; ++in_channel)
{
int32_t input_val = input_data[Offset(input_shape, batch, in_y, in_x, in_channel)];
int32_t filter_val =
filter_data[Offset(filter_shape, out_channel, filter_y, filter_x, in_channel)];
// Accumulate with a 32-bit accumulator.
// In the nudging process during model quantization, we force the
// real value 0.0 to be representable by a quantized value. This
// guarantees that input_offset fits in an int8_t, even though it
// is stored as an int32_t. The update is int32_t += int8_t *
// (int8_t - int8_t), so the largest value each accumulation can
// add is [-127, 127] * ([-128, 127] - [-128, 127]), which is
// within [-32512, 32512]. log2(32512) = 14.98, which means we can
// accumulate at least 2^16 multiplications without overflow. The
// accumulator is applied to a filter, so the accumulation logic
// holds as long as the filter size (filter_y * filter_x *
// in_channel) does not exceed 2^16, which is the case in all the
// models we have seen so far.
// TODO(jianlijianli): Add a check to make sure the
// accumulator depth is smaller than 2^16.
acc += filter_val * (input_val + input_offset);
}
}
}

if (bias_data)
{
acc += bias_data[out_channel];
}
acc = MultiplyByQuantizedMultiplier(acc, output_multiplier[out_channel],
output_shift[out_channel]);
acc += output_offset;
acc = std::max(acc, output_activation_min);
acc = std::min(acc, output_activation_max);
output_data[Offset(output_shape, batch, out_y, out_x, out_channel)] =
static_cast<int8_t>(acc);
}
}
}
}
}
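
The requantization above hinges on MultiplyByQuantizedMultiplier, which maps the 32-bit accumulator back into the output domain as acc * multiplier * 2^(shift - 31). Here is a minimal sketch of that arithmetic using one 64-bit multiply; the real cker/gemmlowp helper differs in its saturating fixed-point primitives and in how it rounds negative values.

// Sketch only (hypothetical name; assumes -32 < shift < 31):
inline int32_t MultiplyByQuantizedMultiplierSketch(int32_t acc, int32_t quantized_multiplier,
                                                   int shift)
{
  // Represented value: acc * quantized_multiplier * 2^(shift - 31).
  const int64_t prod = static_cast<int64_t>(acc) * static_cast<int64_t>(quantized_multiplier);
  const int total_right_shift = 31 - shift;
  const int64_t round = int64_t{1} << (total_right_shift - 1);
  return static_cast<int32_t>((prod + round) >> total_right_shift);
}

With the values from the int8 test below (acc = 152, effective scale 0.25, i.e. multiplier 2^30 and shift -1), this returns 152 / 4 = 38, matching the expected output.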

} // namespace reference
} // namespace cker
} // namespace nnfw
43 changes: 43 additions & 0 deletions runtime/onert/backend/cpu/ops/ConvolutionLayer.cc
@@ -98,6 +98,32 @@ void ConvolutionLayer::convQuant8()
getBuffer<uint8_t>(_output));
}

void ConvolutionLayer::convQuant8PerChannel()
{
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
&output_activation_max);

nnfw::cker::ConvParams op_params;
op_params.input_offset = -_input->data_zero_point();
op_params.output_offset = _output->data_zero_point();
op_params.stride_height = _strideHeight;
op_params.stride_width = _strideWidth;
op_params.dilation_height_factor = _dilationHeightFactor;
op_params.dilation_width_factor = _dilationWidthFactor;
op_params.padding_values.height = _paddingTop;
op_params.padding_values.width = _paddingLeft;
op_params.quantized_activation_min = output_activation_min;
op_params.quantized_activation_max = output_activation_max;

nnfw::cker::Conv &kernel = *_conv_kernel;
kernel(op_params, getShape(_input), reinterpret_cast<const int8_t *>(_input->buffer()),
getShape(_kernel), reinterpret_cast<const int8_t *>(_kernel->buffer()), getShape(_bias),
reinterpret_cast<const int32_t *>(_bias->buffer()), getShape(_output),
reinterpret_cast<int8_t *>(_output->buffer()));
}
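
CalculateActivationRangeQuantized narrows [output_activation_min, output_activation_max] to the intersection of the int8 range and the fused activation. A rough sketch of the idea for the int8 case, with a hypothetical name and only NONE/RELU/RELU6 shown (the real helper in OperationUtils also covers uint8 and other activations):

// Sketch only (needs <algorithm>, <cmath>, <limits>):
inline void CalculateInt8ActivationRangeSketch(ir::Activation activation, float output_scale,
                                               int32_t output_zero_point, int32_t *act_min,
                                               int32_t *act_max)
{
  const int32_t qmin = std::numeric_limits<int8_t>::min(); // -128
  const int32_t qmax = std::numeric_limits<int8_t>::max(); // 127
  // Quantize a real-valued clamp bound into the output's integer domain.
  auto quantize = [&](float f) {
    return output_zero_point + static_cast<int32_t>(std::round(f / output_scale));
  };
  switch (activation)
  {
    case ir::Activation::RELU:
      *act_min = std::max(qmin, quantize(0.f));
      *act_max = qmax;
      break;
    case ir::Activation::RELU6:
      *act_min = std::max(qmin, quantize(0.f));
      *act_max = std::min(qmax, quantize(6.f));
      break;
    default: // NONE: keep the full int8 range
      *act_min = qmin;
      *act_max = qmax;
      break;
  }
}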

void ConvolutionLayer::configure(const IPortableTensor *input, const IPortableTensor *kernel,
const IPortableTensor *bias, const ir::PaddingType paddingType,
const uint32_t paddingLeft, const uint32_t paddingRight,
@@ -164,6 +190,10 @@ void ConvolutionLayer::run()
{
convQuant8();
}
else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
{
convQuant8PerChannel();
}
else
{
throw std::runtime_error{"Conv: unsupported data type"};
@@ -197,6 +227,19 @@ void ConvolutionLayer::prepare()
kernel.prepareQuant(getShape(_input), getShape(_kernel), getShape(_output), _strideWidth,
_strideHeight, _dilationWidthFactor, _dilationHeightFactor);
}
else if (_input->data_type() == OperandType::QUANT_INT8_ASYMM)
{
if (_kernel->is_constant() && !_input->is_dynamic() && !_output->is_dynamic())
{
kernel.getQuantizedConvolutionMultipliersAndShifts(
_input->data_scale(), _output->data_scale(), _kernel->data_scales().data(),
_kernel->data_scales().size(), getShape(_kernel).Dims(0));
}
else
{
throw std::runtime_error{"Conv2D: Int8 dynamic weight is not supported"};
}
}
_prepare = true;
}

2 changes: 2 additions & 0 deletions runtime/onert/backend/cpu/ops/ConvolutionLayer.h
@@ -52,6 +52,8 @@ class ConvolutionLayer : public ::onert::exec::IFunction

void convQuant8();

void convQuant8PerChannel();

void configure(const IPortableTensor *input, const IPortableTensor *kernel,
const IPortableTensor *bias, ir::PaddingType _paddingType,
const uint32_t paddingLeft, const uint32_t paddingRight, const uint32_t paddingTop,
7 changes: 7 additions & 0 deletions runtime/onert/core/src/ir/OperationValidator.cc
@@ -177,6 +177,7 @@ void OperationValidator::visit(const operation::Concat &node)
void OperationValidator::visit(const operation::Conv2D &node)
{
const auto input_index{node.getInputs().at(operation::Conv2D::Input::INPUT)};
const auto kernel_index{node.getInputs().at(operation::Conv2D::Input::KERNEL)};
const auto output_index{node.getOutputs().at(0)};

uint32_t stride_horizontal = node.param().stride.horizontal;
@@ -187,6 +188,12 @@ void OperationValidator::visit(const operation::Conv2D &node)
OP_REQUIRES((stride_horizontal > 0) && (stride_vertical > 0));
OP_REQUIRES((dilation_width > 0) && (dilation_height > 0));
OP_REQUIRES(isSameType(input_index, output_index));

if (isConstant(kernel_index) && operandType(kernel_index) == DataType::QUANT_INT8_ASYMM)
{
for (const auto zeropoint : _operands.at(kernel_index).typeInfo().zero_points())
OP_REQUIRES(zeropoint == 0);
}
}

void OperationValidator::visit(const operation::DepthToSpace &node)
96 changes: 96 additions & 0 deletions tests/nnfw_api/src/one_op_tests/Conv2D.cc
@@ -88,6 +88,54 @@ TEST_F(GenModelTest, OneOp_Conv2D_Dilation)
SUCCEED();
}

TEST_F(GenModelTest, OneOp_Conv2D_I8)
{
CircleGen cgen;
std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
uint32_t weight_buf = cgen.addBuffer(weight_data);
std::vector<int32_t> bias_data{0, 2, 4};
uint32_t bias_buf = cgen.addBuffer(bias_data);
int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
int weight =
cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, 0.5, 0);
int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
circle::ActivationFunctionType_NONE);
cgen.setInputsAndOutputs({in}, {out});

_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(uniformTCD<int8_t>({{10, 10, 10}}, {{15, 38, 61}}));
_context->setBackends({"cpu"});

SUCCEED();
}
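
To see where {15, 38, 61} comes from: the input zero point is 0, so each accumulator is acc = sum of filter_val * input_val, the raw int32 bias is added, and the result is rescaled by the effective scale input_scale * filter_scale / output_scale = 0.5 * 0.5 / 1.0 = 0.25:

  channel 0: ((1+2+3) * 10 + 0) * 0.25 =  60 * 0.25 = 15
  channel 1: ((4+5+6) * 10 + 2) * 0.25 = 152 * 0.25 = 38
  channel 2: ((7+8+9) * 10 + 4) * 0.25 = 244 * 0.25 = 61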

TEST_F(GenModelTest, OneOp_Conv2D_I8_PerChannel)
{
CircleGen cgen;
std::vector<int8_t> weight_data{1, 2, 3, 1, 2, 3, 7, 8, 9};
uint32_t weight_buf = cgen.addBuffer(weight_data);
std::vector<int32_t> bias_data{0, 0, 0};
uint32_t bias_buf = cgen.addBuffer(bias_data);
int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
std::vector<float> weight_scales = {0.5, 1, 0.5};
std::vector<int64_t> weight_zeropoints = {0, 0, 0};
int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf},
weight_scales, weight_zeropoints);
int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
circle::ActivationFunctionType_NONE);
cgen.setInputsAndOutputs({in}, {out});

_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->addTestCase(uniformTCD<int8_t>({{10, 10, 10}}, {{15, 30, 60}}));
_context->setBackends({"cpu"});

SUCCEED();
}
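
Here each output channel gets its own effective scale, 0.5 * weight_scale / 1.0, which is what distinguishes this test from the per-tensor one above:

  channel 0: (1+2+3) * 10 * (0.5 * 0.5) =  60 * 0.25 = 15
  channel 1: (1+2+3) * 10 * (0.5 * 1.0) =  60 * 0.50 = 30
  channel 2: (7+8+9) * 10 * (0.5 * 0.5) = 240 * 0.25 = 60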

TEST_F(GenModelTest, neg_OneOp_Conv2D_Type)
{
CircleGen cgen;
@@ -150,3 +198,51 @@ TEST_F(GenModelTest, neg_OneOp_Conv2D_Dilation)

SUCCEED();
}

TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_NonZero_ZeroPoint)
{
CircleGen cgen;
std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
uint32_t weight_buf = cgen.addBuffer(weight_data);
std::vector<int32_t> bias_data{0, 2, 4};
uint32_t bias_buf = cgen.addBuffer(bias_data);
int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
int weight =
cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf}, 0.5, 17);
int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 1.0, 0);
cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
circle::ActivationFunctionType_NONE);
cgen.setInputsAndOutputs({in}, {out});

_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->setBackends({"cpu"});
_context->expectFailModelLoad();

SUCCEED();
}

TEST_F(GenModelTest, neg_OneOp_Conv2D_I8_NonZero_ZeroPoints)
{
CircleGen cgen;
std::vector<int8_t> weight_data{1, 2, 3, 4, 5, 6, 7, 8, 9};
uint32_t weight_buf = cgen.addBuffer(weight_data);
std::vector<int32_t> bias_data{0, 2, 4};
uint32_t bias_buf = cgen.addBuffer(bias_data);
int in = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT8}, 0.5, 0);
std::vector<float> weight_scales = {0.5, 1, 0.5};
std::vector<int64_t> weight_zeropoints = {0, 0, 10};
int weight = cgen.addTensor({{3, 1, 1, 3}, circle::TensorType::TensorType_INT8, weight_buf},
weight_scales, weight_zeropoints);
int bias = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_INT32, bias_buf}, 1.0, 0);
int out = cgen.addTensor({{1, 1, 1, 3}, circle::TensorType::TensorType_FLOAT32}, 1.0, 0);
cgen.addOperatorConv2D({{in, weight, bias}, {out}}, circle::Padding_VALID, 1, 1,
circle::ActivationFunctionType_NONE);
cgen.setInputsAndOutputs({in}, {out});

_context = std::make_unique<GenModelTestContext>(cgen.finish());
_context->setBackends({"cpu"});
_context->expectFailModelLoad();

SUCCEED();
}
