Skip to content

Commit

Permalink
[BYOC][ACL] Add support for dense (fully connected) layer (apache#6254)
Browse files Browse the repository at this point in the history
* [BYOC][ACL] Add support for dense (fully connected) layer

This patch adds the ability to offload dense (or fully connected) operators to ACL.

For fp32 a single dense layer can be offloaded, or the composite variant: nn.dense, nn.bias_add? (ACL does not currently offer fused activation).
For uint8: qnn.dense, nn.bias_add?, qnn.requantize

Change-Id: I83ea00b2aa6bdc5d9ef5cd6d54bbf981e523bd14

* Don't offload dense layer with unsupported datatype

Change-Id: I856eb2298499fdf22c172ba7f85d21033d3cc920
  • Loading branch information
lhutton1 authored Aug 13, 2020
1 parent 16b2a4b commit 15eef5c
Show file tree
Hide file tree
Showing 8 changed files with 527 additions and 4 deletions.
7 changes: 7 additions & 0 deletions docs/deploy/arm_compute_lib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,13 @@ Operator support
| | |
| | (only groups = 1 supported) |
+--------------+-------------------------------------------------------------------------+
| nn.dense | fp32: |
| | Simple: nn.dense |
| | Composite: nn.dense, nn.bias_add? |
+--------------+-------------------------------------------------------------------------+
| qnn.dense | uint8: |
| | Composite: qnn.dense, nn.bias_add?, qnn.requantize |
+--------------+-------------------------------------------------------------------------+
| nn.maxpool2d | fp32, uint8 |
+--------------+-------------------------------------------------------------------------+
| reshape | fp32, uint8 |
Expand Down
74 changes: 73 additions & 1 deletion python/tvm/relay/op/contrib/arm_compute_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,33 @@ def qnn_conv_pattern():
pattern, wildcard(), wildcard(), is_constant(), is_constant())
return pattern

def dense_pattern():
    """Create a dense (fully-connected) pattern.

    The pattern matches ``nn.dense`` applied to any input and a constant
    weight, optionally followed by ``nn.bias_add`` with a constant bias.

    Returns
    -------
    pattern : dataflow_pattern.AltPattern
        Denotes the dense pattern.
    """
    pattern = is_op('nn.dense')(wildcard(), is_constant())
    # The bias is optional: both `dense` and `dense -> bias_add` are matched.
    pattern = pattern.optional(lambda x: is_op('nn.bias_add')(x, is_constant()))
    return pattern

def qnn_dense_pattern():
    """Create a quantized dense (fully-connected) pattern.

    The pattern matches ``qnn.dense`` (any input, five constant arguments),
    optionally followed by ``nn.bias_add`` with a constant bias, and always
    terminated by ``qnn.requantize``.

    Returns
    -------
    pattern : dataflow_pattern.CallPattern
        Denotes the quantized dense pattern.
    """
    pattern = is_op('qnn.dense')(
        wildcard(), is_constant(), is_constant(), is_constant(), is_constant(), is_constant())
    # The bias is optional: both `dense` and `dense -> bias_add` are matched.
    pattern = pattern.optional(lambda x: is_op('nn.bias_add')(x, is_constant()))
    # Requantize is mandatory; its last two arguments must be constants.
    pattern = is_op('qnn.requantize')(
        pattern, wildcard(), wildcard(), is_constant(), is_constant())
    return pattern

def check_conv(extract):
"""Check conv pattern is supported by ACL."""
call = extract
Expand All @@ -114,8 +141,26 @@ def check_qnn_conv(extract):
call = call.args[0]
return qnn_conv2d(call.attrs, call.args)

def check_dense(extract):
    """Check dense pattern is supported by ACL.

    Parameters
    ----------
    extract : the outermost call of a matched dense composite.

    Returns
    -------
    The result of the ``dense`` support check on the inner ``nn.dense`` call.
    """
    call = extract
    # Follow first arguments from the outermost call until nn.dense is reached.
    while call.op.name != "nn.dense":
        call = call.args[0]
    return dense(call.attrs, call.args)

def check_qnn_dense(extract):
    """Check qnn dense pattern is supported by ACL.

    Parameters
    ----------
    extract : the outermost call of a matched quantized dense composite.

    Returns
    -------
    False when the outermost call's ``out_dtype`` is not "uint8"; otherwise
    the result of the ``qnn_dense`` support check on the inner ``qnn.dense``.
    """
    # The outermost call carries the final output type of the composite.
    if extract.attrs.out_dtype != "uint8":
        return False
    call = extract
    # Follow first arguments from the outermost call until qnn.dense is reached.
    while call.op.name != "qnn.dense":
        call = call.args[0]
    return qnn_dense(call.attrs, call.args)

return [('arm_compute_lib.conv2d', conv_pattern(), check_conv),
('arm_compute_lib.qnn_conv2d', qnn_conv_pattern(), check_qnn_conv)]
('arm_compute_lib.qnn_conv2d', qnn_conv_pattern(), check_qnn_conv),
('arm_compute_lib.dense', dense_pattern(), check_dense),
('arm_compute_lib.qnn_dense', qnn_dense_pattern(), check_qnn_dense)]


def _register_external_op_helper(op_name, supported=True):
Expand Down Expand Up @@ -164,6 +209,33 @@ def qnn_conv2d(attrs, args):
return True


@tvm.ir.register_op_attr("nn.dense", "target.arm_compute_lib")
def dense(attrs, args):
    """Decide whether an nn.dense call should use the external ACL codegen.

    Requires a float32 input, a rank-2 float32 weight, and an ``out_dtype``
    that is either "float32" or left empty.
    """
    if args[0].checked_type.dtype != "float32":
        return False
    weight_type = args[1].checked_type
    if len(weight_type.shape) != 2 or weight_type.dtype != "float32":
        return False
    return attrs.out_dtype in ("float32", "")


def qnn_dense(attrs, args):
    """Decide whether a qnn.dense call should use the external ACL codegen.

    Requires a uint8 input, a rank-2 uint8 weight, and an int32 ``out_dtype``.
    """
    if args[0].checked_type.dtype != "uint8":
        return False
    weight_type = args[1].checked_type
    if len(weight_type.shape) != 2 or weight_type.dtype != "uint8":
        return False
    return attrs.out_dtype == "int32"


@tvm.ir.register_op_attr("nn.max_pool2d", "target.arm_compute_lib")
def max_pool2d(attrs, args):
"""Check if the external ACL codegen for maxpool2d should be used."""
Expand Down
77 changes: 77 additions & 0 deletions src/relay/backend/contrib/arm_compute_lib/codegen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,16 @@ class ACLJSONSerializer : public backend::contrib::JSONSerializer {
const CallNode* requantize = nullptr;
};

/*!
* \brief A series of operators that form a composite
* dense layer. Supports both nn.dense and qnn.dense.
*/
struct CompositeDenseNode {
  /*! \brief The nn.dense or qnn.dense call at the root of the composite. */
  const CallNode* dense{nullptr};
  /*! \brief The nn.bias_add call, or nullptr when the composite has no bias. */
  const CallNode* bias{nullptr};
  /*! \brief The qnn.requantize call, or nullptr for the non-quantized variant. */
  const CallNode* requantize{nullptr};
};

/*!
* \brief Visit call nodes and generate appropriate JSON node.
*
Expand All @@ -82,6 +92,8 @@ class ACLJSONSerializer : public backend::contrib::JSONSerializer {
std::shared_ptr<JSONGraphNode> json_node;
if (name == "arm_compute_lib.conv2d" || name == "arm_compute_lib.qnn_conv2d") {
json_node = CreateCompositeConvJSONNode(cn);
} else if (name == "arm_compute_lib.dense" || name == "arm_compute_lib.qnn_dense") {
json_node = CreateCompositeDenseJSONNode(cn);
} else {
LOG(FATAL) << "Unrecognized Arm Compute Library pattern: " << name;
}
Expand Down Expand Up @@ -190,6 +202,71 @@ class ACLJSONSerializer : public backend::contrib::JSONSerializer {
}
return json_node;
}

/*!
* \brief Extract dense nodes from a composite function.
*
* \param cn The call node of the composite function.
* \return Extracted composite dense nodes.
*/
static CompositeDenseNode UnpackCompositeDense(const CallNode* cn) {
  CompositeDenseNode nodes{};
  const auto* fn = cn->op.as<FunctionNode>();
  CHECK(fn);

  // Traverse composite dense function from child to parent.
  // Each `as<CallNode>()` yields nullptr when the expression is not a call, so
  // every step is checked before the pointer is handed to backend::IsOp.
  const auto* current_call = fn->body.as<CallNode>();
  CHECK(current_call) << "Composite dense function body must be a call node.";
  if (backend::IsOp(current_call, "qnn.requantize")) {
    nodes.requantize = current_call;
    current_call = current_call->args[0].as<CallNode>();
    CHECK(current_call) << "Expected a call node as the first argument of qnn.requantize.";
  }
  if (backend::IsOp(current_call, "nn.bias_add")) {
    nodes.bias = current_call;
    current_call = current_call->args[0].as<CallNode>();
    CHECK(current_call) << "Expected a call node as the first argument of nn.bias_add.";
  }
  // Enforce a dense node exists at this point during traversal.
  // A requantize node implies the quantized variant of the operator.
  if (nodes.requantize) {
    CHECK(backend::IsOp(current_call, "qnn.dense"));
  } else {
    CHECK(backend::IsOp(current_call, "nn.dense"));
  }
  nodes.dense = current_call;
  return nodes;
}

/*!
* \brief Create a JSON representation of a composite dense (fully-connected) operator.
*
* \param cn The call to be represented.
* \return A JSON representation of a specific operator.
*/
std::shared_ptr<JSONGraphNode> CreateCompositeDenseJSONNode(const CallNode* cn) {
  CompositeDenseNode nodes = UnpackCompositeDense(cn);
  const bool is_quantized = nodes.requantize != nullptr;
  std::string name = is_quantized ? "qnn.dense" : "nn.dense";

  // Inputs must be added in the same order they appear in the relay graph:
  // data, weights, [quantization params], [bias], [output quantization params].
  std::vector<JSONGraphNodeEntry> inputs;
  inputs.push_back(VisitExpr(cn->args[0])[0]);
  inputs.push_back(VisitExpr(nodes.dense->args[1])[0]);
  if (is_quantized) {
    // qnn.dense args 2..5: input zero-point, weight zero-point, input scale, weight scale.
    for (size_t arg = 2; arg <= 5; ++arg) {
      inputs.push_back(VisitExpr(nodes.dense->args[arg])[0]);
    }
  }
  if (nodes.bias) {
    inputs.push_back(VisitExpr(nodes.bias->args[1])[0]);
  }
  if (is_quantized) {
    // qnn.requantize args 3..4: output scale, output zero-point.
    for (size_t arg = 3; arg <= 4; ++arg) {
      inputs.push_back(VisitExpr(nodes.requantize->args[arg])[0]);
    }
  }

  auto json_node = std::make_shared<JSONGraphNode>(name, "kernel", inputs, 1);
  SetCallNodeAttribute(json_node, nodes.dense);
  return json_node;
}
};

/*!
Expand Down
48 changes: 48 additions & 0 deletions src/runtime/contrib/arm_compute_lib/acl_runtime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB
#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
#include <arm_compute/runtime/NEON/functions/NEPoolingLayer.h>
#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>

Expand Down Expand Up @@ -128,6 +129,9 @@ class ACLRuntime : public JSONRuntimeBase {
if ("nn.conv2d" == op_name || "qnn.conv2d" == op_name) {
CreateConvolution2DLayer(&layer_, node, mm);
num_pools++;
} else if ("nn.dense" == op_name || "qnn.dense" == op_name) {
CreateFullyConnectedLayer(&layer_, node, mm);
num_pools++;
} else if ("nn.max_pool2d" == op_name) {
CreatePoolingLayer(&layer_, node);
} else if ("reshape" == op_name) {
Expand Down Expand Up @@ -257,6 +261,50 @@ class ACLRuntime : public JSONRuntimeBase {
layer->function = function;
}

/*!
* \brief Create a fully connected (dense) layer.
*
* \param layer The ACL layer to build. Containing inputs, outputs and the ACL function.
* \param node The JSON representation of the operator.
* \param mm The ACL fully connected layer can request auxiliary memory from TVM.
*/
void CreateFullyConnectedLayer(CachedLayer* layer, const JSONGraphNode& node,
                               const std::shared_ptr<arm_compute::MemoryManagerOnDemand>& mm) {
  arm_compute::FullyConnectedLayerInfo fc_info;
  fc_info.set_weights_trained_layout(arm_compute::DataLayout::NHWC);

  // Gather the JSON inputs; the expected count depends on the operator variant.
  std::vector<JSONGraphNodeEntry> entries = node.GetInputs();
  const size_t num_inputs = entries.size();
  bool has_bias = false;

  if (node.GetOpName() == "qnn.dense") {
    CHECK(num_inputs >= 8U && num_inputs <= 9U)
        << "Quantized fully connected (dense) layer requires 9 inputs with a bias, 8 inputs "
           "without.";
    has_bias = (num_inputs == 9);
    // Entries 2..5 are passed alongside the data and weight tensors as their
    // quantization parameters; the last two entries are passed for the output.
    layer->inputs.push_back(MakeACLTensorFromJSONEntry(entries[0], &entries[4], &entries[2]));
    layer->inputs.push_back(MakeACLTensorFromJSONEntry(entries[1], &entries[5], &entries[3]));
    if (has_bias) {
      layer->inputs.push_back(MakeACLTensorFromJSONEntry(entries[6]));
    }
    // The optional bias at index 6 shifts the output parameters by one slot.
    const size_t out_idx = has_bias ? 7 : 6;
    layer->outputs.push_back(
        MakeACLTensorFromJSONNode(node, &entries[out_idx], &entries[out_idx + 1]));
  } else {
    CHECK(num_inputs >= 2U && num_inputs <= 3U)
        << "Fully connected (dense) layer requires 3 inputs with a bias, 2 inputs without.";
    has_bias = (num_inputs == 3);
    for (const auto& entry : entries) {
      layer->inputs.push_back(MakeACLTensorFromJSONEntry(entry));
    }
    layer->outputs.push_back(MakeACLTensorFromJSONNode(node));
  }

  // Tensor addresses are taken only after every push_back above has completed.
  auto* bias_tensor = has_bias ? &layer->inputs[2] : nullptr;
  auto fc_function = std::make_shared<arm_compute::NEFullyConnectedLayer>(mm);
  fc_function->configure(&layer->inputs[0], &layer->inputs[1], bias_tensor, &layer->outputs[0],
                         fc_info);
  layer->function = fc_function;
}

/*!
* \brief Create a pooling layer.
*
Expand Down
2 changes: 1 addition & 1 deletion tests/python/contrib/test_arm_compute_lib/test_conv2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ def test_qnn_conv2d():
"output scale": output_sc,
"output zero point": output_zp
}
verify(outputs, atol=1, rtol=0, params=params)
verify(outputs, atol=1, rtol=0, params=params, verify_saturation=True)


def test_codegen_qnn_conv2d():
Expand Down
Loading

0 comments on commit 15eef5c

Please sign in to comment.