Skip to content

Commit

Permalink
Adding support for MKL Quantized Concat
Browse the repository at this point in the history
  • Loading branch information
mahmoud-abuzaina committed Jan 23, 2019
1 parent 09bdeff commit 9883edb
Show file tree
Hide file tree
Showing 9 changed files with 480 additions and 35 deletions.
14 changes: 12 additions & 2 deletions tensorflow/core/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -1138,6 +1138,13 @@ tf_gen_op_libs(
deps = [":protos_all_cc"],
)

# Op-library target for the MKL-specific array ops, built from
# mkl_array_ops.cc by the tf_gen_op_libs macro (one library per name listed).
tf_gen_op_libs(
    op_lib_names = ["mkl_array_ops"],
    deps = [":protos_all_cc"],
)

tf_gen_op_libs(
op_lib_names = [
"audio_ops",
Expand Down Expand Up @@ -1277,7 +1284,10 @@ cc_library(
":training_ops_op_lib",
":user_ops_op_lib",
":word2vec_ops",
] + if_mkl([":mkl_nn_ops_op_lib"]) + tf_additional_cloud_op_deps(),
] + if_mkl([
":mkl_array_ops_op_lib",
":mkl_nn_ops_op_lib",
]) + tf_additional_cloud_op_deps(),
alwayslink = 1,
)

Expand Down Expand Up @@ -4472,7 +4482,7 @@ tf_cc_test(
"//tensorflow/cc:scope",
"//tensorflow/core/kernels:cwise_op",
"//third_party/eigen3",
],
] + if_mkl([":mkl_array_ops_op_lib"]),
)

tf_cc_test(
Expand Down
6 changes: 3 additions & 3 deletions tensorflow/core/api_def/excluded_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ const std::unordered_set<std::string>* GetExcludedOps() {
"GcsConfigureBlockCache", "GcsConfigureCredentials",
#ifdef INTEL_MKL
// QuantizedFusedOps for Intel CPU
"QuantizedConv2DAndRequantize", "QuantizedConv2DWithBias",
"QuantizedConv2DWithBiasAndRequantize", "QuantizedConv2DAndRelu",
"QuantizedConv2DAndReluAndRequantize",
"QuantizedConcatV2", "QuantizedConv2DAndRequantize",
"QuantizedConv2DWithBias", "QuantizedConv2DWithBiasAndRequantize",
"QuantizedConv2DAndRelu", "QuantizedConv2DAndReluAndRequantize",
"QuantizedConv2DWithBiasAndRelu",
"QuantizedConv2DWithBiasAndReluAndRequantize",
"QuantizedConv2DWithBiasSumAndRelu",
Expand Down
20 changes: 12 additions & 8 deletions tensorflow/core/framework/common_shape_fns.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,8 @@ Status GetWindowedOutputSizeVerboseV2(int64 input_size, int64 filter_size,
if (*output_size < 0) {
return errors::InvalidArgument(
"Computed output size would be negative: ", *output_size,
" [input_size: ", input_size,
", effective_filter_size: ", effective_filter_size,
", stride: ", stride, "]");
" [input_size: ", input_size, ", effective_filter_size: ",
effective_filter_size, ", stride: ", stride, "]");
}
return Status::OK();
}
Expand Down Expand Up @@ -1299,6 +1298,12 @@ Status ConcatV2Shape(InferenceContext* c) {
c->num_inputs() - 1 /* dim_index */);
}

// Shape function for quantized ConcatV2-style ops. Unlike ConcatV2Shape,
// which derives the value count from c->num_inputs(), the quantized variant
// carries extra min/max inputs after the values, so the caller must say how
// many leading inputs are the tensors being concatenated.
Status QuantizedConcatV2Shape(InferenceContext* c, int num_inputs_to_concat) {
  // Value tensors occupy input slots [0, num_inputs_to_concat); the axis
  // tensor sits in the slot immediately after the last value input.
  const int axis_input_index = num_inputs_to_concat;
  return ConcatShapeHelper(c, /*start_value_index=*/0,
                           /*end_value_index=*/axis_input_index,
                           /*dim_index=*/axis_input_index);
}

Status BroadcastBinaryOpOutputShapeFnHelper(InferenceContext* c,
ShapeHandle shape_x,
ShapeHandle shape_y,
Expand Down Expand Up @@ -1562,11 +1567,10 @@ Status ScatterNdUpdateShape(InferenceContext* c) {
Status s = c->Merge(prefix_indices, prefix_updates, &unused);
if (!s.ok()) {
return errors::InvalidArgument(
"The outer ", num_outer_dims,
" dimensions of indices.shape=", c->DebugString(indices_shape),
" must match the outer ", num_outer_dims,
" dimensions of updates.shape=", c->DebugString(updates_shape),
": ", s.error_message());
"The outer ", num_outer_dims, " dimensions of indices.shape=",
c->DebugString(indices_shape), " must match the outer ",
num_outer_dims, " dimensions of updates.shape=",
c->DebugString(updates_shape), ": ", s.error_message());
}

ShapeHandle input_suffix;
Expand Down
2 changes: 2 additions & 0 deletions tensorflow/core/framework/common_shape_fns.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,8 @@ Status ConcatShape(shape_inference::InferenceContext* c,
// Shape function for concat operations.
Status ConcatV2Shape(shape_inference::InferenceContext* c);

Status QuantizedConcatV2Shape(InferenceContext* c, int num_inputs_to_concat);

// Shape function for binary operators that broadcast their inputs
// and with output to output_index.
// Note: out cannot be NULL.
Expand Down
24 changes: 24 additions & 0 deletions tensorflow/core/kernels/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -6570,6 +6570,30 @@ tf_cc_test(
],
)

# Unit test for the MKL quantized concat kernel (mkl_concat_op); only built
# in MKL-enabled configurations via the tf_cc_test_mkl macro.
tf_cc_test_mkl(
name = "mkl_quantized_concat_op_test",
size = "small",
srcs = ["mkl_quantized_concat_op_test.cc"],
deps = [
":mkl_concat_op",
":ops_testutil",
":ops_util",
":quantization_utils",
":quantized_ops",
"//tensorflow/core:array_ops_op_lib",
"//tensorflow/core:core_cpu",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:math_ops_op_lib",
"//tensorflow/core:mkl_array_ops_op_lib",
"//tensorflow/core:nn_ops_op_lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
],
)

tf_cc_test(
name = "quantized_batch_norm_op_test",
size = "small",
Expand Down
103 changes: 89 additions & 14 deletions tensorflow/core/kernels/mkl_concat_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,22 @@ limitations under the License.
#include <vector>

#include "mkldnn.hpp"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/concat_lib.h"
#include "tensorflow/core/kernels/concat_lib_cpu.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/util/mkl_util.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"

using mkldnn::concat;
using mkldnn::stream;
#include "tensorflow/core/util/mkl_util.h"

namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;
Expand Down Expand Up @@ -78,9 +80,8 @@ class EigenConcatBaseOp : public OpKernel {
const TensorShape& input_shape = input_shapes[0];

int32 axis = concat_dim < 0 ? concat_dim + input_dims : concat_dim;
OP_REQUIRES(c,
(0 <= axis && axis < input_dims) ||
(allow_legacy_scalars() && concat_dim == 0),
OP_REQUIRES(c, (0 <= axis && axis < input_dims) ||
(allow_legacy_scalars() && concat_dim == 0),
errors::InvalidArgument(
"ConcatOp : Expected concatenating dimensions in the range "
"[",
Expand All @@ -102,13 +103,12 @@ class EigenConcatBaseOp : public OpKernel {
const auto in = values[i];
const bool in_is_scalar = IsLegacyScalar(input_shapes[i]);
OP_REQUIRES(
c,
(input_shapes[i].dims() == input_dims) ||
(input_is_scalar && in_is_scalar),
c, (input_shapes[i].dims() == input_dims) ||
(input_is_scalar && in_is_scalar),
errors::InvalidArgument(
"ConcatOp : Ranks of all input tensors should match: shape[0] = ",
input_shape.DebugString(), " vs. shape[", i,
"] = ", input_shapes[i].DebugString()));
input_shape.DebugString(), " vs. shape[", i, "] = ",
input_shapes[i].DebugString()));
if (in.NumElements() > 0) {
int64 inputs_flat_dim1 = in.NumElements() / inputs_flat_dim0;
inputs_flat.emplace_back(new typename TTypes<T, 2>::ConstMatrix(
Expand Down Expand Up @@ -226,9 +226,53 @@ class MklConcatOp : public OpKernel {
// format and avoid calling eigen version.
if (!are_all_tf_inputs && !are_all_mkl_inputs) invoke_eigen = true;

OpInputList input_mins, input_maxes;
if (std::is_same<T, qint8>::value || std::is_same<T, quint8>::value) {
// MKL DNN concat does not support input tensors that have different
// ranges, check if the ranges of the all input tensors are the same
// if not, forward it to Eigen implementation.

OP_REQUIRES_OK(context, context->input_list("input_mins", &input_mins));
OP_REQUIRES(context, (input_mins.size() == N),
errors::InvalidArgument(
"QuantizedConcatOp : Expected mins input list length ",
input_mins.size(), " to equal values length ", N));

OP_REQUIRES_OK(context,
context->input_list("input_maxes", &input_maxes));
OP_REQUIRES(context, (input_maxes.size() == N),
errors::InvalidArgument(
"QuantizedConcatOp : Expected maxes input list length ",
input_maxes.size(), " to equal values length ", N));
float input_min = input_mins[0].flat<float>()(0);
float input_max = input_maxes[0].flat<float>()(0);
const float eps = 1.0e-6;
for (int i = 1; i < N; i++) {
float min = input_mins[i].flat<float>()(0);
float max = input_maxes[i].flat<float>()(0);

if (fabs(input_min - min) > eps || fabs(input_max - max) > eps) {
invoke_eigen = true;
break;
}
}
}

// Call Eigen library
if (invoke_eigen) {
CallEigenVersion(context, input_tensors, mkl_input_shapes);
if (std::is_same<T, qint8>::value || std::is_same<T, quint8>::value) {
// MKL DNN quantized concat does not support input tensors with
// different ranges.
// TODO (mabuzain): Add quantized version of CallEigen() to support
// this case.
OP_REQUIRES(context, false,
errors::Unimplemented("MKL DNN quantized concat does not "
"support input tensors that have "
"different ranges"));
} else {
CallEigenVersion(context, input_tensors, mkl_input_shapes);
}

return;
}

Expand Down Expand Up @@ -374,10 +418,27 @@ class MklConcatOp : public OpKernel {
std::vector<primitive> net;
net.push_back(concat_op);
stream(stream::kind::eager).submit(net).wait();

// For quantized concat, min and max outputs are also computed.
if (std::is_same<T, qint8>::value || std::is_same<T, quint8>::value) {
Tensor* output_min = nullptr;
Tensor* output_max = nullptr;
MklDnnShape output_min_mkl_shape, output_max_mkl_shape;
output_min_mkl_shape.SetMklTensor(false);
output_max_mkl_shape.SetMklTensor(false);
AllocateOutputSetMklShape(context, 1, &output_min, {},
output_min_mkl_shape);
AllocateOutputSetMklShape(context, 2, &output_max, {},
output_max_mkl_shape);
// All input tensors should have the same range, just use the
// first one
output_min->flat<float>()(0) = input_mins[0].flat<float>()(0);
output_max->flat<float>()(0) = input_maxes[0].flat<float>()(0);
}
} catch (mkldnn::error& e) {
string error_msg = "Status: " + std::to_string(e.status) +
", message: " + string(e.message) + ", in file " +
string(__FILE__) + ":" + std::to_string(__LINE__);
string error_msg = "Status: " + std::to_string(e.status) + ", message: " +
string(e.message) + ", in file " + string(__FILE__) +
":" + std::to_string(__LINE__);
OP_REQUIRES_OK(
context,
errors::Aborted("Operation received an exception:", error_msg));
Expand Down Expand Up @@ -490,6 +551,20 @@ class MklConcatOp : public OpKernel {

TF_CALL_float(REGISTER_MKL_CPU);

// Register the MKL quantized concat kernel for the two quantized element
// types it handles (quint8 and qint8). The "axis" input is pinned to host
// memory, and the MKL quantized-op label selects these registrations through
// the MKL op registry rather than the default kernel lookup.
// NOTE(review): these REGISTER_KERNEL_BUILDER invocations have no trailing
// semicolons — confirm the macro supplies its own statement terminator.
REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConcatV2")
.Device(DEVICE_CPU)
.TypeConstraint<quint8>("T")
.HostMemory("axis")
.Label(mkl_op_registry::kMklQuantizedOpLabel),
MklConcatOp<CPUDevice, quint8, NAME_IS_AXIS>)

// Same kernel, signed 8-bit quantized variant.
REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConcatV2")
.Device(DEVICE_CPU)
.TypeConstraint<qint8>("T")
.HostMemory("axis")
.Label(mkl_op_registry::kMklQuantizedOpLabel),
MklConcatOp<CPUDevice, qint8, NAME_IS_AXIS>)

#undef REGISTER_CONCAT_MKL
} // namespace tensorflow

Expand Down
Loading

0 comments on commit 9883edb

Please sign in to comment.