Skip to content

Commit

Permalink
Adding support for MKL Quantized Concat
Browse the repository at this point in the history
  • Loading branch information
mahmoud-abuzaina committed Jan 23, 2019
1 parent 09bdeff commit 9883edb
Show file tree
Hide file tree
Showing 9 changed files with 480 additions and 35 deletions.
14 changes: 12 additions & 2 deletions tensorflow/core/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -1138,6 +1138,13 @@ tf_gen_op_libs(
deps = [":protos_all_cc"],
)

# Op-library target for the MKL-specific array ops, built from
# mkl_array_ops.cc by the tf_gen_op_libs macro (one library per name listed).
tf_gen_op_libs(
    op_lib_names = ["mkl_array_ops"],
    deps = [":protos_all_cc"],
)

tf_gen_op_libs(
op_lib_names = [
"audio_ops",
Expand Down Expand Up @@ -1277,7 +1284,10 @@ cc_library(
":training_ops_op_lib",
":user_ops_op_lib",
":word2vec_ops",
] + if_mkl([":mkl_nn_ops_op_lib"]) + tf_additional_cloud_op_deps(),
] + if_mkl([
":mkl_array_ops_op_lib",
":mkl_nn_ops_op_lib",
]) + tf_additional_cloud_op_deps(),
alwayslink = 1,
)

Expand Down Expand Up @@ -4472,7 +4482,7 @@ tf_cc_test(
"//tensorflow/cc:scope",
"//tensorflow/core/kernels:cwise_op",
"//third_party/eigen3",
],
] + if_mkl([":mkl_array_ops_op_lib"]),
)

tf_cc_test(
Expand Down
6 changes: 3 additions & 3 deletions tensorflow/core/api_def/excluded_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ const std::unordered_set<std::string>* GetExcludedOps() {
"GcsConfigureBlockCache", "GcsConfigureCredentials",
#ifdef INTEL_MKL
// QuantizedFusedOps for Intel CPU
"QuantizedConv2DAndRequantize", "QuantizedConv2DWithBias",
"QuantizedConv2DWithBiasAndRequantize", "QuantizedConv2DAndRelu",
"QuantizedConv2DAndReluAndRequantize",
"QuantizedConcatV2", "QuantizedConv2DAndRequantize",
"QuantizedConv2DWithBias", "QuantizedConv2DWithBiasAndRequantize",
"QuantizedConv2DAndRelu", "QuantizedConv2DAndReluAndRequantize",
"QuantizedConv2DWithBiasAndRelu",
"QuantizedConv2DWithBiasAndReluAndRequantize",
"QuantizedConv2DWithBiasSumAndRelu",
Expand Down
20 changes: 12 additions & 8 deletions tensorflow/core/framework/common_shape_fns.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,8 @@ Status GetWindowedOutputSizeVerboseV2(int64 input_size, int64 filter_size,
if (*output_size < 0) {
return errors::InvalidArgument(
"Computed output size would be negative: ", *output_size,
" [input_size: ", input_size,
", effective_filter_size: ", effective_filter_size,
", stride: ", stride, "]");
" [input_size: ", input_size, ", effective_filter_size: ",
effective_filter_size, ", stride: ", stride, "]");
}
return Status::OK();
}
Expand Down Expand Up @@ -1299,6 +1298,12 @@ Status ConcatV2Shape(InferenceContext* c) {
c->num_inputs() - 1 /* dim_index */);
}

// Shape function for quantized ConcatV2-style ops. Unlike ConcatV2Shape,
// which derives the value count from c->num_inputs(), the quantized variant
// carries extra min/max inputs after the values, so the caller must say how
// many leading inputs are the tensors being concatenated.
Status QuantizedConcatV2Shape(InferenceContext* c, int num_inputs_to_concat) {
  // Value tensors occupy input slots [0, num_inputs_to_concat); the axis
  // tensor sits in the slot immediately after the last value input.
  const int axis_input_index = num_inputs_to_concat;
  return ConcatShapeHelper(c, /*start_value_index=*/0,
                           /*end_value_index=*/axis_input_index,
                           /*dim_index=*/axis_input_index);
}

Status BroadcastBinaryOpOutputShapeFnHelper(InferenceContext* c,
ShapeHandle shape_x,
ShapeHandle shape_y,
Expand Down Expand Up @@ -1562,11 +1567,10 @@ Status ScatterNdUpdateShape(InferenceContext* c) {
Status s = c->Merge(prefix_indices, prefix_updates, &unused);
if (!s.ok()) {
return errors::InvalidArgument(
"The outer ", num_outer_dims,
" dimensions of indices.shape=", c->DebugString(indices_shape),
" must match the outer ", num_outer_dims,
" dimensions of updates.shape=", c->DebugString(updates_shape),
": ", s.error_message());
"The outer ", num_outer_dims, " dimensions of indices.shape=",
c->DebugString(indices_shape), " must match the outer ",
num_outer_dims, " dimensions of updates.shape=",
c->DebugString(updates_shape), ": ", s.error_message());
}

ShapeHandle input_suffix;
Expand Down
2 changes: 2 additions & 0 deletions tensorflow/core/framework/common_shape_fns.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,8 @@ Status ConcatShape(shape_inference::InferenceContext* c,
// Shape function for concat operations.
Status ConcatV2Shape(shape_inference::InferenceContext* c);

Status QuantizedConcatV2Shape(InferenceContext* c, int num_inputs_to_concat);

// Shape function for binary operators that broadcast their inputs
// and with output to output_index.
// Note: out cannot be NULL.
Expand Down
24 changes: 24 additions & 0 deletions tensorflow/core/kernels/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -6570,6 +6570,30 @@ tf_cc_test(
],
)

# Unit test for the MKL quantized concat kernel (mkl_concat_op); only built
# in MKL-enabled configurations via the tf_cc_test_mkl macro.
tf_cc_test_mkl(
name = "mkl_quantized_concat_op_test",
size = "small",
srcs = ["mkl_quantized_concat_op_test.cc"],
deps = [
":mkl_concat_op",
":ops_testutil",
":ops_util",
":quantization_utils",
":quantized_ops",
"//tensorflow/core:array_ops_op_lib",
"//tensorflow/core:core_cpu",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:math_ops_op_lib",
"//tensorflow/core:mkl_array_ops_op_lib",
"//tensorflow/core:nn_ops_op_lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
],
)

tf_cc_test(
name = "quantized_batch_norm_op_test",
size = "small",
Expand Down
103 changes: 89 additions & 14 deletions tensorflow/core/kernels/mkl_concat_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,22 @@ limitations under the License.
#include <vector>

#include "mkldnn.hpp"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/bounds_check.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/concat_lib.h"
#include "tensorflow/core/kernels/concat_lib_cpu.h"
#include "tensorflow/core/kernels/quantization_utils.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/util/mkl_util.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"

using mkldnn::concat;
using mkldnn::stream;
#include "tensorflow/core/util/mkl_util.h"

namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;
Expand Down Expand Up @@ -78,9 +80,8 @@ class EigenConcatBaseOp : public OpKernel {
const TensorShape& input_shape = input_shapes[0];

int32 axis = concat_dim < 0 ? concat_dim + input_dims : concat_dim;
OP_REQUIRES(c,
(0 <= axis && axis < input_dims) ||
(allow_legacy_scalars() && concat_dim == 0),
OP_REQUIRES(c, (0 <= axis && axis < input_dims) ||
(allow_legacy_scalars() && concat_dim == 0),
errors::InvalidArgument(
"ConcatOp : Expected concatenating dimensions in the range "
"[",
Expand All @@ -102,13 +103,12 @@ class EigenConcatBaseOp : public OpKernel {
const auto in = values[i];
const bool in_is_scalar = IsLegacyScalar(input_shapes[i]);
OP_REQUIRES(
c,
(input_shapes[i].dims() == input_dims) ||
(input_is_scalar && in_is_scalar),
c, (input_shapes[i].dims() == input_dims) ||
(input_is_scalar && in_is_scalar),
errors::InvalidArgument(
"ConcatOp : Ranks of all input tensors should match: shape[0] = ",
input_shape.DebugString(), " vs. shape[", i,
"] = ", input_shapes[i].DebugString()));
input_shape.DebugString(), " vs. shape[", i, "] = ",
input_shapes[i].DebugString()));
if (in.NumElements() > 0) {
int64 inputs_flat_dim1 = in.NumElements() / inputs_flat_dim0;
inputs_flat.emplace_back(new typename TTypes<T, 2>::ConstMatrix(
Expand Down Expand Up @@ -226,9 +226,53 @@ class MklConcatOp : public OpKernel {
// format and avoid calling eigen version.
if (!are_all_tf_inputs && !are_all_mkl_inputs) invoke_eigen = true;

OpInputList input_mins, input_maxes;
if (std::is_same<T, qint8>::value || std::is_same<T, quint8>::value) {
// MKL DNN concat does not support input tensors that have different
// ranges, check if the ranges of the all input tensors are the same
// if not, forward it to Eigen implementation.

OP_REQUIRES_OK(context, context->input_list("input_mins", &input_mins));
OP_REQUIRES(context, (input_mins.size() == N),
errors::InvalidArgument(
"QuantizedConcatOp : Expected mins input list length ",
input_mins.size(), " to equal values length ", N));

OP_REQUIRES_OK(context,
context->input_list("input_maxes", &input_maxes));
OP_REQUIRES(context, (input_maxes.size() == N),
errors::InvalidArgument(
"QuantizedConcatOp : Expected maxes input list length ",
input_maxes.size(), " to equal values length ", N));
float input_min = input_mins[0].flat<float>()(0);
float input_max = input_maxes[0].flat<float>()(0);
const float eps = 1.0e-6;
for (int i = 1; i < N; i++) {
float min = input_mins[i].flat<float>()(0);
float max = input_maxes[i].flat<float>()(0);

if (fabs(input_min - min) > eps || fabs(input_max - max) > eps) {
invoke_eigen = true;
break;
}
}
}

// Call Eigen library
if (invoke_eigen) {
CallEigenVersion(context, input_tensors, mkl_input_shapes);
if (std::is_same<T, qint8>::value || std::is_same<T, quint8>::value) {
// MKL DNN quantized concat does not support input tensors with
// different ranges.
// TODO (mabuzain): Add quantized version of CallEigen() to support
// this case.
OP_REQUIRES(context, false,
errors::Unimplemented("MKL DNN quantized concat does not "
"support input tensors that have "
"different ranges"));
} else {
CallEigenVersion(context, input_tensors, mkl_input_shapes);
}

return;
}

Expand Down Expand Up @@ -374,10 +418,27 @@ class MklConcatOp : public OpKernel {
std::vector<primitive> net;
net.push_back(concat_op);
stream(stream::kind::eager).submit(net).wait();

// For quantized concat, min and max outputs are also computed.
if (std::is_same<T, qint8>::value || std::is_same<T, quint8>::value) {
Tensor* output_min = nullptr;
Tensor* output_max = nullptr;
MklDnnShape output_min_mkl_shape, output_max_mkl_shape;
output_min_mkl_shape.SetMklTensor(false);
output_max_mkl_shape.SetMklTensor(false);
AllocateOutputSetMklShape(context, 1, &output_min, {},
output_min_mkl_shape);
AllocateOutputSetMklShape(context, 2, &output_max, {},
output_max_mkl_shape);
// All input tensors should have the same range, just use the
// first one
output_min->flat<float>()(0) = input_mins[0].flat<float>()(0);
output_max->flat<float>()(0) = input_maxes[0].flat<float>()(0);
}
} catch (mkldnn::error& e) {
string error_msg = "Status: " + std::to_string(e.status) +
", message: " + string(e.message) + ", in file " +
string(__FILE__) + ":" + std::to_string(__LINE__);
string error_msg = "Status: " + std::to_string(e.status) + ", message: " +
string(e.message) + ", in file " + string(__FILE__) +
":" + std::to_string(__LINE__);
OP_REQUIRES_OK(
context,
errors::Aborted("Operation received an exception:", error_msg));
Expand Down Expand Up @@ -490,6 +551,20 @@ class MklConcatOp : public OpKernel {

TF_CALL_float(REGISTER_MKL_CPU);

// Register the MKL quantized concat kernel for the two quantized element
// types it handles (quint8 and qint8). The "axis" input is pinned to host
// memory, and the MKL quantized-op label selects these registrations through
// the MKL op registry rather than the default kernel lookup.
// NOTE(review): these REGISTER_KERNEL_BUILDER invocations have no trailing
// semicolons — confirm the macro supplies its own statement terminator.
REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConcatV2")
.Device(DEVICE_CPU)
.TypeConstraint<quint8>("T")
.HostMemory("axis")
.Label(mkl_op_registry::kMklQuantizedOpLabel),
MklConcatOp<CPUDevice, quint8, NAME_IS_AXIS>)

// Same kernel, signed 8-bit quantized variant.
REGISTER_KERNEL_BUILDER(Name("_MklQuantizedConcatV2")
.Device(DEVICE_CPU)
.TypeConstraint<qint8>("T")
.HostMemory("axis")
.Label(mkl_op_registry::kMklQuantizedOpLabel),
MklConcatOp<CPUDevice, qint8, NAME_IS_AXIS>)

#undef REGISTER_CONCAT_MKL
} // namespace tensorflow

Expand Down
Loading

0 comments on commit 9883edb

Please sign in to comment.