[luci-interpreter] Let PALDepthwiseConv2d cast temporary tensors (Samsung#8072)

This commit allows setting the data type of temporary tensors in the PAL interface for the DepthwiseConv2D kernel. ONE-DCO-1.0-Signed-off-by: Artem Balyshev [email protected]
Tigerly · Dec 6, 2021 · ed89080 · ed89080
1 parent 04e28bc
commit ed89080
Show file tree

Hide file tree

Showing 5 changed files with 15 additions and 5 deletions.
diff --git a/compiler/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h b/compiler/luci-interpreter/pal/cmsisnn/PALDepthwiseConv2d.h
@@ -140,6 +140,7 @@ inline void DepthwiseConvPerChannel<int8_t>(
 
 static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
                                          const tflite::DepthwiseParams &params,
+                                         const luci_interpreter::DataType &input_data_type,
                                          const tflite::RuntimeShape &input_shape,
                                          const tflite::RuntimeShape &filter_shape,
                                          const tflite::RuntimeShape &output_shape)
@@ -150,6 +151,7 @@ static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
 
   if (dw_conv_params.dilation.h == 1 && dw_conv_params.dilation.w == 1)
   {
+    assert(input_data_type == loco::DataType::S8);
     const int batch_size = tflite::MatchingDim(input_shape, 0, output_shape, 0);
     const int output_depth = tflite::MatchingDim(filter_shape, 3, output_shape, 3);
 
@@ -174,7 +176,9 @@ static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
     const int32_t buf_size = arm_depthwise_conv_wrapper_s8_get_buffer_size(
       &dw_conv_params, &input_dims, &filter_dims, &output_dims);
 
-    luci_interpreter::Shape scratchpad_shape{buf_size};
+    auto data_type_size = static_cast<int32_t>(luci_interpreter::getDataTypeSize(input_data_type));
+
+    luci_interpreter::Shape scratchpad_shape{buf_size * data_type_size};
     scratchpad->resize(scratchpad_shape);
   }
   else

diff --git a/compiler/luci-interpreter/pal/linux/PALDepthwiseConv2d.h b/compiler/luci-interpreter/pal/linux/PALDepthwiseConv2d.h
@@ -71,12 +71,14 @@ inline void DepthwiseConvPerChannel<int8_t>(
 
 static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
                                          const tflite::DepthwiseParams &params,
+                                         const luci_interpreter::DataType &input_data_type,
                                          const tflite::RuntimeShape &input_shape,
                                          const tflite::RuntimeShape &filter_shape,
                                          const tflite::RuntimeShape &output_shape)
 
 {
   (void)params;
+  (void)input_data_type;
   (void)input_shape;
   (void)filter_shape;
   (void)output_shape;

diff --git a/compiler/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h b/compiler/luci-interpreter/pal/mcu/PALDepthwiseConv2d.h
@@ -71,12 +71,14 @@ inline void DepthwiseConvPerChannel<int8_t>(
 
 static inline void SetupScratchpadTensor(luci_interpreter::Tensor *scratchpad,
                                          const tflite::DepthwiseParams &params,
+                                         const luci_interpreter::DataType &input_data_type,
                                          const tflite::RuntimeShape &input_shape,
                                          const tflite::RuntimeShape &filter_shape,
                                          const tflite::RuntimeShape &output_shape)
 
 {
   (void)params;
+  (void)input_data_type;
   (void)input_shape;
   (void)filter_shape;
   (void)output_shape;

diff --git a/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/kernels/DepthwiseConv2D.cpp
@@ -115,8 +115,9 @@ void DepthwiseConv2D::configure()
   params.dilation_width_factor = _params.dilation_width_factor;
 
   auto scratchpad = getOutputTensors()[1];
-  luci_interpreter_pal::SetupScratchpadTensor(scratchpad, params, getTensorShape(input()),
-                                              getTensorShape(filter()), getTensorShape(output()));
+  luci_interpreter_pal::SetupScratchpadTensor(scratchpad, params, input()->element_type(),
+                                              getTensorShape(input()), getTensorShape(filter()),
+                                              getTensorShape(output()));
 }
 
 void DepthwiseConv2D::execute() const

diff --git a/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp b/compiler/luci-interpreter/src/loader/nodes/DepthwiseConv2D.cpp
@@ -44,8 +44,9 @@ std::unique_ptr<Kernel> build_kernel_CircleDepthwiseConv2D(const luci::CircleNod
   params.dilation_width_factor = node->dilation()->w();
   params.activation = node->fusedActivationFunction();
 
-  auto scratchpad =
-    std::make_unique<Tensor>(input->element_type(), Shape({}), AffineQuantization{}, "");
+  // It is unknown what data will be stored in scratchpad tensor,
+  // using UINT8 as a most general option
+  auto scratchpad = std::make_unique<Tensor>(DataType::U8, Shape({}), AffineQuantization{}, "");
   scratchpad->set_observable(false);
   scratchpad->set_data_buffer(nullptr);
   // If node has execution plan then read memory offsets for scratchpad temporary tensor