Skip to content

Commit

Permalink
Support all datatypes for Xtensa softmax implementation.
Browse files Browse the repository at this point in the history
 * This allows the micro_speech and person_detect examples to be usable
   with the optimized xtensa kernels.

 * enabled all the softmax kernel test cases for Xtensa (since we have a
   fallback to the reference kernels).

 * The keyword_benchmark specific variant is moved to its own .cc file
   since that allows the Xtensa linker to properly drop unused symbols
   and ensures that the binary size for the keyword_benchmark is
   unchanged.

Manually tested that the following tests pass:
```
make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa TARGET_ARCH=fusion_f1 OPTIMIZED_KERNEL_DIR=xtensa XTENSA_CORE=F1_190305_swupgrade test_person_detection_test_int8 -j8
make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa TARGET_ARCH=fusion_f1 OPTIMIZED_KERNEL_DIR=xtensa XTENSA_CORE=F1_190305_swupgrade test_micro_speech_test -j8
make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa TARGET_ARCH=fusion_f1 OPTIMIZED_KERNEL_DIR=xtensa XTENSA_CORE=F1_190305_swupgrade test_kernel_softmax_test -j8
```

Confirmed that the binary size for the keyword_benchmark is unchanged (relative to tip of tree):
```
make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa TARGET_ARCH=fusion_f1 OPTIMIZED_KERNEL_DIR=xtensa XTENSA_CORE=F1_190305_swupgrade keyword_benchmark -j8 BUILD_TYPE=release
xt-size tensorflow/lite/micro/tools/make/gen/xtensa_fusion_f1_release/bin/keyword_benchmark
```

Gives:
```
   text	   data	    bss	    dec	    hex	filename
  70368	  41140	  24856	 136364	  214ac	tensorflow/lite/micro/tools/make/gen/xtensa_fusion_f1_release/bin/keyword_benchmark
```
  • Loading branch information
advaitjain committed May 19, 2021
1 parent e90f048 commit b359894
Show file tree
Hide file tree
Showing 11 changed files with 409 additions and 260 deletions.
3 changes: 2 additions & 1 deletion tensorflow/lite/micro/benchmarks/keyword_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ limitations under the License.
#include "tensorflow/lite/micro/benchmarks/keyword_scrambled_model_data.h"
#include "tensorflow/lite/micro/benchmarks/micro_benchmark.h"
#include "tensorflow/lite/micro/kernels/fully_connected.h"
#include "tensorflow/lite/micro/kernels/softmax.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
#include "tensorflow/lite/micro/micro_profiler.h"
Expand Down Expand Up @@ -60,7 +61,7 @@ KeywordBenchmarkRunner* CreateBenchmarkRunner(MicroProfiler* profiler) {
KeywordOpResolver* op_resolver = new (op_resolver_buffer) KeywordOpResolver();
op_resolver->AddFullyConnected(tflite::Register_FULLY_CONNECTED_INT8());
op_resolver->AddQuantize();
op_resolver->AddSoftmax();
op_resolver->AddSoftmax(tflite::Register_SOFTMAX_INT8_INT16());
op_resolver->AddSvdf();

return new (benchmark_runner_buffer)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,11 @@ TF_LITE_MICRO_TEST(TestInvoke) {
micro_op_resolver.AddSoftmax();

// Create an area of memory to use for input, output, and intermediate arrays.
const int tensor_arena_size = 10 * 1024;
#if defined(XTENSA)
constexpr int tensor_arena_size = 15 * 1024;
#else
constexpr int tensor_arena_size = 10 * 1024;
#endif
uint8_t tensor_arena[tensor_arena_size];

// Build an interpreter to run the model with.
Expand Down
1 change: 0 additions & 1 deletion tensorflow/lite/micro/kernels/micro_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ TfLiteRegistration Register_LOG_SOFTMAX();
TfLiteRegistration Register_QUANTIZE();
TfLiteRegistration Register_RESIZE_BILINEAR();
TfLiteRegistration Register_SHAPE();
TfLiteRegistration Register_SOFTMAX();
TfLiteRegistration Register_SPACE_TO_BATCH_ND();
TfLiteRegistration Register_SQUEEZE();
TfLiteRegistration Register_SVDF();
Expand Down
15 changes: 15 additions & 0 deletions tensorflow/lite/micro/kernels/softmax.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,21 @@ void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length);

TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node);

// This is the most generic TfLiteRegistration. The actual supported types may
// still be target dependent. The only requirement is that every implementation
// (reference or optimized) must define this function.
TfLiteRegistration Register_SOFTMAX();

#if defined(XTENSA)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8 input and int16 output.
TfLiteRegistration Register_SOFTMAX_INT8_INT16();
#else
// Non-Xtensa targets have no specialized int8-input/int16-output variant;
// alias to the generic Register_SOFTMAX() so callers can use a single API
// regardless of target. Defined `inline` since this lives in a header and
// may be included by multiple translation units.
inline TfLiteRegistration Register_SOFTMAX_INT8_INT16() {
return Register_SOFTMAX();
}
#endif

} // namespace tflite

#endif // TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_
8 changes: 0 additions & 8 deletions tensorflow/lite/micro/kernels/softmax_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ namespace tflite {
namespace testing {
namespace {

#if !defined(XTENSA)
// The Softmax kernel assumes an output in the range [0, 1.0], leading to these
// quantization parameters.
const float output_scale_int8 = 1.0f / 256.0f;
Expand All @@ -42,7 +41,6 @@ const float input_data_1d[] = {1.0, 2.0, 3.0, 4.0, 5.0};
const float golden_1d[] = {0.011656231, 0.031684921, 0.086128544, 0.234121657,
0.636408647};

#endif
// 2-dimensional test data.
const int flat_size_2d = 10;
int shape_2d[] = {2, 2, 5};
Expand All @@ -52,7 +50,6 @@ const float golden_2d[] = {0.011656231, 0.031684921, 0.086128544, 0.234121657,
0.636408647, 0.636408647, 0.234121657, 0.086128544,
0.031684921, 0.011656231};

#if !defined(XTENSA)
// 3-dimensional test data.
const int flat_size_3d = 60;
int shape_3d[] = {3, 3, 4, 5};
Expand Down Expand Up @@ -247,7 +244,6 @@ const float golden_4d[] = {
// h = 3
0.268866557, 0.000033181, 0.730855076, 0.000000011, 0.000245175};

#endif
template <typename T>
void ValidateSoftmaxGoldens(TfLiteTensor* tensors, const int tensor_count,
T* output_data, const T* expected_output,
Expand All @@ -271,7 +267,6 @@ void ValidateSoftmaxGoldens(TfLiteTensor* tensors, const int tensor_count,
}
}

#if !defined(XTENSA)
void TestSoftmaxFloat(int* input_dims_data, const float* input_data,
int* output_dims_data, const float* expected_output_data,
float* output_data) {
Expand All @@ -290,7 +285,6 @@ void TestSoftmaxFloat(int* input_dims_data, const float* input_data,
ValidateSoftmaxGoldens(tensors, tensors_size, output_data,
expected_output_data, output_dims_count, 1e-5);
}
#endif

template <typename inputT, typename outputT>
void TestSoftmaxQuantized(int* input_dims_data, const float* input_data,
Expand Down Expand Up @@ -326,7 +320,6 @@ void TestSoftmaxQuantized(int* input_dims_data, const float* input_data,

TF_LITE_MICRO_TESTS_BEGIN

#if !defined(XTENSA)
TF_LITE_MICRO_TEST(Softmax1DFloatShouldMatchGolden) {
float output_data[tflite::testing::flat_size_1d];
tflite::testing::TestSoftmaxFloat(
Expand Down Expand Up @@ -476,7 +469,6 @@ TF_LITE_MICRO_TEST(Softmax4DQuantizedInt16ShouldMatchGolden) {
tflite::testing::output_zero_point_int16, output_data,
tflite::testing::tolerance_int16);
}
#endif

TF_LITE_MICRO_TEST(Softmax2DQuantizedInt8InputInt16OutputShouldMatchGolden) {
const float input_scale = 0.1f;
Expand Down
Loading

0 comments on commit b359894

Please sign in to comment.