Skip to content

Commit

Permalink
Support all datatypes for Xtensa softmax implementation.
Browse files Browse the repository at this point in the history
 * This allows the micro_speech and person_detect examples to be usable
   with the optimized xtensa kernels.

 * enabled all the softmax kernel test cases for Xtensa (since we have a
   fallback to the reference kernels).

 * The keyword_benchmark specific variant is moved to its own .cc file
   since that allows the Xtensa linker to properly drop unused symbols
   and ensures that the binary size for the keyword_benchmark is
   unchanged.

Manually tested that the following tests pass:
```
make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa TARGET_ARCH=fusion_f1 OPTIMIZED_KERNEL_DIR=xtensa XTENSA_CORE=F1_190305_swupgrade test_person_detection_test_int8 -j8
make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa TARGET_ARCH=fusion_f1 OPTIMIZED_KERNEL_DIR=xtensa XTENSA_CORE=F1_190305_swupgrade test_micro_speech_test -j8
make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa TARGET_ARCH=fusion_f1 OPTIMIZED_KERNEL_DIR=xtensa XTENSA_CORE=F1_190305_swupgrade test_kernel_softmax_test -j8
```

Confirmed that the binary size for the keyword_benchmark is unchanged (relative to tip of tree):
```
make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa TARGET_ARCH=fusion_f1 OPTIMIZED_KERNEL_DIR=xtensa XTENSA_CORE=F1_190305_swupgrade keyword_benchmark -j8 BUILD_TYPE=release
xt-size tensorflow/lite/micro/tools/make/gen/xtensa_fusion_f1_release/bin/keyword_benchmark
```

Gives:
```
   text	   data	    bss	    dec	    hex	filename
  70368	  41140	  24856	 136364	  214ac	tensorflow/lite/micro/tools/make/gen/xtensa_fusion_f1_release/bin/keyword_benchmark
```
  • Loading branch information
advaitjain committed May 19, 2021
1 parent e90f048 commit b359894
Show file tree
Hide file tree
Showing 11 changed files with 409 additions and 260 deletions.
3 changes: 2 additions & 1 deletion tensorflow/lite/micro/benchmarks/keyword_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ limitations under the License.
#include "tensorflow/lite/micro/benchmarks/keyword_scrambled_model_data.h"
#include "tensorflow/lite/micro/benchmarks/micro_benchmark.h"
#include "tensorflow/lite/micro/kernels/fully_connected.h"
#include "tensorflow/lite/micro/kernels/softmax.h"
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
#include "tensorflow/lite/micro/micro_profiler.h"
Expand Down Expand Up @@ -60,7 +61,7 @@ KeywordBenchmarkRunner* CreateBenchmarkRunner(MicroProfiler* profiler) {
KeywordOpResolver* op_resolver = new (op_resolver_buffer) KeywordOpResolver();
op_resolver->AddFullyConnected(tflite::Register_FULLY_CONNECTED_INT8());
op_resolver->AddQuantize();
op_resolver->AddSoftmax();
op_resolver->AddSoftmax(tflite::Register_SOFTMAX_INT8_INT16());
op_resolver->AddSvdf();

return new (benchmark_runner_buffer)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,11 @@ TF_LITE_MICRO_TEST(TestInvoke) {
micro_op_resolver.AddSoftmax();

// Create an area of memory to use for input, output, and intermediate arrays.
const int tensor_arena_size = 10 * 1024;
#if defined(XTENSA)
constexpr int tensor_arena_size = 15 * 1024;
#else
constexpr int tensor_arena_size = 10 * 1024;
#endif
uint8_t tensor_arena[tensor_arena_size];

// Build an interpreter to run the model with.
Expand Down
1 change: 0 additions & 1 deletion tensorflow/lite/micro/kernels/micro_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ TfLiteRegistration Register_LOG_SOFTMAX();
TfLiteRegistration Register_QUANTIZE();
TfLiteRegistration Register_RESIZE_BILINEAR();
TfLiteRegistration Register_SHAPE();
TfLiteRegistration Register_SOFTMAX();
TfLiteRegistration Register_SPACE_TO_BATCH_ND();
TfLiteRegistration Register_SQUEEZE();
TfLiteRegistration Register_SVDF();
Expand Down
15 changes: 15 additions & 0 deletions tensorflow/lite/micro/kernels/softmax.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,21 @@ void* SoftmaxInit(TfLiteContext* context, const char* buffer, size_t length);

TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node);

// This is the most generic TfLiteRegistration. The actual supported types may
// still be target dependent. The only requirement is that every implementation
// (reference or optimized) must define this function.
TfLiteRegistration Register_SOFTMAX();

#if defined(XTENSA)
// Returns a TfLiteRegistration struct for kernel variant that only supports
// int8 input and int16 output.
TfLiteRegistration Register_SOFTMAX_INT8_INT16();
#else
// Non-Xtensa targets have no specialized int8-input/int16-output variant;
// alias to the generic Register_SOFTMAX() so callers can use a single API
// regardless of target. Defined `inline` since this lives in a header and
// may be included by multiple translation units.
inline TfLiteRegistration Register_SOFTMAX_INT8_INT16() {
return Register_SOFTMAX();
}
#endif

} // namespace tflite

#endif // TENSORFLOW_LITE_MICRO_KERNELS_SOFTMAX_H_
8 changes: 0 additions & 8 deletions tensorflow/lite/micro/kernels/softmax_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ namespace tflite {
namespace testing {
namespace {

#if !defined(XTENSA)
// The Softmax kernel assumes an output in the range [0, 1.0], leading to these
// quantization parameters.
const float output_scale_int8 = 1.0f / 256.0f;
Expand All @@ -42,7 +41,6 @@ const float input_data_1d[] = {1.0, 2.0, 3.0, 4.0, 5.0};
const float golden_1d[] = {0.011656231, 0.031684921, 0.086128544, 0.234121657,
0.636408647};

#endif
// 2-dimensional test data.
const int flat_size_2d = 10;
int shape_2d[] = {2, 2, 5};
Expand All @@ -52,7 +50,6 @@ const float golden_2d[] = {0.011656231, 0.031684921, 0.086128544, 0.234121657,
0.636408647, 0.636408647, 0.234121657, 0.086128544,
0.031684921, 0.011656231};

#if !defined(XTENSA)
// 3-dimensional test data.
const int flat_size_3d = 60;
int shape_3d[] = {3, 3, 4, 5};
Expand Down Expand Up @@ -247,7 +244,6 @@ const float golden_4d[] = {
// h = 3
0.268866557, 0.000033181, 0.730855076, 0.000000011, 0.000245175};

#endif
template <typename T>
void ValidateSoftmaxGoldens(TfLiteTensor* tensors, const int tensor_count,
T* output_data, const T* expected_output,
Expand All @@ -271,7 +267,6 @@ void ValidateSoftmaxGoldens(TfLiteTensor* tensors, const int tensor_count,
}
}

#if !defined(XTENSA)
void TestSoftmaxFloat(int* input_dims_data, const float* input_data,
int* output_dims_data, const float* expected_output_data,
float* output_data) {
Expand All @@ -290,7 +285,6 @@ void TestSoftmaxFloat(int* input_dims_data, const float* input_data,
ValidateSoftmaxGoldens(tensors, tensors_size, output_data,
expected_output_data, output_dims_count, 1e-5);
}
#endif

template <typename inputT, typename outputT>
void TestSoftmaxQuantized(int* input_dims_data, const float* input_data,
Expand Down Expand Up @@ -326,7 +320,6 @@ void TestSoftmaxQuantized(int* input_dims_data, const float* input_data,

TF_LITE_MICRO_TESTS_BEGIN

#if !defined(XTENSA)
TF_LITE_MICRO_TEST(Softmax1DFloatShouldMatchGolden) {
float output_data[tflite::testing::flat_size_1d];
tflite::testing::TestSoftmaxFloat(
Expand Down Expand Up @@ -476,7 +469,6 @@ TF_LITE_MICRO_TEST(Softmax4DQuantizedInt16ShouldMatchGolden) {
tflite::testing::output_zero_point_int16, output_data,
tflite::testing::tolerance_int16);
}
#endif

TF_LITE_MICRO_TEST(Softmax2DQuantizedInt8InputInt16OutputShouldMatchGolden) {
const float input_scale = 0.1f;
Expand Down
Loading

0 comments on commit b359894

Please sign in to comment.