From da5a9f0bee05b4ddae362fdde9ccfba266a510dd Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Mon, 22 Mar 2021 12:08:46 -0700 Subject: [PATCH] Add Audio tensor metadata PiperOrigin-RevId: 364377123 --- .../metadata/cc/metadata_version.cc | 19 +++++++++++++++++++ .../lite/support/metadata/MetadataParser.java | 2 +- .../metadata/metadata_schema.fbs | 19 ++++++++++++++++++- 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/tensorflow_lite_support/metadata/cc/metadata_version.cc b/tensorflow_lite_support/metadata/cc/metadata_version.cc index 7679f6c44..7f863aedb 100644 --- a/tensorflow_lite_support/metadata/cc/metadata_version.cc +++ b/tensorflow_lite_support/metadata/cc/metadata_version.cc @@ -45,6 +45,7 @@ enum class SchemaMembers { kSubGraphMetadataInputTensorGroups = 5, kSubGraphMetadataOutputTensorGroups = 6, kProcessUnitOptionsRegexTokenizerOptions = 7, + kContentPropertiesAudioProperties = 8, }; // Helper class to compare semantic versions in terms of three integers, major, @@ -107,6 +108,8 @@ Version GetMemberVersion(SchemaMembers member) { return Version(1, 2, 0); case SchemaMembers::kProcessUnitOptionsRegexTokenizerOptions: return Version(1, 2, 1); + case SchemaMembers::kContentPropertiesAudioProperties: + return Version(1, 3, 0); default: // Should never happen. TFLITE_LOG(FATAL) << "Unsupported schema member: " @@ -176,6 +179,19 @@ void UpdateMinimumVersionForTable( } } +template <> +void UpdateMinimumVersionForTable(const tflite::Content* table, + Version* min_version) { + if (table == nullptr) return; + + // Checks the ContentProperties field. + if (table->content_properties_type() == ContentProperties_AudioProperties) { + UpdateMinimumVersion( + GetMemberVersion(SchemaMembers::kContentPropertiesAudioProperties), + min_version); + } +} + template <> void UpdateMinimumVersionForTable( const tflite::TensorMetadata* table, Version* min_version) { @@ -188,6 +204,9 @@ void UpdateMinimumVersionForTable( // Checks the process_units field. 
UpdateMinimumVersionForArray(table->process_units(), min_version); + + // Check the content field. + UpdateMinimumVersionForTable(table->content(), min_version); } template <> diff --git a/tensorflow_lite_support/metadata/java/src/java/org/tensorflow/lite/support/metadata/MetadataParser.java b/tensorflow_lite_support/metadata/java/src/java/org/tensorflow/lite/support/metadata/MetadataParser.java index 75766fb91..b6dd4a621 100644 --- a/tensorflow_lite_support/metadata/java/src/java/org/tensorflow/lite/support/metadata/MetadataParser.java +++ b/tensorflow_lite_support/metadata/java/src/java/org/tensorflow/lite/support/metadata/MetadataParser.java @@ -21,7 +21,7 @@ public final class MetadataParser { * The version of the metadata parser that this metadata extractor library is depending on. The * value should match the value of "Schema Semantic version" in metadata_schema.fbs. */ - public static final String VERSION = "1.2.1"; + public static final String VERSION = "1.3.0"; private MetadataParser() {} } diff --git a/tensorflow_lite_support/metadata/metadata_schema.fbs b/tensorflow_lite_support/metadata/metadata_schema.fbs index 8faae0a8f..6ef1fc1fc 100644 --- a/tensorflow_lite_support/metadata/metadata_schema.fbs +++ b/tensorflow_lite_support/metadata/metadata_schema.fbs @@ -50,7 +50,7 @@ namespace tflite; // for which they were added. // // LINT.IfChange -// Schema Semantic version: 1.2.1 +// Schema Semantic version: 1.3.0 // LINT.ThenChange(//tensorflow_lite_support/\ // metadata/java/src/java/org/tensorflow/lite/support/metadata/\ // MetadataParser.java) @@ -69,6 +69,7 @@ file_identifier "M001"; // 1.2.0 - Added input_tensor_group to SubGraphMetadata. // Added output_tensor_group to SubGraphMetadata. // 1.2.1 - Added RegexTokenizerOptions to ProcessUnitOptions. +// 1.3.0 - Added AudioProperties to ContentProperties. // File extension of any written files. 
file_extension "tflitemeta"; @@ -232,6 +233,20 @@ enum BoundingBoxType : byte { } +// The properties for audio tensors. +// Added in: 1.3.0 +table AudioProperties { + // The sample rate in Hz when the audio was captured. + sample_rate:uint; + + // The channel count of the audio. + channels:uint; + + // The minimum required number of samples in order to run inference properly. + // Optional for fixed-size audio tensors. + min_required_samples:uint; +} + enum CoordinateType : byte { // The coordinates are float values from 0 to 1. RATIO = 0, @@ -267,6 +282,8 @@ union ContentProperties { FeatureProperties, ImageProperties, BoundingBoxProperties, + // Added in: 1.3.0 + AudioProperties, } table ValueRange {