Skip to content

Commit

Permalink
Allow users to specify dynamic tensors to be released via Interpreter.
Browse files Browse the repository at this point in the history
By default, dynamic tensors won't be released. If the user calls `interpreter.EnsureDynamicTensorsAreReleased()`, then all intermediate dynamic tensors will be released once they are no longer used.

PiperOrigin-RevId: 405742628
Change-Id: I34f3cea4cdd1e9df69939ee804db15ca626f1f51
  • Loading branch information
haozha111 authored and tensorflower-gardener committed Oct 26, 2021
1 parent b339ec5 commit d24b2e9
Show file tree
Hide file tree
Showing 6 changed files with 231 additions and 2 deletions.
47 changes: 47 additions & 0 deletions tensorflow/lite/core/subgraph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,10 @@ TfLiteStatus Subgraph::AllocateTensors() {
// instead.
ResetVariableTensors();

// Initialize the mapping between tensor index and the last execution plan
// index that uses the tensor.
InitializeTensorReleaseMap();

return kTfLiteOk;
}

Expand Down Expand Up @@ -1248,6 +1252,8 @@ TfLiteStatus Subgraph::Invoke() {
}
}
}
// Release dynamic tensor memory if configured by the user.
MaybeReleaseDynamicInputs(node, node_index);
}

return status;
Expand Down Expand Up @@ -1838,4 +1844,45 @@ std::unique_ptr<GraphInfo> Subgraph::CreateGraphInfo() {
return std::unique_ptr<GraphInfo>(new InterpreterInfo(this));
}

void Subgraph::InitializeTensorReleaseMap() {
  // Rebuild the map from scratch: AllocateTensors() can be called again after
  // the graph is modified (e.g. when a delegate replaces nodes), which changes
  // the execution plan. Stale entries from a previous plan could otherwise
  // cause a tensor to be released prematurely.
  tensor_to_last_op_index_.clear();
  for (size_t i = 0; i < execution_plan_.size(); ++i) {
    const int node_index = execution_plan_[i];
    const TfLiteNode& node = nodes_and_registration_[node_index].first;
    for (int input_index = 0; input_index < node.inputs->size; ++input_index) {
      const int input_tensor_index = node.inputs->data[input_index];
      TfLiteTensor* input_tensor = tensor(input_tensor_index);
      if (!input_tensor) continue;
      // Later plan entries overwrite earlier ones, so after the loop each
      // entry holds the node index of the last node (in execution order) that
      // reads the tensor.
      tensor_to_last_op_index_[input_tensor_index] = node_index;
    }
  }
}

void Subgraph::MaybeReleaseDynamicInputs(const TfLiteNode& node,
size_t node_index) {
if (!release_dynamic_tensors_if_unused_) return;
auto tensorIsInput = [&](int index) {
for (int idx : inputs_) {
if (idx == index) return true;
}
return false;
};
// Release dynamic tensor's memory if the current node is the last one that
// uses the tensor.
for (int input_index = 0; input_index < node.inputs->size; ++input_index) {
int input_tensor_index = node.inputs->data[input_index];
TfLiteTensor* input_tensor = tensor(input_tensor_index);
if (!input_tensor || input_tensor->allocation_type != kTfLiteDynamic ||
input_tensor->type == kTfLiteString ||
input_tensor->type == kTfLiteResource ||
tensorIsInput(input_tensor_index))
continue;
auto it = tensor_to_last_op_index_.find(input_tensor_index);
if (it != tensor_to_last_op_index_.end() && it->second == node_index) {
if (input_tensor->data.raw) {
TfLiteTensorDataFree(input_tensor);
}
}
}
}

} // namespace tflite
25 changes: 25 additions & 0 deletions tensorflow/lite/core/subgraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,16 @@ class Subgraph {
  // information about tensors and ops.
void DumpMemoryPlannerDebugInfo() const;

  // WARNING: This is an experimental API and subject to change.
  // Forces all intermediate dynamic tensors to be released once they are no
  // longer used by the model. Use this configuration with caution: it can
  // reduce the peak memory usage of the model, but at the cost of slower
  // inference speed. This API needs to be called before calling
  // `AllocateTensors`, since the release bookkeeping is set up during
  // allocation.
  void EnsureDynamicTensorsAreReleased() {
    release_dynamic_tensors_if_unused_ = true;
  }

// WARNING: This is an experimental API and subject to change.
  // Remove unused inputs of the subgraph. It checks the usage of inputs and
  // marks an input as kTfLiteOptionalTensor if it is not used in graph
  // execution.
Expand Down Expand Up @@ -679,6 +689,14 @@ class Subgraph {
// Also sets relevant fields on context_ based on known metadata.
TfLiteStatus SetMetadata(const std::map<std::string, std::string>* metadata);

  // Initializes the mapping from tensor index to the index of the
  // last operation that uses the tensor as input.
void InitializeTensorReleaseMap();

  // Checks the options for releasing dynamic tensors and releases dynamic
  // tensors if configured.
void MaybeReleaseDynamicInputs(const TfLiteNode& node, size_t node_index);

// The state of the Interpreter.
enum State {
// The interpreter isn't ready to be invoked.
Expand Down Expand Up @@ -834,6 +852,13 @@ class Subgraph {

// Model-metadata owned by the Interpreter.
const std::map<std::string, std::string>* metadata_ = nullptr;

// Release dynamic tensor's memory once they are not used by the graph.
bool release_dynamic_tensors_if_unused_ = false;

  // Maps a tensor index to the node index of the last node (in
  // execution-plan order) that uses the tensor as an input.
std::map<int, int> tensor_to_last_op_index_;
};

} // namespace tflite
Expand Down
96 changes: 94 additions & 2 deletions tensorflow/lite/delegates/delegate_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -874,9 +874,9 @@ class TestDelegateWithDynamicTensors : public ::testing::Test {
TfLiteIntArray* execution_plan;
TF_LITE_ENSURE_STATUS(
context->GetExecutionPlan(context, &execution_plan));
context->ReplaceNodeSubsetsWithDelegateKernels(
TfLiteStatus status = context->ReplaceNodeSubsetsWithDelegateKernels(
context, DelegateRegistration(), execution_plan, delegate);
return kTfLiteOk;
return status;
};
delegate_.flags = kTfLiteDelegateFlagsNone;
}
Expand Down Expand Up @@ -993,6 +993,98 @@ TEST_F(TestDelegateWithDynamicTensors, ShapePropagation_FlagNotSet) {
ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteError);
}

// Fixture for testing `EnsureDynamicTensorsAreReleased` on a partially
// delegated graph: tensor 0 (input) -> copy -> tensor 1 -> copy -> tensor 2
// (output), where each copy op produces a dynamic output and only the second
// op is replaced by the delegate.
class TestReleaseDynamicTensorWithDelegate : public ::testing::Test {
 protected:
  void SetUp() override {
    interpreter_.reset(new Interpreter);

    // All three tensors are float32 vectors of length 3.
    interpreter_->AddTensors(3);
    interpreter_->SetInputs({0});
    interpreter_->SetOutputs({2});
    TfLiteQuantizationParams quant;
    interpreter_->SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {3},
                                               quant);
    interpreter_->SetTensorParametersReadWrite(1, kTfLiteFloat32, "", {3},
                                               quant);
    interpreter_->SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {3},
                                               quant);
    TfLiteRegistration reg = DynamicCopyOpRegistration();
    interpreter_->AddNodeWithParameters({0}, {1}, nullptr, 0, nullptr, &reg);
    interpreter_->AddNodeWithParameters({1}, {2}, nullptr, 0, nullptr, &reg);

    delegate_.Prepare = [](TfLiteContext* context,
                           TfLiteDelegate* delegate) -> TfLiteStatus {
      TfLiteIntArray* execution_plan;
      TF_LITE_ENSURE_STATUS(
          context->GetExecutionPlan(context, &execution_plan));
      // Only replace the second execution node with delegate.
      TfLiteIntArray* nodes_to_replace = TfLiteIntArrayCreate(1);
      nodes_to_replace->data[0] = execution_plan->data[1];
      TfLiteStatus status = context->ReplaceNodeSubsetsWithDelegateKernels(
          context, DelegateRegistration(), nodes_to_replace, delegate);
      TfLiteIntArrayFree(nodes_to_replace);
      return status;
    };
    delegate_.flags = kTfLiteDelegateFlagsNone;
  }

  // Returns a registration whose prepare() marks the output dynamic and
  // eagerly allocates it to match the input's byte size; invoke() is a no-op
  // since the tests only inspect tensor allocations, not values.
  static TfLiteRegistration DynamicCopyOpRegistration() {
    TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr};

    reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
      // Output is dynamic and has the same size as input.
      TfLiteTensor* output;
      TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
      SetTensorToDynamic(output);
      const TfLiteTensor* input;
      TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
      // NOTE(review): the return status of TfLiteTensorRealloc is ignored;
      // acceptable in a test, but a failure would only surface later as a
      // null buffer.
      TfLiteTensorRealloc(input->bytes, output);
      return kTfLiteOk;
    };

    reg.invoke = [](TfLiteContext* context, TfLiteNode* node) {
      // Not implemented since this isn't required in testing.
      return kTfLiteOk;
    };
    return reg;
  }

  // Returns the delegate kernel registration: prepare() only asserts that its
  // input tensor is dynamic; invoke() is a no-op.
  static TfLiteRegistration DelegateRegistration() {
    TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr};

    reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
      // Check that input is dynamic.
      const TfLiteTensor* input;
      TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
      TF_LITE_ENSURE(context, IsDynamicTensor(input));
      return kTfLiteOk;
    };
    reg.invoke = [](TfLiteContext* context, TfLiteNode* node) {
      // Not implemented since this isn't required in testing.
      return kTfLiteOk;
    };
    return reg;
  }

  std::unique_ptr<Interpreter> interpreter_;
  TfLiteDelegate delegate_;
};

// Verifies that an intermediate dynamic tensor feeding a delegate kernel is
// released during Invoke() once `EnsureDynamicTensorsAreReleased` is set.
// (Renamed from the copy-pasted "ShapePropagation_FlagNotSet": the dynamic-
// tensors delegate flag IS set here, and the test is about tensor release,
// not shape propagation.)
TEST_F(TestReleaseDynamicTensorWithDelegate, DynamicTensorReleased) {
  delegate_.flags = kTfLiteDelegateFlagsAllowDynamicTensors;
  ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
  ASSERT_EQ(interpreter_->ModifyGraphWithDelegate(&delegate_), kTfLiteOk);

  // Without opting in, the intermediate dynamic tensor keeps its buffer.
  ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
  ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk);
  ASSERT_NE(interpreter_->tensor(1)->data.raw, nullptr);

  // After opting in, tensor 1 (last used as the delegate kernel's input) is
  // released during Invoke().
  interpreter_->EnsureDynamicTensorsAreReleased();
  ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
  ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk);
  ASSERT_EQ(interpreter_->tensor(1)->data.raw, nullptr);
}

// Tests for FP16 graphs
// =====================

Expand Down
8 changes: 8 additions & 0 deletions tensorflow/lite/interpreter.h
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,14 @@ class Interpreter {
/// WARNING: Experimental interface, subject to change
TfLiteStatus ReleaseNonPersistentMemory();

/// WARNING: This is an experimental API and subject to change.
/// Force all intermediate dynamic tensors to be released once they are not
/// used by the model. Please use this configuration with caution, since it
/// might reduce the peak memory usage of the model at the cost of a slower
/// inference speed. `AllocateTensors` needs to be called right after this
/// API.
void EnsureDynamicTensorsAreReleased();

// Update allocations for all tensors. This will redim dependent tensors
// using the input tensor dimensionality as given. This is relatively
// expensive. This *must be* called after the interpreter has been created
Expand Down
6 changes: 6 additions & 0 deletions tensorflow/lite/interpreter_experimental.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ TfLiteStatus Interpreter::ReleaseNonPersistentMemory() {
return primary_subgraph().ReleaseNonPersistentMemory();
}

void Interpreter::EnsureDynamicTensorsAreReleased() {
for (auto& subgraph : subgraphs_) {
subgraph->EnsureDynamicTensorsAreReleased();
}
}

TfLiteStatus Interpreter::ResetVariableTensors() {
for (auto& subgraph : subgraphs_) {
TF_LITE_ENSURE_STATUS(subgraph->ResetVariableTensors());
Expand Down
51 changes: 51 additions & 0 deletions tensorflow/lite/interpreter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1056,6 +1056,57 @@ TEST(BasicInterpreter, DynamicTensorsResizeDescendants) {
ASSERT_EQ(interpreter.tensor(3)->bytes, sizeof(float) * 10 * 14);
}

TEST(BasicInterpreter, ReleaseDynamicTensors) {
  // Assemble a graph with a node that has dynamically sized output (via the
  // pad op), followed by a node with a standard element-wise op (negate).
  Interpreter interpreter;
  interpreter.AddTensors(4);
  interpreter.SetInputs({0, 1});
  interpreter.SetOutputs({3});
  TfLiteQuantizationParams quant;
  // Tensor 0 is the value to pad; tensor 1 is the {4,2} padding spec.
  // Tensors 2 and 3 get their shapes at Invoke() time, so dims stay empty.
  interpreter.SetTensorParametersReadWrite(/*tensor_index=*/0,
                                           /*type=*/kTfLiteFloat32, /*name=*/"",
                                           /*dims=*/{2, 2, 1, 1},
                                           /*quantization=*/quant);
  interpreter.SetTensorParametersReadWrite(
      /*tensor_index=*/1, /*type=*/kTfLiteInt32, /*name=*/"", /*dims=*/{4, 2},
      /*quantization=*/quant);
  interpreter.SetTensorParametersReadWrite(/*tensor_index=*/2,
                                           /*type=*/kTfLiteFloat32, /*name=*/"",
                                           /*dims=*/{}, /*quantization=*/quant);
  interpreter.SetTensorParametersReadWrite(/*tensor_index=*/3,
                                           /*type=*/kTfLiteFloat32, /*name=*/"",
                                           /*dims=*/{}, /*quantization=*/quant);

  TfLiteRegistration* pad_op = tflite::ops::builtin::Register_PADV2();
  TfLiteRegistration* neg_op = tflite::ops::builtin::Register_NEG();
  interpreter.AddNodeWithParameters(
      /*inputs=*/{0, 1}, /*outputs=*/{2}, /*init_data=*/nullptr,
      /*init_data_size=*/0, /*builtin_data=*/nullptr, /*registration=*/pad_op);
  interpreter.AddNodeWithParameters(
      /*inputs=*/{2}, /*outputs=*/{3}, /*init_data=*/nullptr,
      /*init_data_size=*/0, /*builtin_data=*/nullptr, /*registration=*/neg_op);
  ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk);

  // Configure [[2,2],[2,2],[0,0],[0,0]] padding (the original comment said
  // [[2,2],[4,4]], which does not match these values) and execute the graph;
  // this grows the 2x2x1x1 input to 6x6x1x1.
  const std::vector<int> padding = {2, 2, 2, 2, 0, 0, 0, 0};
  int* tensor_value = interpreter.typed_tensor<int>(1);
  // Range-based copy avoids the signed/unsigned comparison of the original
  // `int i < padding.size()` loop.
  int write_index = 0;
  for (int pad : padding) {
    tensor_value[write_index++] = pad;
  }

  // Invoke without calling `EnsureDynamicTensorsAreReleased`: the
  // intermediate dynamic tensor keeps its buffer.
  ASSERT_EQ(interpreter.Invoke(), kTfLiteOk);
  ASSERT_NE(interpreter.tensor(2)->data.raw, nullptr);

  interpreter.EnsureDynamicTensorsAreReleased();
  ASSERT_EQ(interpreter.Invoke(), kTfLiteOk);

  // Check that the intermediate dynamic tensor's memory is released, while
  // the 6x6 graph output keeps its payload.
  ASSERT_EQ(interpreter.tensor(2)->data.raw, nullptr);
  ASSERT_EQ(interpreter.tensor(3)->bytes, sizeof(float) * 6 * 6);
}

TEST(InterpreterTensorsCapacityTest, TestWithinHeadroom) {
Interpreter interpreter;
ASSERT_EQ(interpreter.AddTensors(Interpreter::kTensorsReservedCapacity),
Expand Down

0 comments on commit d24b2e9

Please sign in to comment.