Skip to content

Commit

Permalink
Allow users to specify dynamic tensors to be released via Interpreter.
Browse files Browse the repository at this point in the history
By default, dynamic tensors won't be released. If the user calls `interpreter.EnsureDynamicTensorsAreReleased()`, then all intermediate dynamic tensors will be released once they are no longer used.

PiperOrigin-RevId: 405742628
Change-Id: I34f3cea4cdd1e9df69939ee804db15ca626f1f51
  • Loading branch information
haozha111 authored and tensorflower-gardener committed Oct 26, 2021
1 parent b339ec5 commit d24b2e9
Show file tree
Hide file tree
Showing 6 changed files with 231 additions and 2 deletions.
47 changes: 47 additions & 0 deletions tensorflow/lite/core/subgraph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,10 @@ TfLiteStatus Subgraph::AllocateTensors() {
// instead.
ResetVariableTensors();

// Initialize the mapping between tensor index and the last execution plan
// index that uses the tensor.
InitializeTensorReleaseMap();

return kTfLiteOk;
}

Expand Down Expand Up @@ -1248,6 +1252,8 @@ TfLiteStatus Subgraph::Invoke() {
}
}
}
// Release dynamic tensor memory if configured by the user.
MaybeReleaseDynamicInputs(node, node_index);
}

return status;
Expand Down Expand Up @@ -1838,4 +1844,45 @@ std::unique_ptr<GraphInfo> Subgraph::CreateGraphInfo() {
return std::unique_ptr<GraphInfo>(new InterpreterInfo(this));
}

void Subgraph::InitializeTensorReleaseMap() {
  // Rebuild the map from scratch: AllocateTensors() can be called again after
  // the graph is modified (e.g. when a delegate replaces nodes), which changes
  // the execution plan. Stale entries from a previous plan could otherwise
  // cause a tensor to be released prematurely.
  tensor_to_last_op_index_.clear();
  for (size_t i = 0; i < execution_plan_.size(); ++i) {
    const int node_index = execution_plan_[i];
    const TfLiteNode& node = nodes_and_registration_[node_index].first;
    for (int input_index = 0; input_index < node.inputs->size; ++input_index) {
      const int input_tensor_index = node.inputs->data[input_index];
      TfLiteTensor* input_tensor = tensor(input_tensor_index);
      if (!input_tensor) continue;
      // Later plan entries overwrite earlier ones, so after the loop each
      // entry holds the node index of the last node (in execution order) that
      // reads the tensor.
      tensor_to_last_op_index_[input_tensor_index] = node_index;
    }
  }
}

void Subgraph::MaybeReleaseDynamicInputs(const TfLiteNode& node,
size_t node_index) {
if (!release_dynamic_tensors_if_unused_) return;
auto tensorIsInput = [&](int index) {
for (int idx : inputs_) {
if (idx == index) return true;
}
return false;
};
// Release dynamic tensor's memory if the current node is the last one that
// uses the tensor.
for (int input_index = 0; input_index < node.inputs->size; ++input_index) {
int input_tensor_index = node.inputs->data[input_index];
TfLiteTensor* input_tensor = tensor(input_tensor_index);
if (!input_tensor || input_tensor->allocation_type != kTfLiteDynamic ||
input_tensor->type == kTfLiteString ||
input_tensor->type == kTfLiteResource ||
tensorIsInput(input_tensor_index))
continue;
auto it = tensor_to_last_op_index_.find(input_tensor_index);
if (it != tensor_to_last_op_index_.end() && it->second == node_index) {
if (input_tensor->data.raw) {
TfLiteTensorDataFree(input_tensor);
}
}
}
}

} // namespace tflite
25 changes: 25 additions & 0 deletions tensorflow/lite/core/subgraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,16 @@ class Subgraph {
  // information about tensors and ops.
void DumpMemoryPlannerDebugInfo() const;

  // WARNING: This is an experimental API and subject to change.
  // Forces all intermediate dynamic tensors to be released once they are no
  // longer used by the model. Use this configuration with caution: it can
  // reduce the peak memory usage of the model, but at the cost of slower
  // inference speed. This API needs to be called before calling
  // `AllocateTensors`, since the release bookkeeping is set up during
  // allocation.
  void EnsureDynamicTensorsAreReleased() {
    release_dynamic_tensors_if_unused_ = true;
  }

// WARNING: This is an experimental API and subject to change.
  // Remove unused inputs of the subgraph. It checks the usage of inputs and
  // marks an input as kTfLiteOptionalTensor if it is not used in graph
  // execution.
Expand Down Expand Up @@ -679,6 +689,14 @@ class Subgraph {
// Also sets relevant fields on context_ based on known metadata.
TfLiteStatus SetMetadata(const std::map<std::string, std::string>* metadata);

  // Initializes the mapping from tensor index to the index of the
  // last operation that uses the tensor as input.
void InitializeTensorReleaseMap();

  // Checks the options for releasing dynamic tensors and releases dynamic
  // tensors if configured.
void MaybeReleaseDynamicInputs(const TfLiteNode& node, size_t node_index);

// The state of the Interpreter.
enum State {
// The interpreter isn't ready to be invoked.
Expand Down Expand Up @@ -834,6 +852,13 @@ class Subgraph {

// Model-metadata owned by the Interpreter.
const std::map<std::string, std::string>* metadata_ = nullptr;

// Release dynamic tensor's memory once they are not used by the graph.
bool release_dynamic_tensors_if_unused_ = false;

  // Maps a tensor index to the node index of the last node (in
  // execution-plan order) that uses the tensor as an input.
std::map<int, int> tensor_to_last_op_index_;
};

} // namespace tflite
Expand Down
96 changes: 94 additions & 2 deletions tensorflow/lite/delegates/delegate_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -874,9 +874,9 @@ class TestDelegateWithDynamicTensors : public ::testing::Test {
TfLiteIntArray* execution_plan;
TF_LITE_ENSURE_STATUS(
context->GetExecutionPlan(context, &execution_plan));
context->ReplaceNodeSubsetsWithDelegateKernels(
TfLiteStatus status = context->ReplaceNodeSubsetsWithDelegateKernels(
context, DelegateRegistration(), execution_plan, delegate);
return kTfLiteOk;
return status;
};
delegate_.flags = kTfLiteDelegateFlagsNone;
}
Expand Down Expand Up @@ -993,6 +993,98 @@ TEST_F(TestDelegateWithDynamicTensors, ShapePropagation_FlagNotSet) {
ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteError);
}

// Fixture for testing `EnsureDynamicTensorsAreReleased` on a partially
// delegated graph: tensor 0 (input) -> copy -> tensor 1 -> copy -> tensor 2
// (output), where each copy op produces a dynamic output and only the second
// op is replaced by the delegate.
class TestReleaseDynamicTensorWithDelegate : public ::testing::Test {
 protected:
  void SetUp() override {
    interpreter_.reset(new Interpreter);

    // All three tensors are float32 vectors of length 3.
    interpreter_->AddTensors(3);
    interpreter_->SetInputs({0});
    interpreter_->SetOutputs({2});
    TfLiteQuantizationParams quant;
    interpreter_->SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {3},
                                               quant);
    interpreter_->SetTensorParametersReadWrite(1, kTfLiteFloat32, "", {3},
                                               quant);
    interpreter_->SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {3},
                                               quant);
    TfLiteRegistration reg = DynamicCopyOpRegistration();
    interpreter_->AddNodeWithParameters({0}, {1}, nullptr, 0, nullptr, &reg);
    interpreter_->AddNodeWithParameters({1}, {2}, nullptr, 0, nullptr, &reg);

    delegate_.Prepare = [](TfLiteContext* context,
                           TfLiteDelegate* delegate) -> TfLiteStatus {
      TfLiteIntArray* execution_plan;
      TF_LITE_ENSURE_STATUS(
          context->GetExecutionPlan(context, &execution_plan));
      // Only replace the second execution node with delegate.
      TfLiteIntArray* nodes_to_replace = TfLiteIntArrayCreate(1);
      nodes_to_replace->data[0] = execution_plan->data[1];
      TfLiteStatus status = context->ReplaceNodeSubsetsWithDelegateKernels(
          context, DelegateRegistration(), nodes_to_replace, delegate);
      TfLiteIntArrayFree(nodes_to_replace);
      return status;
    };
    delegate_.flags = kTfLiteDelegateFlagsNone;
  }

  // Returns a registration whose prepare() marks the output dynamic and
  // eagerly allocates it to match the input's byte size; invoke() is a no-op
  // since the tests only inspect tensor allocations, not values.
  static TfLiteRegistration DynamicCopyOpRegistration() {
    TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr};

    reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
      // Output is dynamic and has the same size as input.
      TfLiteTensor* output;
      TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
      SetTensorToDynamic(output);
      const TfLiteTensor* input;
      TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
      // NOTE(review): the return status of TfLiteTensorRealloc is ignored;
      // acceptable in a test, but a failure would only surface later as a
      // null buffer.
      TfLiteTensorRealloc(input->bytes, output);
      return kTfLiteOk;
    };

    reg.invoke = [](TfLiteContext* context, TfLiteNode* node) {
      // Not implemented since this isn't required in testing.
      return kTfLiteOk;
    };
    return reg;
  }

  // Returns the delegate kernel registration: prepare() only asserts that its
  // input tensor is dynamic; invoke() is a no-op.
  static TfLiteRegistration DelegateRegistration() {
    TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr};

    reg.prepare = [](TfLiteContext* context, TfLiteNode* node) {
      // Check that input is dynamic.
      const TfLiteTensor* input;
      TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
      TF_LITE_ENSURE(context, IsDynamicTensor(input));
      return kTfLiteOk;
    };
    reg.invoke = [](TfLiteContext* context, TfLiteNode* node) {
      // Not implemented since this isn't required in testing.
      return kTfLiteOk;
    };
    return reg;
  }

  std::unique_ptr<Interpreter> interpreter_;
  TfLiteDelegate delegate_;
};

// Verifies that an intermediate dynamic tensor feeding a delegate kernel is
// released during Invoke() once `EnsureDynamicTensorsAreReleased` is set.
// (Renamed from the copy-pasted "ShapePropagation_FlagNotSet": the dynamic-
// tensors delegate flag IS set here, and the test is about tensor release,
// not shape propagation.)
TEST_F(TestReleaseDynamicTensorWithDelegate, DynamicTensorReleased) {
  delegate_.flags = kTfLiteDelegateFlagsAllowDynamicTensors;
  ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
  ASSERT_EQ(interpreter_->ModifyGraphWithDelegate(&delegate_), kTfLiteOk);

  // Without opting in, the intermediate dynamic tensor keeps its buffer.
  ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
  ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk);
  ASSERT_NE(interpreter_->tensor(1)->data.raw, nullptr);

  // After opting in, tensor 1 (last used as the delegate kernel's input) is
  // released during Invoke().
  interpreter_->EnsureDynamicTensorsAreReleased();
  ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
  ASSERT_EQ(interpreter_->Invoke(), kTfLiteOk);
  ASSERT_EQ(interpreter_->tensor(1)->data.raw, nullptr);
}

// Tests for FP16 graphs
// =====================

Expand Down
8 changes: 8 additions & 0 deletions tensorflow/lite/interpreter.h
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,14 @@ class Interpreter {
/// WARNING: Experimental interface, subject to change
TfLiteStatus ReleaseNonPersistentMemory();

/// WARNING: This is an experimental API and subject to change.
/// Force all intermediate dynamic tensors to be released once they are not
/// used by the model. Please use this configuration with caution, since it
/// might reduce the peak memory usage of the model at the cost of a slower
/// inference speed. `AllocateTensors` needs to be called right after this
/// API.
void EnsureDynamicTensorsAreReleased();

// Update allocations for all tensors. This will redim dependent tensors
// using the input tensor dimensionality as given. This is relatively
// expensive. This *must be* called after the interpreter has been created
Expand Down
6 changes: 6 additions & 0 deletions tensorflow/lite/interpreter_experimental.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ TfLiteStatus Interpreter::ReleaseNonPersistentMemory() {
return primary_subgraph().ReleaseNonPersistentMemory();
}

void Interpreter::EnsureDynamicTensorsAreReleased() {
for (auto& subgraph : subgraphs_) {
subgraph->EnsureDynamicTensorsAreReleased();
}
}

TfLiteStatus Interpreter::ResetVariableTensors() {
for (auto& subgraph : subgraphs_) {
TF_LITE_ENSURE_STATUS(subgraph->ResetVariableTensors());
Expand Down
51 changes: 51 additions & 0 deletions tensorflow/lite/interpreter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1056,6 +1056,57 @@ TEST(BasicInterpreter, DynamicTensorsResizeDescendants) {
ASSERT_EQ(interpreter.tensor(3)->bytes, sizeof(float) * 10 * 14);
}

TEST(BasicInterpreter, ReleaseDynamicTensors) {
  // Assemble a graph with a node that has dynamically sized output (via the
  // pad op), followed by a node with a standard element-wise op (negate).
  Interpreter interpreter;
  interpreter.AddTensors(4);
  interpreter.SetInputs({0, 1});
  interpreter.SetOutputs({3});
  TfLiteQuantizationParams quant;
  // Tensor 0 is the value to pad; tensor 1 is the {4,2} padding spec.
  // Tensors 2 and 3 get their shapes at Invoke() time, so dims stay empty.
  interpreter.SetTensorParametersReadWrite(/*tensor_index=*/0,
                                           /*type=*/kTfLiteFloat32, /*name=*/"",
                                           /*dims=*/{2, 2, 1, 1},
                                           /*quantization=*/quant);
  interpreter.SetTensorParametersReadWrite(
      /*tensor_index=*/1, /*type=*/kTfLiteInt32, /*name=*/"", /*dims=*/{4, 2},
      /*quantization=*/quant);
  interpreter.SetTensorParametersReadWrite(/*tensor_index=*/2,
                                           /*type=*/kTfLiteFloat32, /*name=*/"",
                                           /*dims=*/{}, /*quantization=*/quant);
  interpreter.SetTensorParametersReadWrite(/*tensor_index=*/3,
                                           /*type=*/kTfLiteFloat32, /*name=*/"",
                                           /*dims=*/{}, /*quantization=*/quant);

  TfLiteRegistration* pad_op = tflite::ops::builtin::Register_PADV2();
  TfLiteRegistration* neg_op = tflite::ops::builtin::Register_NEG();
  interpreter.AddNodeWithParameters(
      /*inputs=*/{0, 1}, /*outputs=*/{2}, /*init_data=*/nullptr,
      /*init_data_size=*/0, /*builtin_data=*/nullptr, /*registration=*/pad_op);
  interpreter.AddNodeWithParameters(
      /*inputs=*/{2}, /*outputs=*/{3}, /*init_data=*/nullptr,
      /*init_data_size=*/0, /*builtin_data=*/nullptr, /*registration=*/neg_op);
  ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk);

  // Configure [[2,2],[2,2],[0,0],[0,0]] padding (the original comment said
  // [[2,2],[4,4]], which does not match these values) and execute the graph;
  // this grows the 2x2x1x1 input to 6x6x1x1.
  const std::vector<int> padding = {2, 2, 2, 2, 0, 0, 0, 0};
  int* tensor_value = interpreter.typed_tensor<int>(1);
  // Range-based copy avoids the signed/unsigned comparison of the original
  // `int i < padding.size()` loop.
  int write_index = 0;
  for (int pad : padding) {
    tensor_value[write_index++] = pad;
  }

  // Invoke without calling `EnsureDynamicTensorsAreReleased`: the
  // intermediate dynamic tensor keeps its buffer.
  ASSERT_EQ(interpreter.Invoke(), kTfLiteOk);
  ASSERT_NE(interpreter.tensor(2)->data.raw, nullptr);

  interpreter.EnsureDynamicTensorsAreReleased();
  ASSERT_EQ(interpreter.Invoke(), kTfLiteOk);

  // Check that the intermediate dynamic tensor's memory is released, while
  // the 6x6 graph output keeps its payload.
  ASSERT_EQ(interpreter.tensor(2)->data.raw, nullptr);
  ASSERT_EQ(interpreter.tensor(3)->bytes, sizeof(float) * 6 * 6);
}

TEST(InterpreterTensorsCapacityTest, TestWithinHeadroom) {
Interpreter interpreter;
ASSERT_EQ(interpreter.AddTensors(Interpreter::kTensorsReservedCapacity),
Expand Down

0 comments on commit d24b2e9

Please sign in to comment.