[onert] Fix deallocation bug (Samsung#6271)
* [onert] Fix deallocation bug

Fix a deallocation bug that was introduced by Samsung#5926.

As there was no way to deallocate the dynamic-tensor memory of tensors
defined by other backends, the dynamic tensor deallocation logic had to
be changed.

- Do not deallocate in `FunctionSequence`
- Introduce `ITensor::deallocBuffer`, which deallocates the buffer
- Introduce the `DeallocFunction` class for deallocation of tensors that
  are no longer used (Linear Executor only)
- `IDynamicTensorManager`'s methods `planDealloc` and `deallocInput` are
  no longer used

In addition, all tensors are now deallocated correctly, even across
backend boundaries.
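For illustration only (not part of this change), a backend that manages its own dynamic buffers could override the new hook roughly as sketched below. `MyBackendTensor` and its members are hypothetical; a real backend would derive from `onert::backend::ITensor` and override `deallocBuffer()` in the same spirit.

```cpp
#include <cstdint>
#include <cstdlib>

// Hypothetical backend-side tensor, kept self-contained for illustration.
class MyBackendTensor
{
public:
  void allocateDynamic(std::size_t bytes)
  {
    _buffer = static_cast<uint8_t *>(std::malloc(bytes));
  }

  // Mirrors the intent of ITensor::deallocBuffer(): release the dynamic buffer
  // and forget it, so a later run can allocate a buffer of a different size.
  void deallocBuffer()
  {
    if (_buffer != nullptr)
    {
      std::free(_buffer);
      _buffer = nullptr;
    }
  }

private:
  uint8_t *_buffer = nullptr;
};
```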

ONE-DCO-1.0-Signed-off-by: Hanjoung Lee <[email protected]>

* Update runtime/onert/core/src/exec/DynamicShapeInferer.cc

Co-authored-by: Hyun Sik Yoon <[email protected]>

wateret and hyunsik-yoon authored Mar 17, 2021
1 parent 47461ee commit 44ae2a8
Showing 9 changed files with 116 additions and 20 deletions.
6 changes: 6 additions & 0 deletions runtime/onert/core/include/backend/ITensor.h
@@ -88,6 +88,12 @@ class ITensor
throw std::runtime_error("This backend does not support dynamic tensor");
}

/// @brief Dealloc the buffer (only for dynamic tensors)
virtual void deallocBuffer()
{
throw std::runtime_error("This backend does not support resetting buffer");
}

/**
* @brief Set the shape of tensor to new_shape
* @note Higher dimension will be placed on front.
@@ -157,12 +157,6 @@ template <typename T_BackendContext> void planTensors(const T_BackendContext &ct
{
// plan for deallocation of static tensor node
tensor_builder->notifyLastUse(ind);

// plan for deallocation of dynamic tensor
auto dyn_tensor_manager = tensor_builder->dynamicTensorManager();
auto *tensor = ctx.tensor_registry->getITensor(ind);
assert(tensor);
dyn_tensor_manager->planDealloc(op_ind, tensor);
}
}
}
9 changes: 2 additions & 7 deletions runtime/onert/core/include/backend/cpu_common/Tensor.h
@@ -65,14 +65,9 @@ class Tensor : public IPortableTensor
}

/**
* @brief Mark this tensor does not have memory.
* Real memory deallocation should be done by caller.
* @brief Reset the buffer and deallocate the allocation if it is managed by itself
*/
void resetBuffer()
{
_allocator.reset();
_buffer = nullptr;
}
void deallocBuffer() override;

public:
uint8_t *buffer() const override { return _buffer; }
13 changes: 13 additions & 0 deletions runtime/onert/core/include/exec/DynamicShapeInferer.h
@@ -105,6 +105,19 @@ class DynamicShapeInferer : public ir::OperationVisitor
*/
void handleSimpleUnaryOp(const ir::Operation &op, const ir::OperandIndex input_idx);

// In the case of an op's output tensor, it is possible that the output
// became dynamic although it had been static before.
// Once a tensor becomes dynamic, it loses the memory that was allocated for its static
// shape. Therefore, once an output is dynamic, it must keep being treated as a dynamic
// tensor (memory is allocated at runtime). `previously` means that `dynamic` or `static`
// was set in a previous loop iteration of WHILE or in a previous call of `nnfw_run()`.
bool previously_static(backend::ITensor *op_output) { return !op_output->is_dynamic(); }

// Helper function that checks whether an op's input is static.
// Note that the input of the n-th op has been set to static or dynamic by the (n-1)-th op,
// which is why it is called `currently_static`.
bool currently_static(backend::ITensor *op_input) { return !op_input->is_dynamic(); }

private:
/**
* @brief To get operand-level info, e.g., ir::Operand::isConstant()
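A minimal, self-contained sketch of the decision rule the two helpers encode (it condenses the updated check in DynamicShapeInferer.cc further down; `FakeTensor` is a stand-in for `backend::ITensor`, only `is_dynamic()` matters):

```cpp
#include <iostream>

struct FakeTensor
{
  bool dynamic = false;
  bool is_dynamic() const { return dynamic; }
};

static bool currently_static(const FakeTensor *t) { return !t->is_dynamic(); }
static bool previously_static(const FakeTensor *t) { return !t->is_dynamic(); }

// Shape inference may be skipped only when both inputs are static *and* the output
// was not made dynamic by an earlier WHILE iteration or nnfw_run() call.
static bool can_skip_inference(const FakeTensor *lhs, const FakeTensor *rhs,
                               const FakeTensor *output)
{
  return currently_static(lhs) && currently_static(rhs) && previously_static(output);
}

int main()
{
  FakeTensor lhs, rhs, output;
  output.dynamic = true; // output became dynamic in a previous run
  std::cout << std::boolalpha << can_skip_inference(&lhs, &rhs, &output) << "\n"; // false
}
```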
@@ -61,7 +61,7 @@ void DynamicTensorManager::deallocInput(ir::OperationIndex op_ind)
_dynamic_mem_mgr->deallocate(tensor);

auto *cpu_tensor = nnfw::misc::polymorphic_downcast<cpu_common::Tensor *>(tensor);
cpu_tensor->resetBuffer();
cpu_tensor->deallocBuffer();

VERBOSE(DynamicTensorManager) << "Deallocating tensor " << (void *)cpu_tensor
<< " (input of op_ind: " << op_ind << ")" << std::endl;
13 changes: 13 additions & 0 deletions runtime/onert/core/src/backend/cpu_common/Tensor.cc
@@ -84,6 +84,19 @@ bool Tensor::applyShape(const ir::Shape &new_shape)

ir::Shape Tensor::getShape() const { return _info.shape(); }

void Tensor::deallocBuffer()
{
if (_allocator)
{
_buffer = nullptr;
_allocator.reset();
if (_dynamic_mem_mgr)
{
_dynamic_mem_mgr->deallocate(this);
}
}
}

} // namespace cpu_common
} // namespace backend
} // namespace onert
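For context, a hedged sketch of the intended buffer lifecycle of a dynamic `cpu_common::Tensor`: the include path, function name, and surrounding flow are illustrative assumptions; only `applyShape()`, `buffer()`, `is_dynamic()`, and `deallocBuffer()` come from the code above.

```cpp
#include <stdexcept>
#include <backend/cpu_common/Tensor.h> // assumed include path

// Illustrative only: resize a dynamic tensor, use it, then release its buffer.
// `t` is assumed to be a dynamic tensor backed by a dynamic memory manager;
// applyShape() (re)allocates, deallocBuffer() releases.
void resize_use_release(onert::backend::cpu_common::Tensor &t, const onert::ir::Shape &new_shape)
{
  if (!t.applyShape(new_shape))
    throw std::runtime_error("applyShape failed");

  // ... run the kernel that reads/writes t.buffer() ...

  if (t.is_dynamic())  // static tensors keep their planned memory
    t.deallocBuffer(); // resets the buffer and notifies the dynamic memory manager
}
```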
78 changes: 78 additions & 0 deletions runtime/onert/core/src/compiler/ExecutorFactory.cc
@@ -67,6 +67,27 @@ class SyncFunction final : public exec::IFunction
std::shared_ptr<backend::IConfig> _config;
};

using DeallocList = std::vector<backend::ITensor *>;
// Deallocates tensors after execution of an operation; used by the Linear Executor
class DeallocFunction final : public exec::IFunction
{
public:
DeallocFunction(const DeallocList &tensors) : _dealloc_list{tensors} {}

void run() override
{
for (auto tensor : _dealloc_list)
{
if (!tensor->is_dynamic())
continue;
tensor->deallocBuffer();
}
}

private:
DeallocList _dealloc_list;
};

void initializeSubgraphIOTensors(compiler::LoweredGraph &lowered_graph,
const backend::BackendContexts &backend_contexts,
const ir::OperandIndexSequence &indices)
@@ -267,6 +288,8 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo
const compiler::CompilerOptions &options,
const std::shared_ptr<exec::ExecutorMap> &executor_map)
{
auto graph = lowered_graph->graph();

backend::BackendContexts backend_contexts =
createBackendContexts(*lowered_graph, options.executor == "Linear");

@@ -316,6 +339,59 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo
ordered_contexts.emplace_front(pair.first, pair.second.get());
}

// Simulate the execution for deallocation of tensors
std::unordered_map<ir::OperationIndex, DeallocList> dealloc_list_map;
{
ir::OperandIndexMap<uint32_t> uses_map;
ir::OperandIndexSequence constants;

auto model_io =
(graph.getInputs() + graph.getOutputs()) | ir::Remove::UNDEFINED | ir::Remove::DUPLICATED;

// Prepare scanning
graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
uses_map[ind] = obj.getUses().size();

if (obj.isConstant())
constants.append(ind);
});

// A trick to consider constants as an exception
for (const auto &ind : constants)
{
uses_map[ind]++;
}

for (const auto op_ind : order)
{
const auto &op = graph.operations().at(op_ind);
auto op_inputs = op.getInputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;
auto op_outputs = op.getOutputs() | ir::Remove::DUPLICATED | ir::Remove::UNDEFINED;

for (const auto &ind : op_inputs)
{
const auto &operand = graph.operands().at(ind);
assert(uses_map.find(ind) != uses_map.end());
assert(uses_map[ind] > 0);
uses_map[ind]--;
if (uses_map[ind] == 0 && !operand.info().isVariable() && !model_io.contains(ind))
{
dealloc_list_map[op_ind].emplace_back(tensor_regs.getITensor(ind));
}
}
}

// Dispose and validate
for (const auto &ind : constants)
{
--uses_map[ind];
}

assert(
std::all_of(uses_map.begin(), uses_map.end(),
[](std::pair<const ir::OperandIndex, uint32_t> it) { return it.second == 0; }));
}

// Generate kernels
for (auto &pair : ordered_contexts)
{
@@ -328,6 +404,8 @@ ExecutorFactory::createLinearExecutor(std::unique_ptr<compiler::LoweredGraph> lo
auto lower_info = lowered_graph->lower_info().operation.getRawPtr(op_ind);
if (options.he_profiling_mode)
fn_seq->wrap<SyncFunction>(lower_info->backend()->config());
if (!dealloc_list_map[op_ind].empty())
fn_seq->append(std::make_unique<DeallocFunction>(dealloc_list_map[op_ind]));
builder.append(op_ind, {op_ind, &op, lower_info, std::move(fn_seq)});
}
}
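As a standalone illustration of the use-count simulation above, here is a simplified sketch with plain STL types: operand and operation indices are reduced to integers, and the handling of constants, variables, and model I/O is omitted.

```cpp
#include <cassert>
#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <vector>

// Each operation lists the operand indices it reads; uses_map counts remaining readers.
// When an input's count drops to zero at operation `op_ind`, that input can be freed
// right after `op_ind` runs -- this is what dealloc_list_map records in createLinearExecutor().
int main()
{
  // op 0 reads {0}, op 1 reads {0, 1}, op 2 reads {2}
  std::vector<std::vector<uint32_t>> op_inputs = {{0}, {0, 1}, {2}};

  std::unordered_map<uint32_t, uint32_t> uses_map;
  for (const auto &inputs : op_inputs)
    for (auto ind : inputs)
      uses_map[ind]++;

  std::unordered_map<std::size_t, std::vector<uint32_t>> dealloc_list_map;
  for (std::size_t op_ind = 0; op_ind < op_inputs.size(); ++op_ind)
  {
    for (auto ind : op_inputs[op_ind])
    {
      assert(uses_map[ind] > 0);
      if (--uses_map[ind] == 0)
        dealloc_list_map[op_ind].push_back(ind); // last use: free after this op
    }
  }

  for (const auto &it : dealloc_list_map)
    for (auto ind : it.second)
      std::cout << "free operand " << ind << " after op " << it.first << "\n";
}
```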
6 changes: 3 additions & 3 deletions runtime/onert/core/src/exec/DynamicShapeInferer.cc
@@ -48,12 +48,12 @@ void DynamicShapeInferer::handleBinaryArithmeticOp(const ir::Operation &op,
So, only when all inputs are static, we can skip dynamic shape inference.
*/
if ((!lhs->is_dynamic()) && (!rhs->is_dynamic()))
return;

auto output_idx = op.getOutputs().at(0);
auto output = _tensor_registry->getITensor(output_idx);

if ((currently_static(lhs) && currently_static(rhs)) && previously_static(output))
return;

ir::Shape new_shape = shape_inference::inferEltwiseShape(lhs_shape, rhs_shape);

output->applyShape(new_shape);
3 changes: 0 additions & 3 deletions runtime/onert/core/src/exec/FunctionSequence.cc
@@ -53,9 +53,6 @@ void FunctionSequence::run()
// run kernel
function->run();
}

// deallocate input tensors which are no longer used
_dynamic_tensor_ctx->dynamic_tensor_manager->deallocInput(op_ind);
}
else
{
