[Backend & Serving] Serving and Runtime support Clone (PaddlePaddle#464)
* Add Clone support to Serving and Runtime

* Support the TRT, OpenVINO and Paddle backends

Co-authored-by: Jason <[email protected]>
heliqi and jiangjiajun authored Nov 4, 2022
1 parent 61634ca commit 277bec3
Showing 13 changed files with 341 additions and 148 deletions.
6 changes: 6 additions & 0 deletions fastdeploy/backends/backend.h
@@ -21,6 +21,7 @@

#include "fastdeploy/backends/common/multiclass_nms.h"
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/core/fd_type.h"

namespace fastdeploy {

@@ -63,6 +64,11 @@ class BaseBackend {
virtual std::vector<TensorInfo> GetOutputInfos() = 0;
virtual bool Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) = 0;
virtual std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
int device_id = -1) {
FDERROR << "Clone no support" << std::endl;
return nullptr;
}
};

} // namespace fastdeploy
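
The new Clone() hook above lets a caller spin up additional inference contexts without re-loading the model. The following is a minimal usage sketch, not part of this commit; the helper name CloneForWorkers is a hypothetical illustration of how a serving layer might fan a loaded backend out to several worker threads.

#include <memory>
#include <utility>
#include <vector>

#include "fastdeploy/backends/backend.h"

namespace fastdeploy {

// Clone an already-initialized backend once per worker. Passing
// stream = nullptr and device_id = -1 keeps each clone on the same device,
// so backends that support it can share weights/engines with the original.
std::vector<std::unique_ptr<BaseBackend>> CloneForWorkers(BaseBackend* backend,
                                                          int num_workers) {
  std::vector<std::unique_ptr<BaseBackend>> workers;
  for (int i = 0; i < num_workers; ++i) {
    auto cloned = backend->Clone(/*stream=*/nullptr, /*device_id=*/-1);
    if (cloned) {  // backends without Clone support return nullptr
      workers.push_back(std::move(cloned));
    }
  }
  return workers;
}

}  // namespace fastdeploy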
45 changes: 37 additions & 8 deletions fastdeploy/backends/openvino/ov_backend.cc
@@ -74,6 +74,8 @@ ov::element::Type FDDataTypeToOV(const FDDataType& type) {
return ov::element::f32;
}

ov::Core OpenVINOBackend::core_;

void OpenVINOBackend::InitTensorInfo(
const std::vector<ov::Output<ov::Node>>& ov_outputs,
std::map<std::string, TensorInfo>* tensor_infos) {
@@ -96,10 +98,6 @@ bool OpenVINOBackend::InitFromPaddle(const std::string& model_file,
return false;
}
option_ = option;
ov::AnyMap properties;
if (option_.cpu_thread_num > 0) {
properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num;
}

std::shared_ptr<ov::Model> model = core_.read_model(model_file, params_file);

@@ -149,7 +147,19 @@ bool OpenVINOBackend::InitFromPaddle(const std::string& model_file,
output_infos_.push_back(iter->second);
}

ov::AnyMap properties;
if (option_.cpu_thread_num > 0) {
properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num;
}
if (option_.ov_num_streams == -1) {
properties["NUM_STREAMS"] = ov::streams::AUTO;
} else if (option_.ov_num_streams == -2) {
properties["NUM_STREAMS"] = ov::streams::NUMA;
} else if (option_.ov_num_streams > 0) {
properties["NUM_STREAMS"] = option_.ov_num_streams;
}
compiled_model_ = core_.compile_model(model, "CPU", properties);

request_ = compiled_model_.create_infer_request();
initialized_ = true;
return true;
@@ -185,10 +195,6 @@ bool OpenVINOBackend::InitFromOnnx(const std::string& model_file,
return false;
}
option_ = option;
ov::AnyMap properties;
if (option_.cpu_thread_num > 0) {
properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num;
}

std::shared_ptr<ov::Model> model = core_.read_model(model_file);

@@ -238,8 +244,21 @@ bool OpenVINOBackend::InitFromOnnx(const std::string& model_file,
output_infos_.push_back(iter->second);
}

ov::AnyMap properties;
if (option_.cpu_thread_num > 0) {
properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num;
}
if (option_.ov_num_streams == -1) {
properties["NUM_STREAMS"] = ov::streams::AUTO;
} else if (option_.ov_num_streams == -2) {
properties["NUM_STREAMS"] = ov::streams::NUMA;
} else if (option_.ov_num_streams > 0) {
properties["NUM_STREAMS"] = option_.ov_num_streams;
}
compiled_model_ = core_.compile_model(model, "CPU", properties);

request_ = compiled_model_.create_infer_request();

initialized_ = true;
return true;
}
@@ -281,4 +300,14 @@ bool OpenVINOBackend::Infer(std::vector<FDTensor>& inputs,
return true;
}

std::unique_ptr<BaseBackend> OpenVINOBackend::Clone(void *stream, int device_id) {
std::unique_ptr<BaseBackend> new_backend = utils::make_unique<OpenVINOBackend>();
auto casted_backend = dynamic_cast<OpenVINOBackend*>(new_backend.get());
casted_backend->option_ = option_;
casted_backend->request_ = compiled_model_.create_infer_request();
casted_backend->input_infos_.assign(input_infos_.begin(), input_infos_.end());
casted_backend->output_infos_.assign(output_infos_.begin(), output_infos_.end());
return new_backend;
}

} // namespace fastdeploy
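
Both InitFromPaddle and InitFromOnnx now translate the ov_num_streams option into OpenVINO's NUM_STREAMS property the same way. The helper below only illustrates that mapping (ApplyStreamOption is not a function in this commit), under the assumption that -1 means AUTO, -2 means NUMA, and positive values request an explicit stream count, as the code above does.

#include "openvino/openvino.hpp"

// Mirror of the NUM_STREAMS handling in OpenVINOBackend::InitFrom*().
static void ApplyStreamOption(int ov_num_streams, ov::AnyMap* properties) {
  if (ov_num_streams == -1) {
    (*properties)["NUM_STREAMS"] = ov::streams::AUTO;  // let OpenVINO pick
  } else if (ov_num_streams == -2) {
    (*properties)["NUM_STREAMS"] = ov::streams::NUMA;  // one stream per NUMA node
  } else if (ov_num_streams > 0) {
    (*properties)["NUM_STREAMS"] = ov_num_streams;     // explicit stream count
  }
  // Any other value leaves the OpenVINO default unchanged.
}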
10 changes: 8 additions & 2 deletions fastdeploy/backends/openvino/ov_backend.h
@@ -20,17 +20,20 @@
#include <vector>

#include "fastdeploy/backends/backend.h"
#include "fastdeploy/utils/unique_ptr.h"
#include "openvino/openvino.hpp"

namespace fastdeploy {

struct OpenVINOBackendOption {
int cpu_thread_num = 8;
int cpu_thread_num = -1;  // -1 keeps OpenVINO's default number of CPU threads
int ov_num_streams = 1;   // -1 = ov::streams::AUTO, -2 = ov::streams::NUMA, >0 = explicit count
std::map<std::string, std::vector<int64_t>> shape_infos;
};

class OpenVINOBackend : public BaseBackend {
public:
static ov::Core core_;
OpenVINOBackend() {}
virtual ~OpenVINOBackend() = default;

@@ -54,10 +57,13 @@ class OpenVINOBackend : public BaseBackend {
std::vector<TensorInfo> GetInputInfos() override;
std::vector<TensorInfo> GetOutputInfos() override;

std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
int device_id = -1) override;

private:
void InitTensorInfo(const std::vector<ov::Output<ov::Node>>& ov_outputs,
std::map<std::string, TensorInfo>* tensor_infos);
ov::Core core_;

ov::CompiledModel compiled_model_;
ov::InferRequest request_;
OpenVINOBackendOption option_;
24 changes: 24 additions & 0 deletions fastdeploy/backends/paddle/paddle_backend.cc
@@ -216,6 +216,30 @@ bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,
return true;
}

std::unique_ptr<BaseBackend> PaddleBackend::Clone(void *stream, int device_id) {
std::unique_ptr<BaseBackend> new_backend = utils::make_unique<PaddleBackend>();
auto casted_backend = dynamic_cast<PaddleBackend*>(new_backend.get());
if (device_id > 0 && option_.use_gpu && device_id != option_.gpu_id) {
auto clone_option = option_;
clone_option.gpu_id = device_id;
clone_option.external_stream_ = stream;
casted_backend->InitFromPaddle(clone_option.model_file,
clone_option.params_file,
clone_option);
FDWARNING << "The target device id:"
<< device_id
<< " is different from current device id:"
<< option_.gpu_id
<< ", cannot share memory with current engine."
<< std::endl;
return new_backend;
}
casted_backend->inputs_desc_.assign(inputs_desc_.begin(), inputs_desc_.end());
casted_backend->outputs_desc_.assign(outputs_desc_.begin(), outputs_desc_.end());
casted_backend->predictor_ = std::move(predictor_->Clone(stream));
return new_backend;
}

#ifdef ENABLE_TRT_BACKEND
void PaddleBackend::SetTRTDynamicShapeToConfig(const PaddleBackendOption& option) {
std::map<std::string, std::vector<int>> max_shape;
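
As a usage sketch (not part of the diff; CloneAcrossGpus and the device-id list are hypothetical), the branch above means a clone requested on the backend's own GPU shares weights through predictor_->Clone(), while a clone on a different GPU is rebuilt from option_.model_file / option_.params_file:

#include <memory>
#include <vector>

#include "fastdeploy/backends/paddle/paddle_backend.h"

// Clone one Paddle backend per requested GPU. Clones whose device id matches
// the original share the predictor's weights; others are re-initialized, and
// PaddleBackend::Clone() logs a warning that memory cannot be shared.
std::vector<std::unique_ptr<fastdeploy::BaseBackend>> CloneAcrossGpus(
    fastdeploy::PaddleBackend* origin, const std::vector<int>& device_ids) {
  std::vector<std::unique_ptr<fastdeploy::BaseBackend>> clones;
  for (int id : device_ids) {
    clones.push_back(origin->Clone(/*stream=*/nullptr, /*device_id=*/id));
  }
  return clones;
}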
7 changes: 7 additions & 0 deletions fastdeploy/backends/paddle/paddle_backend.h
@@ -24,6 +24,7 @@
#include "paddle2onnx/converter.h"
#endif
#include "paddle_inference_api.h" // NOLINT
#include "fastdeploy/utils/unique_ptr.h"

#ifdef ENABLE_TRT_BACKEND
#include "fastdeploy/backends/tensorrt/trt_backend.h"
@@ -43,6 +44,9 @@ struct IpuOption {
};

struct PaddleBackendOption {
std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty

#ifdef WITH_GPU
bool use_gpu = true;
#else
@@ -110,6 +114,9 @@ class PaddleBackend : public BaseBackend {

int NumOutputs() const override { return outputs_desc_.size(); }

std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
int device_id = -1) override;

TensorInfo GetInputInfo(int index) override;
TensorInfo GetOutputInfo(int index) override;
std::vector<TensorInfo> GetInputInfos() override;
55 changes: 53 additions & 2 deletions fastdeploy/backends/tensorrt/trt_backend.cc
@@ -285,6 +285,7 @@ bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
BuildTrtEngine();
}

cudaSetDevice(option_.gpu_id);
SetInputs(inputs);
AllocateOutputsBuffer(outputs);

@@ -356,13 +357,17 @@ void TrtBackend::GetInputOutputInfo() {
outputs_device_buffer_[name] = FDDeviceBuffer(dtype);
casted_output_tensors_[name] = FDTensor();
}
io_name_index_[name] = i;
}
bindings_.resize(num_binds);
}

void TrtBackend::SetInputs(const std::vector<FDTensor>& inputs) {
for (const auto& item : inputs) {
auto idx = engine_->getBindingIndex(item.name.c_str());
// auto idx = engine_->getBindingIndex(item.name.c_str());
auto iter = io_name_index_.find(item.name);
FDASSERT(iter != io_name_index_.end(), "TrtBackend::SetInputs() cannot find input name: %s.", item.name.c_str());
auto idx = iter->second;
std::vector<int> shape(item.shape.begin(), item.shape.end());
auto dims = ToDims(shape);
context_->setBindingDimensions(idx, dims);
@@ -410,7 +415,10 @@ void TrtBackend::AllocateOutputsBuffer(std::vector<FDTensor>* outputs) {
outputs->resize(outputs_desc_.size());
}
for (size_t i = 0; i < outputs_desc_.size(); ++i) {
auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str());
// auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str());
auto idx_iter = io_name_index_.find(outputs_desc_[i].name);
FDASSERT(idx_iter != io_name_index_.end(), "TrtBackend::AllocateOutputsBuffer() cannot find output name: %s.", outputs_desc_[i].name.c_str());
auto idx = idx_iter->second;
auto output_dims = context_->getBindingDimensions(idx);

// find the original index of output
@@ -673,4 +681,47 @@ std::vector<TensorInfo> TrtBackend::GetOutputInfos() {
return infos;
}

std::unique_ptr<BaseBackend> TrtBackend::Clone(void *stream, int device_id) {
std::unique_ptr<BaseBackend> new_backend = utils::make_unique<TrtBackend>();
auto casted_backend = dynamic_cast<TrtBackend*>(new_backend.get());
if (device_id > 0 && device_id != option_.gpu_id) {
auto clone_option = option_;
clone_option.gpu_id = device_id;
clone_option.external_stream_ = stream;
if (option_.model_format == ModelFormat::ONNX) {
FDASSERT(casted_backend->InitFromOnnx(option_.model_file, clone_option),
"Clone model from ONNX failed while initialize TrtBackend.");
} else {
FDASSERT(casted_backend->InitFromPaddle(option_.model_file,
option_.params_file, clone_option),
"Clone model from Paddle failed while initialize TrtBackend.");
}
FDWARNING << "The target device id:"
<< device_id
<< " is different from current device id:"
<< option_.gpu_id
<< ", cannot share memory with current engine."
<< std::endl;
return new_backend;
}
cudaSetDevice(option_.gpu_id);
casted_backend->option_.gpu_id = option_.gpu_id;
if (stream) {
casted_backend->stream_ = reinterpret_cast<cudaStream_t>(stream);
} else {
FDASSERT(cudaStreamCreate(&casted_backend->stream_) == 0,
"Error occurred while calling cudaStreamCreate() during TrtBackend::Clone().");
}
casted_backend->inputs_desc_.assign(inputs_desc_.begin(), inputs_desc_.end());
casted_backend->outputs_desc_.assign(outputs_desc_.begin(), outputs_desc_.end());
casted_backend->outputs_order_.insert(outputs_order_.begin(), outputs_order_.end());
casted_backend->shape_range_info_.insert(shape_range_info_.begin(), shape_range_info_.end());
casted_backend->engine_ = engine_;
casted_backend->context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
casted_backend->engine_->createExecutionContext());
casted_backend->GetInputOutputInfo();
FDINFO << "TRTBackend clone finish." << std::endl;
return new_backend;
}

} // namespace fastdeploy
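
A small caller-side sketch (an assumption, not in this commit; CloneWithOwnStream is hypothetical): when the clone stays on the same GPU, TrtBackend::Clone() reuses engine_ and only creates a fresh IExecutionContext, so giving each clone its own CUDA stream is enough to run inference concurrently.

#include <cuda_runtime_api.h>

#include <memory>

#include "fastdeploy/backends/tensorrt/trt_backend.h"

// Create a dedicated CUDA stream and clone the backend onto it.
// device_id = -1 keeps the clone on the original GPU, so the TensorRT engine
// is shared. The caller owns `stream` and must destroy it after the clone.
std::unique_ptr<fastdeploy::BaseBackend> CloneWithOwnStream(
    fastdeploy::TrtBackend* origin, cudaStream_t* stream) {
  if (cudaStreamCreate(stream) != cudaSuccess) {
    return nullptr;
  }
  return origin->Clone(*stream, /*device_id=*/-1);
}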
11 changes: 10 additions & 1 deletion fastdeploy/backends/tensorrt/trt_backend.h
@@ -25,6 +25,7 @@
#include "NvOnnxParser.h"
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/backends/tensorrt/utils.h"
#include "fastdeploy/utils/unique_ptr.h"

class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
public:
@@ -45,7 +46,7 @@ class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {

void writeCalibrationCache(const void* cache,
size_t length) noexcept override {
std::cout << "NOT IMPLEMENT." << std::endl;
fastdeploy::FDERROR << "NOT IMPLEMENTED." << std::endl;
}

private:
@@ -62,6 +63,11 @@ struct TrtValueInfo {
};

struct TrtBackendOption {
std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty
// format of input model
ModelFormat model_format = ModelFormat::AUTOREC;

int gpu_id = 0;
bool enable_fp16 = false;
bool enable_int8 = false;
@@ -99,6 +105,8 @@ class TrtBackend : public BaseBackend {
TensorInfo GetOutputInfo(int index);
std::vector<TensorInfo> GetInputInfos() override;
std::vector<TensorInfo> GetOutputInfos() override;
std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
int device_id = -1) override;

~TrtBackend() {
if (parser_) {
@@ -119,6 +127,7 @@ class TrtBackend : public BaseBackend {
std::vector<TrtValueInfo> outputs_desc_;
std::map<std::string, FDDeviceBuffer> inputs_device_buffer_;
std::map<std::string, FDDeviceBuffer> outputs_device_buffer_;
std::map<std::string, int> io_name_index_;

std::string calibration_str_;

27 changes: 27 additions & 0 deletions fastdeploy/core/fd_type.cc
@@ -182,4 +182,31 @@ const FDDataType TypeToDataType<uint8_t>::dtype = UINT8;
template <>
const FDDataType TypeToDataType<int8_t>::dtype = INT8;

std::string Str(const ModelFormat& f) {
if (f == ModelFormat::PADDLE) {
return "ModelFormat::PADDLE";
} else if (f == ModelFormat::ONNX) {
return "ModelFormat::ONNX";
} else if (f == ModelFormat::RKNN) {
return "ModelFormat::RKNN";
} else if (f == ModelFormat::TORCHSCRIPT) {
return "ModelFormat::TORCHSCRIPT";
}
return "UNKNOWN-ModelFormat";
}

std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
if (format == ModelFormat::PADDLE) {
out << "ModelFormat::PADDLE";
} else if (format == ModelFormat::ONNX) {
out << "ModelFormat::ONNX";
} else if (format == ModelFormat::RKNN) {
out << "ModelFormat::RKNN";
} else if (format == ModelFormat::TORCHSCRIPT) {
out << "ModelFormat::TORCHSCRIPT";
} else {
out << "UNKNOWN-ModelFormat";
}
return out;
}

} // namespace fastdeploy
12 changes: 12 additions & 0 deletions fastdeploy/core/fd_type.h
@@ -65,4 +65,16 @@ struct FASTDEPLOY_DECL TypeToDataType {
static const FDDataType dtype;
};

/*! Deep learning model format */
enum ModelFormat {
AUTOREC, ///< Automatically recognize the model format from the model file name
PADDLE, ///< Model in PaddlePaddle format
ONNX, ///< Model in ONNX format
RKNN, ///< Model in RKNN format
TORCHSCRIPT, ///< Model in TorchScript format
};

FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
const ModelFormat& format);

} // namespace fastdeploy
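
A tiny usage sketch of the new ModelFormat enum and its stream operator (only operator<< is visible as declared in this header; whether Str() is also exported here cannot be seen in the diff):

#include <iostream>

#include "fastdeploy/core/fd_type.h"

int main() {
  fastdeploy::ModelFormat format = fastdeploy::ModelFormat::ONNX;
  // Prints "ModelFormat::ONNX" through the operator<< declared above.
  std::cout << format << std::endl;
  return 0;
}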