[Benchmark] Add pure runtime benchmark bin (PaddlePaddle#1731)
* [Benchmark] Add pure runtime benchmark bin

* [Benchmark] Add CPU/GPU memory collection -> benchmark bin

* [Backend] Update trt max_batch_size policy

* [Backend] Update trt backend max_batch_size policy

* [Benchmark] Add more model format support -> benchmark bin
DefTruth authored Mar 30, 2023
1 parent f4a7008 commit 7d1b706
Showing 5 changed files with 351 additions and 2 deletions.
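The new benchmark binary is driven entirely by command-line flags (--model and --config_path come from the shared flags.h included below; the rest are defined in benchmark.cc). A minimal usage sketch, with all paths and flag values hypothetical:

// Pure runtime profiling of a Paddle model:
//   ./benchmark --model ./ResNet50 --config_path ./config.txt \
//     --names inputs --shapes 1,3,224,224 --dtypes FP32
// Only print the model's input tensor infos:
//   ./benchmark --model ./ResNet50 --config_path ./config.txt --info
// Check the numerical diff between two dumped output tensors:
//   ./benchmark --diff --tensors tensor_a.txt:tensor_b.txt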
4 changes: 4 additions & 0 deletions benchmark/cpp/CMakeLists.txt
@@ -23,6 +23,8 @@ add_executable(benchmark_ppocr_det ${PROJECT_SOURCE_DIR}/benchmark_ppocr_det.cc)
add_executable(benchmark_ppocr_cls ${PROJECT_SOURCE_DIR}/benchmark_ppocr_cls.cc)
add_executable(benchmark_ppocr_rec ${PROJECT_SOURCE_DIR}/benchmark_ppocr_rec.cc)

add_executable(benchmark ${PROJECT_SOURCE_DIR}/benchmark.cc)

if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_ppyolov5 ${FASTDEPLOY_LIBS} gflags pthread)
@@ -38,6 +40,7 @@ if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark ${FASTDEPLOY_LIBS} gflags pthread)
else()
target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_ppyolov5 ${FASTDEPLOY_LIBS} gflags)
@@ -53,6 +56,7 @@ else()
target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark ${FASTDEPLOY_LIBS} gflags)
endif()
# only for Android ADB test
if(ANDROID)
295 changes: 295 additions & 0 deletions benchmark/cpp/benchmark.cc
@@ -0,0 +1,295 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/function/functions.h"
#include "flags.h"
#include "macros.h"
#include "option.h"

namespace vision = fastdeploy::vision;
namespace benchmark = fastdeploy::benchmark;

DEFINE_string(shapes, "1,3,224,224", "Set input shape for model.");
DEFINE_string(names, "DEFAULT", "Set input names for model.");
DEFINE_string(dtypes, "FP32", "Set input dtypes for model.");
DEFINE_string(trt_shapes, "1,3,224,224:1,3,224,224:1,3,224,224",
"Set min/opt/max shape for trt/paddle_trt backend, "
"e.g. --trt_shapes 1,3,224,224:1,3,224,224:1,3,224,224");
DEFINE_int32(batch, 1, "TRT max batch size, default=1");
DEFINE_bool(dump, false, "Whether to dump output tensors.");
DEFINE_bool(info, false, "Only check the input infos of the model.");
DEFINE_bool(diff, false, "Check the diff between two tensors.");
DEFINE_string(tensors, "tensor_a.txt:tensor_b.txt",
"The paths to dumped tensors.");
DEFINE_bool(mem, false, "Whether to force collection of memory info.");
DEFINE_int32(interval, -1, "Sampling interval for collecting memory info.");
DEFINE_string(model_file, "UNKNOWN",
"Optional, set a specific model file, "
"e.g. model.pdmodel, model.onnx");
DEFINE_string(params_file, "",
"Optional, set a specific params file, "
"e.g. model.pdiparams.");
DEFINE_string(model_format, "PADDLE",
"Optional, set a specific model format, "
"e.g. PADDLE/ONNX/RKNN/TORCHSCRIPT/SOPHGO");

static std::vector<int64_t> GetInt64Shape(const std::vector<int>& shape) {
std::vector<int64_t> new_shape;
new_shape.resize(shape.size());
for (int i = 0; i < shape.size(); ++i) {
new_shape[i] = static_cast<int64_t>(shape[i]);
}
return new_shape;
}

static fastdeploy::ModelFormat GetModelFormat(const std::string& model_format) {
if (model_format == "PADDLE") {
return fastdeploy::ModelFormat::PADDLE;
} else if (model_format == "ONNX") {
return fastdeploy::ModelFormat::ONNX;
} else if (model_format == "RKNN") {
return fastdeploy::ModelFormat::RKNN;
} else if (model_format == "TORCHSCRIPT") {
return fastdeploy::ModelFormat::TORCHSCRIPT;
} else if (model_format == "SOPHGO") {
return fastdeploy::ModelFormat::SOPHGO;
} else {
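// Unknown format strings fall back to PADDLE.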
return fastdeploy::ModelFormat::PADDLE;
}
}

static void CheckTensorDiff(int argc, char* argv[]) {
google::ParseCommandLineFlags(&argc, &argv, true);
std::cout << "Check tensor diff ..." << std::endl;
std::vector<std::string> tensor_paths =
benchmark::ResultManager::SplitStr(FLAGS_tensors);
assert(tensor_paths.size() == 2);
fastdeploy::FDTensor tensor_a, tensor_b;
benchmark::ResultManager::LoadFDTensor(&tensor_a, tensor_paths[0]);
benchmark::ResultManager::LoadFDTensor(&tensor_b, tensor_paths[1]);
auto tensor_diff =
benchmark::ResultManager::CalculateDiffStatis(tensor_a, tensor_b);
std::cout << "Tensor diff: mean=" << tensor_diff.data.mean
<< ", max=" << tensor_diff.data.max
<< ", min=" << tensor_diff.data.min << std::endl;
}

static void RuntimeProfiling(int argc, char* argv[]) {
// Init runtime option
auto option = fastdeploy::RuntimeOption();
if (!CreateRuntimeOption(&option, argc, argv, true)) {
return;
}
std::unordered_map<std::string, std::string> config_info;
benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
&config_info);

// Init log recorder
std::stringstream ss;
ss.precision(6);

// Memory resource monitor
int sampling_interval = FLAGS_interval >= 1
? FLAGS_interval
: std::stoi(config_info["sampling_interval"]);

benchmark::ResourceUsageMonitor resource_moniter(
sampling_interval, std::stoi(config_info["device_id"]));

// Check model path and model format
std::string model_name, params_name, config_name;
std::string model_file, params_file;
auto model_format = fastdeploy::ModelFormat::PADDLE;
if (FLAGS_model_file != "UNKNOWN") {
// Set model file/param/format via command line
model_file = FLAGS_model + sep + FLAGS_model_file;
params_file = FLAGS_model + sep + FLAGS_params_file;
model_format = GetModelFormat(FLAGS_model_format);
if (model_format == fastdeploy::ModelFormat::PADDLE
&& FLAGS_params_file == "") {
std::cout << "[ERROR] params_file can not be empty for PADDLE"
<< " format, Please, set your custom params_file manually."
<< std::endl;
return;
}
} else {
// Set model file/params/format via model dir (currently only
// supported for the Paddle model format)
if (!UpdateModelResourceName(&model_name, &params_name, &config_name,
&model_format, config_info, false)) {
return;
}
model_file = FLAGS_model + sep + model_name;
params_file = FLAGS_model + sep + params_name;
}

option.SetModelPath(model_file, params_file, model_format);

// Get input shapes/names/dtypes
std::vector<std::vector<int32_t>> input_shapes =
benchmark::ResultManager::GetInputShapes(FLAGS_shapes);
std::vector<std::string> input_names =
benchmark::ResultManager::GetInputNames(FLAGS_names);
std::vector<fastdeploy::FDDataType> input_dtypes =
benchmark::ResultManager::GetInputDtypes(FLAGS_dtypes);

// Set tensorrt shapes
if (config_info["backend"] == "paddle_trt") {
option.paddle_infer_option.collect_trt_shape = true;
}
if (config_info["backend"] == "paddle_trt" ||
config_info["backend"] == "trt") {
option.trt_option.max_batch_size = FLAGS_batch;
std::vector<std::vector<int32_t>> trt_shapes =
benchmark::ResultManager::GetInputShapes(FLAGS_trt_shapes);
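// trt_shapes is a flat list of [min, opt, max] triples, one triple per
// named input, in the same order as --names.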
if (input_names[0] == "DEFAULT") {
std::cout << "Please set the input names for TRT/Paddle-TRT backend!"
<< std::endl;
return;
}
assert(input_names.size() == (trt_shapes.size() / 3));
for (size_t i = 0; i < input_names.size(); ++i) {
option.trt_option.SetShape(input_names[i], trt_shapes[i * 3],
trt_shapes[i * 3 + 1], trt_shapes[i * 3 + 2]);
}
}

// Init runtime
fastdeploy::Runtime runtime;
if (!runtime.Init(option)) {
std::cout << "Failed to initialize the Runtime!" << std::endl;
return;
}

// Check default input names
if (input_names[0] == "DEFAULT") {
input_names.clear();
for (int i = 0; i < runtime.NumInputs(); ++i) {
input_names.push_back(runtime.GetInputInfo(i).name);
}
}

assert(runtime.NumInputs() == input_shapes.size());
assert(runtime.NumInputs() == input_names.size());
assert(runtime.NumInputs() == input_dtypes.size());

// Feed inputs; all values are set to 1.
std::vector<fastdeploy::FDTensor> inputs(runtime.NumInputs());
for (int i = 0; i < inputs.size(); ++i) {
fastdeploy::function::Full(1, GetInt64Shape(input_shapes[i]), &inputs[i],
input_dtypes[i]);
inputs[i].name = input_names[i];
}

// Start memory resource monitor
if (config_info["collect_memory_info"] == "true" || FLAGS_mem) {
resource_moniter.Start();
}

// Run runtime profiling
std::vector<fastdeploy::FDTensor> outputs;
if (!runtime.Infer(inputs, &outputs)) {
std::cerr << "Failed to predict." << std::endl;
ss << "Runtime(ms): Failed" << std::endl;
if (config_info["collect_memory_info"] == "true") {
ss << "cpu_rss_mb: Failed" << std::endl;
ss << "gpu_rss_mb: Failed" << std::endl;
ss << "gpu_util: Failed" << std::endl;
resource_moniter.Stop();
}
benchmark::ResultManager::SaveBenchmarkResult(ss.str(),
config_info["result_path"]);
return;
}

double profile_time = runtime.GetProfileTime() * 1000.0;
std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl;
ss << "Runtime(ms): " << profile_time << "ms." << std::endl;

// Collect memory info
if (config_info["collect_memory_info"] == "true" || FLAGS_mem) {
float cpu_mem = resource_moniter.GetMaxCpuMem();
float gpu_mem = resource_moniter.GetMaxGpuMem();
float gpu_util = resource_moniter.GetMaxGpuUtil();
std::cout << "cpu_rss_mb: " << cpu_mem << "MB." << std::endl;
ss << "cpu_rss_mb: " << cpu_mem << "MB." << std::endl;
std::cout << "gpu_rss_mb: " << gpu_mem << "MB." << std::endl;
ss << "gpu_rss_mb: " << gpu_mem << "MB." << std::endl;
std::cout << "gpu_util: " << gpu_util << std::endl;
ss << "gpu_util: " << gpu_util << "MB." << std::endl;
resource_moniter.Stop();
}
benchmark::ResultManager::SaveBenchmarkResult(ss.str(),
config_info["result_path"]);

// Dump output tensors
if (FLAGS_dump) {
for (int i = 0; i < outputs.size(); ++i) {
auto name_tokens =
benchmark::ResultManager::SplitStr(outputs[i].name, '/');
std::string out_name = name_tokens[0];
for (int j = 1; j < name_tokens.size(); ++j) {
out_name += "_";
out_name += name_tokens[j];
}
std::string out_file = config_info["backend"] + "_" + out_name + ".txt";
benchmark::ResultManager::SaveFDTensor(outputs[i], out_file);
outputs[i].PrintInfo();
std::cout << "Saved: " << out_file << std::endl;
}
}
}

static void ShowInputInfos(int argc, char* argv[]) {
auto option = fastdeploy::RuntimeOption();
if (!CreateRuntimeOption(&option, argc, argv, true)) {
return;
}
std::unordered_map<std::string, std::string> config_info;
benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
&config_info);
std::string model_name, params_name, config_name;
auto model_format = fastdeploy::ModelFormat::PADDLE;
if (!UpdateModelResourceName(&model_name, &params_name, &config_name,
&model_format, config_info, false)) {
return;
}
auto model_file = FLAGS_model + sep + model_name;
auto params_file = FLAGS_model + sep + params_name;

option.SetModelPath(model_file, params_file, model_format);

// Init runtime
fastdeploy::Runtime runtime;
if (!runtime.Init(option)) {
std::cout << "Failed to initialize the Runtime!" << std::endl;
return;
}
// Show input tensor infos
auto input_infos = runtime.GetInputInfos();
for (int i = 0; i < input_infos.size(); ++i) {
std::cout << input_infos[i] << std::endl;
}
}

int main(int argc, char* argv[]) {
google::ParseCommandLineFlags(&argc, &argv, true);
#if defined(ENABLE_BENCHMARK)
if (FLAGS_diff) {
CheckTensorDiff(argc, argv);
return 0;
} else if (FLAGS_info) {
ShowInputInfos(argc, argv);
return 0;
} else {
RuntimeProfiling(argc, argv);
return 0;
}
#endif
return 0;
}
41 changes: 41 additions & 0 deletions fastdeploy/benchmark/utils.cc
@@ -404,6 +404,47 @@ std::vector<std::vector<int32_t>> ResultManager::GetInputShapes(
return shapes;
}
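
// Illustrative examples for the helpers added below (inputs hypothetical):
//   GetInputNames("x:y")         -> {"x", "y"}
//   GetInputDtypes("FP32:INT64") -> {FDDataType::FP32, FDDataType::INT64}
//   SplitStr("a.txt:b.txt")      -> {"a.txt", "b.txt"}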

std::vector<std::string> ResultManager::GetInputNames(
const std::string& raw_names) {
std::vector<std::string> names_tokens;
Split(raw_names, names_tokens, ':');
return names_tokens;
}

std::vector<std::string> ResultManager::SplitStr(const std::string& raw_str,
char delim) {
std::vector<std::string> str_tokens;
Split(raw_str, str_tokens, delim);
return str_tokens;
}

std::vector<FDDataType> ResultManager::GetInputDtypes(
const std::string& raw_dtypes) {
std::vector<FDDataType> dtypes;
std::vector<std::string> dtypes_tokens;
Split(raw_dtypes, dtypes_tokens, ':');
for (const auto& dtype : dtypes_tokens) {
if (dtype == "FP32") {
dtypes.push_back(FDDataType::FP32);
} else if (dtype == "INT32") {
dtypes.push_back(FDDataType::INT32);
} else if (dtype == "INT64") {
dtypes.push_back(FDDataType::INT64);
} else if (dtype == "INT8") {
dtypes.push_back(FDDataType::INT8);
} else if (dtype == "UINT8") {
dtypes.push_back(FDDataType::UINT8);
} else if (dtype == "FP16") {
dtypes.push_back(FDDataType::FP16);
} else if (dtype == "FP64") {
dtypes.push_back(FDDataType::FP64);
} else {
dtypes.push_back(FDDataType::FP32); // default
}
}
return dtypes;
}

#if defined(ENABLE_VISION)
bool ResultManager::SaveDetectionResult(const vision::DetectionResult& res,
const std::string& path) {
8 changes: 8 additions & 0 deletions fastdeploy/benchmark/utils.h
@@ -153,6 +153,14 @@ struct FASTDEPLOY_DECL ResultManager {
/// Get Input Shapes
static std::vector<std::vector<int32_t>> GetInputShapes(
const std::string& raw_shapes);
/// Get Input Names
static std::vector<std::string> GetInputNames(
const std::string& raw_names);
/// Get Input Dtypes
static std::vector<FDDataType> GetInputDtypes(const std::string& raw_dtypes);
/// Split string
static std::vector<std::string> SplitStr(const std::string& raw_str,
char delim = ':');
#if defined(ENABLE_VISION)
/// Save & Load functions for basic results.
static bool SaveDetectionResult(const vision::DetectionResult& res,
5 changes: 3 additions & 2 deletions fastdeploy/runtime/backends/tensorrt/trt_backend.cc
@@ -559,8 +559,9 @@ bool TrtBackend::BuildTrtEngine() {
context_.reset();
engine_.reset();
}

builder_->setMaxBatchSize(option_.max_batch_size);
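// Only forward max_batch_size to TensorRT when it was explicitly set to a
// positive value; otherwise keep the builder's default.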
if (option_.max_batch_size >= 1) {
builder_->setMaxBatchSize(option_.max_batch_size);
}
config->setMaxWorkspaceSize(option_.max_workspace_size);
auto profile = builder_->createOptimizationProfile();
for (const auto& item : shape_range_info_) {
