forked from linghu8812/tensorrt_inference
Commit fd95ad2 (1 parent: 19d727a): 18 changed files with 2,631 additions and 1 deletion.
.gitignore
@@ -9,3 +9,4 @@
 *.pt
 *symbol.json
 *.params
+*.weights
CMakeLists.txt
@@ -0,0 +1,39 @@
cmake_minimum_required(VERSION 3.5)

project(Yolov4_trt)

set(CMAKE_CXX_STANDARD 14)

# CUDA
find_package(CUDA REQUIRED)
message(STATUS "Found CUDA headers at ${CUDA_INCLUDE_DIRS}")
message(STATUS "Found CUDA libraries: ${CUDA_LIBRARIES}")

# TensorRT
set(TENSORRT_ROOT /usr/src/tensorrt/)
find_path(TENSORRT_INCLUDE_DIR NvInfer.h
        HINTS ${TENSORRT_ROOT} PATH_SUFFIXES include/)
message(STATUS "Found TensorRT headers at ${TENSORRT_INCLUDE_DIR}")
find_library(TENSORRT_LIBRARY_INFER nvinfer
        HINTS ${TENSORRT_ROOT} ${TENSORRT_BUILD} ${CUDA_TOOLKIT_ROOT_DIR}
        PATH_SUFFIXES lib lib64 lib/x64)
find_library(TENSORRT_LIBRARY_ONNXPARSER nvonnxparser
        HINTS ${TENSORRT_ROOT} ${TENSORRT_BUILD} ${CUDA_TOOLKIT_ROOT_DIR}
        PATH_SUFFIXES lib lib64 lib/x64)
set(TENSORRT_LIBRARY ${TENSORRT_LIBRARY_INFER} ${TENSORRT_LIBRARY_ONNXPARSER})
message(STATUS "Found TensorRT libraries: ${TENSORRT_LIBRARY}")

# OpenCV
find_package(OpenCV REQUIRED)
message(STATUS "Found OpenCV headers at ${OpenCV_INCLUDE_DIRS}")
message(STATUS "Found OpenCV libraries: ${OpenCV_LIBRARIES}")

set(COMMON_INCLUDE ../includes/common)
set(YAML_INCLUDE ../includes/yaml-cpp/include)
set(YAML_LIB_DIR ../includes/yaml-cpp/libs)

include_directories(${CUDA_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${OpenCV_INCLUDE_DIRS} ${COMMON_INCLUDE} ${YAML_INCLUDE})
link_directories(${YAML_LIB_DIR})

add_executable(Yolov4_trt main.cpp Yolov4.cpp)
target_link_libraries(Yolov4_trt ${OpenCV_LIBRARIES} ${CUDA_LIBRARIES} ${TENSORRT_LIBRARY} yaml-cpp)
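Review note: `TENSORRT_ROOT` is assigned with a plain `set()`, which shadows any `-DTENSORRT_ROOT=...` cache value passed on the command line, so a non-default TensorRT install requires editing that line (or guarding it with `if(NOT DEFINED TENSORRT_ROOT)`).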
README.md
@@ -0,0 +1,32 @@
# YOLOv4 Network darknet=>ONNX=>TensorRT

## 1. Reference
- **YOLOv4:** [YOLOv4: Optimal Speed and Accuracy of Object Detection](https://arxiv.org/abs/2004.10934)
- **darknet:** [https://github.com/AlexeyAB/darknet](https://github.com/AlexeyAB/darknet)
- get the YOLOv4 weights from here: [yolov4.weights](https://drive.google.com/open?id=1sWNozS0emz7bmQTUWDLvsubLGnCwUiIS)

## 2. Export ONNX Model
```
python3 export_onnx.py
```

## 3. Build Yolov4_trt Project
```
mkdir build && cd build
cmake ..
make -j
```

## 4. Run Yolov4_trt
```
./Yolov4_trt ../config.yaml ../samples
```

## 5. Inference Time Benchmark
resolution|darknet|TensorRT
---|---|---
416x416| |4.2ms
608x608| |6.3ms

## 6. Results
![](prediction.jpg)
Yolov4.cpp
@@ -0,0 +1,284 @@
#include "Yolov4.h" | ||
#include "yaml-cpp/yaml.h" | ||
#include "common.hpp" | ||
|
||
YOLOv4::YOLOv4(const std::string &config_file) { | ||
YAML::Node root = YAML::LoadFile(config_file); | ||
YAML::Node config = root["seresnext"]; | ||
onnx_file = config["onnx_file"].as<std::string>(); | ||
engine_file = config["engine_file"].as<std::string>(); | ||
labels_file = config["labels_file"].as<std::string>(); | ||
BATCH_SIZE = config["BATCH_SIZE"].as<int>(); | ||
INPUT_CHANNEL = config["INPUT_CHANNEL"].as<int>(); | ||
IMAGE_WIDTH = config["IMAGE_WIDTH"].as<int>(); | ||
IMAGE_HEIGHT = config["IMAGE_HEIGHT"].as<int>(); | ||
obj_threshold = config["obj_threshold"].as<float>(); | ||
nms_threshold = config["nms_threshold"].as<float>(); | ||
stride = config["stride"].as<std::vector<int>>(); | ||
anchors = config["anchors"].as<std::vector<std::vector<int>>>(); | ||
coco_labels = readCOCOLabel(labels_file); | ||
CATEGORY = coco_labels.size(); | ||
grids = { | ||
{3, int(IMAGE_WIDTH / stride[0]), int(IMAGE_HEIGHT / stride[0])}, | ||
{3, int(IMAGE_WIDTH / stride[1]), int(IMAGE_HEIGHT / stride[1])}, | ||
{3, int(IMAGE_WIDTH / stride[2]), int(IMAGE_HEIGHT / stride[2])}, | ||
}; | ||
class_colors.resize(CATEGORY); | ||
srand((int) time(nullptr)); | ||
for (cv::Scalar &class_color : class_colors) | ||
class_color = cv::Scalar(rand() % 255, rand() % 255, rand() % 255); | ||
} | ||
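Review note: the constructor fixes the expected shape of `config.yaml`, which is not shown in this commit view. A minimal sketch, assuming the section is named `yolov4` and using the canonical YOLOv4 strides/anchors as placeholder values; none of the values below come from the committed file:

```yaml
# illustrative sketch only: the keys mirror the constructor's lookups,
# the values are placeholders rather than the ones shipped with this commit
yolov4:
  onnx_file: "../yolov4.onnx"
  engine_file: "../yolov4.trt"
  labels_file: "../coco.names"
  BATCH_SIZE: 1
  INPUT_CHANNEL: 3
  IMAGE_WIDTH: 608
  IMAGE_HEIGHT: 608
  obj_threshold: 0.5
  nms_threshold: 0.45
  stride: [8, 16, 32]
  anchors: [[12, 16], [19, 36], [40, 28], [36, 75], [76, 55], [72, 146],
            [142, 110], [192, 243], [459, 401]]
```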

YOLOv4::~YOLOv4() = default;

void YOLOv4::LoadEngine() {
    // deserialize the engine if a serialized file already exists,
    // otherwise build it from the ONNX model and save it
    std::fstream existEngine;
    existEngine.open(engine_file, std::ios::in);
    if (existEngine) {
        readTrtFile(engine_file, engine);
        assert(engine != nullptr);
    } else {
        onnxToTRTModel(onnx_file, engine_file, engine, BATCH_SIZE);
        assert(engine != nullptr);
    }
}
bool YOLOv4::InferenceFolder(const std::string &folder_name) {
    std::vector<std::string> sample_images = readFolder(folder_name);
    // get context
    assert(engine != nullptr);
    context = engine->createExecutionContext();
    assert(context != nullptr);

    // get buffers
    assert(engine->getNbBindings() == 2);
    void *buffers[2];
    std::vector<int64_t> bufferSize;
    int nbBindings = engine->getNbBindings();
    bufferSize.resize(nbBindings);

    for (int i = 0; i < nbBindings; ++i) {
        nvinfer1::Dims dims = engine->getBindingDimensions(i);
        nvinfer1::DataType dtype = engine->getBindingDataType(i);
        int64_t totalSize = volume(dims) * 1 * getElementSize(dtype);
        bufferSize[i] = totalSize;
        std::cout << "binding" << i << ": " << totalSize << std::endl;
        cudaMalloc(&buffers[i], totalSize);
    }

    // get stream
    cudaStream_t stream;
    cudaStreamCreate(&stream);

    int outSize = bufferSize[1] / sizeof(float) / BATCH_SIZE;

    EngineInference(sample_images, outSize, buffers, bufferSize, stream);

    // release the stream and the buffers
    cudaStreamDestroy(stream);
    cudaFree(buffers[0]);
    cudaFree(buffers[1]);

    // destroy the engine
    context->destroy();
    engine->destroy();
    // the commit never returned a value from this bool function,
    // which is undefined behavior; report success explicitly
    return true;
}

void YOLOv4::EngineInference(const std::vector<std::string> &image_list, const int &outSize, void **buffers,
                             const std::vector<int64_t> &bufferSize, cudaStream_t stream) {
    int index = 0;
    int batch_id = 0;
    std::vector<cv::Mat> vec_Mat(BATCH_SIZE);
    std::vector<std::string> vec_name(BATCH_SIZE);
    float total_time = 0;
    for (const std::string &image_name : image_list)
    {
        index++;
        std::cout << "Processing: " << image_name << std::endl;
        cv::Mat src_img = cv::imread(image_name);
        if (src_img.data)
        {
            cv::cvtColor(src_img, src_img, cv::COLOR_BGR2RGB);
            vec_Mat[batch_id] = src_img.clone();
            vec_name[batch_id] = image_name;
            batch_id++;
        }
        if (batch_id == BATCH_SIZE || index == (int)image_list.size())
        {
            auto t_start_pre = std::chrono::high_resolution_clock::now();
            std::cout << "prepareImage" << std::endl;
            std::vector<float> curInput = prepareImage(vec_Mat);
            auto t_end_pre = std::chrono::high_resolution_clock::now();
            float total_pre = std::chrono::duration<float, std::milli>(t_end_pre - t_start_pre).count();
            std::cout << "prepare image take: " << total_pre << " ms." << std::endl;
            total_time += total_pre;
            batch_id = 0;
            if (curInput.empty()) {
                std::cout << "prepare images ERROR!" << std::endl;
                continue;
            }
            // DMA the input to the GPU, execute the batch, and DMA the result back
            std::cout << "host2device" << std::endl;
            cudaMemcpyAsync(buffers[0], curInput.data(), bufferSize[0], cudaMemcpyHostToDevice, stream);

            // do inference
            std::cout << "execute" << std::endl;
            auto t_start = std::chrono::high_resolution_clock::now();
            context->execute(BATCH_SIZE, buffers);
            auto t_end = std::chrono::high_resolution_clock::now();
            float total_inf = std::chrono::duration<float, std::milli>(t_end - t_start).count();
            std::cout << "Inference take: " << total_inf << " ms." << std::endl;
            total_time += total_inf;
            std::cout << "execute success" << std::endl;
            std::cout << "device2host" << std::endl;
            std::cout << "post process" << std::endl;
            auto r_start = std::chrono::high_resolution_clock::now();
            float *out = new float[outSize * BATCH_SIZE];
            cudaMemcpyAsync(out, buffers[1], bufferSize[1], cudaMemcpyDeviceToHost, stream);
            cudaStreamSynchronize(stream);
            auto boxes = postProcess(vec_Mat, out, outSize);
            auto r_end = std::chrono::high_resolution_clock::now();
            float total_res = std::chrono::duration<float, std::milli>(r_end - r_start).count();
            std::cout << "Post process take: " << total_res << " ms." << std::endl;
            total_time += total_res;
            for (int i = 0; i < (int)vec_Mat.size(); i++)
            {
                auto org_img = vec_Mat[i];
                if (!org_img.data)  // the last batch may be partial; skip empty slots
                    continue;       // (the commit passed empty Mats to cvtColor, which throws)
                auto rects = boxes[i];
                cv::cvtColor(org_img, org_img, cv::COLOR_RGB2BGR);
                for (const auto &rect : rects)
                {
                    char t[256];
                    sprintf(t, "%.2f", rect.prob);
                    std::string name = coco_labels[rect.classes] + "-" + t;
                    cv::putText(org_img, name, cv::Point(rect.x - rect.w / 2, rect.y - rect.h / 2 - 5), cv::FONT_HERSHEY_COMPLEX, 0.7, class_colors[rect.classes], 2);
                    cv::Rect rst(rect.x - rect.w / 2, rect.y - rect.h / 2, rect.w, rect.h);
                    cv::rectangle(org_img, rst, class_colors[rect.classes], 2, cv::LINE_8, 0);
                }
                int pos = vec_name[i].find_last_of(".");
                std::string rst_name = vec_name[i].insert(pos, "_");
                std::cout << rst_name << std::endl;
                cv::imwrite(rst_name, org_img);
            }
            vec_Mat = std::vector<cv::Mat>(BATCH_SIZE);
            delete[] out;
        }
    }
    std::cout << "Average processing time is " << total_time / image_list.size() << "ms" << std::endl;
}
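Review note: the device-to-host `cudaMemcpyAsync` above targets pageable memory (the `new[]`-allocated `out` buffer), so the `cudaStreamSynchronize` that follows is what makes reading `out` safe; allocating the buffer with `cudaMallocHost` instead would be needed for the copy to genuinely overlap with other work.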

std::vector<float> YOLOv4::prepareImage(std::vector<cv::Mat> &vec_img) {
    std::vector<float> result(BATCH_SIZE * IMAGE_WIDTH * IMAGE_HEIGHT * INPUT_CHANNEL);
    float *data = result.data();
    for (const cv::Mat &src_img : vec_img)
    {
        if (!src_img.data)
            continue;
        cv::Mat flt_img;
        // plain resize to the network input size (no letterbox padding), then scale to [0, 1]
        cv::resize(src_img, flt_img, cv::Size(IMAGE_WIDTH, IMAGE_HEIGHT));
        flt_img.convertTo(flt_img, CV_32FC3, 1.0 / 255);

        // HWC to CHW
        std::vector<cv::Mat> split_img(INPUT_CHANNEL);
        cv::split(flt_img, split_img);

        int channelLength = IMAGE_WIDTH * IMAGE_HEIGHT;
        for (int i = 0; i < INPUT_CHANNEL; ++i)
        {
            memcpy(data, split_img[i].data, channelLength * sizeof(float));
            data += channelLength;
        }
    }
    return result;
}

std::vector<std::vector<YOLOv4::DetectRes>> YOLOv4::postProcess(const std::vector<cv::Mat> &vec_Mat, float *output,
                                                                const int &outSize) {
    std::vector<std::vector<DetectRes>> vec_result;
    int index = 0;
    for (const cv::Mat &src_img : vec_Mat)
    {
        std::vector<DetectRes> result;
        float *out = output + index * outSize;
        int position = 0;
        for (int n = 0; n < (int)grids.size(); n++)
        {
            for (int c = 0; c < grids[n][0]; c++)
            {
                std::vector<int> anchor = anchors[n * grids[n][0] + c];
                for (int h = 0; h < grids[n][1]; h++)
                    for (int w = 0; w < grids[n][2]; w++)
                    {
                        // each prediction row is [x, y, w, h, objectness, CATEGORY class scores]
                        float *row = out + position * (CATEGORY + 5);
                        position++;
                        DetectRes box;
                        auto max_pos = std::max_element(row + 5, row + CATEGORY + 5);
                        box.prob = sigmoid(row[4]) * sigmoid(row[max_pos - row]);
                        if (box.prob < obj_threshold)
                            continue;
                        box.classes = max_pos - row - 5;
                        // decode to center coordinates in the original image; the two grid
                        // dimensions are interchangeable here only because the input is square
                        box.x = (sigmoid(row[0]) + w) / grids[n][1] * src_img.cols;
                        box.y = (sigmoid(row[1]) + h) / grids[n][2] * src_img.rows;
                        box.w = exponential(row[2]) * anchor[0] / IMAGE_WIDTH * src_img.cols;
                        box.h = exponential(row[3]) * anchor[1] / IMAGE_HEIGHT * src_img.rows;
                        result.push_back(box);
                    }
            }
        }
        NmsDetect(result);
        vec_result.push_back(result);
        index++;
    }
    return vec_result;
}

void YOLOv4::NmsDetect(std::vector<DetectRes> &detections) {
    sort(detections.begin(), detections.end(), [=](const DetectRes &left, const DetectRes &right) {
        return left.prob > right.prob;
    });

    // zero out lower-scoring boxes of the same class whose overlap is too high,
    // then erase everything that was zeroed
    for (int i = 0; i < (int)detections.size(); i++)
        for (int j = i + 1; j < (int)detections.size(); j++)
        {
            if (detections[i].classes == detections[j].classes)
            {
                float iou = IOUCalculate(detections[i], detections[j]);
                if (iou > nms_threshold)
                    detections[j].prob = 0;
            }
        }

    detections.erase(std::remove_if(detections.begin(), detections.end(), [](const DetectRes &det)
                     { return det.prob == 0; }), detections.end());
}

float YOLOv4::IOUCalculate(const YOLOv4::DetectRes &det_a, const YOLOv4::DetectRes &det_b) {
    // DetectRes stores center coordinates (see postProcess and the drawing code),
    // so corners are x,y plus/minus half the width/height. The committed code mixed
    // the two conventions, treating x,y both as centers and as top-left corners.
    cv::Point2f center_a(det_a.x, det_a.y);
    cv::Point2f center_b(det_b.x, det_b.y);
    cv::Point2f left_up(std::min(det_a.x - det_a.w / 2, det_b.x - det_b.w / 2),
                        std::min(det_a.y - det_a.h / 2, det_b.y - det_b.h / 2));
    cv::Point2f right_down(std::max(det_a.x + det_a.w / 2, det_b.x + det_b.w / 2),
                           std::max(det_a.y + det_a.h / 2, det_b.y + det_b.h / 2));
    float distance_d = (center_a - center_b).x * (center_a - center_b).x + (center_a - center_b).y * (center_a - center_b).y;
    float distance_c = (left_up - right_down).x * (left_up - right_down).x + (left_up - right_down).y * (left_up - right_down).y;
    float inter_l = std::max(det_a.x - det_a.w / 2, det_b.x - det_b.w / 2);
    float inter_t = std::max(det_a.y - det_a.h / 2, det_b.y - det_b.h / 2);
    float inter_r = std::min(det_a.x + det_a.w / 2, det_b.x + det_b.w / 2);
    float inter_b = std::min(det_a.y + det_a.h / 2, det_b.y + det_b.h / 2);
    if (inter_b < inter_t || inter_r < inter_l)
        return 0;
    float inter_area = (inter_b - inter_t) * (inter_r - inter_l);
    float union_area = det_a.w * det_a.h + det_b.w * det_b.h - inter_area;
    if (union_area == 0)
        return 0;
    // DIoU-style score: plain IoU minus the squared center distance
    // normalized by the squared diagonal of the enclosing box
    return inter_area / union_area - distance_d / distance_c;
}

float YOLOv4::sigmoid(float in) {
    return 1.f / (1.f + exp(-in));
}

float YOLOv4::exponential(float in) {
    return exp(in);
}
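Review note: `main.cpp` is referenced by CMakeLists.txt but not shown in this commit view. A minimal sketch consistent with the class interface above and the README's usage line; the argument handling here is an assumption, not the committed file:

```cpp
// hypothetical main.cpp sketch: wires the YOLOv4 class to the command-line
// arguments shown in the README (./Yolov4_trt ../config.yaml ../samples)
#include <iostream>
#include "Yolov4.h"

int main(int argc, char **argv) {
    if (argc < 3) {
        std::cerr << "usage: " << argv[0] << " <config.yaml> <image_folder>" << std::endl;
        return -1;
    }
    YOLOv4 yolo(argv[1]);          // parse config, labels, strides, anchors
    yolo.LoadEngine();             // deserialize the engine, or build it from ONNX
    yolo.InferenceFolder(argv[2]); // run detection over every image in the folder
    return 0;
}
```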