fix box bug

syswyl · Oct 30, 2020 · 631e04b · 631e04b
1 parent e090f3b
commit 631e04b
Show file tree

Hide file tree

Showing 9 changed files with 83 additions and 35 deletions.
diff --git a/modules/class_yolo_detector.hpp b/modules/class_yolo_detector.hpp
@@ -57,10 +57,12 @@ class YoloDectector
 		{
 			auto curImage = vec_ds_images.at(i);
 			auto binfo = _p_net->decodeDetections(i, curImage.getImageHeight(), curImage.getImageWidth());
+			Timer timer;
 			auto remaining = nmsAllClasses(_p_net->getNMSThresh(),
 				binfo,
 				_p_net->getNumClasses(),
 				_vec_net_type[_config.net_type]);
+			timer.out("nms");
 			if (0 == remaining.size())
 			{
 				continue;

diff --git a/modules/ds_image.cpp b/modules/ds_image.cpp
@@ -58,6 +58,7 @@ DsImage::DsImage(const cv::Mat& mat_image_, const int& inputH, const int& inputW
 		assert(0);
 	}
 
+	m_OrigImage.copyTo(m_MarkedImage);
 	m_Height = m_OrigImage.rows;
 	m_Width = m_OrigImage.cols;
 
@@ -66,6 +67,7 @@ DsImage::DsImage(const cv::Mat& mat_image_, const int& inputH, const int& inputW
 	int resizeH = ((m_Height / dim) * inputH);
 	int resizeW = ((m_Width / dim) * inputW);
 	m_ScalingFactor = static_cast<float>(resizeH) / static_cast<float>(m_Height);
+	float	m_ScalingFactorw = static_cast<float>(resizeW) / static_cast<float>(m_Width);
 
 	// Additional checks for images with non even dims
 	if ((inputW - resizeW) % 2) resizeW--;
@@ -80,10 +82,13 @@ DsImage::DsImage(const cv::Mat& mat_image_, const int& inputH, const int& inputW
 	assert(2 * m_YOffset + resizeH == inputH);
 
 	// resizing
-	cv::resize(mat_image_, m_LetterboxImage, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR);
+	cv::resize(m_OrigImage, m_LetterboxImage, cv::Size(resizeW, resizeH), 0, 0, cv::INTER_CUBIC);
+	// letterboxing
 	cv::copyMakeBorder(m_LetterboxImage, m_LetterboxImage, m_YOffset, m_YOffset, m_XOffset,
 		m_XOffset, cv::BORDER_CONSTANT, cv::Scalar(128, 128, 128));
-    cv::cvtColor(m_LetterboxImage, m_LetterboxImage,cv::COLOR_BGR2RGB);
+	cv::imwrite("letter.jpg", m_LetterboxImage);
+	// converting to RGB
+	cv::cvtColor(m_LetterboxImage, m_LetterboxImage, cv::COLOR_BGR2RGB);
 }
 DsImage::DsImage(const std::string& path, const int& inputH, const int& inputW) :
     m_Height(0),
@@ -132,7 +137,7 @@ DsImage::DsImage(const std::string& path, const int& inputH, const int& inputW)
     assert(2 * m_YOffset + resizeH == inputH);
 
     // resizing
-    cv::resize(m_OrigImage, m_LetterboxImage, cv::Size(inputW, inputH), 0, 0, cv::INTER_CUBIC);
+    cv::resize(m_OrigImage, m_LetterboxImage, cv::Size(resizeW, resizeH), 0, 0, cv::INTER_CUBIC);
     // letterboxing
 	cv::copyMakeBorder(m_LetterboxImage, m_LetterboxImage, m_YOffset, m_YOffset, m_XOffset,
 					   m_XOffset, cv::BORDER_CONSTANT, cv::Scalar(128, 128, 128));

diff --git a/modules/trt_utils.cpp b/modules/trt_utils.cpp
@@ -113,21 +113,23 @@ BBox convertBBoxNetRes(const float& bx, const float& by, const float& bw, const
 }
 
 void convertBBoxImgRes(const float scalingFactor,
-	const float& xOffset,
-	const float& yOffset,
+	const float xOffset, // subtracted from bbox.x1 / bbox.x2 below (now passed by value)
+	const float yOffset, // subtracted from bbox.y1 / bbox.y2 below (now passed by value)
     BBox& bbox)
 {
-    //// Undo Letterbox
+	    //// Undo letterbox: shift both corners back by xOffset / yOffset (bbox is modified in place)
     bbox.x1 -= xOffset;
     bbox.x2 -= xOffset;
     bbox.y1 -= yOffset;
     bbox.y2 -= yOffset;
-
-    //// Restore to input resolution
-    bbox.x1 /= scalingFactor;
-    bbox.x2 /= scalingFactor;
-    bbox.y1 /= scalingFactor;
-    bbox.y2 /= scalingFactor;
+//// Restore to input resolution: one scalingFactor divides both x and y coordinates
+	bbox.x1 /= scalingFactor; // assumes scalingFactor != 0 — nothing here guards against it
+	bbox.x2 /= scalingFactor;
+	bbox.y1 /= scalingFactor;
+	bbox.y2 /= scalingFactor;
+	std::cout << "convertBBoxImgRes" << std::endl; // NOTE(review): looks like leftover debug output; prints and flushes stdout on every call — confirm it should ship
+
+
 }
 
 void printPredictions(const BBoxInfo& b, const std::string& className)

diff --git a/modules/trt_utils.h b/modules/trt_utils.h
@@ -140,8 +140,8 @@ bool fileExists(const std::string fileName, bool verbose = true);
 BBox convertBBoxNetRes(const float& bx, const float& by, const float& bw, const float& bh,
                        const uint32_t& stride, const uint32_t& netW, const uint32_t& netH);
 void convertBBoxImgRes(const float scalingFactor,
-	const float& xOffset,
-	const float& yOffset,
+	const float xOffset,
+	const float yOffset,
 	BBox& bbox);
 void printPredictions(const BBoxInfo& info, const std::string& className);
 std::vector<std::string> loadListFromTextFile(const std::string filename);

diff --git a/modules/yolo.cpp b/modules/yolo.cpp
@@ -919,7 +919,7 @@ void Yolo::load_weights_v5(const std::string s_weights_path_,
 }
 void Yolo::doInference(const unsigned char* input, const uint32_t batchSize)
 {
-	//Timer timer;
+	Timer timer;
     assert(batchSize <= m_BatchSize && "Image batch size exceeds TRT engines batch size");
     NV_CUDA_CHECK(cudaMemcpyAsync(m_DeviceBuffers.at(m_InputBindingIndex), input,
                                   batchSize * m_InputSize * sizeof(float), cudaMemcpyHostToDevice,
@@ -933,19 +933,21 @@ void Yolo::doInference(const unsigned char* input, const uint32_t batchSize)
                                       cudaMemcpyDeviceToHost, m_CudaStream));
     }
     cudaStreamSynchronize(m_CudaStream);
-//	timer.out("inference");
+	timer.out("inference");
 }
 
 std::vector<BBoxInfo> Yolo::decodeDetections(const int& imageIdx,
 										     const int& imageH,
                                              const int& imageW)
 {
+	Timer timer; // Timer is declared elsewhere; constructed before the decode loop — presumably starts timing here (TODO confirm)
     std::vector<BBoxInfo> binfo;
     for (auto& tensor : m_OutputTensors)
     {
         std::vector<BBoxInfo> curBInfo = decodeTensor(imageIdx, imageH, imageW, tensor);
         binfo.insert(binfo.end(), curBInfo.begin(), curBInfo.end());
     }
+	timer.out("decodeDetections"); // presumably reports elapsed time under this label (Timer::out not visible here); NOTE(review): runs on every call — confirm this instrumentation is meant to stay enabled
     return binfo;
 }
 

diff --git a/modules/yolo.h b/modules/yolo.h
@@ -183,6 +183,23 @@ class Yolo
         binfo.push_back(bbi);
     }
 
+	void calcuate_letterbox_message(const int m_InputH, const int m_InputW, // Computes per-axis letterbox scale factors and offsets for an imageW x imageH image placed in an m_InputW x m_InputH input; NOTE(review): these parameters carry member-style names and hide any members of the same name inside this function
+		const int imageH, const int imageW, // source image size; assumes both are > 0 (dim is used as a divisor and is not checked)
+		float &sh,float &sw, // out: resizeH / imageH and resizeW / imageW
+		int &xOffset,int &yOffset) // out: half of the leftover width / height after the resize
+	{
+		float dim = std::max(imageW, imageH); // longer image side, converted to float so the divisions below are floating point
+		int resizeH = ((imageH / dim) * m_InputH); // float product truncated toward zero when stored in int
+		int resizeW = ((imageW / dim) * m_InputW);
+		sh = static_cast<float>(resizeH) / static_cast<float>(imageH); // NOTE(review): uses resizeH before the parity decrement below — confirm this is the scale the preprocessing really applies
+		sw = static_cast<float>(resizeW) / static_cast<float>(imageW); // same remark as for sh
+		if ((m_InputW - resizeW) % 2) resizeW--; // shrink by one pixel when the leftover width is odd so it halves exactly
+		if ((m_InputH - resizeH) % 2) resizeH--; // same adjustment for the height
+		assert((m_InputW - resizeW) % 2 == 0); // leftover width must now be even
+		assert((m_InputH - resizeH) % 2 == 0); // leftover height must now be even
+		xOffset = (m_InputW - resizeW) / 2; // half of the leftover width
+		 yOffset = (m_InputH - resizeH) / 2; // half of the leftover height
+	}
 	BBox convert_bbox_res(const float& bx, const float& by, const float& bw, const float& bh,
 		const uint32_t& stride_h_, const uint32_t& stride_w_, const uint32_t& netW, const uint32_t& netH)
 	{
@@ -204,8 +221,27 @@ class Yolo
 
 		return b;
 	}
+
+	inline void cvt_box(const float sh, // Rewrites bbox in place: subtracts the offsets, then divides x by sw and y by sh. sh is the divisor for y1 / y2
+		const float sw, // divisor for x1 / x2
+		const float xOffset, // subtracted from x1 / x2
+		const float yOffset, // subtracted from y1 / y2
+		BBox& bbox) // in/out: all four coordinates are overwritten
+	{
+		//// Undo letterbox: remove the x / y offsets from both corners
+		bbox.x1 -= xOffset;
+		bbox.x2 -= xOffset;
+		bbox.y1 -= yOffset;
+		bbox.y2 -= yOffset;
+		//// Restore to input resolution, using a separate scale per axis
+		bbox.x1 /= sw; // assumes sw != 0 — not checked here
+		bbox.x2 /= sw;
+		bbox.y1 /= sh; // assumes sh != 0 — not checked here
+		bbox.y2 /= sh;
+	}
+
 	inline void add_bbox_proposal(const float bx, const float by, const float bw, const float bh,
-		const uint32_t stride_h_, const uint32_t stride_w_,const float scale,const float xoffset_,const float yoffset, const int maxIndex, const float maxProb,
+		const uint32_t stride_h_, const uint32_t stride_w_,const float scaleH, const float scaleW, const float xoffset_,const float yoffset, const int maxIndex, const float maxProb,
 		const uint32_t 	image_w, const uint32_t image_h,
 		std::vector<BBoxInfo>& binfo)
 	{
@@ -215,7 +251,7 @@ class Yolo
 		{
 			return;
 		}
-		convertBBoxImgRes(scale,xoffset_,yoffset, bbi.box);
+		cvt_box(scaleH,scaleW,xoffset_,yoffset, bbi.box);
 		bbi.label = maxIndex;
 		bbi.prob = maxProb;
 		bbi.classId = getClassId(maxIndex);

diff --git a/modules/yolov3.cpp b/modules/yolov3.cpp
@@ -33,10 +33,11 @@ std::vector<BBoxInfo> YoloV3::decodeTensor(const int imageIdx,
 										   const int imageW,
                                            const TensorInfo& tensor)
 {
-	float scalingFactor
-		= std::min(static_cast<float>(m_InputW) / imageW, static_cast<float>(m_InputH) / imageH);
-	float xOffset = (m_InputW - scalingFactor * imageW) / 2;
-	float yOffset = (m_InputH - scalingFactor * imageH) / 2;
+	float	scale_h = 1.f;
+	float	scale_w = 1.f;
+	int	xOffset = 0;
+	int yOffset = 0;
+	calcuate_letterbox_message(m_InputH, m_InputW, imageH, imageW, scale_h, scale_w, xOffset, yOffset);
 
     const float* detections = &tensor.hostBuffer[imageIdx * tensor.volume];
 
@@ -84,7 +85,7 @@ std::vector<BBoxInfo> YoloV3::decodeTensor(const int imageIdx,
 
                 if (maxProb > m_ProbThresh)
                 {
-					add_bbox_proposal(bx, by, bw, bh, tensor.stride_h, tensor.stride_w,scalingFactor,xOffset,yOffset,maxIndex, maxProb, imageW, imageH, binfo);
+					add_bbox_proposal(bx, by, bw, bh, tensor.stride_h, tensor.stride_w, scale_h, scale_w, xOffset, yOffset, maxIndex, maxProb, imageW, imageH, binfo);
                 }
             }
         }

diff --git a/modules/yolov4.cpp b/modules/yolov4.cpp
@@ -7,10 +7,11 @@ YoloV4::YoloV4(	const NetworkInfo &network_info_,
 
 std::vector<BBoxInfo> YoloV4::decodeTensor(const int imageIdx, const int imageH, const int imageW, const TensorInfo& tensor)
 {
-	float scalingFactor
-		= std::min(static_cast<float>(m_InputW) / imageW, static_cast<float>(m_InputH) / imageH);
-	float xOffset = (m_InputW - scalingFactor * imageW) / 2;
-	float yOffset = (m_InputH - scalingFactor * imageH) / 2;
+	float	scale_h = 1.f;
+	float	scale_w = 1.f;
+	int	xOffset = 0;
+	int yOffset = 0;
+	calcuate_letterbox_message(m_InputH, m_InputW, imageH, imageW, scale_h, scale_w, xOffset, yOffset);
 
 	const float* detections = &tensor.hostBuffer[imageIdx * tensor.volume];
 
@@ -57,7 +58,7 @@ std::vector<BBoxInfo> YoloV4::decodeTensor(const int imageIdx, const int imageH,
 
 				if (maxProb > m_ProbThresh)
 				{
-					add_bbox_proposal(bx, by, bw, bh, tensor.stride_h, tensor.stride_w, scalingFactor, xOffset, yOffset, maxIndex, maxProb, imageW, imageH, binfo);
+					add_bbox_proposal(bx, by, bw, bh, tensor.stride_h, tensor.stride_w, scale_h, scale_w, xOffset, yOffset, maxIndex, maxProb, imageW, imageH, binfo);
 				}
 			}
 		}

diff --git a/modules/yolov5.cpp b/modules/yolov5.cpp
@@ -12,12 +12,11 @@ YoloV5::YoloV5(
 
 std::vector<BBoxInfo> YoloV5::decodeTensor(const int imageIdx, const int imageH, const int imageW, const TensorInfo& tensor)
 {
-
-	float scalingFactor
-		= std::min(static_cast<float>(m_InputW) / imageW, static_cast<float>(m_InputH) / imageH);
-	float xOffset = (m_InputW - scalingFactor * imageW) / 2;
-	float yOffset = (m_InputH - scalingFactor * imageH) / 2;
-
+	float	scale_h = 1.f;
+	float	scale_w = 1.f;
+	int	xOffset = 0;
+	int yOffset = 0;
+	calcuate_letterbox_message(m_InputH, m_InputW, imageH, imageW, scale_h, scale_w, xOffset, yOffset);
 	const float* detections = &tensor.hostBuffer[imageIdx * tensor.volume];
 
 	std::vector<BBoxInfo> binfo;
@@ -64,7 +63,7 @@ std::vector<BBoxInfo> YoloV5::decodeTensor(const int imageIdx, const int imageH,
 
 				if (maxProb > m_ProbThresh)
 				{
-					add_bbox_proposal(bx, by, bw, bh, tensor.stride_h, tensor.stride_w, scalingFactor, xOffset, yOffset, maxIndex, maxProb, imageW, imageH, binfo);
+					add_bbox_proposal(bx, by, bw, bh, tensor.stride_h, tensor.stride_w, scale_h, scale_w,xOffset, yOffset, maxIndex, maxProb, imageW, imageH, binfo);
 				}
 			}
 		}