Slightly improve yolov3 performance and change tabs to spaces (Tencent…

…#767) * Fixed a yolov3 resolution bug * Set yolo default mean to 1.0 * Fix coding style and slightly improve performance * Update mobilenet yolov3 benchmark param
try-android · Jan 25, 2019 · e6b1412 · e6b1412
1 parent 10b8ac6
commit e6b1412
Show file tree

Hide file tree

Showing 2 changed files with 133 additions and 130 deletions.
diff --git a/benchmark/mobilenet_yolov3.param b/benchmark/mobilenet_yolov3.param
@@ -1,5 +1,5 @@
 7767517
-74 76
+75 77
 Input            data             0 1 data 0=416 1=416 2=3
 Convolution      conv0            1 1 data conv0 0=32 1=3 2=1 3=2 4=1 5=1 6=864
 ReLU             conv0/relu       1 1 conv0 conv0_conv0/relu
@@ -56,21 +56,22 @@ ConvolutionDepthWise conv13/dw        1 1 conv12_conv12/relu conv13/dw 0=1024 1=
 ReLU             conv13/dw/relu   1 1 conv13/dw conv13/dw_conv13/dw/relu
 Convolution      conv13           1 1 conv13/dw_conv13/dw/relu conv13 0=1024 1=1 2=1 3=1 4=0 5=1 6=1048576
 ReLU             conv13/relu      1 1 conv13 conv13_conv13/relu
-ConvolutionDepthWise conv16/dw        1 1 conv13_conv13/relu conv16/dw 0=1024 1=3 2=1 3=1 4=1 5=1 6=9216 7=1024
-ReLU             conv16/dw/relu   1 1 conv16/dw conv16/dw_conv16/dw/relu
-Convolution      conv16           1 1 conv16/dw_conv16/dw/relu conv16 0=1024 1=1 2=1 3=1 4=0 5=1 6=1048576
-ReLU             conv16/relu      1 1 conv16 conv16_conv16/relu
-Split            splitncnn_1      1 2 conv16_conv16/relu conv16_conv16/relu_splitncnn_0 conv16_conv16/relu_splitncnn_1
-DeconvolutionDepthWise upsample         1 1 conv16_conv16/relu_splitncnn_1 upsample 0=512 1=4 2=1 3=2 4=1 5=1 6=16384 7=512
+ConvolutionDepthWise conv15/dw        1 1 conv13_conv13/relu conv15/dw 0=1024 1=3 2=1 3=1 4=1 5=1 6=9216 7=1024
+ReLU             conv15/dw/relu   1 1 conv15/dw conv15/dw_conv15/dw/relu
+Convolution      conv15           1 1 conv15/dw_conv15/dw/relu conv15 0=1024 1=1 2=1 3=1 4=0 5=1 6=1048576
+ReLU             conv15/relu      1 1 conv15 conv15_conv15/relu
+Split            splitncnn_1      1 2 conv15_conv15/relu conv15_conv15/relu_splitncnn_0 conv15_conv15/relu_splitncnn_1
+DeconvolutionDepthWise upsample         1 1 conv15_conv15/relu_splitncnn_1 upsample 0=512 1=1 2=1 3=2 4=0 5=0 6=1024 7=512
+Pooling          maxpool          1 1 upsample maxpool 0=0 1=2 2=1 3=1 4=0
 ConvolutionDepthWise conv17/dw        1 1 conv11_conv11/relu_splitncnn_0 conv17/dw 0=512 1=3 2=1 3=1 4=1 5=1 6=4608 7=512
 ReLU             conv17/dw/relu   1 1 conv17/dw conv17/dw_conv17/dw/relu
 Convolution      conv17           1 1 conv17/dw_conv17/dw/relu conv17 0=512 1=1 2=1 3=1 4=0 5=1 6=262144
 ReLU             conv17/relu      1 1 conv17 conv17_conv17/relu
-Eltwise          conv17/sum       2 1 conv17_conv17/relu upsample conv17/sum 0=1 -23301=0
+Eltwise          conv17/sum       2 1 maxpool conv17_conv17/relu conv17/sum 0=1 -23301=0
 ConvolutionDepthWise conv18/dw        1 1 conv17/sum conv18/dw 0=512 1=3 2=1 3=1 4=1 5=1 6=4608 7=512
 ReLU             conv18/dw/relu   1 1 conv18/dw conv18/dw_conv18/dw/relu
-Convolution      conv18           1 1 conv18/dw_conv18/dw/relu conv18 0=1024 1=1 2=1 3=1 4=0 5=1 6=524288
-ReLU             conv18/relu      1 1 conv18 conv18_conv18/relu
-Convolution      conv20           1 1 conv16_conv16/relu_splitncnn_0 conv20 0=75 1=1 2=1 3=1 4=0 5=1 6=76800
-Convolution      conv21           1 1 conv18_conv18/relu conv21 0=75 1=1 2=1 3=1 4=0 5=1 6=76800
-Yolov3DetectionOutput detection_out    2 1 conv20 conv21 detection_out 0=20 1=3 2=0.30000 3=0.450000 -23304=12,10.000000,14.000000,23.000000,27.000000,37.000000,58.000000,81.000000,82.000000,135.000000,169.000000,344.000000,319.000000 -23305=6,3.000000,4.000000,5.000000,0.000000,1.000000,2.000000 -23306=2,32.000000,16.000000 7=16.000000
+Convolution      conv18_new       1 1 conv18/dw_conv18/dw/relu conv18_new 0=1024 1=1 2=1 3=1 4=0 5=1 6=524288
+ReLU             conv18_new/relu  1 1 conv18_new conv18_new_conv18_new/relu
+Convolution      conv19           1 1 conv15_conv15/relu_splitncnn_0 conv19 0=75 1=1 2=1 3=1 4=0 5=1 6=76800
+Convolution      conv20           1 1 conv18_new_conv18_new/relu conv20 0=75 1=1 2=1 3=1 4=0 5=1 6=76800
+Yolov3DetectionOutput detection_out    2 1 conv19 conv20 detection_out 0=20 1=3 2=0.30000 3=0.450000 -23304=12,10.000000,14.000000,23.000000,27.000000,37.000000,58.000000,81.000000,82.000000,135.000000,169.000000,344.000000,319.000000 -23305=6,3.000000,4.000000,5.000000,0.000000,1.000000,2.000000 -23306=2,32.000000,16.000000 7=2
diff --git a/src/layer/yolov3detectionoutput.cpp b/src/layer/yolov3detectionoutput.cpp
@@ -23,9 +23,9 @@ DEFINE_LAYER_CREATOR(Yolov3DetectionOutput)
 
 Yolov3DetectionOutput::Yolov3DetectionOutput()
 {
-	one_blob_only = false;
-	support_inplace = false;
-	
+    one_blob_only = false;
+    support_inplace = false;
+    
     //softmax = ncnn::create_layer(ncnn::LayerType::Softmax);
 
     // set param
@@ -47,8 +47,8 @@ int Yolov3DetectionOutput::load_param(const ParamDict& pd)
     confidence_threshold = pd.get(2, 0.01f);
     nms_threshold = pd.get(3, 0.45f);
     biases = pd.get(4, Mat());
-	mask = pd.get(5, Mat());
-	anchors_scale = pd.get(6, Mat());
+    mask = pd.get(5, Mat());
+    anchors_scale = pd.get(6, Mat());
     return 0;
 }
 
@@ -163,117 +163,119 @@ static inline float sigmoid(float x)
 
 int Yolov3DetectionOutput::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
 {
-	// gather all box
-	std::vector<BBoxRect> all_bbox_rects;
-	std::vector<float> all_bbox_scores;
-
-	for (size_t b = 0; b < bottom_blobs.size(); b++)
-	{
-		std::vector< std::vector<BBoxRect> > all_box_bbox_rects;
-		std::vector< std::vector<float> > all_box_bbox_scores;
-		all_box_bbox_rects.resize(num_box);
-		all_box_bbox_scores.resize(num_box);
-		const Mat& bottom_top_blobs = bottom_blobs[b];
-
-		int w = bottom_top_blobs.w;
-		int h = bottom_top_blobs.h;
-		int channels = bottom_top_blobs.c;
-		//printf("%d %d %d\n", w, h, channels);
-		const int channels_per_box = channels / num_box;
-
-		// anchor coord + box score + num_class
-		if (channels_per_box != 4 + 1 + num_class)
-			return -1;
-		int mask_offset = b * num_box;
-		int net_w = (int)(anchors_scale[b] * w);
-		int net_h = (int)(anchors_scale[b] * h);
-		//printf("%d %d\n", net_w, net_h);
-
-		//printf("%d %d %d\n", w, h, channels);
+    // gather all box
+    std::vector<BBoxRect> all_bbox_rects;
+    std::vector<float> all_bbox_scores;
+
+    for (size_t b = 0; b < bottom_blobs.size(); b++)
+    {
+        std::vector< std::vector<BBoxRect> > all_box_bbox_rects;
+        std::vector< std::vector<float> > all_box_bbox_scores;
+        all_box_bbox_rects.resize(num_box);
+        all_box_bbox_scores.resize(num_box);
+        const Mat& bottom_top_blobs = bottom_blobs[b];
+
+        int w = bottom_top_blobs.w;
+        int h = bottom_top_blobs.h;
+        int channels = bottom_top_blobs.c;
+        //printf("%d %d %d\n", w, h, channels);
+        const int channels_per_box = channels / num_box;
+
+        // anchor coord + box score + num_class
+        if (channels_per_box != 4 + 1 + num_class)
+            return -1;
+        int mask_offset = b * num_box;
+        int net_w = (int)(anchors_scale[b] * w);
+        int net_h = (int)(anchors_scale[b] * h);
+        //printf("%d %d\n", net_w, net_h);
+
+        //printf("%d %d %d\n", w, h, channels);
 #pragma omp parallel for num_threads(opt.num_threads)
-		for (int pp = 0; pp < num_box; pp++)
-		{
-			int p = pp * channels_per_box;
-			int biases_index = mask[pp + mask_offset];
-			//printf("%d\n", biases_index);
-			const float bias_w = biases[biases_index * 2];
-			const float bias_h = biases[biases_index * 2 + 1];
-			//printf("%f %f\n", bias_w, bias_h);
-			const float* xptr = bottom_top_blobs.channel(p);
-			const float* yptr = bottom_top_blobs.channel(p + 1);
-			const float* wptr = bottom_top_blobs.channel(p + 2);
-			const float* hptr = bottom_top_blobs.channel(p + 3);
-
-			const float* box_score_ptr = bottom_top_blobs.channel(p + 4);
-
-			// softmax class scores
-			Mat scores = bottom_top_blobs.channel_range(p + 5, num_class);
-			//softmax->forward_inplace(scores, opt);
-
-			for (int i = 0; i < h; i++)
-			{
-				for (int j = 0; j < w; j++)
-				{
-					// region box
-					float bbox_cx = (j + sigmoid(xptr[0])) / w;
-					float bbox_cy = (i + sigmoid(yptr[0])) / h;
-					float bbox_w = exp(wptr[0]) * bias_w / net_w;
-					float bbox_h = exp(hptr[0]) * bias_h / net_h;
-
-					float bbox_xmin = bbox_cx - bbox_w * 0.5f;
-					float bbox_ymin = bbox_cy - bbox_h * 0.5f;
-					float bbox_xmax = bbox_cx + bbox_w * 0.5f;
-					float bbox_ymax = bbox_cy + bbox_h * 0.5f;
-
-					// box score
-					float box_score = sigmoid(box_score_ptr[0]);
-
-					// find class index with max class score
-					int class_index = 0;
-					float class_score = 0.f;
-					for (int q = 0; q < num_class; q++)
-					{
-						float score = sigmoid(scores.channel(q).row(i)[j]);
-						if (score > class_score)
-						{
-							class_index = q;
-							class_score = score;
-						}
-					}
-
-					//printf( "%d %f %f\n", class_index, box_score, class_score);
-
-					float confidence = box_score * class_score;
-					if (confidence >= confidence_threshold)
-					{
-						BBoxRect c = { bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax, class_index };
-						all_box_bbox_rects[pp].push_back(c);
-						all_box_bbox_scores[pp].push_back(confidence);
-					}
-
-					xptr++;
-					yptr++;
-					wptr++;
-					hptr++;
-
-					box_score_ptr++;
-				}
-			}
-		}
-
-
-
-		for (int i = 0; i < num_box; i++)
-		{
-			const std::vector<BBoxRect>& box_bbox_rects = all_box_bbox_rects[i];
-			const std::vector<float>& box_bbox_scores = all_box_bbox_scores[i];
-
-			all_bbox_rects.insert(all_bbox_rects.end(), box_bbox_rects.begin(), box_bbox_rects.end());
-			all_bbox_scores.insert(all_bbox_scores.end(), box_bbox_scores.begin(), box_bbox_scores.end());
-		}
-
-	}
-
+        for (int pp = 0; pp < num_box; pp++)
+        {
+            int p = pp * channels_per_box;
+            int biases_index = mask[pp + mask_offset];
+            //printf("%d\n", biases_index);
+            const float bias_w = biases[biases_index * 2];
+            const float bias_h = biases[biases_index * 2 + 1];
+            //printf("%f %f\n", bias_w, bias_h);
+            const float* xptr = bottom_top_blobs.channel(p);
+            const float* yptr = bottom_top_blobs.channel(p + 1);
+            const float* wptr = bottom_top_blobs.channel(p + 2);
+            const float* hptr = bottom_top_blobs.channel(p + 3);
+
+            const float* box_score_ptr = bottom_top_blobs.channel(p + 4);
+
+            // softmax class scores
+            Mat scores = bottom_top_blobs.channel_range(p + 5, num_class);
+            //softmax->forward_inplace(scores, opt);
+
+            for (int i = 0; i < h; i++)
+            {
+                for (int j = 0; j < w; j++)
+                {
+
+
+                    // box score
+                    float box_score = sigmoid(box_score_ptr[0]);
+
+                    // find class index with max class score
+                    int class_index = 0;
+                    float class_score = 0.f;
+                    for (int q = 0; q < num_class; q++)
+                    {
+                        float score = sigmoid(scores.channel(q).row(i)[j]);
+                        if (score > class_score)
+                        {
+                            class_index = q;
+                            class_score = score;
+                        }
+                    }
+
+                    //printf( "%d %f %f\n", class_index, box_score, class_score);
+
+                    float confidence = box_score * class_score;
+                    if (confidence >= confidence_threshold)
+                    {
+                                            // region box
+                        float bbox_cx = (j + sigmoid(xptr[0])) / w;
+                        float bbox_cy = (i + sigmoid(yptr[0])) / h;
+                        float bbox_w = exp(wptr[0]) * bias_w / net_w;
+                        float bbox_h = exp(hptr[0]) * bias_h / net_h;
+
+                        float bbox_xmin = bbox_cx - bbox_w * 0.5f;
+                        float bbox_ymin = bbox_cy - bbox_h * 0.5f;
+                        float bbox_xmax = bbox_cx + bbox_w * 0.5f;
+                        float bbox_ymax = bbox_cy + bbox_h * 0.5f;
+
+                        BBoxRect c = { bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax, class_index };
+                        all_box_bbox_rects[pp].push_back(c);
+                        all_box_bbox_scores[pp].push_back(confidence);
+                    }
+
+                    xptr++;
+                    yptr++;
+                    wptr++;
+                    hptr++;
+
+                    box_score_ptr++;
+                }
+            }
+        }
+
+
+
+        for (int i = 0; i < num_box; i++)
+        {
+            const std::vector<BBoxRect>& box_bbox_rects = all_box_bbox_rects[i];
+            const std::vector<float>& box_bbox_scores = all_box_bbox_scores[i];
+
+            all_bbox_rects.insert(all_bbox_rects.end(), box_bbox_rects.begin(), box_bbox_rects.end());
+            all_bbox_scores.insert(all_bbox_scores.end(), box_bbox_scores.begin(), box_bbox_scores.end());
+        }
+
+    }
+
 
     // global sort inplace
     qsort_descent_inplace(all_bbox_rects, all_bbox_scores);
@@ -295,8 +297,8 @@ int Yolov3DetectionOutput::forward(const std::vector<Mat>& bottom_blobs, std::ve
 
     // fill result
     int num_detected = bbox_rects.size();
-	Mat& top_blob = top_blobs[0];
-	top_blob.create(6, num_detected, 4u, opt.blob_allocator);
+    Mat& top_blob = top_blobs[0];
+    top_blob.create(6, num_detected, 4u, opt.blob_allocator);
     if (top_blob.empty())
         return -100;