Skip to content

Commit

Permalink
Slightly improve yolov3 performance and change tabs to spaces (Tencent…
Browse files Browse the repository at this point in the history
…#767)

* Fixed a yolov3 resolution bug

* Set yolo default mean to 1.0

* Fix coding style and slightly improve performance

* Update mobilenet yolov3 benchmark param
  • Loading branch information
eric612 authored and nihui committed Jan 25, 2019
1 parent 10b8ac6 commit e6b1412
Show file tree
Hide file tree
Showing 2 changed files with 133 additions and 130 deletions.
27 changes: 14 additions & 13 deletions benchmark/mobilenet_yolov3.param
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
7767517
74 76
75 77
Input data 0 1 data 0=416 1=416 2=3
Convolution conv0 1 1 data conv0 0=32 1=3 2=1 3=2 4=1 5=1 6=864
ReLU conv0/relu 1 1 conv0 conv0_conv0/relu
Expand Down Expand Up @@ -56,21 +56,22 @@ ConvolutionDepthWise conv13/dw 1 1 conv12_conv12/relu conv13/dw 0=1024 1=
ReLU conv13/dw/relu 1 1 conv13/dw conv13/dw_conv13/dw/relu
Convolution conv13 1 1 conv13/dw_conv13/dw/relu conv13 0=1024 1=1 2=1 3=1 4=0 5=1 6=1048576
ReLU conv13/relu 1 1 conv13 conv13_conv13/relu
ConvolutionDepthWise conv16/dw 1 1 conv13_conv13/relu conv16/dw 0=1024 1=3 2=1 3=1 4=1 5=1 6=9216 7=1024
ReLU conv16/dw/relu 1 1 conv16/dw conv16/dw_conv16/dw/relu
Convolution conv16 1 1 conv16/dw_conv16/dw/relu conv16 0=1024 1=1 2=1 3=1 4=0 5=1 6=1048576
ReLU conv16/relu 1 1 conv16 conv16_conv16/relu
Split splitncnn_1 1 2 conv16_conv16/relu conv16_conv16/relu_splitncnn_0 conv16_conv16/relu_splitncnn_1
DeconvolutionDepthWise upsample 1 1 conv16_conv16/relu_splitncnn_1 upsample 0=512 1=4 2=1 3=2 4=1 5=1 6=16384 7=512
ConvolutionDepthWise conv15/dw 1 1 conv13_conv13/relu conv15/dw 0=1024 1=3 2=1 3=1 4=1 5=1 6=9216 7=1024
ReLU conv15/dw/relu 1 1 conv15/dw conv15/dw_conv15/dw/relu
Convolution conv15 1 1 conv15/dw_conv15/dw/relu conv15 0=1024 1=1 2=1 3=1 4=0 5=1 6=1048576
ReLU conv15/relu 1 1 conv15 conv15_conv15/relu
Split splitncnn_1 1 2 conv15_conv15/relu conv15_conv15/relu_splitncnn_0 conv15_conv15/relu_splitncnn_1
DeconvolutionDepthWise upsample 1 1 conv15_conv15/relu_splitncnn_1 upsample 0=512 1=1 2=1 3=2 4=0 5=0 6=1024 7=512
Pooling maxpool 1 1 upsample maxpool 0=0 1=2 2=1 3=1 4=0
ConvolutionDepthWise conv17/dw 1 1 conv11_conv11/relu_splitncnn_0 conv17/dw 0=512 1=3 2=1 3=1 4=1 5=1 6=4608 7=512
ReLU conv17/dw/relu 1 1 conv17/dw conv17/dw_conv17/dw/relu
Convolution conv17 1 1 conv17/dw_conv17/dw/relu conv17 0=512 1=1 2=1 3=1 4=0 5=1 6=262144
ReLU conv17/relu 1 1 conv17 conv17_conv17/relu
Eltwise conv17/sum 2 1 conv17_conv17/relu upsample conv17/sum 0=1 -23301=0
Eltwise conv17/sum 2 1 maxpool conv17_conv17/relu conv17/sum 0=1 -23301=0
ConvolutionDepthWise conv18/dw 1 1 conv17/sum conv18/dw 0=512 1=3 2=1 3=1 4=1 5=1 6=4608 7=512
ReLU conv18/dw/relu 1 1 conv18/dw conv18/dw_conv18/dw/relu
Convolution conv18 1 1 conv18/dw_conv18/dw/relu conv18 0=1024 1=1 2=1 3=1 4=0 5=1 6=524288
ReLU conv18/relu 1 1 conv18 conv18_conv18/relu
Convolution conv20 1 1 conv16_conv16/relu_splitncnn_0 conv20 0=75 1=1 2=1 3=1 4=0 5=1 6=76800
Convolution conv21 1 1 conv18_conv18/relu conv21 0=75 1=1 2=1 3=1 4=0 5=1 6=76800
Yolov3DetectionOutput detection_out 2 1 conv20 conv21 detection_out 0=20 1=3 2=0.30000 3=0.450000 -23304=12,10.000000,14.000000,23.000000,27.000000,37.000000,58.000000,81.000000,82.000000,135.000000,169.000000,344.000000,319.000000 -23305=6,3.000000,4.000000,5.000000,0.000000,1.000000,2.000000 -23306=2,32.000000,16.000000 7=16.000000
Convolution conv18_new 1 1 conv18/dw_conv18/dw/relu conv18_new 0=1024 1=1 2=1 3=1 4=0 5=1 6=524288
ReLU conv18_new/relu 1 1 conv18_new conv18_new_conv18_new/relu
Convolution conv19 1 1 conv15_conv15/relu_splitncnn_0 conv19 0=75 1=1 2=1 3=1 4=0 5=1 6=76800
Convolution conv20 1 1 conv18_new_conv18_new/relu conv20 0=75 1=1 2=1 3=1 4=0 5=1 6=76800
Yolov3DetectionOutput detection_out 2 1 conv19 conv20 detection_out 0=20 1=3 2=0.30000 3=0.450000 -23304=12,10.000000,14.000000,23.000000,27.000000,37.000000,58.000000,81.000000,82.000000,135.000000,169.000000,344.000000,319.000000 -23305=6,3.000000,4.000000,5.000000,0.000000,1.000000,2.000000 -23306=2,32.000000,16.000000 7=2
236 changes: 119 additions & 117 deletions src/layer/yolov3detectionoutput.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ DEFINE_LAYER_CREATOR(Yolov3DetectionOutput)

Yolov3DetectionOutput::Yolov3DetectionOutput()
{
one_blob_only = false;
support_inplace = false;
one_blob_only = false;
support_inplace = false;
//softmax = ncnn::create_layer(ncnn::LayerType::Softmax);

// set param
Expand All @@ -47,8 +47,8 @@ int Yolov3DetectionOutput::load_param(const ParamDict& pd)
confidence_threshold = pd.get(2, 0.01f);
nms_threshold = pd.get(3, 0.45f);
biases = pd.get(4, Mat());
mask = pd.get(5, Mat());
anchors_scale = pd.get(6, Mat());
mask = pd.get(5, Mat());
anchors_scale = pd.get(6, Mat());
return 0;
}

Expand Down Expand Up @@ -163,117 +163,119 @@ static inline float sigmoid(float x)

int Yolov3DetectionOutput::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
{
// gather all box
std::vector<BBoxRect> all_bbox_rects;
std::vector<float> all_bbox_scores;

for (size_t b = 0; b < bottom_blobs.size(); b++)
{
std::vector< std::vector<BBoxRect> > all_box_bbox_rects;
std::vector< std::vector<float> > all_box_bbox_scores;
all_box_bbox_rects.resize(num_box);
all_box_bbox_scores.resize(num_box);
const Mat& bottom_top_blobs = bottom_blobs[b];

int w = bottom_top_blobs.w;
int h = bottom_top_blobs.h;
int channels = bottom_top_blobs.c;
//printf("%d %d %d\n", w, h, channels);
const int channels_per_box = channels / num_box;

// anchor coord + box score + num_class
if (channels_per_box != 4 + 1 + num_class)
return -1;
int mask_offset = b * num_box;
int net_w = (int)(anchors_scale[b] * w);
int net_h = (int)(anchors_scale[b] * h);
//printf("%d %d\n", net_w, net_h);

//printf("%d %d %d\n", w, h, channels);
// gather all box
std::vector<BBoxRect> all_bbox_rects;
std::vector<float> all_bbox_scores;

for (size_t b = 0; b < bottom_blobs.size(); b++)
{
std::vector< std::vector<BBoxRect> > all_box_bbox_rects;
std::vector< std::vector<float> > all_box_bbox_scores;
all_box_bbox_rects.resize(num_box);
all_box_bbox_scores.resize(num_box);
const Mat& bottom_top_blobs = bottom_blobs[b];

int w = bottom_top_blobs.w;
int h = bottom_top_blobs.h;
int channels = bottom_top_blobs.c;
//printf("%d %d %d\n", w, h, channels);
const int channels_per_box = channels / num_box;

// anchor coord + box score + num_class
if (channels_per_box != 4 + 1 + num_class)
return -1;
int mask_offset = b * num_box;
int net_w = (int)(anchors_scale[b] * w);
int net_h = (int)(anchors_scale[b] * h);
//printf("%d %d\n", net_w, net_h);

//printf("%d %d %d\n", w, h, channels);
#pragma omp parallel for num_threads(opt.num_threads)
for (int pp = 0; pp < num_box; pp++)
{
int p = pp * channels_per_box;
int biases_index = mask[pp + mask_offset];
//printf("%d\n", biases_index);
const float bias_w = biases[biases_index * 2];
const float bias_h = biases[biases_index * 2 + 1];
//printf("%f %f\n", bias_w, bias_h);
const float* xptr = bottom_top_blobs.channel(p);
const float* yptr = bottom_top_blobs.channel(p + 1);
const float* wptr = bottom_top_blobs.channel(p + 2);
const float* hptr = bottom_top_blobs.channel(p + 3);

const float* box_score_ptr = bottom_top_blobs.channel(p + 4);

// softmax class scores
Mat scores = bottom_top_blobs.channel_range(p + 5, num_class);
//softmax->forward_inplace(scores, opt);

for (int i = 0; i < h; i++)
{
for (int j = 0; j < w; j++)
{
// region box
float bbox_cx = (j + sigmoid(xptr[0])) / w;
float bbox_cy = (i + sigmoid(yptr[0])) / h;
float bbox_w = exp(wptr[0]) * bias_w / net_w;
float bbox_h = exp(hptr[0]) * bias_h / net_h;

float bbox_xmin = bbox_cx - bbox_w * 0.5f;
float bbox_ymin = bbox_cy - bbox_h * 0.5f;
float bbox_xmax = bbox_cx + bbox_w * 0.5f;
float bbox_ymax = bbox_cy + bbox_h * 0.5f;

// box score
float box_score = sigmoid(box_score_ptr[0]);

// find class index with max class score
int class_index = 0;
float class_score = 0.f;
for (int q = 0; q < num_class; q++)
{
float score = sigmoid(scores.channel(q).row(i)[j]);
if (score > class_score)
{
class_index = q;
class_score = score;
}
}

//printf( "%d %f %f\n", class_index, box_score, class_score);

float confidence = box_score * class_score;
if (confidence >= confidence_threshold)
{
BBoxRect c = { bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax, class_index };
all_box_bbox_rects[pp].push_back(c);
all_box_bbox_scores[pp].push_back(confidence);
}

xptr++;
yptr++;
wptr++;
hptr++;

box_score_ptr++;
}
}
}



for (int i = 0; i < num_box; i++)
{
const std::vector<BBoxRect>& box_bbox_rects = all_box_bbox_rects[i];
const std::vector<float>& box_bbox_scores = all_box_bbox_scores[i];

all_bbox_rects.insert(all_bbox_rects.end(), box_bbox_rects.begin(), box_bbox_rects.end());
all_bbox_scores.insert(all_bbox_scores.end(), box_bbox_scores.begin(), box_bbox_scores.end());
}

}

for (int pp = 0; pp < num_box; pp++)
{
int p = pp * channels_per_box;
int biases_index = mask[pp + mask_offset];
//printf("%d\n", biases_index);
const float bias_w = biases[biases_index * 2];
const float bias_h = biases[biases_index * 2 + 1];
//printf("%f %f\n", bias_w, bias_h);
const float* xptr = bottom_top_blobs.channel(p);
const float* yptr = bottom_top_blobs.channel(p + 1);
const float* wptr = bottom_top_blobs.channel(p + 2);
const float* hptr = bottom_top_blobs.channel(p + 3);

const float* box_score_ptr = bottom_top_blobs.channel(p + 4);

// softmax class scores
Mat scores = bottom_top_blobs.channel_range(p + 5, num_class);
//softmax->forward_inplace(scores, opt);

for (int i = 0; i < h; i++)
{
for (int j = 0; j < w; j++)
{


// box score
float box_score = sigmoid(box_score_ptr[0]);

// find class index with max class score
int class_index = 0;
float class_score = 0.f;
for (int q = 0; q < num_class; q++)
{
float score = sigmoid(scores.channel(q).row(i)[j]);
if (score > class_score)
{
class_index = q;
class_score = score;
}
}

//printf( "%d %f %f\n", class_index, box_score, class_score);

float confidence = box_score * class_score;
if (confidence >= confidence_threshold)
{
// region box
float bbox_cx = (j + sigmoid(xptr[0])) / w;
float bbox_cy = (i + sigmoid(yptr[0])) / h;
float bbox_w = exp(wptr[0]) * bias_w / net_w;
float bbox_h = exp(hptr[0]) * bias_h / net_h;

float bbox_xmin = bbox_cx - bbox_w * 0.5f;
float bbox_ymin = bbox_cy - bbox_h * 0.5f;
float bbox_xmax = bbox_cx + bbox_w * 0.5f;
float bbox_ymax = bbox_cy + bbox_h * 0.5f;

BBoxRect c = { bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax, class_index };
all_box_bbox_rects[pp].push_back(c);
all_box_bbox_scores[pp].push_back(confidence);
}

xptr++;
yptr++;
wptr++;
hptr++;

box_score_ptr++;
}
}
}



for (int i = 0; i < num_box; i++)
{
const std::vector<BBoxRect>& box_bbox_rects = all_box_bbox_rects[i];
const std::vector<float>& box_bbox_scores = all_box_bbox_scores[i];

all_bbox_rects.insert(all_bbox_rects.end(), box_bbox_rects.begin(), box_bbox_rects.end());
all_bbox_scores.insert(all_bbox_scores.end(), box_bbox_scores.begin(), box_bbox_scores.end());
}

}


// global sort inplace
qsort_descent_inplace(all_bbox_rects, all_bbox_scores);
Expand All @@ -295,8 +297,8 @@ int Yolov3DetectionOutput::forward(const std::vector<Mat>& bottom_blobs, std::ve

// fill result
int num_detected = bbox_rects.size();
Mat& top_blob = top_blobs[0];
top_blob.create(6, num_detected, 4u, opt.blob_allocator);
Mat& top_blob = top_blobs[0];
top_blob.create(6, num_detected, 4u, opt.blob_allocator);
if (top_blob.empty())
return -100;

Expand Down

0 comments on commit e6b1412

Please sign in to comment.