FastestDet: it has higher accuracy and faster speed than Yolo-fastest
# -*- coding: UTF-8 -*-
# @Project :group_first
# @File
# @IDE :PyCharm
# @Author :[email protected]
# @Date :2024/5/7 15:55
import json
import random
from PIL import ImageDraw, Image
import os
data_path = './data/annotations.json'
train_path = './data/train'
test_path = './data/test'
def save_json(save_path, data):
    """Write ``data`` to ``save_path`` as JSON.

    Args:
        save_path: Destination file path. The text after the last ``.`` must
            be exactly ``json``.
        data: Object to serialize; must be acceptable to ``json.dump``.

    Raises:
        AssertionError: If ``save_path`` does not end with the ``json``
            extension.
    """
    assert save_path.split('.')[-1] == 'json'
    with open(save_path, 'w') as file:
        # NOTE(review): the statement under this ``with`` was missing from the
        # file; ``json.dump`` is the presumed original - confirm.
        json.dump(data, file)
def convert(size, box):
    """Convert a corner-plus-size box to a normalized centre-plus-size box.

    Args:
        size: Pair ``(width, height)`` used as the normalization divisors.
        box: Sequence ``(x, y, w, h)`` where ``x, y`` is the corner the box
            extends from and ``w, h`` are its width and height.

    Returns:
        Tuple ``(cx, cy, w, h)``: the box centre and its size, with the
        horizontal values divided by ``size[0]`` and the vertical values
        divided by ``size[1]``.

    Raises:
        ZeroDivisionError: If either entry of ``size`` is zero.
    """
    dw = 1. / size[0]
    dh = 1. / size[1]
    # The centre is the corner plus half the extent along each axis.
    center_x = box[0] + box[2] / 2.0
    center_y = box[1] + box[3] / 2.0
    return (center_x * dw, center_y * dh, box[2] * dw, box[3] * dh)
def print_progress_bar(current, total, prefix='', suffix='', decimals=1, length=50, fill='█', print_end="\r"):
    """Print a single-line text progress bar to standard output.

    Args:
        current: Current progress value (int).
        total: Total progress value (int).
        prefix: String printed before the bar.
        suffix: String printed after the percentage.
        decimals: Number of decimal places shown in the percentage.
        length: Width of the bar in characters.
        fill: Character used for the completed part of the bar.
        print_end: String passed as ``end`` to ``print``; the default
            carriage return lets the next call overwrite the same line.

    Raises:
        ZeroDivisionError: If ``total`` is zero.
    """
    percent = ("{0:." + str(decimals) + "f}").format(100 * (current / float(total)))
    filled_length = int(length * current // total)
    bar = fill * filled_length + '-' * (length - filled_length)
    print(f'\r{prefix} |{bar}| {percent}% {suffix}', end=print_end)
    # Start a new line once the bar is complete.
    # NOTE(review): the body of this ``if`` was missing from the file; a bare
    # ``print()`` is the presumed original - confirm.
    if current == total:
        print()
def normalize_bbox(bbox, image_width, image_height):
    """Scale a corner-format bounding box by the image dimensions.

    Args:
        bbox (list or tuple): Box coordinates as
            ``[x_min, y_min, x_max, y_max]``.
        image_width (int or float): Width of the image.
        image_height (int or float): Height of the image.

    Returns:
        list: ``[x_min, y_min, x_max, y_max]`` with the x values divided by
        ``image_width`` and the y values divided by ``image_height``. The
        result lies in [0, 1] when the box lies inside the image.

    Raises:
        ZeroDivisionError: If ``image_width`` or ``image_height`` is zero.
    """
    x_min, y_min, x_max, y_max = bbox
    return [
        x_min / image_width,
        y_min / image_height,
        x_max / image_width,
        y_max / image_height,
    ]
# Collect the image ids of the training images
trains_id_ = os.listdir(train_path)
trains_id = [] # image ids
# NOTE(review): the body of this loop is missing from this view; presumably it
# fills trains_id from each directory entry - confirm.
for train_id in trains_id_:
# Collect the image ids of the test images
tests_id_ = os.listdir(test_path)
tests_id = [] # image ids
# NOTE(review): the body of this loop is missing from this view; presumably it
# fills tests_id from each directory entry - confirm.
for test_id in tests_id_:
# Load the annotations
f = open(data_path, 'r')
# NOTE(review): the right-hand side of this assignment is missing from this
# view; presumably the text read from f - confirm. No close() of the handle
# opened above is visible.
content =
annotations = json.loads(content)
# lable_name = []
# Read the category names, one per line with surrounding whitespace stripped.
with open('./dataset/trc.txt','r',encoding='utf-8') as f:
lable_name = [line.strip() for line in f.readlines()]
# NOTE(review): the body of this loop is missing from this view.
for key,value in enumerate(lable_name):
# Crop size passed to normalize_bbox inside preprocessing.
width, height = 320,240
# NOTE(review): owidth/oheight are not referenced by any visible code;
# presumably the source image size - confirm.
owidth,oheight = 2048,2048
# For every image id in s_id and every annotated object of that image: build a
# 320x240 crop window around the randomly shifted box centre, write a label
# line to "<save_path>/<id>_<n>.txt", crop the image, and append the crop's
# path to ./dataset/train.txt or ./dataset/val.txt.
#   s_id     - iterable of image ids used as keys into annotations['imgs']
#   is_train - True selects the train directories, otherwise the test/val ones
# NOTE(review): indentation and several lines (an `else:` and the `try:` lines
# matching the two `except` clauses) are missing from this view - restore
# from the original file before running.
def preprocessing(s_id,is_train):
if is_train is True:
open_path = train_path
save_path = "./dataset/train"
# NOTE(review): an `else:` presumably preceded the next two assignments - confirm.
open_path = test_path
save_path = "./dataset/val"
sum_id = s_id
for id in s_id:
# NOTE(review): sum_id is the same object as s_id, so both lengths passed here
# are equal and the bar reports 100% on every iteration.
print_progress_bar(len(sum_id), len(s_id), prefix='Progress:', suffix='Complete', length=50)
# print(f'processing: {id} ...')
target_dir = annotations['imgs'][f'{id}']
path = target_dir['path']
# Keep only the text before the first '.' of the stored path.
path = path.split('.')[0]
object = target_dir['objects']
is_save = False
# Running count of objects for this image; used as the suffix of output names.
encode = 0
for obj in object:
encode +=1
# if object[0]['category'] not in lable_name:
# lable_name.append(object[0]['category'])
xmin = obj['bbox']['xmin']
ymin = obj['bbox']['ymin']
# bottom-right point
ymax = obj['bbox']['ymax']
xmax = obj['bbox']['xmax']
# print('original coordinates:',xmin, ymin, xmax, ymax)
# Compute the centre point, shifted by a random offset of up to 100 on each axis
x_center = (xmin + xmax) / 2 + random.randint(-100, 100)
y_center = (ymin + ymax) / 2 + random.randint(-100, 100)
# print('original label box position:',xmin,ymax,xmax,ymin)
# Compute the edges of the crop window (320 wide, 240 high, around the centre)
img_xmin = x_center - 160
img_xmax = x_center + 160
img_ymin = y_center - 120
img_ymax = y_center + 120
# Box corners relative to the crop window's top-left corner.
# NOTE(review): abs() makes a negative offset positive rather than clamping it,
# so a box starting outside the window is not clipped - confirm this is intended.
new_xmin = abs(xmin -img_xmin)
new_ymin = abs(ymin -img_ymin)
new_xmax = abs(new_xmin + (xmax - xmin))
new_ymax = abs(new_ymin + (ymax - ymin))
index = lable_name.index(obj['category'])
boxx = [new_xmin, new_ymin, new_xmax, new_ymax]
size = (width, height)
# x, y, w, h = convert(size, boxx)
# NOTE(review): normalize_bbox returns [x_min, y_min, x_max, y_max], so the
# names x, y, w, h below hold normalized corners, not centre/width/height.
x, y, w, h = normalize_bbox(boxx, size[0], size[1])
# print('normalized:',x, y, w, h)
xmin = str(x)
ymin = str(y)
xmax = str(w)
ymax = str(h)
# Label line: (category index + 1) followed by the four normalized values.
content = str(index + 1) + ' ' + xmin + ' ' + ymin + ' ' + xmax + ' ' + ymax
with open(f"{save_path}/{id}_{encode}.txt", 'a') as f:
f.write(content + '\n')
# NOTE(review): the matching `try:` and this handler's body are missing from
# this view; presumably it guards lable_name.index() above - confirm.
except ValueError:
# NOTE(review): the next statement is garbled in this view; it presumably
# opens the image f'{id}.jpg' under open_path - confirm.
img =, f'{id}.jpg'))
cropped_img = img.crop((img_xmin, img_ymin, img_xmax, img_ymax))
#'./dataset/{open_path}/{train_id}.jpg'), f"{id}_{encode}.jpg"))
is_save = True
if is_save is True:
# todo: write the file path
if is_train is True:
with open(f'./dataset/train.txt', 'a', encoding='utf-8') as f:
f.write(path+ f"_{encode}.jpg" + '\n')
# NOTE(review): an `else:` presumably preceded this block - confirm.
with open(f'./dataset/val.txt', 'a', encoding='utf-8') as f:
f.write(path+ f"_{encode}.jpg" + '\n')
# NOTE(review): the matching `try:` is missing from this view.
except FileNotFoundError:
print(f'{open_path}\{id}.jpg 不存在')
# Script entry point: iterates over the (ids, is_train) pairs for the train
# and test splits.
if __name__ == '__main__':
# NOTE(review): the loop body is missing from this view; presumably it passes
# each pair to preprocessing - confirm.
for id,is_train in (trains_id,True),(tests_id,False):
# print(len(lable_name))
# with open(".\dataset\data.txt", 'w') as f:
# for i in lable_name:
# f.write(i + '\n')
D:\anaconda\envs\gd32ai\lib\site-packages\torch\ UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\TensorShape.cpp:3588.)
return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
Evaluation model:: 100%|██████████| 60/60 [00:38<00:00, 1.54it/s]
computer PR...
Evaluation model:: 100%|██████████| 60/60 [00:10<00:00, 5.74it/s]
Precision:0.281061 Recall:0.445965 AP:0.318521 F1:0.337140
- Simple, fast, compact, easy to transplant
- Less resource occupation, excellent single-core performance, lower power consumption
- Faster and smaller: trades a 0.3% loss of accuracy for a 30% increase in inference speed, while reducing the number of parameters by 25%
- Fast training with low compute requirements: training needs only 3 GB of video memory, and one COCO epoch takes only 4 minutes on a GTX 1660 Ti
- 算法介绍: 交流qq群:1062122604
Network | COCO mAP(0.5) | Resolution | Run Time(4xCore) | Run Time(1xCore) | FLOPs(G) | Params(M) |
Yolo-FastestV2 | 24.10 % | 352X352 | 3.29 ms | 5.37 ms | 0.212 | 0.25M |
Yolo-FastestV1.1 | 24.40 % | 320X320 | 4.23 ms | 7.54 ms | 0.252 | 0.35M |
Yolov4-Tiny | 40.2% | 416X416 | 26.00ms | 55.44ms | 6.9 | 5.77M |
- Test platform Mate 30 Kirin 990 CPU,Based on NCNN
- Different loss weights for different scale output layers
- The backbone is replaced with a more lightweight shufflenetV2
- The anchor matching mechanism and loss are replaced with those of YoloV5, and the classification loss is changed from sigmoid to softmax cross entropy
- Decouple the detection head into 3 branches: obj (foreground/background classification), cls (category classification), and reg (detection box regression)
pip3 install -r requirements.txt
- Picture test
python3 test.py --data data/coco.data --weights modelzoo/coco2017-0.241078ap-model.pth --img img/000139.jpg
The format of the data set is the same as that of Darknet Yolo: each image corresponds to a .txt label file. The label format also follows Darknet Yolo's data set label format, "category cx cy w h", where category is the category index, cx and cy are the normalized coordinates of the center point of the label box, and w and h are the normalized width and height of the label box. An example of the .txt label file content is as follows:
11 0.344192634561 0.611 0.416430594901 0.262 14 0.509915014164 0.51 0.974504249292 0.972
The image and its corresponding label file have the same name and are stored in the same directory. The data file structure is as follows:
. ├── train │ ├── 000001.jpg │ ├── 000001.txt │ ├── 000002.jpg │ ├── 000002.txt │ ├── 000003.jpg │ └── 000003.txt └── val ├── 000043.jpg ├── 000043.txt ├── 000057.jpg ├── 000057.txt ├── 000070.jpg └── 000070.txt
Generate a dataset path .txt file, the example content is as follows:
/home/qiuqiu/Desktop/dataset/train/000001.jpg /home/qiuqiu/Desktop/dataset/train/000002.jpg /home/qiuqiu/Desktop/dataset/train/000003.jpg
/home/qiuqiu/Desktop/dataset/val/000070.jpg /home/qiuqiu/Desktop/dataset/val/000043.jpg /home/qiuqiu/Desktop/dataset/val/000057.jpg
Generate the .names category label file, the sample content is as follows:
person bicycle car motorbike ...
The directory structure of the finally constructed training data set is as follows:
. ├── category.names # .names category label file ├── train # train dataset │ ├── 000001.jpg │ ├── 000001.txt │ ├── 000002.jpg │ ├── 000002.txt │ ├── 000003.jpg │ └── 000003.txt ├── train.txt # train dataset path .txt file ├── val # val dataset │ ├── 000043.jpg │ ├── 000043.txt │ ├── 000057.jpg │ ├── 000057.txt │ ├── 000070.jpg │ └── 000070.txt └── val.txt # val dataset path .txt file
- Generate anchor based on current dataset
python3 genanchors.py --traintxt ./train.txt
- The anchors6.txt file will be generated in the current directory; sample content of anchors6.txt is as follows:
12.64,19.39, 37.88,51.48, 55.71,138.31, 126.91,78.23, 131.57,214.55, 279.92,258.87 # anchor bias 0.636158 # iou
- Reference: ./data/coco.data
[name] model_name=coco # model name [train-configure] epochs=300 # train epochs steps=150,250 # Declining learning rate steps batch_size=64 # batch size subdivisions=1 # Same as the subdivisions of the darknet cfg file learning_rate=0.001 # learning rate [model-configure] pre_weights=None # The path to load the model, if it is none, then restart the training classes=80 # Number of detection categories width=352 # The width of the model input image height=352 # The height of the model input image anchor_num=3 # anchor num anchors=12.64,19.39, 37.88,51.48, 55.71,138.31, 126.91,78.23, 131.57,214.55, 279.92,258.87 #anchor bias [data-configure] train=/media/qiuqiu/D/coco/train2017.txt # train dataset path .txt file val=/media/qiuqiu/D/coco/val2017.txt # val dataset path .txt file names=./data/coco.names # .names category label file
- Perform training tasks
python3 train.py --data data/coco.data
- Calculate map evaluation
python3 evaluation.py --data data/coco.data --weights modelzoo/coco2017-0.241078ap-model.pth
- Convert onnx
python3 pytorch2onnx.py --data data/coco.data --weights modelzoo/coco2017-0.241078ap-model.pth --output yolo-fastestv2.onnx
- onnx-sim
python3 -m onnxsim yolo-fastestv2.onnx yolo-fastestv2-opt.onnx
- Build NCNN
git clone https://github.com/Tencent/ncnn.git cd ncnn mkdir build cd build cmake .. make make install cp -rf ./ncnn/build/install/* ~/Yolo-FastestV2/sample/ncnn
- Convert ncnn param and bin
cd ncnn/build/tools/onnx ./onnx2ncnn yolo-fastestv2-opt.onnx yolo-fastestv2.param yolo-fastestv2.bin cp yolo-fastestv2* ../ cd ../ ./ncnnoptimize yolo-fastestv2.param yolo-fastestv2.bin yolo-fastestv2-opt.param yolo-fastestv2-opt.bin 1 cp yolo-fastestv2-opt* ~/Yolo-FastestV2/sample/ncnn/model
- run sample
cd ~/Yolo-FastestV2/sample/ncnn sh ./demo