-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtrain_mobilenet.py
172 lines (138 loc) · 6.95 KB
/
train_mobilenet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import os
import torch
import torchvision
from torchvision.ops import misc
import transforms
from network_files.faster_rcnn_framework import FasterRCNN
from network_files.rpn_function import AnchorsGenerator
from backbone.mobilenetv2_model import MobileNetV2
from my_dataset import VOC2012DataSet
from train_utils import train_eval_utils as utils
def create_model(num_classes):
# https://download.pytorch.org/models/mobilenet_v2-b0353104.pth
backbone = MobileNetV2(weights_path="./backbone/mobilenet_v2.pth").features
backbone.out_channels = 1280
anchor_generator = AnchorsGenerator(sizes=((32, 64, 128, 256, 512),),
aspect_ratios=((0.5, 1.0, 2.0),))
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'], # 在哪些特征层上进行roi pooling
output_size=[7, 7], # roi_pooling输出特征矩阵尺寸
sampling_ratio=2) # 采样率
model = FasterRCNN(backbone=backbone,
num_classes=num_classes,
rpn_anchor_generator=anchor_generator,
box_roi_pool=roi_pooler)
return model
def main():
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Using {} device training.".format(device.type))
# 检查保存权重文件夹是否存在,不存在则创建
if not os.path.exists("save_weights"):
os.makedirs("save_weights")
data_transform = {
"train": transforms.Compose([transforms.ToTensor(),
transforms.RandomHorizontalFlip(0.5)]),
"val": transforms.Compose([transforms.ToTensor()])
}
VOC_root = "./"
# check voc root
if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False:
raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))
# load train data set
train_data_set = VOC2012DataSet(VOC_root, data_transform["train"], True)
# 注意这里的collate_fn是自定义的,因为读取的数据包括image和targets,不能直接使用默认的方法合成batch
batch_size = 8
nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers
print('Using %g dataloader workers' % nw)
train_data_loader = torch.utils.data.DataLoader(train_data_set,
batch_size=batch_size,
shuffle=True,
num_workers=nw,
collate_fn=train_data_set.collate_fn)
# load validation data set
val_data_set = VOC2012DataSet(VOC_root, data_transform["val"], False)
val_data_set_loader = torch.utils.data.DataLoader(val_data_set,
batch_size=batch_size,
shuffle=False,
num_workers=nw,
collate_fn=train_data_set.collate_fn)
# create model num_classes equal background + 20 classes
model = create_model(num_classes=21)
# print(model)
model.to(device)
train_loss = []
learning_rate = []
val_mAP = []
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# first frozen backbone and train 5 epochs #
# 首先冻结前置特征提取网络权重(backbone),训练rpn以及最终预测网络部分 #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
for param in model.backbone.parameters():
param.requires_grad = False
# define optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
momentum=0.9, weight_decay=0.0005)
num_epochs = 5
for epoch in range(num_epochs):
# train for one epoch, printing every 10 iterations
utils.train_one_epoch(model, optimizer, train_data_loader,
device, epoch, print_freq=50,
train_loss=train_loss, train_lr=learning_rate)
# evaluate on the test dataset
utils.evaluate(model, val_data_set_loader, device=device, mAP_list=val_mAP)
torch.save(model.state_dict(), "./save_weights/pretrain.pth")
# # # # # # # # # # # # # # # # # # # # # # # # # # # #
# second unfrozen backbone and train all network #
# 解冻前置特征提取网络权重(backbone),接着训练整个网络权重 #
# # # # # # # # # # # # # # # # # # # # # # # # # # # #
# 冻结backbone部分底层权重
for name, parameter in model.backbone.named_parameters():
split_name = name.split(".")[0]
if split_name in ["0", "1", "2", "3"]:
parameter.requires_grad = False
else:
parameter.requires_grad = True
# define optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
momentum=0.9, weight_decay=0.0005)
# learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
step_size=5,
gamma=0.33)
num_epochs = 20
for epoch in range(num_epochs):
# train for one epoch, printing every 50 iterations
utils.train_one_epoch(model, optimizer, train_data_loader,
device, epoch, print_freq=50,
train_loss=train_loss, train_lr=learning_rate)
# update the learning rate
lr_scheduler.step()
# evaluate on the test dataset
utils.evaluate(model, val_data_set_loader, device=device, mAP_list=val_mAP)
# save weights
if epoch > 10:
save_files = {
'model': model.state_dict(),
'optimizer': optimizer.state_dict(),
'lr_scheduler': lr_scheduler.state_dict(),
'epoch': epoch}
torch.save(save_files, "./save_weights/mobile-model-{}.pth".format(epoch))
# plot loss and lr curve
if len(train_loss) != 0 and len(learning_rate) != 0:
from plot_curve import plot_loss_and_lr
plot_loss_and_lr(train_loss, learning_rate)
# plot mAP curve
if len(val_mAP) != 0:
from plot_curve import plot_map
plot_map(val_mAP)
# model.eval()
# x = [torch.rand(3, 300, 400), torch.rand(3, 400, 400)]
# predictions = model(x)
# print(predictions)
if __name__ == "__main__":
version = torch.version.__version__[:5] # example: 1.6.0
# 因为使用的官方的混合精度训练是1.6.0后才支持的,所以必须大于等于1.6.0
if version < "1.6.0":
raise EnvironmentError("pytorch version must be 1.6.0 or above")
main()