# transformer-qkv.py
#
# Post-training quantization (PTQ) experiment: prepares a timm Swin Transformer
# with MQBench for the TensorRT backend, calibrates it on a few batches, and
# logs the resulting accuracy.
import mqbench,torch,torchvision,numpy as np,matplotlib.pyplot as plt,torchvision,torchvision.models as models,timm,timm.models as models,torch.nn as nn
from torchmetrics import ConfusionMatrix
from mqbench.prepare_by_platform import prepare_by_platform
from mqbench.prepare_by_platform import BackendType
from mqbench.utils.state import enable_calibration
from mqbench.utils.state import enable_quantization
from mqbench.convert_deploy import convert_deploy
from tqdm import tqdm
from mqbench.utils.logger import logger as log
from mqbench.fake_quantize.lsq import LearnableFakeQuantize
from dataset import get_dataloader
from timm.models.swin_transformer import SwinTransformer
from logger import get_logger
from mqbench.utils.registry import DEFAULT_MODEL_QUANTIZER
from torch.fx.graph_module import GraphModule
# Print the MQBench model-quantizer registry.
print(DEFAULT_MODEL_QUANTIZER)

# get_logger is a project helper; its second return value is kept as `workdir`.
# `log` is bound to the same object, so it no longer refers to the MQBench
# logger imported under that name at the top of the file.
log, workdir = get_logger("SwinQuant-qkv+Conv+Linear(tensorrt-default)")
logger = log

# CUDA device used for the models and input batches below.
device = torch.device('cuda')

# Per-channel statistics rescaled from the 0-255 range to 0-1
# (presumably the ImageNet mean/std -- TODO confirm; unused in this file).
mean = np.divide(np.array([123.675, 116.28, 103.53]), 255)
std = np.divide(np.array([58.395, 57.12, 57.375]), 255)
class My(nn.Module):
    """Minimal module whose forward pass returns six times its input.

    Not referenced by the rest of the visible script.
    """

    def forward(self, x):
        """Triple ``x`` and add the result to itself (``6 * x`` overall)."""
        tripled = x * 3
        return tripled + tripled
def _int8_symmetric_qscheme(per_channel):
    """Return an 8-bit symmetric qscheme dict without power-of-two scales."""
    return {
        'bit': 8,
        'symmetry': True,
        'per_channel': per_channel,
        'pot_scale': False,
    }


# Observer / fake-quantize overrides handed to MQBench through
# `prepare_custom_config_dict` further down.  The `w_*` entries configure
# weights and the `a_*` entries configure activations.
# 'FixedFakeQuantize' is the alternative previously tried for activations.
extra_qconfig_dict = {
    'w_observer': 'MinMaxObserver',
    'a_observer': 'MSEObserver',
    'w_fakequantize': 'FixedFakeQuantize',
    'a_fakequantize': 'LearnableFakeQuantize',
    'w_qscheme': _int8_symmetric_qscheme(per_channel=True),
    'a_qscheme': _int8_symmetric_qscheme(per_channel=False),
}
# Record the quantization configuration next to the results.
logger.info(extra_qconfig_dict)

# Number of batches fed through the model for calibration and for the
# accuracy estimate.  The evaluation loop previously stopped at `i > 32`
# (33 batches) while calibration stopped at `i >= 32` (32 batches); both
# loops now use the same `>=` test.
NUM_CALIB_BATCHES = 32
NUM_EVAL_BATCHES = 32

# Create the pretrained Swin model and let MQBench insert the fake-quantize
# nodes for the TensorRT backend, using the overrides in extra_qconfig_dict.
# (A 'resnet18' timm model was used here previously as an alternative.)
model = timm.create_model('swin_base_patch4_window7_224', pretrained=True).to(device)
prepare_custom_config_dict = {'extra_qconfig_dict': extra_qconfig_dict}
model = prepare_by_platform(
    model, BackendType.Tensorrt, prepare_custom_config_dict
).to(device)

# Unquantized reference copy of the same network.  It is only needed for the
# optional agreement check described in the evaluation loop below.
ori = timm.create_model('swin_base_patch4_window7_224', pretrained=True).to(device)
ori.eval()

# Post-training quantization: switch to eval mode and enable calibration.
model.eval()
enable_calibration(model)

dataloader = get_dataloader(model)
with torch.no_grad():
    for i, (img, label) in enumerate(tqdm(dataloader)):
        if i >= NUM_CALIB_BATCHES:
            break
        # Outputs are discarded; the forward pass only feeds the observers.
        model(img.to(device))

# Enable quantization so that the forward pass simulates quantized backend
# inference.
enable_quantization(model)

from torchmetrics import Accuracy

# NOTE(review): newer torchmetrics releases require a `task=` argument here;
# confirm against the installed version.
acc = Accuracy().to(device)
with torch.no_grad():
    for i, (img, label) in enumerate(tqdm(dataloader)):
        if i >= NUM_EVAL_BATCHES:
            break
        img = img.to(device)
        # Keep predictions and labels on the same device as the metric state
        # instead of copying both to the CPU for every batch.
        # To measure agreement with the unquantized model instead, compare
        # torch.argmax(model(img), dim=-1) with torch.argmax(ori(img), dim=-1).
        acc.update(model(img), label.to(device))
logger.info(f"最终的精度是:{acc.compute()}")