Commit 93d6014

support output all combination of models
jakc4103 committed Mar 5, 2020
1 parent: b1907f9 · commit: 93d6014
Showing 1 changed file with 141 additions and 87 deletions.
228 changes: 141 additions & 87 deletions — convert_ncnn.py

The changed hunk (file lines 31 onward), shown as the code reads after this commit:

def get_argument():
    ...  # earlier arguments elided
parser.add_argument("--dis_num_batch", type=int, default=8)
parser.add_argument("--ncnn_build", type=str, default='/home/jakc4103/Documents/ncnn/build')
parser.add_argument("--image_path", type=str, default='/home/jakc4103/workspace/DFQ/cali_images/')
parser.add_argument("--param", type=str, default='modeling/ncnn/model_int8.param', help='filename of .param')
parser.add_argument("--bin", type=str, default='modeling/ncnn/model_int8.bin', help='filename of .bin')
parser.add_argument("--table", type=str, default='modeling/ncnn/model_int8.table', help='filename of .table')
return parser.parse_args()

class ProbModel(torch.nn.Module):
    def __init__(self, model):
        super(ProbModel, self).__init__()
        self.model = model

    def forward(self, x):
        x = self.model(x)
        x = torch.softmax(x, 1)

        return x
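# ProbModel appends a softmax to the wrapped classifier, so the exported
# ncnn model outputs class probabilities instead of raw logits.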

def main():
    args = get_argument()
    # An instance of your model
    if args.resnet:
        import torchvision.models as models
        model = models.resnet18(pretrained=True)
        model = ProbModel(model)
    else:
        model = mobilenet_v2('modeling/classification/mobilenetv2_1.0-f2a8633.pth.tar')
        model = ProbModel(model)
    model.eval()

    if args.quantize:
        data = torch.ones((4, 3, 224, 224))  # .cuda()

        if args.distill_range:
            import copy
            # define FP32 model
            model_original = copy.deepcopy(model)
            model_original.eval()
            transformer = TorchTransformer()
            transformer._build_graph(model_original, data, [QuantMeasure])
            graph = transformer.log.getGraph()
            bottoms = transformer.log.getBottoms()

            data_distill = getDistilData(model_original, 'imagenet', args.dis_batch_size, bn_merged=False,
                                         num_batch=args.dis_num_batch, gpu=True, value_range=[-2.11790393, 2.64],
                                         size=[224, 224], early_break_factor=1.2 if args.resnet else 0.5)
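            # data_distill is a batch of synthetic calibration images distilled
            # from the FP32 model (matching its BatchNorm statistics), so the
            # activation ranges can be fit without any real data.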

        transformer = TorchTransformer()
        module_dict = {}
        if args.distill_range:
            module_dict[1] = [(torch.nn.Conv2d, QConv2d), (torch.nn.Linear, QLinear)]
        else:
            module_dict[1] = [(torch.nn.Conv2d, QuantNConv2d), (torch.nn.Linear, QuantNLinear)]

        if args.relu or args.equalize:
            module_dict[0] = [(torch.nn.ReLU6, torch.nn.ReLU)]

        # transformer.summary(model, data)
        # transformer.visualize(model, data, 'graph_cls', graph_size=120)

        model, transformer = switch_layers(model, transformer, data, module_dict,
                                           ignore_layer=[QuantMeasure], quant_op=True)
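        # switch_layers rebuilds the model using module_dict: Conv2d/Linear are
        # swapped for their fake-quantized counterparts, and ReLU6 relaxes to
        # ReLU when args.relu or args.equalize is set.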

        graph = transformer.log.getGraph()
        bottoms = transformer.log.getBottoms()
        if args.distill_range:
            targ_layer = [QConv2d, QLinear]
        else:
            targ_layer = [QuantNConv2d, QuantNLinear]

        set_layer_bits(graph, args.bits_weight, args.bits_activation, args.bits_bias, targ_layer)

        model = merge_batchnorm(model, graph, bottoms, targ_layer)

        # create relations
        if args.equalize or args.distill_range:
            res = create_relation(graph, bottoms, targ_layer, delete_single=False)
            if args.equalize:
                cross_layer_equalization(graph, res, targ_layer, visualize_state=False,
                                         converge_thres=2e-7, signed=True)
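        # Cross-layer equalization rescales the weights of adjacent layers so
        # their per-channel ranges match; this is the core data-free
        # quantization (DFQ) step that makes 8-bit per-tensor quantization
        # workable without fine-tuning.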

        if args.clip_weight:
            clip_weight(graph, range_clip=[-15, 15], targ_type=targ_layer)

        if args.correction:
            bias_correction(graph, bottoms, targ_layer, bits_weight=args.bits_weight)

        if args.distill_range:
            set_update_stat(model, [QuantMeasure], True)
            model = update_quant_range(model.cuda(), data_distill, graph, bottoms)
            set_update_stat(model, [QuantMeasure], False)
        else:
            set_quant_minmax(graph, bottoms)

        torch.cuda.empty_cache()

        # restore custom conv layers to torch.nn.Conv2d
        module_dict = {}
        if args.distill_range:
            module_dict[1] = [(QConv2d, torch.nn.Conv2d), (QLinear, torch.nn.Linear)]
        else:
            module_dict[1] = [(QuantNConv2d, torch.nn.Conv2d), (QuantNLinear, torch.nn.Linear)]

        model, transformer = switch_layers(model, transformer, data, module_dict,
                                           ignore_layer=[QuantMeasure], quant_op=False)
        graph = transformer.log.getGraph()
        bottoms = transformer.log.getBottoms()
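    # For the quantized path the fake-quant wrappers are stripped again before
    # export: the ONNX graph stays plain FP32, and the ranges collected above
    # re-enter later through the ncnn calibration table.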

    # An example input you would normally provide to your model's forward() method
    x = torch.rand(1, 3, 224, 224)

    # Export the onnx model
    torch_out = torch.onnx._export(model, x, "model.onnx", export_params=True)
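    # torch.onnx._export is the internal variant of the public torch.onnx.export;
    # with a fixed-shape example input and export_params=True the public API
    # behaves the same here.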

    # Simplify model using onnx-simplifier
    os.system("python3 -m onnxsim model.onnx model-sim.onnx")
    os.system("rm model.onnx")

    cur_path = os.path.abspath(os.getcwd())
    os.system("mv model-sim.onnx {}".format(os.path.join(args.ncnn_build, 'tools/onnx', 'model-sim.onnx')))
    os.chdir(os.path.join(args.ncnn_build, 'tools/onnx'))

    # Convert onnx to ncnn
    os.system("./onnx2ncnn model-sim.onnx model.param model.bin")

    # Add input image size to .param
    lines = [line.strip() for line in open("model.param", "r")]
    with open("model.param", 'w') as ww:
        for idx, line in enumerate(lines):
            if idx == 2 and 'input' in line.lower():
                line += ' 0=224 1=224 2=3'
            ww.write(line + '\n')
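    # In an ncnn .param file the Input layer takes 0=w 1=h 2=c, so this pins
    # the network input to 224x224x3 for the conversion and quantization tools.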

os.system("mv model.param {}".format(os.path.join(args.ncnn_build, 'tools/quantize', 'model.param')))
os.system("mv model.bin {}".format(os.path.join(args.ncnn_build, 'tools/quantize', 'model.bin')))
if not os.path.exists(os.path.join(cur_path, 'modeling/ncnn')):
os.makedirs(os.path.join(cur_path, 'modeling/ncnn'))

os.system("rm model-sim.onnx")
os.chdir(os.path.join(args.ncnn_build, 'tools/quantize'))
os.system("./ncnn2table --param=model.param --bin=model.bin\
--images={} --output=model_int8.table\
--mean={},{},{} --norm={},{},{} --size=224,224 --thread=2".format(
args.image_path, 0.485*255, 0.456*255, 0.406*255, 1/(0.229*255), 1/(0.224*255), 1/(0.225*255)))

lines = [line.strip() for line in open("model.table", 'r')]

if args.quantize:
os.system("./ncnn2int8 model.param model.bin model_int8.param model_int8.bin model_int8.table")
lines = [line.strip() for line in open("model_int8.param", "r")]
with open("model_int8.param", 'w') as ww:
for idx, line in enumerate(lines):
if idx == 3 and 'input' in line:
line += ' 0=224 1=224 2=3'
os.system("mv model.param {}".format(os.path.join(args.ncnn_build, 'tools/quantize', 'model.param')))
os.system("mv model.bin {}".format(os.path.join(args.ncnn_build, 'tools/quantize', 'model.bin')))
os.chdir(os.path.join(args.ncnn_build, 'tools/quantize'))

# Estimate activation range using https://github.com/Tencent/ncnn/tree/master/tools/quantize
os.system("./ncnn2table --param=model.param --bin=model.bin\
--images={} --output=model_int8_channel.table\
--mean={},{},{} --norm={},{},{} --size=224,224 --thread=2".format(
args.image_path, 0.485*255, 0.456*255, 0.406*255, 1/(0.229*255), 1/(0.224*255), 1/(0.225*255)))
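        # ncnn preprocesses inputs as (pixel - mean) * norm, so the ImageNet
        # mean/std (0.485/0.229, ...) are rescaled by 255 here to match
        # 0-255 pixel values.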

        # modify the activation min/max range and weight min/max range to the values calculated in DFQ
        table_old = [line.strip() for line in open("model_int8_channel.table", 'r')]
        table_new = []
        count = 0
        for ii in range(2):
            for idx in graph:
                if type(graph[idx]) in [torch.nn.Conv2d, torch.nn.Linear]:
                    if ii == 0:  # min/max of the layer weight
                        mi = float(torch.min(graph[idx].weight))
                        ma = float(torch.max(graph[idx].weight))
                    else:  # min/max of the activation
                        mi = float(torch.min(graph[idx].quant.running_min))
                        ma = float(torch.max(graph[idx].quant.running_max))
                    scale = 128. / (max(abs(ma), abs(mi)))

                    if ii == 0:  # weights: one scale per output channel
                        table_new.append(' '.join(table_old[count].split(' ')[0:1] + [str(scale)] * graph[idx].weight.shape[0]))
                    else:  # activations: a single per-tensor scale
                        table_new.append(' '.join(table_old[count].split(' ')[0:1] + [str(scale)]))
                    count += 1

        with open("model_int8_tensor.table", 'w') as ww:
            for line in table_new:
                ww.write(line + '\n')
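        # ncnn2table estimates scales by KL-divergence calibration over the
        # images above; this loop keeps only the layer names (first token) and
        # overwrites the scales with ones derived from DFQ's min/max ranges.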

os.system("cp model_int8.param {}".format(os.path.join(cur_path, 'model_int8.param')))
os.system("cp model_int8.bin {}".format(os.path.join(cur_path, 'model_int8.bin')))
os.system("cp model_int8.table {}".format(os.path.join(cur_path, 'model_int8.table')))
# Convert to Int8 model
os.system("./ncnn2int8 model.param model.bin model_int8.param model_int8.bin model_int8_tensor.table")
lines = [line.strip() for line in open("model_int8.param", "r")]

os.system("cp model_int8.param {}".format(os.path.join(cur_path, args.param)))
os.system("cp model_int8.bin {}".format(os.path.join(cur_path, args.bin)))
os.system("cp model_int8_tensor.table {}".format(os.path.join(cur_path, args.table)))
else:
os.system("cp model.param {}".format(os.path.join(cur_path, 'model.param')))
os.system("cp model.bin {}".format(os.path.join(cur_path, 'model.bin')))
os.system("mv model.param {}".format(os.path.join(cur_path, args.param)))
os.system("mv model.bin {}".format(os.path.join(cur_path, args.bin)))

    os.chdir(cur_path)
    line = ' '.join([l.strip() for l in open(args.param, 'r')][-1].split()).split(' ')[1]
    print("=" * 100)
    print("Target layer name '{}'".format(line))
    print("=" * 100)

if __name__ == '__main__':
    main()
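
For reference, a typical invocation of each output combination might look like the following (flag spellings beyond --param/--bin/--table are assumptions inferred from the args.* attributes used above):

    # int8 model with DFQ equalization and bias correction
    python3 convert_ncnn.py --quantize --equalize --correction \
        --param modeling/ncnn/model_int8.param --bin modeling/ncnn/model_int8.bin --table modeling/ncnn/model_int8.table

    # plain FP32 ncnn model
    python3 convert_ncnn.py --param modeling/ncnn/model_fp32.param --bin modeling/ncnn/model_fp32.bin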
