Commit 93d6014

support output all combination of models
jakc4103 committed Mar 5, 2020
1 parent: b1907f9 · commit: 93d6014
Showing 1 changed file with 141 additions and 87 deletions.
228 changes: 141 additions & 87 deletions — convert_ncnn.py

The changed hunk (file lines 31 onward), shown as the code reads after this commit:

def get_argument():
    ...  # earlier arguments elided
parser.add_argument("--dis_num_batch", type=int, default=8)
parser.add_argument("--ncnn_build", type=str, default='/home/jakc4103/Documents/ncnn/build')
parser.add_argument("--image_path", type=str, default='/home/jakc4103/workspace/DFQ/cali_images/')
parser.add_argument("--param", type=str, default='modeling/ncnn/model_int8.param', help='filename of .param')
parser.add_argument("--bin", type=str, default='modeling/ncnn/model_int8.bin', help='filename of .bin')
parser.add_argument("--table", type=str, default='modeling/ncnn/model_int8.table', help='filename of .table')
return parser.parse_args()

class ProbModel(torch.nn.Module):
    def __init__(self, model):
        super(ProbModel, self).__init__()
        self.model = model

    def forward(self, x):
        x = self.model(x)
        x = torch.softmax(x, 1)

        return x
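# ProbModel appends a softmax to the wrapped classifier, so the exported
# ncnn model outputs class probabilities instead of raw logits.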

def main():
    args = get_argument()
    # An instance of your model
    if args.resnet:
        import torchvision.models as models
        model = models.resnet18(pretrained=True)
        model = ProbModel(model)
    else:
        model = mobilenet_v2('modeling/classification/mobilenetv2_1.0-f2a8633.pth.tar')
        model = ProbModel(model)
    model.eval()

    if args.quantize:
        data = torch.ones((4, 3, 224, 224))  # .cuda()

        if args.distill_range:
            import copy
            # define FP32 model
            model_original = copy.deepcopy(model)
            model_original.eval()
            transformer = TorchTransformer()
            transformer._build_graph(model_original, data, [QuantMeasure])
            graph = transformer.log.getGraph()
            bottoms = transformer.log.getBottoms()

            data_distill = getDistilData(model_original, 'imagenet', args.dis_batch_size, bn_merged=False,
                                         num_batch=args.dis_num_batch, gpu=True, value_range=[-2.11790393, 2.64],
                                         size=[224, 224], early_break_factor=1.2 if args.resnet else 0.5)
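            # data_distill is a batch of synthetic calibration images distilled
            # from the FP32 model (matching its BatchNorm statistics), so the
            # activation ranges can be fit without any real data.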

        transformer = TorchTransformer()
        module_dict = {}
        if args.distill_range:
            module_dict[1] = [(torch.nn.Conv2d, QConv2d), (torch.nn.Linear, QLinear)]
        else:
            module_dict[1] = [(torch.nn.Conv2d, QuantNConv2d), (torch.nn.Linear, QuantNLinear)]

        if args.relu or args.equalize:
            module_dict[0] = [(torch.nn.ReLU6, torch.nn.ReLU)]

        # transformer.summary(model, data)
        # transformer.visualize(model, data, 'graph_cls', graph_size=120)

        model, transformer = switch_layers(model, transformer, data, module_dict,
                                           ignore_layer=[QuantMeasure], quant_op=True)
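        # switch_layers rebuilds the model using module_dict: Conv2d/Linear are
        # swapped for their fake-quantized counterparts, and ReLU6 relaxes to
        # ReLU when args.relu or args.equalize is set.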

        graph = transformer.log.getGraph()
        bottoms = transformer.log.getBottoms()
        if args.distill_range:
            targ_layer = [QConv2d, QLinear]
        else:
            targ_layer = [QuantNConv2d, QuantNLinear]

        set_layer_bits(graph, args.bits_weight, args.bits_activation, args.bits_bias, targ_layer)

        model = merge_batchnorm(model, graph, bottoms, targ_layer)

        # create relations
        if args.equalize or args.distill_range:
            res = create_relation(graph, bottoms, targ_layer, delete_single=False)
            if args.equalize:
                cross_layer_equalization(graph, res, targ_layer, visualize_state=False,
                                         converge_thres=2e-7, signed=True)
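        # Cross-layer equalization rescales the weights of adjacent layers so
        # their per-channel ranges match; this is the core data-free
        # quantization (DFQ) step that makes 8-bit per-tensor quantization
        # workable without fine-tuning.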

        if args.clip_weight:
            clip_weight(graph, range_clip=[-15, 15], targ_type=targ_layer)

        if args.correction:
            bias_correction(graph, bottoms, targ_layer, bits_weight=args.bits_weight)

        if args.distill_range:
            set_update_stat(model, [QuantMeasure], True)
            model = update_quant_range(model.cuda(), data_distill, graph, bottoms)
            set_update_stat(model, [QuantMeasure], False)
        else:
            set_quant_minmax(graph, bottoms)

        torch.cuda.empty_cache()

        # restore custom conv layers to torch.nn.Conv2d
        module_dict = {}
        if args.distill_range:
            module_dict[1] = [(QConv2d, torch.nn.Conv2d), (QLinear, torch.nn.Linear)]
        else:
            module_dict[1] = [(QuantNConv2d, torch.nn.Conv2d), (QuantNLinear, torch.nn.Linear)]

        model, transformer = switch_layers(model, transformer, data, module_dict,
                                           ignore_layer=[QuantMeasure], quant_op=False)
        graph = transformer.log.getGraph()
        bottoms = transformer.log.getBottoms()
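    # For the quantized path the fake-quant wrappers are stripped again before
    # export: the ONNX graph stays plain FP32, and the ranges collected above
    # re-enter later through the ncnn calibration table.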

    # An example input you would normally provide to your model's forward() method
    x = torch.rand(1, 3, 224, 224)

    # Export the onnx model
    torch_out = torch.onnx._export(model, x, "model.onnx", export_params=True)
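    # torch.onnx._export is the internal variant of the public torch.onnx.export;
    # with a fixed-shape example input and export_params=True the public API
    # behaves the same here.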

    # Simplify model using onnx-simplifier
    os.system("python3 -m onnxsim model.onnx model-sim.onnx")
    os.system("rm model.onnx")

    cur_path = os.path.abspath(os.getcwd())
    os.system("mv model-sim.onnx {}".format(os.path.join(args.ncnn_build, 'tools/onnx', 'model-sim.onnx')))
    os.chdir(os.path.join(args.ncnn_build, 'tools/onnx'))

    # Convert onnx to ncnn
    os.system("./onnx2ncnn model-sim.onnx model.param model.bin")

    # Add input image size to .param
    lines = [line.strip() for line in open("model.param", "r")]
    with open("model.param", 'w') as ww:
        for idx, line in enumerate(lines):
            if idx == 2 and 'input' in line.lower():
                line += ' 0=224 1=224 2=3'
            ww.write(line + '\n')
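    # In an ncnn .param file the Input layer takes 0=w 1=h 2=c, so this pins
    # the network input to 224x224x3 for the conversion and quantization tools.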

os.system("mv model.param {}".format(os.path.join(args.ncnn_build, 'tools/quantize', 'model.param')))
os.system("mv model.bin {}".format(os.path.join(args.ncnn_build, 'tools/quantize', 'model.bin')))
if not os.path.exists(os.path.join(cur_path, 'modeling/ncnn')):
os.makedirs(os.path.join(cur_path, 'modeling/ncnn'))

os.system("rm model-sim.onnx")
os.chdir(os.path.join(args.ncnn_build, 'tools/quantize'))
os.system("./ncnn2table --param=model.param --bin=model.bin\
--images={} --output=model_int8.table\
--mean={},{},{} --norm={},{},{} --size=224,224 --thread=2".format(
args.image_path, 0.485*255, 0.456*255, 0.406*255, 1/(0.229*255), 1/(0.224*255), 1/(0.225*255)))

lines = [line.strip() for line in open("model.table", 'r')]

if args.quantize:
os.system("./ncnn2int8 model.param model.bin model_int8.param model_int8.bin model_int8.table")
lines = [line.strip() for line in open("model_int8.param", "r")]
with open("model_int8.param", 'w') as ww:
for idx, line in enumerate(lines):
if idx == 3 and 'input' in line:
line += ' 0=224 1=224 2=3'
os.system("mv model.param {}".format(os.path.join(args.ncnn_build, 'tools/quantize', 'model.param')))
os.system("mv model.bin {}".format(os.path.join(args.ncnn_build, 'tools/quantize', 'model.bin')))
os.chdir(os.path.join(args.ncnn_build, 'tools/quantize'))

# Estimate activation range using https://github.com/Tencent/ncnn/tree/master/tools/quantize
os.system("./ncnn2table --param=model.param --bin=model.bin\
--images={} --output=model_int8_channel.table\
--mean={},{},{} --norm={},{},{} --size=224,224 --thread=2".format(
args.image_path, 0.485*255, 0.456*255, 0.406*255, 1/(0.229*255), 1/(0.224*255), 1/(0.225*255)))
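        # ncnn preprocesses inputs as (pixel - mean) * norm, so the ImageNet
        # mean/std (0.485/0.229, ...) are rescaled by 255 here to match
        # 0-255 pixel values.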

        # modify the activation min/max range and weight min/max range to the values calculated in DFQ
        table_old = [line.strip() for line in open("model_int8_channel.table", 'r')]
        table_new = []
        count = 0
        for ii in range(2):
            for idx in graph:
                if type(graph[idx]) in [torch.nn.Conv2d, torch.nn.Linear]:
                    if ii == 0:  # min/max of the layer weight
                        mi = float(torch.min(graph[idx].weight))
                        ma = float(torch.max(graph[idx].weight))
                    else:  # min/max of the activation
                        mi = float(torch.min(graph[idx].quant.running_min))
                        ma = float(torch.max(graph[idx].quant.running_max))
                    scale = 128. / (max(abs(ma), abs(mi)))

                    if ii == 0:  # weights: one scale per output channel
                        table_new.append(' '.join(table_old[count].split(' ')[0:1] + [str(scale)] * graph[idx].weight.shape[0]))
                    else:  # activations: a single per-tensor scale
                        table_new.append(' '.join(table_old[count].split(' ')[0:1] + [str(scale)]))
                    count += 1

        with open("model_int8_tensor.table", 'w') as ww:
            for line in table_new:
                ww.write(line + '\n')
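        # ncnn2table estimates scales by KL-divergence calibration over the
        # images above; this loop keeps only the layer names (first token) and
        # overwrites the scales with ones derived from DFQ's min/max ranges.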

os.system("cp model_int8.param {}".format(os.path.join(cur_path, 'model_int8.param')))
os.system("cp model_int8.bin {}".format(os.path.join(cur_path, 'model_int8.bin')))
os.system("cp model_int8.table {}".format(os.path.join(cur_path, 'model_int8.table')))
# Convert to Int8 model
os.system("./ncnn2int8 model.param model.bin model_int8.param model_int8.bin model_int8_tensor.table")
lines = [line.strip() for line in open("model_int8.param", "r")]

os.system("cp model_int8.param {}".format(os.path.join(cur_path, args.param)))
os.system("cp model_int8.bin {}".format(os.path.join(cur_path, args.bin)))
os.system("cp model_int8_tensor.table {}".format(os.path.join(cur_path, args.table)))
else:
os.system("cp model.param {}".format(os.path.join(cur_path, 'model.param')))
os.system("cp model.bin {}".format(os.path.join(cur_path, 'model.bin')))
os.system("mv model.param {}".format(os.path.join(cur_path, args.param)))
os.system("mv model.bin {}".format(os.path.join(cur_path, args.bin)))

    os.chdir(cur_path)
    line = ' '.join([l.strip() for l in open(args.param, 'r')][-1].split()).split(' ')[1]
    print("=" * 100)
    print("Target layer name '{}'".format(line))
    print("=" * 100)

if __name__ == '__main__':
    main()
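
For reference, a typical invocation of each output combination might look like the following (flag spellings beyond --param/--bin/--table are assumptions inferred from the args.* attributes used above):

    # int8 model with DFQ equalization and bias correction
    python3 convert_ncnn.py --quantize --equalize --correction \
        --param modeling/ncnn/model_int8.param --bin modeling/ncnn/model_int8.bin --table modeling/ncnn/model_int8.table

    # plain FP32 ncnn model
    python3 convert_ncnn.py --param modeling/ncnn/model_fp32.param --bin modeling/ncnn/model_fp32.bin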
