[clean fluid api] replace fluid/contrib/slim api. (PaddlePaddle#1615)
zzjjay authored Jan 5, 2023
1 parent 00f1104 commit c7ffb3a
Showing 21 changed files with 129 additions and 146 deletions.
2 changes: 1 addition & 1 deletion ce_tests/dygraph/quant/src/qat.py
@@ -31,7 +31,7 @@
import paddle.vision.models as models

from paddleslim import QAT
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.quantization import ImperativeQuantAware

from imagenet_dataset import ImageNetDataset

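For orientation, the relocated class keeps its old interface. Below is a minimal dygraph QAT sketch using the new import path; the model choice and argument values are illustrative, not taken from this commit:

```
import paddle
from paddle.quantization import ImperativeQuantAware

# Wrap a float model with fake-quant ops (values below are illustrative).
model = paddle.vision.models.mobilenet_v1()
quanter = ImperativeQuantAware(
    weight_bits=8,
    activation_bits=8,
    weight_quantize_type='abs_max',
    activation_quantize_type='moving_average_abs_max')
quanter.quantize(model)  # inserts quant/dequant stubs in place

# ... run quant-aware training as usual, then export an inference model:
quanter.save_quantized_model(
    model,
    'output/qat_model',
    input_spec=[paddle.static.InputSpec(shape=[None, 3, 224, 224], dtype='float32')])
```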
2 changes: 1 addition & 1 deletion ce_tests/dygraph/quant/src/save_quant_model.py
@@ -23,7 +23,7 @@
import time
import paddle
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass
from paddle.quantization import Quant2Int8MkldnnPass
from paddle.framework import core

paddle.enable_static()
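For reference, a sketch of how a conversion script typically drives this pass, modeled on its upstream usage; the ops-to-quantize set and paths are assumptions:

```
import paddle
from paddle.fluid.framework import IrGraph
from paddle.framework import core
from paddle.quantization import Quant2Int8MkldnnPass

paddle.enable_static()
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)
scope = paddle.static.global_scope()

# Load the fake-quantized float model (placeholder path).
[program, feed_names, fetch_targets] = paddle.static.load_inference_model(
    path_prefix='quant_model', executor=exe)

# Rewrite the graph: drop fake_quantize/fake_dequantize, fuse ops, emit INT8 kernels.
graph = IrGraph(core.Graph(program.desc), for_test=True)
mkldnn_pass = Quant2Int8MkldnnPass(
    {'conv2d', 'fc'},  # ops to quantize -- an assumed set
    _scope=scope, _place=place, _core=core, _debug=False)
int8_program = mkldnn_pass.apply(graph).to_program()
```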
4 changes: 2 additions & 2 deletions demo/mkldnn_quant/README.md
@@ -54,7 +54,7 @@ import numpy as np

To deploy on CPU, we pass the saved quant model through a conversion script that removes the fake_quantize/fake_dequantize ops, performs operator fusion and other optimizations, and converts it into an INT8 model.

The script lives upstream at [save_quant_model.py](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/contrib/slim/tests/save_quant_model.py)
The script lives upstream at [save_quant_model.py](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/static/quantization/tests/save_quant_model.py)

Copy the script into the directory of this demo (`/PATH_TO_PaddleSlim/demo/mkldnn_quant/`) and run the following command:
```
@@ -181,4 +181,4 @@ For INT8 model accuracy and performance results, see [CPU deployment INT8 model accuracy and per…
## FAQ

- For deployment and inference of NLP models on CPU, see the sample [ERNIE model QUANT INT8 accuracy and performance reproduction](https://github.com/PaddlePaddle/benchmark/tree/master/Inference/c++/ernie/mkldnn)
- For details of the DNNL quantization mechanism, see [SLIM Quant for INT8 MKLDNN](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/contrib/slim/tests/README.md)
- For details of the DNNL quantization mechanism, see [SLIM Quant for INT8 MKLDNN](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/static/quantization/tests/README.md)
4 changes: 2 additions & 2 deletions demo/mkldnn_quant/README_en.md
@@ -45,7 +45,7 @@ To generate fake quantized model with quant-aware strategy, see [Quant-aware tra
To generate a post-training fake quantized model, see the [Offline post-training quantization tutorial](https://paddleslim.readthedocs.io/en/latest/quick_start/index_en.html)

## 3. Convert the fake quantized model to DNNL INT8 model
In order to deploy an INT8 model on the CPU, we need to collect scales, remove all fake_quantize/fake_dequantize operators, optimize the graph and quantize it, turning it into the final DNNL INT8 model. This is done by the script [save_quant_model.py](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/contrib/slim/tests/save_quant_model.py). Copy the script to the directory where the demo is located: `/PATH_TO_PaddleSlim/demo/mkldnn_quant/` and run it as follows:
In order to deploy an INT8 model on the CPU, we need to collect scales, remove all fake_quantize/fake_dequantize operators, optimize the graph and quantize it, turning it into the final DNNL INT8 model. This is done by the script [save_quant_model.py](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/static/quantization/tests/save_quant_model.py). Copy the script to the directory where the demo is located: `/PATH_TO_PaddleSlim/demo/mkldnn_quant/` and run it as follows:
```
python save_quant_model.py --quant_model_path=/PATH/TO/SAVE/FLOAT32/quant/MODEL --int8_model_save_path=/PATH/TO/SAVE/INT8/MODEL
```
@@ -176,4 +176,4 @@ For INT8 models accuracy and performance results see [CPU deployment predicts th
## FAQ

- For deploying INT8 NLP models on CPU, see [ERNIE model quant INT8 accuracy and performance reproduction](https://github.com/PaddlePaddle/benchmark/tree/master/Inference/c++/ernie/mkldnn)
- The detailed DNNL quantization process can be viewed in [SLIM quant for INT8 DNNL](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/contrib/slim/tests/README.md)
- The detailed DNNL quantization process can be viewed in [SLIM quant for INT8 DNNL](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/static/quantization/tests/README.md)
4 changes: 2 additions & 2 deletions docs/zh_cn/FAQ/quantization_FAQ.md
@@ -35,9 +35,9 @@ config->EnableTensorRtEngine(1 << 20 /* workspace_size*/,
false /* use_calib_mode*/);
```

- To deploy the quantized model on x86, use [INT8 MKL-DNN](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/fluid/contrib/slim/tests)
- To deploy the quantized model on x86, use [INT8 MKL-DNN](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/static/quantization/tests)

- First convert the model; you can refer to this [script](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/contrib/slim/tests/save_quant_model.py)
- First convert the model; you can refer to this [script](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/static/quantization/tests/save_quant_model.py)

- After conversion, load the model with the inference deployment API, e.g. the [C++ API](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/native_infer.html)

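The loading step the FAQ gestures at looks roughly like this in Python; the model directory is a placeholder, and enabling MKL-DNN is what routes the INT8 graph onto DNNL kernels:

```
from paddle.inference import Config, create_predictor

config = Config('int8_model')  # directory produced by save_quant_model.py (placeholder)
config.disable_gpu()
config.enable_mkldnn()  # required so the INT8 ops run on DNNL kernels
predictor = create_predictor(config)
```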
2 changes: 1 addition & 1 deletion docs/zh_cn/tutorials/quant/AnalysisQAT.md
@@ -43,7 +43,7 @@ import paddle
from PIL import Image
from paddle.vision.datasets import DatasetFolder
from paddle.vision.transforms import transforms
from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization
from paddle.quantization import PostTrainingQuantization
from paddleslim.quant.analysis_qat import AnalysisQAT

paddle.enable_static()
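As context for the import change, a minimal static-graph PTQ sketch with the relocated class; the model path, calibration loader, and argument values are assumptions:

```
import paddle
from paddle.quantization import PostTrainingQuantization

paddle.enable_static()
exe = paddle.static.Executor(paddle.CPUPlace())

ptq = PostTrainingQuantization(
    executor=exe,
    model_dir='float_model',   # placeholder inference-model directory
    data_loader=calib_loader,  # a calibration DataLoader you provide
    batch_nums=10,
    algo='KL')
ptq.quantize()
ptq.save_quantized_model('quantized_model')
```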
2 changes: 1 addition & 1 deletion example/auto_compression/hyperparameter_tutorial.md
@@ -276,7 +276,7 @@ print(f"Operators in inference model:\n{op_types.keys()}")
Run the following code to see which OP types are supported by quantization in the current PaddlePaddle version:
```
from paddle.fluid.contrib.slim.quantization.utils import _weight_supported_quantizable_op_type, _act_supported_quantizable_op_type
from paddle.static.quantization.utils import _weight_supported_quantizable_op_type, _act_supported_quantizable_op_type
print(f"_supported_quantizable_op_type:\n{_weight_supported_quantizable_op_type}")
print(f"_supported_quantizable_op_type:\n{_act_supported_quantizable_op_type}")
```
6 changes: 4 additions & 2 deletions paddleslim/analysis/_utils.py
@@ -19,9 +19,11 @@
import paddleslim
import subprocess
import time
import ssl
import requests
import shutil
import logging
from ..common import get_logger
_logger = get_logger(__name__, level=logging.INFO)
__all__ = [
"save_cls_model", "save_det_model", "nearest_interpolate", "opt_model",
"load_predictor"
@@ -36,7 +38,7 @@ def _get_download(url, fullname):
try:
req = requests.get(url, stream=True)
except Exception as e: # requests.exceptions.ConnectionError
logger.info("Downloading {} from {} failed with exception {}".format(
_logger.info("Downloading {} from {} failed with exception {}".format(
fname, url, str(e)))
return False

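Besides the import shuffle, the second hunk fixes an undefined name: the except branch logged through `logger`, which the module never defined; the new `_logger` is created once at import time. A tiny sketch of the pattern, with illustrative message values:

```
import logging
from paddleslim.common import get_logger

_logger = get_logger(__name__, level=logging.INFO)
_logger.info("Downloading {} from {} failed with exception {}".format(
    "weights.tgz", "https://example.com/weights.tgz", "ConnectionError"))
```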
2 changes: 1 addition & 1 deletion paddleslim/auto_compression/compressor.py
@@ -804,7 +804,7 @@ def _start_train(self, train_program_info, test_program_info, strategy,
else:
logging_iter = train_config.logging_iter
if batch_id % int(logging_iter) == 0:
print_info = "Total iter: {}, epoch: {}, batch: {}, loss: {}".format(
print_info = "Total iter: {}, epoch: {}, batch: {}, loss: {} ".format(
total_train_iter, epoch_id, batch_id, loss[0])
for idx, loss_value in enumerate(loss[1:]):
print_info += '{}: {} '.format(loss_names[idx],
4 changes: 2 additions & 2 deletions paddleslim/auto_compression/utils/fake_ptq.py
@@ -2,10 +2,10 @@
import paddle
from paddle.fluid.framework import IrGraph
from paddle.framework import core
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass, QuantizationTransformPassV2, AddQuantDequantPass, AddQuantDequantPassV2, QuantizationFreezePass, QuantWeightPass
from paddle.static.quantization import QuantizationTransformPass, QuantizationTransformPassV2, AddQuantDequantPass, AddQuantDequantPassV2, QuantizationFreezePass, QuantWeightPass

try:
from paddle.fluid.contrib.slim.quantization import utils
from paddle.static.quantization import utils
TRANSFORM_PASS_OP_TYPES = utils._weight_supported_quantizable_op_type
QUANT_DEQUANT_PASS_OP_TYPES = utils._act_supported_quantizable_op_type
except:
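The `try`/`except` guards against Paddle builds that do not expose the private `utils` module; its `except` branch is collapsed above. A sketch of the complete pattern, with fallback op lists that are assumptions rather than the commit's actual values:

```
try:
    from paddle.static.quantization import utils
    TRANSFORM_PASS_OP_TYPES = utils._weight_supported_quantizable_op_type
    QUANT_DEQUANT_PASS_OP_TYPES = utils._act_supported_quantizable_op_type
except ImportError:
    # Assumed fallback: op types every recent release can quantize.
    TRANSFORM_PASS_OP_TYPES = ['conv2d', 'depthwise_conv2d', 'mul']
    QUANT_DEQUANT_PASS_OP_TYPES = ['elementwise_add', 'pool2d']
```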
23 changes: 14 additions & 9 deletions paddleslim/dygraph/quant/ptq.py
@@ -17,11 +17,16 @@

import paddle
import paddle.nn as nn
import paddle.fluid.contrib.slim.quantization as Q
from paddle.fluid.contrib.slim.quantization import AbsmaxQuantizer
from paddle.fluid.contrib.slim.quantization import HistQuantizer
from paddle.fluid.contrib.slim.quantization import KLQuantizer
from paddle.fluid.contrib.slim.quantization import PerChannelAbsmaxQuantizer

from paddle.quantization import (
PTQConfig,
ImperativePTQ,
AbsmaxQuantizer,
HistQuantizer,
KLQuantizer,
PerChannelAbsmaxQuantizer,
SUPPORT_ACT_QUANTIZERS,
SUPPORT_WT_QUANTIZERS, )
from ...common import get_logger

_logger = get_logger(__name__, level=logging.INFO)
@@ -56,14 +61,14 @@ def __init__(self,
print("activation_quantizer", activation_quantizer)
activation_quantizer = eval(activation_quantizer)(**kwargs)
weight_quantizer = eval(weight_quantizer)()
assert isinstance(activation_quantizer, tuple(Q.SUPPORT_ACT_QUANTIZERS))
assert isinstance(weight_quantizer, tuple(Q.SUPPORT_WT_QUANTIZERS))
assert isinstance(activation_quantizer, tuple(SUPPORT_ACT_QUANTIZERS))
assert isinstance(weight_quantizer, tuple(SUPPORT_WT_QUANTIZERS))

quant_config = Q.PTQConfig(
quant_config = PTQConfig(
activation_quantizer=activation_quantizer,
weight_quantizer=weight_quantizer)

self.ptq = Q.ImperativePTQ(quant_config=quant_config)
self.ptq = ImperativePTQ(quant_config=quant_config)

def quantize(self, model, inplace=False, fuse=False, fuse_list=None):
"""
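The class edited here is PaddleSlim's dygraph `PTQ` wrapper; quantizer names are resolved via `eval` against the imports above. A usage sketch, with the model choice and arguments as assumptions:

```
import paddle
from paddleslim.dygraph.quant import PTQ

model = paddle.vision.models.mobilenet_v1(pretrained=True)
ptq = PTQ(activation_quantizer='HistQuantizer')  # name resolved to a quantizer class
quant_model = ptq.quantize(model, fuse=False)

# ... feed a few calibration batches through quant_model, then export:
ptq.save_quantized_model(
    quant_model,
    'output/ptq_model',
    input_spec=[paddle.static.InputSpec(shape=[None, 3, 224, 224], dtype='float32')])
```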
8 changes: 4 additions & 4 deletions paddleslim/dygraph/quant/qat.py
@@ -203,7 +203,7 @@ def __init__(self,

# TODO: remove try-except when the version is stable
try:
self.imperative_qat = paddle.fluid.contrib.slim.quantization.ImperativeQuantAware(
self.imperative_qat = paddle.quantization.ImperativeQuantAware(
weight_bits=self.config['weight_bits'],
activation_bits=self.config['activation_bits'],
weight_quantize_type=self.config['weight_quantize_type'],
@@ -220,7 +220,7 @@ def __init__(self,
onnx_format=self.config['onnx_format'], # support Paddle >= 2.4
)
except:
self.imperative_qat = paddle.fluid.contrib.slim.quantization.ImperativeQuantAware(
self.imperative_qat = paddle.quantization.ImperativeQuantAware(
weight_bits=self.config['weight_bits'],
activation_bits=self.config['activation_bits'],
weight_quantize_type=self.config['weight_quantize_type'],
@@ -291,7 +291,7 @@ def save_quantized_model(self, model, path, input_spec=None):
def _remove_preprocess(self, model):
state_dict = model.state_dict()
try:
self.imperative_qat = paddle.fluid.contrib.slim.quantization.ImperativeQuantAware(
self.imperative_qat = paddle.quantization.ImperativeQuantAware(
weight_bits=self.config['weight_bits'],
activation_bits=self.config['activation_bits'],
weight_quantize_type=self.config['weight_quantize_type'],
@@ -302,7 +302,7 @@ def _remove_preprocess(self, model):
onnx_format=self.config['onnx_format'], # support Paddle >= 2.4
)
except:
self.imperative_qat = paddle.fluid.contrib.slim.quantization.ImperativeQuantAware(
self.imperative_qat = paddle.quantization.ImperativeQuantAware(
weight_bits=self.config['weight_bits'],
activation_bits=self.config['activation_bits'],
weight_quantize_type=self.config['weight_quantize_type'],
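The try/except pairs exist only because `onnx_format` first appeared in Paddle 2.4 and older releases reject the keyword. For context, a sketch of how the wrapper is driven (config values are illustrative):

```
import paddle
from paddleslim import QAT

quant_config = {
    'weight_quantize_type': 'channel_wise_abs_max',
    'activation_quantize_type': 'moving_average_abs_max',
    'weight_bits': 8,
    'activation_bits': 8,
}
model = paddle.vision.models.mobilenet_v1()
quanter = QAT(config=quant_config)
quanter.quantize(model)  # rewrites the model in place; train it as usual afterwards
```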
6 changes: 3 additions & 3 deletions paddleslim/quant/analysis_ptq.py
@@ -165,7 +165,7 @@ def save_csv(self, data, save_name, csv_columns):
_logger.info('Activation Statistic is saved in {}'.format(save_path))

def create_ptq(self, executor, skip_tensor_list):
return paddle.fluid.contrib.slim.quantization.PostTrainingQuantization(
return paddle.static.quantization.PostTrainingQuantization(
executor=executor,
data_loader=self.data_loader,
model_dir=self.model_dir,
@@ -331,7 +331,7 @@ def metric_error_analyse(self):
def collect_vars(self, scope, var_names):
all_vars = {}
for var_name in var_names:
var_tensor = paddle.fluid.contrib.slim.quantization.utils.load_variable_data(
var_tensor = paddle.static.quantization.utils.load_variable_data(
scope, var_name)
all_vars[var_name] = var_tensor
return all_vars
@@ -446,7 +446,7 @@ def get_weight_act_map(self, program, weight_names, persistable_var_names):
for op_name in weight_names:
for block_id in range(len(program.blocks)):
for op in program.blocks[block_id].ops:
var_name_list = paddle.fluid.contrib.slim.quantization.utils._get_op_input_var_names(
var_name_list = paddle.static.quantization.utils._get_op_input_var_names(
op)
if op_name in var_name_list:
for var_name in var_name_list:
21 changes: 10 additions & 11 deletions paddleslim/quant/post_quant_hpo.py
@@ -68,8 +68,8 @@ def __init__(self,
eval_function=None,
model_filename=None,
params_filename=None,
save_model_filename='__model__',
save_params_filename='__params__',
save_model_filename='model.pdmodel',
save_params_filename='model.pdiparams',
scope=None,
quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
is_full_quantize=False,
@@ -190,14 +190,14 @@ def eval_quant_model():
quant_scope = paddle.static.Scope()
with paddle.static.scope_guard(float_scope):
[infer_prog_float, feed_target_names_float, fetch_targets_float] = \
paddle.fluid.io.load_inference_model(dirname=g_quant_config.float_infer_model_path, \
paddle.static.load_inference_model(path_prefix=g_quant_config.float_infer_model_path, \
model_filename=g_quant_config.model_filename, \
params_filename=g_quant_config.params_filename, \
executor=g_quant_config.executor)

with paddle.static.scope_guard(quant_scope):
[infer_prog_quant, feed_target_names_quant, fetch_targets_quant] = \
paddle.fluid.io.load_inference_model(dirname=g_quant_model_cache_path, \
paddle.static.load_inference_model(path_prefix=g_quant_model_cache_path, \
model_filename=g_quant_config.save_model_filename, \
params_filename=g_quant_config.save_params_filename, \
executor=g_quant_config.executor)
@@ -304,7 +304,7 @@ def quantize(cfg):
quant_scope = paddle.static.Scope()
with paddle.static.scope_guard(float_scope):
[float_inference_program, feed_target_names, fetch_targets]= paddle.static.load_inference_model( \
dirname=g_quant_config.float_infer_model_path, \
path_prefix=g_quant_config.float_infer_model_path, \
model_filename=g_quant_config.model_filename, params_filename=g_quant_config.params_filename,
executor=g_quant_config.executor)
float_metric = g_quant_config.eval_function(
@@ -313,7 +313,7 @@

with paddle.static.scope_guard(quant_scope):
[quant_inference_program, feed_target_names, fetch_targets] = paddle.static.load_inference_model( \
dirname=g_quant_model_cache_path, \
path_prefix=g_quant_model_cache_path, \
model_filename=g_quant_config.model_filename, params_filename=g_quant_config.params_filename,
executor=g_quant_config.executor)
quant_metric = g_quant_config.eval_function(
@@ -344,8 +344,8 @@ def quant_post_hpo(
eval_function=None,
model_filename=None,
params_filename=None,
save_model_filename='__model__',
save_params_filename='__params__',
save_model_filename='model.pdmodel',
save_params_filename='model.pdiparams',
scope=None,
quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
is_full_quantize=False,
@@ -388,9 +388,9 @@
When all parameters are saved in a single file, set it
as filename. If parameters are saved in separate files,
set it as 'None'. Default : 'None'.
save_model_filename(str): The name of model file to save the quantized inference program. Default: '__model__'.
save_params_filename(str): The name of file to save all related parameters.
If it is set None, parameters will be saved in separate files. Default: '__params__'.
save_model_filename(str): The name of model file to save the quantized inference program. Default: 'model.pdmodel'.
save_params_filename(str): The name of file to save all related parameters. Default: 'model.pdiparams'.
scope(paddle.static.Scope, optional): The scope to run program, use it to load
and save variables. If scope is None, will use paddle.static.global_scope().
quantizable_op_type(list[str], optional): The list of op types
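Beyond the module move, the loader call changes shape: fluid's `dirname` argument becomes `path_prefix` in `paddle.static.load_inference_model`. A minimal sketch of the new-style call; the path is a placeholder:

```
import paddle

paddle.enable_static()
exe = paddle.static.Executor(paddle.CPUPlace())

# path_prefix replaces fluid's dirname; filenames stay explicit keyword args
[program, feed_names, fetch_targets] = paddle.static.load_inference_model(
    path_prefix='output/quant_model',
    executor=exe,
    model_filename='model.pdmodel',
    params_filename='model.pdiparams')
```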