[Paddle TensorRT] add pd_op.logical_not converter (PaddlePaddle#70267)
* pd_op.logical_not

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* add optimization_level
lizexu123 authored Dec 19, 2024
1 parent 5bda9d3 commit ab1043a
Showing 10 changed files with 113 additions and 51 deletions.
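
For orientation, a rough sketch of the user-facing path this commit extends, modelled on the new test cases at the bottom of the diff; the import path is an assumption, and the shapes and input name are illustrative:

from paddle.tensorrt.export import Input, TensorRTConfig, PrecisionMode  # assumed import location

# Dynamic-shape spec for a bool input named "x", matching the shapes used
# in the new TestLogicalNotTRTPattern test.
inp = Input(
    min_input_shape={"x": [2, 3]},
    optim_input_shape={"x": [2, 3]},
    max_input_shape={"x": [2, 3]},
)
trt_config = TensorRTConfig(inputs=[inp])
trt_config.precision_mode = PrecisionMode.FP16
# With this change, a paddle.logical_not (pd_op.logical_not / pd_op.logical_not_)
# in the captured program is marked TRT-convertible and lowered to a
# TensorRT NOT unary layer by PaddleToTensorRTConverter.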
23 changes: 3 additions & 20 deletions paddle/fluid/pir/transforms/tensorrt/trt_op_marker_pass.cc
@@ -263,6 +263,8 @@ class ActOpPattern : public pir::OpRewritePattern<OpType> {
};
using TanhOpPattern = ActOpPattern<paddle::dialect::TanhOp>;
using CeluOpPattern = ActOpPattern<paddle::dialect::CeluOp>;
using LogicalNotOpPattern = ActOpPattern<paddle::dialect::LogicalNotOp>;
using LogicalNot_OpPattern = ActOpPattern<paddle::dialect::LogicalNot_Op>;

class Pool2dOpPattern
: public pir::OpRewritePattern<paddle::dialect::Pool2dOp> {
@@ -550,26 +552,6 @@ class SignOpPattern : public pir::OpRewritePattern<paddle::dialect::SignOp> {
}
};

class LogicalNotOpPattern
: public pir::OpRewritePattern<paddle::dialect::LogicalNotOp> {
public:
using pir::OpRewritePattern<paddle::dialect::LogicalNotOp>::OpRewritePattern;
bool MatchAndRewrite(paddle::dialect::LogicalNotOp op,
pir::PatternRewriter &rewriter) const override {
if (op->HasAttribute(kCanRunTrtAttr) &&
op->attribute<pir::BoolAttribute>(kCanRunTrtAttr).data()) {
return false;
}
#if IS_TRT_VERSION_LT(8400)
VLOG(3) << "logical_not op is only supported by tensorrt8.4 above because "
"of cast op ";
return false;
#endif
op->set_attribute(kCanRunTrtAttr, rewriter.bool_attr(true));
return true;
}
};

class GroupNormOpPattern
: public pir::OpRewritePattern<paddle::dialect::GroupNormOp> {
public:
@@ -2141,6 +2123,7 @@ class TrtOpMarkerPass : public pir::PatternRewritePass {
ps.Add(std::make_unique<ArangeOpPattern>(context));
ps.Add(std::make_unique<SignOpPattern>(context));
ps.Add(std::make_unique<LogicalNotOpPattern>(context));
ps.Add(std::make_unique<LogicalNot_OpPattern>(context));
ps.Add(std::make_unique<LogicalOrOpPattern>(context));
ps.Add(std::make_unique<LogicalOr_OpPattern>(context));
ps.Add(std::make_unique<LogicalAndOpPattern>(context));
13 changes: 7 additions & 6 deletions python/paddle/tensorrt/converter.py
@@ -80,11 +80,6 @@ def __init__(self, paddle_program, scope, trt_config=None):
param_dict.update({name: weight_array})
self.param_dict = param_dict

trt_manager = TensorRTConfigManager()
if self.trt_config is not None and self.trt_config.ops_run_float:
trt_manager.set_force_fp32_ops(self.trt_config.ops_run_float)
_logger.info(f"force_fp32_ops: {trt_manager.get_force_fp32_ops()}")

self.input_info = {}
self.trt_output_value_map = {}
self.engine_num = 0
@@ -129,6 +124,10 @@ def __is_output_value(value):
def convert_subgraph_to_trt(self, program, group_op):
from .export import PrecisionMode

trt_manager = TensorRTConfigManager(self.trt_config)
if self.trt_config is not None and self.trt_config.ops_run_float:
_logger.info(f"force_fp32_ops: {trt_manager.get_force_fp32_ops()}")

_logger.info(f"start process {group_op}")

operations = next(iter(group_op.blocks())).ops
@@ -390,7 +389,9 @@ def convert_subgraph_to_trt(self, program, group_op):
if version_list[0] > 8 or (
version_list[0] == 8 and version_list[1] >= 6
): # trt version >= 8.6
config.builder_optimization_level = 5
config.builder_optimization_level = (
self.trt_config.optimization_level
)
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)

if self.trt_config is not None:
41 changes: 41 additions & 0 deletions python/paddle/tensorrt/converter_utils.py
@@ -19,6 +19,8 @@
import numpy as np
import tensorrt as trt

from paddle.tensorrt.util import TensorRTConfigManager

current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))
if parent_dir not in sys.path:
@@ -679,3 +681,42 @@ def squeeze_trt(network, input_tensor, axes):
reshape_layer = network.add_shuffle(input_tensor)
reshape_layer.set_input(1, new_shape_tensor)
return reshape_layer.get_output(0)


def unary_op_converter(network, paddle_op, inputs):
from paddle.tensorrt import PrecisionMode

input_tensor = inputs[0]
layer = None
org_type = input_tensor.dtype

trt_type_mapping = {
trt.DataType.INT8: trt.int8,
trt.DataType.INT32: trt.int32,
}

trt_manager = TensorRTConfigManager()
precision_mode = trt_manager.get_precision_mode()

need_cast = org_type in [trt.DataType.INT8, trt.DataType.INT32]
if need_cast:
identity_layer = network.add_identity(input_tensor)
if precision_mode == PrecisionMode.FP32:
identity_layer.set_output_type(0, trt.float32)
else:
identity_layer.set_output_type(0, trt.float16)
input_tensor = identity_layer.get_output(0)

if paddle_op.name() in ["pd_op.logical_not", "pd_op.logical_not_"]:
layer = network.add_unary(input_tensor, trt.UnaryOperation.NOT)
input_tensor = layer.get_output(0)
else:
raise NotImplementedError(
f"Unsupported unary operation: {paddle_op.name()}"
)
if need_cast:
restore_layer = network.add_identity(input_tensor)
restore_layer.set_output_type(0, trt_type_mapping[org_type])
input_tensor = restore_layer.get_output(0)

return input_tensor
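
The helper above routes INT8/INT32 inputs through an identity-layer cast (to FP32 or FP16, depending on the configured precision mode) before applying the unary operation, then restores the original dtype on the way out. A minimal sketch of a call site, using a hypothetical stand-in for the pir op; in practice both the network and the op come from PaddleToTensorRTConverter:

class _FakeLogicalNotOp:
    # Hypothetical stand-in for a pir op, for illustration only.
    def name(self):
        return "pd_op.logical_not"

# input_trt_tensor: an ITensor already added to `network` (e.g. a bool input).
out_tensor = unary_op_converter(network, _FakeLogicalNotOp(), [input_trt_tensor])
# out_tensor is the output of the NOT unary layer; for bool inputs no cast
# layers are inserted.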
6 changes: 5 additions & 1 deletion python/paddle/tensorrt/export.py
@@ -171,6 +171,7 @@ def __init__(
disable_ops: str | list | None = None,
precision_mode: PrecisionMode = PrecisionMode.FP32,
ops_run_float: str | list | None = None,
optimization_level: int | None = 3,
) -> None:
"""
A class for configuring TensorRT optimizations.
@@ -192,7 +193,9 @@ def __init__(
- PrecisionMode.BFP16: 16-bit Brain Floating Point precision. Only supported in TensorRT versions greater than 9.0.
ops_run_float (str|list, optional):
A set of operation names that should be executed using FP32 precision regardless of the `tensorrt_precision_mode` setting.
The directory where the optimized model will be saved (default is None).
The directory where the optimized model will be saved (default is None).
optimization_level (int, optional):
The TensorRT builder optimization level (default is 3). Only takes effect with TensorRT 8.6 and later.
Returns:
None
@@ -223,6 +226,7 @@ def __init__(
self.precision_mode = precision_mode
self.ops_run_float = ops_run_float
self.disable_ops = disable_ops
self.optimization_level = optimization_level
paddle.framework.set_flags(
{'FLAGS_trt_min_group_size': min_subgraph_size}
)
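
In line with the updated docstring, a short sketch of raising the new optimization level (values illustrative; input_config is a placeholder Input(...), and per converter.py the setting only takes effect when the detected TensorRT version is 8.6 or newer):

trt_config = TensorRTConfig(inputs=[input_config], optimization_level=5)
# or equivalently after construction, as the dummy-model test below does:
trt_config.optimization_level = 5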
8 changes: 8 additions & 0 deletions python/paddle/tensorrt/impls/logic.py
@@ -16,6 +16,7 @@

from paddle.tensorrt.converter_utils import (
add_elementwise_layer,
unary_op_converter,
)
from paddle.tensorrt.register import converter_registry

@@ -52,3 +53,10 @@ def not_equal_converter(network, paddle_op, inputs):
not_layer = network.add_unary(layer_output, trt.UnaryOperation.NOT)
layer_output = not_layer.get_output(0)
return layer_output


@converter_registry.register("pd_op.logical_not", trt_version="8.x")
@converter_registry.register("pd_op.logical_not_", trt_version="8.x")
def logic_not_converter(network, paddle_op, inputs):
layer_output = unary_op_converter(network, paddle_op, inputs)
return layer_output
29 changes: 13 additions & 16 deletions python/paddle/tensorrt/util.py
@@ -152,32 +152,29 @@ def mark_builtin_op(program):
class TensorRTConfigManager:
_instance = None

def __new__(cls, *args, **kwargs):
def __new__(cls, trt_config=None):
if not cls._instance:
cls._instance = super().__new__(cls, *args, **kwargs)
cls._instance._init()
cls._instance = super().__new__(cls)
cls._instance.trt_config = trt_config
return cls._instance

def _init(self):
self.force_fp32_ops = []
def _init(self, trt_config=None):
self.trt_config = trt_config

def set_force_fp32_ops(self, ops):
if ops is None:
self.force_fp32_ops = []
elif isinstance(ops, str):
self.force_fp32_ops = [ops]
elif isinstance(ops, list):
self.force_fp32_ops = ops
else:
raise ValueError("Ops should be a string, list, or None.")
def get_precision_mode(self):
if self.trt_config and self.trt_config.precision_mode:
return self.trt_config.precision_mode
return None

def get_force_fp32_ops(self):
return self.force_fp32_ops
if self.trt_config and self.trt_config.ops_run_float:
return self.trt_config.ops_run_float
return []


# In TensorRT FP16 inference, this function sets the precision of specific
# operators to FP32, ensuring numerical accuracy for these operations.
def support_fp32_mix_precision(op_type, layer):
def support_fp32_mix_precision(op_type, layer, trt_config=None):
trt_manager = TensorRTConfigManager()
force_fp32_ops = trt_manager.get_force_fp32_ops()
if op_type in force_fp32_ops:
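
One consequence of this refactor worth noting: __new__ caches the first instance, so whatever trt_config is supplied on the first construction (now done in convert_subgraph_to_trt) is what later bare TensorRTConfigManager() calls, such as the one in unary_op_converter, will see. A minimal sketch of that behaviour under the implementation shown above (input_config is a placeholder):

cfg = TensorRTConfig(inputs=[input_config])
manager_a = TensorRTConfigManager(cfg)   # first call stores cfg on the singleton
manager_b = TensorRTConfigManager()      # later call without arguments
assert manager_a is manager_b
assert manager_b.trt_config is cfg       # the stored config is still visible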
2 changes: 1 addition & 1 deletion test/tensorrt/CMakeLists.txt
@@ -19,7 +19,7 @@ if(NOT WIN32 AND TENSORRT_FOUND)
set_tests_properties(test_converter_math PROPERTIES TIMEOUT "300")
set_tests_properties(test_converter_activation PROPERTIES TIMEOUT "300")
set_tests_properties(test_converter_others PROPERTIES TIMEOUT "300")
set_tests_properties(test_converter_manipulation PROPERTIES TIMEOUT "300")
set_tests_properties(test_converter_manipulation PROPERTIES TIMEOUT "600")
set_tests_properties(test_converter_creation PROPERTIES TIMEOUT "300")
set_tests_properties(test_converter_attribute PROPERTIES TIMEOUT "300")
set_tests_properties(test_converter_common PROPERTIES TIMEOUT "300")
13 changes: 7 additions & 6 deletions test/tensorrt/tensorrt_test_base.py
@@ -254,13 +254,14 @@ def check_trt_result(self, rtol=1e-5, atol=1e-5, precision_mode=None):

# run TRTConverter(would lower group_op into tensorrt_engine_op)
trt_config = None

input = Input(
min_input_shape=self.min_shape,
optim_input_shape=self.min_shape,
max_input_shape=self.max_shape,
)
trt_config = TensorRTConfig(inputs=[input])
if precision_mode == "fp16":
input = Input(
min_input_shape=self.min_shape,
optim_input_shape=self.min_shape,
max_input_shape=self.max_shape,
)
trt_config = TensorRTConfig(inputs=[input])
trt_config.precision_mode = PrecisionMode.FP16

converter = PaddleToTensorRTConverter(
26 changes: 26 additions & 0 deletions test/tensorrt/test_converter_logic.py
@@ -257,6 +257,32 @@ def test_trt_result(self):
self.check_marker(expected_result=False)


class TestLogicalNotTRTPattern(TensorRTBaseTest):
def setUp(self):
self.python_api = paddle.logical_not
self.api_args = {
"x": np.random.choice([True, False], size=(2, 3)).astype("bool"),
}
self.program_config = {"feed_list": ["x"]}
self.min_shape = {"x": [2, 3]}
self.max_shape = {"x": [2, 3]}

def test_trt_result(self):
self.check_trt_result()


class TestLogicalNotCase1TRTPattern(TensorRTBaseTest):
def setUp(self):
self.python_api = paddle.logical_not
self.api_args = {"x": np.random.random([2]).astype("bool")}
self.program_config = {"feed_list": ["x"]}
self.min_shape = {"x": [2]}
self.max_shape = {"x": [2]}

def test_trt_result(self):
self.check_trt_result()


class TestLogicalXorTRTPattern(TensorRTBaseTest):
def setUp(self):
self.python_api = paddle.logical_xor
3 changes: 2 additions & 1 deletion test/tensorrt/test_converter_model_dummy.py
@@ -45,7 +45,8 @@ def test_paddle_to_tensorrt_conversion_dummy(self):
# Create a TensorRTConfig with inputs as a required field.
trt_config = TensorRTConfig(inputs=[input_config])
trt_config.precision_mode = PrecisionMode.FP16
trt_config.tensorrt_ops_run_float = "pd_op.add"
trt_config.ops_run_float = "pd_op.add"
trt_config.optimization_level = 5

output_var = program.list_vars()[-1]
