Skip to content

Commit

Permalink
[quant][core][gpu][improvement] Integrated quantized cudnn max pool2d…
Browse files Browse the repository at this point in the history
… with existing quantized_max_pool2d (pytorch#76129)

Summary:
Pull Request resolved: pytorch#76129

Previously, quantized_max_pool2d_cudnn was made available to the
frontend through torch.ops.quantized.max_pool2d.
We improve the integration by also making it available through
torch.max_pool2d, which is made possible by registering
quantized_max_pool2d_cudnn in native_functions.yaml under
quantized_max_pool2d, which is called in max_pool2d.

Ideally and ultimately, we will get rid of the quantized_max_pool2d
registration in native_functions.yaml, and directly register
quantized_max_pool2d and quantized_max_pool2d_cudnn under max_pool2d,
but current support for quantized dispatch keys blocks us from doing so.

Test Plan:
```
python test/run_tests.py
```

```
python test/run_tests.py
```

Differential Revision:
D35789078
D35789078

Reviewed By: jerryzh168

Pulled By: dzdang

fbshipit-source-id: 5d8220255bfab663b4779b5d3c66dea9f79d8ee7
(cherry picked from commit c27164d)
  • Loading branch information
dzdang authored and pytorchmergebot committed Apr 27, 2022
1 parent 6e959de commit 6e292f1
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 10 deletions.
1 change: 1 addition & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ filegroup(
"aten/src/ATen/native/cudnn/*.cpp",
"aten/src/ATen/native/miopen/*.cpp",
"aten/src/ATen/native/nested/cuda/*.cpp",
"aten/src/ATen/native/quantized/cudnn/*.cpp",
"aten/src/ATen/native/sparse/cuda/*.cpp",
"aten/src/ATen/native/transformers/cuda/*.cpp",
"aten/src/THC/*.cpp",
Expand Down
3 changes: 2 additions & 1 deletion aten/src/ATen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ file(GLOB native_quantized_hip_hip "native/quantized/hip/*.hip")
file(GLOB native_quantized_hip_cpp "native/quantized/hip/*.cpp")
file(GLOB native_transformers_hip_hip "native/transformers/hip/*.hip")
file(GLOB native_transformers_hip_cpp "native/transformers/hip/*.cpp")
file(GLOB native_quantized_cudnn_hip_cpp "native/quantized/cudnn/hip/*.cpp")
file(GLOB native_utils_cpp "native/utils/*.cpp")

# XNNPACK
Expand Down Expand Up @@ -248,7 +249,7 @@ if(USE_ROCM)
list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/hip)
set(ATen_HIP_SRCS ${ATen_HIP_SRCS} ${hip_hip} ${native_hip_hip} ${native_nested_hip_hip} ${native_sparse_hip_hip} ${native_quantized_hip_hip} ${native_transformers_hip_hip})
# TODO: Codegen separate files for HIP and use those (s/cuda_generated_sources/hip_generated_sources)
set(all_hip_cpp ${native_nested_hip_cpp} ${native_sparse_hip_cpp} ${native_quantized_hip_cpp} ${native_transformers_hip_cpp} ${hip_cpp} ${native_hip_cpp} ${native_hip_linalg_cpp} ${cuda_generated_sources} ${ATen_HIP_SRCS})
set(all_hip_cpp ${native_nested_hip_cpp} ${native_sparse_hip_cpp} ${native_quantized_hip_cpp} ${native_transformers_hip_cpp} ${native_quantized_cudnn_hip_cpp} ${hip_cpp} ${native_hip_cpp} ${native_hip_linalg_cpp} ${cuda_generated_sources} ${ATen_HIP_SRCS})
set(all_hip_cpp ${native_miopen_cpp} ${native_cudnn_hip_cpp} ${miopen_cpp} ${all_hip_cpp})
endif()

Expand Down
1 change: 1 addition & 0 deletions aten/src/ATen/native/native_functions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3022,6 +3022,7 @@
- func: quantized_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
dispatch:
QuantizedCPU: quantized_max_pool2d
QuantizedCUDA: quantized_max_pool2d_cudnn

- func: max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor

Expand Down
29 changes: 20 additions & 9 deletions aten/src/ATen/native/quantized/cudnn/Pooling.cpp
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
#include <c10/util/Exception.h>
#ifdef USE_CUDA
#include <ATen/cuda/CUDAConfig.h> // for the definition of AT_CUDNN_ENABLED

#if AT_CUDNN_ENABLED()

#include <ATen/native/cudnn/Macros.h>

#if HAS_CUDNN_V8()

#include <ATen/ATen.h>
#include <ATen/cuda/Exceptions.h>
#include <ATen/cudnn/Descriptors.h>
#include <ATen/cudnn/Handle.h>
#include <ATen/cudnn/Types.h>
#endif // AT_CUDNN_ENABLED
#endif // USE_CUDA

#include <ATen/ATen.h>
#include <ATen/native/Pool.h>
#include <ATen/native/TensorIterator.h>
#include <c10/core/ScalarType.h>
Expand Down Expand Up @@ -63,6 +63,9 @@ Tensor quantized_max_pool2d_cudnn(
IntArrayRef padding,
IntArrayRef dilation,
bool ceil_mode) {
#ifdef USE_CUDA
#if AT_CUDNN_ENABLED()
#if HAS_CUDNN_V8()
check_maxpool2d_params(
kernel_size,
stride,
Expand Down Expand Up @@ -179,6 +182,18 @@ Tensor quantized_max_pool2d_cudnn(

// recall we casted our input and output to 4D if qx was 3D, so we recast it back to 3D prior to returning
return (ndim == 3 ? qy.view(std::vector<int64_t>(output_shape.begin() + 1, output_shape.end())) : qy);
#else // HAS_CUDNN_V8()
AT_ERROR("at::native::quantized_max_pool2d_cudnn: ATen not compiled with cuDNN v8 support");
return Tensor{}; // never reached, placates the compiler
#endif // HAS_CUDNN_V8()
#else // AT_CUDNN_ENABLED()
AT_ERROR("at::native::quantized_max_pool2d_cudnn: ATen not compiled with cuDNN support");
return Tensor{}; // never reached, placates the compiler
#endif // AT_CUDNN_ENABLED()
#else // USE_CUDA
AT_ERROR("at::native::quantized_max_pool2d_cudnn: ATen not compiled with USE_CUDA support");
return Tensor{}; // never reached, placates the compiler
#endif
}

// Keep the registry in the anonymous namespace.
Expand Down Expand Up @@ -206,7 +221,3 @@ TORCH_LIBRARY_IMPL(quantized, QuantizedCUDA, m) {
} // namespace
} // namespace native
} // namespace at

#endif // HAS_CUDNN_V8
#endif // AT_CUDNN_ENABLED
#endif // USE_CUDA
1 change: 1 addition & 0 deletions tools/amd_build/build_amd.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
"aten/src/ATen/cuda/*",
"aten/src/ATen/native/cuda/*",
"aten/src/ATen/native/cudnn/*",
"aten/src/ATen/native/quantized/cudnn/*",
"aten/src/ATen/native/nested/cuda/*",
"aten/src/ATen/native/sparse/cuda/*",
"aten/src/ATen/native/quantized/cuda/*",
Expand Down

0 comments on commit 6e292f1

Please sign in to comment.