[quant][core][gpu][improvement] Integrated quantized cudnn max pool2d…

… with existing quantized_max_pool2d (pytorch#76129) Summary: Pull Request resolved: pytorch#76129 Previously, quantized_max_pool2d_cudnn was made available to the frontend through torch.ops.quantized.max_pool2d. We improve the integration by also making it available through torch.max_pool2d, which is made possible by registering quantized_max_pool2d_cudnn in native_functions.yaml under quantized_max_pool2d, which is called in max_pool2d. Ideally and ultimately, we will get rid of the quantized_max_pool2d registration in native_functions.yaml, and directly register quantized_max_pool2d and quantized_max_pool2d_cudnn under max_pool2d, but current support for quantized dispatch keys blocks us from doing so. Test Plan: ``` python test/run_tests.py ``` ``` python test/run_tests.py ``` Differential Revision: D35789078 D35789078 Reviewed By: jerryzh168 Pulled By: dzdang fbshipit-source-id: 5d8220255bfab663b4779b5d3c66dea9f79d8ee7 (cherry picked from commit c27164d)
FZenjoys · Apr 27, 2022 · 6e292f1 · 6e292f1
1 parent 6e959de
commit 6e292f1
Show file tree

Hide file tree

Showing 5 changed files with 25 additions and 10 deletions.
diff --git a/BUILD.bazel b/BUILD.bazel
@@ -282,6 +282,7 @@ filegroup(
             "aten/src/ATen/native/cudnn/*.cpp",
             "aten/src/ATen/native/miopen/*.cpp",
             "aten/src/ATen/native/nested/cuda/*.cpp",
+            "aten/src/ATen/native/quantized/cudnn/*.cpp",
             "aten/src/ATen/native/sparse/cuda/*.cpp",
             "aten/src/ATen/native/transformers/cuda/*.cpp",
             "aten/src/THC/*.cpp",

diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
@@ -145,6 +145,7 @@ file(GLOB native_quantized_hip_hip "native/quantized/hip/*.hip")
 file(GLOB native_quantized_hip_cpp "native/quantized/hip/*.cpp")
 file(GLOB native_transformers_hip_hip "native/transformers/hip/*.hip")
 file(GLOB native_transformers_hip_cpp "native/transformers/hip/*.cpp")
+file(GLOB native_quantized_cudnn_hip_cpp "native/quantized/cudnn/hip/*.cpp")
 file(GLOB native_utils_cpp "native/utils/*.cpp")
 
 # XNNPACK
@@ -248,7 +249,7 @@ if(USE_ROCM)
   list(APPEND ATen_HIP_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/hip)
   set(ATen_HIP_SRCS ${ATen_HIP_SRCS} ${hip_hip} ${native_hip_hip} ${native_nested_hip_hip} ${native_sparse_hip_hip} ${native_quantized_hip_hip} ${native_transformers_hip_hip})
   # TODO: Codegen separate files for HIP and use those (s/cuda_generated_sources/hip_generated_sources)
-  set(all_hip_cpp ${native_nested_hip_cpp} ${native_sparse_hip_cpp} ${native_quantized_hip_cpp} ${native_transformers_hip_cpp}  ${hip_cpp} ${native_hip_cpp} ${native_hip_linalg_cpp} ${cuda_generated_sources} ${ATen_HIP_SRCS})
+  set(all_hip_cpp ${native_nested_hip_cpp} ${native_sparse_hip_cpp} ${native_quantized_hip_cpp} ${native_transformers_hip_cpp} ${native_quantized_cudnn_hip_cpp} ${hip_cpp} ${native_hip_cpp} ${native_hip_linalg_cpp} ${cuda_generated_sources} ${ATen_HIP_SRCS})
   set(all_hip_cpp ${native_miopen_cpp} ${native_cudnn_hip_cpp} ${miopen_cpp} ${all_hip_cpp})
 endif()
 

diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml
@@ -3022,6 +3022,7 @@
 - func: quantized_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
   dispatch:
     QuantizedCPU: quantized_max_pool2d
+    QuantizedCUDA: quantized_max_pool2d_cudnn
 
 - func: max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor
 

diff --git a/aten/src/ATen/native/quantized/cudnn/Pooling.cpp b/aten/src/ATen/native/quantized/cudnn/Pooling.cpp
@@ -1,17 +1,17 @@
+#include <c10/util/Exception.h>
 #ifdef USE_CUDA
 #include <ATen/cuda/CUDAConfig.h>  // for the definition of AT_CUDNN_ENABLED
 
 #if AT_CUDNN_ENABLED()
-
 #include <ATen/native/cudnn/Macros.h>
-
-#if HAS_CUDNN_V8()
-
-#include <ATen/ATen.h>
 #include <ATen/cuda/Exceptions.h>
 #include <ATen/cudnn/Descriptors.h>
 #include <ATen/cudnn/Handle.h>
 #include <ATen/cudnn/Types.h>
+#endif // AT_CUDNN_ENABLED
+#endif // USE_CUDA
+
+#include <ATen/ATen.h>
 #include <ATen/native/Pool.h>
 #include <ATen/native/TensorIterator.h>
 #include <c10/core/ScalarType.h>
@@ -63,6 +63,9 @@ Tensor quantized_max_pool2d_cudnn(
     IntArrayRef padding,
     IntArrayRef dilation,
     bool ceil_mode) {
+#ifdef USE_CUDA
+#if AT_CUDNN_ENABLED()
+#if HAS_CUDNN_V8()
   check_maxpool2d_params(
       kernel_size,
       stride,
@@ -179,6 +182,18 @@ Tensor quantized_max_pool2d_cudnn(
 
   // recall we casted our input and output to 4D if qx was 3D, so we recast it back to 3D prior to returning
   return (ndim == 3 ? qy.view(std::vector<int64_t>(output_shape.begin() + 1, output_shape.end())) : qy);
+#else // HAS_CUDNN_V8()
+  AT_ERROR("at::native::quantized_max_pool2d_cudnn: ATen not compiled with cuDNN v8 support");
+  return Tensor{}; // never reached, placates the compiler
+#endif // HAS_CUDNN_V8()
+#else // AT_CUDNN_ENABLED()
+  AT_ERROR("at::native::quantized_max_pool2d_cudnn: ATen not compiled with cuDNN support");
+  return Tensor{}; // never reached, placates the compiler
+#endif // AT_CUDNN_ENABLED()
+#else // USE_CUDA
+  AT_ERROR("at::native::quantized_max_pool2d_cudnn: ATen not compiled with USE_CUDA support");
+  return Tensor{}; // never reached, placates the compiler
+#endif
 }
 
 // Keep the registry in the anonymous namespace.
@@ -206,7 +221,3 @@ TORCH_LIBRARY_IMPL(quantized, QuantizedCUDA, m) {
 } // namespace
 } // namespace native
 } // namespace at
-
-#endif  // HAS_CUDNN_V8
-#endif  // AT_CUDNN_ENABLED
-#endif  // USE_CUDA
diff --git a/tools/amd_build/build_amd.py b/tools/amd_build/build_amd.py
@@ -85,6 +85,7 @@
     "aten/src/ATen/cuda/*",
     "aten/src/ATen/native/cuda/*",
     "aten/src/ATen/native/cudnn/*",
+    "aten/src/ATen/native/quantized/cudnn/*",
     "aten/src/ATen/native/nested/cuda/*",
     "aten/src/ATen/native/sparse/cuda/*",
     "aten/src/ATen/native/quantized/cuda/*",