Skip to content

Commit

Permalink
Add support for CUDA 12.4 (pytorch#2565)
Browse files — browse the repository at this point in the history
Summary:
- Add support for CUDA 12.4

Pull Request resolved: pytorch#2565

Reviewed By: spcyppt

Differential Revision: D57027676

Pulled By: q10

fbshipit-source-id: e8b32e101c385fe6317ddc1ebc019cba7ae2bf20
  • Loading branch information
q10 authored and facebook-github-bot committed May 7, 2024
1 parent b83460f commit f2b1b50
Show file tree
Hide file tree
Showing 10 changed files with 23 additions and 14 deletions.
4 changes: 2 additions & 2 deletions .github/scripts/fbgemm_gpu_install.bash
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ install_fbgemm_gpu_pip () {
echo "Usage: ${FUNCNAME[0]} ENV_NAME FBGEMM_GPU_CHANNEL[/VERSION] FBGEMM_GPU_VARIANT_TYPE[/VARIANT_VERSION]"
echo "Example(s):"
echo " ${FUNCNAME[0]} build_env 0.5.0 cpu # Install the CPU variant, specific version from release channel"
echo " ${FUNCNAME[0]} build_env release cuda 12.1.1 # Install the CUDA variant, latest version from release channel"
echo " ${FUNCNAME[0]} build_env test/0.6.0rc0 cuda 12.1.0 # Install the CUDA 12.1 variant, specific version from test channel"
echo " ${FUNCNAME[0]} build_env release cuda 12.4.1 # Install the CUDA variant, latest version from release channel"
echo " ${FUNCNAME[0]} build_env test/0.6.0rc0 cuda 12.4.1 # Install the CUDA 12.4 variant, specific version from test channel"
echo " ${FUNCNAME[0]} build_env nightly rocm 5.3 # Install the ROCM 5.3 variant, latest version from nightly channel"
return 1
else
Expand Down
1 change: 1 addition & 0 deletions .github/scripts/fbgemm_gpu_test.bash
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,7 @@ test_fbgemm_gpu_setup_and_pip_install () {
local variant_versions=(
11.8.0
12.1.1
12.4.1
)
elif [ "$variant_type" == "rocm" ]; then
local variant_versions=(
Expand Down
3 changes: 2 additions & 1 deletion .github/scripts/utils_cuda.bash
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ install_cudnn () {
["116"]="https://developer.download.nvidia.com/compute/redist/cudnn/v8.3.2/local_installers/11.5/cudnn-${PLATFORM_NAME_LC}-8.3.2.44_cuda11.5-archive.tar.xz"
["117"]="https://ossci-linux.s3.amazonaws.com/cudnn-${PLATFORM_NAME_LC}-8.5.0.96_cuda11-archive.tar.xz"
["118"]="https://developer.download.nvidia.com/compute/redist/cudnn/v8.7.0/local_installers/11.8/cudnn-${PLATFORM_NAME_LC}-8.7.0.84_cuda11-archive.tar.xz"
["121"]="https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.8.1.3_cuda12-archive.tar.xz"
["121"]="https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz"
["124"]="https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.2.26_cuda12-archive.tar.xz"
)

# Split version string by dot into array, i.e. 11.7.1 => [11, 7, 1]
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/fbgemm_gpu_ci_cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ jobs:
{ arch: x86, instance: "linux.24xlarge" },
]
python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
cuda-version: [ "11.8.0", "12.1.1" ]
cuda-version: [ "11.8.0", "12.1.1", "12.4.1" ]
compiler: [ "gcc", "clang" ]

steps:
Expand Down Expand Up @@ -145,7 +145,7 @@ jobs:
# { arch: x86, instance: "linux.gcp.a100" },
]
python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
cuda-version: [ "11.8.0", "12.1.1" ]
cuda-version: [ "11.8.0", "12.1.1", "12.4.1" ]
# Specify exactly ONE CUDA version for artifact publish
cuda-version-publish: [ "12.1.1" ]
compiler: [ "gcc", "clang" ]
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/fbgemm_gpu_pip.yml
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ jobs:
{ instance: "linux.g5.4xlarge.nvidia.gpu" },
]
python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
cuda-version: [ "11.8.0", "12.1.1" ]
cuda-version: [ "11.8.0", "12.1.1", "12.4.1" ]

steps:
# Cannot upgrade to actions/checkout@v4 yet because GLIBC on the instance is too old
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/fbgemm_gpu_release_cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ on:
description: CUDA Version to Use for Building Artifact
type: choice
required: false
options: [ "11.8.0", "12.1.1" ]
options: [ "11.8.0", "12.1.1", "12.4.1" ]
default: "12.1.1"
publish_to_pypi:
description: Publish Artifact to PyPI
Expand Down Expand Up @@ -69,7 +69,7 @@ jobs:
{ arch: x86, instance: "linux.24xlarge" },
]
python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
cuda-version: [ "11.8.0", "12.1.1" ]
cuda-version: [ "11.8.0", "12.1.1", "12.4.1" ]

steps:
- name: Setup Build Container
Expand Down Expand Up @@ -139,7 +139,7 @@ jobs:
{ arch: x86, instance: "linux.g5.4xlarge.nvidia.gpu" },
]
python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
cuda-version: [ "11.8.0", "12.1.1" ]
cuda-version: [ "11.8.0", "12.1.1", "12.4.1" ]
needs: build_artifact

steps:
Expand Down
2 changes: 1 addition & 1 deletion fbgemm_gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ if(NOT FBGEMM_CPU_ONLY)
if(NOT USE_ROCM)
# CUTLASS currently doesn't build on ROCm:
#
# /__w/FBGEMM/FBGEMM/fbgemm_gpu/../third_party/cutlass/include/cutlass/half.h:73:10: fatal error: 'cuda_fp16.h' file not found
# 2024-05-06T23:09:35.5730483Z /__w/FBGEMM/FBGEMM/fbgemm_gpu/../third_party/cutlass/include/cutlass/half.h:73:10: fatal error: 'cuda_fp16.h' file not found
# #include <cuda_fp16.h>
#
add_subdirectory(experimental/gen_ai)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
((defined(CUDA_VERSION) && CUDA_VERSION < 11000) || \
(defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 800))))
#include <cuda_bf16.h>
#include <cuda_fp16.h>
#elif (defined(USE_ROCM))
#include <hip/hip_bfloat16.h> // @manual
#include <hip/hip_bf16.h>
#include <hip/hip_fp16.h>
#endif

#ifndef USE_ROCM
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,17 @@
defined(USE_ROCM) || \
((defined(CUDA_VERSION) && CUDA_VERSION < 11000) || \
(defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 800))))
#include <cublasLt.h>
#include <cuda_bf16.h>
#include <cuda_fp16.h>
#include <cuda/atomic>
#elif (defined(USE_ROCM))
#include <hip/hip_bfloat16.h>
#include <hip/hip_bf16.h>
#include <hip/hip_fp16.h>
#include <hipblaslt/hipblaslt.h>
#endif
#include <c10/core/ScalarType.h>
#include <c10/cuda/CUDAGuard.h>
#include <cublasLt.h>
#include <cutlass/core_io.h>
#include <cutlass/cutlass.h>
#include <cutlass/gemm/device/gemm.h>
Expand Down
4 changes: 3 additions & 1 deletion fbgemm_gpu/experimental/gen_ai/src/quantize/quantize.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@
((defined(CUDA_VERSION) && CUDA_VERSION < 11000) || \
(defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 800))))
#include <cuda_bf16.h>
#include <cuda_fp16.h>
#elif (defined(USE_ROCM))
#include <hip/hip_bfloat16.h>
#include <hip/hip_bf16.h>
#include <hip/hip_fp16.h>
#endif

#ifndef USE_ROCM
Expand Down

0 comments on commit f2b1b50

Please sign in to comment.