Skip to content

Commit

Permalink
Revert "Add wrappers for synchronous GPUDirect Storage APIs (pytorch#…
Browse files Browse the repository at this point in the history
…130633)"

This reverts commit 709ddf7.

Reverted pytorch#130633 on behalf of https://github.com/clee2000 due to still failing internally D60265673 ([comment](pytorch#130633 (comment)))
  • Loading branch information
pytorchmergebot committed Jul 26, 2024
1 parent e4db5dc commit e191b83
Show file tree
Hide file tree
Showing 20 changed files with 1 addition and 391 deletions.
1 change: 0 additions & 1 deletion BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,6 @@ cc_library(
"@cuda//:nvrtc",
"@cudnn",
"@cudnn_frontend",
"@cuda//:cufile",
],
alwayslink = True,
)
Expand Down
9 changes: 0 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -251,15 +251,6 @@ cmake_dependent_option(USE_CUDNN "Use cuDNN" ON "USE_CUDA" OFF)
cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
"USE_CUDNN" OFF)
cmake_dependent_option(USE_CUSPARSELT "Use cuSPARSELt" ON "USE_CUDA" OFF)
# USE_CUFILE: build with cuFile support (defaults to ON for CUDA builds that
# are neither Windows nor binary builds, OFF otherwise).
#
# Binary builds will fail for cufile due to https://github.com/pytorch/builder/issues/1924
# Using TH_BINARY_BUILD to check whether is binary build.
# USE_ROCM is guarded against in Dependencies.cmake because USE_ROCM is not properly defined here
#
# The environment value is copied into a normal variable and referenced by
# name inside the condition instead of being spliced in as text. An unset or
# empty TH_BINARY_BUILD then simply evaluates as false, rather than producing
# a malformed condition such as "USE_CUDA AND NOT  AND NOT WIN32".
set(_cufile_th_binary_build "$ENV{TH_BINARY_BUILD}")
cmake_dependent_option(
  USE_CUFILE "Use cuFile" ON
  "USE_CUDA AND NOT _cufile_th_binary_build AND NOT WIN32" OFF)
unset(_cufile_th_binary_build)
option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
option(USE_KINETO "Use Kineto profiling library" ON)
option(USE_CUPTI_SO "Use CUPTI as a shared library" ON)
Expand Down
1 change: 0 additions & 1 deletion build_variables.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -773,7 +773,6 @@ libtorch_python_cuda_core_sources = [
"torch/csrc/cuda/shared/cudart.cpp",
"torch/csrc/cuda/shared/nvtx.cpp",
"torch/csrc/cuda/utils.cpp",
"torch/csrc/cuda/GdsFile.cpp",
]

libtorch_python_cuda_sources = libtorch_python_cuda_core_sources + [
Expand Down
4 changes: 0 additions & 4 deletions caffe2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -928,10 +928,6 @@ elseif(USE_CUDA)
torch_compile_options(torch_cuda) # see cmake/public/utils.cmake
target_compile_definitions(torch_cuda PRIVATE USE_CUDA)

# Optional cuFile support for torch_cuda: define USE_CUFILE for its sources
# and link against the torch::cufile target. Both settings are PRIVATE.
if(USE_CUFILE)
  target_compile_definitions(torch_cuda PRIVATE USE_CUFILE)
  target_link_libraries(torch_cuda PRIVATE torch::cufile)
endif()
if(USE_CUSPARSELT)
target_link_libraries(torch_cuda PRIVATE torch::cusparselt)
target_compile_definitions(torch_cuda PRIVATE USE_CUSPARSELT)
Expand Down
6 changes: 1 addition & 5 deletions cmake/Dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ if(USE_CUDA)
set(CAFFE2_USE_CUDA ${USE_CUDA})
set(CAFFE2_USE_CUDNN ${USE_CUDNN})
set(CAFFE2_USE_CUSPARSELT ${USE_CUSPARSELT})
set(CAFFE2_USE_CUFILE ${USE_CUFILE})
set(CAFFE2_USE_NVRTC ${USE_NVRTC})
include(${CMAKE_CURRENT_LIST_DIR}/public/cuda.cmake)
if(CAFFE2_USE_CUDA)
Expand All @@ -61,9 +60,6 @@ if(USE_CUDA)
else()
caffe2_update_option(USE_CUSPARSELT OFF)
endif()
# When cuFile is enabled, add its imported target to the list of CUDA
# dependency libraries.
if(CAFFE2_USE_CUFILE)
  list(
    APPEND Caffe2_CUDA_DEPENDENCY_LIBS
    torch::cufile
  )
endif()
find_program(SCCACHE_EXECUTABLE sccache)
if(SCCACHE_EXECUTABLE)
# Using RSP/--options-file renders output noncacheable by sccache
Expand All @@ -83,7 +79,6 @@ if(USE_CUDA)
set(CAFFE2_USE_CUDA OFF)
set(CAFFE2_USE_CUDNN OFF)
set(CAFFE2_USE_CUSPARSELT OFF)
set(CAFFE2_USE_CUFILE OFF)
set(CAFFE2_USE_NVRTC OFF)
endif()
endif()
Expand Down Expand Up @@ -1040,6 +1035,7 @@ if(USE_ROCM)
caffe2_update_option(USE_SYSTEM_NCCL ON)
endif()


list(APPEND HIP_CXX_FLAGS -fPIC)
list(APPEND HIP_CXX_FLAGS -D__HIP_PLATFORM_AMD__=1)
list(APPEND HIP_CXX_FLAGS -DCUDA_HAS_FP16=1)
Expand Down
8 changes: 0 additions & 8 deletions cmake/Modules/FindCUDAToolkit.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -978,14 +978,6 @@ if(CUDAToolkit_FOUND)
_CUDAToolkit_find_and_add_import_lib(cublas_static DEPS culibos)
endif()

# cuFile libraries are only looked up for CUDA Toolkit 11.4 and newer.
# NOTE(review): _CUDAToolkit_find_and_add_import_lib is defined elsewhere in
# this module; ALT presumably names an alternate library file name and DEPS
# the CUDA:: targets the new target depends on -- confirm against the helper.
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 11.4)
# Base cuFile library, shared and static variants.
_CUDAToolkit_find_and_add_import_lib(cuFile ALT cufile DEPS culibos)
_CUDAToolkit_find_and_add_import_lib(cuFile_static ALT cufile_static DEPS culibos)

# RDMA variants list the matching base cuFile target in DEPS, so they are
# declared after the base targets above.
_CUDAToolkit_find_and_add_import_lib(cuFile_rdma ALT cufile_rdma DEPS cuFile culibos)
_CUDAToolkit_find_and_add_import_lib(cuFile_rdma_static ALT cufile_rdma_static DEPS cuFile_static culibos)
endif()

# cuFFTW depends on cuFFT
_CUDAToolkit_find_and_add_import_lib(cufftw DEPS cufft)
_CUDAToolkit_find_and_add_import_lib(cufftw_static DEPS cufft_static)
Expand Down
4 changes: 0 additions & 4 deletions cmake/Summary.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ function(caffe2_print_configuration_summary)
message(STATUS " CUDA static link : ${CAFFE2_STATIC_LINK_CUDA}")
message(STATUS " USE_CUDNN : ${USE_CUDNN}")
message(STATUS " USE_CUSPARSELT : ${USE_CUSPARSELT}")
message(STATUS " USE_CUFILE : ${USE_CUFILE}")
message(STATUS " CUDA version : ${CUDA_VERSION}")
message(STATUS " USE_FLASH_ATTENTION : ${USE_FLASH_ATTENTION}")
message(STATUS " USE_MEM_EFF_ATTENTION : ${USE_MEM_EFF_ATTENTION}")
Expand All @@ -84,9 +83,6 @@ function(caffe2_print_configuration_summary)
if(${USE_CUSPARSELT})
message(STATUS " cuSPARSELt version : ${CUSPARSELT_VERSION}")
endif()
# Print the located cuFile library only when cuFile support is enabled.
# The variable is referenced by name so that if() evaluates it directly;
# expanding it first (if(${USE_CUFILE})) makes if() re-interpret the expanded
# text and leaves an empty condition when the variable is unset.
if(USE_CUFILE)
  message(STATUS " cufile library : ${CUDA_cuFile_LIBRARY}")
endif()
message(STATUS " CUDA root directory : ${CUDA_TOOLKIT_ROOT_DIR}")
message(STATUS " CUDA library : ${CUDA_cuda_driver_LIBRARY}")
message(STATUS " cudart library : ${CUDA_cudart_LIBRARY}")
Expand Down
16 changes: 0 additions & 16 deletions cmake/public/cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -244,22 +244,6 @@ else()
message(STATUS "USE_CUSPARSELT is set to 0. Compiling without cuSPARSELt support")
endif()

# cufile
# When enabled, torch::cufile is an INTERFACE IMPORTED target whose only job is
# to forward to the static or shared CUDA::cuFile* target. The static variant
# is chosen when CUDA is linked statically on non-Windows platforms.
if(NOT CAFFE2_USE_CUFILE)
  message(STATUS "USE_CUFILE is set to 0. Compiling without cuFile support")
else()
  if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
    set(_torch_cufile_backend CUDA::cuFile_static)
  else()
    set(_torch_cufile_backend CUDA::cuFile)
  endif()
  add_library(torch::cufile INTERFACE IMPORTED)
  set_target_properties(torch::cufile PROPERTIES
    INTERFACE_LINK_LIBRARIES "${_torch_cufile_backend}")
  unset(_torch_cufile_backend)
endif()

# curand
add_library(caffe2::curand INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA AND NOT WIN32)
Expand Down
1 change: 0 additions & 1 deletion docs/source/cuda.rst
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,6 @@ See the :doc:`documentation <cuda._sanitizer>` for information on how to use it.
.. for tracking purposes
.. py:module:: torch.cuda.comm
.. py:module:: torch.cuda.error
.. py:module:: torch.cuda.gds
.. py:module:: torch.cuda.graphs
.. py:module:: torch.cuda.jiterator
.. py:module:: torch.cuda.memory
Expand Down
3 changes: 0 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,6 @@
# USE_CUSPARSELT=0
# disables the cuSPARSELt build
#
# USE_CUFILE=0
# disables the cuFile build
#
# USE_FBGEMM=0
# disables the FBGEMM build
#
Expand Down
46 changes: 0 additions & 46 deletions test/test_cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
from itertools import product
from random import randint

import psutil

import torch
import torch.cuda
from torch import inf, nan
Expand Down Expand Up @@ -64,7 +62,6 @@
skipIfRocm,
slowTest,
subtest,
TemporaryFileName,
TEST_CUDA,
TEST_CUDA_GRAPH,
TEST_NUMPY,
Expand Down Expand Up @@ -4025,15 +4022,6 @@ def test_device_count_not_cached_pre_init(self):
x = torch.cuda.device_count()
self.assertEqual(f"{x}, 1", r)

def test_gds_fails_in_ci(self):
    # Constructing a _GdsFile is expected to raise RuntimeError here; only the
    # message differs by platform. NOTE(review): the test name suggests this
    # relies on CI machines lacking working GDS support -- confirm.
    if IS_WINDOWS or TEST_WITH_ROCM:
        error_msg = "is not supported on this platform"
    else:
        error_msg = "cuFileHandleRegister failed"
    # The constructed object is never used, so it is not bound to a name.
    with TemporaryFileName() as f, self.assertRaisesRegex(RuntimeError, error_msg):
        torch.cuda.gds._GdsFile(f, os.O_CREAT | os.O_RDWR)


@torch.testing._internal.common_utils.markDynamoStrictTest
class TestCudaMallocAsync(TestCase):
Expand Down Expand Up @@ -5181,40 +5169,6 @@ def test_graph_grad_scaling(self, device, dtype, optim_info, foreach, fused):
self.assertEqual(scaler._growth_tracker, growth_tracker)


class TestGDS(TestCase):
    # Round-trip test for the private torch.cuda.gds save/load helpers.

    def _get_tmp_dir_fs_type(self):
        # Returns the filesystem type of the partition mounted exactly at the
        # resolved /tmp path, or the root ("/") partition's type when /tmp is
        # not a mount point of its own. Returns "" if neither is listed.
        my_path = os.path.realpath("/tmp")
        root_type = ""
        for part in psutil.disk_partitions():
            if part.mountpoint == "/":
                root_type = part.fstype
                continue
            if part.mountpoint == my_path:
                return part.fstype
        return root_type

    @unittest.skipIf(IS_WINDOWS or TEST_WITH_ROCM, "Not supported on Windows or ROCm")
    def test_gds_read_write_tensors(self):
        # Saves two CUDA tensors back to back into one file, loads them into
        # fresh tensors at the same offsets, and checks they are equal.
        if self._get_tmp_dir_fs_type() not in ("ext4", "xfs"):
            self.skipTest("GPUDirect Storage requires ext4/xfs for local filesystem")
        src1 = torch.randn(1024, device="cuda")
        src2 = torch.randn(2, 1024, device="cuda")
        dest1 = torch.empty(1024, device="cuda")
        dest2 = torch.empty(2, 1024, device="cuda")
        # Track what was actually registered so the finally block deregisters
        # exactly those buffers, even if a later step or assertion raises.
        registered = []
        try:
            for src in (src1, src2):
                storage = src.untyped_storage()
                torch.cuda.gds._gds_register_buffer(storage)
                registered.append(storage)
            with TemporaryFileName() as f:
                file = torch.cuda.gds._GdsFile(f, os.O_CREAT | os.O_RDWR)
                # src2 is written directly after src1, hence the src1.nbytes offset.
                file.save_storage(src1.untyped_storage(), offset=0)
                file.save_storage(src2.untyped_storage(), offset=src1.nbytes)
                file.load_storage(dest1.untyped_storage(), offset=0)
                file.load_storage(dest2.untyped_storage(), offset=src1.nbytes)
            self.assertEqual(src1, dest1)
            self.assertEqual(src2, dest2)
        finally:
            for storage in registered:
                torch.cuda.gds._gds_deregister_buffer(storage)


instantiate_parametrized_tests(TestCuda)
instantiate_parametrized_tests(TestCudaMallocAsync)
instantiate_device_type_tests(TestCudaOptims, globals())
Expand Down
6 changes: 0 additions & 6 deletions third_party/cuda.BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,6 @@ cc_library(
visibility = ["//visibility:public"],
)

# Prebuilt shared cuFile library (x86_64 Linux layout), visible to all packages.
cc_library(
    name = "cufile",
    srcs = [
        "targets/x86_64-linux/lib/libcufile.so",
    ],
    visibility = [
        "//visibility:public",
    ],
)

cc_library(
name = "nvrtc",
srcs = [
Expand Down
4 changes: 0 additions & 4 deletions torch/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -312,10 +312,6 @@ if(USE_NUMPY)
target_compile_definitions(torch_python PRIVATE USE_NUMPY)
endif()

# Define USE_CUFILE for torch_python sources only when cuFile is enabled and
# this is not a ROCm build.
if(NOT USE_ROCM AND USE_CUFILE)
  target_compile_definitions(
    torch_python
    PRIVATE USE_CUFILE
  )
endif()

if(HAVE_SOVERSION)
set_target_properties(torch_python PROPERTIES
VERSION ${TORCH_VERSION} SOVERSION ${TORCH_SOVERSION})
Expand Down
8 changes: 0 additions & 8 deletions torch/_C/__init__.pyi.in
Original file line number Diff line number Diff line change
Expand Up @@ -1975,14 +1975,6 @@ def _can_use_cudnn_attention(params: _SDPAParams, debug: _bool) -> _bool: ...
def _can_use_flash_attention(params: _SDPAParams, debug: _bool) -> _bool: ...
def _can_use_mem_efficient_attention(params: _SDPAParams, debug: _bool) -> _bool: ...

# Defined in torch/csrc/cuda/GdsFile.cpp
# Private GPUDirect Storage (GDS) bindings.
# NOTE(review): the notes below are inferred from the signatures only --
# confirm against GdsFile.cpp.
# Register / deregister the buffer behind a storage.
def _gds_register_buffer(t: Storage) -> None: ...
def _gds_deregister_buffer(t: Storage) -> None: ...
# Takes a file descriptor and returns an integer handle; presumably this is the
# `handle` accepted by the functions below -- TODO confirm.
def _gds_register_handle(fd: _int) -> _int: ...
def _gds_deregister_handle(handle: _int) -> None: ...
# Load into / save from storage `s` via `handle`; `offset` is presumably a byte
# offset into the file -- TODO confirm.
def _gds_load_storage(handle: _int, s: Storage, offset: _int) -> None: ...
def _gds_save_storage(handle: _int, s: Storage, offset: _int) -> None: ...

# Defined in torch/csrc/cuda/python_comm.cpp
def _broadcast(tensor: Tensor, devices: List[_int]) -> List[Tensor]: ...
def _broadcast_out(tensor: Tensor, out_tensors: List[Tensor]) -> List[Tensor]: ...
Expand Down
1 change: 0 additions & 1 deletion torch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,6 @@ def _load_global_deps() -> None:
"cuda_runtime": "libcudart.so.*[0-9]",
"cuda_cupti": "libcupti.so.*[0-9]",
"cufft": "libcufft.so.*[0-9]",
"cufile": "libcufile.so.*[0-9]",
"curand": "libcurand.so.*[0-9]",
"nvjitlink": "libnvJitLink.so.*[0-9]",
"cusparse": "libcusparse.so.*[0-9]",
Expand Down
Loading

0 comments on commit e191b83

Please sign in to comment.