Skip to content

Commit

Permalink
Move thnvrtc and DynamicLibrary to ATen (pytorch#22362)
Browse files Browse the repository at this point in the history
Summary:
Having the NVRTC stub in ATen is necessary to call driver APIs in ATen. This is currently blocking pytorch#22229.

`DynamicLibrary` is also moved as it is used in the stub code, and seems general enough.
Pull Request resolved: pytorch#22362

Differential Revision: D16131787

Pulled By: ezyang

fbshipit-source-id: add2ee8a8865229578aa00001a00d5a6671e0e73
  • Loading branch information
ssnl authored and facebook-github-bot committed Jul 9, 2019
1 parent 74883d4 commit 31d821e
Show file tree
Hide file tree
Showing 28 changed files with 356 additions and 270 deletions.
2 changes: 2 additions & 0 deletions aten/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ set(ATen_THIRD_PARTY_INCLUDE)
set(ATen_CUDA_SRCS)
set(ATen_CUDA_TEST_SRCS)
set(ATen_CUDA_INCLUDE)
set(ATen_NVRTC_STUB_SRCS)
set(ATen_HIP_SRCS)
set(ATen_HIP_TEST_SRCS)
set(ATen_HIP_INCLUDE)
Expand Down Expand Up @@ -101,6 +102,7 @@ add_subdirectory(src/ATen)
set(ATen_CPU_SRCS ${ATen_CPU_SRCS} PARENT_SCOPE)
set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS} PARENT_SCOPE)
set(ATen_HIP_SRCS ${ATen_HIP_SRCS} PARENT_SCOPE)
set(ATen_NVRTC_STUB_SRCS ${ATen_NVRTC_STUB_SRCS} PARENT_SCOPE)
set(ATen_CPU_TEST_SRCS ${ATen_CPU_TEST_SRCS} PARENT_SCOPE)
set(ATen_CUDA_TEST_SRCS ${ATen_CUDA_TEST_SRCS} PARENT_SCOPE)
set(ATen_HIP_TEST_SRCS ${ATen_HIP_TEST_SRCS} PARENT_SCOPE)
Expand Down
1 change: 1 addition & 0 deletions aten/src/ATen/ATen.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <ATen/DeviceGuard.h>
#include <ATen/DimVector.h>
#include <ATen/Dispatch.h>
#include <ATen/DynamicLibrary.h>
#include <ATen/Formatting.h>
#include <ATen/Functions.h>
#ifdef BUILD_NAMEDTENSOR
Expand Down
9 changes: 9 additions & 0 deletions aten/src/ATen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,17 @@ FILE(GLOB base_cpp "*.cpp" "detail/*.cpp" "cpu/*.cpp")
add_subdirectory(core)
FILE(GLOB cuda_h "cuda/*.h" "cuda/detail/*.h" "cuda/*.cuh" "cuda/detail/*.cuh")
FILE(GLOB cuda_cpp "cuda/*.cpp" "cuda/detail/*.cpp")
FILE(GLOB cuda_nvrtc_stub_h "cuda/nvrtc_stub/*.h")
FILE(GLOB cuda_nvrtc_stub_cpp "cuda/nvrtc_stub/*.cpp")
FILE(GLOB cuda_cu "cuda/*.cu" "cuda/detail/*.cu")
FILE(GLOB cudnn_h "cudnn/*.h" "cudnn/*.cuh")
FILE(GLOB cudnn_cpp "cudnn/*.cpp")

FILE(GLOB hip_h "hip/*.h" "hip/detail/*.h" "hip/*.cuh" "hip/detail/*.cuh")
FILE(GLOB hip_cpp "hip/*.cpp" "hip/detail/*.cpp" "hip/impl/*.cpp")
FILE(GLOB hip_hip "hip/*.hip" "hip/detail/*.hip" "hip/impl/*.hip")
FILE(GLOB hip_nvrtc_stub_h "hip/nvrtc_stub/*.h")
FILE(GLOB hip_nvrtc_stub_cpp "hip/nvrtc_stub/*.cpp")
FILE(GLOB miopen_h "miopen/*.h")
FILE(GLOB miopen_cpp "miopen/*.cpp")

Expand Down Expand Up @@ -356,6 +360,7 @@ endif()

if(USE_CUDA)
set(ATen_CUDA_SRCS ${all_cuda_cpp})
set(ATen_NVRTC_STUB_SRCS ${cuda_nvrtc_stub_cpp})
if(AT_LINK_STYLE STREQUAL "INTERFACE")
# Source code can't be added to an interface library, so it is
# passed back to be compiled into the containing library
Expand All @@ -368,6 +373,9 @@ endif()

if(USE_ROCM)
set(ATen_HIP_SRCS ${all_hip_cpp})
# caffe2_nvrtc's stubs to driver APIs are useful for HIP.
# See NOTE [ ATen NVRTC Stub and HIP ]
set(ATen_NVRTC_STUB_SRCS ${hip_nvrtc_stub_cpp})
if(AT_LINK_STYLE STREQUAL "INTERFACE")
# Source code can't be added to an interface library, so it is
# passed back to be compiled into the containing library
Expand Down Expand Up @@ -439,6 +447,7 @@ endif()
set(ATen_CORE_SRCS ${ATen_CORE_SRCS} PARENT_SCOPE)
set(ATen_CPU_SRCS ${ATen_CPU_SRCS} PARENT_SCOPE)
set(ATen_CUDA_SRCS ${ATen_CUDA_SRCS} PARENT_SCOPE)
set(ATen_NVRTC_STUB_SRCS ${ATen_NVRTC_STUB_SRCS} PARENT_SCOPE)
set(ATen_HIP_SRCS ${ATen_HIP_SRCS} PARENT_SCOPE)
set(ATen_QUANTIZED_SRCS ${ATen_QUANTIZED_SRCS} PARENT_SCOPE)
set(ATen_CPU_TEST_SRCS ${ATen_CPU_TEST_SRCS} PARENT_SCOPE)
Expand Down
4 changes: 3 additions & 1 deletion aten/src/ATen/Context.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,9 @@ class CAFFE2_API Context {
});
return thh_state.get();
}

const at::cuda::NVRTC& getNVRTC() {
return detail::getCUDAHooks().nvrtc();
}
THCState* getTHCState() {
// AT_ASSERT(thc_state);
return thc_state.get();
Expand Down
74 changes: 74 additions & 0 deletions aten/src/ATen/DynamicLibrary.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#include <c10/util/Exception.h>
#include <ATen/DynamicLibrary.h>
#include <ATen/Utils.h>

#ifndef _WIN32
#include <dlfcn.h>
#include <libgen.h>
#else
#include <Windows.h>
#endif

namespace at {


#ifndef _WIN32

// Unix

static void* checkDL(void* x) {
if (!x) {
AT_ERROR("Error in dlopen or dlsym: ", dlerror());
}

return x;
}
DynamicLibrary::DynamicLibrary(const char* name) {
// NOLINTNEXTLINE(hicpp-signed-bitwise)
handle = checkDL(dlopen(name, RTLD_LOCAL | RTLD_NOW));
}

void* DynamicLibrary::sym(const char* name) {
AT_ASSERT(handle);
return checkDL(dlsym(handle, name));
}

DynamicLibrary::~DynamicLibrary() {
if (!handle)
return;
dlclose(handle);
}

#else

// Windows

DynamicLibrary::DynamicLibrary(const char* name) {
// NOLINTNEXTLINE(hicpp-signed-bitwise)
HMODULE theModule = LoadLibraryA(name);
if (theModule) {
handle = theModule;
} else {
AT_ERROR("error in LoadLibraryA");
}
}

void* DynamicLibrary::sym(const char* name) {
AT_ASSERT(handle);
FARPROC procAddress = GetProcAddress((HMODULE)handle, name);
if (!procAddress) {
AT_ERROR("error in GetProcAddress");
}
return (void*)procAddress;
}

DynamicLibrary::~DynamicLibrary() {
if (!handle) {
return;
}
FreeLibrary((HMODULE)handle);
}

#endif

} // namespace at
21 changes: 21 additions & 0 deletions aten/src/ATen/DynamicLibrary.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#pragma once

#include <c10/macros/Export.h>
#include <ATen/Utils.h>

namespace at {

struct DynamicLibrary {
AT_DISALLOW_COPY_AND_ASSIGN(DynamicLibrary);

CAFFE2_API DynamicLibrary(const char* name);

CAFFE2_API void* sym(const char* name);

CAFFE2_API ~DynamicLibrary();

private:
void* handle = nullptr;
};

} // namespace at
4 changes: 4 additions & 0 deletions aten/src/ATen/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@
#define __ubsan_ignore_vptr__
#endif

#define AT_DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&) = delete; \
void operator=(const TypeName&) = delete

namespace at {

CAFFE2_API int _crash_if_asan(int);
Expand Down
2 changes: 1 addition & 1 deletion aten/src/ATen/cuda/CUDAContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
#include <ATen/core/ATenGeneral.h>
#include <ATen/Context.h>
#include <c10/cuda/CUDAStream.h>
#include <ATen/cuda/Exceptions.h>
#include <c10/cuda/CUDAFunctions.h>
#include <ATen/cuda/Exceptions.h>

#include <cstdint>

Expand Down
55 changes: 55 additions & 0 deletions aten/src/ATen/cuda/Exceptions.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

#include <ATen/Context.h>
#include <c10/util/Exception.h>
#include <c10/cuda/CUDAException.h>

Expand All @@ -20,3 +21,57 @@
} while (0)

#define AT_CUDA_CHECK(EXPR) C10_CUDA_CHECK(EXPR)

// For CUDA Driver API
//
// This is here instead of in c10 because NVRTC is loaded dynamically via a stub
// in ATen, and we need to use its nvrtcGetErrorString.
// See NOTE [ USE OF NVRTC AND DRIVER API ].
#ifndef __HIP_PLATFORM_HCC__

#define AT_CUDA_DRIVER_CHECK(EXPR) \
do { \
CUresult __err = EXPR; \
if (__err != CUDA_SUCCESS) { \
const char* err_str; \
CUresult get_error_str_err C10_UNUSED = at::globalContext().getNVRTC().cuGetErrorString(__err, &err_str); \
if (get_error_str_err != CUDA_SUCCESS) { \
AT_ERROR("CUDA driver error: unknown error"); \
} else { \
AT_ERROR("CUDA driver error: ", err_str); \
} \
} \
} while (0)

#else

#define AT_CUDA_DRIVER_CHECK(EXPR) \
do { \
CUresult __err = EXPR; \
if (__err != CUDA_SUCCESS) { \
AT_ERROR("CUDA driver error: ", static_cast<int>(__err)); \
} \
} while (0)

#endif

// For CUDA NVRTC
//
// Note: As of CUDA 10, nvrtc error code 7, NVRTC_ERROR_BUILTIN_OPERATION_FAILURE,
// incorrectly produces the error string "NVRTC unknown error."
// The following maps it correctly.
//
// This is here instead of in c10 because NVRTC is loaded dynamically via a stub
// in ATen, and we need to use its nvrtcGetErrorString.
// See NOTE [ USE OF NVRTC AND DRIVER API ].
#define AT_CUDA_NVRTC_CHECK(EXPR) \
do { \
nvrtcResult __err = EXPR; \
if (__err != NVRTC_SUCCESS) { \
if (static_cast<int>(__err) != 7) { \
AT_ERROR("CUDA NVRTC error: ", at::globalContext().getNVRTC().nvrtcGetErrorString(__err)); \
} else { \
AT_ERROR("CUDA NVRTC error: NVRTC_ERROR_BUILTIN_OPERATION_FAILURE"); \
} \
} \
} while (0)
28 changes: 28 additions & 0 deletions aten/src/ATen/cuda/detail/CUDAHooks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

#include <ATen/CUDAGenerator.h>
#include <ATen/Context.h>
#include <ATen/DynamicLibrary.h>
#include <ATen/cuda/CUDAConfig.h>
#include <ATen/cuda/CUDADevice.h>
#include <ATen/cuda/PinnedMemoryAllocator.h>
#include <ATen/cuda/nvrtc_stub/ATenNVRTC.h>
#include <ATen/detail/CUDAHooksInterface.h>
#include <ATen/native/cuda/CuFFTPlanCache.h>
#include <c10/util/Exception.h>
Expand Down Expand Up @@ -77,6 +79,32 @@ bool CUDAHooks::hasCuDNN() const {
return AT_CUDNN_ENABLED();
}

#ifdef USE_DIRECT_NVRTC
static std::pair<std::unique_ptr<at::DynamicLibrary>, at::cuda::NVRTC*> load_nvrtc() {
return std::make_pair(nullptr, at::cuda::load_nvrtc());
}
#else
static std::pair<std::unique_ptr<at::DynamicLibrary>, at::cuda::NVRTC*> load_nvrtc() {
#if defined(_WIN32)
std::string libcaffe2_nvrtc = "caffe2_nvrtc.dll";
#elif defined(__APPLE__)
std::string libcaffe2_nvrtc = "libcaffe2_nvrtc.dylib";
#else
std::string libcaffe2_nvrtc = "libcaffe2_nvrtc.so";
#endif
std::unique_ptr<at::DynamicLibrary> libnvrtc_stub(
new at::DynamicLibrary(libcaffe2_nvrtc.c_str()));
auto fn = (at::cuda::NVRTC * (*)()) libnvrtc_stub->sym("load_nvrtc");
return std::make_pair(std::move(libnvrtc_stub), fn());
}
#endif

const at::cuda::NVRTC& CUDAHooks::nvrtc() const {
// must hold onto DynamicLibrary otherwise it will unload
static auto handle = load_nvrtc();
return *handle.second;
}

int64_t CUDAHooks::current_device() const {
int device;
cudaError_t err = cudaGetDevice(&device);
Expand Down
1 change: 1 addition & 0 deletions aten/src/ATen/cuda/detail/CUDAHooks.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ struct CUDAHooks : public at::CUDAHooksInterface {
bool hasCUDA() const override;
bool hasMAGMA() const override;
bool hasCuDNN() const override;
const at::cuda::NVRTC& nvrtc() const override;
int64_t current_device() const override;
Allocator* getPinnedMemoryAllocator() const override;
bool compiledWithCuDNN() const override;
Expand Down
13 changes: 13 additions & 0 deletions aten/src/ATen/cuda/nvrtc_stub/ATenNVRTC.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include <ATen/cuda/nvrtc_stub/ATenNVRTC.h>
#include <iostream>

namespace at { namespace cuda {

NVRTC* load_nvrtc() {
auto self = new NVRTC();
#define CREATE_ASSIGN(name) self->name = name;
AT_FORALL_NVRTC(CREATE_ASSIGN)
return self;
}

}} // at::cuda
Loading

0 comments on commit 31d821e

Please sign in to comment.