[CUDA] Add CUDA11 support (dmlc#2308)

* add support for cuda 11
* fix inc bug in pytorch 1.8
* poke ci
* fix
* small fix
* try fix
* try fix

Co-authored-by: Jinjing Zhou <[email protected]>
Co-authored-by: Quan (Andy) Gan <[email protected]>
dragomirradev · Nov 7, 2020 · 4fb0241 · 4fb0241
1 parent 6eda605
commit 4fb0241
Show file tree

Hide file tree

Showing 6 changed files with 25 additions and 9 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -20,3 +20,6 @@
 [submodule "third_party/phmap"]
 	path = third_party/phmap
 	url = https://github.com/greg7mdp/parallel-hashmap.git
+[submodule "third_party/thrust"]
+	path = third_party/thrust
+	url = https://github.com/NVIDIA/thrust.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -38,6 +38,17 @@ if(USE_CUDA)
   message(STATUS "Build with CUDA support")
   project(dgl C CXX)
   include(cmake/modules/CUDA.cmake)
+  if ((CUDA_VERSION_MAJOR LESS 11) OR
+      ((CUDA_VERSION_MAJOR EQUAL 11) AND (CUDA_VERSION_MINOR EQUAL 0)))
+    # For cuda<11, use external CUB/Thrust library because CUB is not part of CUDA.
+    # For cuda==11.0, use external CUB/Thrust library because there is a bug in the
+    #   official CUB library which causes invalid device ordinal error for DGL. The bug
+    #   is fixed by https://github.com/NVIDIA/cub/commit/9143e47e048641aa0e6ddfd645bcd54ff1059939
+    #   in 11.1.
+    message(STATUS "Detected CUDA of version ${CUDA_VERSION}. Use external CUB/Thrust library.")
+    list(INSERT CUDA_INCLUDE_DIRS 0 "${CMAKE_SOURCE_DIR}/third_party/thrust")
+    list(INSERT CUDA_INCLUDE_DIRS 0 "${CMAKE_SOURCE_DIR}/third_party/cub")
+  endif()
 endif(USE_CUDA)
 
 # include directories
@@ -47,7 +58,6 @@ include_directories("third_party/METIS/include/")
 include_directories("third_party/dmlc-core/include")
 include_directories("third_party/minigun/minigun")
 include_directories("third_party/minigun/third_party/moderngpu/src")
-include_directories("third_party/cub/")
 include_directories("third_party/phmap/")
 
 # initial variables
@@ -79,9 +89,9 @@ if(MSVC)
   endif()
 else(MSVC)
   include(CheckCXXCompilerFlag)
-  check_cxx_compiler_flag("-std=c++11"    SUPPORT_CXX11)
+  check_cxx_compiler_flag("-std=c++14"    SUPPORT_CXX14)
   set(CMAKE_C_FLAGS "-O2 -Wall -fPIC ${CMAKE_C_FLAGS}")
-  set(CMAKE_CXX_FLAGS "-O2 -Wall -fPIC -std=c++11 ${CMAKE_CXX_FLAGS}")
+  set(CMAKE_CXX_FLAGS "-O2 -Wall -fPIC -std=c++14 ${CMAKE_CXX_FLAGS}")
 endif(MSVC)
 
 if(USE_OPENMP)

diff --git a/cmake/modules/CUDA.cmake b/cmake/modules/CUDA.cmake
@@ -8,7 +8,7 @@ endif()
 ###### Borrowed from MSHADOW project
 
 include(CheckCXXCompilerFlag)
-check_cxx_compiler_flag("-std=c++11"   SUPPORT_CXX11)
+check_cxx_compiler_flag("-std=c++14"   SUPPORT_CXX14)
 
 set(dgl_known_gpu_archs "35 50 60 70")
 
@@ -176,7 +176,7 @@ macro(dgl_cuda_compile objlist_variable)
 
   endforeach()
   if(UNIX OR APPLE)
-    list(APPEND CUDA_NVCC_FLAGS -Xcompiler -fPIC)
+    list(APPEND CUDA_NVCC_FLAGS -Xcompiler -fPIC --std=c++14)
   endif()
 
   if(APPLE)
@@ -246,6 +246,8 @@ macro(dgl_config_cuda out_variable)
   set(NVCC_FLAGS_EXTRA "${NVCC_FLAGS_EXTRA} --expt-extended-lambda")
   # suppress deprecated warning in moderngpu
   set(NVCC_FLAGS_EXTRA "${NVCC_FLAGS_EXTRA} -Wno-deprecated-declarations")
+  # for compile with c++14
+  set(NVCC_FLAGS_EXTRA "${NVCC_FLAGS_EXTRA} --expt-extended-lambda --std=c++14")
   message(STATUS "NVCC extra flags: ${NVCC_FLAGS_EXTRA}")
   set(CUDA_NVCC_FLAGS  "${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA}")
   list(APPEND CMAKE_CUDA_FLAGS "${NVCC_FLAGS_EXTRA}")

diff --git a/python/dgl/heterograph_index.py b/python/dgl/heterograph_index.py
@@ -750,15 +750,15 @@ def incidence_matrix(self, etype, typestr, ctx):
             n = self.number_of_nodes(dsttype)
             row = F.unsqueeze(dst, 0)
             col = F.unsqueeze(eid, 0)
-            idx = F.cat([row, col], dim=0)
+            idx = F.copy_to(F.cat([row, col], dim=0), ctx)
             # FIXME(minjie): data type
             dat = F.ones((m,), dtype=F.float32, ctx=ctx)
             inc, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, m))
         elif typestr == 'out':
             n = self.number_of_nodes(srctype)
             row = F.unsqueeze(src, 0)
             col = F.unsqueeze(eid, 0)
-            idx = F.cat([row, col], dim=0)
+            idx = F.copy_to(F.cat([row, col], dim=0), ctx)
             # FIXME(minjie): data type
             dat = F.ones((m,), dtype=F.float32, ctx=ctx)
             inc, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, m))
@@ -775,7 +775,7 @@ def incidence_matrix(self, etype, typestr, ctx):
             # create index
             row = F.unsqueeze(F.cat([src, dst], dim=0), 0)
             col = F.unsqueeze(F.cat([eid, eid], dim=0), 0)
-            idx = F.cat([row, col], dim=0)
+            idx = F.copy_to(F.cat([row, col], dim=0), ctx)
             # FIXME(minjie): data type
             x = -F.ones((n_entries,), dtype=F.float32, ctx=ctx)
             y = F.ones((n_entries,), dtype=F.float32, ctx=ctx)

diff --git a/third_party/cub b/third_party/cub
diff --git a/third_party/thrust b/third_party/thrust