Always use nvrtc for compilation (openmm#4146)

* Always use nvrtc for compilation
* Install nvrtc on CI
* Workaround for compiler error
* Set empty values for deprecated properties
erbad · Jul 20, 2023 · dd07fa7 · dd07fa7
1 parent 007ab83
commit dd07fa7
Show file tree

Hide file tree

Showing 23 changed files with 106 additions and 793 deletions.
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -289,8 +289,6 @@ jobs:
           # With CUDA, we _expect_ CUDA plugins :)
           if [[ "${{ matrix.cuda-version }}" != "" ]]; then
             test -f ${CONDA_PREFIX}/lib/plugins/libOpenMMCUDA.$SHLIB
-            # TODO: Check with Peter why this is not there. Maybe we need an extra flag?
-            # test -f ${CONDA_PREFIX}/lib/plugins/libOpenMMCudaCompiler.$SHLIB
           fi
           # OpenCL should also be there for CUDA and, well, OpenCL
           if [[ "${{ matrix.cuda-version }}" != "" || ${{ matrix.OPENCL }} == true ]]; then
@@ -447,7 +445,6 @@ jobs:
           if not "${{ matrix.cuda-version }}" == "" (
             if not exist %CONDA_PREFIX%/Library/lib/plugins/OpenMMCUDA.lib exit 1
             if not exist %CONDA_PREFIX%/Library/lib/plugins/OpenMMOpenCL.lib exit 1
-            if not exist %CONDA_PREFIX%/Library/lib/plugins/OpenMMCudaCompiler.lib exit 1
           ) else (
             if not exist %CONDA_PREFIX%/Library/lib/plugins/OpenMMCPU.lib exit 1
             if not exist %CONDA_PREFIX%/Library/lib/plugins/OpenMMPME.lib exit 1

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -401,21 +401,6 @@ IF(OPENMM_BUILD_PME_PLUGIN)
    ADD_SUBDIRECTORY(plugins/cpupme)
 ENDIF(OPENMM_BUILD_PME_PLUGIN)
 
-# CUDA compiler plugin
-
-GET_FILENAME_COMPONENT(CUDA_LIB_DIR "${CUDA_cufft_LIBRARY}" PATH)
-FIND_LIBRARY(CUDA_nvrtc_LIBRARY nvrtc "${CUDA_LIB_DIR}")
-IF(CUDA_nvrtc_LIBRARY)
-    SET(OPENMM_BUILD_CUDA_COMPILER_PLUGIN ON CACHE BOOL "Build CUDA runtime compiler plugin")
-ELSE(CUDA_nvrtc_LIBRARY)
-    SET(OPENMM_BUILD_CUDA_COMPILER_PLUGIN OFF CACHE BOOL "Build CUDA runtime compiler plugin")
-ENDIF(CUDA_nvrtc_LIBRARY)
-SET(OPENMM_BUILD_CUDACOMPILER_PATH)
-IF(OPENMM_BUILD_CUDA_COMPILER_PLUGIN)
-   SET(OPENMM_BUILD_CUDACOMPILER_PATH ${CMAKE_CURRENT_SOURCE_DIR}/plugins/cudacompiler)
-   ADD_SUBDIRECTORY(plugins/cudacompiler)
-ENDIF(OPENMM_BUILD_CUDA_COMPILER_PLUGIN)
-
 IF(OPENMM_BUILD_SHARED_LIB)
     INSTALL_TARGETS(/lib RUNTIME_DIRECTORY /lib ${SHARED_TARGET})
 ENDIF(OPENMM_BUILD_SHARED_LIB)

diff --git a/devtools/ci/gh-actions/scripts/install_cuda.sh b/devtools/ci/gh-actions/scripts/install_cuda.sh
@@ -24,6 +24,7 @@ sudo apt-get install -y \
     cuda-drivers cuda-driver-dev-${CUDA_APT} \
     cuda-cudart-${CUDA_APT} cuda-cudart-dev-${CUDA_APT} \
     ${CUFFT}-${CUDA_APT} ${CUFFT}-dev-${CUDA_APT} \
+    cuda-nvrtc-${CUDA_APT} cuda-nvrtc-dev-${CUDA_APT} \
     cuda-nvprof-${CUDA_APT}
 sudo apt-get clean
 

diff --git a/docs-source/developerguide/07_cuda_platform.rst b/docs-source/developerguide/07_cuda_platform.rst
@@ -15,35 +15,22 @@ The CUDA platform is very similar to the OpenCL platform, and most of the
 previous chapter applies equally well to it, just changing “OpenCL” to “Cuda” in
 class names.  There are a few differences worth noting.
 
-Compiling Kernels
-*****************
+Caching Kernels
+***************
 
 Like the OpenCL platform, the CUDA platform compiles all its kernels at runtime.
-Unlike OpenCL, CUDA does not have built in support for runtime compilation.
-OpenMM therefore needs to implement this itself by writing the source code out
-to disk, invoking the nvcc compiler as a separate process, and then loading the
-compiled kernel in from disk.
-
-For the most part, you can ignore all of this.  Just call
-:code:`createModule()` on the CudaContext, passing it the CUDA source code.
-It takes care of the details of compilation and loading, returning a CUmodule
-object when it is done.  You can then call :code:`getKernel()` to look up
-individual kernels in the module (represented as CUfunction objects) and
-:code:`executeKernel()` to execute them.
-
-The CUDA platform does need two things to make this work: a directory on disk
-where it can write out temporary files, and the path to the nvcc compiler.
-These are specified by the “CudaTempDirectory” and “CudaCompiler” properties
-when you create a new Context.  It often can figure out suitable values for them
-on its own, but sometimes it needs help.  See the “Platform-Specific Properties”
-chapter of the User's Manual for details.
+To improve performance, it tries to cache the compiled kernels on disk for
+later use.  This allows subsequent Contexts to skip compiling some kernels.  To
+make this work, it needs a directory on disk where it can write out temporary
+files.  It is specified by the “CudaTempDirectory” property when you create a
+new Context.  It usually can figure out a suitable value on its own, but
+sometimes it needs help.  See the “Platform-Specific Properties” chapter of the
+User's Manual for details.
 
 Accumulating Forces
 *******************
 
 The OpenCL platform, as described in Section :numref:`computing-forces`\ , uses two types of buffers for
 accumulating forces: a set of floating point buffers, and a single fixed point
 buffer.  In contrast, the CUDA platform uses *only* the fixed point buffer
-(represented by the CUDA type :code:`long` :code:`long`\ ).  This means
-the CUDA platform only works on devices that support 64 bit atomic operations
-(compute capability 1.2 or higher).
+(represented by the CUDA type :code:`long` :code:`long`\ ).
diff --git a/docs-source/usersguide/library/04_platform_specifics.rst b/docs-source/usersguide/library/04_platform_specifics.rst
@@ -73,19 +73,6 @@ The CUDA Platform recognizes the following Platform-specific properties:
 * UseCpuPme: This selects whether to use the CPU-based PME implementation.
   The allowed values are “true” or “false”.  Depending on your hardware, this
   might (or might not) improve performance.
-* CudaCompiler: This specifies the path to the CUDA kernel compiler.  Versions
-  of CUDA before 7.0 require a separate compiler executable.  If you do
-  not specify this, OpenMM will try to locate the compiler itself.  Specify this
-  only when you want to override the default location.  The logic used to pick the
-  default location depends on the operating system:
-
-  * Mac/Linux: It first looks for an environment variable called
-    OPENMM_CUDA_COMPILER.  If that is set, its value is used.  Otherwise, the
-    default location is set to /usr/local/cuda/bin/nvcc.
-  * Windows: It looks for an environment variable called CUDA_BIN_PATH, then
-    appends \nvcc.exe to it.  That environment variable is set by the CUDA
-    installer, so it usually is present.
-
 * TempDirectory: This specifies a directory where temporary files can be
   written while compiling kernels.  OpenMM usually can locate your operating
   system’s temp directory automatically (for example, by looking for the TEMP

diff --git a/platforms/cuda/include/CudaContext.h b/platforms/cuda/include/CudaContext.h
@@ -9,7 +9,7 @@
  * Biological Structures at Stanford, funded under the NIH Roadmap for        *
  * Medical Research, grant U54 GM072970. See https://simtk.org.               *
  *                                                                            *
- * Portions copyright (c) 2009-2021 Stanford University and the Authors.      *
+ * Portions copyright (c) 2009-2023 Stanford University and the Authors.      *
  * Authors: Peter Eastman                                                     *
  * Contributors:                                                              *
  *                                                                            *
@@ -75,8 +75,7 @@ class OPENMM_EXPORT_COMMON CudaContext : public ComputeContext {
     static const int ThreadBlockSize;
     static const int TileSize;
     CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const std::string& precision,
-            const std::string& compiler, const std::string& tempDir, const std::string& hostCompiler, bool allowRuntimeCompiler,
-            CudaPlatform::PlatformData& platformData, CudaContext* originalContext);
+            const std::string& tempDir, CudaPlatform::PlatformData& platformData, CudaContext* originalContext);
     ~CudaContext();
     /**
      * This is called to initialize internal data structures after all Forces in the system
@@ -558,9 +557,9 @@ class OPENMM_EXPORT_COMMON CudaContext : public ComputeContext {
     int numAtomBlocks;
     int numThreadBlocks;
     int gpuArchitecture;
-    bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, boxIsTriclinic, hasCompilerKernel, isNvccAvailable, hasAssignedPosqCharges;
+    bool useBlockingSync, useDoublePrecision, useMixedPrecision, contextIsValid, boxIsTriclinic, hasAssignedPosqCharges;
     bool isLinkedContext;
-    std::string compiler, tempDir, cacheDir;
+    std::string tempDir, cacheDir;
     float4 periodicBoxVecXFloat, periodicBoxVecYFloat, periodicBoxVecZFloat, periodicBoxSizeFloat, invPeriodicBoxSizeFloat;
     double4 periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, periodicBoxSize, invPeriodicBoxSize;
     std::string defaultOptimizationOptions;

diff --git a/platforms/cuda/include/CudaPlatform.h b/platforms/cuda/include/CudaPlatform.h
@@ -9,7 +9,7 @@
  * Biological Structures at Stanford, funded under the NIH Roadmap for        *
  * Medical Research, grant U54 GM072970. See https://simtk.org.               *
  *                                                                            *
- * Portions copyright (c) 2008-2021 Stanford University and the Authors.      *
+ * Portions copyright (c) 2008-2023 Stanford University and the Authors.      *
  * Authors: Peter Eastman                                                     *
  * Contributors:                                                              *
  *                                                                            *
@@ -91,14 +91,18 @@ class OPENMM_EXPORT_COMMON CudaPlatform : public Platform {
         return key;
     }
     /**
-     * This is the name of the parameter for specifying the path to the CUDA compiler.
+     * This property is ignored.  It exists only for backward compatibility.
+     * 
+     * @deprecated
      */
     static const std::string& CudaCompiler() {
         static const std::string key = "CudaCompiler";
         return key;
     }
     /**
-     * This is the name of the parameter for specifying the host compiler for the CUDA compiler to use.
+     * This property is ignored.  It exists only for backward compatibility.
+     * 
+     * @deprecated
      */
     static const std::string& CudaHostCompiler() {
         static const std::string key = "CudaHostCompiler";
@@ -130,15 +134,15 @@ class OPENMM_EXPORT_COMMON CudaPlatform : public Platform {
 class OPENMM_EXPORT_COMMON CudaPlatform::PlatformData {
 public:
     PlatformData(ContextImpl* context, const System& system, const std::string& deviceIndexProperty, const std::string& blockingProperty, const std::string& precisionProperty,
-            const std::string& cpuPmeProperty, const std::string& compilerProperty, const std::string& tempProperty, const std::string& hostCompilerProperty,
-            const std::string& pmeStreamProperty, const std::string& deterministicForcesProperty, int numThreads, bool allowRuntimeCompiler, ContextImpl* originalContext);
+            const std::string& cpuPmeProperty, const std::string& tempProperty, const std::string& pmeStreamProperty, const std::string& deterministicForcesProperty,
+            int numThreads, ContextImpl* originalContext);
     ~PlatformData();
     void initializeContexts(const System& system);
     void syncContexts();
     ContextImpl* context;
     std::vector<CudaContext*> contexts;
     std::vector<double> contextEnergy;
-    bool hasInitializedContexts, removeCM, peerAccessSupported, useCpuPme, disablePmeStream, deterministicForces, allowRuntimeCompiler;
+    bool hasInitializedContexts, removeCM, peerAccessSupported, useCpuPme, disablePmeStream, deterministicForces;
     int cmMotionFrequency, computeForceCount;
     long long stepCount;
     double time;

diff --git a/platforms/cuda/sharedTarget/CMakeLists.txt b/platforms/cuda/sharedTarget/CMakeLists.txt
@@ -13,7 +13,7 @@ SET_SOURCE_FILES_PROPERTIES(${KERNELS_CPP} ${KERNELS_H} ${COMMON_KERNELS_CPP} PR
 ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
 ADD_DEPENDENCIES(${SHARED_TARGET} CommonKernels)
 
-TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME} CUDA::cuda_driver CUDA::cufft ${PTHREADS_LIB})
+TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${OPENMM_LIBRARY_NAME} CUDA::cuda_driver CUDA::cufft CUDA::nvrtc ${PTHREADS_LIB})
 SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_COMMON_BUILDING_SHARED_LIBRARY")
 IF (APPLE)
     SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS} -F/Library/Frameworks -framework CUDA")