Bump tqdm from 4.66.2 to 4.66.3 #21

Closed
wants to merge 35 commits into from

Changes from 1 commit · 35 commits

506d2fe  llama : expose llama_load_model_from_file_gpt4all (cebtenzzre, Nov 24, 2023)
88289b5  kompute : fix ggml_vk_device leaks (cebtenzzre, Jan 31, 2024)
3f7c4b9  kompute : fix c++11 compatibility (cebtenzzre, Jan 31, 2024)
9d5207b  kompute : enable Pascal GPUs (cebtenzzre, Jan 31, 2024)
b6891bc  llama : wrap llama_new_context_with_model in try/catch (cebtenzzre, Feb 1, 2024)
b80287e  kompute : add missing call to ggml_backend_kompute_device_unref (cebtenzzre, Feb 1, 2024)
dc7a50b  kompute : fix ggml_vk_allocate failure control flow (cebtenzzre, Feb 1, 2024)
c5014f6  kompute : disable GPU offload for Mixtral (cebtenzzre, Feb 5, 2024)
c76f5c3  kompute : do not list Intel GPUs as they are unsupported (#14) (cebtenzzre, Feb 12, 2024)
6ff4387  kompute : make partial tensor copies faster by syncing less data (#15) (cebtenzzre, Feb 13, 2024)
12dcddc  kompute : disable LLAMA_SPLIT_LAYER after ggerganov/llama.cpp#5321 (cebtenzzre, Feb 21, 2024)
82b50e5  kompute : add gemma, phi-2, qwen2, and stablelm to whitelist (cebtenzzre, Feb 21, 2024)
a76f5f4  kompute : enable GPU support for 10 more model architectures (cebtenzzre, Feb 22, 2024)
877851b  llama : fix -Wunused-const-variable warning for non-Kompute build (cebtenzzre, Feb 22, 2024)
729d661  llama : expose model name and architecture via API (cebtenzzre, Mar 5, 2024)
2b8cb26  kompute : put device with most VRAM first, not least (cebtenzzre, May 1, 2024)
6e0b5d9  vulkan : make ggml_vk_instance_init static (cebtenzzre, Apr 30, 2024)
aea0abe  vulkan : don't filter devices by default, don't abort if none (cebtenzzre, Apr 30, 2024)
535c7b1  vulkan : implement ggml_vk_available_devices (cebtenzzre, Apr 30, 2024)
2a91dbf  vulkan : guard against multiple initialization (cebtenzzre, May 1, 2024)
ad1ab57  rocm : symlink source files so CUDA can be built in the same project (cebtenzzre, May 2, 2024)
09058b1  cuda : implement ggml_cuda_available_devices (cebtenzzre, May 6, 2024)
b0ccbe1  kompute : update submodule for install fix (cebtenzzre, May 8, 2024)
74a41c6  kompute : fix leaks in ggml_vk_current_device (cebtenzzre, May 13, 2024)
f10326c  kompute : fix use-after-free in ggml_vk_get_device (cebtenzzre, May 20, 2024)
e5c0df7  llama : replace ngl=0 hack with llama_model_using_gpu (cebtenzzre, Jun 4, 2024)
159235e  llama : use the correct buffer type when we choose not to load on GPU (cebtenzzre, Jul 10, 2024)
c301b42  kompute : update for leak fixes, cleanup changes, shaderFloat16 (cebtenzzre, Jul 18, 2024)
7d402b3  kompute : plug a few memory leaks (cebtenzzre, Jul 18, 2024)
48a830c  common : Kompute supports --main-gpu, do not warn (cebtenzzre, Jul 18, 2024)
6e0ad3c  kompute : fix dangling references in ggml_vk_graph_kompute (cebtenzzre, Jul 18, 2024)
c3d5264  kompute : avoid freeing device/instance until absolutely necessary (cebtenzzre, Jul 18, 2024)
561d0ce  kompute : update ggml_vk_supports_op to fix false pos/neg (cebtenzzre, Jul 18, 2024)
cd13f44  kompute : fix missing unref on allocation failure (cebtenzzre, Jul 18, 2024)
3d4c558  Bump tqdm from 4.66.2 to 4.66.3 (dependabot[bot], Jul 18, 2024)

cuda : implement ggml_cuda_available_devices
cebtenzzre committed Jul 18, 2024
commit 09058b1847d5a024cc7c56809cea3fd66b4d3dd6
13 changes: 12 additions & 1 deletion ggml/include/ggml-cuda.h
@@ -3,6 +3,8 @@
#include "ggml.h"
#include "ggml-backend.h"

#include <stddef.h>

#ifdef GGML_USE_HIPBLAS
#define GGML_CUDA_NAME "ROCm"
#define GGML_CUBLAS_NAME "hipBLAS"
@@ -11,11 +13,20 @@
#define GGML_CUBLAS_NAME "cuBLAS"
#endif

#define GGML_CUDA_MAX_DEVICES 16

#ifdef __cplusplus
extern "C" {
#endif

#define GGML_CUDA_MAX_DEVICES 16
struct ggml_cuda_device {
    uint32_t index;
    uint64_t heapSize;
    const char * name;
};

GGML_API GGML_CALL struct ggml_cuda_device * ggml_cuda_available_devices(size_t * count);
GGML_API GGML_CALL void ggml_cuda_device_destroy(ggml_cuda_device * device);

// backend API
GGML_API GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device);
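
For reference, here is a minimal caller-side sketch of the new header API. It is not part of this PR's diff, and the cleanup convention is an assumption inferred from the implementation below (each element's strdup'd name is released with ggml_cuda_device_destroy, and the malloc'd array itself is then freed by the caller):

// Hypothetical usage sketch, not from this PR.
#include <cstdio>
#include <cstdlib>

#include "ggml-cuda.h"

int main() {
    size_t count = 0;
    ggml_cuda_device * devices = ggml_cuda_available_devices(&count);

    for (size_t i = 0; i < count; i++) {
        // heapSize is the device's total VRAM in bytes; the array comes back
        // sorted by heapSize in descending order.
        printf("#%u: %s, %llu MiB VRAM\n",
               (unsigned) devices[i].index, devices[i].name,
               (unsigned long long) (devices[i].heapSize / (1024 * 1024)));
    }

    for (size_t i = 0; i < count; i++) {
        ggml_cuda_device_destroy(&devices[i]); // frees the strdup'd name
    }
    free(devices); // assumption: the array itself is owned by the caller
    return 0;
}
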
70 changes: 65 additions & 5 deletions ggml/src/ggml-cuda.cu
@@ -34,19 +34,20 @@
#include <algorithm>
#include <array>
#include <atomic>
#include <cfloat>
#include <cinttypes>
#include <cstdarg>
#include <cstddef>
#include <cstdint>
#include <float.h>
#include <cstdio>
#include <cstdlib>
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <mutex>
#include <stdint.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string>
#include <unordered_map>
#include <vector>

static_assert(sizeof(half) == sizeof(ggml_fp16_t), "wrong fp16 size");
@@ -186,6 +187,11 @@ static ggml_cuda_device_info ggml_cuda_init() {
        CUDA_CHECK(cudaGetDeviceProperties(&prop, id));
        GGML_CUDA_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n", id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");

        info.devices[id].total_vram = prop.totalGlobalMem;
        auto &name_dst = info.devices[id].name;
        strncpy(name_dst, prop.name, sizeof name_dst);
        name_dst[sizeof name_dst - 1] = 0;

        info.default_tensor_split[id] = total_vram;
        total_vram += prop.totalGlobalMem;

@@ -3068,3 +3074,57 @@ GGML_CALL int ggml_backend_cuda_reg_devices() {
    }
    return device_count;
}

static std::list<ggml_cuda_device> ggml_cuda_available_devices_internal() {
    std::list<ggml_cuda_device> results;

    const auto & cuda_info = ggml_cuda_info();

    std::unordered_map<std::string, size_t> count_by_name;

    for (int dev_idx = 0; dev_idx < cuda_info.device_count; dev_idx++) {
        const auto & device = cuda_info.devices[dev_idx];

        std::string name(device.name);
        size_t n_idx = ++count_by_name[name];
        if (n_idx > 1) {
            name += " (" + std::to_string(n_idx) + ")";
        }

        results.push_back({
            /* index = */ uint32_t(dev_idx),
            /* heapSize = */ uint64_t(device.total_vram),
            /* name = */ strdup(name.c_str()),
        });
    }

    // std::list::sort is guaranteed to be stable
    results.sort(
        [](const ggml_cuda_device & a, const ggml_cuda_device & b) -> bool {
            return a.heapSize > b.heapSize; // descending
        }
    );

    return results;
}

// public API returns a C-style array
ggml_cuda_device * ggml_cuda_available_devices(size_t * count) {
    auto devices = ggml_cuda_available_devices_internal();
    *count = devices.size();
    if (devices.empty()) {
        return nullptr;
    }

    size_t nbytes = sizeof(ggml_cuda_device) * devices.size();
    auto * arr = static_cast<ggml_cuda_device *>(malloc(nbytes));

    int i = 0;
    for (auto & d : devices) { arr[i++] = d; }

    return arr;
}

void ggml_cuda_device_destroy(ggml_cuda_device * device) {
    free(const_cast<char *>(device->name));
}
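
For readers without the ggml context, a standalone sketch of the same enumeration done directly against the CUDA runtime API. It is not part of this PR; it simply mirrors the two fields the ggml_cuda_init() hunk above records (prop.name into devices[id].name, prop.totalGlobalMem into devices[id].total_vram):

// Hypothetical standalone sketch, not from this PR; requires only the CUDA runtime.
#include <cstdio>
#include <cuda_runtime.h>

int main() {
    int count = 0;
    if (cudaGetDeviceCount(&count) != cudaSuccess) {
        return 1;
    }
    for (int id = 0; id < count; id++) {
        cudaDeviceProp prop;
        if (cudaGetDeviceProperties(&prop, id) != cudaSuccess) {
            continue;
        }
        // These are the fields the patch copies into ggml_cuda_device_info.
        printf("Device %d: %s, %zu bytes VRAM\n", id, prop.name, prop.totalGlobalMem);
    }
    return 0;
}
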
1 change: 1 addition & 0 deletions ggml/src/ggml-cuda/common.cuh
@@ -669,6 +669,7 @@ struct ggml_cuda_device_info {
bool vmm; // virtual memory support
size_t vmm_granularity; // granularity of virtual memory
size_t total_vram;
char name[256];
};

cuda_device_info devices[GGML_CUDA_MAX_DEVICES] = {};