fix: NVML stub support for systems without NVIDIA drivers

- Add a stub implementation in nvml_gpu.cpp for when NVML is not available
- CMakeLists.txt now checks for both the NVML library and its header
- Build succeeds on macOS/non-NVIDIA systems using the stub
- Runtime detection via gpu_is_available() prevents runtime errors
2026-02-21 15:16:54 -05:00 · 2026-02-21 15:16:54 -05:00 · c56e53cb52
commit c56e53cb52
parent 05b7af6991
2 changed files with 32 additions and 4 deletions
--- a/native/nvml_gpu/CMakeLists.txt
+++ b/native/nvml_gpu/CMakeLists.txt
@@ -17,12 +17,22 @@ find_library(NVML_LIBRARY nvidia-ml
    DOC "NVIDIA Management Library"
 )

-if(NVML_LIBRARY)
+# Check for NVML header
+find_path(NVML_INCLUDE_DIR nvml.h
+    PATHS
+        /usr/include
+        /usr/local/cuda/include
+        /opt/cuda/include
+)
+
+if(NVML_LIBRARY AND NVML_INCLUDE_DIR)
    target_link_libraries(nvml_gpu PRIVATE ${NVML_LIBRARY})
+    target_include_directories(nvml_gpu PRIVATE ${NVML_INCLUDE_DIR})
    message(STATUS "Found NVML: ${NVML_LIBRARY}")
+    message(STATUS "NVML include: ${NVML_INCLUDE_DIR}")
 else()
-    message(WARNING "NVML library not found. GPU monitoring will be disabled.")
-    # Create stub library that always returns unavailable
+    message(WARNING "NVML not found. GPU monitoring will be disabled.")
+    # Create stub library
    target_compile_definitions(nvml_gpu PRIVATE NVML_STUB)
 endif()

--- a/native/nvml_gpu/nvml_gpu.cpp
+++ b/native/nvml_gpu/nvml_gpu.cpp
@@ -1,7 +1,23 @@
 #include "nvml_gpu.h"
+#include <string.h>
+
+#ifdef NVML_STUB
+// Stub implementation when NVML is not available
+
+int gpu_init(void) { return -1; }
+void gpu_shutdown(void) {}
+int gpu_get_count(void) { return -1; }
+int gpu_get_info(uint32_t index, gpu_info_t* info) { return -1; }
+int gpu_get_utilization(uint32_t index, uint32_t* utilization) { return -1; }
+int gpu_get_memory(uint32_t index, uint64_t* used, uint64_t* total) { return -1; }
+int gpu_get_temperature(uint32_t index, uint32_t* temp) { return -1; }
+int gpu_is_available(void) { return 0; }
+const char* gpu_last_error(void) { return "NVML not available"; }
+
+#else
+// Full NVML implementation
 #include <nvml.h>
 #include <stdio.h>
-#include <string.h>

 // Thread-local error buffer
 static __thread char last_error_buffer[256] = {0};
@@ -207,3 +223,5 @@ int gpu_is_available(void) {
 const char* gpu_last_error(void) {
    return last_error_buffer;
 }
+
+#endif // NVML_STUB