fix: NVML stub support for systems without NVIDIA drivers

- Add stub implementation in nvml_gpu.cpp for when NVML is not available
- CMakeLists.txt checks for both NVML library and headers
- Build succeeds on macOS/non-NVIDIA systems with stub
- Runtime detection via gpu_is_available() prevents runtime errors
This commit is contained in:
Jeremie Fraeys 2026-02-21 15:16:54 -05:00
parent 05b7af6991
commit c56e53cb52
No known key found for this signature in database
2 changed files with 32 additions and 4 deletions

View file

@ -17,12 +17,22 @@ find_library(NVML_LIBRARY nvidia-ml
DOC "NVIDIA Management Library"
)
# Locate the NVML header. Finding the library alone is not sufficient: the
# non-stub code path of nvml_gpu.cpp includes <nvml.h>, so both the library and
# the header must be present before the real backend can be built.
find_path(NVML_INCLUDE_DIR nvml.h
  PATHS
    /usr/include
    /usr/local/cuda/include
    /opt/cuda/include
  DOC "Directory containing nvml.h"
)

if(NVML_LIBRARY AND NVML_INCLUDE_DIR)
  # Library and header were both found: build against the real NVML.
  target_link_libraries(nvml_gpu PRIVATE "${NVML_LIBRARY}")
  target_include_directories(nvml_gpu PRIVATE "${NVML_INCLUDE_DIR}")
  message(STATUS "Found NVML: ${NVML_LIBRARY}")
  message(STATUS "NVML include: ${NVML_INCLUDE_DIR}")
else()
  # Library or header is missing: define NVML_STUB so nvml_gpu.cpp compiles its
  # stub implementation instead of the NVML-backed one.
  message(WARNING "NVML not found. GPU monitoring will be disabled.")
  target_compile_definitions(nvml_gpu PRIVATE NVML_STUB)
endif()

View file

@ -1,7 +1,23 @@
#include "nvml_gpu.h"
#include <string.h>
#ifdef NVML_STUB
// Stub implementation when NVML is not available
// Stub: there is no NVML to initialise, so this always returns -1.
int gpu_init(void)
{
    return -1;
}
// Stub: nothing was initialised, so there is nothing to release. No-op.
void gpu_shutdown(void)
{
}
// Stub: no devices can be enumerated without NVML, so this always returns -1.
int gpu_get_count(void)
{
    return -1;
}
// Stub: device information cannot be queried without NVML.
// Always returns -1 and never reads or writes *info.
int gpu_get_info(uint32_t index, gpu_info_t* info)
{
    // Mark the parameters as intentionally unused so stub builds stay
    // warning-free under -Wunused-parameter (and -Werror).
    (void)index;
    (void)info;
    return -1;
}
// Stub: utilization cannot be queried without NVML.
// Always returns -1. When `utilization` is non-null it is set to 0 so a caller
// that forgets to check the return value never reads an indeterminate value.
int gpu_get_utilization(uint32_t index, uint32_t* utilization)
{
    (void)index;  // no device to look up in the stub build
    if (utilization) {
        *utilization = 0;
    }
    return -1;
}
// Stub: memory usage cannot be queried without NVML.
// Always returns -1. Each non-null output (`used`, `total`) is set to 0 so a
// caller that forgets to check the return value never reads an indeterminate
// value.
int gpu_get_memory(uint32_t index, uint64_t* used, uint64_t* total)
{
    (void)index;  // no device to look up in the stub build
    if (used) {
        *used = 0;
    }
    if (total) {
        *total = 0;
    }
    return -1;
}
// Stub: temperature cannot be queried without NVML.
// Always returns -1. When `temp` is non-null it is set to 0 so a caller that
// forgets to check the return value never reads an indeterminate value.
int gpu_get_temperature(uint32_t index, uint32_t* temp)
{
    (void)index;  // no device to look up in the stub build
    if (temp) {
        *temp = 0;
    }
    return -1;
}
// Stub: always returns 0, since a stub build has no NVML backend to use.
int gpu_is_available(void)
{
    return 0;
}
// Stub: every call fails for the same reason, so the error text is a fixed
// string with static storage duration.
const char* gpu_last_error(void)
{
    static const char kStubMessage[] = "NVML not available";
    return kStubMessage;
}
#else
// Full NVML implementation
#include <nvml.h>
#include <stdio.h>
#include <string.h>
// Thread-local error buffer.
// Each thread gets its own zero-initialised 256-byte buffer (`__thread` is the
// GCC/Clang thread-local storage specifier). gpu_last_error() returns a pointer
// to this buffer.
static __thread char last_error_buffer[256] = {0};
@ -207,3 +223,5 @@ int gpu_is_available(void) {
// Returns a pointer to the calling thread's error buffer. The pointer refers
// to thread-local storage owned by this file; callers must not free it.
const char* gpu_last_error(void)
{
    const char* message = last_error_buffer;
    return message;
}
#endif // NVML_STUB