fetch_ml/native/nvml_gpu/nvml_dynamic.c
Jeremie Fraeys be39b37aec
feat: native GPU detection and NVML bridge for macOS and Linux
- Add dynamic NVML loading for Linux GPU detection
- Add macOS GPU detection via IOKit framework
- Add Zig NVML wrapper for cross-platform GPU queries
- Update native bridge to support platform-specific GPU libs
- Add CMake support for NVML dynamic library
2026-02-21 17:59:59 -05:00

272 lines
8.5 KiB
C

#include "nvml_dynamic.h"
#include <string.h>
#include <stdlib.h>
#ifdef _WIN32
#include <windows.h>
#else
#include <dlfcn.h>
#endif
// Local mirrors of the NVML types (from nvml.h) that this file exchanges with
// the dynamically loaded library; nvml.h itself is not included here.

// Status code returned by every NVML entry point used in this file.
typedef int nvmlReturn_t;

// Device handle: obtained from NVML and only ever handed back to it.
typedef void* nvmlDevice_t;

// Output record filled in by the utilization query.
typedef struct {
    unsigned int gpu;
    unsigned int memory;
} nvmlUtilization_t;

// Output record filled in by the memory-info query.
typedef struct {
    unsigned long long total;
    unsigned long long free;
    unsigned long long used;
} nvmlMemory_t;
// Function pointer types, one per NVML entry point looked up at runtime.
// Parameter names are documentation only; the types define the ABI.

// Library lifecycle
typedef nvmlReturn_t (*nvmlInit_v2_fn)(void);
typedef nvmlReturn_t (*nvmlShutdown_fn)(void);

// System-wide queries
typedef nvmlReturn_t (*nvmlSystemGetDriverVersion_fn)(char* version, unsigned int length);
typedef nvmlReturn_t (*nvmlDeviceGetCount_fn)(unsigned int* deviceCount);
typedef nvmlReturn_t (*nvmlDeviceGetHandleByIndex_v2_fn)(unsigned int index, nvmlDevice_t* device);

// Per-device queries
typedef nvmlReturn_t (*nvmlDeviceGetName_fn)(nvmlDevice_t device, char* name, unsigned int length);
typedef nvmlReturn_t (*nvmlDeviceGetUtilizationRates_fn)(nvmlDevice_t device, nvmlUtilization_t* utilization);
typedef nvmlReturn_t (*nvmlDeviceGetMemoryInfo_fn)(nvmlDevice_t device, nvmlMemory_t* memory);
typedef nvmlReturn_t (*nvmlDeviceGetTemperature_fn)(nvmlDevice_t device, unsigned int sensorType, unsigned int* temp);
typedef nvmlReturn_t (*nvmlDeviceGetPowerUsage_fn)(nvmlDevice_t device, unsigned int* power);
typedef nvmlReturn_t (*nvmlDeviceGetClockInfo_fn)(nvmlDevice_t device, unsigned int type, unsigned int* clock);
typedef nvmlReturn_t (*nvmlDeviceGetPcieThroughput_fn)(nvmlDevice_t device, unsigned int counter, unsigned int* value);
typedef nvmlReturn_t (*nvmlDeviceGetUUID_fn)(nvmlDevice_t device, char* uuid, unsigned int length);
typedef nvmlReturn_t (*nvmlDeviceGetVbiosVersion_fn)(nvmlDevice_t device, char* version, unsigned int length);
// NVML constants. These mirror enum values from nvml.h and are passed
// straight to the driver library, so they must match nvml.h exactly.
#define NVML_SUCCESS 0
#define NVML_TEMPERATURE_GPU 0
// nvmlClockType_t: 0 is NVML_CLOCK_GRAPHICS, so SM is 1 and memory is 2.
// (Using 0/1 here would report the graphics clock as the SM clock and the
// SM clock as the memory clock.)
#define NVML_CLOCK_SM 1
#define NVML_CLOCK_MEM 2
#define NVML_PCIE_UTIL_TX_BYTES 0
#define NVML_PCIE_UTIL_RX_BYTES 1
// State for one dynamically loaded NVML library instance.
struct nvml_dynamic {
    void* handle;          // library handle from load_lib(); NULL when not loaded
    char last_error[256];  // most recent message stored by set_error()
    int available;         // 1 once the library is loaded and initialized, else 0

    // Entry points that nvml_load() requires.
    nvmlInit_v2_fn init;
    nvmlShutdown_fn shutdown;
    nvmlSystemGetDriverVersion_fn get_driver_version;  // not in the required set
    nvmlDeviceGetCount_fn get_count;
    nvmlDeviceGetHandleByIndex_v2_fn get_handle_by_index;

    // Optional entry points: any of these may be NULL and is checked before use.
    nvmlDeviceGetName_fn get_name;
    nvmlDeviceGetUtilizationRates_fn get_utilization;
    nvmlDeviceGetMemoryInfo_fn get_memory;
    nvmlDeviceGetTemperature_fn get_temperature;
    nvmlDeviceGetPowerUsage_fn get_power_usage;
    nvmlDeviceGetClockInfo_fn get_clock;
    nvmlDeviceGetUUID_fn get_uuid;
    nvmlDeviceGetVbiosVersion_fn get_vbios;
};
// Store `msg` as the latest error message on `nvml`.
// A NULL `nvml` is ignored. Messages that do not fit are truncated; the
// stored string is always NUL-terminated.
static void set_error(nvml_dynamic_t* nvml, const char* msg) {
    if (!nvml) {
        return;
    }

    char* dst = nvml->last_error;
    const size_t last = sizeof(nvml->last_error) - 1;

    strncpy(dst, msg, last);
    dst[last] = '\0';
}
// Thin portability layer over the platform's shared-library API
// (LoadLibraryA/GetProcAddress/FreeLibrary on Windows, dlopen/dlsym/dlclose
// elsewhere).

// Open the shared library `name` and return its handle.
static void* load_lib(const char* name) {
#ifdef _WIN32
    return LoadLibraryA(name);
#else
    return dlopen(name, RTLD_NOW);
#endif
}

// Look up the symbol `name` in the library `handle`.
static void* get_sym(void* handle, const char* name) {
#ifdef _WIN32
    return (void*)GetProcAddress((HMODULE)handle, name);
#else
    return dlsym(handle, name);
#endif
}

// Release a handle previously returned by load_lib().
static void close_lib(void* handle) {
#ifdef _WIN32
    FreeLibrary((HMODULE)handle);
#else
    dlclose(handle);
#endif
}
// Abort a load attempt: release the library if one was opened, reset the
// context to all-zero and record `msg` as the error.
// Zeroing matters because any function pointers already resolved point into
// the library that is being unloaded and must not survive it.
// Returns `nvml` so callers can write `return nvml_load_fail(...)`.
static nvml_dynamic_t* nvml_load_fail(nvml_dynamic_t* nvml, const char* msg) {
    if (nvml->handle) {
        close_lib(nvml->handle);
    }
    memset(nvml, 0, sizeof(*nvml));
    set_error(nvml, msg);
    return nvml;
}

// Load the NVML shared library, resolve its entry points and initialize it.
//
// Returns NULL only when the context itself cannot be allocated. Otherwise a
// context is always returned: on success its `available` flag is 1; on any
// failure it is 0 and the reason is stored in `last_error`.
// The returned context is heap-allocated and is released by nvml_unload().
nvml_dynamic_t* nvml_load(void) {
    nvml_dynamic_t* nvml = calloc(1, sizeof(*nvml));
    if (!nvml) {
        return NULL;
    }

    // Try to load NVML library
#ifdef _WIN32
    nvml->handle = load_lib("nvml.dll");
    if (!nvml->handle) {
        nvml->handle = load_lib("C:\\Windows\\System32\\nvml.dll");
    }
#else
    nvml->handle = load_lib("libnvidia-ml.so.1");
    if (!nvml->handle) {
        nvml->handle = load_lib("libnvidia-ml.so");
    }
#endif
    if (!nvml->handle) {
        return nvml_load_fail(nvml, "NVML library not found - NVIDIA driver may not be installed");
    }

    // Load function pointers
    nvml->init = (nvmlInit_v2_fn)get_sym(nvml->handle, "nvmlInit_v2");
    nvml->shutdown = (nvmlShutdown_fn)get_sym(nvml->handle, "nvmlShutdown");
    nvml->get_driver_version = (nvmlSystemGetDriverVersion_fn)get_sym(nvml->handle, "nvmlSystemGetDriverVersion");

    // Prefer the _v2 device count: it enumerates the same device set that
    // nvmlDeviceGetHandleByIndex_v2 indexes. The unversioned symbol is the
    // legacy v1 entry point, which skips devices the caller has no
    // permission for and can therefore disagree with v2 indexing. Fall back
    // to it only when the library does not export the _v2 symbol.
    nvml->get_count = (nvmlDeviceGetCount_fn)get_sym(nvml->handle, "nvmlDeviceGetCount_v2");
    if (!nvml->get_count) {
        nvml->get_count = (nvmlDeviceGetCount_fn)get_sym(nvml->handle, "nvmlDeviceGetCount");
    }

    nvml->get_handle_by_index = (nvmlDeviceGetHandleByIndex_v2_fn)get_sym(nvml->handle, "nvmlDeviceGetHandleByIndex_v2");
    nvml->get_name = (nvmlDeviceGetName_fn)get_sym(nvml->handle, "nvmlDeviceGetName");
    nvml->get_utilization = (nvmlDeviceGetUtilizationRates_fn)get_sym(nvml->handle, "nvmlDeviceGetUtilizationRates");
    nvml->get_memory = (nvmlDeviceGetMemoryInfo_fn)get_sym(nvml->handle, "nvmlDeviceGetMemoryInfo");
    nvml->get_temperature = (nvmlDeviceGetTemperature_fn)get_sym(nvml->handle, "nvmlDeviceGetTemperature");
    nvml->get_power_usage = (nvmlDeviceGetPowerUsage_fn)get_sym(nvml->handle, "nvmlDeviceGetPowerUsage");
    nvml->get_clock = (nvmlDeviceGetClockInfo_fn)get_sym(nvml->handle, "nvmlDeviceGetClockInfo");
    nvml->get_uuid = (nvmlDeviceGetUUID_fn)get_sym(nvml->handle, "nvmlDeviceGetUUID");
    nvml->get_vbios = (nvmlDeviceGetVbiosVersion_fn)get_sym(nvml->handle, "nvmlDeviceGetVbiosVersion");

    // Check required functions; every other entry point is optional and is
    // NULL-checked at its point of use.
    if (!nvml->init || !nvml->shutdown || !nvml->get_count || !nvml->get_handle_by_index) {
        return nvml_load_fail(nvml, "Failed to load required NVML functions");
    }

    // Initialize NVML. On failure the library is unloaded without calling
    // shutdown, since initialization never completed.
    if (nvml->init() != NVML_SUCCESS) {
        return nvml_load_fail(nvml, "Failed to initialize NVML");
    }

    nvml->available = 1;
    return nvml;
}
// Tear down a context created by nvml_load() and free it.
// A NULL `nvml` is a no-op. When a library is still loaded, NVML is shut
// down (if the shutdown entry point was resolved) before the library is
// closed.
void nvml_unload(nvml_dynamic_t* nvml) {
    if (nvml == NULL) {
        return;
    }

    void* lib = nvml->handle;
    if (lib != NULL) {
        if (nvml->shutdown != NULL) {
            nvml->shutdown();
        }
        close_lib(lib);
    }

    free(nvml);
}
// Report the context's `available` flag; a NULL context counts as
// unavailable (0).
int nvml_is_available(const nvml_dynamic_t* nvml) {
    if (nvml == NULL) {
        return 0;
    }
    return nvml->available;
}
// Return the most recent error message stored on `nvml`.
// The pointer refers to storage inside the context itself. For a NULL
// context a fixed diagnostic string is returned instead.
const char* nvml_last_error(const nvml_dynamic_t* nvml) {
    if (nvml == NULL) {
        return "NULL nvml handle";
    }
    return nvml->last_error;
}
// Ask NVML how many devices it sees.
// Returns the count, or -1 when the context is NULL/unavailable, the count
// entry point is missing, or the NVML call fails (in which case the error
// message is recorded on the context).
int nvml_get_gpu_count(nvml_dynamic_t* nvml) {
    if (nvml == NULL) {
        return -1;
    }
    if (!nvml->available || nvml->get_count == NULL) {
        return -1;
    }

    unsigned int n_devices = 0;
    if (nvml->get_count(&n_devices) != NVML_SUCCESS) {
        set_error(nvml, "Failed to get GPU count");
        return -1;
    }

    return (int)n_devices;
}
int nvml_get_gpu_info(nvml_dynamic_t* nvml, uint32_t index, gpu_info_t* info) {
if (!nvml || !nvml->available || !info) {
return -1;
}
memset(info, 0, sizeof(*info));
info->index = index;
nvmlDevice_t device;
nvmlReturn_t result = nvml->get_handle_by_index(index, &device);
if (result != NVML_SUCCESS) {
set_error(nvml, "Failed to get device handle");
return -1;
}
// Get name
if (nvml->get_name) {
nvml->get_name(device, info->name, sizeof(info->name));
}
// Get utilization
if (nvml->get_utilization) {
nvmlUtilization_t util;
result = nvml->get_utilization(device, &util);
if (result == NVML_SUCCESS) {
info->utilization = util.gpu;
}
}
// Get memory
if (nvml->get_memory) {
nvmlMemory_t mem;
result = nvml->get_memory(device, &mem);
if (result == NVML_SUCCESS) {
info->memory_used = mem.used;
info->memory_total = mem.total;
}
}
// Get temperature
if (nvml->get_temperature) {
unsigned int temp;
result = nvml->get_temperature(device, NVML_TEMPERATURE_GPU, &temp);
if (result == NVML_SUCCESS) {
info->temperature = temp;
}
}
// Get power usage
if (nvml->get_power_usage) {
unsigned int power;
result = nvml->get_power_usage(device, &power);
if (result == NVML_SUCCESS) {
info->power_draw = power;
}
}
// Get clocks
if (nvml->get_clock) {
unsigned int clock;
result = nvml->get_clock(device, NVML_CLOCK_SM, &clock);
if (result == NVML_SUCCESS) {
info->clock_sm = clock;
}
result = nvml->get_clock(device, NVML_CLOCK_MEM, &clock);
if (result == NVML_SUCCESS) {
info->clock_memory = clock;
}
}
// Get UUID
if (nvml->get_uuid) {
nvml->get_uuid(device, info->uuid, sizeof(info->uuid));
}
// Get VBIOS version
if (nvml->get_vbios) {
nvml->get_vbios(device, info->vbios_version, sizeof(info->vbios_version));
}
return 0;
}