// Thin C wrapper around NVML that exposes GPU inventory and telemetry
// through plain int-returning functions plus a per-thread error string.
#include "nvml_gpu.h"

#include <nvml.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

// Thread-local error buffer: each thread sees only its own last error.
static __thread char last_error_buffer[256] = {0};

// Store msg as the calling thread's last error, truncating to fit the buffer.
// A NULL msg is recorded as a generic message instead of being dereferenced.
static void set_error(const char *msg)
{
    snprintf(last_error_buffer, sizeof(last_error_buffer), "%s",
             msg ? msg : "unknown error");
}

// Record the NVML-provided description of status as the last error.
static void set_nvml_error(nvmlReturn_t status)
{
    set_error(nvmlErrorString(status));
}

// Look up the NVML handle for the device at index.
// Returns 0 on success; on failure records the NVML error and returns -1.
static int get_device(uint32_t index, nvmlDevice_t *device)
{
    nvmlReturn_t status = nvmlDeviceGetHandleByIndex(index, device);
    if (status != NVML_SUCCESS) {
        set_nvml_error(status);
        return -1;
    }
    return 0;
}

// Initialise NVML.
// Returns 0 on success, -1 on failure (see gpu_last_error()).
int gpu_init(void)
{
    nvmlReturn_t result = nvmlInit();
    if (result != NVML_SUCCESS) {
        set_nvml_error(result);
        return -1;
    }
    return 0;
}

// Shut NVML down. The function has no return value, so a failure is only
// observable through gpu_last_error().
void gpu_shutdown(void)
{
    nvmlReturn_t result = nvmlShutdown();
    if (result != NVML_SUCCESS) {
        set_nvml_error(result);
    }
}

// Return the number of devices NVML reports, or -1 on failure
// (see gpu_last_error()).
int gpu_get_count(void)
{
    unsigned int count = 0;
    nvmlReturn_t result = nvmlDeviceGetCount(&count);
    if (result != NVML_SUCCESS) {
        set_nvml_error(result);
        return -1;
    }
    return (int)count;
}

// Fill *info with a snapshot of the device at index.
//
// *info is zeroed first and info->index is set before any NVML call, so on
// every return path the struct is in a defined state. Failing to obtain the
// device handle or the device name is fatal (-1, error recorded). All other
// queries are best-effort: a failed string query yields "unknown", a failed
// numeric query leaves the field at 0.
//
// Returns 0 on success, -1 if info is NULL or a mandatory query fails.
int gpu_get_info(uint32_t index, gpu_info_t *info)
{
    if (!info) {
        set_error("null info pointer");
        return -1;
    }

    memset(info, 0, sizeof(*info));
    info->index = index;

    nvmlDevice_t device;
    if (get_device(index, &device) != 0) {
        return -1;
    }

    // Name is mandatory.
    nvmlReturn_t result =
        nvmlDeviceGetName(device, info->name, sizeof(info->name));
    if (result != NVML_SUCCESS) {
        set_nvml_error(result);
        return -1;
    }

    // UUID: non-critical, fall back to a placeholder (bounded copy).
    result = nvmlDeviceGetUUID(device, info->uuid, sizeof(info->uuid));
    if (result != NVML_SUCCESS) {
        snprintf(info->uuid, sizeof(info->uuid), "%s", "unknown");
    }

    // VBIOS version: non-critical, same fallback.
    result = nvmlDeviceGetVbiosVersion(device, info->vbios_version,
                                       sizeof(info->vbios_version));
    if (result != NVML_SUCCESS) {
        snprintf(info->vbios_version, sizeof(info->vbios_version), "%s",
                 "unknown");
    }

    // GPU utilization.
    nvmlUtilization_t utilization;
    result = nvmlDeviceGetUtilizationRates(device, &utilization);
    if (result == NVML_SUCCESS) {
        info->utilization = utilization.gpu;
    }

    // Memory usage.
    nvmlMemory_t memory;
    result = nvmlDeviceGetMemoryInfo(device, &memory);
    if (result == NVML_SUCCESS) {
        info->memory_used = memory.used;
        info->memory_total = memory.total;
    }

    // Temperature: read into a local of the exact type NVML expects rather
    // than aliasing the struct field; the field stays 0 when unavailable.
    unsigned int temperature = 0;
    result = nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU,
                                      &temperature);
    if (result == NVML_SUCCESS) {
        info->temperature = temperature;
    }

    // Power draw, stored exactly as NVML reports it.
    unsigned int power_mw = 0;
    result = nvmlDeviceGetPowerUsage(device, &power_mw);
    if (result == NVML_SUCCESS) {
        info->power_draw = power_mw;
    }

    // Current SM and memory clocks.
    unsigned int clock = 0;
    result = nvmlDeviceGetClock(device, NVML_CLOCK_SM, NVML_CLOCK_ID_CURRENT,
                                &clock);
    if (result == NVML_SUCCESS) {
        info->clock_sm = clock;
    }

    result = nvmlDeviceGetClock(device, NVML_CLOCK_MEM, NVML_CLOCK_ID_CURRENT,
                                &clock);
    if (result == NVML_SUCCESS) {
        info->clock_memory = clock;
    }

    // Current PCIe link generation and width; left at 0 if the query fails.
    unsigned int link_gen = 0;
    result = nvmlDeviceGetCurrPcieLinkGeneration(device, &link_gen);
    if (result == NVML_SUCCESS) {
        info->pcie_gen = link_gen;
    }

    unsigned int link_width = 0;
    result = nvmlDeviceGetCurrPcieLinkWidth(device, &link_width);
    if (result == NVML_SUCCESS) {
        info->pcie_width = link_width;
    }

    return 0;
}

// Write the GPU utilization of the device at index to *utilization.
// Returns 0 on success, -1 on failure (see gpu_last_error()); *utilization
// is not written on failure.
int gpu_get_utilization(uint32_t index, uint32_t *utilization)
{
    if (!utilization) {
        set_error("null utilization pointer");
        return -1;
    }

    nvmlDevice_t device;
    if (get_device(index, &device) != 0) {
        return -1;
    }

    nvmlUtilization_t util;
    nvmlReturn_t result = nvmlDeviceGetUtilizationRates(device, &util);
    if (result != NVML_SUCCESS) {
        set_nvml_error(result);
        return -1;
    }

    *utilization = util.gpu;
    return 0;
}

// Write the used and total memory of the device at index to *used / *total.
// Returns 0 on success, -1 on failure (see gpu_last_error()); the outputs
// are not written on failure.
int gpu_get_memory(uint32_t index, uint64_t *used, uint64_t *total)
{
    if (!used || !total) {
        set_error("null pointer");
        return -1;
    }

    nvmlDevice_t device;
    if (get_device(index, &device) != 0) {
        return -1;
    }

    nvmlMemory_t memory;
    nvmlReturn_t result = nvmlDeviceGetMemoryInfo(device, &memory);
    if (result != NVML_SUCCESS) {
        set_nvml_error(result);
        return -1;
    }

    *used = memory.used;
    *total = memory.total;
    return 0;
}

// Write the GPU temperature of the device at index to *temp.
// Returns 0 on success, -1 on failure (see gpu_last_error()); *temp is not
// written on failure.
int gpu_get_temperature(uint32_t index, uint32_t *temp)
{
    if (!temp) {
        set_error("null temp pointer");
        return -1;
    }

    nvmlDevice_t device;
    if (get_device(index, &device) != 0) {
        return -1;
    }

    unsigned int temperature = 0;
    nvmlReturn_t result = nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU,
                                                   &temperature);
    if (result != NVML_SUCCESS) {
        set_nvml_error(result);
        return -1;
    }

    *temp = temperature;
    return 0;
}

// Probe whether NVML can be initialised: returns 1 if nvmlInit() succeeds
// (followed by a matching nvmlShutdown()), 0 otherwise. The last-error
// buffer is deliberately left untouched so a probe does not clobber it.
int gpu_is_available(void)
{
    nvmlReturn_t result = nvmlInit();
    if (result == NVML_SUCCESS) {
        nvmlShutdown();
        return 1;
    }
    return 0;
}

// Return the calling thread's last recorded error message (empty string if
// none). The pointer refers to thread-local storage that is overwritten by
// the next failing call on the same thread.
const char *gpu_last_error(void)
{
    return last_error_buffer;
}