feat: native GPU detection and NVML bridge for macOS and Linux

- Add dynamic NVML loading for Linux GPU detection
- Add macOS GPU detection via IOKit framework
- Add Zig NVML wrapper for cross-platform GPU queries
- Update native bridge to support platform-specific GPU libs
- Add CMake support for NVML dynamic library
This commit is contained in:
Jeremie Fraeys 2026-02-21 17:59:59 -05:00
parent 1a1844e9e9
commit be39b37aec
No known key found for this signature in database
15 changed files with 1321 additions and 35 deletions

View file

@ -0,0 +1,262 @@
const std = @import("std");
const builtin = @import("builtin");
/// macOS GPU Monitoring for Development Mode
/// Uses system_profiler and powermetrics for GPU info
/// Only available on macOS
const c = @cImport({
@cInclude("sys/types.h");
@cInclude("sys/sysctl.h");
});
/// GPU information for a single macOS display adapter, as parsed from
/// `system_profiler SPDisplaysDataType -json` output.
pub const MacOSGPUInfo = struct {
    /// Zero-based position of the GPU among detected display adapters.
    index: u32,
    /// Human-readable GPU name, NUL-terminated (copied from the chipset model).
    name: [256:0]u8,
    /// Raw "sppci_model" value from system_profiler, NUL-terminated.
    chipset_model: [256:0]u8,
    /// Dedicated VRAM in MB; 0 when memory is shared (integrated GPU).
    vram_mb: u32,
    /// True when system_profiler reports shared ("sppci_vram_shared") memory.
    is_integrated: bool,
    // Performance metrics (if available via powermetrics)
    /// GPU utilization in percent; null when not collected.
    utilization_percent: ?u32,
    /// GPU temperature in degrees Celsius; null when not collected.
    temperature_celsius: ?u32,
    /// GPU power draw in milliwatts; null when not collected.
    power_mw: ?u32,
};
/// Detect if running on Apple Silicon.
/// Queries the `hw.machine` sysctl and checks the machine string.
/// Returns false on non-macOS targets or on any sysctl failure.
pub fn isAppleSilicon() bool {
    if (builtin.os.tag != .macos) return false;
    var buf: [64]u8 = undefined;
    var len: usize = buf.len;
    // BUG FIX: sysctl's first parameter is a non-const `int *`; taking the
    // address of a `const` array produced a `*const c_int` that cannot
    // coerce to it. Declaring the MIB as `var` makes the call well-typed.
    var mib = [_]c_int{ c.CTL_HW, c.HW_MACHINE };
    const result = c.sysctl(&mib[0], 2, &buf[0], &len, null, 0);
    if (result != 0) return false;
    const machine = std.mem.sliceTo(&buf, 0);
    // hw.machine reports "arm64" on Apple Silicon; the "Apple" prefix is a
    // defensive extra check — TODO confirm it ever occurs in practice.
    return std.mem.startsWith(u8, machine, "arm64") or
        std.mem.startsWith(u8, machine, "Apple");
}
/// Get GPU count on macOS.
/// Invokes `system_profiler SPDisplaysDataType -json` and counts GPU
/// entries in the output. Returns 0 on non-macOS targets or on failure.
pub fn getGPUCount() u32 {
    if (builtin.os.tag != .macos) return 0;
    // Run system_profiler to check for GPUs
    const result = runSystemProfiler() catch return 0;
    // BUG FIX: runSystemProfiler allocates with std.heap.page_allocator, so
    // the buffer must be freed with the same allocator (was raw_c_allocator,
    // an allocator mismatch).
    defer std.heap.page_allocator.free(result);
    // BUG FIX: the command requests -json output, whose keys are
    // "sppci_model" etc.; the previous search string "Chipset Model" only
    // appears in the plain-text format and therefore always counted 0.
    var lines = std.mem.splitScalar(u8, result, '\n');
    var count: u32 = 0;
    while (lines.next()) |line| {
        if (std.mem.indexOf(u8, line, "\"sppci_model\"") != null) {
            count += 1;
        }
    }
    return count;
}
/// Run `system_profiler SPDisplaysDataType -json` and return its stdout.
/// Caller owns the returned buffer and must free it with
/// std.heap.page_allocator. Returns error.CommandFailed when the tool
/// exits abnormally or with a nonzero status.
fn runSystemProfiler() ![]u8 {
    const argv = [_][]const u8{
        "system_profiler",
        "SPDisplaysDataType",
        "-json",
    };
    var child = std.process.Child.init(&argv, std.heap.page_allocator);
    child.stdout_behavior = .Pipe;
    child.stderr_behavior = .Ignore;
    try child.spawn();
    // BUG FIX: was `defer child.kill()`, which also ran after a successful
    // wait() — at that point the child is already reaped. Only kill when we
    // bail out on an error before/at wait().
    errdefer if (child.kill()) |_| {} else |_| {};
    const stdout = child.stdout.?.reader();
    const output = try stdout.readAllAlloc(std.heap.page_allocator, 1024 * 1024);
    // BUG FIX: free the captured output on the failure paths below
    // (previously leaked when the command exited nonzero).
    errdefer std.heap.page_allocator.free(output);
    const term = try child.wait();
    if (term != .Exited or term.Exited != 0) {
        return error.CommandFailed;
    }
    return output;
}
/// Parse GPU entries out of `system_profiler SPDisplaysDataType -json`
/// output. Scans for the "_items" array and extracts each balanced JSON
/// object inside it. Caller owns the returned slice.
pub fn parseGPUInfo(allocator: std.mem.Allocator, json_output: []const u8) ![]MacOSGPUInfo {
    var gpus = std.ArrayList(MacOSGPUInfo).init(allocator);
    defer gpus.deinit();
    // The display entries live under the "_items" key; locate it first.
    if (std.mem.indexOf(u8, json_output, "_items")) |items_pos| {
        const after_items = json_output[items_pos..];
        // Find the start of the items array.
        if (std.mem.indexOf(u8, after_items, "[")) |bracket_pos| {
            const array = after_items[bracket_pos..];
            // Walk the array text, extracting each balanced {...} object.
            var cursor: usize = 0;
            while (cursor < array.len) {
                if (array[cursor] != '{') {
                    cursor += 1;
                    continue;
                }
                const obj_len = findObjectEnd(array[cursor..]) orelse {
                    // Unterminated object: step past this brace and keep going.
                    cursor += 1;
                    continue;
                };
                if (try parseGPUObject(array[cursor .. cursor + obj_len])) |gpu| {
                    try gpus.append(gpu);
                }
                cursor += obj_len;
            }
        }
    }
    return gpus.toOwnedSlice();
}
/// Return the length of the balanced JSON object whose opening '{' is at
/// (or after) the start of `json`, measured up to and including its closing
/// '}'. Quote-aware, so braces inside string values are ignored. Returns
/// null when the object never closes.
fn findObjectEnd(json: []const u8) ?usize {
    var depth: i32 = 0;
    var in_string = false;
    for (json, 0..) |char, i| {
        // Toggle string state on unescaped double quotes.
        if (char == '"' and (i == 0 or json[i - 1] != '\\')) {
            in_string = !in_string;
            continue;
        }
        if (in_string) continue;
        switch (char) {
            '{' => depth += 1,
            '}' => {
                depth -= 1;
                if (depth == 0) return i + 1;
            },
            else => {},
        }
    }
    return null;
}
/// Parse a single system_profiler display object into a MacOSGPUInfo.
/// Returns null when the object carries no "sppci_model" key (i.e. it is
/// not a GPU entry).
fn parseGPUObject(json: []const u8) !?MacOSGPUInfo {
    var gpu: MacOSGPUInfo = .{
        .index = 0,
        .name = std.mem.zeroes([256:0]u8),
        .chipset_model = std.mem.zeroes([256:0]u8),
        .vram_mb = 0,
        .is_integrated = false,
        .utilization_percent = null,
        .temperature_celsius = null,
        .power_mw = null,
    };
    // The chipset model doubles as the display name.
    if (extractJsonString(json, "sppci_model")) |model| {
        const copy_len = @min(model.len, 255);
        @memcpy(gpu.chipset_model[0..copy_len], model[0..copy_len]);
        @memcpy(gpu.name[0..copy_len], model[0..copy_len]);
    }
    // "sppci_vram_shared" marks an integrated GPU with shared memory;
    // otherwise "sppci_vram" holds a string such as "16384 MB".
    if (extractJsonString(json, "sppci_vram_shared") != null) {
        gpu.is_integrated = true;
        gpu.vram_mb = 0; // Shared memory
    } else if (extractJsonString(json, "sppci_vram")) |vram| {
        var words = std.mem.splitScalar(u8, vram, ' ');
        if (words.next()) |first_word| {
            gpu.vram_mb = std.fmt.parseInt(u32, first_word, 10) catch 0;
        }
    }
    // Entries without a chipset model are not GPUs.
    return if (gpu.chipset_model[0] == 0) null else gpu;
}
/// Extract the string value for `key` from a flat JSON object snippet.
/// Returns a slice into `json` (no allocation), or null when the key is
/// absent, longer than the scratch buffer, or its value is not a string.
/// Escaped quotes inside the value are not handled.
fn extractJsonString(json: []const u8, key: []const u8) ?[]const u8 {
    // Build "\"key\"" on the stack; the keys used here are short, so a
    // fixed buffer avoids the hidden page_allocator allocation (and the
    // silent null-on-OOM) the previous implementation had.
    var key_buf: [128]u8 = undefined;
    const key_quoted = std.fmt.bufPrint(&key_buf, "\"{s}\"", .{key}) catch return null;
    const key_pos = std.mem.indexOf(u8, json, key_quoted) orelse return null;
    const after_key = json[key_pos + key_quoted.len ..];
    // Find value start (skip the ':' separator and surrounding whitespace).
    var i: usize = 0;
    while (i < after_key.len and (after_key[i] == ':' or after_key[i] == ' ' or after_key[i] == '\t' or after_key[i] == '\n')) : (i += 1) {}
    if (i >= after_key.len or after_key[i] != '"') return null;
    // String value: everything up to the next (unescaped-assumed) quote.
    const str_start = i + 1;
    var str_end = str_start;
    while (str_end < after_key.len and after_key[str_end] != '"') : (str_end += 1) {}
    return after_key[str_start..str_end];
}
/// Format GPU info for display.
/// Renders a human-readable status report; caller owns the returned buffer.
pub fn formatMacOSGPUInfo(allocator: std.mem.Allocator, gpus: []const MacOSGPUInfo) ![]u8 {
    var buf = std.ArrayList(u8).init(allocator);
    defer buf.deinit();
    const writer = buf.writer();
    if (gpus.len == 0) {
        try writer.writeAll("GPU Status (macOS)\n");
        // BUG FIX: was `"" ** 50` (an empty string). Render a real
        // separator, matching the Go side's strings.Repeat("═", 50).
        try writer.writeAll("═" ** 50);
        try writer.writeAll("\n\nNo GPUs detected\n");
        return buf.toOwnedSlice();
    }
    try writer.writeAll("GPU Status (macOS");
    if (isAppleSilicon()) {
        try writer.writeAll(" - Apple Silicon");
    }
    try writer.writeAll(")\n");
    try writer.writeAll("═" ** 50);
    try writer.writeAll("\n\n");
    for (gpus) |gpu| {
        const name = std.mem.sliceTo(&gpu.name, 0);
        const model = std.mem.sliceTo(&gpu.chipset_model, 0);
        try writer.print("🎮 GPU {d}: {s}\n", .{ gpu.index, name });
        // Only show the model separately when it differs from the name.
        if (!std.mem.eql(u8, model, name)) {
            try writer.print(" Model: {s}\n", .{model});
        }
        if (gpu.is_integrated) {
            try writer.writeAll(" Type: Integrated (Unified Memory)\n");
        } else {
            try writer.print(" VRAM: {d} MB\n", .{gpu.vram_mb});
        }
        if (gpu.utilization_percent) |util| {
            try writer.print(" Utilization: {d}%\n", .{util});
        }
        if (gpu.temperature_celsius) |temp| {
            try writer.print(" Temperature: {d}°C\n", .{temp});
        }
        if (gpu.power_mw) |power| {
            // BUG FIX: `{d:.1f}` is not a valid Zig format spec (compile
            // error at comptime); `{d:.1}` matches the sibling NVML file.
            try writer.print(" Power: {d:.1} W\n", .{@as(f64, @floatFromInt(power)) / 1000.0});
        }
        try writer.writeAll("\n");
    }
    try writer.writeAll("💡 Note: Detailed GPU metrics require powermetrics (sudo)\n");
    return buf.toOwnedSlice();
}
/// Quick check for GPU availability on macOS.
/// False on any other OS or when no GPU entries are detected.
pub fn isMacOSGPUAvailable() bool {
    return builtin.os.tag == .macos and getGPUCount() > 0;
}

372
cli/src/native/nvml.zig Normal file
View file

@ -0,0 +1,372 @@
const std = @import("std");
const builtin = @import("builtin");
/// NVML Dynamic Loader for CLI
/// Pure Zig implementation using dlopen/LoadLibrary
/// No build-time dependency on NVIDIA SDK
// Platform-specific dynamic loading shim: a minimal LoadLibrary/dlopen
// wrapper so NVML can be resolved at runtime with no build-time dependency
// on the NVIDIA SDK.
const DynLib = switch (builtin.os.tag) {
    .windows => struct {
        handle: std.os.windows.HMODULE,
        /// Load a DLL by path. Returns error.LibraryNotFound on failure.
        fn open(path: []const u8) !@This() {
            const wide_path = try std.os.windows.sliceToPrefixedFileW(path);
            const handle = std.os.windows.LoadLibraryW(&wide_path.data) orelse return error.LibraryNotFound;
            return .{ .handle = handle };
        }
        fn close(self: *@This()) void {
            _ = std.os.windows.FreeLibrary(self.handle);
        }
        /// Resolve an exported symbol, or null when absent.
        fn lookup(self: @This(), name: []const u8) ?*anyopaque {
            return std.os.windows.GetProcAddress(self.handle, name);
        }
    },
    else => struct {
        handle: *anyopaque,
        // Extern declarations for dlopen/dlsym (libc).
        extern "c" fn dlopen(pathname: [*:0]const u8, mode: c_int) ?*anyopaque;
        extern "c" fn dlsym(handle: *anyopaque, symbol: [*:0]const u8) ?*anyopaque;
        extern "c" fn dlclose(handle: *anyopaque) c_int;
        // RTLD_NOW == 2 on both Linux and macOS.
        const RTLD_NOW = 2;
        /// dlopen a shared library by path/name.
        fn open(path: []const u8) !@This() {
            // FIX: std.cstr.addNullByte no longer exists in the standard
            // library (removed in Zig 0.12, which this file otherwise
            // targets via std.process.Child); dupeZ produces the same
            // NUL-terminated copy.
            const c_path = try std.heap.c_allocator.dupeZ(u8, path);
            defer std.heap.c_allocator.free(c_path);
            const handle = dlopen(c_path.ptr, RTLD_NOW) orelse return error.LibraryNotFound;
            return .{ .handle = handle };
        }
        fn close(self: *@This()) void {
            _ = dlclose(self.handle);
        }
        /// Resolve an exported symbol, or null when absent.
        fn lookup(self: @This(), name: []const u8) ?*anyopaque {
            const c_name = std.heap.c_allocator.dupeZ(u8, name) catch return null;
            defer std.heap.c_allocator.free(c_name);
            return dlsym(self.handle, c_name.ptr);
        }
    },
};
// NVML type definitions (mirrors nvml.h)
pub const nvmlReturn_t = c_int;
pub const nvmlDevice_t = *anyopaque;
/// GPU and memory-controller utilization in percent (nvmlUtilization_t).
pub const nvmlUtilization_t = extern struct {
    gpu: c_uint,
    memory: c_uint,
};
/// Framebuffer memory amounts (nvmlMemory_t).
pub const nvmlMemory_t = extern struct {
    total: c_ulonglong,
    free: c_ulonglong,
    used: c_ulonglong,
};
// NVML constants (values mirror nvml.h).
const NVML_SUCCESS = 0;
const NVML_TEMPERATURE_GPU = 0;
const NVML_CLOCK_SM = 0;
const NVML_CLOCK_MEM = 1;
// NVML function pointer types, all using the C calling convention so they
// can be cast from symbols resolved by DynLib.lookup.
const nvmlInit_v2_fn = *const fn () callconv(.C) nvmlReturn_t;
const nvmlShutdown_fn = *const fn () callconv(.C) nvmlReturn_t;
const nvmlDeviceGetCount_fn = *const fn (*c_uint) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetHandleByIndex_v2_fn = *const fn (c_uint, *nvmlDevice_t) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetName_fn = *const fn (nvmlDevice_t, [*]u8, c_uint) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetUtilizationRates_fn = *const fn (nvmlDevice_t, *nvmlUtilization_t) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetMemoryInfo_fn = *const fn (nvmlDevice_t, *nvmlMemory_t) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetTemperature_fn = *const fn (nvmlDevice_t, c_uint, *c_uint) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetPowerUsage_fn = *const fn (nvmlDevice_t, *c_uint) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetClockInfo_fn = *const fn (nvmlDevice_t, c_uint, *c_uint) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetUUID_fn = *const fn (nvmlDevice_t, [*]u8, c_uint) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetVbiosVersion_fn = *const fn (nvmlDevice_t, [*]u8, c_uint) callconv(.C) nvmlReturn_t;
/// Per-device GPU information snapshot, populated by NVML.getGPUInfo.
pub const GPUInfo = struct {
    /// NVML device index.
    index: u32,
    /// Device name, NUL-terminated.
    name: [256:0]u8,
    /// GPU utilization in percent (0 when the query is unavailable).
    utilization: u32,
    /// Used framebuffer memory in bytes (formatter divides down to MB).
    memory_used: u64,
    /// Total framebuffer memory in bytes.
    memory_total: u64,
    /// GPU temperature in degrees Celsius.
    temperature: u32,
    /// Power draw in milliwatts (formatter divides by 1000 for watts).
    power_draw: u32,
    /// SM clock in MHz.
    clock_sm: u32,
    /// Memory clock in MHz.
    clock_memory: u32,
    /// Device UUID string, NUL-terminated.
    uuid: [64:0]u8,
    /// VBIOS version string, NUL-terminated.
    vbios_version: [32:0]u8,
};
/// NVML handle with dynamically loaded function pointers.
/// Obtain with `load()`; release with `unload()`. Query methods record a
/// message in `last_error` on failure, so a handle should not be shared
/// across threads without external synchronization.
pub const NVML = struct {
    lib: DynLib,
    available: bool,
    // Required function pointers — load() fails if any is missing.
    init: nvmlInit_v2_fn,
    shutdown: nvmlShutdown_fn,
    get_count: nvmlDeviceGetCount_fn,
    get_handle_by_index: nvmlDeviceGetHandleByIndex_v2_fn,
    // Optional function pointers — null when the driver does not export them.
    get_name: ?nvmlDeviceGetName_fn,
    get_utilization: ?nvmlDeviceGetUtilizationRates_fn,
    get_memory: ?nvmlDeviceGetMemoryInfo_fn,
    get_temperature: ?nvmlDeviceGetTemperature_fn,
    get_power_usage: ?nvmlDeviceGetPowerUsage_fn,
    get_clock: ?nvmlDeviceGetClockInfo_fn,
    get_uuid: ?nvmlDeviceGetUUID_fn,
    get_vbios: ?nvmlDeviceGetVbiosVersion_fn,
    /// Last error message, NUL-terminated; read via getLastError().
    last_error: [256:0]u8,

    /// Load NVML dynamically.
    /// Returns null when NVML is not supported or not installed on this
    /// platform, an error when the library is present but required symbols
    /// are missing or initialization fails, and a ready handle otherwise.
    pub fn load() !?NVML {
        var nvml: NVML = undefined;
        // BUG FIX: initialize state up front. Previously `available` and
        // `last_error` stayed undefined on the success path, so
        // isAvailable()/getLastError() could read uninitialized memory.
        nvml.available = false;
        nvml.last_error = std.mem.zeroes([256:0]u8);
        // Try platform-specific library names
        const lib_names = switch (builtin.os.tag) {
            .windows => &[_][]const u8{
                "nvml.dll",
                "C:\\Windows\\System32\\nvml.dll",
            },
            .linux => &[_][]const u8{
                "libnvidia-ml.so.1",
                "libnvidia-ml.so",
            },
            else => return null, // NVML not supported on other platforms
        };
        // Try to load the first library name that resolves.
        var loaded = false;
        for (lib_names) |name| {
            if (DynLib.open(name)) |lib| {
                nvml.lib = lib;
                loaded = true;
                break;
            } else |_| continue;
        }
        if (!loaded) {
            return null; // NVML not available (no NVIDIA driver)
        }
        // BUG FIX: release the library handle on every error path below;
        // the *NotFound returns previously leaked the dlopen handle.
        errdefer nvml.lib.close();
        // Load required functions
        nvml.init = @ptrCast(nvml.lib.lookup("nvmlInit_v2") orelse return error.InitNotFound);
        nvml.shutdown = @ptrCast(nvml.lib.lookup("nvmlShutdown") orelse return error.ShutdownNotFound);
        nvml.get_count = @ptrCast(nvml.lib.lookup("nvmlDeviceGetCount") orelse return error.GetCountNotFound);
        nvml.get_handle_by_index = @ptrCast(nvml.lib.lookup("nvmlDeviceGetHandleByIndex_v2") orelse return error.GetHandleNotFound);
        // Load optional functions (stay null when not exported)
        nvml.get_name = @ptrCast(nvml.lib.lookup("nvmlDeviceGetName"));
        nvml.get_utilization = @ptrCast(nvml.lib.lookup("nvmlDeviceGetUtilizationRates"));
        nvml.get_memory = @ptrCast(nvml.lib.lookup("nvmlDeviceGetMemoryInfo"));
        nvml.get_temperature = @ptrCast(nvml.lib.lookup("nvmlDeviceGetTemperature"));
        nvml.get_power_usage = @ptrCast(nvml.lib.lookup("nvmlDeviceGetPowerUsage"));
        nvml.get_clock = @ptrCast(nvml.lib.lookup("nvmlDeviceGetClockInfo"));
        nvml.get_uuid = @ptrCast(nvml.lib.lookup("nvmlDeviceGetUUID"));
        nvml.get_vbios = @ptrCast(nvml.lib.lookup("nvmlDeviceGetVbiosVersion"));
        // Initialize NVML
        const result = nvml.init();
        if (result != NVML_SUCCESS) {
            nvml.setError("NVML initialization failed");
            return error.NVMLInitFailed; // errdefer closes the library
        }
        nvml.available = true;
        return nvml;
    }

    /// Unload NVML: shut the driver session down (if initialized) and
    /// release the library handle.
    pub fn unload(self: *NVML) void {
        if (self.available) {
            _ = self.shutdown();
        }
        self.lib.close();
    }

    /// Check if NVML is available (i.e. load() completed successfully).
    pub fn isAvailable(self: NVML) bool {
        return self.available;
    }

    /// Get the last error message recorded by a failed call.
    pub fn getLastError(self: NVML) []const u8 {
        return std.mem.sliceTo(&self.last_error, 0);
    }

    // Record `msg` (truncated to the buffer) as the last error.
    fn setError(self: *NVML, msg: []const u8) void {
        @memset(&self.last_error, 0);
        const len = @min(msg.len, self.last_error.len - 1);
        @memcpy(self.last_error[0..len], msg[0..len]);
    }

    /// Get the number of NVIDIA GPUs visible to the driver.
    pub fn getGPUCount(self: *NVML) !u32 {
        var count: c_uint = 0;
        const result = self.get_count(&count);
        if (result != NVML_SUCCESS) {
            self.setError("Failed to get GPU count");
            return error.GetCountFailed;
        }
        return @intCast(count);
    }

    /// Get GPU info by index. Fields backed by optional NVML symbols keep
    /// their zero defaults when the symbol is absent or the query fails.
    pub fn getGPUInfo(self: *NVML, index: u32) !GPUInfo {
        var info: GPUInfo = .{
            .index = index,
            .name = std.mem.zeroes([256:0]u8),
            .utilization = 0,
            .memory_used = 0,
            .memory_total = 0,
            .temperature = 0,
            .power_draw = 0,
            .clock_sm = 0,
            .clock_memory = 0,
            .uuid = std.mem.zeroes([64:0]u8),
            .vbios_version = std.mem.zeroes([32:0]u8),
        };
        var device: nvmlDevice_t = undefined;
        var result = self.get_handle_by_index(index, &device);
        if (result != NVML_SUCCESS) {
            self.setError("Failed to get device handle");
            return error.GetHandleFailed;
        }
        // Get name (buffer size includes the sentinel byte)
        if (self.get_name) |func| {
            _ = func(device, &info.name, @sizeOf(@TypeOf(info.name)));
        }
        // Get utilization
        if (self.get_utilization) |func| {
            var util: nvmlUtilization_t = undefined;
            result = func(device, &util);
            if (result == NVML_SUCCESS) {
                info.utilization = @intCast(util.gpu);
            }
        }
        // Get memory (bytes)
        if (self.get_memory) |func| {
            var mem: nvmlMemory_t = undefined;
            result = func(device, &mem);
            if (result == NVML_SUCCESS) {
                info.memory_used = mem.used;
                info.memory_total = mem.total;
            }
        }
        // Get temperature (°C)
        if (self.get_temperature) |func| {
            var temp: c_uint = 0;
            result = func(device, NVML_TEMPERATURE_GPU, &temp);
            if (result == NVML_SUCCESS) {
                info.temperature = @intCast(temp);
            }
        }
        // Get power usage (mW)
        if (self.get_power_usage) |func| {
            var power: c_uint = 0;
            result = func(device, &power);
            if (result == NVML_SUCCESS) {
                info.power_draw = @intCast(power);
            }
        }
        // Get SM and memory clocks (MHz)
        if (self.get_clock) |func| {
            var clock: c_uint = 0;
            result = func(device, NVML_CLOCK_SM, &clock);
            if (result == NVML_SUCCESS) {
                info.clock_sm = @intCast(clock);
            }
            result = func(device, NVML_CLOCK_MEM, &clock);
            if (result == NVML_SUCCESS) {
                info.clock_memory = @intCast(clock);
            }
        }
        // Get UUID
        if (self.get_uuid) |func| {
            _ = func(device, &info.uuid, @sizeOf(@TypeOf(info.uuid)));
        }
        // Get VBIOS version
        if (self.get_vbios) |func| {
            _ = func(device, &info.vbios_version, @sizeOf(@TypeOf(info.vbios_version)));
        }
        return info;
    }

    /// Get info for all GPUs. Caller frees the returned slice with
    /// `allocator` (the empty result is a zero-length comptime slice, which
    /// is a no-op to free).
    pub fn getAllGPUInfo(self: *NVML, allocator: std.mem.Allocator) ![]GPUInfo {
        const count = try self.getGPUCount();
        if (count == 0) return &[_]GPUInfo{};
        var gpus = try allocator.alloc(GPUInfo, count);
        errdefer allocator.free(gpus);
        for (0..count) |i| {
            gpus[i] = try self.getGPUInfo(@intCast(i));
        }
        return gpus;
    }
};
// Convenience functions for simple use cases
/// Quick check if NVML is available (creates and destroys a temporary
/// handle). Any load failure is treated as "not available".
pub fn isNVMLAvailable() bool {
    const maybe_nvml = NVML.load() catch return false;
    var nvml = maybe_nvml orelse return false;
    defer nvml.unload();
    return nvml.isAvailable();
}
/// Format GPU info as string for display.
/// Caller owns the returned buffer (allocated with `allocator`).
pub fn formatGPUInfo(allocator: std.mem.Allocator, gpus: []const GPUInfo) ![]u8 {
    var buf = std.ArrayList(u8).init(allocator);
    defer buf.deinit();
    const writer = buf.writer();
    try writer.writeAll("GPU Status (NVML)\n");
    // BUG FIX: was `"" ** 50` (an empty string). Render a real separator,
    // matching the Go implementation's strings.Repeat("═", 50).
    try writer.writeAll("═" ** 50);
    try writer.writeAll("\n\n");
    for (gpus) |gpu| {
        const name = std.mem.sliceTo(&gpu.name, 0);
        try writer.print("🎮 GPU {d}: {s}\n", .{ gpu.index, name });
        try writer.print(" Utilization: {d}%\n", .{gpu.utilization});
        // memory_* are in bytes; show MB.
        try writer.print(" Memory: {d}/{d} MB\n", .{
            gpu.memory_used / 1024 / 1024,
            gpu.memory_total / 1024 / 1024,
        });
        try writer.print(" Temperature: {d}°C\n", .{gpu.temperature});
        if (gpu.power_draw > 0) {
            // power_draw is in milliwatts; show watts.
            try writer.print(" Power: {d:.1} W\n", .{@as(f64, @floatFromInt(gpu.power_draw)) / 1000.0});
        }
        if (gpu.clock_sm > 0) {
            try writer.print(" SM Clock: {d} MHz\n", .{gpu.clock_sm});
        }
        try writer.writeAll("\n");
    }
    return buf.toOwnedSlice();
}

View file

@ -1,5 +1,5 @@
//go:build !native_libs
// +build !native_libs
//go:build !cgo || !native_libs
// +build !cgo !native_libs
package queue

View file

@ -380,19 +380,16 @@ func (c *Config) Validate() error {
// - UUID-style gpu_visible_device_ids is NVIDIA-only.
vendor := strings.ToLower(strings.TrimSpace(c.GPUVendor))
if len(c.GPUVisibleDevices) > 0 && len(c.GPUVisibleDeviceIDs) > 0 {
return fmt.Errorf("gpu_visible_devices and gpu_visible_device_ids are mutually exclusive")
}
if len(c.GPUVisibleDeviceIDs) > 0 {
if vendor != string(GPUTypeNVIDIA) {
return fmt.Errorf(
"gpu_visible_device_ids is only supported when gpu_vendor is %q",
"visible_device_ids is only supported when gpu_vendor is %q",
string(GPUTypeNVIDIA),
)
}
for _, id := range c.GPUVisibleDeviceIDs {
id = strings.TrimSpace(id)
if id == "" {
return fmt.Errorf("gpu_visible_device_ids contains an empty value")
return fmt.Errorf("visible_device_ids contains an empty value")
}
if !strings.HasPrefix(id, "GPU-") {
return fmt.Errorf("gpu_visible_device_ids values must start with %q, got %q", "GPU-", id)

View file

@ -98,6 +98,14 @@ type AppleDetector struct {
}
func (d *AppleDetector) DetectGPUCount() int {
// First try actual macOS GPU detection
if IsMacOS() {
count, err := GetMacOSGPUCount()
if err == nil && count > 0 {
return count
}
}
if n, ok := envInt("FETCH_ML_GPU_COUNT"); ok && n >= 0 {
return n
}

View file

@ -0,0 +1,279 @@
//go:build darwin
// +build darwin
package worker
import (
"bufio"
"context"
"encoding/json"
"fmt"
"os/exec"
"regexp"
"runtime"
"strconv"
"strings"
"time"
)
// MacOSGPUInfo holds information about a macOS GPU, as parsed from
// `system_profiler SPDisplaysDataType -json` output.
type MacOSGPUInfo struct {
	Index          uint32 `json:"index"`            // zero-based position in the system_profiler item list
	Name           string `json:"name"`             // human-readable name (copied from ChipsetModel)
	ChipsetModel   string `json:"chipset_model"`    // raw "sppci_model" value
	VRAM_MB        uint32 `json:"vram_mb"`          // dedicated VRAM in MB; 0 when memory is shared
	IsIntegrated   bool   `json:"is_integrated"`    // true when system_profiler reports "sppci_vram_shared"
	IsAppleSilicon bool   `json:"is_apple_silicon"` // true when the host machine is arm64
	// Real-time metrics from powermetrics (if available)
	UtilizationPercent uint32 `json:"utilization_percent,omitempty"`
	PowerMW            uint32 `json:"power_mw,omitempty"`
	TemperatureC       uint32 `json:"temperature_c,omitempty"`
}
// PowermetricsData holds GPU metrics parsed from `powermetrics` output.
type PowermetricsData struct {
	GPUUtilization float64 // percent (parsed from a "%" value)
	GPUPower       float64 // milliwatts (parsed from an "mW" value)
	GPUTemperature float64 // degrees Celsius
	HasData        bool    // true when at least one metric was parsed
}
// IsMacOS returns true if running on macOS (GOOS == "darwin").
func IsMacOS() bool {
	return runtime.GOOS == "darwin"
}
// IsAppleSilicon checks if running on Apple Silicon by shelling out to
// `uname -m` and comparing against "arm64". Returns false on any other OS
// or when the command fails.
// NOTE(review): an amd64 binary running under Rosetta 2 sees a translated
// environment, so this reports the architecture as seen by this process —
// confirm that is the intended semantics.
func IsAppleSilicon() bool {
	if runtime.GOOS != "darwin" {
		return false
	}
	// Check machine hardware name
	out, err := exec.Command("uname", "-m").Output()
	if err != nil {
		return false
	}
	return strings.TrimSpace(string(out)) == "arm64"
}
// GetMacOSGPUCount returns the number of GPUs on macOS by counting the
// entries system_profiler reports for SPDisplaysDataType. Falls back to
// gfxutil when system_profiler cannot be run. Returns an error off-macOS
// or when the output cannot be parsed.
func GetMacOSGPUCount() (int, error) {
	if runtime.GOOS != "darwin" {
		return 0, fmt.Errorf("not running on macOS")
	}
	// Use system_profiler to get GPU count
	cmd := exec.Command("system_profiler", "SPDisplaysDataType", "-json")
	out, err := cmd.Output()
	if err != nil {
		// Fall back to gfxutil if system_profiler fails
		return getGPUCountViaGfxutil()
	}
	// Parse JSON output
	var data map[string]interface{}
	if err := json.Unmarshal(out, &data); err != nil {
		return 0, err
	}
	// Each top-level item under SPDisplaysDataType is one display adapter.
	if spData, ok := data["SPDisplaysDataType"].([]interface{}); ok {
		return len(spData), nil
	}
	// Key absent: treat as zero GPUs rather than an error.
	return 0, nil
}
// getGPUCountViaGfxutil uses gfxutil to count GPUs (fallback path when
// system_profiler fails).
// NOTE(review): gfxutil is a third-party tool and is NOT installed on a
// stock macOS system — this fallback only works when the user has it on
// PATH; confirm that expectation.
func getGPUCountViaGfxutil() (int, error) {
	cmd := exec.Command("gfxutil", "-f", "display")
	out, err := cmd.Output()
	if err != nil {
		return 0, err
	}
	// Count display paths (one per GPU typically); this is a heuristic on
	// gfxutil's text output, not a structured parse.
	lines := strings.Split(strings.TrimSpace(string(out)), "\n")
	count := 0
	for _, line := range lines {
		if strings.Contains(line, "Display") {
			count++
		}
	}
	return count, nil
}
// GetMacOSGPUInfo returns detailed information about macOS GPUs by parsing
// `system_profiler SPDisplaysDataType -json`. Entries missing expected keys
// are kept with zero-valued fields. Returns an error off-macOS or when the
// command/JSON fails; returns an empty slice when the data key is absent.
func GetMacOSGPUInfo() ([]MacOSGPUInfo, error) {
	if runtime.GOOS != "darwin" {
		return nil, fmt.Errorf("not running on macOS")
	}
	cmd := exec.Command("system_profiler", "SPDisplaysDataType", "-json")
	out, err := cmd.Output()
	if err != nil {
		return nil, err
	}
	var data map[string]interface{}
	if err := json.Unmarshal(out, &data); err != nil {
		return nil, err
	}
	spData, ok := data["SPDisplaysDataType"].([]interface{})
	if !ok {
		return []MacOSGPUInfo{}, nil
	}
	// Apple Silicon status is a host-wide property; compute it once.
	isAppleSilicon := IsAppleSilicon()
	var gpus []MacOSGPUInfo
	for i, item := range spData {
		if gpuData, ok := item.(map[string]interface{}); ok {
			info := MacOSGPUInfo{
				Index:          uint32(i),
				IsAppleSilicon: isAppleSilicon,
			}
			// Extract chipset model; it doubles as the display name.
			if model, ok := gpuData["sppci_model"].(string); ok {
				info.ChipsetModel = model
				info.Name = model
			}
			// Presence of "sppci_vram_shared" marks an integrated GPU.
			if _, ok := gpuData["sppci_vram_shared"]; ok {
				info.IsIntegrated = true
			}
			// Extract VRAM from a string like "16384 MB".
			if vram, ok := gpuData["sppci_vram"].(string); ok {
				parts := strings.Fields(vram)
				if len(parts) >= 1 {
					if mb, err := strconv.ParseUint(parts[0], 10, 32); err == nil {
						info.VRAM_MB = uint32(mb)
					}
				}
			}
			gpus = append(gpus, info)
		}
	}
	return gpus, nil
}
// GetPowermetricsData tries to get real-time GPU metrics from powermetrics.
// powermetrics requires sudo, so this is best-effort: when the tool cannot
// run, a zero-valued result with HasData=false is returned with a nil
// error. A 3-second timeout bounds the sample.
func GetPowermetricsData() (*PowermetricsData, error) {
	// powermetrics requires sudo, so this may fail
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()
	cmd := exec.CommandContext(ctx, "powermetrics", "--samplers", "gpu_power", "-n", "1", "-i", "100")
	out, err := cmd.Output()
	if err != nil {
		// powermetrics not available or no permission
		return &PowermetricsData{HasData: false}, nil
	}
	data := &PowermetricsData{HasData: false}
	// PERF FIX: compile each pattern once, not on every scanned line as the
	// previous implementation did inside the loop.
	var (
		utilRe  = regexp.MustCompile(`(\d+(?:\.\d+)?)\s*%`)
		powerRe = regexp.MustCompile(`(\d+(?:\.\d+)?)\s*mW`)
		tempRe  = regexp.MustCompile(`(\d+(?:\.\d+)?)\s*C`)
	)
	// Parse powermetrics output
	// Example: "GPU Power: 5000 mW" or "GPU utilization: 45%"
	scanner := bufio.NewScanner(strings.NewReader(string(out)))
	for scanner.Scan() {
		line := scanner.Text()
		// Parse GPU utilization
		if strings.Contains(line, "GPU utilization") || strings.Contains(line, "GPU active") {
			if matches := utilRe.FindStringSubmatch(line); len(matches) > 1 {
				if util, err := strconv.ParseFloat(matches[1], 64); err == nil {
					data.GPUUtilization = util
					data.HasData = true
				}
			}
		}
		// Parse GPU power
		if strings.Contains(line, "GPU Power") || strings.Contains(line, "GPU power") {
			if matches := powerRe.FindStringSubmatch(line); len(matches) > 1 {
				if power, err := strconv.ParseFloat(matches[1], 64); err == nil {
					data.GPUPower = power
					data.HasData = true
				}
			}
		}
		// Parse GPU temperature (if available)
		if strings.Contains(line, "GPU Temperature") || strings.Contains(line, "GPU temp") {
			if matches := tempRe.FindStringSubmatch(line); len(matches) > 1 {
				if temp, err := strconv.ParseFloat(matches[1], 64); err == nil {
					data.GPUTemperature = temp
					data.HasData = true
				}
			}
		}
	}
	return data, nil
}
// FormatMacOSGPUStatus formats GPU status for display as a multi-line,
// human-readable report. Real-time utilization/power/temperature lines are
// included only when powermetrics data is available (requires sudo).
func FormatMacOSGPUStatus() (string, error) {
	gpus, err := GetMacOSGPUInfo()
	if err != nil {
		return "", err
	}
	// Try to get real-time metrics from powermetrics (best-effort; the
	// returned error is always nil by contract).
	powermetrics, _ := GetPowermetricsData()
	if len(gpus) == 0 {
		// BUG FIX: the old message told the user to "run on a system with
		// NVIDIA GPU or macOS" even though this code path only runs on
		// macOS and simply found no display adapters.
		return "GPU Status (macOS)\n\nNo GPUs detected", nil
	}
	var b strings.Builder
	if IsAppleSilicon() {
		b.WriteString("GPU Status (macOS - Apple Silicon)\n")
	} else {
		b.WriteString("GPU Status (macOS)\n")
	}
	b.WriteString(strings.Repeat("═", 50) + "\n\n")
	for _, gpu := range gpus {
		fmt.Fprintf(&b, "🎮 GPU %d: %s\n", gpu.Index, gpu.Name)
		if gpu.IsAppleSilicon {
			b.WriteString("   Type: Apple Silicon (Unified Memory)\n")
		} else if gpu.IsIntegrated {
			b.WriteString("   Type: Integrated (Shared Memory)\n")
		} else {
			fmt.Fprintf(&b, "   VRAM: %d MB\n", gpu.VRAM_MB)
		}
		// Display powermetrics data if available (consistently via Fprintf;
		// the old code mixed WriteString(Sprintf) with Fprintf).
		if powermetrics != nil && powermetrics.HasData {
			if powermetrics.GPUUtilization > 0 {
				fmt.Fprintf(&b, "   Utilization: %.1f%%\n", powermetrics.GPUUtilization)
			}
			if powermetrics.GPUPower > 0 {
				// GPUPower is milliwatts; show watts.
				fmt.Fprintf(&b, "   Power: %.1f W\n", powermetrics.GPUPower/1000)
			}
			if powermetrics.GPUTemperature > 0 {
				fmt.Fprintf(&b, "   Temperature: %.0f°C\n", powermetrics.GPUTemperature)
			}
		}
		b.WriteString("\n")
	}
	if powermetrics == nil || !powermetrics.HasData {
		b.WriteString("💡 Note: Run with sudo for real-time GPU metrics via powermetrics\n")
	}
	return b.String(), nil
}

View file

@ -0,0 +1,41 @@
//go:build !darwin
// +build !darwin
package worker
import "errors"
// MacOSGPUInfo placeholder for non-macOS builds. Mirrors the darwin
// struct's exported fields (without JSON tags or powermetrics fields) so
// cross-platform callers compile everywhere.
type MacOSGPUInfo struct {
	Index          uint32
	Name           string
	ChipsetModel   string
	VRAM_MB        uint32
	IsIntegrated   bool
	IsAppleSilicon bool
}
// IsMacOS returns false on non-macOS builds (compile-time constant via the
// !darwin build tag on this file).
func IsMacOS() bool {
	return false
}
// IsAppleSilicon returns false on non-macOS builds.
func IsAppleSilicon() bool {
	return false
}
// GetMacOSGPUCount returns a zero count and an error on non-macOS builds.
func GetMacOSGPUCount() (int, error) {
	return 0, errors.New("macOS GPU monitoring only available on macOS")
}
// GetMacOSGPUInfo returns a nil slice and an error on non-macOS builds.
func GetMacOSGPUInfo() ([]MacOSGPUInfo, error) {
	return nil, errors.New("macOS GPU monitoring only available on macOS")
}
// FormatMacOSGPUStatus returns an empty string and an error on non-macOS
// builds.
func FormatMacOSGPUStatus() (string, error) {
	return "", errors.New("macOS GPU monitoring only available on macOS")
}

View file

@ -1,5 +1,5 @@
//go:build cgo && native_libs
// +build cgo,native_libs
//go:build cgo && native_libs && linux
// +build cgo,native_libs,linux
package worker

View file

@ -1,11 +1,26 @@
//go:build cgo && !native_libs
// +build cgo,!native_libs
//go:build !cgo || !native_libs || !linux
// +build !cgo !native_libs !linux
package worker
import "errors"
// Stub implementations when native_libs build tag is not present
// GPUInfo provides comprehensive GPU information
type GPUInfo struct {
Index uint32
Name string
Utilization uint32
MemoryUsed uint64
MemoryTotal uint64
Temperature uint32
PowerDraw uint32
ClockSM uint32
ClockMemory uint32
PCIeGen uint32
PCIeWidth uint32
UUID string
VBIOSVersion string
}
func InitNVML() error {
return errors.New("NVML requires native_libs build tag")
@ -18,10 +33,10 @@ func IsNVMLAvailable() bool {
}
func GetGPUCount() (int, error) {
return 0, errors.New("NVML requires native_libs build tag")
return 0, nil
}
func GetGPUInfo(index uint32) (*GPUInfo, error) {
func GetGPUInfo(index uint32) (*GPUInfo, error) { // <-- was missing
return nil, errors.New("NVML requires native_libs build tag")
}

View file

@ -15,11 +15,6 @@ func init() {
log.Printf("[native] Native libraries disabled (build with -tags native_libs to enable)")
}
// dirOverallSHA256HexNative is not available without native_libs build tag.
func dirOverallSHA256HexNative(_ string) (string, error) {
return "", errors.New("native hash requires native_libs build tag")
}
// HashFilesBatchNative is not available without native_libs build tag.
func HashFilesBatchNative(paths []string) ([]string, error) {
return nil, errors.New("native batch hash requires native_libs build tag")

View file

@ -3,7 +3,8 @@
package worker
// #cgo LDFLAGS: -L${SRCDIR}/../../native/build -Wl,-rpath,${SRCDIR}/../../native/build -ldataset_hash
// #cgo darwin LDFLAGS: -L${SRCDIR}/../../native/build -Wl,-rpath,${SRCDIR}/../../native/build -ldataset_hash
// #cgo linux LDFLAGS: -L${SRCDIR}/../../native/build -Wl,-rpath,${SRCDIR}/../../native/build -ldataset_hash -lnvml_gpu -lnvidia-ml
// #include "../../native/dataset_hash/dataset_hash.h"
// #include <stdlib.h>
import "C"
@ -25,8 +26,6 @@ var (
ctxInitTime time.Time
)
// getHashContext returns a cached hash context, initializing it once.
// Context reuse eliminates 5-20ms of thread pool creation per hash operation.
func getHashContext() *C.fh_context_t {
hashCtxOnce.Do(func() {
start := time.Now()
@ -38,9 +37,8 @@ func getHashContext() *C.fh_context_t {
return hashCtx
}
// dirOverallSHA256HexNative implementation with native library.
func dirOverallSHA256HexNative(root string) (string, error) {
ctx := getHashContext() // Reuse cached context: ~0.1μs vs 5-20ms
ctx := getHashContext()
croot := C.CString(root)
defer C.free(unsafe.Pointer(croot))
@ -58,28 +56,23 @@ func dirOverallSHA256HexNative(root string) (string, error) {
return C.GoString(result), nil
}
// GetSIMDImplName returns the native SHA256 implementation name.
func GetSIMDImplName() string {
return C.GoString(C.fh_get_simd_impl_name())
}
// HasSIMDSHA256 returns true if SIMD SHA256 is available.
func HasSIMDSHA256() bool {
return C.fh_has_simd_sha256() == 1
}
// ScanArtifactsNative falls back to Go implementation.
func ScanArtifactsNative(runDir string) (*manifest.Artifacts, error) {
return ScanArtifacts(runDir)
}
// ExtractTarGzNative falls back to Go implementation.
func ExtractTarGzNative(archivePath, dstDir string) error {
return ExtractTarGz(archivePath, dstDir)
}
// DirOverallSHA256HexNative exports the native hash implementation for benchmarks.
// This allows explicit native library usage when -tags native_libs is enabled.
func DirOverallSHA256HexNative(root string) (string, error) {
return dirOverallSHA256HexNative(root)
}

View file

@ -9,11 +9,6 @@ import (
"github.com/jfraeys/fetch_ml/internal/manifest"
)
// dirOverallSHA256HexNative is not available without CGO.
func dirOverallSHA256HexNative(root string) (string, error) {
return "", errors.New("native hash requires CGO")
}
// HashFilesBatchNative is not available without CGO.
func HashFilesBatchNative(paths []string) ([]string, error) {
return nil, errors.New("native batch hash requires CGO")

View file

@ -31,7 +31,11 @@ if(NVML_LIBRARY AND NVML_INCLUDE_DIR)
message(STATUS "Found NVML: ${NVML_LIBRARY}")
message(STATUS "NVML include: ${NVML_INCLUDE_DIR}")
else()
message(WARNING "NVML not found. GPU monitoring will be disabled.")
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
message(WARNING "NVML not found. NVIDIA GPU monitoring will be disabled.")
else()
message(STATUS "NVML not available on ${CMAKE_SYSTEM_NAME}. Using platform-specific GPU monitoring.")
endif()
# Create stub library
target_compile_definitions(nvml_gpu PRIVATE NVML_STUB)
endif()

View file

@ -0,0 +1,272 @@
#include "nvml_dynamic.h"
#include <string.h>
#include <stdlib.h>
#ifdef _WIN32
#include <windows.h>
#else
#include <dlfcn.h>
#endif
// NVML type definitions (from nvml.h), re-declared locally so this file
// builds without the NVIDIA SDK headers.
typedef int nvmlReturn_t;
typedef void* nvmlDevice_t;
/* GPU and memory-controller utilization, in percent. */
typedef struct {
    unsigned int gpu;
    unsigned int memory;
} nvmlUtilization_t;
/* Framebuffer memory amounts. */
typedef struct {
    unsigned long long total;
    unsigned long long free;
    unsigned long long used;
} nvmlMemory_t;
// Function pointer types for the NVML symbols resolved at runtime.
typedef nvmlReturn_t (*nvmlInit_v2_fn)(void);
typedef nvmlReturn_t (*nvmlShutdown_fn)(void);
typedef nvmlReturn_t (*nvmlSystemGetDriverVersion_fn)(char*, unsigned int);
typedef nvmlReturn_t (*nvmlDeviceGetCount_fn)(unsigned int*);
typedef nvmlReturn_t (*nvmlDeviceGetHandleByIndex_v2_fn)(unsigned int, nvmlDevice_t*);
typedef nvmlReturn_t (*nvmlDeviceGetName_fn)(nvmlDevice_t, char*, unsigned int);
typedef nvmlReturn_t (*nvmlDeviceGetUtilizationRates_fn)(nvmlDevice_t, nvmlUtilization_t*);
typedef nvmlReturn_t (*nvmlDeviceGetMemoryInfo_fn)(nvmlDevice_t, nvmlMemory_t*);
typedef nvmlReturn_t (*nvmlDeviceGetTemperature_fn)(nvmlDevice_t, unsigned int, unsigned int*);
typedef nvmlReturn_t (*nvmlDeviceGetPowerUsage_fn)(nvmlDevice_t, unsigned int*);
typedef nvmlReturn_t (*nvmlDeviceGetClockInfo_fn)(nvmlDevice_t, unsigned int, unsigned int*);
typedef nvmlReturn_t (*nvmlDeviceGetPcieThroughput_fn)(nvmlDevice_t, unsigned int, unsigned int*);
typedef nvmlReturn_t (*nvmlDeviceGetUUID_fn)(nvmlDevice_t, char*, unsigned int);
typedef nvmlReturn_t (*nvmlDeviceGetVbiosVersion_fn)(nvmlDevice_t, char*, unsigned int);
// NVML constants (values mirror nvml.h)
#define NVML_SUCCESS 0
#define NVML_TEMPERATURE_GPU 0
#define NVML_CLOCK_SM 0
#define NVML_CLOCK_MEM 1
#define NVML_PCIE_UTIL_TX_BYTES 0
#define NVML_PCIE_UTIL_RX_BYTES 1
struct nvml_dynamic {
void* handle;
char last_error[256];
int available;
// Function pointers
nvmlInit_v2_fn init;
nvmlShutdown_fn shutdown;
nvmlSystemGetDriverVersion_fn get_driver_version;
nvmlDeviceGetCount_fn get_count;
nvmlDeviceGetHandleByIndex_v2_fn get_handle_by_index;
nvmlDeviceGetName_fn get_name;
nvmlDeviceGetUtilizationRates_fn get_utilization;
nvmlDeviceGetMemoryInfo_fn get_memory;
nvmlDeviceGetTemperature_fn get_temperature;
nvmlDeviceGetPowerUsage_fn get_power_usage;
nvmlDeviceGetClockInfo_fn get_clock;
nvmlDeviceGetUUID_fn get_uuid;
nvmlDeviceGetVbiosVersion_fn get_vbios;
};
// Record a failure message on the handle, truncating to the buffer size and
// always leaving it NUL-terminated. Safe to call with a NULL handle.
static void set_error(nvml_dynamic_t* nvml, const char* msg) {
    if (nvml == NULL) {
        return;
    }
    const size_t cap = sizeof(nvml->last_error);
    strncpy(nvml->last_error, msg, cap - 1);
    nvml->last_error[cap - 1] = '\0';
}
// Platform abstraction over dynamic loading: Win32 LoadLibrary/GetProcAddress
// on Windows, POSIX dlopen/dlsym elsewhere. The three helpers keep identical
// signatures on both branches so the rest of the file stays platform-neutral.
#ifdef _WIN32
static void* load_lib(const char* name) {
    return LoadLibraryA(name);
}
static void* get_sym(void* handle, const char* name) {
    return (void*)GetProcAddress((HMODULE)handle, name);
}
static void close_lib(void* handle) {
    FreeLibrary((HMODULE)handle);
}
#else
static void* load_lib(const char* name) {
    // RTLD_NOW: resolve all symbols up front so failures surface at load time.
    return dlopen(name, RTLD_NOW);
}
static void* get_sym(void* handle, const char* name) {
    return dlsym(handle, name);
}
static void close_lib(void* handle) {
    dlclose(handle);
}
#endif
// Load the NVML shared library at runtime and resolve the entry points this
// module uses.
//
// Always returns an allocated handle (NULL only on out-of-memory). When the
// driver library is missing, required symbols cannot be resolved, or
// nvmlInit_v2 fails, the handle is still returned with available == 0 and
// last_error describing the failure — callers probe via nvml_is_available().
// The caller must release the handle with nvml_unload().
nvml_dynamic_t* nvml_load(void) {
    nvml_dynamic_t* nvml = (nvml_dynamic_t*)calloc(1, sizeof(nvml_dynamic_t));
    if (!nvml) return NULL;
    // Try to load the NVML library from the platform's usual locations.
#ifdef _WIN32
    nvml->handle = load_lib("nvml.dll");
    if (!nvml->handle) {
        nvml->handle = load_lib("C:\\Windows\\System32\\nvml.dll");
    }
#else
    // Prefer the versioned soname installed by the driver package; fall back
    // to the unversioned development symlink.
    nvml->handle = load_lib("libnvidia-ml.so.1");
    if (!nvml->handle) {
        nvml->handle = load_lib("libnvidia-ml.so");
    }
#endif
    if (!nvml->handle) {
        set_error(nvml, "NVML library not found - NVIDIA driver may not be installed");
        nvml->available = 0;
        return nvml;
    }
    // Resolve function pointers. For the device-count query, prefer the _v2
    // entry point (current nvml.h maps nvmlDeviceGetCount to it) and fall
    // back to the legacy symbol exported by older drivers.
    nvml->init = (nvmlInit_v2_fn)get_sym(nvml->handle, "nvmlInit_v2");
    nvml->shutdown = (nvmlShutdown_fn)get_sym(nvml->handle, "nvmlShutdown");
    nvml->get_driver_version = (nvmlSystemGetDriverVersion_fn)get_sym(nvml->handle, "nvmlSystemGetDriverVersion");
    nvml->get_count = (nvmlDeviceGetCount_fn)get_sym(nvml->handle, "nvmlDeviceGetCount_v2");
    if (!nvml->get_count) {
        nvml->get_count = (nvmlDeviceGetCount_fn)get_sym(nvml->handle, "nvmlDeviceGetCount");
    }
    nvml->get_handle_by_index = (nvmlDeviceGetHandleByIndex_v2_fn)get_sym(nvml->handle, "nvmlDeviceGetHandleByIndex_v2");
    nvml->get_name = (nvmlDeviceGetName_fn)get_sym(nvml->handle, "nvmlDeviceGetName");
    nvml->get_utilization = (nvmlDeviceGetUtilizationRates_fn)get_sym(nvml->handle, "nvmlDeviceGetUtilizationRates");
    nvml->get_memory = (nvmlDeviceGetMemoryInfo_fn)get_sym(nvml->handle, "nvmlDeviceGetMemoryInfo");
    nvml->get_temperature = (nvmlDeviceGetTemperature_fn)get_sym(nvml->handle, "nvmlDeviceGetTemperature");
    nvml->get_power_usage = (nvmlDeviceGetPowerUsage_fn)get_sym(nvml->handle, "nvmlDeviceGetPowerUsage");
    nvml->get_clock = (nvmlDeviceGetClockInfo_fn)get_sym(nvml->handle, "nvmlDeviceGetClockInfo");
    nvml->get_uuid = (nvmlDeviceGetUUID_fn)get_sym(nvml->handle, "nvmlDeviceGetUUID");
    nvml->get_vbios = (nvmlDeviceGetVbiosVersion_fn)get_sym(nvml->handle, "nvmlDeviceGetVbiosVersion");
    // Only lifecycle and enumeration symbols are mandatory; the per-device
    // metric queries are optional and guarded at their call sites.
    if (!nvml->init || !nvml->shutdown || !nvml->get_count || !nvml->get_handle_by_index) {
        set_error(nvml, "Failed to load required NVML functions");
        close_lib(nvml->handle);
        nvml->handle = NULL;
        nvml->available = 0;
        return nvml;
    }
    // Initialize NVML — required before any device query.
    nvmlReturn_t result = nvml->init();
    if (result != NVML_SUCCESS) {
        set_error(nvml, "Failed to initialize NVML");
        close_lib(nvml->handle);
        nvml->handle = NULL;
        nvml->available = 0;
        return nvml;
    }
    nvml->available = 1;
    return nvml;
}
// Shut NVML down (when it was initialized), close the library, and free the
// handle. Accepts NULL. Deallocation cannot fail.
void nvml_unload(nvml_dynamic_t* nvml) {
    if (nvml == NULL) {
        return;
    }
    if (nvml->handle != NULL) {
        // Only call shutdown when the symbol was actually resolved.
        if (nvml->shutdown != NULL) {
            nvml->shutdown();
        }
        close_lib(nvml->handle);
    }
    free(nvml);
}
// Return 1 when NVML was loaded and initialized successfully, 0 otherwise
// (including for a NULL handle).
int nvml_is_available(const nvml_dynamic_t* nvml) {
    if (nvml == NULL) {
        return 0;
    }
    return nvml->available;
}
// Return the most recent error message recorded on the handle. A NULL handle
// yields a fixed static string; the returned pointer is never NULL.
const char* nvml_last_error(const nvml_dynamic_t* nvml) {
    if (nvml == NULL) {
        return "NULL nvml handle";
    }
    return nvml->last_error;
}
// Return the number of NVIDIA GPUs visible to NVML, or -1 when NVML is not
// available or the query fails (last_error is set in the failure case).
int nvml_get_gpu_count(nvml_dynamic_t* nvml) {
    if (nvml == NULL || !nvml->available || nvml->get_count == NULL) {
        return -1;
    }
    unsigned int device_count = 0;
    if (nvml->get_count(&device_count) != NVML_SUCCESS) {
        set_error(nvml, "Failed to get GPU count");
        return -1;
    }
    return (int)device_count;
}
// Query a single GPU by index and fill *info with everything the resolved
// NVML entry points can report. Fields whose query function is unavailable
// (or whose query fails) are left zeroed. Returns 0 on success, -1 on error
// (NULL arguments, NVML unavailable, or no device handle for the index).
int nvml_get_gpu_info(nvml_dynamic_t* nvml, uint32_t index, gpu_info_t* info) {
    if (nvml == NULL || !nvml->available || info == NULL) {
        return -1;
    }
    memset(info, 0, sizeof(*info));
    info->index = index;
    nvmlDevice_t dev;
    if (nvml->get_handle_by_index(index, &dev) != NVML_SUCCESS) {
        set_error(nvml, "Failed to get device handle");
        return -1;
    }
    // Every query below is best-effort: skip it when the symbol was not
    // resolved, and ignore per-query failures so one missing sensor does not
    // abort the whole snapshot.
    if (nvml->get_name != NULL) {
        nvml->get_name(dev, info->name, sizeof(info->name));
    }
    if (nvml->get_utilization != NULL) {
        nvmlUtilization_t rates;
        if (nvml->get_utilization(dev, &rates) == NVML_SUCCESS) {
            info->utilization = rates.gpu;
        }
    }
    if (nvml->get_memory != NULL) {
        nvmlMemory_t vram;
        if (nvml->get_memory(dev, &vram) == NVML_SUCCESS) {
            info->memory_used = vram.used;
            info->memory_total = vram.total;
        }
    }
    if (nvml->get_temperature != NULL) {
        unsigned int degrees;
        if (nvml->get_temperature(dev, NVML_TEMPERATURE_GPU, &degrees) == NVML_SUCCESS) {
            info->temperature = degrees;
        }
    }
    if (nvml->get_power_usage != NULL) {
        unsigned int milliwatts;
        if (nvml->get_power_usage(dev, &milliwatts) == NVML_SUCCESS) {
            info->power_draw = milliwatts;
        }
    }
    if (nvml->get_clock != NULL) {
        unsigned int mhz;
        if (nvml->get_clock(dev, NVML_CLOCK_SM, &mhz) == NVML_SUCCESS) {
            info->clock_sm = mhz;
        }
        if (nvml->get_clock(dev, NVML_CLOCK_MEM, &mhz) == NVML_SUCCESS) {
            info->clock_memory = mhz;
        }
    }
    if (nvml->get_uuid != NULL) {
        nvml->get_uuid(dev, info->uuid, sizeof(info->uuid));
    }
    if (nvml->get_vbios != NULL) {
        nvml->get_vbios(dev, info->vbios_version, sizeof(info->vbios_version));
    }
    return 0;
}

View file

@ -0,0 +1,53 @@
#ifndef NVML_DYNAMIC_H
#define NVML_DYNAMIC_H
// Dynamic-loading bridge to NVIDIA's NVML (dlopen on POSIX, LoadLibrary on
// Windows). Builds do not require the CUDA SDK; when no NVIDIA driver is
// present at runtime, nvml_load() still returns a handle that reports
// unavailability instead of failing hard.
#include <stdint.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
// Opaque handle owning the loaded library and resolved function pointers.
typedef struct nvml_dynamic nvml_dynamic_t;
// GPU info structure. Fields that the driver (or resolved symbol set) cannot
// report are left zeroed by nvml_get_gpu_info().
typedef struct {
uint32_t index;
char name[256];         // NUL-terminated device name
uint32_t utilization; // GPU utilization (0-100)
uint64_t memory_used; // Memory used in bytes
uint64_t memory_total; // Total memory in bytes
uint32_t temperature; // Temperature in Celsius
uint32_t power_draw; // Power draw in milliwatts
uint32_t clock_sm; // SM clock in MHz
uint32_t clock_memory; // Memory clock in MHz
uint32_t pcie_gen; // PCIe generation (currently never populated by the implementation)
uint32_t pcie_width; // PCIe link width (currently never populated by the implementation)
char uuid[64]; // GPU UUID
char vbios_version[32]; // VBIOS version
} gpu_info_t;
// Load NVML dynamically. Returns NULL only on allocation failure; on any
// other failure a handle is returned with nvml_is_available() == 0 and
// nvml_last_error() describing why. Caller must free with nvml_unload().
nvml_dynamic_t* nvml_load(void);
// Unload NVML (shutting it down if initialized) and free the handle.
// Accepts NULL.
void nvml_unload(nvml_dynamic_t* nvml);
// Check if NVML is available and loaded (1 = yes, 0 = no or NULL handle).
int nvml_is_available(const nvml_dynamic_t* nvml);
// Get number of GPUs (-1 on error or when NVML is unavailable).
int nvml_get_gpu_count(nvml_dynamic_t* nvml);
// Get GPU info by index (returns 0 on success, -1 on error). Unreportable
// fields in *info are zeroed rather than left undefined.
int nvml_get_gpu_info(nvml_dynamic_t* nvml, uint32_t index, gpu_info_t* info);
// Get last error message. Never returns NULL; a NULL handle yields a static
// placeholder string.
const char* nvml_last_error(const nvml_dynamic_t* nvml);
#ifdef __cplusplus
}
#endif
#endif // NVML_DYNAMIC_H