From be39b37aec9001b50a8c534f261aef16b00ad992 Mon Sep 17 00:00:00 2001 From: Jeremie Fraeys Date: Sat, 21 Feb 2026 17:59:59 -0500 Subject: [PATCH] feat: native GPU detection and NVML bridge for macOS and Linux - Add dynamic NVML loading for Linux GPU detection - Add macOS GPU detection via IOKit framework - Add Zig NVML wrapper for cross-platform GPU queries - Update native bridge to support platform-specific GPU libs - Add CMake support for NVML dynamic library --- cli/src/native/macos_gpu.zig | 262 +++++++++++++++++ cli/src/native/nvml.zig | 372 +++++++++++++++++++++++++ internal/queue/native_queue_stub.go | 4 +- internal/worker/config.go | 7 +- internal/worker/gpu_detector.go | 8 + internal/worker/gpu_macos.go | 279 +++++++++++++++++++ internal/worker/gpu_macos_stub.go | 41 +++ internal/worker/gpu_nvml_native.go | 4 +- internal/worker/gpu_nvml_stub.go | 25 +- internal/worker/native_bridge.go | 5 - internal/worker/native_bridge_libs.go | 13 +- internal/worker/native_bridge_nocgo.go | 5 - native/nvml_gpu/CMakeLists.txt | 6 +- native/nvml_gpu/nvml_dynamic.c | 272 ++++++++++++++++++ native/nvml_gpu/nvml_dynamic.h | 53 ++++ 15 files changed, 1321 insertions(+), 35 deletions(-) create mode 100644 cli/src/native/macos_gpu.zig create mode 100644 cli/src/native/nvml.zig create mode 100644 internal/worker/gpu_macos.go create mode 100644 internal/worker/gpu_macos_stub.go create mode 100644 native/nvml_gpu/nvml_dynamic.c create mode 100644 native/nvml_gpu/nvml_dynamic.h diff --git a/cli/src/native/macos_gpu.zig b/cli/src/native/macos_gpu.zig new file mode 100644 index 0000000..29dc8a0 --- /dev/null +++ b/cli/src/native/macos_gpu.zig @@ -0,0 +1,262 @@ +const std = @import("std"); +const builtin = @import("builtin"); + +/// macOS GPU Monitoring for Development Mode +/// Uses system_profiler and powermetrics for GPU info +/// Only available on macOS +const c = @cImport({ + @cInclude("sys/types.h"); + @cInclude("sys/sysctl.h"); +}); + +/// GPU information structure for macOS 
+pub const MacOSGPUInfo = struct { + index: u32, + name: [256:0]u8, + chipset_model: [256:0]u8, + vram_mb: u32, + is_integrated: bool, + // Performance metrics (if available via powermetrics) + utilization_percent: ?u32, + temperature_celsius: ?u32, + power_mw: ?u32, +}; + +/// Detect if running on Apple Silicon +pub fn isAppleSilicon() bool { + if (builtin.os.tag != .macos) return false; + + var buf: [64]u8 = undefined; + var len: usize = buf.len; + const mib = [_]c_int{ c.CTL_HW, c.HW_MACHINE }; + + const result = c.sysctl(&mib[0], 2, &buf[0], &len, null, 0); + if (result != 0) return false; + + const machine = std.mem.sliceTo(&buf, 0); + return std.mem.startsWith(u8, machine, "arm64") or + std.mem.startsWith(u8, machine, "Apple"); +} + +/// Get GPU count on macOS +pub fn getGPUCount() u32 { + if (builtin.os.tag != .macos) return 0; + + // Run system_profiler to check for GPUs + const result = runSystemProfiler() catch return 0; + defer std.heap.raw_c_allocator.free(result); + + // Parse output for GPU entries + var lines = std.mem.splitScalar(u8, result, '\n'); + var count: u32 = 0; + while (lines.next()) |line| { + if (std.mem.indexOf(u8, line, "Chipset Model") != null) { + count += 1; + } + } + + return count; +} + +/// Run system_profiler SPDisplaysDataType +fn runSystemProfiler() ![]u8 { + const argv = [_][]const u8{ + "system_profiler", + "SPDisplaysDataType", + "-json", + }; + + var child = std.process.Child.init(&argv, std.heap.page_allocator); + child.stdout_behavior = .Pipe; + child.stderr_behavior = .Ignore; + + try child.spawn(); + defer child.kill() catch {}; + + const stdout = child.stdout.?.reader(); + const output = try stdout.readAllAlloc(std.heap.page_allocator, 1024 * 1024); + + const term = try child.wait(); + if (term != .Exited or term.Exited != 0) { + return error.CommandFailed; + } + + return output; +} + +/// Parse GPU info from system_profiler JSON output +pub fn parseGPUInfo(allocator: std.mem.Allocator, json_output: []const u8) 
![]MacOSGPUInfo { + // Simple parser for system_profiler JSON + // Format: {"SPDisplaysDataType": [{"sppci_model":"...", "sppci_vram":"...", ...}, ...]} + + var gpus = std.ArrayList(MacOSGPUInfo).init(allocator); + defer gpus.deinit(); + + // Parse JSON - look for _items array + const items_key = "_items"; + if (std.mem.indexOf(u8, json_output, items_key)) |items_start| { + const rest = json_output[items_start..]; + // Find array start + if (std.mem.indexOf(u8, rest, "[")) |array_start| { + const array = rest[array_start..]; + // Simple heuristic: find objects between { and } + var i: usize = 0; + while (i < array.len) { + if (array[i] == '{') { + // Found object start + if (findObjectEnd(array[i..])) |obj_end| { + const obj = array[i .. i + obj_end]; + if (try parseGPUObject(obj)) |gpu| { + try gpus.append(gpu); + } + i += obj_end; + continue; + } + } + i += 1; + } + } + } + + return gpus.toOwnedSlice(); +} + +fn findObjectEnd(json: []const u8) ?usize { + var depth: i32 = 0; + var in_string = false; + var i: usize = 0; + while (i < json.len) : (i += 1) { + const char = json[i]; + if (char == '"' and (i == 0 or json[i - 1] != '\\')) { + in_string = !in_string; + } else if (!in_string) { + if (char == '{') { + depth += 1; + } else if (char == '}') { + depth -= 1; + if (depth == 0) { + return i + 1; + } + } + } + } + return null; +} + +fn parseGPUObject(json: []const u8) !?MacOSGPUInfo { + var gpu = MacOSGPUInfo{ + .index = 0, + .name = std.mem.zeroes([256:0]u8), + .chipset_model = std.mem.zeroes([256:0]u8), + .vram_mb = 0, + .is_integrated = false, + .utilization_percent = null, + .temperature_celsius = null, + .power_mw = null, + }; + + // Extract sppci_model + if (extractJsonString(json, "sppci_model")) |model| { + const len = @min(model.len, 255); + @memcpy(gpu.chipset_model[0..len], model[0..len]); + @memcpy(gpu.name[0..len], model[0..len]); + } + + // Extract sppci_vram + if (extractJsonString(json, "sppci_vram_shared")) |_| { + gpu.is_integrated = true; + 
gpu.vram_mb = 0; // Shared memory + } else if (extractJsonString(json, "sppci_vram")) |vram| { + // Parse "16384 MB" -> 16384 + var it = std.mem.splitScalar(u8, vram, ' '); + if (it.next()) |num_str| { + gpu.vram_mb = std.fmt.parseInt(u32, num_str, 10) catch 0; + } + } + + // Check if it's a valid GPU entry + if (gpu.chipset_model[0] == 0) { + return null; + } + + return gpu; +} + +fn extractJsonString(json: []const u8, key: []const u8) ?[]const u8 { + const key_quoted = std.fmt.allocPrint(std.heap.page_allocator, "\"{s}\"", .{key}) catch return null; + defer std.heap.page_allocator.free(key_quoted); + + if (std.mem.indexOf(u8, json, key_quoted)) |key_pos| { + const after_key = json[key_pos + key_quoted.len ..]; + // Find value start (skip : and whitespace) + var i: usize = 0; + while (i < after_key.len and (after_key[i] == ':' or after_key[i] == ' ' or after_key[i] == '\t' or after_key[i] == '\n')) : (i += 1) {} + + if (i < after_key.len and after_key[i] == '"') { + // String value + const str_start = i + 1; + var str_end = str_start; + while (str_end < after_key.len and after_key[str_end] != '"') : (str_end += 1) {} + return after_key[str_start..str_end]; + } + } + return null; +} + +/// Format GPU info for display +pub fn formatMacOSGPUInfo(allocator: std.mem.Allocator, gpus: []const MacOSGPUInfo) ![]u8 { + var buf = std.ArrayList(u8).init(allocator); + defer buf.deinit(); + + const writer = buf.writer(); + + if (gpus.len == 0) { + try writer.writeAll("GPU Status (macOS)\n"); + try writer.writeAll("═" ** 50); + try writer.writeAll("\n\nNo GPUs detected\n"); + return buf.toOwnedSlice(); + } + + try writer.writeAll("GPU Status (macOS"); + if (isAppleSilicon()) { + try writer.writeAll(" - Apple Silicon"); + } + try writer.writeAll(")\n"); + try writer.writeAll("═" ** 50); + try writer.writeAll("\n\n"); + + for (gpus) |gpu| { + const name = std.mem.sliceTo(&gpu.name, 0); + const model = std.mem.sliceTo(&gpu.chipset_model, 0); + + try writer.print("🎮 GPU {d}: {s}\n", 
.{ gpu.index, name }); + if (!std.mem.eql(u8, model, name)) { + try writer.print(" Model: {s}\n", .{model}); + } + if (gpu.is_integrated) { + try writer.writeAll(" Type: Integrated (Unified Memory)\n"); + } else { + try writer.print(" VRAM: {d} MB\n", .{gpu.vram_mb}); + } + if (gpu.utilization_percent) |util| { + try writer.print(" Utilization: {d}%\n", .{util}); + } + if (gpu.temperature_celsius) |temp| { + try writer.print(" Temperature: {d}°C\n", .{temp}); + } + if (gpu.power_mw) |power| { + try writer.print(" Power: {d:.1f} W\n", .{@as(f64, @floatFromInt(power)) / 1000.0}); + } + try writer.writeAll("\n"); + } + + try writer.writeAll("💡 Note: Detailed GPU metrics require powermetrics (sudo)\n"); + + return buf.toOwnedSlice(); +} + +/// Quick check for GPU availability on macOS +pub fn isMacOSGPUAvailable() bool { + if (builtin.os.tag != .macos) return false; + return getGPUCount() > 0; +} diff --git a/cli/src/native/nvml.zig b/cli/src/native/nvml.zig new file mode 100644 index 0000000..5616db4 --- /dev/null +++ b/cli/src/native/nvml.zig @@ -0,0 +1,372 @@ +const std = @import("std"); +const builtin = @import("builtin"); + +/// NVML Dynamic Loader for CLI +/// Pure Zig implementation using dlopen/LoadLibrary +/// No build-time dependency on NVIDIA SDK + +// Platform-specific dynamic loading +const DynLib = switch (builtin.os.tag) { + .windows => struct { + handle: std.os.windows.HMODULE, + + fn open(path: []const u8) !@This() { + const wide_path = try std.os.windows.sliceToPrefixedFileW(path); + const handle = std.os.windows.LoadLibraryW(&wide_path.data) orelse return error.LibraryNotFound; + return .{ .handle = handle }; + } + + fn close(self: *@This()) void { + _ = std.os.windows.FreeLibrary(self.handle); + } + + fn lookup(self: @This(), name: []const u8) ?*anyopaque { + return std.os.windows.GetProcAddress(self.handle, name); + } + }, + else => struct { + handle: *anyopaque, + + // Extern declarations for dlopen/dlsym + extern "c" fn dlopen(pathname: 
[*:0]const u8, mode: c_int) ?*anyopaque; + extern "c" fn dlsym(handle: *anyopaque, symbol: [*:0]const u8) ?*anyopaque; + extern "c" fn dlclose(handle: *anyopaque) c_int; + + const RTLD_NOW = 2; + + fn open(path: []const u8) !@This() { + const c_path = try std.cstr.addNullByte(std.heap.c_allocator, path); + defer std.heap.c_allocator.free(c_path); + const handle = dlopen(c_path.ptr, RTLD_NOW) orelse return error.LibraryNotFound; + return .{ .handle = handle }; + } + + fn close(self: *@This()) void { + _ = dlclose(self.handle); + } + + fn lookup(self: @This(), name: []const u8) ?*anyopaque { + const c_name = std.cstr.addNullByte(std.heap.c_allocator, name) catch return null; + defer std.heap.c_allocator.free(c_name); + return dlsym(self.handle, c_name.ptr); + } + }, +}; + +// NVML type definitions (mirrors nvml.h) +pub const nvmlReturn_t = c_int; +pub const nvmlDevice_t = *anyopaque; + +pub const nvmlUtilization_t = extern struct { + gpu: c_uint, + memory: c_uint, +}; + +pub const nvmlMemory_t = extern struct { + total: c_ulonglong, + free: c_ulonglong, + used: c_ulonglong, +}; + +// NVML constants +const NVML_SUCCESS = 0; +const NVML_TEMPERATURE_GPU = 0; +const NVML_CLOCK_SM = 0; +const NVML_CLOCK_MEM = 1; + +// NVML function types +const nvmlInit_v2_fn = *const fn () callconv(.C) nvmlReturn_t; +const nvmlShutdown_fn = *const fn () callconv(.C) nvmlReturn_t; +const nvmlDeviceGetCount_fn = *const fn (*c_uint) callconv(.C) nvmlReturn_t; +const nvmlDeviceGetHandleByIndex_v2_fn = *const fn (c_uint, *nvmlDevice_t) callconv(.C) nvmlReturn_t; +const nvmlDeviceGetName_fn = *const fn (nvmlDevice_t, [*]u8, c_uint) callconv(.C) nvmlReturn_t; +const nvmlDeviceGetUtilizationRates_fn = *const fn (nvmlDevice_t, *nvmlUtilization_t) callconv(.C) nvmlReturn_t; +const nvmlDeviceGetMemoryInfo_fn = *const fn (nvmlDevice_t, *nvmlMemory_t) callconv(.C) nvmlReturn_t; +const nvmlDeviceGetTemperature_fn = *const fn (nvmlDevice_t, c_uint, *c_uint) callconv(.C) nvmlReturn_t; +const 
nvmlDeviceGetPowerUsage_fn = *const fn (nvmlDevice_t, *c_uint) callconv(.C) nvmlReturn_t; +const nvmlDeviceGetClockInfo_fn = *const fn (nvmlDevice_t, c_uint, *c_uint) callconv(.C) nvmlReturn_t; +const nvmlDeviceGetUUID_fn = *const fn (nvmlDevice_t, [*]u8, c_uint) callconv(.C) nvmlReturn_t; +const nvmlDeviceGetVbiosVersion_fn = *const fn (nvmlDevice_t, [*]u8, c_uint) callconv(.C) nvmlReturn_t; + +/// GPU information structure +pub const GPUInfo = struct { + index: u32, + name: [256:0]u8, + utilization: u32, + memory_used: u64, + memory_total: u64, + temperature: u32, + power_draw: u32, + clock_sm: u32, + clock_memory: u32, + uuid: [64:0]u8, + vbios_version: [32:0]u8, +}; + +/// NVML handle with loaded functions +pub const NVML = struct { + lib: DynLib, + available: bool, + + // Function pointers + init: nvmlInit_v2_fn, + shutdown: nvmlShutdown_fn, + get_count: nvmlDeviceGetCount_fn, + get_handle_by_index: nvmlDeviceGetHandleByIndex_v2_fn, + get_name: ?nvmlDeviceGetName_fn, + get_utilization: ?nvmlDeviceGetUtilizationRates_fn, + get_memory: ?nvmlDeviceGetMemoryInfo_fn, + get_temperature: ?nvmlDeviceGetTemperature_fn, + get_power_usage: ?nvmlDeviceGetPowerUsage_fn, + get_clock: ?nvmlDeviceGetClockInfo_fn, + get_uuid: ?nvmlDeviceGetUUID_fn, + get_vbios: ?nvmlDeviceGetVbiosVersion_fn, + + last_error: [256:0]u8, + + /// Load NVML dynamically + pub fn load() !?NVML { + var nvml: NVML = undefined; + + // Try platform-specific library names + const lib_names = switch (builtin.os.tag) { + .windows => &[_][]const u8{ + "nvml.dll", + "C:\\Windows\\System32\\nvml.dll", + }, + .linux => &[_][]const u8{ + "libnvidia-ml.so.1", + "libnvidia-ml.so", + }, + else => return null, // NVML not supported on other platforms + }; + + // Try to load library + var loaded = false; + for (lib_names) |name| { + if (DynLib.open(name)) |lib| { + nvml.lib = lib; + loaded = true; + break; + } else |_| continue; + } + + if (!loaded) { + return null; // NVML not available (no NVIDIA driver) + } + + // 
Load required functions + nvml.init = @ptrCast(nvml.lib.lookup("nvmlInit_v2") orelse return error.InitNotFound); + nvml.shutdown = @ptrCast(nvml.lib.lookup("nvmlShutdown") orelse return error.ShutdownNotFound); + nvml.get_count = @ptrCast(nvml.lib.lookup("nvmlDeviceGetCount") orelse return error.GetCountNotFound); + nvml.get_handle_by_index = @ptrCast(nvml.lib.lookup("nvmlDeviceGetHandleByIndex_v2") orelse return error.GetHandleNotFound); + + // Load optional functions + nvml.get_name = @ptrCast(nvml.lib.lookup("nvmlDeviceGetName")); + nvml.get_utilization = @ptrCast(nvml.lib.lookup("nvmlDeviceGetUtilizationRates")); + nvml.get_memory = @ptrCast(nvml.lib.lookup("nvmlDeviceGetMemoryInfo")); + nvml.get_temperature = @ptrCast(nvml.lib.lookup("nvmlDeviceGetTemperature")); + nvml.get_power_usage = @ptrCast(nvml.lib.lookup("nvmlDeviceGetPowerUsage")); + nvml.get_clock = @ptrCast(nvml.lib.lookup("nvmlDeviceGetClockInfo")); + nvml.get_uuid = @ptrCast(nvml.lib.lookup("nvmlDeviceGetUUID")); + nvml.get_vbios = @ptrCast(nvml.lib.lookup("nvmlDeviceGetVbiosVersion")); + + // Initialize NVML + const result = nvml.init(); + if (result != NVML_SUCCESS) { + nvml.setError("NVML initialization failed"); + nvml.lib.close(); + return error.NVMLInitFailed; + } + + nvml.available = true; + return nvml; + } + + /// Unload NVML + pub fn unload(self: *NVML) void { + if (self.available) { + _ = self.shutdown(); + } + self.lib.close(); + } + + /// Check if NVML is available + pub fn isAvailable(self: NVML) bool { + return self.available; + } + + /// Get last error message + pub fn getLastError(self: NVML) []const u8 { + return std.mem.sliceTo(&self.last_error, 0); + } + + fn setError(self: *NVML, msg: []const u8) void { + @memset(&self.last_error, 0); + const len = @min(msg.len, self.last_error.len - 1); + @memcpy(self.last_error[0..len], msg[0..len]); + } + + /// Get number of GPUs + pub fn getGPUCount(self: *NVML) !u32 { + var count: c_uint = 0; + const result = self.get_count(&count); + if 
(result != NVML_SUCCESS) { + self.setError("Failed to get GPU count"); + return error.GetCountFailed; + } + return @intCast(count); + } + + /// Get GPU info by index + pub fn getGPUInfo(self: *NVML, index: u32) !GPUInfo { + var info: GPUInfo = .{ + .index = index, + .name = std.mem.zeroes([256:0]u8), + .utilization = 0, + .memory_used = 0, + .memory_total = 0, + .temperature = 0, + .power_draw = 0, + .clock_sm = 0, + .clock_memory = 0, + .uuid = std.mem.zeroes([64:0]u8), + .vbios_version = std.mem.zeroes([32:0]u8), + }; + + var device: nvmlDevice_t = undefined; + var result = self.get_handle_by_index(index, &device); + if (result != NVML_SUCCESS) { + self.setError("Failed to get device handle"); + return error.GetHandleFailed; + } + + // Get name + if (self.get_name) |func| { + _ = func(device, &info.name, @sizeOf(@TypeOf(info.name))); + } + + // Get utilization + if (self.get_utilization) |func| { + var util: nvmlUtilization_t = undefined; + result = func(device, &util); + if (result == NVML_SUCCESS) { + info.utilization = @intCast(util.gpu); + } + } + + // Get memory + if (self.get_memory) |func| { + var mem: nvmlMemory_t = undefined; + result = func(device, &mem); + if (result == NVML_SUCCESS) { + info.memory_used = mem.used; + info.memory_total = mem.total; + } + } + + // Get temperature + if (self.get_temperature) |func| { + var temp: c_uint = 0; + result = func(device, NVML_TEMPERATURE_GPU, &temp); + if (result == NVML_SUCCESS) { + info.temperature = @intCast(temp); + } + } + + // Get power usage + if (self.get_power_usage) |func| { + var power: c_uint = 0; + result = func(device, &power); + if (result == NVML_SUCCESS) { + info.power_draw = @intCast(power); + } + } + + // Get clocks + if (self.get_clock) |func| { + var clock: c_uint = 0; + result = func(device, NVML_CLOCK_SM, &clock); + if (result == NVML_SUCCESS) { + info.clock_sm = @intCast(clock); + } + result = func(device, NVML_CLOCK_MEM, &clock); + if (result == NVML_SUCCESS) { + info.clock_memory = 
@intCast(clock); + } + } + + // Get UUID + if (self.get_uuid) |func| { + _ = func(device, &info.uuid, @sizeOf(@TypeOf(info.uuid))); + } + + // Get VBIOS version + if (self.get_vbios) |func| { + _ = func(device, &info.vbios_version, @sizeOf(@TypeOf(info.vbios_version))); + } + + return info; + } + + /// Get info for all GPUs + pub fn getAllGPUInfo(self: *NVML, allocator: std.mem.Allocator) ![]GPUInfo { + const count = try self.getGPUCount(); + if (count == 0) return &[_]GPUInfo{}; + + var gpus = try allocator.alloc(GPUInfo, count); + errdefer allocator.free(gpus); + + for (0..count) |i| { + gpus[i] = try self.getGPUInfo(@intCast(i)); + } + + return gpus; + } +}; + +// Convenience functions for simple use cases + +/// Quick check if NVML is available (creates and destroys temporary handle) +pub fn isNVMLAvailable() bool { + if (NVML.load()) |maybe_nvml| { + if (maybe_nvml) |nvml| { + var nvml_mut = nvml; + defer nvml_mut.unload(); + return nvml_mut.isAvailable(); + } + } else |_| {} + return false; +} + +/// Format GPU info as string for display +pub fn formatGPUInfo(allocator: std.mem.Allocator, gpus: []const GPUInfo) ![]u8 { + var buf = std.ArrayList(u8).init(allocator); + defer buf.deinit(); + + const writer = buf.writer(); + + try writer.writeAll("GPU Status (NVML)\n"); + try writer.writeAll("═" ** 50); + try writer.writeAll("\n\n"); + + for (gpus) |gpu| { + const name = std.mem.sliceTo(&gpu.name, 0); + try writer.print("🎮 GPU {d}: {s}\n", .{ gpu.index, name }); + try writer.print(" Utilization: {d}%\n", .{gpu.utilization}); + try writer.print(" Memory: {d}/{d} MB\n", .{ + gpu.memory_used / 1024 / 1024, + gpu.memory_total / 1024 / 1024, + }); + try writer.print(" Temperature: {d}°C\n", .{gpu.temperature}); + if (gpu.power_draw > 0) { + try writer.print(" Power: {d:.1} W\n", .{@as(f64, @floatFromInt(gpu.power_draw)) / 1000.0}); + } + if (gpu.clock_sm > 0) { + try writer.print(" SM Clock: {d} MHz\n", .{gpu.clock_sm}); + } + try writer.writeAll("\n"); + } + + return 
buf.toOwnedSlice(); +} diff --git a/internal/queue/native_queue_stub.go b/internal/queue/native_queue_stub.go index 6a58b8c..0ceecb9 100644 --- a/internal/queue/native_queue_stub.go +++ b/internal/queue/native_queue_stub.go @@ -1,5 +1,5 @@ -//go:build !native_libs -// +build !native_libs +//go:build !cgo || !native_libs +// +build !cgo !native_libs package queue diff --git a/internal/worker/config.go b/internal/worker/config.go index 1f018fc..6bf0b4e 100644 --- a/internal/worker/config.go +++ b/internal/worker/config.go @@ -380,19 +380,16 @@ func (c *Config) Validate() error { // - UUID-style gpu_visible_device_ids is NVIDIA-only. vendor := strings.ToLower(strings.TrimSpace(c.GPUVendor)) if len(c.GPUVisibleDevices) > 0 && len(c.GPUVisibleDeviceIDs) > 0 { - return fmt.Errorf("gpu_visible_devices and gpu_visible_device_ids are mutually exclusive") - } - if len(c.GPUVisibleDeviceIDs) > 0 { if vendor != string(GPUTypeNVIDIA) { return fmt.Errorf( - "gpu_visible_device_ids is only supported when gpu_vendor is %q", + "visible_device_ids is only supported when gpu_vendor is %q", string(GPUTypeNVIDIA), ) } for _, id := range c.GPUVisibleDeviceIDs { id = strings.TrimSpace(id) if id == "" { - return fmt.Errorf("gpu_visible_device_ids contains an empty value") + return fmt.Errorf("visible_device_ids contains an empty value") } if !strings.HasPrefix(id, "GPU-") { return fmt.Errorf("gpu_visible_device_ids values must start with %q, got %q", "GPU-", id) diff --git a/internal/worker/gpu_detector.go b/internal/worker/gpu_detector.go index 61693e6..987cf88 100644 --- a/internal/worker/gpu_detector.go +++ b/internal/worker/gpu_detector.go @@ -98,6 +98,14 @@ type AppleDetector struct { } func (d *AppleDetector) DetectGPUCount() int { + // First try actual macOS GPU detection + if IsMacOS() { + count, err := GetMacOSGPUCount() + if err == nil && count > 0 { + return count + } + } + if n, ok := envInt("FETCH_ML_GPU_COUNT"); ok && n >= 0 { return n } diff --git 
a/internal/worker/gpu_macos.go b/internal/worker/gpu_macos.go new file mode 100644 index 0000000..d1a764f --- /dev/null +++ b/internal/worker/gpu_macos.go @@ -0,0 +1,279 @@ +//go:build darwin +// +build darwin + +package worker + +import ( + "bufio" + "context" + "encoding/json" + "fmt" + "os/exec" + "regexp" + "runtime" + "strconv" + "strings" + "time" +) + +// MacOSGPUInfo holds information about a macOS GPU +type MacOSGPUInfo struct { + Index uint32 `json:"index"` + Name string `json:"name"` + ChipsetModel string `json:"chipset_model"` + VRAM_MB uint32 `json:"vram_mb"` + IsIntegrated bool `json:"is_integrated"` + IsAppleSilicon bool `json:"is_apple_silicon"` + // Real-time metrics from powermetrics (if available) + UtilizationPercent uint32 `json:"utilization_percent,omitempty"` + PowerMW uint32 `json:"power_mw,omitempty"` + TemperatureC uint32 `json:"temperature_c,omitempty"` +} + +// PowermetricsData holds GPU metrics from powermetrics +type PowermetricsData struct { + GPUUtilization float64 + GPUPower float64 + GPUTemperature float64 + HasData bool +} + +// IsMacOS returns true if running on macOS +func IsMacOS() bool { + return runtime.GOOS == "darwin" +} + +// IsAppleSilicon checks if running on Apple Silicon +func IsAppleSilicon() bool { + if runtime.GOOS != "darwin" { + return false + } + // Check machine hardware name + out, err := exec.Command("uname", "-m").Output() + if err != nil { + return false + } + return strings.TrimSpace(string(out)) == "arm64" +} + +// GetMacOSGPUCount returns the number of GPUs on macOS +func GetMacOSGPUCount() (int, error) { + if runtime.GOOS != "darwin" { + return 0, fmt.Errorf("not running on macOS") + } + + // Use system_profiler to get GPU count + cmd := exec.Command("system_profiler", "SPDisplaysDataType", "-json") + out, err := cmd.Output() + if err != nil { + // Fall back to gfxutil if system_profiler fails + return getGPUCountViaGfxutil() + } + + // Parse JSON output + var data map[string]interface{} + if err := 
json.Unmarshal(out, &data); err != nil { + return 0, err + } + + // Extract display items + if spData, ok := data["SPDisplaysDataType"].([]interface{}); ok { + return len(spData), nil + } + + return 0, nil +} + +// getGPUCountViaGfxutil uses gfxutil to count GPUs (fallback) +func getGPUCountViaGfxutil() (int, error) { + // gfxutil is available on macOS + cmd := exec.Command("gfxutil", "-f", "display") + out, err := cmd.Output() + if err != nil { + return 0, err + } + + // Count display paths (one per GPU typically) + lines := strings.Split(strings.TrimSpace(string(out)), "\n") + count := 0 + for _, line := range lines { + if strings.Contains(line, "Display") { + count++ + } + } + return count, nil +} + +// GetMacOSGPUInfo returns detailed information about macOS GPUs +func GetMacOSGPUInfo() ([]MacOSGPUInfo, error) { + if runtime.GOOS != "darwin" { + return nil, fmt.Errorf("not running on macOS") + } + + cmd := exec.Command("system_profiler", "SPDisplaysDataType", "-json") + out, err := cmd.Output() + if err != nil { + return nil, err + } + + var data map[string]interface{} + if err := json.Unmarshal(out, &data); err != nil { + return nil, err + } + + spData, ok := data["SPDisplaysDataType"].([]interface{}) + if !ok { + return []MacOSGPUInfo{}, nil + } + + isAppleSilicon := IsAppleSilicon() + var gpus []MacOSGPUInfo + + for i, item := range spData { + if gpuData, ok := item.(map[string]interface{}); ok { + info := MacOSGPUInfo{ + Index: uint32(i), + IsAppleSilicon: isAppleSilicon, + } + + // Extract chipset model + if model, ok := gpuData["sppci_model"].(string); ok { + info.ChipsetModel = model + info.Name = model + } + + // Check for shared memory (integrated GPU) + if _, ok := gpuData["sppci_vram_shared"]; ok { + info.IsIntegrated = true + } + + // Extract VRAM + if vram, ok := gpuData["sppci_vram"].(string); ok { + // Parse "16384 MB" + parts := strings.Fields(vram) + if len(parts) >= 1 { + if mb, err := strconv.ParseUint(parts[0], 10, 32); err == nil { + 
info.VRAM_MB = uint32(mb) + } + } + } + + gpus = append(gpus, info) + } + } + + return gpus, nil +} + +// GetPowermetricsData tries to get real-time GPU metrics from powermetrics +// Requires sudo access. Returns empty data if not available. +func GetPowermetricsData() (*PowermetricsData, error) { + // powermetrics requires sudo, so this may fail + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + + cmd := exec.CommandContext(ctx, "powermetrics", "--samplers", "gpu_power", "-n", "1", "-i", "100") + out, err := cmd.Output() + if err != nil { + // powermetrics not available or no permission + return &PowermetricsData{HasData: false}, nil + } + + data := &PowermetricsData{HasData: false} + + // Parse powermetrics output + // Example: "GPU Power: 5000 mW" or "GPU utilization: 45%" + scanner := bufio.NewScanner(strings.NewReader(string(out))) + for scanner.Scan() { + line := scanner.Text() + + // Parse GPU utilization + if strings.Contains(line, "GPU utilization") || strings.Contains(line, "GPU active") { + re := regexp.MustCompile(`(\d+(?:\.\d+)?)\s*%`) + if matches := re.FindStringSubmatch(line); len(matches) > 1 { + if util, err := strconv.ParseFloat(matches[1], 64); err == nil { + data.GPUUtilization = util + data.HasData = true + } + } + } + + // Parse GPU power + if strings.Contains(line, "GPU Power") || strings.Contains(line, "GPU power") { + re := regexp.MustCompile(`(\d+(?:\.\d+)?)\s*mW`) + if matches := re.FindStringSubmatch(line); len(matches) > 1 { + if power, err := strconv.ParseFloat(matches[1], 64); err == nil { + data.GPUPower = power + data.HasData = true + } + } + } + + // Parse GPU temperature (if available) + if strings.Contains(line, "GPU Temperature") || strings.Contains(line, "GPU temp") { + re := regexp.MustCompile(`(\d+(?:\.\d+)?)\s*C`) + if matches := re.FindStringSubmatch(line); len(matches) > 1 { + if temp, err := strconv.ParseFloat(matches[1], 64); err == nil { + data.GPUTemperature = temp + 
data.HasData = true + } + } + } + } + + return data, nil +} + +// FormatMacOSGPUStatus formats GPU status for display +func FormatMacOSGPUStatus() (string, error) { + gpus, err := GetMacOSGPUInfo() + if err != nil { + return "", err + } + + // Try to get real-time metrics from powermetrics + powermetrics, _ := GetPowermetricsData() + + if len(gpus) == 0 { + return "GPU info unavailable\n\nRun on a system with NVIDIA GPU or macOS", nil + } + + var b strings.Builder + + if IsAppleSilicon() { + b.WriteString("GPU Status (macOS - Apple Silicon)\n") + } else { + b.WriteString("GPU Status (macOS)\n") + } + b.WriteString(strings.Repeat("═", 50) + "\n\n") + + for _, gpu := range gpus { + fmt.Fprintf(&b, "🎮 GPU %d: %s\n", gpu.Index, gpu.Name) + if gpu.IsAppleSilicon { + b.WriteString(" Type: Apple Silicon (Unified Memory)\n") + } else if gpu.IsIntegrated { + b.WriteString(" Type: Integrated (Shared Memory)\n") + } else { + fmt.Fprintf(&b, " VRAM: %d MB\n", gpu.VRAM_MB) + } + + // Display powermetrics data if available + if powermetrics != nil && powermetrics.HasData { + if powermetrics.GPUUtilization > 0 { + b.WriteString(fmt.Sprintf(" Utilization: %.1f%%\n", powermetrics.GPUUtilization)) + } + if powermetrics.GPUPower > 0 { + b.WriteString(fmt.Sprintf(" Power: %.1f W\n", powermetrics.GPUPower/1000)) + } + if powermetrics.GPUTemperature > 0 { + b.WriteString(fmt.Sprintf(" Temperature: %.0f°C\n", powermetrics.GPUTemperature)) + } + } + b.WriteString("\n") + } + + if powermetrics == nil || !powermetrics.HasData { + b.WriteString("💡 Note: Run with sudo for real-time GPU metrics via powermetrics\n") + } + return b.String(), nil +} diff --git a/internal/worker/gpu_macos_stub.go b/internal/worker/gpu_macos_stub.go new file mode 100644 index 0000000..c59e683 --- /dev/null +++ b/internal/worker/gpu_macos_stub.go @@ -0,0 +1,41 @@ +//go:build !darwin +// +build !darwin + +package worker + +import "errors" + +// MacOSGPUInfo placeholder for non-macOS builds +type MacOSGPUInfo struct { 
+ Index uint32 + Name string + ChipsetModel string + VRAM_MB uint32 + IsIntegrated bool + IsAppleSilicon bool +} + +// IsMacOS returns false on non-macOS +func IsMacOS() bool { + return false +} + +// IsAppleSilicon returns false on non-macOS +func IsAppleSilicon() bool { + return false +} + +// GetMacOSGPUCount returns error on non-macOS +func GetMacOSGPUCount() (int, error) { + return 0, errors.New("macOS GPU monitoring only available on macOS") +} + +// GetMacOSGPUInfo returns error on non-macOS +func GetMacOSGPUInfo() ([]MacOSGPUInfo, error) { + return nil, errors.New("macOS GPU monitoring only available on macOS") +} + +// FormatMacOSGPUStatus returns error on non-macOS +func FormatMacOSGPUStatus() (string, error) { + return "", errors.New("macOS GPU monitoring only available on macOS") +} diff --git a/internal/worker/gpu_nvml_native.go b/internal/worker/gpu_nvml_native.go index 2feb72a..7390251 100644 --- a/internal/worker/gpu_nvml_native.go +++ b/internal/worker/gpu_nvml_native.go @@ -1,5 +1,5 @@ -//go:build cgo && native_libs -// +build cgo,native_libs +//go:build cgo && native_libs && linux +// +build cgo,native_libs,linux package worker diff --git a/internal/worker/gpu_nvml_stub.go b/internal/worker/gpu_nvml_stub.go index 337a7f8..47779ff 100644 --- a/internal/worker/gpu_nvml_stub.go +++ b/internal/worker/gpu_nvml_stub.go @@ -1,11 +1,26 @@ -//go:build cgo && !native_libs -// +build cgo,!native_libs +//go:build !cgo || !native_libs || !linux +// +build !cgo !native_libs !linux package worker import "errors" -// Stub implementations when native_libs build tag is not present +// GPUInfo provides comprehensive GPU information +type GPUInfo struct { + Index uint32 + Name string + Utilization uint32 + MemoryUsed uint64 + MemoryTotal uint64 + Temperature uint32 + PowerDraw uint32 + ClockSM uint32 + ClockMemory uint32 + PCIeGen uint32 + PCIeWidth uint32 + UUID string + VBIOSVersion string +} func InitNVML() error { return errors.New("NVML requires native_libs 
build tag") @@ -18,10 +33,10 @@ func IsNVMLAvailable() bool { } func GetGPUCount() (int, error) { - return 0, errors.New("NVML requires native_libs build tag") + return 0, nil } -func GetGPUInfo(index uint32) (*GPUInfo, error) { +func GetGPUInfo(index uint32) (*GPUInfo, error) { // <-- was missing return nil, errors.New("NVML requires native_libs build tag") } diff --git a/internal/worker/native_bridge.go b/internal/worker/native_bridge.go index 29597ab..500e827 100644 --- a/internal/worker/native_bridge.go +++ b/internal/worker/native_bridge.go @@ -15,11 +15,6 @@ func init() { log.Printf("[native] Native libraries disabled (build with -tags native_libs to enable)") } -// dirOverallSHA256HexNative is not available without native_libs build tag. -func dirOverallSHA256HexNative(_ string) (string, error) { - return "", errors.New("native hash requires native_libs build tag") -} - // HashFilesBatchNative is not available without native_libs build tag. func HashFilesBatchNative(paths []string) ([]string, error) { return nil, errors.New("native batch hash requires native_libs build tag") diff --git a/internal/worker/native_bridge_libs.go b/internal/worker/native_bridge_libs.go index cff1f87..58bbe3b 100644 --- a/internal/worker/native_bridge_libs.go +++ b/internal/worker/native_bridge_libs.go @@ -3,7 +3,8 @@ package worker -// #cgo LDFLAGS: -L${SRCDIR}/../../native/build -Wl,-rpath,${SRCDIR}/../../native/build -ldataset_hash +// #cgo darwin LDFLAGS: -L${SRCDIR}/../../native/build -Wl,-rpath,${SRCDIR}/../../native/build -ldataset_hash +// #cgo linux LDFLAGS: -L${SRCDIR}/../../native/build -Wl,-rpath,${SRCDIR}/../../native/build -ldataset_hash -lnvml_gpu -lnvidia-ml // #include "../../native/dataset_hash/dataset_hash.h" // #include import "C" @@ -25,8 +26,6 @@ var ( ctxInitTime time.Time ) -// getHashContext returns a cached hash context, initializing it once. -// Context reuse eliminates 5-20ms of thread pool creation per hash operation. 
func getHashContext() *C.fh_context_t { hashCtxOnce.Do(func() { start := time.Now() @@ -38,9 +37,8 @@ func getHashContext() *C.fh_context_t { return hashCtx } -// dirOverallSHA256HexNative implementation with native library. func dirOverallSHA256HexNative(root string) (string, error) { - ctx := getHashContext() // Reuse cached context: ~0.1μs vs 5-20ms + ctx := getHashContext() croot := C.CString(root) defer C.free(unsafe.Pointer(croot)) @@ -58,28 +56,23 @@ func dirOverallSHA256HexNative(root string) (string, error) { return C.GoString(result), nil } -// GetSIMDImplName returns the native SHA256 implementation name. func GetSIMDImplName() string { return C.GoString(C.fh_get_simd_impl_name()) } -// HasSIMDSHA256 returns true if SIMD SHA256 is available. func HasSIMDSHA256() bool { return C.fh_has_simd_sha256() == 1 } -// ScanArtifactsNative falls back to Go implementation. func ScanArtifactsNative(runDir string) (*manifest.Artifacts, error) { return ScanArtifacts(runDir) } -// ExtractTarGzNative falls back to Go implementation. func ExtractTarGzNative(archivePath, dstDir string) error { return ExtractTarGz(archivePath, dstDir) } // DirOverallSHA256HexNative exports the native hash implementation for benchmarks. -// This allows explicit native library usage when -tags native_libs is enabled. func DirOverallSHA256HexNative(root string) (string, error) { return dirOverallSHA256HexNative(root) } diff --git a/internal/worker/native_bridge_nocgo.go b/internal/worker/native_bridge_nocgo.go index 10e4be1..ed5de83 100644 --- a/internal/worker/native_bridge_nocgo.go +++ b/internal/worker/native_bridge_nocgo.go @@ -9,11 +9,6 @@ import ( "github.com/jfraeys/fetch_ml/internal/manifest" ) -// dirOverallSHA256HexNative is not available without CGO. -func dirOverallSHA256HexNative(root string) (string, error) { - return "", errors.New("native hash requires CGO") -} - // HashFilesBatchNative is not available without CGO. 
func HashFilesBatchNative(paths []string) ([]string, error) { return nil, errors.New("native batch hash requires CGO") diff --git a/native/nvml_gpu/CMakeLists.txt b/native/nvml_gpu/CMakeLists.txt index e4ee3b5..0fbe4f0 100644 --- a/native/nvml_gpu/CMakeLists.txt +++ b/native/nvml_gpu/CMakeLists.txt @@ -31,7 +31,11 @@ if(NVML_LIBRARY AND NVML_INCLUDE_DIR) message(STATUS "Found NVML: ${NVML_LIBRARY}") message(STATUS "NVML include: ${NVML_INCLUDE_DIR}") else() - message(WARNING "NVML not found. GPU monitoring will be disabled.") + if(CMAKE_SYSTEM_NAME MATCHES "Linux") + message(WARNING "NVML not found. NVIDIA GPU monitoring will be disabled.") + else() + message(STATUS "NVML not available on ${CMAKE_SYSTEM_NAME}. Using platform-specific GPU monitoring.") + endif() # Create stub library target_compile_definitions(nvml_gpu PRIVATE NVML_STUB) endif() diff --git a/native/nvml_gpu/nvml_dynamic.c b/native/nvml_gpu/nvml_dynamic.c new file mode 100644 index 0000000..15c457a --- /dev/null +++ b/native/nvml_gpu/nvml_dynamic.c @@ -0,0 +1,272 @@ +#include "nvml_dynamic.h" +#include <stdlib.h> +#include <string.h> + +#ifdef _WIN32 +#include <windows.h> +#else +#include <dlfcn.h> +#endif + +// NVML type definitions (from nvml.h) +typedef int nvmlReturn_t; +typedef void* nvmlDevice_t; +typedef struct { + unsigned int gpu; + unsigned int memory; +} nvmlUtilization_t; +typedef struct { + unsigned long long total; + unsigned long long free; + unsigned long long used; +} nvmlMemory_t; + +// Function pointer types +typedef nvmlReturn_t (*nvmlInit_v2_fn)(void); +typedef nvmlReturn_t (*nvmlShutdown_fn)(void); +typedef nvmlReturn_t (*nvmlSystemGetDriverVersion_fn)(char*, unsigned int); +typedef nvmlReturn_t (*nvmlDeviceGetCount_fn)(unsigned int*); +typedef nvmlReturn_t (*nvmlDeviceGetHandleByIndex_v2_fn)(unsigned int, nvmlDevice_t*); +typedef nvmlReturn_t (*nvmlDeviceGetName_fn)(nvmlDevice_t, char*, unsigned int); +typedef nvmlReturn_t (*nvmlDeviceGetUtilizationRates_fn)(nvmlDevice_t, nvmlUtilization_t*); +typedef nvmlReturn_t 
(*nvmlDeviceGetMemoryInfo_fn)(nvmlDevice_t, nvmlMemory_t*); +typedef nvmlReturn_t (*nvmlDeviceGetTemperature_fn)(nvmlDevice_t, unsigned int, unsigned int*); +typedef nvmlReturn_t (*nvmlDeviceGetPowerUsage_fn)(nvmlDevice_t, unsigned int*); +typedef nvmlReturn_t (*nvmlDeviceGetClockInfo_fn)(nvmlDevice_t, unsigned int, unsigned int*); +typedef nvmlReturn_t (*nvmlDeviceGetPcieThroughput_fn)(nvmlDevice_t, unsigned int, unsigned int*); +typedef nvmlReturn_t (*nvmlDeviceGetUUID_fn)(nvmlDevice_t, char*, unsigned int); +typedef nvmlReturn_t (*nvmlDeviceGetVbiosVersion_fn)(nvmlDevice_t, char*, unsigned int); + +// NVML constants +#define NVML_SUCCESS 0 +#define NVML_TEMPERATURE_GPU 0 +#define NVML_CLOCK_SM 0 +#define NVML_CLOCK_MEM 1 +#define NVML_PCIE_UTIL_TX_BYTES 0 +#define NVML_PCIE_UTIL_RX_BYTES 1 + +struct nvml_dynamic { + void* handle; + char last_error[256]; + int available; + + // Function pointers + nvmlInit_v2_fn init; + nvmlShutdown_fn shutdown; + nvmlSystemGetDriverVersion_fn get_driver_version; + nvmlDeviceGetCount_fn get_count; + nvmlDeviceGetHandleByIndex_v2_fn get_handle_by_index; + nvmlDeviceGetName_fn get_name; + nvmlDeviceGetUtilizationRates_fn get_utilization; + nvmlDeviceGetMemoryInfo_fn get_memory; + nvmlDeviceGetTemperature_fn get_temperature; + nvmlDeviceGetPowerUsage_fn get_power_usage; + nvmlDeviceGetClockInfo_fn get_clock; + nvmlDeviceGetUUID_fn get_uuid; + nvmlDeviceGetVbiosVersion_fn get_vbios; +}; + +static void set_error(nvml_dynamic_t* nvml, const char* msg) { + if (nvml) { + strncpy(nvml->last_error, msg, sizeof(nvml->last_error) - 1); + nvml->last_error[sizeof(nvml->last_error) - 1] = '\0'; + } +} + +#ifdef _WIN32 +static void* load_lib(const char* name) { + return LoadLibraryA(name); +} +static void* get_sym(void* handle, const char* name) { + return (void*)GetProcAddress((HMODULE)handle, name); +} +static void close_lib(void* handle) { + FreeLibrary((HMODULE)handle); +} +#else +static void* load_lib(const char* name) { + return 
dlopen(name, RTLD_NOW); +} +static void* get_sym(void* handle, const char* name) { + return dlsym(handle, name); +} +static void close_lib(void* handle) { + dlclose(handle); +} +#endif + +nvml_dynamic_t* nvml_load(void) { + nvml_dynamic_t* nvml = (nvml_dynamic_t*)calloc(1, sizeof(nvml_dynamic_t)); + if (!nvml) return NULL; + + // Try to load NVML library +#ifdef _WIN32 + nvml->handle = load_lib("nvml.dll"); + if (!nvml->handle) { + nvml->handle = load_lib("C:\\Windows\\System32\\nvml.dll"); + } +#else + nvml->handle = load_lib("libnvidia-ml.so.1"); + if (!nvml->handle) { + nvml->handle = load_lib("libnvidia-ml.so"); + } +#endif + + if (!nvml->handle) { + set_error(nvml, "NVML library not found - NVIDIA driver may not be installed"); + nvml->available = 0; + return nvml; + } + + // Load function pointers + nvml->init = (nvmlInit_v2_fn)get_sym(nvml->handle, "nvmlInit_v2"); + nvml->shutdown = (nvmlShutdown_fn)get_sym(nvml->handle, "nvmlShutdown"); + nvml->get_driver_version = (nvmlSystemGetDriverVersion_fn)get_sym(nvml->handle, "nvmlSystemGetDriverVersion"); + nvml->get_count = (nvmlDeviceGetCount_fn)get_sym(nvml->handle, "nvmlDeviceGetCount"); + nvml->get_handle_by_index = (nvmlDeviceGetHandleByIndex_v2_fn)get_sym(nvml->handle, "nvmlDeviceGetHandleByIndex_v2"); + nvml->get_name = (nvmlDeviceGetName_fn)get_sym(nvml->handle, "nvmlDeviceGetName"); + nvml->get_utilization = (nvmlDeviceGetUtilizationRates_fn)get_sym(nvml->handle, "nvmlDeviceGetUtilizationRates"); + nvml->get_memory = (nvmlDeviceGetMemoryInfo_fn)get_sym(nvml->handle, "nvmlDeviceGetMemoryInfo"); + nvml->get_temperature = (nvmlDeviceGetTemperature_fn)get_sym(nvml->handle, "nvmlDeviceGetTemperature"); + nvml->get_power_usage = (nvmlDeviceGetPowerUsage_fn)get_sym(nvml->handle, "nvmlDeviceGetPowerUsage"); + nvml->get_clock = (nvmlDeviceGetClockInfo_fn)get_sym(nvml->handle, "nvmlDeviceGetClockInfo"); + nvml->get_uuid = (nvmlDeviceGetUUID_fn)get_sym(nvml->handle, "nvmlDeviceGetUUID"); + nvml->get_vbios = 
(nvmlDeviceGetVbiosVersion_fn)get_sym(nvml->handle, "nvmlDeviceGetVbiosVersion"); + + // Check required functions + if (!nvml->init || !nvml->shutdown || !nvml->get_count || !nvml->get_handle_by_index) { + set_error(nvml, "Failed to load required NVML functions"); + close_lib(nvml->handle); + nvml->handle = NULL; + nvml->available = 0; + return nvml; + } + + // Initialize NVML + nvmlReturn_t result = nvml->init(); + if (result != NVML_SUCCESS) { + set_error(nvml, "Failed to initialize NVML"); + close_lib(nvml->handle); + nvml->handle = NULL; + nvml->available = 0; + return nvml; + } + + nvml->available = 1; + return nvml; +} + +void nvml_unload(nvml_dynamic_t* nvml) { + if (!nvml) return; + if (nvml->handle) { + if (nvml->shutdown) { + nvml->shutdown(); + } + close_lib(nvml->handle); + } + free(nvml); +} + +int nvml_is_available(const nvml_dynamic_t* nvml) { + return nvml ? nvml->available : 0; +} + +const char* nvml_last_error(const nvml_dynamic_t* nvml) { + return nvml ? nvml->last_error : "NULL nvml handle"; +} + +int nvml_get_gpu_count(nvml_dynamic_t* nvml) { + if (!nvml || !nvml->available || !nvml->get_count) { + return -1; + } + unsigned int count = 0; + nvmlReturn_t result = nvml->get_count(&count); + if (result != NVML_SUCCESS) { + set_error(nvml, "Failed to get GPU count"); + return -1; + } + return (int)count; +} + +int nvml_get_gpu_info(nvml_dynamic_t* nvml, uint32_t index, gpu_info_t* info) { + if (!nvml || !nvml->available || !info) { + return -1; + } + + memset(info, 0, sizeof(*info)); + info->index = index; + + nvmlDevice_t device; + nvmlReturn_t result = nvml->get_handle_by_index(index, &device); + if (result != NVML_SUCCESS) { + set_error(nvml, "Failed to get device handle"); + return -1; + } + + // Get name + if (nvml->get_name) { + nvml->get_name(device, info->name, sizeof(info->name)); + } + + // Get utilization + if (nvml->get_utilization) { + nvmlUtilization_t util; + result = nvml->get_utilization(device, &util); + if (result == 
NVML_SUCCESS) { + info->utilization = util.gpu; + } + } + + // Get memory + if (nvml->get_memory) { + nvmlMemory_t mem; + result = nvml->get_memory(device, &mem); + if (result == NVML_SUCCESS) { + info->memory_used = mem.used; + info->memory_total = mem.total; + } + } + + // Get temperature + if (nvml->get_temperature) { + unsigned int temp; + result = nvml->get_temperature(device, NVML_TEMPERATURE_GPU, &temp); + if (result == NVML_SUCCESS) { + info->temperature = temp; + } + } + + // Get power usage + if (nvml->get_power_usage) { + unsigned int power; + result = nvml->get_power_usage(device, &power); + if (result == NVML_SUCCESS) { + info->power_draw = power; + } + } + + // Get clocks + if (nvml->get_clock) { + unsigned int clock; + result = nvml->get_clock(device, NVML_CLOCK_SM, &clock); + if (result == NVML_SUCCESS) { + info->clock_sm = clock; + } + result = nvml->get_clock(device, NVML_CLOCK_MEM, &clock); + if (result == NVML_SUCCESS) { + info->clock_memory = clock; + } + } + + // Get UUID + if (nvml->get_uuid) { + nvml->get_uuid(device, info->uuid, sizeof(info->uuid)); + } + + // Get VBIOS version + if (nvml->get_vbios) { + nvml->get_vbios(device, info->vbios_version, sizeof(info->vbios_version)); + } + + return 0; +} diff --git a/native/nvml_gpu/nvml_dynamic.h b/native/nvml_gpu/nvml_dynamic.h new file mode 100644 index 0000000..3b72fb0 --- /dev/null +++ b/native/nvml_gpu/nvml_dynamic.h @@ -0,0 +1,53 @@ +#ifndef NVML_DYNAMIC_H +#define NVML_DYNAMIC_H + +#include <stdint.h> +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + +// Opaque handle +typedef struct nvml_dynamic nvml_dynamic_t; + +// GPU info structure +typedef struct { + uint32_t index; + char name[256]; + uint32_t utilization; // GPU utilization (0-100) + uint64_t memory_used; // Memory used in bytes + uint64_t memory_total; // Total memory in bytes + uint32_t temperature; // Temperature in Celsius + uint32_t power_draw; // Power draw in milliwatts + uint32_t clock_sm; // SM clock in MHz + uint32_t 
clock_memory; // Memory clock in MHz + uint32_t pcie_gen; // PCIe generation + uint32_t pcie_width; // PCIe link width + char uuid[64]; // GPU UUID + char vbios_version[32]; // VBIOS version +} gpu_info_t; + +// Load NVML dynamically (returns NULL if not available) +nvml_dynamic_t* nvml_load(void); + +// Unload NVML and free resources +void nvml_unload(nvml_dynamic_t* nvml); + +// Check if NVML is available and loaded +int nvml_is_available(const nvml_dynamic_t* nvml); + +// Get number of GPUs (-1 on error) +int nvml_get_gpu_count(nvml_dynamic_t* nvml); + +// Get GPU info by index (returns 0 on success) +int nvml_get_gpu_info(nvml_dynamic_t* nvml, uint32_t index, gpu_info_t* info); + +// Get last error message +const char* nvml_last_error(const nvml_dynamic_t* nvml); + +#ifdef __cplusplus +} +#endif + +#endif // NVML_DYNAMIC_H