feat: native GPU detection and NVML bridge for macOS and Linux
- Add dynamic NVML loading for Linux GPU detection
- Add macOS GPU detection via IOKit framework
- Add Zig NVML wrapper for cross-platform GPU queries
- Update native bridge to support platform-specific GPU libs
- Add CMake support for NVML dynamic library
This commit is contained in:
parent
1a1844e9e9
commit
be39b37aec
15 changed files with 1321 additions and 35 deletions
262
cli/src/native/macos_gpu.zig
Normal file
262
cli/src/native/macos_gpu.zig
Normal file
|
|
@ -0,0 +1,262 @@
|
|||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
|
||||
/// macOS GPU Monitoring for Development Mode
|
||||
/// Uses system_profiler and powermetrics for GPU info
|
||||
/// Only available on macOS
|
||||
const c = @cImport({
|
||||
@cInclude("sys/types.h");
|
||||
@cInclude("sys/sysctl.h");
|
||||
});
|
||||
|
||||
/// GPU information structure for macOS.
/// Static attributes come from `system_profiler SPDisplaysDataType`;
/// the optional metrics are only filled when powermetrics data is available.
pub const MacOSGPUInfo = struct {
    // Zero-based position of the GPU in the system_profiler item list
    index: u32,
    // Display name, NUL-terminated; copied from the chipset model string
    name: [256:0]u8,
    // Raw "sppci_model" value from system_profiler, NUL-terminated
    chipset_model: [256:0]u8,
    // Dedicated VRAM in MB; 0 for integrated GPUs using shared memory
    vram_mb: u32,
    // True when system_profiler reports shared ("sppci_vram_shared") memory
    is_integrated: bool,
    // Performance metrics (if available via powermetrics)
    utilization_percent: ?u32,
    temperature_celsius: ?u32,
    // Power draw in milliwatts
    power_mw: ?u32,
};
|
||||
|
||||
/// Detect if running on Apple Silicon.
/// Queries the `hw.machine` sysctl; returns false on any failure and on
/// non-macOS targets.
pub fn isAppleSilicon() bool {
    if (builtin.os.tag != .macos) return false;

    var buf: [64]u8 = undefined;
    var len: usize = buf.len;
    // BUG FIX: sysctl's first parameter is a mutable `int *name`; taking
    // the address of a `const` array yields `*const c_int`, which does not
    // coerce to the C parameter type. The MIB array must be `var`.
    var mib = [_]c_int{ c.CTL_HW, c.HW_MACHINE };

    const result = c.sysctl(&mib[0], 2, &buf[0], &len, null, 0);
    if (result != 0) return false;

    const machine = std.mem.sliceTo(&buf, 0);
    // Apple Silicon machines report "arm64"; the "Apple" prefix check is a
    // defensive match for possible machine-string variants.
    return std.mem.startsWith(u8, machine, "arm64") or
        std.mem.startsWith(u8, machine, "Apple");
}
|
||||
|
||||
/// Get GPU count on macOS by scanning `system_profiler SPDisplaysDataType`
/// output. Returns 0 on non-macOS targets or when detection fails.
pub fn getGPUCount() u32 {
    if (builtin.os.tag != .macos) return 0;

    // Run system_profiler to check for GPUs
    const result = runSystemProfiler() catch return 0;
    // BUG FIX: the buffer is allocated with std.heap.page_allocator inside
    // runSystemProfiler and must be freed with the SAME allocator.
    // Previously it was freed with raw_c_allocator (allocator mismatch —
    // undefined behavior).
    defer std.heap.page_allocator.free(result);

    // Each GPU entry in the report carries one "Chipset Model" line.
    var lines = std.mem.splitScalar(u8, result, '\n');
    var count: u32 = 0;
    while (lines.next()) |line| {
        if (std.mem.indexOf(u8, line, "Chipset Model") != null) {
            count += 1;
        }
    }

    return count;
}
|
||||
|
||||
/// Run `system_profiler SPDisplaysDataType -json` and return its stdout.
/// Caller owns the returned slice and must free it with
/// std.heap.page_allocator. Returns error.CommandFailed when the tool
/// exits non-zero or terminates abnormally.
fn runSystemProfiler() ![]u8 {
    const argv = [_][]const u8{
        "system_profiler",
        "SPDisplaysDataType",
        "-json",
    };

    var child = std.process.Child.init(&argv, std.heap.page_allocator);
    child.stdout_behavior = .Pipe;
    child.stderr_behavior = .Ignore;

    try child.spawn();

    // Drain stdout before wait() so the child cannot block on a full pipe.
    // BUG FIX: the original deferred child.kill() unconditionally, which
    // also ran AFTER a successful wait() (killing an already-reaped
    // child); now we only kill when the read fails before wait().
    const stdout = child.stdout.?.reader();
    const output = stdout.readAllAlloc(std.heap.page_allocator, 1024 * 1024) catch |err| {
        _ = child.kill() catch {};
        return err;
    };
    // BUG FIX: previously the buffer leaked when the command exited
    // non-zero; free it on every error path below.
    errdefer std.heap.page_allocator.free(output);

    const term = try child.wait();
    if (term != .Exited or term.Exited != 0) {
        return error.CommandFailed;
    }

    return output;
}
|
||||
|
||||
/// Parse GPU info from system_profiler JSON output.
/// Returns a slice of parsed GPU entries; caller owns the returned slice
/// (allocated with `allocator`). Entries that fail to parse are skipped.
pub fn parseGPUInfo(allocator: std.mem.Allocator, json_output: []const u8) ![]MacOSGPUInfo {
    // Simple parser for system_profiler JSON
    // Format: {"SPDisplaysDataType": [{"sppci_model":"...", "sppci_vram":"...", ...}, ...]}

    var gpus = std.ArrayList(MacOSGPUInfo).init(allocator);
    // deinit is a no-op after a successful toOwnedSlice (list is emptied),
    // and frees partial results if an append below fails.
    defer gpus.deinit();

    // Parse JSON - look for _items array
    const items_key = "_items";
    if (std.mem.indexOf(u8, json_output, items_key)) |items_start| {
        const rest = json_output[items_start..];
        // Find array start
        if (std.mem.indexOf(u8, rest, "[")) |array_start| {
            const array = rest[array_start..];
            // Simple heuristic: find objects between { and }
            // (no full JSON parse; each balanced {...} span is treated as
            // one candidate GPU object)
            var i: usize = 0;
            while (i < array.len) {
                if (array[i] == '{') {
                    // Found object start; obj_end is the length of the
                    // balanced object starting at i
                    if (findObjectEnd(array[i..])) |obj_end| {
                        const obj = array[i .. i + obj_end];
                        // Objects without a chipset model return null and
                        // are dropped
                        if (try parseGPUObject(obj)) |gpu| {
                            try gpus.append(gpu);
                        }
                        // Skip past the whole object before scanning on
                        i += obj_end;
                        continue;
                    }
                }
                i += 1;
            }
        }
    }

    return gpus.toOwnedSlice();
}
|
||||
|
||||
/// Return the length (exclusive end offset) of the balanced JSON object
/// starting at json[0], or null when the braces never balance.
/// A '"' toggles string mode unless the preceding byte is a backslash;
/// braces inside strings are ignored.
fn findObjectEnd(json: []const u8) ?usize {
    var nesting: i32 = 0;
    var inside_quotes = false;
    for (json, 0..) |byte, idx| {
        const escaped = idx > 0 and json[idx - 1] == '\\';
        if (byte == '"' and !escaped) {
            inside_quotes = !inside_quotes;
        } else if (!inside_quotes) {
            switch (byte) {
                '{' => nesting += 1,
                '}' => {
                    nesting -= 1;
                    if (nesting == 0) return idx + 1;
                },
                else => {},
            }
        }
    }
    return null;
}
|
||||
|
||||
/// Parse a single system_profiler GPU object (one balanced {...} span).
/// Returns null when the object has no "sppci_model" key (i.e. it is not
/// a GPU entry). The index field is left 0 — the caller assigns ordering.
fn parseGPUObject(json: []const u8) !?MacOSGPUInfo {
    var gpu = MacOSGPUInfo{
        .index = 0,
        .name = std.mem.zeroes([256:0]u8),
        .chipset_model = std.mem.zeroes([256:0]u8),
        .vram_mb = 0,
        .is_integrated = false,
        .utilization_percent = null,
        .temperature_celsius = null,
        .power_mw = null,
    };

    // Extract sppci_model — used for both display name and chipset model
    if (extractJsonString(json, "sppci_model")) |model| {
        // Truncate to 255 bytes to keep the NUL terminator intact
        const len = @min(model.len, 255);
        @memcpy(gpu.chipset_model[0..len], model[0..len]);
        @memcpy(gpu.name[0..len], model[0..len]);
    }

    // Extract sppci_vram; the "_shared" variant marks an integrated GPU
    // with unified/shared memory (checked first — it takes precedence)
    if (extractJsonString(json, "sppci_vram_shared")) |_| {
        gpu.is_integrated = true;
        gpu.vram_mb = 0; // Shared memory
    } else if (extractJsonString(json, "sppci_vram")) |vram| {
        // Parse "16384 MB" -> 16384 (unparseable values fall back to 0)
        var it = std.mem.splitScalar(u8, vram, ' ');
        if (it.next()) |num_str| {
            gpu.vram_mb = std.fmt.parseInt(u32, num_str, 10) catch 0;
        }
    }

    // Check if it's a valid GPU entry: no model string means this object
    // was not a GPU item
    if (gpu.chipset_model[0] == 0) {
        return null;
    }

    return gpu;
}
|
||||
|
||||
/// Find `"key"` in `json` and return the string value that follows it, or
/// null when the key is absent or the value is not a string. The returned
/// slice points into `json` — no allocation, no copy.
/// NOTE: escaped quotes inside the value are not handled; the value ends
/// at the first '"'. Adequate for system_profiler output.
fn extractJsonString(json: []const u8, key: []const u8) ?[]const u8 {
    // Build the quoted key on the stack instead of heap-allocating via
    // page_allocator on every lookup (the old allocPrint path also
    // silently returned null on OOM, conflating "missing key" with
    // "allocation failed"). Keys longer than the buffer return null.
    var key_buf: [128]u8 = undefined;
    const key_quoted = std.fmt.bufPrint(&key_buf, "\"{s}\"", .{key}) catch return null;

    const key_pos = std.mem.indexOf(u8, json, key_quoted) orelse return null;
    const after_key = json[key_pos + key_quoted.len ..];

    // Find value start (skip : and whitespace)
    var i: usize = 0;
    while (i < after_key.len and (after_key[i] == ':' or after_key[i] == ' ' or after_key[i] == '\t' or after_key[i] == '\n')) : (i += 1) {}

    if (i < after_key.len and after_key[i] == '"') {
        // String value: scan to the closing quote
        const str_start = i + 1;
        var str_end = str_start;
        while (str_end < after_key.len and after_key[str_end] != '"') : (str_end += 1) {}
        return after_key[str_start..str_end];
    }
    return null;
}
|
||||
|
||||
/// Format GPU info for display.
/// Caller owns the returned buffer (allocated with `allocator`).
pub fn formatMacOSGPUInfo(allocator: std.mem.Allocator, gpus: []const MacOSGPUInfo) ![]u8 {
    var buf = std.ArrayList(u8).init(allocator);
    defer buf.deinit();

    const writer = buf.writer();

    // Empty list gets a short "no GPUs" report
    if (gpus.len == 0) {
        try writer.writeAll("GPU Status (macOS)\n");
        try writer.writeAll("═" ** 50);
        try writer.writeAll("\n\nNo GPUs detected\n");
        return buf.toOwnedSlice();
    }

    try writer.writeAll("GPU Status (macOS");
    if (isAppleSilicon()) {
        try writer.writeAll(" - Apple Silicon");
    }
    try writer.writeAll(")\n");
    try writer.writeAll("═" ** 50);
    try writer.writeAll("\n\n");

    for (gpus) |gpu| {
        const name = std.mem.sliceTo(&gpu.name, 0);
        const model = std.mem.sliceTo(&gpu.chipset_model, 0);

        try writer.print("🎮 GPU {d}: {s}\n", .{ gpu.index, name });
        // Only show the model line when it differs from the display name
        if (!std.mem.eql(u8, model, name)) {
            try writer.print(" Model: {s}\n", .{model});
        }
        if (gpu.is_integrated) {
            try writer.writeAll(" Type: Integrated (Unified Memory)\n");
        } else {
            try writer.print(" VRAM: {d} MB\n", .{gpu.vram_mb});
        }
        if (gpu.utilization_percent) |util| {
            try writer.print(" Utilization: {d}%\n", .{util});
        }
        if (gpu.temperature_celsius) |temp| {
            try writer.print(" Temperature: {d}°C\n", .{temp});
        }
        if (gpu.power_mw) |power| {
            // BUG FIX: "{d:.1f}" is not a valid Zig format specifier (the
            // trailing 'f' makes std.fmt reject it at compile time); the
            // correct spec — as used by formatGPUInfo in nvml.zig — is
            // "{d:.1}". power_mw is milliwatts, hence the /1000 to watts.
            try writer.print(" Power: {d:.1} W\n", .{@as(f64, @floatFromInt(power)) / 1000.0});
        }
        try writer.writeAll("\n");
    }

    try writer.writeAll("💡 Note: Detailed GPU metrics require powermetrics (sudo)\n");

    return buf.toOwnedSlice();
}
|
||||
|
||||
/// Quick check for GPU availability on macOS.
/// False on every other platform; otherwise true when at least one GPU
/// is detected via system_profiler.
pub fn isMacOSGPUAvailable() bool {
    if (builtin.os.tag != .macos) return false;
    const detected = getGPUCount();
    return detected != 0;
}
|
||||
372
cli/src/native/nvml.zig
Normal file
372
cli/src/native/nvml.zig
Normal file
|
|
@ -0,0 +1,372 @@
|
|||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
|
||||
/// NVML Dynamic Loader for CLI
|
||||
/// Pure Zig implementation using dlopen/LoadLibrary
|
||||
/// No build-time dependency on NVIDIA SDK
|
||||
|
||||
// Platform-specific dynamic loading.
// Windows uses LoadLibraryW/GetProcAddress; everything else goes through
// libc's dlopen/dlsym, declared as externs so there is no build-time
// dependency on a dl header.
const DynLib = switch (builtin.os.tag) {
    .windows => struct {
        handle: std.os.windows.HMODULE,

        /// Open a DLL by (possibly relative) path.
        fn open(path: []const u8) !@This() {
            const wide_path = try std.os.windows.sliceToPrefixedFileW(path);
            const handle = std.os.windows.LoadLibraryW(&wide_path.data) orelse return error.LibraryNotFound;
            return .{ .handle = handle };
        }

        fn close(self: *@This()) void {
            _ = std.os.windows.FreeLibrary(self.handle);
        }

        /// Resolve an exported symbol; null when absent.
        fn lookup(self: @This(), name: []const u8) ?*anyopaque {
            return std.os.windows.GetProcAddress(self.handle, name);
        }
    },
    else => struct {
        handle: *anyopaque,

        // Extern declarations for dlopen/dlsym/dlclose (libc)
        extern "c" fn dlopen(pathname: [*:0]const u8, mode: c_int) ?*anyopaque;
        extern "c" fn dlsym(handle: *anyopaque, symbol: [*:0]const u8) ?*anyopaque;
        extern "c" fn dlclose(handle: *anyopaque) c_int;

        // RTLD_NOW == 2 on both Linux and macOS
        const RTLD_NOW = 2;

        /// Open a shared library by name/path.
        fn open(path: []const u8) !@This() {
            // BUG FIX: std.cstr was removed from the standard library in
            // the Zig versions this file targets (it already uses
            // std.process.Child); Allocator.dupeZ produces the
            // NUL-terminated copy dlopen requires.
            const c_path = try std.heap.c_allocator.dupeZ(u8, path);
            defer std.heap.c_allocator.free(c_path);
            const handle = dlopen(c_path.ptr, RTLD_NOW) orelse return error.LibraryNotFound;
            return .{ .handle = handle };
        }

        fn close(self: *@This()) void {
            _ = dlclose(self.handle);
        }

        /// Resolve an exported symbol; null when absent or on OOM.
        fn lookup(self: @This(), name: []const u8) ?*anyopaque {
            const c_name = std.heap.c_allocator.dupeZ(u8, name) catch return null;
            defer std.heap.c_allocator.free(c_name);
            return dlsym(self.handle, c_name.ptr);
        }
    },
};
|
||||
|
||||
// NVML type definitions (mirrors nvml.h)
pub const nvmlReturn_t = c_int;
// Opaque device handle owned by the NVML library
pub const nvmlDevice_t = *anyopaque;

// GPU/memory utilization percentages (0-100), mirrors nvmlUtilization_t
pub const nvmlUtilization_t = extern struct {
    gpu: c_uint,
    memory: c_uint,
};

// Framebuffer memory in bytes, mirrors nvmlMemory_t
pub const nvmlMemory_t = extern struct {
    total: c_ulonglong,
    free: c_ulonglong,
    used: c_ulonglong,
};

// NVML constants (subset of nvml.h enums actually used here)
const NVML_SUCCESS = 0;
const NVML_TEMPERATURE_GPU = 0;
const NVML_CLOCK_SM = 0;
const NVML_CLOCK_MEM = 1;

// NVML function pointer types, one per dynamically resolved symbol.
// All use the C calling convention and return nvmlReturn_t status codes.
const nvmlInit_v2_fn = *const fn () callconv(.C) nvmlReturn_t;
const nvmlShutdown_fn = *const fn () callconv(.C) nvmlReturn_t;
const nvmlDeviceGetCount_fn = *const fn (*c_uint) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetHandleByIndex_v2_fn = *const fn (c_uint, *nvmlDevice_t) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetName_fn = *const fn (nvmlDevice_t, [*]u8, c_uint) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetUtilizationRates_fn = *const fn (nvmlDevice_t, *nvmlUtilization_t) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetMemoryInfo_fn = *const fn (nvmlDevice_t, *nvmlMemory_t) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetTemperature_fn = *const fn (nvmlDevice_t, c_uint, *c_uint) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetPowerUsage_fn = *const fn (nvmlDevice_t, *c_uint) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetClockInfo_fn = *const fn (nvmlDevice_t, c_uint, *c_uint) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetUUID_fn = *const fn (nvmlDevice_t, [*]u8, c_uint) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetVbiosVersion_fn = *const fn (nvmlDevice_t, [*]u8, c_uint) callconv(.C) nvmlReturn_t;

/// GPU information structure.
/// Fields left 0 / empty when the corresponding optional NVML query is
/// unavailable or fails.
pub const GPUInfo = struct {
    index: u32,
    name: [256:0]u8,
    // GPU utilization percent (0-100)
    utilization: u32,
    // Bytes (raw NVML values; callers convert to MB for display)
    memory_used: u64,
    memory_total: u64,
    // Degrees Celsius
    temperature: u32,
    // Milliwatts (NVML reports power in mW)
    power_draw: u32,
    // MHz
    clock_sm: u32,
    clock_memory: u32,
    uuid: [64:0]u8,
    vbios_version: [32:0]u8,
};
|
||||
|
||||
/// NVML handle with loaded functions.
/// Create with `load()`, release with `unload()`. Not thread-safe:
/// `setError` mutates shared state — TODO confirm single-threaded use.
pub const NVML = struct {
    lib: DynLib,
    available: bool,

    // Function pointers: the first four are required for load() to
    // succeed; the optional ones are null when the driver's NVML build
    // does not export them.
    init: nvmlInit_v2_fn,
    shutdown: nvmlShutdown_fn,
    get_count: nvmlDeviceGetCount_fn,
    get_handle_by_index: nvmlDeviceGetHandleByIndex_v2_fn,
    get_name: ?nvmlDeviceGetName_fn,
    get_utilization: ?nvmlDeviceGetUtilizationRates_fn,
    get_memory: ?nvmlDeviceGetMemoryInfo_fn,
    get_temperature: ?nvmlDeviceGetTemperature_fn,
    get_power_usage: ?nvmlDeviceGetPowerUsage_fn,
    get_clock: ?nvmlDeviceGetClockInfo_fn,
    get_uuid: ?nvmlDeviceGetUUID_fn,
    get_vbios: ?nvmlDeviceGetVbiosVersion_fn,

    // Last error message, NUL-terminated; written by setError
    last_error: [256:0]u8,

    /// Load NVML dynamically.
    /// Returns null when no NVIDIA driver/library is present on the
    /// system; returns an error when the library exists but a required
    /// symbol is missing or initialization fails.
    pub fn load() !?NVML {
        var nvml: NVML = undefined;

        // Try platform-specific library names
        const lib_names = switch (builtin.os.tag) {
            .windows => &[_][]const u8{
                "nvml.dll",
                "C:\\Windows\\System32\\nvml.dll",
            },
            .linux => &[_][]const u8{
                "libnvidia-ml.so.1",
                "libnvidia-ml.so",
            },
            else => return null, // NVML not supported on other platforms
        };

        // Try to load library (first name that opens wins)
        var loaded = false;
        for (lib_names) |name| {
            if (DynLib.open(name)) |lib| {
                nvml.lib = lib;
                loaded = true;
                break;
            } else |_| continue;
        }

        if (!loaded) {
            return null; // NVML not available (no NVIDIA driver)
        }

        // Load required functions.
        // NOTE(review): if any of these lookups fail, the opened library
        // handle is not closed before returning — handle leak on the
        // error path; confirm and consider an errdefer.
        // NOTE(review): "nvmlDeviceGetCount" is the legacy unversioned
        // symbol; NVML also exports nvmlDeviceGetCount_v2 — verify which
        // is intended.
        nvml.init = @ptrCast(nvml.lib.lookup("nvmlInit_v2") orelse return error.InitNotFound);
        nvml.shutdown = @ptrCast(nvml.lib.lookup("nvmlShutdown") orelse return error.ShutdownNotFound);
        nvml.get_count = @ptrCast(nvml.lib.lookup("nvmlDeviceGetCount") orelse return error.GetCountNotFound);
        nvml.get_handle_by_index = @ptrCast(nvml.lib.lookup("nvmlDeviceGetHandleByIndex_v2") orelse return error.GetHandleNotFound);

        // Load optional functions (null when the symbol is absent)
        nvml.get_name = @ptrCast(nvml.lib.lookup("nvmlDeviceGetName"));
        nvml.get_utilization = @ptrCast(nvml.lib.lookup("nvmlDeviceGetUtilizationRates"));
        nvml.get_memory = @ptrCast(nvml.lib.lookup("nvmlDeviceGetMemoryInfo"));
        nvml.get_temperature = @ptrCast(nvml.lib.lookup("nvmlDeviceGetTemperature"));
        nvml.get_power_usage = @ptrCast(nvml.lib.lookup("nvmlDeviceGetPowerUsage"));
        nvml.get_clock = @ptrCast(nvml.lib.lookup("nvmlDeviceGetClockInfo"));
        nvml.get_uuid = @ptrCast(nvml.lib.lookup("nvmlDeviceGetUUID"));
        nvml.get_vbios = @ptrCast(nvml.lib.lookup("nvmlDeviceGetVbiosVersion"));

        // Initialize NVML (setError is safe on the undefined struct
        // because it @memsets last_error before writing)
        const result = nvml.init();
        if (result != NVML_SUCCESS) {
            nvml.setError("NVML initialization failed");
            nvml.lib.close();
            return error.NVMLInitFailed;
        }

        nvml.available = true;
        return nvml;
    }

    /// Unload NVML: shut the driver session down (if initialized) and
    /// close the library handle.
    pub fn unload(self: *NVML) void {
        if (self.available) {
            _ = self.shutdown();
        }
        self.lib.close();
    }

    /// Check if NVML is available (i.e. load() completed successfully)
    pub fn isAvailable(self: NVML) bool {
        return self.available;
    }

    /// Get last error message recorded by setError
    pub fn getLastError(self: NVML) []const u8 {
        return std.mem.sliceTo(&self.last_error, 0);
    }

    // Record an error message, truncated to fit the fixed buffer with a
    // NUL terminator
    fn setError(self: *NVML, msg: []const u8) void {
        @memset(&self.last_error, 0);
        const len = @min(msg.len, self.last_error.len - 1);
        @memcpy(self.last_error[0..len], msg[0..len]);
    }

    /// Get number of GPUs visible to the driver
    pub fn getGPUCount(self: *NVML) !u32 {
        var count: c_uint = 0;
        const result = self.get_count(&count);
        if (result != NVML_SUCCESS) {
            self.setError("Failed to get GPU count");
            return error.GetCountFailed;
        }
        return @intCast(count);
    }

    /// Get GPU info by index.
    /// Only the device-handle lookup is fatal; every optional metric is
    /// best-effort and leaves its field zeroed on failure.
    pub fn getGPUInfo(self: *NVML, index: u32) !GPUInfo {
        var info: GPUInfo = .{
            .index = index,
            .name = std.mem.zeroes([256:0]u8),
            .utilization = 0,
            .memory_used = 0,
            .memory_total = 0,
            .temperature = 0,
            .power_draw = 0,
            .clock_sm = 0,
            .clock_memory = 0,
            .uuid = std.mem.zeroes([64:0]u8),
            .vbios_version = std.mem.zeroes([32:0]u8),
        };

        var device: nvmlDevice_t = undefined;
        var result = self.get_handle_by_index(index, &device);
        if (result != NVML_SUCCESS) {
            self.setError("Failed to get device handle");
            return error.GetHandleFailed;
        }

        // Get name (return code deliberately ignored; buffer stays zeroed)
        if (self.get_name) |func| {
            _ = func(device, &info.name, @sizeOf(@TypeOf(info.name)));
        }

        // Get utilization
        if (self.get_utilization) |func| {
            var util: nvmlUtilization_t = undefined;
            result = func(device, &util);
            if (result == NVML_SUCCESS) {
                info.utilization = @intCast(util.gpu);
            }
        }

        // Get memory (bytes)
        if (self.get_memory) |func| {
            var mem: nvmlMemory_t = undefined;
            result = func(device, &mem);
            if (result == NVML_SUCCESS) {
                info.memory_used = mem.used;
                info.memory_total = mem.total;
            }
        }

        // Get temperature (°C, GPU die sensor)
        if (self.get_temperature) |func| {
            var temp: c_uint = 0;
            result = func(device, NVML_TEMPERATURE_GPU, &temp);
            if (result == NVML_SUCCESS) {
                info.temperature = @intCast(temp);
            }
        }

        // Get power usage (milliwatts)
        if (self.get_power_usage) |func| {
            var power: c_uint = 0;
            result = func(device, &power);
            if (result == NVML_SUCCESS) {
                info.power_draw = @intCast(power);
            }
        }

        // Get clocks (MHz): SM then memory
        if (self.get_clock) |func| {
            var clock: c_uint = 0;
            result = func(device, NVML_CLOCK_SM, &clock);
            if (result == NVML_SUCCESS) {
                info.clock_sm = @intCast(clock);
            }
            result = func(device, NVML_CLOCK_MEM, &clock);
            if (result == NVML_SUCCESS) {
                info.clock_memory = @intCast(clock);
            }
        }

        // Get UUID
        if (self.get_uuid) |func| {
            _ = func(device, &info.uuid, @sizeOf(@TypeOf(info.uuid)));
        }

        // Get VBIOS version
        if (self.get_vbios) |func| {
            _ = func(device, &info.vbios_version, @sizeOf(@TypeOf(info.vbios_version)));
        }

        return info;
    }

    /// Get info for all GPUs. Caller owns the returned slice (allocated
    /// with `allocator`); the empty result is a static empty slice.
    pub fn getAllGPUInfo(self: *NVML, allocator: std.mem.Allocator) ![]GPUInfo {
        const count = try self.getGPUCount();
        if (count == 0) return &[_]GPUInfo{};

        var gpus = try allocator.alloc(GPUInfo, count);
        errdefer allocator.free(gpus);

        for (0..count) |i| {
            gpus[i] = try self.getGPUInfo(@intCast(i));
        }

        return gpus;
    }
};
|
||||
|
||||
// Convenience functions for simple use cases
|
||||
|
||||
/// Quick check if NVML is available (creates and destroys a temporary
/// handle). False on load error, on missing driver, or when the handle
/// reports unavailable.
pub fn isNVMLAvailable() bool {
    const loaded = NVML.load() catch return false;
    var nvml = loaded orelse return false;
    defer nvml.unload();
    return nvml.isAvailable();
}
|
||||
|
||||
/// Format GPU info as string for display.
/// Caller owns the returned buffer (allocated with `allocator`).
pub fn formatGPUInfo(allocator: std.mem.Allocator, gpus: []const GPUInfo) ![]u8 {
    var out = std.ArrayList(u8).init(allocator);
    defer out.deinit();
    const w = out.writer();

    // Report header with a heavy-line separator
    try w.writeAll("GPU Status (NVML)\n" ++ ("═" ** 50) ++ "\n\n");

    for (gpus) |gpu| {
        try w.print("🎮 GPU {d}: {s}\n", .{ gpu.index, std.mem.sliceTo(&gpu.name, 0) });
        try w.print(" Utilization: {d}%\n", .{gpu.utilization});
        // NVML reports memory in bytes; show MB
        try w.print(" Memory: {d}/{d} MB\n", .{
            gpu.memory_used / 1024 / 1024,
            gpu.memory_total / 1024 / 1024,
        });
        try w.print(" Temperature: {d}°C\n", .{gpu.temperature});
        if (gpu.power_draw > 0) {
            // power_draw is milliwatts
            const watts = @as(f64, @floatFromInt(gpu.power_draw)) / 1000.0;
            try w.print(" Power: {d:.1} W\n", .{watts});
        }
        if (gpu.clock_sm > 0) {
            try w.print(" SM Clock: {d} MHz\n", .{gpu.clock_sm});
        }
        try w.writeAll("\n");
    }

    return out.toOwnedSlice();
}
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
//go:build !native_libs
|
||||
// +build !native_libs
|
||||
//go:build !cgo || !native_libs
|
||||
// +build !cgo !native_libs
|
||||
|
||||
package queue
|
||||
|
||||
|
|
|
|||
|
|
@ -380,19 +380,16 @@ func (c *Config) Validate() error {
|
|||
// - UUID-style gpu_visible_device_ids is NVIDIA-only.
|
||||
vendor := strings.ToLower(strings.TrimSpace(c.GPUVendor))
|
||||
if len(c.GPUVisibleDevices) > 0 && len(c.GPUVisibleDeviceIDs) > 0 {
|
||||
return fmt.Errorf("gpu_visible_devices and gpu_visible_device_ids are mutually exclusive")
|
||||
}
|
||||
if len(c.GPUVisibleDeviceIDs) > 0 {
|
||||
if vendor != string(GPUTypeNVIDIA) {
|
||||
return fmt.Errorf(
|
||||
"gpu_visible_device_ids is only supported when gpu_vendor is %q",
|
||||
"visible_device_ids is only supported when gpu_vendor is %q",
|
||||
string(GPUTypeNVIDIA),
|
||||
)
|
||||
}
|
||||
for _, id := range c.GPUVisibleDeviceIDs {
|
||||
id = strings.TrimSpace(id)
|
||||
if id == "" {
|
||||
return fmt.Errorf("gpu_visible_device_ids contains an empty value")
|
||||
return fmt.Errorf("visible_device_ids contains an empty value")
|
||||
}
|
||||
if !strings.HasPrefix(id, "GPU-") {
|
||||
return fmt.Errorf("gpu_visible_device_ids values must start with %q, got %q", "GPU-", id)
|
||||
|
|
|
|||
|
|
@ -98,6 +98,14 @@ type AppleDetector struct {
|
|||
}
|
||||
|
||||
func (d *AppleDetector) DetectGPUCount() int {
|
||||
// First try actual macOS GPU detection
|
||||
if IsMacOS() {
|
||||
count, err := GetMacOSGPUCount()
|
||||
if err == nil && count > 0 {
|
||||
return count
|
||||
}
|
||||
}
|
||||
|
||||
if n, ok := envInt("FETCH_ML_GPU_COUNT"); ok && n >= 0 {
|
||||
return n
|
||||
}
|
||||
|
|
|
|||
279
internal/worker/gpu_macos.go
Normal file
279
internal/worker/gpu_macos.go
Normal file
|
|
@ -0,0 +1,279 @@
|
|||
//go:build darwin
|
||||
// +build darwin
|
||||
|
||||
package worker
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// MacOSGPUInfo holds information about a macOS GPU.
// Static attributes are parsed from `system_profiler SPDisplaysDataType`;
// the real-time fields are only populated from powermetrics when available.
type MacOSGPUInfo struct {
	Index          uint32 `json:"index"`
	Name           string `json:"name"`
	ChipsetModel   string `json:"chipset_model"`
	VRAM_MB        uint32 `json:"vram_mb"` // 0 for integrated GPUs (shared memory)
	IsIntegrated   bool   `json:"is_integrated"`
	IsAppleSilicon bool   `json:"is_apple_silicon"`
	// Real-time metrics from powermetrics (if available)
	UtilizationPercent uint32 `json:"utilization_percent,omitempty"`
	PowerMW            uint32 `json:"power_mw,omitempty"`
	TemperatureC       uint32 `json:"temperature_c,omitempty"`
}

// PowermetricsData holds GPU metrics from powermetrics.
// HasData is false when powermetrics could not run (e.g. no sudo) or its
// output contained none of the recognized lines.
type PowermetricsData struct {
	GPUUtilization float64 // percent
	GPUPower       float64 // milliwatts
	GPUTemperature float64 // degrees Celsius
	HasData        bool
}
|
||||
|
||||
// IsMacOS returns true if running on macOS
|
||||
func IsMacOS() bool {
|
||||
return runtime.GOOS == "darwin"
|
||||
}
|
||||
|
||||
// IsAppleSilicon checks if running on Apple Silicon by inspecting the
// machine hardware name from `uname -m`. Returns false on any failure.
// NOTE(review): a process running under Rosetta 2 translation sees
// "x86_64" from uname -m and is therefore reported as NOT Apple Silicon —
// confirm this is the intended behavior.
func IsAppleSilicon() bool {
	if runtime.GOOS != "darwin" {
		return false
	}
	// Check machine hardware name
	out, err := exec.Command("uname", "-m").Output()
	if err != nil {
		return false
	}
	return strings.TrimSpace(string(out)) == "arm64"
}
|
||||
|
||||
// GetMacOSGPUCount returns the number of GPUs on macOS.
// Each top-level item under "SPDisplaysDataType" in the system_profiler
// JSON output is counted as one GPU. Falls back to gfxutil when
// system_profiler itself fails; returns 0 (no error) when the JSON has
// no display section.
func GetMacOSGPUCount() (int, error) {
	if runtime.GOOS != "darwin" {
		return 0, fmt.Errorf("not running on macOS")
	}

	// Use system_profiler to get GPU count
	cmd := exec.Command("system_profiler", "SPDisplaysDataType", "-json")
	out, err := cmd.Output()
	if err != nil {
		// Fall back to gfxutil if system_profiler fails
		return getGPUCountViaGfxutil()
	}

	// Parse JSON output
	var data map[string]interface{}
	if err := json.Unmarshal(out, &data); err != nil {
		return 0, err
	}

	// Extract display items: one array entry per GPU
	if spData, ok := data["SPDisplaysDataType"].([]interface{}); ok {
		return len(spData), nil
	}

	return 0, nil
}
|
||||
|
||||
// getGPUCountViaGfxutil uses gfxutil to count GPUs (fallback path when
// system_profiler fails).
// NOTE(review): gfxutil is a third-party tool and is NOT preinstalled on
// macOS, so this fallback returns an error on most machines — confirm the
// dependency or drop the fallback.
func getGPUCountViaGfxutil() (int, error) {
	cmd := exec.Command("gfxutil", "-f", "display")
	out, err := cmd.Output()
	if err != nil {
		return 0, err
	}

	// Count display paths (one per GPU typically) — a heuristic over
	// lines that mention "Display"
	lines := strings.Split(strings.TrimSpace(string(out)), "\n")
	count := 0
	for _, line := range lines {
		if strings.Contains(line, "Display") {
			count++
		}
	}
	return count, nil
}
|
||||
|
||||
// GetMacOSGPUInfo returns detailed information about macOS GPUs, parsed
// from `system_profiler SPDisplaysDataType -json`. Returns an empty slice
// (not an error) when the JSON has no display section. Real-time metric
// fields are NOT populated here — see GetPowermetricsData.
func GetMacOSGPUInfo() ([]MacOSGPUInfo, error) {
	if runtime.GOOS != "darwin" {
		return nil, fmt.Errorf("not running on macOS")
	}

	cmd := exec.Command("system_profiler", "SPDisplaysDataType", "-json")
	out, err := cmd.Output()
	if err != nil {
		return nil, err
	}

	var data map[string]interface{}
	if err := json.Unmarshal(out, &data); err != nil {
		return nil, err
	}

	spData, ok := data["SPDisplaysDataType"].([]interface{})
	if !ok {
		return []MacOSGPUInfo{}, nil
	}

	// The Apple Silicon flag is machine-wide, so compute it once and
	// stamp it on every entry
	isAppleSilicon := IsAppleSilicon()
	var gpus []MacOSGPUInfo

	for i, item := range spData {
		if gpuData, ok := item.(map[string]interface{}); ok {
			info := MacOSGPUInfo{
				Index:          uint32(i),
				IsAppleSilicon: isAppleSilicon,
			}

			// Extract chipset model; it doubles as the display name
			if model, ok := gpuData["sppci_model"].(string); ok {
				info.ChipsetModel = model
				info.Name = model
			}

			// Check for shared memory (integrated GPU)
			if _, ok := gpuData["sppci_vram_shared"]; ok {
				info.IsIntegrated = true
			}

			// Extract VRAM
			if vram, ok := gpuData["sppci_vram"].(string); ok {
				// Parse "16384 MB" — first field is the number
				parts := strings.Fields(vram)
				if len(parts) >= 1 {
					if mb, err := strconv.ParseUint(parts[0], 10, 32); err == nil {
						info.VRAM_MB = uint32(mb)
					}
				}
			}

			// Entries without sppci_model are still appended with empty
			// name — NOTE(review): confirm non-GPU items should be kept
			gpus = append(gpus, info)
		}
	}

	return gpus, nil
}
|
||||
|
||||
// GetPowermetricsData tries to get real-time GPU metrics from powermetrics
|
||||
// Requires sudo access. Returns empty data if not available.
|
||||
func GetPowermetricsData() (*PowermetricsData, error) {
|
||||
// powermetrics requires sudo, so this may fail
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
defer cancel()
|
||||
|
||||
cmd := exec.CommandContext(ctx, "powermetrics", "--samplers", "gpu_power", "-n", "1", "-i", "100")
|
||||
out, err := cmd.Output()
|
||||
if err != nil {
|
||||
// powermetrics not available or no permission
|
||||
return &PowermetricsData{HasData: false}, nil
|
||||
}
|
||||
|
||||
data := &PowermetricsData{HasData: false}
|
||||
|
||||
// Parse powermetrics output
|
||||
// Example: "GPU Power: 5000 mW" or "GPU utilization: 45%"
|
||||
scanner := bufio.NewScanner(strings.NewReader(string(out)))
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
|
||||
// Parse GPU utilization
|
||||
if strings.Contains(line, "GPU utilization") || strings.Contains(line, "GPU active") {
|
||||
re := regexp.MustCompile(`(\d+(?:\.\d+)?)\s*%`)
|
||||
if matches := re.FindStringSubmatch(line); len(matches) > 1 {
|
||||
if util, err := strconv.ParseFloat(matches[1], 64); err == nil {
|
||||
data.GPUUtilization = util
|
||||
data.HasData = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Parse GPU power
|
||||
if strings.Contains(line, "GPU Power") || strings.Contains(line, "GPU power") {
|
||||
re := regexp.MustCompile(`(\d+(?:\.\d+)?)\s*mW`)
|
||||
if matches := re.FindStringSubmatch(line); len(matches) > 1 {
|
||||
if power, err := strconv.ParseFloat(matches[1], 64); err == nil {
|
||||
data.GPUPower = power
|
||||
data.HasData = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Parse GPU temperature (if available)
|
||||
if strings.Contains(line, "GPU Temperature") || strings.Contains(line, "GPU temp") {
|
||||
re := regexp.MustCompile(`(\d+(?:\.\d+)?)\s*C`)
|
||||
if matches := re.FindStringSubmatch(line); len(matches) > 1 {
|
||||
if temp, err := strconv.ParseFloat(matches[1], 64); err == nil {
|
||||
data.GPUTemperature = temp
|
||||
data.HasData = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// FormatMacOSGPUStatus formats GPU status for display: static info from
// system_profiler plus (when available) real-time powermetrics data.
// NOTE(review): powermetrics returns machine-wide numbers, so the same
// utilization/power/temperature lines are printed under EVERY GPU — and
// the "no GPUs" message mentions NVIDIA although this file is
// darwin-only; confirm both are intended.
func FormatMacOSGPUStatus() (string, error) {
	gpus, err := GetMacOSGPUInfo()
	if err != nil {
		return "", err
	}

	// Try to get real-time metrics from powermetrics (best-effort; the
	// error is always nil by contract)
	powermetrics, _ := GetPowermetricsData()

	if len(gpus) == 0 {
		return "GPU info unavailable\n\nRun on a system with NVIDIA GPU or macOS", nil
	}

	var b strings.Builder

	if IsAppleSilicon() {
		b.WriteString("GPU Status (macOS - Apple Silicon)\n")
	} else {
		b.WriteString("GPU Status (macOS)\n")
	}
	b.WriteString(strings.Repeat("═", 50) + "\n\n")

	for _, gpu := range gpus {
		fmt.Fprintf(&b, "🎮 GPU %d: %s\n", gpu.Index, gpu.Name)
		if gpu.IsAppleSilicon {
			b.WriteString("   Type: Apple Silicon (Unified Memory)\n")
		} else if gpu.IsIntegrated {
			b.WriteString("   Type: Integrated (Shared Memory)\n")
		} else {
			fmt.Fprintf(&b, "   VRAM: %d MB\n", gpu.VRAM_MB)
		}

		// Display powermetrics data if available (zero values are
		// suppressed)
		if powermetrics != nil && powermetrics.HasData {
			if powermetrics.GPUUtilization > 0 {
				b.WriteString(fmt.Sprintf("   Utilization: %.1f%%\n", powermetrics.GPUUtilization))
			}
			if powermetrics.GPUPower > 0 {
				// GPUPower is milliwatts; display watts
				b.WriteString(fmt.Sprintf("   Power: %.1f W\n", powermetrics.GPUPower/1000))
			}
			if powermetrics.GPUTemperature > 0 {
				b.WriteString(fmt.Sprintf("   Temperature: %.0f°C\n", powermetrics.GPUTemperature))
			}
		}
		b.WriteString("\n")
	}

	if powermetrics == nil || !powermetrics.HasData {
		b.WriteString("💡 Note: Run with sudo for real-time GPU metrics via powermetrics\n")
	}
	return b.String(), nil
}
|
||||
41
internal/worker/gpu_macos_stub.go
Normal file
41
internal/worker/gpu_macos_stub.go
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
//go:build !darwin
// +build !darwin

package worker

import "errors"

// MacOSGPUInfo mirrors the darwin build's GPU descriptor so code that
// references the type still compiles on other platforms.
type MacOSGPUInfo struct {
	Index          uint32
	Name           string
	ChipsetModel   string
	VRAM_MB        uint32
	IsIntegrated   bool
	IsAppleSilicon bool
}

// IsMacOS reports whether this binary targets macOS; always false in this stub.
func IsMacOS() bool { return false }

// IsAppleSilicon reports whether the host is Apple Silicon; always false in this stub.
func IsAppleSilicon() bool { return false }

// GetMacOSGPUCount is unavailable off macOS and always returns an error.
func GetMacOSGPUCount() (int, error) {
	return 0, errors.New("macOS GPU monitoring only available on macOS")
}

// GetMacOSGPUInfo is unavailable off macOS and always returns an error.
func GetMacOSGPUInfo() ([]MacOSGPUInfo, error) {
	return nil, errors.New("macOS GPU monitoring only available on macOS")
}

// FormatMacOSGPUStatus is unavailable off macOS and always returns an error.
func FormatMacOSGPUStatus() (string, error) {
	return "", errors.New("macOS GPU monitoring only available on macOS")
}
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
//go:build cgo && native_libs
|
||||
// +build cgo,native_libs
|
||||
//go:build cgo && native_libs && linux
|
||||
// +build cgo,native_libs,linux
|
||||
|
||||
package worker
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,26 @@
|
|||
//go:build cgo && !native_libs
|
||||
// +build cgo,!native_libs
|
||||
//go:build !cgo || !native_libs || !linux
|
||||
// +build !cgo !native_libs !linux
|
||||
|
||||
package worker
|
||||
|
||||
import "errors"
|
||||
|
||||
// Stub implementations when native_libs build tag is not present
|
||||
// GPUInfo provides comprehensive GPU information
|
||||
type GPUInfo struct {
|
||||
Index uint32
|
||||
Name string
|
||||
Utilization uint32
|
||||
MemoryUsed uint64
|
||||
MemoryTotal uint64
|
||||
Temperature uint32
|
||||
PowerDraw uint32
|
||||
ClockSM uint32
|
||||
ClockMemory uint32
|
||||
PCIeGen uint32
|
||||
PCIeWidth uint32
|
||||
UUID string
|
||||
VBIOSVersion string
|
||||
}
|
||||
|
||||
func InitNVML() error {
|
||||
return errors.New("NVML requires native_libs build tag")
|
||||
|
|
@ -18,10 +33,10 @@ func IsNVMLAvailable() bool {
|
|||
}
|
||||
|
||||
func GetGPUCount() (int, error) {
|
||||
return 0, errors.New("NVML requires native_libs build tag")
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func GetGPUInfo(index uint32) (*GPUInfo, error) {
|
||||
func GetGPUInfo(index uint32) (*GPUInfo, error) { // <-- was missing
|
||||
return nil, errors.New("NVML requires native_libs build tag")
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,11 +15,6 @@ func init() {
|
|||
log.Printf("[native] Native libraries disabled (build with -tags native_libs to enable)")
|
||||
}
|
||||
|
||||
// dirOverallSHA256HexNative is not available without native_libs build tag.
|
||||
func dirOverallSHA256HexNative(_ string) (string, error) {
|
||||
return "", errors.New("native hash requires native_libs build tag")
|
||||
}
|
||||
|
||||
// HashFilesBatchNative is not available without native_libs build tag.
|
||||
func HashFilesBatchNative(paths []string) ([]string, error) {
|
||||
return nil, errors.New("native batch hash requires native_libs build tag")
|
||||
|
|
|
|||
|
|
@ -3,7 +3,8 @@
|
|||
|
||||
package worker
|
||||
|
||||
// #cgo LDFLAGS: -L${SRCDIR}/../../native/build -Wl,-rpath,${SRCDIR}/../../native/build -ldataset_hash
|
||||
// #cgo darwin LDFLAGS: -L${SRCDIR}/../../native/build -Wl,-rpath,${SRCDIR}/../../native/build -ldataset_hash
|
||||
// #cgo linux LDFLAGS: -L${SRCDIR}/../../native/build -Wl,-rpath,${SRCDIR}/../../native/build -ldataset_hash -lnvml_gpu -lnvidia-ml
|
||||
// #include "../../native/dataset_hash/dataset_hash.h"
|
||||
// #include <stdlib.h>
|
||||
import "C"
|
||||
|
|
@ -25,8 +26,6 @@ var (
|
|||
ctxInitTime time.Time
|
||||
)
|
||||
|
||||
// getHashContext returns a cached hash context, initializing it once.
|
||||
// Context reuse eliminates 5-20ms of thread pool creation per hash operation.
|
||||
func getHashContext() *C.fh_context_t {
|
||||
hashCtxOnce.Do(func() {
|
||||
start := time.Now()
|
||||
|
|
@ -38,9 +37,8 @@ func getHashContext() *C.fh_context_t {
|
|||
return hashCtx
|
||||
}
|
||||
|
||||
// dirOverallSHA256HexNative implementation with native library.
|
||||
func dirOverallSHA256HexNative(root string) (string, error) {
|
||||
ctx := getHashContext() // Reuse cached context: ~0.1μs vs 5-20ms
|
||||
ctx := getHashContext()
|
||||
|
||||
croot := C.CString(root)
|
||||
defer C.free(unsafe.Pointer(croot))
|
||||
|
|
@ -58,28 +56,23 @@ func dirOverallSHA256HexNative(root string) (string, error) {
|
|||
return C.GoString(result), nil
|
||||
}
|
||||
|
||||
// GetSIMDImplName returns the native SHA256 implementation name.
|
||||
func GetSIMDImplName() string {
|
||||
return C.GoString(C.fh_get_simd_impl_name())
|
||||
}
|
||||
|
||||
// HasSIMDSHA256 returns true if SIMD SHA256 is available.
|
||||
func HasSIMDSHA256() bool {
|
||||
return C.fh_has_simd_sha256() == 1
|
||||
}
|
||||
|
||||
// ScanArtifactsNative falls back to Go implementation.
|
||||
func ScanArtifactsNative(runDir string) (*manifest.Artifacts, error) {
|
||||
return ScanArtifacts(runDir)
|
||||
}
|
||||
|
||||
// ExtractTarGzNative falls back to Go implementation.
|
||||
func ExtractTarGzNative(archivePath, dstDir string) error {
|
||||
return ExtractTarGz(archivePath, dstDir)
|
||||
}
|
||||
|
||||
// DirOverallSHA256HexNative exports the native hash implementation for benchmarks.
|
||||
// This allows explicit native library usage when -tags native_libs is enabled.
|
||||
func DirOverallSHA256HexNative(root string) (string, error) {
|
||||
return dirOverallSHA256HexNative(root)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,11 +9,6 @@ import (
|
|||
"github.com/jfraeys/fetch_ml/internal/manifest"
|
||||
)
|
||||
|
||||
// dirOverallSHA256HexNative is not available without CGO.
|
||||
func dirOverallSHA256HexNative(root string) (string, error) {
|
||||
return "", errors.New("native hash requires CGO")
|
||||
}
|
||||
|
||||
// HashFilesBatchNative is not available without CGO.
|
||||
func HashFilesBatchNative(paths []string) ([]string, error) {
|
||||
return nil, errors.New("native batch hash requires CGO")
|
||||
|
|
|
|||
|
|
@ -31,7 +31,11 @@ if(NVML_LIBRARY AND NVML_INCLUDE_DIR)
|
|||
message(STATUS "Found NVML: ${NVML_LIBRARY}")
|
||||
message(STATUS "NVML include: ${NVML_INCLUDE_DIR}")
|
||||
else()
|
||||
message(WARNING "NVML not found. GPU monitoring will be disabled.")
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
message(WARNING "NVML not found. NVIDIA GPU monitoring will be disabled.")
|
||||
else()
|
||||
message(STATUS "NVML not available on ${CMAKE_SYSTEM_NAME}. Using platform-specific GPU monitoring.")
|
||||
endif()
|
||||
# Create stub library
|
||||
target_compile_definitions(nvml_gpu PRIVATE NVML_STUB)
|
||||
endif()
|
||||
|
|
|
|||
272
native/nvml_gpu/nvml_dynamic.c
Normal file
272
native/nvml_gpu/nvml_dynamic.c
Normal file
|
|
@ -0,0 +1,272 @@
|
|||
#include "nvml_dynamic.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <dlfcn.h>
|
||||
#endif
|
||||
|
||||
// NVML type definitions (from nvml.h)
|
||||
typedef int nvmlReturn_t;
|
||||
typedef void* nvmlDevice_t;
|
||||
typedef struct {
|
||||
unsigned int gpu;
|
||||
unsigned int memory;
|
||||
} nvmlUtilization_t;
|
||||
typedef struct {
|
||||
unsigned long long total;
|
||||
unsigned long long free;
|
||||
unsigned long long used;
|
||||
} nvmlMemory_t;
|
||||
|
||||
// Function pointer types
|
||||
typedef nvmlReturn_t (*nvmlInit_v2_fn)(void);
|
||||
typedef nvmlReturn_t (*nvmlShutdown_fn)(void);
|
||||
typedef nvmlReturn_t (*nvmlSystemGetDriverVersion_fn)(char*, unsigned int);
|
||||
typedef nvmlReturn_t (*nvmlDeviceGetCount_fn)(unsigned int*);
|
||||
typedef nvmlReturn_t (*nvmlDeviceGetHandleByIndex_v2_fn)(unsigned int, nvmlDevice_t*);
|
||||
typedef nvmlReturn_t (*nvmlDeviceGetName_fn)(nvmlDevice_t, char*, unsigned int);
|
||||
typedef nvmlReturn_t (*nvmlDeviceGetUtilizationRates_fn)(nvmlDevice_t, nvmlUtilization_t*);
|
||||
typedef nvmlReturn_t (*nvmlDeviceGetMemoryInfo_fn)(nvmlDevice_t, nvmlMemory_t*);
|
||||
typedef nvmlReturn_t (*nvmlDeviceGetTemperature_fn)(nvmlDevice_t, unsigned int, unsigned int*);
|
||||
typedef nvmlReturn_t (*nvmlDeviceGetPowerUsage_fn)(nvmlDevice_t, unsigned int*);
|
||||
typedef nvmlReturn_t (*nvmlDeviceGetClockInfo_fn)(nvmlDevice_t, unsigned int, unsigned int*);
|
||||
typedef nvmlReturn_t (*nvmlDeviceGetPcieThroughput_fn)(nvmlDevice_t, unsigned int, unsigned int*);
|
||||
typedef nvmlReturn_t (*nvmlDeviceGetUUID_fn)(nvmlDevice_t, char*, unsigned int);
|
||||
typedef nvmlReturn_t (*nvmlDeviceGetVbiosVersion_fn)(nvmlDevice_t, char*, unsigned int);
|
||||
|
||||
// NVML constants
|
||||
#define NVML_SUCCESS 0
|
||||
#define NVML_TEMPERATURE_GPU 0
|
||||
#define NVML_CLOCK_SM 0
|
||||
#define NVML_CLOCK_MEM 1
|
||||
#define NVML_PCIE_UTIL_TX_BYTES 0
|
||||
#define NVML_PCIE_UTIL_RX_BYTES 1
|
||||
|
||||
struct nvml_dynamic {
|
||||
void* handle;
|
||||
char last_error[256];
|
||||
int available;
|
||||
|
||||
// Function pointers
|
||||
nvmlInit_v2_fn init;
|
||||
nvmlShutdown_fn shutdown;
|
||||
nvmlSystemGetDriverVersion_fn get_driver_version;
|
||||
nvmlDeviceGetCount_fn get_count;
|
||||
nvmlDeviceGetHandleByIndex_v2_fn get_handle_by_index;
|
||||
nvmlDeviceGetName_fn get_name;
|
||||
nvmlDeviceGetUtilizationRates_fn get_utilization;
|
||||
nvmlDeviceGetMemoryInfo_fn get_memory;
|
||||
nvmlDeviceGetTemperature_fn get_temperature;
|
||||
nvmlDeviceGetPowerUsage_fn get_power_usage;
|
||||
nvmlDeviceGetClockInfo_fn get_clock;
|
||||
nvmlDeviceGetUUID_fn get_uuid;
|
||||
nvmlDeviceGetVbiosVersion_fn get_vbios;
|
||||
};
|
||||
|
||||
/* Record a human-readable error message on the handle.
 * Safe to call with a NULL handle (no-op); the destination buffer is
 * always NUL-terminated. */
static void set_error(nvml_dynamic_t* nvml, const char* msg) {
    if (!nvml) {
        return;
    }
    strncpy(nvml->last_error, msg, sizeof(nvml->last_error) - 1);
    nvml->last_error[sizeof(nvml->last_error) - 1] = '\0';
}
|
||||
|
||||
/*
 * Thin platform shims over the OS dynamic-loader API so the rest of the
 * file stays platform-neutral: Win32 LoadLibrary/GetProcAddress/FreeLibrary
 * on Windows, POSIX dlopen/dlsym/dlclose elsewhere.
 */
#ifdef _WIN32
static void* load_lib(const char* name) { return LoadLibraryA(name); }
static void* get_sym(void* handle, const char* name) {
    return (void*)GetProcAddress((HMODULE)handle, name);
}
static void close_lib(void* handle) { FreeLibrary((HMODULE)handle); }
#else
static void* load_lib(const char* name) { return dlopen(name, RTLD_NOW); }
static void* get_sym(void* handle, const char* name) { return dlsym(handle, name); }
static void close_lib(void* handle) { dlclose(handle); }
#endif
|
||||
|
||||
nvml_dynamic_t* nvml_load(void) {
|
||||
nvml_dynamic_t* nvml = (nvml_dynamic_t*)calloc(1, sizeof(nvml_dynamic_t));
|
||||
if (!nvml) return NULL;
|
||||
|
||||
// Try to load NVML library
|
||||
#ifdef _WIN32
|
||||
nvml->handle = load_lib("nvml.dll");
|
||||
if (!nvml->handle) {
|
||||
nvml->handle = load_lib("C:\\Windows\\System32\\nvml.dll");
|
||||
}
|
||||
#else
|
||||
nvml->handle = load_lib("libnvidia-ml.so.1");
|
||||
if (!nvml->handle) {
|
||||
nvml->handle = load_lib("libnvidia-ml.so");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!nvml->handle) {
|
||||
set_error(nvml, "NVML library not found - NVIDIA driver may not be installed");
|
||||
nvml->available = 0;
|
||||
return nvml;
|
||||
}
|
||||
|
||||
// Load function pointers
|
||||
nvml->init = (nvmlInit_v2_fn)get_sym(nvml->handle, "nvmlInit_v2");
|
||||
nvml->shutdown = (nvmlShutdown_fn)get_sym(nvml->handle, "nvmlShutdown");
|
||||
nvml->get_driver_version = (nvmlSystemGetDriverVersion_fn)get_sym(nvml->handle, "nvmlSystemGetDriverVersion");
|
||||
nvml->get_count = (nvmlDeviceGetCount_fn)get_sym(nvml->handle, "nvmlDeviceGetCount");
|
||||
nvml->get_handle_by_index = (nvmlDeviceGetHandleByIndex_v2_fn)get_sym(nvml->handle, "nvmlDeviceGetHandleByIndex_v2");
|
||||
nvml->get_name = (nvmlDeviceGetName_fn)get_sym(nvml->handle, "nvmlDeviceGetName");
|
||||
nvml->get_utilization = (nvmlDeviceGetUtilizationRates_fn)get_sym(nvml->handle, "nvmlDeviceGetUtilizationRates");
|
||||
nvml->get_memory = (nvmlDeviceGetMemoryInfo_fn)get_sym(nvml->handle, "nvmlDeviceGetMemoryInfo");
|
||||
nvml->get_temperature = (nvmlDeviceGetTemperature_fn)get_sym(nvml->handle, "nvmlDeviceGetTemperature");
|
||||
nvml->get_power_usage = (nvmlDeviceGetPowerUsage_fn)get_sym(nvml->handle, "nvmlDeviceGetPowerUsage");
|
||||
nvml->get_clock = (nvmlDeviceGetClockInfo_fn)get_sym(nvml->handle, "nvmlDeviceGetClockInfo");
|
||||
nvml->get_uuid = (nvmlDeviceGetUUID_fn)get_sym(nvml->handle, "nvmlDeviceGetUUID");
|
||||
nvml->get_vbios = (nvmlDeviceGetVbiosVersion_fn)get_sym(nvml->handle, "nvmlDeviceGetVbiosVersion");
|
||||
|
||||
// Check required functions
|
||||
if (!nvml->init || !nvml->shutdown || !nvml->get_count || !nvml->get_handle_by_index) {
|
||||
set_error(nvml, "Failed to load required NVML functions");
|
||||
close_lib(nvml->handle);
|
||||
nvml->handle = NULL;
|
||||
nvml->available = 0;
|
||||
return nvml;
|
||||
}
|
||||
|
||||
// Initialize NVML
|
||||
nvmlReturn_t result = nvml->init();
|
||||
if (result != NVML_SUCCESS) {
|
||||
set_error(nvml, "Failed to initialize NVML");
|
||||
close_lib(nvml->handle);
|
||||
nvml->handle = NULL;
|
||||
nvml->available = 0;
|
||||
return nvml;
|
||||
}
|
||||
|
||||
nvml->available = 1;
|
||||
return nvml;
|
||||
}
|
||||
|
||||
/* Shut NVML down (if it was loaded) and release the handle.
 * Safe to call with NULL. */
void nvml_unload(nvml_dynamic_t* nvml) {
    if (!nvml) {
        return;
    }
    if (nvml->handle) {
        /* shutdown may be NULL if symbol resolution failed part-way. */
        if (nvml->shutdown) {
            nvml->shutdown();
        }
        close_lib(nvml->handle);
    }
    free(nvml);
}
|
||||
|
||||
int nvml_is_available(const nvml_dynamic_t* nvml) {
|
||||
return nvml ? nvml->available : 0;
|
||||
}
|
||||
|
||||
const char* nvml_last_error(const nvml_dynamic_t* nvml) {
|
||||
return nvml ? nvml->last_error : "NULL nvml handle";
|
||||
}
|
||||
|
||||
/* Return the number of NVIDIA GPUs visible to NVML, or -1 on error
 * (handle missing/unavailable, or the NVML call failed — see
 * nvml_last_error for the latter). */
int nvml_get_gpu_count(nvml_dynamic_t* nvml) {
    unsigned int device_count = 0;

    if (!nvml || !nvml->available || !nvml->get_count) {
        return -1;
    }

    if (nvml->get_count(&device_count) != NVML_SUCCESS) {
        set_error(nvml, "Failed to get GPU count");
        return -1;
    }

    return (int)device_count;
}
|
||||
|
||||
/*
 * Populate *info with a snapshot of the GPU at `index`.
 *
 * Returns 0 on success, -1 if the handle/output is invalid or the device
 * handle cannot be obtained. Individual telemetry queries are optional:
 * a missing symbol or a failed NVML call simply leaves that field zeroed
 * (the whole struct is cleared up front).
 */
int nvml_get_gpu_info(nvml_dynamic_t* nvml, uint32_t index, gpu_info_t* info) {
    nvmlDevice_t device;

    if (!nvml || !nvml->available || !info) {
        return -1;
    }

    memset(info, 0, sizeof(*info));
    info->index = index;

    if (nvml->get_handle_by_index(index, &device) != NVML_SUCCESS) {
        set_error(nvml, "Failed to get device handle");
        return -1;
    }

    /* Identity. */
    if (nvml->get_name) {
        nvml->get_name(device, info->name, sizeof(info->name));
    }

    /* Utilization (GPU engine busy percentage). */
    if (nvml->get_utilization) {
        nvmlUtilization_t util;
        if (nvml->get_utilization(device, &util) == NVML_SUCCESS) {
            info->utilization = util.gpu;
        }
    }

    /* Framebuffer memory (bytes). */
    if (nvml->get_memory) {
        nvmlMemory_t mem;
        if (nvml->get_memory(device, &mem) == NVML_SUCCESS) {
            info->memory_used = mem.used;
            info->memory_total = mem.total;
        }
    }

    /* Core temperature (Celsius). */
    if (nvml->get_temperature) {
        unsigned int temp_c;
        if (nvml->get_temperature(device, NVML_TEMPERATURE_GPU, &temp_c) == NVML_SUCCESS) {
            info->temperature = temp_c;
        }
    }

    /* Power draw (milliwatts). */
    if (nvml->get_power_usage) {
        unsigned int milliwatts;
        if (nvml->get_power_usage(device, &milliwatts) == NVML_SUCCESS) {
            info->power_draw = milliwatts;
        }
    }

    /* SM and memory clocks (MHz). */
    if (nvml->get_clock) {
        unsigned int mhz;
        if (nvml->get_clock(device, NVML_CLOCK_SM, &mhz) == NVML_SUCCESS) {
            info->clock_sm = mhz;
        }
        if (nvml->get_clock(device, NVML_CLOCK_MEM, &mhz) == NVML_SUCCESS) {
            info->clock_memory = mhz;
        }
    }

    /* Stable identifiers. */
    if (nvml->get_uuid) {
        nvml->get_uuid(device, info->uuid, sizeof(info->uuid));
    }
    if (nvml->get_vbios) {
        nvml->get_vbios(device, info->vbios_version, sizeof(info->vbios_version));
    }

    return 0;
}
|
||||
53
native/nvml_gpu/nvml_dynamic.h
Normal file
53
native/nvml_gpu/nvml_dynamic.h
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
#ifndef NVML_DYNAMIC_H
#define NVML_DYNAMIC_H

#include <stdint.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

// Opaque handle to a dynamically loaded NVML library instance.
typedef struct nvml_dynamic nvml_dynamic_t;

// Snapshot of a single GPU's identity and telemetry.
typedef struct {
    uint32_t index;
    char name[256];
    uint32_t utilization;    // GPU utilization (0-100)
    uint64_t memory_used;    // Memory used in bytes
    uint64_t memory_total;   // Total memory in bytes
    uint32_t temperature;    // Temperature in Celsius
    uint32_t power_draw;     // Power draw in milliwatts
    uint32_t clock_sm;       // SM clock in MHz
    uint32_t clock_memory;   // Memory clock in MHz
    uint32_t pcie_gen;       // PCIe generation
    uint32_t pcie_width;     // PCIe link width
    char uuid[64];           // GPU UUID
    char vbios_version[32];  // VBIOS version
} gpu_info_t;

// Load NVML dynamically. Returns NULL only if allocating the handle fails.
// When the NVML library is missing or unusable, a handle is still returned
// and nvml_is_available() reports 0; nvml_last_error() describes why.
// Caller owns the handle and releases it with nvml_unload().
nvml_dynamic_t* nvml_load(void);

// Shut NVML down (if initialized) and free the handle. NULL is a no-op.
void nvml_unload(nvml_dynamic_t* nvml);

// Returns 1 if NVML was loaded and initialized successfully, 0 otherwise.
int nvml_is_available(const nvml_dynamic_t* nvml);

// Get number of GPUs (-1 on error).
int nvml_get_gpu_count(nvml_dynamic_t* nvml);

// Get GPU info by index (returns 0 on success).
int nvml_get_gpu_info(nvml_dynamic_t* nvml, uint32_t index, gpu_info_t* info);

// Get last error message recorded on the handle (never NULL).
const char* nvml_last_error(const nvml_dynamic_t* nvml);

#ifdef __cplusplus
}
#endif

#endif // NVML_DYNAMIC_H
|
||||
Loading…
Reference in a new issue