// fetch_ml/cli/src/native/nvml.zig

const std = @import("std");
const builtin = @import("builtin");

// NVML dynamic loader for the CLI.
// Pure Zig implementation that loads the NVIDIA driver library at runtime
// via dlopen (POSIX) or LoadLibrary (Windows).
// No build-time dependency on the NVIDIA SDK.

// Platform-specific dynamic loading.
// Both variants expose the same private interface:
//   open(path)   -> loads a shared library, error.LibraryNotFound on failure
//   close()      -> releases the library handle
//   lookup(name) -> address of an exported symbol, or null if it is missing
const DynLib = switch (builtin.os.tag) {
    .windows => struct {
        handle: std.os.windows.HMODULE,

        // Raw kernel32 bindings: LoadLibraryW / GetProcAddress return
        // optionals and FreeLibrary returns a BOOL, which is the shape the
        // calls below rely on.
        // NOTE(review): this branch is only analyzed when targeting Windows;
        // confirm it against the std version used for Windows builds.
        const kernel32 = std.os.windows.kernel32;

        /// Longest symbol name `lookup` accepts (excluding the terminator).
        const max_symbol_len = 255;

        /// Load the DLL named by the UTF-8 string `path`.
        /// The name is handed to the loader as written, so bare file names
        /// go through the normal DLL search order.
        fn open(path: []const u8) !@This() {
            // A UTF-8 string never encodes to more UTF-16 code units than it
            // has bytes, so bounding the byte length guarantees the
            // conversion fits and leaves room for the terminator.
            var wide: [std.os.windows.PATH_MAX_WIDE + 1]u16 = undefined;
            if (path.len >= wide.len) return error.NameTooLong;
            const len = try std.unicode.utf8ToUtf16Le(&wide, path);
            wide[len] = 0;
            const handle = kernel32.LoadLibraryW(wide[0..len :0].ptr) orelse return error.LibraryNotFound;
            return .{ .handle = handle };
        }

        /// Release the library handle. The handle must not be used afterwards.
        fn close(self: *@This()) void {
            _ = kernel32.FreeLibrary(self.handle);
        }

        /// Return the address of the exported symbol `name`, or null when the
        /// symbol is missing or the name exceeds `max_symbol_len` bytes.
        fn lookup(self: @This(), name: []const u8) ?*anyopaque {
            // GetProcAddress needs a NUL-terminated name; build it on the
            // stack so the lookup cannot fail for lack of memory.
            var buf: [max_symbol_len + 1]u8 = undefined;
            if (name.len > max_symbol_len) return null;
            @memcpy(buf[0..name.len], name);
            buf[name.len] = 0;
            const proc = kernel32.GetProcAddress(self.handle, buf[0..name.len :0].ptr) orelse return null;
            // Round-trip through an integer so this works regardless of how
            // std spells FARPROC.
            return @ptrFromInt(@intFromPtr(proc));
        }
    },
    else => struct {
        handle: *anyopaque,

        // Extern declarations for dlopen/dlsym (requires linking libc)
        extern "c" fn dlopen(pathname: [*:0]const u8, mode: c_int) ?*anyopaque;
        extern "c" fn dlsym(handle: *anyopaque, symbol: [*:0]const u8) ?*anyopaque;
        extern "c" fn dlclose(handle: *anyopaque) c_int;

        const RTLD_NOW = 2;

        /// Longest symbol name `lookup` accepts (excluding the terminator).
        const max_symbol_len = 255;

        /// Load the shared object named by `path` with RTLD_NOW binding.
        /// Fails with error.LibraryNotFound when dlopen returns null, or
        /// error.OutOfMemory when the temporary C string cannot be allocated.
        fn open(path: []const u8) !@This() {
            // dlopen needs a NUL-terminated string; dupeZ replaces the
            // std.cstr.addNullByte helper that is no longer part of std.
            const c_path = try std.heap.c_allocator.dupeZ(u8, path);
            defer std.heap.c_allocator.free(c_path);
            const handle = dlopen(c_path.ptr, RTLD_NOW) orelse return error.LibraryNotFound;
            return .{ .handle = handle };
        }

        /// Release the library handle. The handle must not be used afterwards.
        fn close(self: *@This()) void {
            _ = dlclose(self.handle);
        }

        /// Return the address of the exported symbol `name`, or null when the
        /// symbol is missing or the name exceeds `max_symbol_len` bytes.
        fn lookup(self: @This(), name: []const u8) ?*anyopaque {
            // Terminate the name in a stack buffer: no heap allocation per
            // lookup, and no allocation failure disguised as a missing symbol.
            var buf: [max_symbol_len + 1]u8 = undefined;
            if (name.len > max_symbol_len) return null;
            @memcpy(buf[0..name.len], name);
            buf[name.len] = 0;
            return dlsym(self.handle, buf[0..name.len :0].ptr);
        }
    },
};

// NVML type definitions (mirrors nvml.h)
// These cross the C ABI boundary, so their layout must not be changed.

/// Status code returned by every NVML entry point used in this file;
/// callers compare it against NVML_SUCCESS.
pub const nvmlReturn_t = c_int;
/// Opaque device handle, filled in by the handle-by-index query and passed
/// back to the per-device queries.
pub const nvmlDevice_t = *anyopaque;

/// Out-parameter of the utilization-rates query.
/// Only `gpu` is read by this file (copied into `GPUInfo.utilization`).
pub const nvmlUtilization_t = extern struct {
    gpu: c_uint,
    memory: c_uint,
};

/// Out-parameter of the memory-info query.
/// `used` and `total` are copied into `GPUInfo`; `formatGPUInfo` treats them
/// as byte counts (it divides by 1024 * 1024 to print MB).
pub const nvmlMemory_t = extern struct {
    total: c_ulonglong,
    free: c_ulonglong,
    used: c_ulonglong,
};

// NVML constants (values mirror the enums in nvml.h)

// nvmlReturn_t: the call completed successfully.
const NVML_SUCCESS = 0;
// nvmlTemperatureSensors_t: the sensor on the GPU die.
const NVML_TEMPERATURE_GPU = 0;
// nvmlClockType_t is { GRAPHICS = 0, SM = 1, MEM = 2, VIDEO = 3 }.
// Using 0 / 1 here would silently report the graphics clock as the SM clock
// and the SM clock as the memory clock.
const NVML_CLOCK_SM = 1;
const NVML_CLOCK_MEM = 2;

// NVML function types
// One pointer type per entry point resolved at runtime. Every function uses
// the C calling convention and returns an nvmlReturn_t status code.

// Library lifecycle.
const nvmlInit_v2_fn = *const fn () callconv(.C) nvmlReturn_t;
const nvmlShutdown_fn = *const fn () callconv(.C) nvmlReturn_t;
// Device enumeration: count out-parameter, then (index, handle out-parameter).
const nvmlDeviceGetCount_fn = *const fn (*c_uint) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetHandleByIndex_v2_fn = *const fn (c_uint, *nvmlDevice_t) callconv(.C) nvmlReturn_t;
// String query: (device, destination buffer, buffer length in bytes).
const nvmlDeviceGetName_fn = *const fn (nvmlDevice_t, [*]u8, c_uint) callconv(.C) nvmlReturn_t;
// Struct queries: (device, out-parameter).
const nvmlDeviceGetUtilizationRates_fn = *const fn (nvmlDevice_t, *nvmlUtilization_t) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetMemoryInfo_fn = *const fn (nvmlDevice_t, *nvmlMemory_t) callconv(.C) nvmlReturn_t;
// Scalar queries: (device, [selector,] out-parameter). The selector is an
// NVML_TEMPERATURE_* or NVML_CLOCK_* constant.
const nvmlDeviceGetTemperature_fn = *const fn (nvmlDevice_t, c_uint, *c_uint) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetPowerUsage_fn = *const fn (nvmlDevice_t, *c_uint) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetClockInfo_fn = *const fn (nvmlDevice_t, c_uint, *c_uint) callconv(.C) nvmlReturn_t;
// String queries with the same shape as nvmlDeviceGetName_fn.
const nvmlDeviceGetUUID_fn = *const fn (nvmlDevice_t, [*]u8, c_uint) callconv(.C) nvmlReturn_t;
const nvmlDeviceGetVbiosVersion_fn = *const fn (nvmlDevice_t, [*]u8, c_uint) callconv(.C) nvmlReturn_t;

/// Snapshot of one GPU as produced by `NVML.getGPUInfo`.
/// Every field starts zeroed; numeric fields are only overwritten when the
/// corresponding NVML query is available and reports success.
pub const GPUInfo = struct {
    /// Device index that was passed to `getGPUInfo`.
    index: u32,
    /// NUL-terminated device name.
    name: [256:0]u8,
    /// GPU utilization; `formatGPUInfo` prints it as a percentage.
    utilization: u32,
    /// Used device memory; `formatGPUInfo` treats it as a byte count.
    memory_used: u64,
    /// Total device memory; `formatGPUInfo` treats it as a byte count.
    memory_total: u64,
    /// GPU temperature; `formatGPUInfo` prints it in °C.
    temperature: u32,
    /// Power draw; `formatGPUInfo` divides by 1000 to print watts.
    power_draw: u32,
    /// SM clock; `formatGPUInfo` prints it in MHz.
    clock_sm: u32,
    /// Memory clock (collected but not shown by `formatGPUInfo`).
    clock_memory: u32,
    /// NUL-terminated device UUID string.
    uuid: [64:0]u8,
    /// NUL-terminated VBIOS version string.
    vbios_version: [32:0]u8,
};

/// Handle to a dynamically loaded NVML library plus the entry points
/// resolved from it.
///
/// Create one with `NVML.load()` and release it with `unload()`. The query
/// methods record a short message retrievable through `getLastError()` when
/// they fail.
pub const NVML = struct {
    lib: DynLib,
    available: bool,

    // Required entry points; `load` fails when any of them is missing.
    init: nvmlInit_v2_fn,
    shutdown: nvmlShutdown_fn,
    get_count: nvmlDeviceGetCount_fn,
    get_handle_by_index: nvmlDeviceGetHandleByIndex_v2_fn,
    // Optional entry points; null when the library does not export them.
    get_name: ?nvmlDeviceGetName_fn,
    get_utilization: ?nvmlDeviceGetUtilizationRates_fn,
    get_memory: ?nvmlDeviceGetMemoryInfo_fn,
    get_temperature: ?nvmlDeviceGetTemperature_fn,
    get_power_usage: ?nvmlDeviceGetPowerUsage_fn,
    get_clock: ?nvmlDeviceGetClockInfo_fn,
    get_uuid: ?nvmlDeviceGetUUID_fn,
    get_vbios: ?nvmlDeviceGetVbiosVersion_fn,

    // NUL-terminated message of the most recent failure; empty when none.
    last_error: [256:0]u8,

    /// Resolve `name` in `lib` and reinterpret its address as the function
    /// pointer type `T`. Returns null when the symbol is not exported.
    fn resolve(comptime T: type, lib: DynLib, name: []const u8) ?T {
        const addr = lib.lookup(name) orelse return null;
        // @alignCast is required on targets whose function alignment is
        // greater than the alignment of *anyopaque.
        return @as(T, @ptrCast(@alignCast(addr)));
    }

    /// Load NVML dynamically and initialize it.
    ///
    /// Returns null when NVML is not supported on this platform or no NVML
    /// library could be opened (e.g. no NVIDIA driver installed).
    /// Returns an error when the library was opened but a required entry
    /// point is missing (error.InitNotFound, error.ShutdownNotFound,
    /// error.GetCountNotFound, error.GetHandleNotFound) or initialization
    /// fails (error.NVMLInitFailed); the library is closed again in that case.
    pub fn load() !?NVML {
        // Try platform-specific library names
        const lib_names = switch (builtin.os.tag) {
            .windows => &[_][]const u8{
                "nvml.dll",
                "C:\\Windows\\System32\\nvml.dll",
            },
            .linux => &[_][]const u8{
                "libnvidia-ml.so.1",
                "libnvidia-ml.so",
            },
            else => return null, // NVML not supported on other platforms
        };

        // The first candidate that opens wins.
        var opened: ?DynLib = null;
        for (lib_names) |name| {
            opened = DynLib.open(name) catch continue;
            break;
        }
        var lib = opened orelse return null; // NVML not available (no NVIDIA driver)
        // Every error return below must release the library again.
        errdefer lib.close();

        // Required functions
        const init_fn = resolve(nvmlInit_v2_fn, lib, "nvmlInit_v2") orelse return error.InitNotFound;
        const shutdown_fn = resolve(nvmlShutdown_fn, lib, "nvmlShutdown") orelse return error.ShutdownNotFound;
        // Prefer the _v2 entry point, which pairs with nvmlInit_v2 and
        // nvmlDeviceGetHandleByIndex_v2; fall back to the legacy name.
        const get_count_fn = resolve(nvmlDeviceGetCount_fn, lib, "nvmlDeviceGetCount_v2") orelse
            resolve(nvmlDeviceGetCount_fn, lib, "nvmlDeviceGetCount") orelse
            return error.GetCountNotFound;
        const get_handle_fn = resolve(nvmlDeviceGetHandleByIndex_v2_fn, lib, "nvmlDeviceGetHandleByIndex_v2") orelse
            return error.GetHandleNotFound;

        // Initialize NVML before handing out the handle.
        if (init_fn() != NVML_SUCCESS) return error.NVMLInitFailed;

        return NVML{
            .lib = lib,
            .available = true,
            .init = init_fn,
            .shutdown = shutdown_fn,
            .get_count = get_count_fn,
            .get_handle_by_index = get_handle_fn,
            // Optional functions
            .get_name = resolve(nvmlDeviceGetName_fn, lib, "nvmlDeviceGetName"),
            .get_utilization = resolve(nvmlDeviceGetUtilizationRates_fn, lib, "nvmlDeviceGetUtilizationRates"),
            .get_memory = resolve(nvmlDeviceGetMemoryInfo_fn, lib, "nvmlDeviceGetMemoryInfo"),
            .get_temperature = resolve(nvmlDeviceGetTemperature_fn, lib, "nvmlDeviceGetTemperature"),
            .get_power_usage = resolve(nvmlDeviceGetPowerUsage_fn, lib, "nvmlDeviceGetPowerUsage"),
            .get_clock = resolve(nvmlDeviceGetClockInfo_fn, lib, "nvmlDeviceGetClockInfo"),
            .get_uuid = resolve(nvmlDeviceGetUUID_fn, lib, "nvmlDeviceGetUUID"),
            .get_vbios = resolve(nvmlDeviceGetVbiosVersion_fn, lib, "nvmlDeviceGetVbiosVersion"),
            // Start with an empty message so getLastError never reads
            // uninitialized memory.
            .last_error = std.mem.zeroes([256:0]u8),
        };
    }

    /// Shut NVML down (if it was initialized) and close the library.
    /// The handle must not be used for queries afterwards.
    pub fn unload(self: *NVML) void {
        if (self.available) {
            _ = self.shutdown();
            self.available = false;
        }
        self.lib.close();
    }

    /// Check if NVML is available (initialized and not yet unloaded).
    pub fn isAvailable(self: NVML) bool {
        return self.available;
    }

    /// Get the last error message, or an empty slice when none was recorded.
    /// The slice points into this handle and is valid until the next failing
    /// call overwrites it.
    pub fn getLastError(self: *const NVML) []const u8 {
        // Takes a pointer so the returned slice refers to the handle itself
        // rather than to a by-value copy that dies when this function returns.
        return std.mem.sliceTo(&self.last_error, 0);
    }

    /// Store `msg` as the last error, truncated to fit the buffer.
    fn setError(self: *NVML, msg: []const u8) void {
        @memset(&self.last_error, 0);
        const len = @min(msg.len, self.last_error.len - 1);
        @memcpy(self.last_error[0..len], msg[0..len]);
    }

    /// Get number of GPUs.
    /// Returns error.GetCountFailed (and records a message) when NVML
    /// reports a failure.
    pub fn getGPUCount(self: *NVML) !u32 {
        var count: c_uint = 0;
        if (self.get_count(&count) != NVML_SUCCESS) {
            self.setError("Failed to get GPU count");
            return error.GetCountFailed;
        }
        return @intCast(count);
    }

    /// Get GPU info by index.
    ///
    /// Only the device-handle lookup is fatal (error.GetHandleFailed). Every
    /// other query is best-effort: a missing entry point or a failed call
    /// leaves the corresponding field at zero / empty.
    pub fn getGPUInfo(self: *NVML, index: u32) !GPUInfo {
        var device: nvmlDevice_t = undefined;
        if (self.get_handle_by_index(index, &device) != NVML_SUCCESS) {
            self.setError("Failed to get device handle");
            return error.GetHandleFailed;
        }

        var info: GPUInfo = .{
            .index = index,
            .name = std.mem.zeroes([256:0]u8),
            .utilization = 0,
            .memory_used = 0,
            .memory_total = 0,
            .temperature = 0,
            .power_draw = 0,
            .clock_sm = 0,
            .clock_memory = 0,
            .uuid = std.mem.zeroes([64:0]u8),
            .vbios_version = std.mem.zeroes([32:0]u8),
        };

        // String queries: clear the buffer again on failure so a partially
        // written result is never reported.
        if (self.get_name) |func| {
            if (func(device, &info.name, @sizeOf(@TypeOf(info.name))) != NVML_SUCCESS) {
                info.name = std.mem.zeroes(@TypeOf(info.name));
            }
        }
        if (self.get_uuid) |func| {
            if (func(device, &info.uuid, @sizeOf(@TypeOf(info.uuid))) != NVML_SUCCESS) {
                info.uuid = std.mem.zeroes(@TypeOf(info.uuid));
            }
        }
        if (self.get_vbios) |func| {
            if (func(device, &info.vbios_version, @sizeOf(@TypeOf(info.vbios_version))) != NVML_SUCCESS) {
                info.vbios_version = std.mem.zeroes(@TypeOf(info.vbios_version));
            }
        }

        // Utilization
        if (self.get_utilization) |func| {
            var util: nvmlUtilization_t = undefined;
            if (func(device, &util) == NVML_SUCCESS) {
                info.utilization = @intCast(util.gpu);
            }
        }

        // Memory
        if (self.get_memory) |func| {
            var mem: nvmlMemory_t = undefined;
            if (func(device, &mem) == NVML_SUCCESS) {
                info.memory_used = mem.used;
                info.memory_total = mem.total;
            }
        }

        // Temperature
        if (self.get_temperature) |func| {
            var temp: c_uint = 0;
            if (func(device, NVML_TEMPERATURE_GPU, &temp) == NVML_SUCCESS) {
                info.temperature = @intCast(temp);
            }
        }

        // Power usage
        if (self.get_power_usage) |func| {
            var power: c_uint = 0;
            if (func(device, &power) == NVML_SUCCESS) {
                info.power_draw = @intCast(power);
            }
        }

        // Clocks
        if (self.get_clock) |func| {
            var sm_clock: c_uint = 0;
            if (func(device, NVML_CLOCK_SM, &sm_clock) == NVML_SUCCESS) {
                info.clock_sm = @intCast(sm_clock);
            }
            var mem_clock: c_uint = 0;
            if (func(device, NVML_CLOCK_MEM, &mem_clock) == NVML_SUCCESS) {
                info.clock_memory = @intCast(mem_clock);
            }
        }

        return info;
    }

    /// Get info for all GPUs.
    ///
    /// The caller owns the returned slice and frees it with `allocator`.
    /// When no GPU is present the result is an empty slice that was not
    /// allocated; passing it to `allocator.free` is still fine because
    /// zero-length slices are ignored there.
    pub fn getAllGPUInfo(self: *NVML, allocator: std.mem.Allocator) ![]GPUInfo {
        const count = try self.getGPUCount();
        if (count == 0) return &[_]GPUInfo{};

        const gpus = try allocator.alloc(GPUInfo, count);
        errdefer allocator.free(gpus);

        for (0..count) |i| {
            gpus[i] = try self.getGPUInfo(@intCast(i));
        }

        return gpus;
    }
};

// Convenience functions for simple use cases

/// Report whether NVML can be loaded and initialized on this machine.
/// A temporary handle is created for the probe and unloaded again before
/// returning; any load error is treated as "not available".
pub fn isNVMLAvailable() bool {
    const maybe_handle = NVML.load() catch return false;
    var handle = maybe_handle orelse return false;
    // The return value is computed before this deferred unload runs.
    defer handle.unload();
    return handle.isAvailable();
}

/// Build a multi-line, human-readable status report for `gpus`.
///
/// The power and SM clock lines are emitted only when the corresponding
/// value is non-zero. The caller owns the returned slice and frees it with
/// `allocator`.
pub fn formatGPUInfo(allocator: std.mem.Allocator, gpus: []const GPUInfo) ![]u8 {
    var report = std.ArrayList(u8).init(allocator);
    // On success the buffer is handed to the caller; it is only released
    // here when one of the writes below fails.
    errdefer report.deinit();

    const out = report.writer();
    const bytes_per_mb: u64 = 1024 * 1024;

    try out.writeAll("GPU Status (NVML)\n");
    try out.writeAll("═" ** 50);
    try out.writeAll("\n\n");

    for (gpus) |*gpu| {
        const used_mb = gpu.memory_used / bytes_per_mb;
        const total_mb = gpu.memory_total / bytes_per_mb;

        try out.print("GPU {d}: {s}\n", .{ gpu.index, std.mem.sliceTo(&gpu.name, 0) });
        try out.print("\tUtilization: {d}%\n", .{gpu.utilization});
        try out.print("\tMemory: {d}/{d} MB\n", .{ used_mb, total_mb });
        try out.print("\tTemperature: {d}°C\n", .{gpu.temperature});

        if (gpu.power_draw != 0) {
            // power_draw is stored in thousandths of a watt.
            const watts = @as(f64, @floatFromInt(gpu.power_draw)) / 1000.0;
            try out.print("\tPower: {d:.1} W\n", .{watts});
        }
        if (gpu.clock_sm != 0) {
            try out.print("\tSM Clock: {d} MHz\n", .{gpu.clock_sm});
        }
        try out.writeAll("\n");
    }

    return report.toOwnedSlice();
}