From 20fde4f79d30f538e15a385014efe9f0457da735 Mon Sep 17 00:00:00 2001 From: Jeremie Fraeys Date: Sat, 21 Feb 2026 15:17:22 -0500 Subject: [PATCH] feat: integrate NVML GPU monitoring into TUI - Update TUI controller loadGPU() to use NVML when available - Prioritize NVML over nvidia-smi command for better performance - Show additional metrics: power draw, SM clock when available - Maintain fallback to nvidia-smi and system_profiler --- cmd/tui/internal/controller/commands.go | 40 ++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/cmd/tui/internal/controller/commands.go b/cmd/tui/internal/controller/commands.go index 25dfa3a..b95b926 100644 --- a/cmd/tui/internal/controller/commands.go +++ b/cmd/tui/internal/controller/commands.go @@ -10,6 +10,7 @@ import ( tea "github.com/charmbracelet/bubbletea" "github.com/jfraeys/fetch_ml/cmd/tui/internal/model" "github.com/jfraeys/fetch_ml/internal/container" + "github.com/jfraeys/fetch_ml/internal/worker" ) func shellQuote(s string) string { @@ -128,12 +129,42 @@ func (c *Controller) loadGPU() tea.Cmd { resultChan := make(chan gpuResult, 1) go func() { + // Try NVML first for accurate GPU info + if worker.IsNVMLAvailable() { + gpus, err := worker.GetAllGPUInfo() + if err == nil && len(gpus) > 0 { + var formatted strings.Builder + formatted.WriteString("GPU Status (NVML)\n") + formatted.WriteString(strings.Repeat("═", 50) + "\n\n") + + for _, gpu := range gpus { + formatted.WriteString(fmt.Sprintf("🎮 GPU %d: %s\n", gpu.Index, gpu.Name)) + formatted.WriteString(fmt.Sprintf(" Utilization: %d%%\n", gpu.Utilization)) + formatted.WriteString(fmt.Sprintf(" Memory: %d/%d MB\n", + gpu.MemoryUsed/1024/1024, gpu.MemoryTotal/1024/1024)) + formatted.WriteString(fmt.Sprintf(" Temperature: %d°C\n", gpu.Temperature)) + if gpu.PowerDraw > 0 { + formatted.WriteString(fmt.Sprintf(" Power: %.1f W\n", float64(gpu.PowerDraw)/1000.0)) + } + if gpu.ClockSM > 0 { + formatted.WriteString(fmt.Sprintf(" SM Clock: %d MHz\n", gpu.ClockSM)) + } + formatted.WriteString("\n") + } + + c.logger.Info("loaded GPU status", "type", "nvml", "count", len(gpus)) + resultChan <- gpuResult{content: formatted.String(), err: nil} + return + } + } + + // Fall back to nvidia-smi command parsing cmd := "nvidia-smi --query-gpu=index,name,utilization.gpu," + "memory.used,memory.total,temperature.gpu --format=csv,noheader,nounits" out, err := c.server.Exec(cmd) if err == nil && strings.TrimSpace(out) != "" { var formatted strings.Builder - formatted.WriteString("GPU Status\n") + formatted.WriteString("GPU Status (nvidia-smi)\n") formatted.WriteString(strings.Repeat("═", 50) + "\n\n") lines := strings.Split(strings.TrimSpace(out), "\n") for _, line := range lines { @@ -145,17 +176,18 @@ func (c *Controller) loadGPU() tea.Cmd { formatted.WriteString(fmt.Sprintf(" Temperature: %s°C\n\n", parts[5])) } } - c.logger.Info("loaded GPU status", "type", "nvidia") + c.logger.Info("loaded GPU status", "type", "nvidia-smi") resultChan <- gpuResult{content: formatted.String(), err: nil} return } + // Fall back to macOS system_profiler cmd = "system_profiler SPDisplaysDataType | grep 'Chipset Model\\|VRAM' | head -2" out, err = c.server.Exec(cmd) if err != nil { c.logger.Warn("GPU info unavailable", "error", err) resultChan <- gpuResult{ - content: "GPU info unavailable\n\nRun on a system with nvidia-smi or macOS GPU", + content: "GPU info unavailable\n\nRun on a system with NVIDIA GPU or macOS", err: err, } return @@ -170,7 +202,7 @@ func (c *Controller) loadGPU() tea.Cmd { formatted.WriteString("🎮 " + strings.TrimSpace(line) + "\n") } } - formatted.WriteString("\n💡 Note: nvidia-smi not available on macOS\n") + formatted.WriteString("\n💡 Note: NVIDIA NVML not available on macOS\n") c.logger.Info("loaded GPU status", "type", "macos") resultChan <- gpuResult{content: formatted.String(), err: nil}