feat: integrate NVML GPU monitoring into TUI

- Update TUI controller loadGPU() to use NVML when available
- Prioritize NVML over nvidia-smi command for better performance
- Show additional metrics: power draw and SM clock when available
- Maintain fallback to nvidia-smi and system_profiler
This commit is contained in:
Jeremie Fraeys 2026-02-21 15:17:22 -05:00
parent c56e53cb52
commit 20fde4f79d
No known key found for this signature in database

View file

@ -10,6 +10,7 @@ import (
tea "github.com/charmbracelet/bubbletea"
"github.com/jfraeys/fetch_ml/cmd/tui/internal/model"
"github.com/jfraeys/fetch_ml/internal/container"
"github.com/jfraeys/fetch_ml/internal/worker"
)
func shellQuote(s string) string {
@ -128,12 +129,42 @@ func (c *Controller) loadGPU() tea.Cmd {
resultChan := make(chan gpuResult, 1)
go func() {
// Try NVML first for accurate GPU info
if worker.IsNVMLAvailable() {
gpus, err := worker.GetAllGPUInfo()
if err == nil && len(gpus) > 0 {
var formatted strings.Builder
formatted.WriteString("GPU Status (NVML)\n")
formatted.WriteString(strings.Repeat("═", 50) + "\n\n")
for _, gpu := range gpus {
formatted.WriteString(fmt.Sprintf("🎮 GPU %d: %s\n", gpu.Index, gpu.Name))
formatted.WriteString(fmt.Sprintf(" Utilization: %d%%\n", gpu.Utilization))
formatted.WriteString(fmt.Sprintf(" Memory: %d/%d MB\n",
gpu.MemoryUsed/1024/1024, gpu.MemoryTotal/1024/1024))
formatted.WriteString(fmt.Sprintf(" Temperature: %d°C\n", gpu.Temperature))
if gpu.PowerDraw > 0 {
formatted.WriteString(fmt.Sprintf(" Power: %.1f W\n", float64(gpu.PowerDraw)/1000.0))
}
if gpu.ClockSM > 0 {
formatted.WriteString(fmt.Sprintf(" SM Clock: %d MHz\n", gpu.ClockSM))
}
formatted.WriteString("\n")
}
c.logger.Info("loaded GPU status", "type", "nvml", "count", len(gpus))
resultChan <- gpuResult{content: formatted.String(), err: nil}
return
}
}
// Fall back to nvidia-smi command parsing
cmd := "nvidia-smi --query-gpu=index,name,utilization.gpu," +
"memory.used,memory.total,temperature.gpu --format=csv,noheader,nounits"
out, err := c.server.Exec(cmd)
if err == nil && strings.TrimSpace(out) != "" {
var formatted strings.Builder
formatted.WriteString("GPU Status\n")
formatted.WriteString("GPU Status (nvidia-smi)\n")
formatted.WriteString(strings.Repeat("═", 50) + "\n\n")
lines := strings.Split(strings.TrimSpace(out), "\n")
for _, line := range lines {
@ -145,17 +176,18 @@ func (c *Controller) loadGPU() tea.Cmd {
formatted.WriteString(fmt.Sprintf(" Temperature: %s°C\n\n", parts[5]))
}
}
c.logger.Info("loaded GPU status", "type", "nvidia")
c.logger.Info("loaded GPU status", "type", "nvidia-smi")
resultChan <- gpuResult{content: formatted.String(), err: nil}
return
}
// Fall back to macOS system_profiler
cmd = "system_profiler SPDisplaysDataType | grep 'Chipset Model\\|VRAM' | head -2"
out, err = c.server.Exec(cmd)
if err != nil {
c.logger.Warn("GPU info unavailable", "error", err)
resultChan <- gpuResult{
content: "GPU info unavailable\n\nRun on a system with nvidia-smi or macOS GPU",
content: "GPU info unavailable\n\nRun on a system with NVIDIA GPU or macOS",
err: err,
}
return
@ -170,7 +202,7 @@ func (c *Controller) loadGPU() tea.Cmd {
formatted.WriteString("🎮 " + strings.TrimSpace(line) + "\n")
}
}
formatted.WriteString("\n💡 Note: nvidia-smi not available on macOS\n")
formatted.WriteString("\n💡 Note: NVIDIA NVML not available on macOS\n")
c.logger.Info("loaded GPU status", "type", "macos")
resultChan <- gpuResult{content: formatted.String(), err: nil}