fetch_ml/internal/worker/gpu_macos.go
Jeremie Fraeys 3fb6902fa1
feat(worker): integrate scheduler endpoints and security hardening
Update worker system for scheduler integration:
- Worker server with scheduler registration
- Configuration with scheduler endpoint support
- Artifact handling with integrity verification
- Container executor with supply chain validation
- Local executor enhancements
- GPU detection improvements (cross-platform)
- Error handling with execution context
- Factory pattern for executor instantiation
- Hash integrity with native library support
2026-02-26 12:06:16 -05:00

282 lines
7.4 KiB
Go

//go:build darwin
// +build darwin
package worker
import (
"bufio"
"context"
"encoding/json"
"fmt"
"os"
"os/exec"
"regexp"
"runtime"
"strconv"
"strings"
"time"
)
// MacOSGPUInfo holds information about a macOS GPU.
//
// Values of this type are built by GetMacOSGPUInfo from the JSON emitted by
// `system_profiler SPDisplaysDataType -json`. The struct is JSON-serialisable
// using the snake_case keys given in the field tags.
type MacOSGPUInfo struct {
// Name is the display name of the GPU. GetMacOSGPUInfo sets it to the same
// string as ChipsetModel.
Name string `json:"name"`
// ChipsetModel is the value of the "sppci_model" key in the profiler entry.
ChipsetModel string `json:"chipset_model"`
// Index is the zero-based position of the entry in the profiler's array.
Index uint32 `json:"index"`
// VRAM_MB is the leading integer parsed from the "sppci_vram" string; it
// stays 0 when that key is missing or cannot be parsed.
VRAM_MB uint32 `json:"vram_mb"`
// UtilizationPercent, PowerMW and TemperatureC are omitted from JSON when
// zero. NOTE(review): nothing in the visible part of this file assigns
// them — confirm whether another file fills them in.
UtilizationPercent uint32 `json:"utilization_percent,omitempty"`
PowerMW uint32 `json:"power_mw,omitempty"`
TemperatureC uint32 `json:"temperature_c,omitempty"`
// IsIntegrated is true when the profiler entry contains the key
// "sppci_vram_shared".
IsIntegrated bool `json:"is_integrated"`
// IsAppleSilicon mirrors the host-wide result of IsAppleSilicon(); it is
// identical for every entry returned by a single GetMacOSGPUInfo call.
IsAppleSilicon bool `json:"is_apple_silicon"`
}
// PowermetricsData holds GPU metrics parsed from the text output of the
// `powermetrics` command (see GetPowermetricsData).
type PowermetricsData struct {
// GPUUtilization is the number found in front of a '%' sign on a
// utilization line, i.e. a percentage.
GPUUtilization float64
// GPUPower is the number found in front of "mW" on a GPU power line, i.e.
// milliwatts. FormatMacOSGPUStatus divides it by 1000 to print watts.
GPUPower float64
// GPUTemperature is the number found in front of 'C' on a GPU temperature
// line, i.e. degrees Celsius.
GPUTemperature float64
// HasData is true when at least one of the three readings above was parsed
// successfully; when false the other fields are all zero.
HasData bool
}
// IsMacOS reports whether the running binary was built for macOS, i.e.
// whether runtime.GOOS is "darwin".
func IsMacOS() bool {
	switch runtime.GOOS {
	case "darwin":
		return true
	default:
		return false
	}
}
// IsAppleSilicon reports whether the host is a macOS machine whose
// `uname -m` hardware name is "arm64".
//
// It returns false when runtime.GOOS is not "darwin" or when the uname
// command cannot be run. Each call spawns a subprocess on darwin.
func IsAppleSilicon() bool {
	if runtime.GOOS == "darwin" {
		// Ask the OS for the machine hardware name.
		if machine, err := exec.Command("uname", "-m").Output(); err == nil {
			return strings.TrimSpace(string(machine)) == "arm64"
		}
	}
	return false
}
// GetMacOSGPUCount returns the number of GPU entries reported on macOS.
//
// The count is the length of the "SPDisplaysDataType" array in the JSON
// produced by `system_profiler SPDisplaysDataType -json`. If that command
// fails, the result of getGPUCountViaGfxutil is returned instead. An error is
// returned when not running on darwin or when the JSON cannot be decoded; a
// missing or non-array "SPDisplaysDataType" key yields (0, nil).
func GetMacOSGPUCount() (int, error) {
	if runtime.GOOS != "darwin" {
		return 0, fmt.Errorf("not running on macOS")
	}

	raw, err := exec.Command("system_profiler", "SPDisplaysDataType", "-json").Output()
	if err != nil {
		// system_profiler unavailable: try the gfxutil-based fallback.
		return getGPUCountViaGfxutil()
	}

	var report map[string]interface{}
	if decodeErr := json.Unmarshal(raw, &report); decodeErr != nil {
		return 0, decodeErr
	}

	entries, isList := report["SPDisplaysDataType"].([]interface{})
	if !isList {
		return 0, nil
	}
	return len(entries), nil
}
// getGPUCountViaGfxutil is the fallback GPU counter. It runs
// `gfxutil -f display` and returns how many output lines contain the
// substring "Display" (typically one per GPU).
//
// The gfxutil executable must be resolvable via PATH; if it cannot be run,
// the exec error is returned together with a count of 0.
func getGPUCountViaGfxutil() (int, error) {
	raw, err := exec.Command("gfxutil", "-f", "display").Output()
	if err != nil {
		return 0, err
	}

	total := 0
	for _, entry := range strings.Split(strings.TrimSpace(string(raw)), "\n") {
		if !strings.Contains(entry, "Display") {
			continue
		}
		total++
	}
	return total, nil
}
// GetMacOSGPUInfo returns detailed information about macOS GPUs, one
// MacOSGPUInfo per object in the "SPDisplaysDataType" array printed by
// `system_profiler SPDisplaysDataType -json`.
//
// An error is returned when not running on darwin, when system_profiler
// cannot be executed, or when its output is not valid JSON. When the JSON has
// no "SPDisplaysDataType" array an empty, non-nil slice is returned.
//
// For each entry: Index is its position in the array, Name and ChipsetModel
// come from "sppci_model", IsIntegrated is set when the key
// "sppci_vram_shared" is present, and VRAM_MB is derived from "sppci_vram"
// (see parseMacOSVRAMMB). Array elements that are not JSON objects are
// skipped.
func GetMacOSGPUInfo() ([]MacOSGPUInfo, error) {
	if runtime.GOOS != "darwin" {
		return nil, fmt.Errorf("not running on macOS")
	}

	out, err := exec.Command("system_profiler", "SPDisplaysDataType", "-json").Output()
	if err != nil {
		return nil, fmt.Errorf("running system_profiler: %w", err)
	}

	var data map[string]interface{}
	if err := json.Unmarshal(out, &data); err != nil {
		return nil, fmt.Errorf("decoding system_profiler output: %w", err)
	}

	spData, ok := data["SPDisplaysDataType"].([]interface{})
	if !ok {
		return []MacOSGPUInfo{}, nil
	}

	// Host-wide property: probe once and stamp it on every entry.
	isAppleSilicon := IsAppleSilicon()

	var gpus []MacOSGPUInfo
	for i, item := range spData {
		gpuData, ok := item.(map[string]interface{})
		if !ok {
			continue
		}

		info := MacOSGPUInfo{
			Index:          uint32(i),
			IsAppleSilicon: isAppleSilicon,
		}

		// The chipset model doubles as the display name.
		if model, ok := gpuData["sppci_model"].(string); ok {
			info.ChipsetModel = model
			info.Name = model
		}

		// Presence of the shared-memory key marks an integrated GPU.
		if _, ok := gpuData["sppci_vram_shared"]; ok {
			info.IsIntegrated = true
		}

		if vram, ok := gpuData["sppci_vram"].(string); ok {
			info.VRAM_MB = parseMacOSVRAMMB(vram)
		}

		gpus = append(gpus, info)
	}
	return gpus, nil
}

// parseMacOSVRAMMB converts a size string of the form "<integer> [unit]"
// (e.g. "16384 MB" or "8 GB") into megabytes.
//
// A "GB" unit (case-insensitive) is scaled by 1024 so that "8 GB" is not
// recorded as 8 MB; any other or missing unit leaves the number unchanged.
// Unparseable input yields 0, and results that do not fit in a uint32
// saturate at the maximum uint32 value.
func parseMacOSVRAMMB(s string) uint32 {
	const maxMB = uint64(^uint32(0))

	parts := strings.Fields(s)
	if len(parts) == 0 {
		return 0
	}
	mb, err := strconv.ParseUint(parts[0], 10, 32)
	if err != nil {
		return 0
	}
	if len(parts) >= 2 && strings.EqualFold(parts[1], "GB") {
		mb *= 1024 // cannot overflow uint64: mb < 2^32 here
	}
	if mb > maxMB {
		return uint32(maxMB)
	}
	return uint32(mb)
}
// Patterns used to pull numeric readings out of powermetrics output. They are
// compiled once at package initialisation instead of once per scanned line.
var (
	powermetricsPercentRe   = regexp.MustCompile(`(\d+(?:\.\d+)?)\s*%`)
	powermetricsMilliwattRe = regexp.MustCompile(`(\d+(?:\.\d+)?)\s*mW`)
	powermetricsCelsiusRe   = regexp.MustCompile(`(\d+(?:\.\d+)?)\s*C`)
)

// GetPowermetricsData tries to get real-time GPU metrics by running
// `powermetrics --samplers gpu_power -n 1 -i 100` with a 3 second timeout.
//
// powermetrics typically needs sudo, so failure is expected and is not treated
// as an error: when the command cannot be run the function returns a
// PowermetricsData with HasData == false and a nil error. Unless the failure
// was the timeout, a warning is also written to os.Stderr. The returned error
// is always nil and the returned pointer is never nil.
func GetPowermetricsData() (*PowermetricsData, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	cmd := exec.CommandContext(ctx, "powermetrics", "--samplers", "gpu_power", "-n", "1", "-i", "100")
	out, err := cmd.Output()
	if err != nil {
		// powermetrics not available or no permission.
		if ctx.Err() != context.DeadlineExceeded {
			fmt.Fprintln(os.Stderr, "Warning: powermetrics requires sudo for GPU metrics")
		}
		return &PowermetricsData{HasData: false}, nil
	}
	return parsePowermetricsOutput(string(out)), nil
}

// parsePowermetricsOutput scans powermetrics text line by line and extracts
// GPU utilization (%), power (mW) and temperature (C). Lines look like
// "GPU Power: 5000 mW" or "GPU utilization: 45%". If a reading appears more
// than once the last occurrence wins. HasData is set as soon as any reading
// parses.
func parsePowermetricsOutput(out string) *PowermetricsData {
	data := &PowermetricsData{HasData: false}

	scanner := bufio.NewScanner(strings.NewReader(out))
	for scanner.Scan() {
		line := scanner.Text()

		if strings.Contains(line, "GPU utilization") || strings.Contains(line, "GPU active") {
			if util, ok := powermetricsReading(powermetricsPercentRe, line); ok {
				data.GPUUtilization = util
				data.HasData = true
			}
		}

		if strings.Contains(line, "GPU Power") || strings.Contains(line, "GPU power") {
			if power, ok := powermetricsReading(powermetricsMilliwattRe, line); ok {
				data.GPUPower = power
				data.HasData = true
			}
		}

		// Temperature is only present on some systems.
		if strings.Contains(line, "GPU Temperature") || strings.Contains(line, "GPU temp") {
			if temp, ok := powermetricsReading(powermetricsCelsiusRe, line); ok {
				data.GPUTemperature = temp
				data.HasData = true
			}
		}
	}
	// scanner.Err is deliberately not surfaced: the input is in memory, so
	// the only possible failure is an over-long line, and metrics are
	// best-effort — whatever was parsed before that point is still returned.
	return data
}

// powermetricsReading returns the first capture group of re in line parsed as
// a float64, and false when re does not match or the number is malformed.
func powermetricsReading(re *regexp.Regexp, line string) (float64, bool) {
	m := re.FindStringSubmatch(line)
	if len(m) < 2 {
		return 0, false
	}
	v, err := strconv.ParseFloat(m[1], 64)
	if err != nil {
		return 0, false
	}
	return v, true
}
// FormatMacOSGPUStatus formats GPU status for display as a multi-line,
// human-readable string.
//
// It returns the error from GetMacOSGPUInfo unchanged. When no GPUs are
// reported it returns a fixed "GPU info unavailable" message and does not
// invoke powermetrics at all. Otherwise it prints a header, one section per
// GPU (type or VRAM, plus any positive powermetrics readings), and a trailing
// hint when no powermetrics data could be collected. The powermetrics
// readings come from a single sample and are repeated under every GPU.
func FormatMacOSGPUStatus() (string, error) {
	gpus, err := GetMacOSGPUInfo()
	if err != nil {
		return "", err
	}
	if len(gpus) == 0 {
		return "GPU info unavailable\n\nRun on a system with NVIDIA GPU or macOS", nil
	}

	// Real-time metrics are optional; the error is ignored because
	// GetPowermetricsData signals "nothing available" through HasData.
	// Sampling happens only now so the subprocess is not spawned when there
	// is nothing to display.
	powermetrics, _ := GetPowermetricsData()
	hasMetrics := powermetrics != nil && powermetrics.HasData

	var b strings.Builder
	// GetMacOSGPUInfo stamps the same host-wide flag on every entry, so the
	// first one is representative and no extra uname subprocess is needed.
	if gpus[0].IsAppleSilicon {
		b.WriteString("GPU Status (macOS - Apple Silicon)\n")
	} else {
		b.WriteString("GPU Status (macOS)\n")
	}
	b.WriteString(strings.Repeat("═", 50) + "\n\n")

	for _, gpu := range gpus {
		fmt.Fprintf(&b, "🎮 GPU %d: %s\n", gpu.Index, gpu.Name)

		switch {
		case gpu.IsAppleSilicon:
			b.WriteString(" Type: Apple Silicon (Unified Memory)\n")
		case gpu.IsIntegrated:
			b.WriteString(" Type: Integrated (Shared Memory)\n")
		default:
			fmt.Fprintf(&b, " VRAM: %d MB\n", gpu.VRAM_MB)
		}

		// Zero readings are treated as "not reported" and left out.
		if hasMetrics {
			if powermetrics.GPUUtilization > 0 {
				fmt.Fprintf(&b, " Utilization: %.1f%%\n", powermetrics.GPUUtilization)
			}
			if powermetrics.GPUPower > 0 {
				// GPUPower is in milliwatts; display watts.
				fmt.Fprintf(&b, " Power: %.1f W\n", powermetrics.GPUPower/1000)
			}
			if powermetrics.GPUTemperature > 0 {
				fmt.Fprintf(&b, " Temperature: %.0f°C\n", powermetrics.GPUTemperature)
			}
		}
		b.WriteString("\n")
	}

	if !hasMetrics {
		b.WriteString("💡 Note: Run with sudo for real-time GPU metrics via powermetrics\n")
	}
	return b.String(), nil
}