fetch_ml/internal/worker/gpu_detector_mock.go
Jeremie Fraeys de83300962
feat(worker): refactor GPU detection with macOS Metal support
GPU detection refactor:
- Major rewrite of gpu_detector.go with unified detection interface
- Support for NVIDIA (NVML), AMD (ROCm), and Apple Metal
- Runtime GPU capability querying for scheduler matching

macOS improvements:
- gpu_macos.go: native Metal device enumeration and memory queries
- Support for Apple Silicon (M1/M2/M3) unified memory reporting
- Fallback to system profiler for Intel Macs

Testing infrastructure:
- Add gpu_detector_mock.go for testing without hardware
- Update gpu_golden_test.go with platform-specific expectations
- Cross-platform GPU info validation
2026-03-12 12:02:41 -04:00

168 lines
3.9 KiB
Go

package worker
import (
"os"
"strconv"
"github.com/jfraeys/fetch_ml/internal/scheduler"
)
// MockGPUDetector is a stand-in GPU detector for tests that run without GPU
// hardware. NewMockGPUDetector populates it from the FETCH_ML_MOCK_GPU_TYPE,
// FETCH_ML_MOCK_GPU_COUNT, FETCH_ML_MOCK_VRAM_GB and FETCH_ML_MOCK_CPU_COUNT
// environment variables; NewMockGPUDetectorWithScenario populates it from a
// predefined scenario name.
type MockGPUDetector struct {
// gpuType is the GPU type returned by GetGPUType.
gpuType GPUType
// gpuCount is the number of GPUs returned by DetectGPUCount.
gpuCount int
// vramGB is the value reported as VRAMGB by DetectCapabilities.
vramGB float64
// cpuCount is the value reported as CPUCount by DetectCapabilities.
cpuCount int
// devicePaths holds the mock device paths returned by GetDevicePaths.
devicePaths []string
}
// NewMockGPUDetector builds a MockGPUDetector from environment variables.
//
// Variables read:
//   - FETCH_ML_MOCK_GPU_TYPE: converted as-is to a GPUType; GPUTypeNone when
//     unset or empty.
//   - FETCH_ML_MOCK_GPU_COUNT: GPU count; 0 when unset, unparsable, out of
//     range, or negative.
//   - FETCH_ML_MOCK_VRAM_GB: VRAM figure; 0 when unset, unparsable, out of
//     range, negative, or NaN.
//   - FETCH_ML_MOCK_CPU_COUNT: CPU count; 8 when unset, unparsable, out of
//     range, or not positive.
//
// Invalid values fall back to their defaults rather than failing, so a
// partially configured environment still yields a usable mock. Device paths
// are derived from the resulting GPU type and count via getMockDevicePaths.
func NewMockGPUDetector() *MockGPUDetector {
	const defaultCPUCount = 8

	gpuType := GPUType(os.Getenv("FETCH_ML_MOCK_GPU_TYPE"))
	if gpuType == "" {
		gpuType = GPUTypeNone
	}

	// The error must be inspected: on a range error Atoi returns the extreme
	// int value, which would otherwise be used as the GPU count.
	gpuCount, err := strconv.Atoi(os.Getenv("FETCH_ML_MOCK_GPU_COUNT"))
	if err != nil || gpuCount < 0 {
		gpuCount = 0
	}

	// ParseFloat yields ±Inf on a range error and accepts "NaN"; the negated
	// comparison rejects NaN together with negative values.
	vramGB, err := strconv.ParseFloat(os.Getenv("FETCH_ML_MOCK_VRAM_GB"), 64)
	if err != nil || !(vramGB >= 0) {
		vramGB = 0
	}

	// A zero or negative CPU count is never meaningful, so both take the default.
	cpuCount, err := strconv.Atoi(os.Getenv("FETCH_ML_MOCK_CPU_COUNT"))
	if err != nil || cpuCount <= 0 {
		cpuCount = defaultCPUCount
	}

	return &MockGPUDetector{
		gpuType:     gpuType,
		gpuCount:    gpuCount,
		vramGB:      vramGB,
		cpuCount:    cpuCount,
		devicePaths: getMockDevicePaths(gpuType, gpuCount),
	}
}
// DetectGPUCount returns the GPU count this mock detector was configured with.
func (d *MockGPUDetector) DetectGPUCount() int {
return d.gpuCount
}
// GetGPUType returns the GPU type this mock detector was configured with.
func (d *MockGPUDetector) GetGPUType() GPUType {
return d.gpuType
}
// GetDevicePaths returns the mock device paths held by the detector.
// The detector's own slice is returned, not a copy, so callers should treat
// it as read-only.
func (d *MockGPUDetector) GetDevicePaths() []string {
return d.devicePaths
}
// DetectCapabilities assembles the scheduler.WorkerCapabilities advertised by
// this mock detector.
//
// The backend and GPU type label follow the configured GPU type: NVIDIA maps
// to scheduler.BackendNVIDIA and "nvidia", Apple maps to scheduler.BackendMetal
// and "apple", and every other type is reported as scheduler.BackendCPU and
// "cpu". GPU count, VRAM, CPU count and device paths come from the detector's
// fields; MemoryGB (32.0) and Hostname ("mock-worker") are fixed mock values.
func (d *MockGPUDetector) DetectCapabilities() scheduler.WorkerCapabilities {
	// Anything that is not explicitly NVIDIA or Apple is reported as CPU-only.
	label, backend := "cpu", scheduler.BackendCPU
	if d.gpuType == GPUTypeNVIDIA {
		label, backend = "nvidia", scheduler.BackendNVIDIA
	} else if d.gpuType == GPUTypeApple {
		label, backend = "apple", scheduler.BackendMetal
	}

	info := scheduler.GPUDetectionInfo{
		GPUType: label,
		Count:   d.gpuCount,
		Devices: d.devicePaths,
	}

	return scheduler.WorkerCapabilities{
		GPUBackend: backend,
		GPUCount:   d.gpuCount,
		GPUType:    label,
		VRAMGB:     d.vramGB,
		CPUCount:   d.cpuCount,
		MemoryGB:   32.0, // Fixed value for the mock.
		Hostname:   "mock-worker",
		GPUInfo:    info,
	}
}
// getMockDevicePaths builds the list of fake device paths for a GPU type.
//
// For NVIDIA it returns "/dev/nvidiactl" and "/dev/nvidia-uvm" followed by one
// "/dev/nvidiaN" entry per GPU, with at most eight per-GPU entries regardless
// of count. For Apple it returns the fixed pair "/dev/metal" and "/dev/mps";
// count is ignored. Any other type yields an empty, non-nil slice.
func getMockDevicePaths(gpuType GPUType, count int) []string {
	if gpuType == GPUTypeApple {
		return []string{"/dev/metal", "/dev/mps"}
	}
	if gpuType != GPUTypeNVIDIA {
		return []string{}
	}

	// Only the first eight per-GPU nodes are ever listed.
	perGPU := count
	if perGPU > 8 {
		perGPU = 8
	}

	nodes := []string{"/dev/nvidiactl", "/dev/nvidia-uvm"}
	for idx := 0; idx < perGPU; idx++ {
		nodes = append(nodes, "/dev/nvidia"+strconv.Itoa(idx))
	}
	return nodes
}
// Predefined scenario names accepted by NewMockGPUDetectorWithScenario.
const (
// MockScenario2xNVIDIAA100 selects two NVIDIA GPUs with vramGB 80 and 64 CPUs.
MockScenario2xNVIDIAA100 = "2x-nvidia-a100"
// MockScenario4xMetal selects four Apple GPUs with vramGB 128 and 24 CPUs.
MockScenario4xMetal = "4x-metal"
// MockScenarioCPUOnly selects no GPUs and 32 CPUs.
MockScenarioCPUOnly = "cpu-only"
)
// NewMockGPUDetectorWithScenario returns a mock detector preconfigured for one
// of the MockScenario* names.
//
// An unrecognized scenario yields a detector with no GPU, 8 CPUs and an empty
// device path list. Each call returns a freshly allocated detector.
func NewMockGPUDetectorWithScenario(scenario string) *MockGPUDetector {
	// Start from the fallback configuration and override it per scenario.
	det := MockGPUDetector{
		gpuType:     GPUTypeNone,
		cpuCount:    8,
		devicePaths: []string{},
	}

	switch scenario {
	case MockScenario2xNVIDIAA100:
		det.gpuType = GPUTypeNVIDIA
		det.gpuCount = 2
		det.vramGB = 80.0 // A100 80GB
		det.cpuCount = 64
		det.devicePaths = []string{"/dev/nvidia0", "/dev/nvidia1"}
	case MockScenario4xMetal:
		det.gpuType = GPUTypeApple
		det.gpuCount = 4
		det.vramGB = 128.0 // Unified memory
		det.cpuCount = 24
		det.devicePaths = []string{"/dev/metal"}
	case MockScenarioCPUOnly:
		// Same as the fallback except for the CPU count.
		det.cpuCount = 32
	}

	return &det
}
// IsMockGPUEnabled reports whether mock GPU detection has been requested
// through the environment, i.e. whether FETCH_ML_MOCK_GPU_TYPE or
// FETCH_ML_MOCK_GPU_COUNT is set to a non-empty value.
func IsMockGPUEnabled() bool {
	for _, name := range []string{"FETCH_ML_MOCK_GPU_TYPE", "FETCH_ML_MOCK_GPU_COUNT"} {
		if os.Getenv(name) != "" {
			return true
		}
	}
	return false
}
// GetMockDetector returns a detector built by NewMockGPUDetector when
// IsMockGPUEnabled reports true, and nil otherwise.
//
// The nil result is a typed *MockGPUDetector; compare it against nil before
// storing it in an interface value, because an interface holding a nil
// pointer is itself non-nil.
func GetMockDetector() *MockGPUDetector {
	if IsMockGPUEnabled() {
		return NewMockGPUDetector()
	}
	return nil
}