fetch_ml/tests/unit/gpu/gpu_golden_test.go
Jeremie Fraeys 3b194ff2e8
Some checks failed
Build CLI with Embedded SQLite / build (arm64, aarch64-linux) (push) Waiting to run
Build CLI with Embedded SQLite / build (x86_64, x86_64-linux) (push) Waiting to run
Build CLI with Embedded SQLite / build-macos (arm64) (push) Waiting to run
Build CLI with Embedded SQLite / build-macos (x86_64) (push) Waiting to run
Security Scan / Security Analysis (push) Waiting to run
Security Scan / Native Library Security (push) Waiting to run
Checkout test / test (push) Successful in 6s
CI/CD Pipeline / Test (push) Failing after 1s
CI/CD Pipeline / Dev Compose Smoke Test (push) Has been skipped
CI/CD Pipeline / Build (push) Has been skipped
CI/CD Pipeline / Test Scripts (push) Has been skipped
CI/CD Pipeline / Test Native Libraries (push) Has been skipped
CI/CD Pipeline / GPU Golden Test Matrix (push) Has been skipped
Documentation / build-and-publish (push) Failing after 39s
CI/CD Pipeline / Docker Build (push) Has been skipped
feat: GPU detection transparency and artifact scanner improvements
- Surface GPUDetectionInfo from parseGPUCountFromConfig for detection metadata
- Document FETCH_ML_TOTAL_CPU and FETCH_ML_GPU_SLOTS_PER_GPU env vars
- Add debug logging for all env var overrides to stderr
- Track config-layer auto-detection in GPUDetectionInfo.ConfigLayerAutoDetected
- Add --include-all flag to artifact scanner (includeAll parameter)
- Add AMD production mode enforcement (error in non-local mode)
- Add GPU detector unit tests for env overrides and AMD aliasing
2026-02-23 12:29:34 -05:00

311 lines
10 KiB
Go

package worker_test
import (
"encoding/json"
"os"
"testing"
"github.com/jfraeys/fetch_ml/internal/worker"
)
// GoldenGPUStatus is the GPU status snapshot that the golden tests assemble
// and serialize. The JSON tags fix the serialized field names; fields tagged
// omitempty are dropped from the output when they hold their zero value.
type GoldenGPUStatus struct {
	// Detection outcome.
	GPUCount         int    `json:"gpu_count"`
	GPUType          string `json:"gpu_type"`
	ConfiguredVendor string `json:"configured_vendor"`
	DetectionMethod  string `json:"detection_method"`

	// Environment override details (omitted from JSON when unset).
	EnvOverrideType  string `json:"env_override_type,omitempty"`
	EnvOverrideCount int    `json:"env_override_count,omitempty"`

	// Build configuration the snapshot was taken under.
	BuildTags       map[string]bool `json:"build_tags"`
	NativeAvailable bool            `json:"native_available"`

	// Extra carries free-form additional values (omitted from JSON when empty).
	Extra map[string]interface{} `json:"extra,omitempty"`
}
// detectBuildTags reports which build configuration the test binary appears
// to have been compiled with.
//
// Build tags cannot be queried at run time, so cgo and native_libs are
// inferred from the name returned by worker.GetSIMDImplName:
//   - "disabled (no CGO)" means cgo is off (and therefore native_libs too);
//   - "disabled" means cgo is on but native_libs is not;
//   - any other name means both are on.
//
// The OS entries follow worker.IsMacOS: when it reports false the platform is
// recorded as linux.
func detectBuildTags() map[string]bool {
	impl := worker.GetSIMDImplName()
	withoutCGO := impl == "disabled (no CGO)"
	onMac := worker.IsMacOS()

	return map[string]bool{
		"cgo":         !withoutCGO,
		"native_libs": impl != "disabled" && !withoutCGO,
		"darwin":      onMac,
		"linux":       !onMac,
	}
}
// TestGoldenGPUStatusNVML checks the GPU status produced for an "nvidia"
// configuration. It runs under every build configuration, with different
// expectations for each:
//   - cgo + native_libs: the detection method must be "config"; the GPU count
//     is not constrained.
//   - cgo without native_libs, or no cgo: NativeAvailable must be false; a
//     non-zero GPU count is only logged, not treated as a failure.
//
// In all cases the configured vendor must be reported as "nvidia".
func TestGoldenGPUStatusNVML(t *testing.T) {
	detection := (&worker.GPUDetectorFactory{}).CreateDetectorWithInfo(&worker.Config{
		GPUVendor: "nvidia",
	})

	// The count depends on the build configuration and the host hardware.
	gpus := detection.Detector.DetectGPUCount()
	tags := detectBuildTags()
	nativeBuild := tags["native_libs"] && tags["cgo"]

	got := GoldenGPUStatus{
		GPUCount:         gpus,
		GPUType:          string(detection.Info.GPUType),
		ConfiguredVendor: detection.Info.ConfiguredVendor,
		DetectionMethod:  string(detection.Info.DetectionMethod),
		BuildTags:        tags,
		NativeAvailable:  nativeBuild,
	}

	// Build-specific expectations.
	switch {
	case nativeBuild:
		// The count may legitimately be anything here; only the detection
		// method is pinned.
		if got.DetectionMethod != "config" {
			t.Errorf("cgo+native_libs: DetectionMethod = %v, want 'config'", got.DetectionMethod)
		}
	case tags["cgo"]:
		if got.GPUCount != 0 {
			t.Logf("cgo-only build: GPUCount = %d (expected 0 from stub)", got.GPUCount)
		}
		if got.NativeAvailable {
			t.Error("cgo-only build: NativeAvailable should be false")
		}
	default:
		if got.GPUCount != 0 {
			t.Logf("nocgo build: GPUCount = %d (expected 0 from stub)", got.GPUCount)
		}
		if got.NativeAvailable {
			t.Error("nocgo build: NativeAvailable should be false")
		}
	}

	// Expectation shared by every build.
	if got.ConfiguredVendor != "nvidia" {
		t.Errorf("ConfiguredVendor = %v, want 'nvidia'", got.ConfiguredVendor)
	}
}
// TestGoldenGPUStatusAMDVendorAlias checks that an "amd" configuration is
// reported with ConfiguredVendor "amd" while GPUType is "nvidia", so the
// aliasing of AMD onto the NVIDIA implementation stays visible in the status.
// Build tags: all three configurations
// Runtime scenarios: amd config
func TestGoldenGPUStatusAMDVendorAlias(t *testing.T) {
	detection := (&worker.GPUDetectorFactory{}).CreateDetectorWithInfo(&worker.Config{
		GPUVendor: "amd",
	})
	tags := detectBuildTags()

	got := GoldenGPUStatus{
		GPUCount:         detection.Detector.DetectGPUCount(),
		GPUType:          string(detection.Info.GPUType),
		ConfiguredVendor: detection.Info.ConfiguredVendor,
		DetectionMethod:  string(detection.Info.DetectionMethod),
		BuildTags:        tags,
		NativeAvailable:  tags["native_libs"] && tags["cgo"],
	}

	// The configured vendor is preserved as written...
	if got.ConfiguredVendor != "amd" {
		t.Errorf("AMD config: ConfiguredVendor = %v, want 'amd'", got.ConfiguredVendor)
	}
	// ...while the effective GPU type shows the implementation actually used.
	if got.GPUType != "nvidia" {
		t.Errorf("AMD config: GPUType = %v, want 'nvidia' (AMD aliased to NVIDIA implementation)", got.GPUType)
	}
}
// TestGoldenGPUStatusEnvOverride validates env override behavior across build configs.
// Build tags: all three
// Runtime scenarios: env override set
//
// FETCH_ML_GPU_TYPE and FETCH_ML_GPU_COUNT are set for the duration of the
// test while the config asks for "apple"; the test expects the env values to
// win. Both variables are put back to their previous state afterwards, so a
// value exported by the caller (for example a CI job) is not lost.
func TestGoldenGPUStatusEnvOverride(t *testing.T) {
	overrides := [][2]string{
		{"FETCH_ML_GPU_TYPE", "nvidia"},
		{"FETCH_ML_GPU_COUNT", "4"},
	}
	for _, kv := range overrides {
		name, value := kv[0], kv[1]
		previous, hadPrevious := os.LookupEnv(name)
		if err := os.Setenv(name, value); err != nil {
			t.Fatalf("Env override: setting %s: %v", name, err)
		}
		t.Cleanup(func() {
			// Restore instead of blindly unsetting: the variable may have
			// been set before the test started.
			var err error
			if hadPrevious {
				err = os.Setenv(name, previous)
			} else {
				err = os.Unsetenv(name)
			}
			if err != nil {
				t.Errorf("Env override: restoring %s: %v", name, err)
			}
		})
	}

	factory := &worker.GPUDetectorFactory{}
	result := factory.CreateDetectorWithInfo(&worker.Config{GPUVendor: "apple"})
	buildTags := detectBuildTags()

	got := GoldenGPUStatus{
		GPUCount:         result.Detector.DetectGPUCount(),
		GPUType:          string(result.Info.GPUType),
		ConfiguredVendor: result.Info.ConfiguredVendor,
		DetectionMethod:  string(result.Info.DetectionMethod),
		EnvOverrideType:  result.Info.EnvOverrideType,
		EnvOverrideCount: result.Info.EnvOverrideCount,
		BuildTags:        buildTags,
	}

	// Env should take precedence over config
	if got.DetectionMethod != "env_override_both" {
		t.Errorf("Env override: DetectionMethod = %v, want 'env_override_both'", got.DetectionMethod)
	}
	if got.GPUType != "nvidia" {
		t.Errorf("Env override: GPUType = %v, want 'nvidia'", got.GPUType)
	}
	if got.EnvOverrideType != "nvidia" {
		t.Errorf("Env override: EnvOverrideType = %v, want 'nvidia'", got.EnvOverrideType)
	}
	if got.EnvOverrideCount != 4 {
		t.Errorf("Env override: EnvOverrideCount = %v, want 4", got.EnvOverrideCount)
	}
}
// TestGoldenGPUStatusMacOS checks the status reported for an Apple GPU
// configuration. It is skipped unless worker.IsMacOS reports true.
// Build tags: cgo+native_libs on Darwin
// Runtime scenarios: darwin
func TestGoldenGPUStatusMacOS(t *testing.T) {
	if onMac := worker.IsMacOS(); !onMac {
		t.Skip("Skipping macOS-specific test on non-Darwin platform")
	}

	detection := (&worker.GPUDetectorFactory{}).CreateDetectorWithInfo(&worker.Config{
		GPUVendor: "apple",
		AppleGPU:  worker.AppleGPUConfig{Enabled: true},
	})
	tags := detectBuildTags()

	got := GoldenGPUStatus{
		GPUCount:         detection.Detector.DetectGPUCount(),
		GPUType:          string(detection.Info.GPUType),
		ConfiguredVendor: detection.Info.ConfiguredVendor,
		DetectionMethod:  string(detection.Info.DetectionMethod),
		BuildTags:        tags,
		// For this scenario native availability simply tracks the darwin tag.
		NativeAvailable: tags["darwin"],
	}

	if got.ConfiguredVendor != "apple" {
		t.Errorf("macOS: ConfiguredVendor = %v, want 'apple'", got.ConfiguredVendor)
	}
	if got.GPUType != "apple" {
		t.Errorf("macOS: GPUType = %v, want 'apple'", got.GPUType)
	}
	if darwin := got.BuildTags["darwin"]; !darwin {
		t.Error("macOS: darwin build tag should be true")
	}
}
// TestGoldenGPUStatusNone validates the no-GPU configuration: with the vendor
// set to "none" the detector must report zero GPUs and the configured vendor
// must be echoed back as "none".
// Build tags: all three
// Runtime scenarios: none
func TestGoldenGPUStatusNone(t *testing.T) {
	cfg := &worker.Config{
		GPUVendor: "none",
	}
	factory := &worker.GPUDetectorFactory{}
	result := factory.CreateDetectorWithInfo(cfg)

	// Detect once so the failure message reports the exact value that was
	// checked rather than the result of a second detection run.
	if count := result.Detector.DetectGPUCount(); count != 0 {
		t.Errorf("none config: GPUCount = %v, want 0", count)
	}
	if result.Info.ConfiguredVendor != "none" {
		t.Errorf("none config: ConfiguredVendor = %v, want 'none'", result.Info.ConfiguredVendor)
	}
}
// TestGoldenJSONSerialization validates that the GPU status survives a JSON
// round trip: the status is marshalled, parsed back, and every populated field
// is compared against the value that was serialized.
//
// FETCH_ML_GPU_TYPE and FETCH_ML_GPU_COUNT are set for the duration of the
// test and put back to their previous state afterwards, so a value exported by
// the caller (for example a CI job) is not lost.
func TestGoldenJSONSerialization(t *testing.T) {
	overrides := [][2]string{
		{"FETCH_ML_GPU_TYPE", "nvidia"},
		{"FETCH_ML_GPU_COUNT", "2"},
	}
	for _, kv := range overrides {
		name, value := kv[0], kv[1]
		previous, hadPrevious := os.LookupEnv(name)
		if err := os.Setenv(name, value); err != nil {
			t.Fatalf("JSON roundtrip: setting %s: %v", name, err)
		}
		t.Cleanup(func() {
			// Restore instead of blindly unsetting: the variable may have
			// been set before the test started.
			var err error
			if hadPrevious {
				err = os.Setenv(name, previous)
			} else {
				err = os.Unsetenv(name)
			}
			if err != nil {
				t.Errorf("JSON roundtrip: restoring %s: %v", name, err)
			}
		})
	}

	factory := &worker.GPUDetectorFactory{}
	result := factory.CreateDetectorWithInfo(nil)

	status := GoldenGPUStatus{
		GPUCount:         result.Detector.DetectGPUCount(),
		GPUType:          string(result.Info.GPUType),
		ConfiguredVendor: result.Info.ConfiguredVendor,
		DetectionMethod:  string(result.Info.DetectionMethod),
		EnvOverrideType:  result.Info.EnvOverrideType,
		EnvOverrideCount: result.Info.EnvOverrideCount,
		BuildTags:        detectBuildTags(),
	}

	// Serialize to JSON (this mimics what ml status --json would output)
	jsonData, err := json.MarshalIndent(status, "", " ")
	if err != nil {
		t.Fatalf("JSON serialization failed: %v", err)
	}

	// Verify JSON can be parsed back
	var parsed GoldenGPUStatus
	if err := json.Unmarshal(jsonData, &parsed); err != nil {
		t.Fatalf("JSON deserialization failed: %v", err)
	}

	// Compare every scalar field, not just a sample, so a broken or
	// mistyped JSON tag on any of them is caught.
	scalars := []struct {
		field     string
		got, want interface{}
	}{
		{"GPUCount", parsed.GPUCount, status.GPUCount},
		{"GPUType", parsed.GPUType, status.GPUType},
		{"ConfiguredVendor", parsed.ConfiguredVendor, status.ConfiguredVendor},
		{"DetectionMethod", parsed.DetectionMethod, status.DetectionMethod},
		{"EnvOverrideType", parsed.EnvOverrideType, status.EnvOverrideType},
		{"EnvOverrideCount", parsed.EnvOverrideCount, status.EnvOverrideCount},
		{"NativeAvailable", parsed.NativeAvailable, status.NativeAvailable},
	}
	for _, c := range scalars {
		if c.got != c.want {
			t.Errorf("JSON roundtrip: %s = %v, want %v", c.field, c.got, c.want)
		}
	}

	// Maps are not comparable with ==, so check size and each entry.
	if len(parsed.BuildTags) != len(status.BuildTags) {
		t.Errorf("JSON roundtrip: BuildTags has %d entries, want %d",
			len(parsed.BuildTags), len(status.BuildTags))
	}
	for tag, want := range status.BuildTags {
		if got, ok := parsed.BuildTags[tag]; !ok || got != want {
			t.Errorf("JSON roundtrip: BuildTags[%q] = %v (present=%v), want %v", tag, got, ok, want)
		}
	}
}
// TestBuildTagMatrix logs the detected build configuration and checks that the
// SIMD implementation name reported by worker.GetSIMDImplName is consistent
// with it. The three configurations are:
//  1. cgo + native_libs: the name must not be one of the "disabled" markers
//  2. cgo without native_libs: the name must be "disabled"
//  3. !cgo: the name must be "disabled (no CGO)"
func TestBuildTagMatrix(t *testing.T) {
	tags := detectBuildTags()

	// Surface the configuration in the test log for CI visibility.
	t.Logf("Build configuration: cgo=%v native_libs=%v darwin=%v linux=%v",
		tags["cgo"], tags["native_libs"], tags["darwin"], tags["linux"])

	impl := worker.GetSIMDImplName()
	t.Logf("SIMD implementation: %s", impl)

	if tags["native_libs"] {
		// A real implementation name is expected (avx2, sha_ni, armv8_crypto, or generic).
		if impl == "disabled" || impl == "disabled (no CGO)" {
			t.Errorf("native_libs build: SIMD impl should be active, got %q", impl)
		}
		return
	}

	if tags["cgo"] {
		// cgo alone is not enough: without native_libs the impl is disabled.
		if impl != "disabled" {
			t.Errorf("cgo-only build: SIMD impl should be 'disabled', got %q", impl)
		}
		return
	}

	// No CGO at all.
	if impl != "disabled (no CGO)" {
		t.Errorf("nocgo build: SIMD impl should be 'disabled (no CGO)', got %q", impl)
	}
}