// fetch_ml/tests/unit/gpu/gpu_golden_test.go

package worker_test

import (
	"encoding/json"
	"os"
	"testing"

	"github.com/jfraeys/fetch_ml/internal/worker"
)

// GoldenGPUStatus represents the expected GPU status output for golden file testing.
//
// The tests in this file fill it from a detector factory result:
//   - GPUCount is the value returned by the detector's DetectGPUCount.
//   - GPUType, ConfiguredVendor and DetectionMethod are copied from the
//     result's Info (GPUType and DetectionMethod converted to string).
//   - EnvOverrideType and EnvOverrideCount are copied from the result's Info
//     by the env-override tests; both are omitted from JSON when zero-valued.
//   - BuildTags is the map produced by detectBuildTags.
//   - NativeAvailable is computed by each test from the build tags.
//   - Extra is not populated by any test visible in this file and is omitted
//     from JSON when empty.
type GoldenGPUStatus struct {
	GPUCount         int                    `json:"gpu_count"`
	GPUType          string                 `json:"gpu_type"`
	ConfiguredVendor string                 `json:"configured_vendor"`
	DetectionMethod  string                 `json:"detection_method"`
	EnvOverrideType  string                 `json:"env_override_type,omitempty"`
	EnvOverrideCount int                    `json:"env_override_count,omitempty"`
	BuildTags        map[string]bool        `json:"build_tags"`
	NativeAvailable  bool                   `json:"native_available"`
	Extra            map[string]interface{} `json:"extra,omitempty"`
}

// detectBuildTags infers the active build configuration at run time and
// returns it as a map with the keys "cgo", "native_libs", "darwin" and "linux".
//
// The cgo and native_libs entries are derived from the name reported by
// worker.GetSIMDImplName: "disabled (no CGO)" yields neither, "disabled"
// yields cgo without native_libs, and any other name yields both.
// The OS entries come from worker.IsMacOS; note that "linux" is set for every
// platform that is not reported as macOS.
func detectBuildTags() map[string]bool {
	impl := worker.GetSIMDImplName()
	hasCGO := impl != "disabled (no CGO)"
	hasNative := hasCGO && impl != "disabled"

	onMac := worker.IsMacOS()

	return map[string]bool{
		"cgo":         hasCGO,
		"native_libs": hasNative,
		"darwin":      onMac,
		"linux":       !onMac,
	}
}

// TestGoldenGPUStatusNVML builds a status for an "nvidia" vendor configuration
// and checks it against expectations that depend on the detected build:
//   - cgo+native_libs: the detection method must be "config".
//   - cgo without native_libs, or no cgo: NativeAvailable must be false; a
//     non-zero GPU count is only logged, not treated as a failure.
//
// In every build the configured vendor must be reported as "nvidia".
func TestGoldenGPUStatusNVML(t *testing.T) {
	factory := &worker.GPUDetectorFactory{}
	result := factory.CreateDetectorWithInfo(&worker.Config{GPUVendor: "nvidia"})

	// The detected count varies with the build tags and the host hardware.
	detected := result.Detector.DetectGPUCount()
	tags := detectBuildTags()
	nativeBuild := tags["native_libs"] && tags["cgo"]

	var got GoldenGPUStatus
	got.GPUCount = detected
	got.GPUType = string(result.Info.GPUType)
	got.ConfiguredVendor = result.Info.ConfiguredVendor
	got.DetectionMethod = string(result.Info.DetectionMethod)
	got.BuildTags = tags
	got.NativeAvailable = nativeBuild

	switch {
	case nativeBuild:
		// Native build: the count may legitimately be 0 without NVIDIA
		// hardware, so only the detection method is asserted.
		if got.DetectionMethod != "config" {
			t.Errorf("cgo+native_libs: DetectionMethod = %v, want 'config'", got.DetectionMethod)
		}
	case tags["cgo"]:
		// cgo without native_libs.
		if got.GPUCount != 0 {
			t.Logf("cgo-only build: GPUCount = %d (expected 0 from stub)", got.GPUCount)
		}
		if got.NativeAvailable {
			t.Error("cgo-only build: NativeAvailable should be false")
		}
	default:
		// No cgo at all.
		if got.GPUCount != 0 {
			t.Logf("nocgo build: GPUCount = %d (expected 0 from stub)", got.GPUCount)
		}
		if got.NativeAvailable {
			t.Error("nocgo build: NativeAvailable should be false")
		}
	}

	// Independent of the build configuration.
	if got.ConfiguredVendor != "nvidia" {
		t.Errorf("ConfiguredVendor = %v, want 'nvidia'", got.ConfiguredVendor)
	}
}

// TestGoldenGPUStatusAMDVendorAlias checks that an "amd" vendor configuration
// is reported with ConfiguredVendor "amd" while GPUType is "nvidia", so the
// aliasing of AMD onto the NVIDIA implementation is visible in the status.
func TestGoldenGPUStatusAMDVendorAlias(t *testing.T) {
	factory := &worker.GPUDetectorFactory{}
	result := factory.CreateDetectorWithInfo(&worker.Config{GPUVendor: "amd"})

	tags := detectBuildTags()

	var got GoldenGPUStatus
	got.GPUCount = result.Detector.DetectGPUCount()
	got.GPUType = string(result.Info.GPUType)
	got.ConfiguredVendor = result.Info.ConfiguredVendor
	got.DetectionMethod = string(result.Info.DetectionMethod)
	got.BuildTags = tags
	got.NativeAvailable = tags["native_libs"] && tags["cgo"]

	// The configured vendor is preserved as written in the config...
	if vendor := got.ConfiguredVendor; vendor != "amd" {
		t.Errorf("AMD config: ConfiguredVendor = %v, want 'amd'", vendor)
	}
	// ...while the effective GPU type is the NVIDIA one.
	if gpuType := got.GPUType; gpuType != "nvidia" {
		t.Errorf("AMD config: GPUType = %v, want 'nvidia' (AMD aliased to NVIDIA implementation)", gpuType)
	}
}

// TestGoldenGPUStatusEnvOverride validates that the FETCH_ML_GPU_TYPE and
// FETCH_ML_GPU_COUNT environment variables take precedence over the config.
//
// The config asks for "apple" while the environment asks for "nvidia" with a
// count of 4; the resulting status must report the environment's values and
// the "env_override_both" detection method.
//
// Any values the variables had before the test are restored afterwards, so
// the test does not change the environment seen by later tests.
func TestGoldenGPUStatusEnvOverride(t *testing.T) {
	overrides := [][2]string{
		{"FETCH_ML_GPU_TYPE", "nvidia"},
		{"FETCH_ML_GPU_COUNT", "4"},
	}
	for _, kv := range overrides {
		// Fresh per-iteration variables so each cleanup closure captures its own.
		key, value := kv[0], kv[1]
		prev, hadPrev := os.LookupEnv(key)
		if err := os.Setenv(key, value); err != nil {
			t.Fatalf("setting %s: %v", key, err)
		}
		t.Cleanup(func() {
			var err error
			if hadPrev {
				err = os.Setenv(key, prev)
			} else {
				err = os.Unsetenv(key)
			}
			if err != nil {
				t.Errorf("restoring %s: %v", key, err)
			}
		})
	}

	factory := &worker.GPUDetectorFactory{}
	result := factory.CreateDetectorWithInfo(&worker.Config{GPUVendor: "apple"})

	buildTags := detectBuildTags()

	got := GoldenGPUStatus{
		GPUCount:         result.Detector.DetectGPUCount(),
		GPUType:          string(result.Info.GPUType),
		ConfiguredVendor: result.Info.ConfiguredVendor,
		DetectionMethod:  string(result.Info.DetectionMethod),
		EnvOverrideType:  result.Info.EnvOverrideType,
		EnvOverrideCount: result.Info.EnvOverrideCount,
		BuildTags:        buildTags,
	}

	// Env should take precedence over config
	if got.DetectionMethod != "env_override_both" {
		t.Errorf("Env override: DetectionMethod = %v, want 'env_override_both'", got.DetectionMethod)
	}
	if got.GPUType != "nvidia" {
		t.Errorf("Env override: GPUType = %v, want 'nvidia'", got.GPUType)
	}
	if got.EnvOverrideType != "nvidia" {
		t.Errorf("Env override: EnvOverrideType = %v, want 'nvidia'", got.EnvOverrideType)
	}
	if got.EnvOverrideCount != 4 {
		t.Errorf("Env override: EnvOverrideCount = %v, want 4", got.EnvOverrideCount)
	}
}

// TestGoldenGPUStatusMacOS checks the status produced for an "apple" vendor
// configuration with the Apple GPU enabled. It is skipped unless
// worker.IsMacOS reports that the test is running on macOS.
func TestGoldenGPUStatusMacOS(t *testing.T) {
	if onMac := worker.IsMacOS(); !onMac {
		t.Skip("Skipping macOS-specific test on non-Darwin platform")
	}

	appleCfg := &worker.Config{
		GPUVendor: "apple",
		AppleGPU:  worker.AppleGPUConfig{Enabled: true},
	}
	result := (&worker.GPUDetectorFactory{}).CreateDetectorWithInfo(appleCfg)

	tags := detectBuildTags()

	var got GoldenGPUStatus
	got.GPUCount = result.Detector.DetectGPUCount()
	got.GPUType = string(result.Info.GPUType)
	got.ConfiguredVendor = result.Info.ConfiguredVendor
	got.DetectionMethod = string(result.Info.DetectionMethod)
	got.BuildTags = tags
	got.NativeAvailable = tags["darwin"]

	if vendor := got.ConfiguredVendor; vendor != "apple" {
		t.Errorf("macOS: ConfiguredVendor = %v, want 'apple'", vendor)
	}
	if gpuType := got.GPUType; gpuType != "apple" {
		t.Errorf("macOS: GPUType = %v, want 'apple'", gpuType)
	}
	if darwin := got.BuildTags["darwin"]; !darwin {
		t.Error("macOS: darwin build tag should be true")
	}
}

// TestGoldenGPUStatusNone validates the no-GPU configuration: with the vendor
// set to "none" the detector must report zero GPUs and the configured vendor
// must be echoed back as "none".
func TestGoldenGPUStatusNone(t *testing.T) {
	cfg := &worker.Config{
		GPUVendor: "none",
	}

	factory := &worker.GPUDetectorFactory{}
	result := factory.CreateDetectorWithInfo(cfg)

	// Query the detector once so the value reported on failure is exactly the
	// value that failed the comparison.
	count := result.Detector.DetectGPUCount()
	if count != 0 {
		t.Errorf("none config: GPUCount = %v, want 0", count)
	}
	if result.Info.ConfiguredVendor != "none" {
		t.Errorf("none config: ConfiguredVendor = %v, want 'none'", result.Info.ConfiguredVendor)
	}
}

// TestGoldenJSONSerialization validates that a GPU status built under env
// overrides (type "nvidia", count 2, nil config) survives a JSON round trip:
// it must marshal, unmarshal, and keep its ConfiguredVendor and
// DetectionMethod values.
//
// Any values the environment variables had before the test are restored
// afterwards, so the test does not change the environment seen by later tests.
func TestGoldenJSONSerialization(t *testing.T) {
	overrides := [][2]string{
		{"FETCH_ML_GPU_TYPE", "nvidia"},
		{"FETCH_ML_GPU_COUNT", "2"},
	}
	for _, kv := range overrides {
		// Fresh per-iteration variables so each cleanup closure captures its own.
		key, value := kv[0], kv[1]
		prev, hadPrev := os.LookupEnv(key)
		if err := os.Setenv(key, value); err != nil {
			t.Fatalf("setting %s: %v", key, err)
		}
		t.Cleanup(func() {
			var err error
			if hadPrev {
				err = os.Setenv(key, prev)
			} else {
				err = os.Unsetenv(key)
			}
			if err != nil {
				t.Errorf("restoring %s: %v", key, err)
			}
		})
	}

	factory := &worker.GPUDetectorFactory{}
	result := factory.CreateDetectorWithInfo(nil)

	status := GoldenGPUStatus{
		GPUCount:         result.Detector.DetectGPUCount(),
		GPUType:          string(result.Info.GPUType),
		ConfiguredVendor: result.Info.ConfiguredVendor,
		DetectionMethod:  string(result.Info.DetectionMethod),
		EnvOverrideType:  result.Info.EnvOverrideType,
		EnvOverrideCount: result.Info.EnvOverrideCount,
		BuildTags:        detectBuildTags(),
	}

	// Serialize to JSON (this mimics what ml status --json would output)
	jsonData, err := json.MarshalIndent(status, "", "  ")
	if err != nil {
		t.Fatalf("JSON serialization failed: %v", err)
	}

	// Verify JSON can be parsed back
	var parsed GoldenGPUStatus
	if err := json.Unmarshal(jsonData, &parsed); err != nil {
		t.Fatalf("JSON deserialization failed: %v", err)
	}

	if parsed.ConfiguredVendor != status.ConfiguredVendor {
		t.Errorf("JSON roundtrip: ConfiguredVendor mismatch")
	}
	if parsed.DetectionMethod != status.DetectionMethod {
		t.Errorf("JSON roundtrip: DetectionMethod mismatch")
	}
}

// TestBuildTagMatrix logs the detected build configuration and checks that the
// SIMD implementation name agrees with it. The three configurations are:
//  1. native_libs: the name must not be one of the "disabled" markers.
//  2. cgo without native_libs: the name must be "disabled".
//  3. no cgo: the name must be "disabled (no CGO)".
func TestBuildTagMatrix(t *testing.T) {
	cfg := detectBuildTags()

	// Surface the active configuration in the test log for CI visibility.
	t.Logf("Build configuration: cgo=%v native_libs=%v darwin=%v linux=%v",
		cfg["cgo"], cfg["native_libs"], cfg["darwin"], cfg["linux"])

	impl := worker.GetSIMDImplName()
	t.Logf("SIMD implementation: %s", impl)

	if cfg["native_libs"] {
		// A native build must report a real implementation name.
		if impl == "disabled" || impl == "disabled (no CGO)" {
			t.Errorf("native_libs build: SIMD impl should be active, got %q", impl)
		}
		return
	}

	if cfg["cgo"] {
		// cgo alone, without native_libs.
		if impl != "disabled" {
			t.Errorf("cgo-only build: SIMD impl should be 'disabled', got %q", impl)
		}
		return
	}

	// Neither cgo nor native_libs.
	if impl != "disabled (no CGO)" {
		t.Errorf("nocgo build: SIMD impl should be 'disabled (no CGO)', got %q", impl)
	}
}