Add comprehensive capability routing system to scheduler hub:
- Capability-aware worker matching with requirement/offer negotiation
- Hub v2 protocol with structured message types and heartbeat management
- Worker capability advertisement and dynamic routing decisions
- Orphan recovery for disconnected workers with state reconciliation
- Template-based job scheduling with capability constraints

Add extensive test coverage:
- Unit tests for capability routing logic and heartbeat mechanics
- Unit tests for orphan recovery scenarios
- E2E tests for capability routing across multiple workers
- Hub capabilities integration tests
- Scheduler fixture helpers for test setup

Protocol improvements:
- Define structured protocol messages for hub-worker communication
- Add capability matching algorithm with scoring
- Implement graceful worker disconnection handling
352 lines | 11 KiB | Go
package tests
|
|
|
|
import (
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/jfraeys/fetch_ml/internal/scheduler"
|
|
fixtures "github.com/jfraeys/fetch_ml/tests/fixtures"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
// TestCapabilityRoutingE2E_MultiWorkerScenario validates multi-worker capability routing
|
|
func TestCapabilityRoutingE2E_MultiWorkerScenario(t *testing.T) {
|
|
fixture := fixtures.NewSchedulerTestFixture(t, fixtures.DefaultHubConfig())
|
|
defer fixture.Cleanup()
|
|
|
|
// Create GPU worker with NVIDIA GPUs
|
|
gpuWorker := fixture.CreateWorker("e2e-gpu-worker", scheduler.WorkerCapabilities{
|
|
GPUBackend: scheduler.BackendNVIDIA,
|
|
GPUCount: 4,
|
|
VRAMGB: 24.0,
|
|
CPUCount: 8,
|
|
})
|
|
|
|
// Create CPU-only worker
|
|
cpuWorker := fixture.CreateWorker("e2e-cpu-worker", scheduler.WorkerCapabilities{
|
|
GPUBackend: scheduler.BackendCPU,
|
|
GPUCount: 0,
|
|
CPUCount: 16,
|
|
})
|
|
|
|
// Submit training job (needs GPU)
|
|
fixture.SubmitJob(scheduler.JobSpec{
|
|
ID: "e2e-training-job",
|
|
Type: scheduler.JobTypeBatch,
|
|
SlotPool: "batch",
|
|
JobTier: scheduler.TierTraining,
|
|
GPUCount: 2,
|
|
GPUBackend: "nvidia",
|
|
MinVRAMGB: 16.0,
|
|
Command: []string{"python", "train.py"},
|
|
})
|
|
|
|
// Submit data processing job (CPU only)
|
|
fixture.SubmitJob(scheduler.JobSpec{
|
|
ID: "e2e-data-job",
|
|
Type: scheduler.JobTypeBatch,
|
|
SlotPool: "batch",
|
|
JobTier: scheduler.TierDataProcessing,
|
|
GPUCount: 0,
|
|
Command: []string{"python", "preprocess.py"},
|
|
})
|
|
|
|
// Both workers signal ready to trigger job assignment
|
|
gpuWorker.SignalReady(scheduler.SlotStatus{BatchTotal: 4, BatchInUse: 0}, "polling")
|
|
cpuWorker.SignalReady(scheduler.SlotStatus{BatchTotal: 4, BatchInUse: 0}, "polling")
|
|
|
|
// GPU worker should get training job
|
|
msg1 := gpuWorker.RecvTimeout(2 * time.Second)
|
|
require.Equal(t, scheduler.MsgJobAssign, msg1.Type, "GPU worker should receive training job")
|
|
|
|
// CPU worker should get data job
|
|
msg2 := cpuWorker.RecvTimeout(2 * time.Second)
|
|
require.Equal(t, scheduler.MsgJobAssign, msg2.Type, "CPU worker should receive data job")
|
|
}
|
|
|
|
// TestCapabilityRoutingE2E_GPUSelection validates job lands on correct GPU worker
|
|
func TestCapabilityRoutingE2E_GPUSelection(t *testing.T) {
|
|
fixture := fixtures.NewSchedulerTestFixture(t, fixtures.DefaultHubConfig())
|
|
defer fixture.Cleanup()
|
|
|
|
// Create worker with 2 GPUs
|
|
worker2GPU := fixture.CreateWorker("e2e-2gpu", scheduler.WorkerCapabilities{
|
|
GPUBackend: scheduler.BackendNVIDIA,
|
|
GPUCount: 2,
|
|
VRAMGB: 16.0,
|
|
})
|
|
|
|
// Create worker with 8 GPUs
|
|
worker8GPU := fixture.CreateWorker("e2e-8gpu", scheduler.WorkerCapabilities{
|
|
GPUBackend: scheduler.BackendNVIDIA,
|
|
GPUCount: 8,
|
|
VRAMGB: 48.0,
|
|
})
|
|
|
|
// Submit job needing 4 GPUs
|
|
fixture.SubmitJob(scheduler.JobSpec{
|
|
ID: "e2e-4gpu-job",
|
|
Type: scheduler.JobTypeBatch,
|
|
SlotPool: "batch",
|
|
GPUCount: 4,
|
|
})
|
|
|
|
// Both signal ready to trigger assignment
|
|
worker2GPU.SignalReady(scheduler.SlotStatus{BatchTotal: 4, BatchInUse: 0}, "polling")
|
|
worker8GPU.SignalReady(scheduler.SlotStatus{BatchTotal: 4, BatchInUse: 0}, "polling")
|
|
|
|
// Should go to 8GPU worker (2GPU can't handle it) - poll with retries
|
|
var assignedWorker string
|
|
deadline := time.Now().Add(2 * time.Second)
|
|
for time.Now().Before(deadline) && assignedWorker == "" {
|
|
select {
|
|
case msg := <-worker2GPU.RecvCh:
|
|
if msg.Type == scheduler.MsgJobAssign {
|
|
assignedWorker = "2gpu"
|
|
}
|
|
case msg := <-worker8GPU.RecvCh:
|
|
if msg.Type == scheduler.MsgJobAssign {
|
|
assignedWorker = "8gpu"
|
|
}
|
|
default:
|
|
// No message yet, signal ready again
|
|
worker2GPU.SignalReady(scheduler.SlotStatus{BatchTotal: 4, BatchInUse: 0}, "polling")
|
|
worker8GPU.SignalReady(scheduler.SlotStatus{BatchTotal: 4, BatchInUse: 0}, "polling")
|
|
time.Sleep(100 * time.Millisecond)
|
|
}
|
|
}
|
|
|
|
if assignedWorker == "" {
|
|
t.Fatal("timeout waiting for job assignment")
|
|
}
|
|
|
|
assert.Equal(t, "8gpu", assignedWorker, "4-GPU job should go to 8-GPU worker")
|
|
}
|
|
|
|
// TestCapabilityRoutingE2E_BackendMismatch validates backend requirements are enforced
|
|
func TestCapabilityRoutingE2E_BackendMismatch(t *testing.T) {
|
|
fixture := fixtures.NewSchedulerTestFixture(t, fixtures.DefaultHubConfig())
|
|
defer fixture.Cleanup()
|
|
|
|
// Create Metal worker (macOS GPU)
|
|
metalWorker := fixture.CreateWorker("e2e-metal", scheduler.WorkerCapabilities{
|
|
GPUBackend: scheduler.BackendMetal,
|
|
GPUCount: 4,
|
|
})
|
|
|
|
// Create NVIDIA worker
|
|
nvidiaWorker := fixture.CreateWorker("e2e-nvidia", scheduler.WorkerCapabilities{
|
|
GPUBackend: scheduler.BackendNVIDIA,
|
|
GPUCount: 4,
|
|
})
|
|
|
|
// Submit job requiring NVIDIA
|
|
fixture.SubmitJob(scheduler.JobSpec{
|
|
ID: "e2e-nvidia-job",
|
|
Type: scheduler.JobTypeBatch,
|
|
SlotPool: "batch",
|
|
GPUCount: 2,
|
|
GPUBackend: "nvidia",
|
|
})
|
|
|
|
// Both workers signal ready
|
|
metalWorker.SignalReady(scheduler.SlotStatus{BatchTotal: 4, BatchInUse: 0}, "polling")
|
|
nvidiaWorker.SignalReady(scheduler.SlotStatus{BatchTotal: 4, BatchInUse: 0}, "polling")
|
|
|
|
// NVIDIA worker should get the job - poll with retries
|
|
var msg scheduler.Message
|
|
deadline := time.Now().Add(2 * time.Second)
|
|
for time.Now().Before(deadline) && msg.Type != scheduler.MsgJobAssign {
|
|
select {
|
|
case m := <-nvidiaWorker.RecvCh:
|
|
msg = m
|
|
default:
|
|
// No message yet, signal ready again
|
|
metalWorker.SignalReady(scheduler.SlotStatus{BatchTotal: 4, BatchInUse: 0}, "polling")
|
|
nvidiaWorker.SignalReady(scheduler.SlotStatus{BatchTotal: 4, BatchInUse: 0}, "polling")
|
|
time.Sleep(50 * time.Millisecond)
|
|
}
|
|
}
|
|
require.Equal(t, scheduler.MsgJobAssign, msg.Type, "NVIDIA worker should get NVIDIA job")
|
|
|
|
// Metal worker should receive NoWork (not job_assign) - poll to verify
|
|
var metalMsg scheduler.Message
|
|
metalDeadline := time.Now().Add(500 * time.Millisecond)
|
|
for time.Now().Before(metalDeadline) {
|
|
select {
|
|
case m := <-metalWorker.RecvCh:
|
|
metalMsg = m
|
|
default:
|
|
metalWorker.SignalReady(scheduler.SlotStatus{BatchTotal: 4, BatchInUse: 0}, "polling")
|
|
time.Sleep(50 * time.Millisecond)
|
|
}
|
|
if metalMsg.Type == scheduler.MsgNoWork || metalMsg.Type == scheduler.MsgJobAssign {
|
|
break
|
|
}
|
|
}
|
|
|
|
// Metal worker should get NoWork, never job_assign
|
|
assert.NotEqual(t, scheduler.MsgJobAssign, metalMsg.Type, "Metal worker should NOT receive NVIDIA job")
|
|
}
|
|
|
|
// TestCapabilityRoutingE2E_VRAMFiltering validates VRAM requirements filtering
|
|
func TestCapabilityRoutingE2E_VRAMFiltering(t *testing.T) {
|
|
fixture := fixtures.NewSchedulerTestFixture(t, fixtures.DefaultHubConfig())
|
|
defer fixture.Cleanup()
|
|
|
|
// Worker with 8GB VRAM
|
|
worker8GB := fixture.CreateWorker("e2e-8gb-vram", scheduler.WorkerCapabilities{
|
|
GPUBackend: scheduler.BackendNVIDIA,
|
|
GPUCount: 2,
|
|
VRAMGB: 8.0,
|
|
})
|
|
|
|
// Worker with 24GB VRAM
|
|
worker24GB := fixture.CreateWorker("e2e-24gb-vram", scheduler.WorkerCapabilities{
|
|
GPUBackend: scheduler.BackendNVIDIA,
|
|
GPUCount: 2,
|
|
VRAMGB: 24.0,
|
|
})
|
|
|
|
// Submit job needing 16GB VRAM
|
|
fixture.SubmitJob(scheduler.JobSpec{
|
|
ID: "e2e-vram-job",
|
|
Type: scheduler.JobTypeBatch,
|
|
SlotPool: "batch",
|
|
GPUCount: 1,
|
|
MinVRAMGB: 16.0,
|
|
})
|
|
|
|
worker8GB.SignalReady(scheduler.SlotStatus{BatchTotal: 4, BatchInUse: 0}, "polling")
|
|
worker24GB.SignalReady(scheduler.SlotStatus{BatchTotal: 4, BatchInUse: 0}, "polling")
|
|
|
|
// Should go to 24GB worker - poll with retries since scheduler may need time
|
|
var assignedWorker string
|
|
deadline := time.Now().Add(2 * time.Second)
|
|
for time.Now().Before(deadline) && assignedWorker == "" {
|
|
select {
|
|
case msg := <-worker8GB.RecvCh:
|
|
if msg.Type == scheduler.MsgJobAssign {
|
|
assignedWorker = "8gb"
|
|
}
|
|
case msg := <-worker24GB.RecvCh:
|
|
if msg.Type == scheduler.MsgJobAssign {
|
|
assignedWorker = "24gb"
|
|
}
|
|
default:
|
|
// No message yet, signal ready again to trigger assignment
|
|
worker8GB.SignalReady(scheduler.SlotStatus{BatchTotal: 4, BatchInUse: 0}, "polling")
|
|
worker24GB.SignalReady(scheduler.SlotStatus{BatchTotal: 4, BatchInUse: 0}, "polling")
|
|
time.Sleep(100 * time.Millisecond)
|
|
}
|
|
}
|
|
|
|
if assignedWorker == "" {
|
|
t.Fatal("timeout waiting for job assignment")
|
|
}
|
|
|
|
assert.Equal(t, "24gb", assignedWorker, "16GB VRAM job should go to 24GB worker")
|
|
}
|
|
|
|
// TestCapabilityRoutingE2E_GangAllocation validates multi-node jobs across mixed workers
|
|
func TestCapabilityRoutingE2E_GangAllocation(t *testing.T) {
|
|
fixture := fixtures.NewSchedulerTestFixture(t, fixtures.DefaultHubConfig())
|
|
defer fixture.Cleanup()
|
|
|
|
// Create workers with different capabilities
|
|
workers := make([]*fixtures.MockWorker, 3)
|
|
workerIDs := []string{"gang-worker-1", "gang-worker-2", "gang-worker-3"}
|
|
|
|
for i, id := range workerIDs {
|
|
workers[i] = fixture.CreateWorker(id, scheduler.WorkerCapabilities{
|
|
GPUBackend: scheduler.BackendNVIDIA,
|
|
GPUCount: 2,
|
|
VRAMGB: 16.0,
|
|
})
|
|
}
|
|
|
|
// Submit multi-node job needing 3 nodes
|
|
fixture.SubmitJob(scheduler.JobSpec{
|
|
ID: "e2e-gang-job",
|
|
Type: scheduler.JobTypeBatch,
|
|
SlotPool: "batch",
|
|
NodeCount: 3,
|
|
GPUCount: 1,
|
|
GPUBackend: "nvidia",
|
|
Command: []string{"torchrun", "--nproc_per_node=3", "train.py"},
|
|
})
|
|
|
|
// Workers signal ready after job submission
|
|
for _, worker := range workers {
|
|
worker.SignalReady(scheduler.SlotStatus{BatchTotal: 4, BatchInUse: 0}, "polling")
|
|
}
|
|
|
|
// All three workers should receive the job assignment
|
|
assignedCount := 0
|
|
deadline := time.After(3 * time.Second)
|
|
|
|
for _, worker := range workers {
|
|
select {
|
|
case msg := <-worker.RecvCh:
|
|
if msg.Type == scheduler.MsgJobAssign {
|
|
assignedCount++
|
|
}
|
|
case <-deadline:
|
|
// Timeout - continue to next worker
|
|
}
|
|
}
|
|
|
|
// Gang allocation may assign one at a time; verify at least one gets assigned
|
|
assert.GreaterOrEqual(t, assignedCount, 1, "at least one worker should be assigned for gang job")
|
|
}
|
|
|
|
// TestCapabilityRoutingE2E_NoSuitableWorker validates job waits when no worker matches
|
|
func TestCapabilityRoutingE2E_NoSuitableWorker(t *testing.T) {
|
|
fixture := fixtures.NewSchedulerTestFixture(t, fixtures.DefaultHubConfig())
|
|
defer fixture.Cleanup()
|
|
|
|
// Create only CPU workers
|
|
cpuWorker := fixture.CreateWorker("e2e-cpu-only", scheduler.WorkerCapabilities{
|
|
GPUBackend: scheduler.BackendCPU,
|
|
GPUCount: 0,
|
|
})
|
|
|
|
// Submit GPU job first
|
|
fixture.SubmitJob(scheduler.JobSpec{
|
|
ID: "e2e-waiting-gpu-job",
|
|
Type: scheduler.JobTypeBatch,
|
|
SlotPool: "batch",
|
|
GPUCount: 4,
|
|
})
|
|
|
|
// CPU worker signals ready after job submission
|
|
cpuWorker.SignalReady(scheduler.SlotStatus{BatchTotal: 4, BatchInUse: 0}, "polling")
|
|
|
|
// Wait a moment for any potential assignment
|
|
time.Sleep(100 * time.Millisecond)
|
|
|
|
// CPU worker should receive NoWork (not job_assign) - poll to verify
|
|
var cpuMsg scheduler.Message
|
|
cpuDeadline := time.Now().Add(500 * time.Millisecond)
|
|
for time.Now().Before(cpuDeadline) {
|
|
select {
|
|
case m := <-cpuWorker.RecvCh:
|
|
cpuMsg = m
|
|
default:
|
|
cpuWorker.SignalReady(scheduler.SlotStatus{BatchTotal: 4, BatchInUse: 0}, "polling")
|
|
time.Sleep(50 * time.Millisecond)
|
|
}
|
|
if cpuMsg.Type == scheduler.MsgNoWork || cpuMsg.Type == scheduler.MsgJobAssign {
|
|
break
|
|
}
|
|
}
|
|
|
|
// CPU worker should get NoWork, never job_assign for GPU job
|
|
assert.NotEqual(t, scheduler.MsgJobAssign, cpuMsg.Type, "CPU worker should NOT receive GPU job")
|
|
|
|
// Job should be in queue
|
|
metrics := fixture.Hub.GetMetricsPayload()
|
|
queueDepth := metrics["queue_depth_batch"].(int)
|
|
assert.GreaterOrEqual(t, queueDepth, 1, "GPU job should be queued waiting for GPU worker")
|
|
}
|