package scheduler

import (
	"testing"
	"time"
)

// canAdmitCase is one row of a table-driven test for SchedulerHub.canAdmit:
// a worker's advertised capabilities, a job's requirements, and the
// expected admission decision.
type canAdmitCase struct {
	name       string
	workerCaps WorkerCapabilities
	jobSpec    JobSpec
	want       bool
}

// runCanAdmitCases runs each case against h.canAdmit with a fresh worker
// connection (4 batch slots) and a fresh task carrying the case's spec.
// Shared by the backend/VRAM/CPU/reservation admission tests below.
func runCanAdmitCases(t *testing.T, h *SchedulerHub, tests []canAdmitCase) {
	t.Helper()
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			wc := &WorkerConn{
				capabilities: tt.workerCaps,
				slots:        SlotStatus{BatchTotal: 4},
			}
			task := &Task{
				ID:   "test-task",
				Spec: tt.jobSpec,
			}
			if got := h.canAdmit(task, wc); got != tt.want {
				t.Errorf("canAdmit() = %v, want %v", got, tt.want)
			}
		})
	}
}

// TestCanAdmit_BackendMatching verifies that a job is only admitted onto a
// worker whose GPU backend matches the job's requested backend, and that an
// empty backend requirement matches any worker.
func TestCanAdmit_BackendMatching(t *testing.T) {
	h := &SchedulerHub{
		reservations: make(map[string]*Reservation),
	}

	tests := []canAdmitCase{
		{
			name: "backend matches nvidia",
			workerCaps: WorkerCapabilities{
				GPUBackend: BackendNVIDIA,
				GPUCount:   4,
			},
			jobSpec: JobSpec{
				GPUBackend: "nvidia",
				GPUCount:   2,
			},
			want: true,
		},
		{
			name: "backend matches metal",
			workerCaps: WorkerCapabilities{
				GPUBackend: BackendMetal,
				GPUCount:   2,
			},
			jobSpec: JobSpec{
				GPUBackend: "metal",
				GPUCount:   1,
			},
			want: true,
		},
		{
			name: "backend mismatch nvidia vs metal",
			workerCaps: WorkerCapabilities{
				GPUBackend: BackendNVIDIA,
				GPUCount:   4,
			},
			jobSpec: JobSpec{
				GPUBackend: "metal",
				GPUCount:   1,
			},
			want: false,
		},
		{
			name: "no backend required - any matches",
			workerCaps: WorkerCapabilities{
				GPUBackend: BackendVulkan,
				GPUCount:   2,
			},
			jobSpec: JobSpec{
				GPUBackend: "",
				GPUCount:   1,
			},
			want: true,
		},
		{
			name: "cpu job on cpu worker",
			workerCaps: WorkerCapabilities{
				GPUBackend: BackendCPU,
				GPUCount:   0,
				CPUCount:   8,
			},
			jobSpec: JobSpec{
				GPUBackend: "cpu",
				GPUCount:   0,
			},
			want: true,
		},
	}

	runCanAdmitCases(t, h, tests)
}

// TestCanAdmit_VRAMRequirements verifies that a job's minimum-VRAM
// requirement is enforced against the worker's advertised VRAM, and that a
// zero requirement imposes no constraint.
func TestCanAdmit_VRAMRequirements(t *testing.T) {
	h := &SchedulerHub{
		reservations: make(map[string]*Reservation),
	}

	tests := []canAdmitCase{
		{
			name: "sufficient VRAM",
			workerCaps: WorkerCapabilities{
				GPUBackend: BackendNVIDIA,
				GPUCount:   2,
				VRAMGB:     32.0,
			},
			jobSpec: JobSpec{
				MinVRAMGB: 16.0,
				GPUCount:  1,
			},
			want: true,
		},
		{
			name: "insufficient VRAM",
			workerCaps: WorkerCapabilities{
				GPUBackend: BackendNVIDIA,
				GPUCount:   2,
				VRAMGB:     8.0,
			},
			jobSpec: JobSpec{
				MinVRAMGB: 16.0,
				GPUCount:  1,
			},
			want: false,
		},
		{
			name: "no VRAM required",
			workerCaps: WorkerCapabilities{
				GPUBackend: BackendNVIDIA,
				GPUCount:   2,
				VRAMGB:     8.0,
			},
			jobSpec: JobSpec{
				MinVRAMGB: 0,
				GPUCount:  1,
			},
			want: true,
		},
	}

	runCanAdmitCases(t, h, tests)
}

// TestCanAdmit_CPUCoresRequirements verifies that a job's minimum-CPU-core
// requirement is enforced against the worker's advertised core count.
func TestCanAdmit_CPUCoresRequirements(t *testing.T) {
	h := &SchedulerHub{
		reservations: make(map[string]*Reservation),
	}

	tests := []canAdmitCase{
		{
			name: "sufficient CPU cores",
			workerCaps: WorkerCapabilities{
				GPUBackend: BackendCPU,
				CPUCount:   16,
			},
			jobSpec: JobSpec{
				MinCPUCores: 8,
				GPUCount:    0,
			},
			want: true,
		},
		{
			name: "insufficient CPU cores",
			workerCaps: WorkerCapabilities{
				GPUBackend: BackendCPU,
				CPUCount:   4,
			},
			jobSpec: JobSpec{
				MinCPUCores: 8,
				GPUCount:    0,
			},
			want: false,
		},
	}

	runCanAdmitCases(t, h, tests)
}

// TestCanAdmit_ReservedGPUs verifies that GPUs already held by outstanding
// reservations are subtracted from the worker's total before deciding
// whether a new job's GPU request fits.
func TestCanAdmit_ReservedGPUs(t *testing.T) {
	// Two existing reservations hold 2 GPUs each (4 total reserved).
	h := &SchedulerHub{
		reservations: map[string]*Reservation{
			"res-1": {TaskID: "task-1", GPUCount: 2},
			"res-2": {TaskID: "task-2", GPUCount: 2},
		},
	}

	tests := []canAdmitCase{
		{
			name: "enough GPUs after reservations",
			workerCaps: WorkerCapabilities{
				GPUBackend: BackendNVIDIA,
				GPUCount:   8,
			},
			jobSpec: JobSpec{
				GPUCount: 4,
			},
			want: true, // 8 - (2+2) = 4 available
		},
		{
			name: "not enough GPUs after reservations",
			workerCaps: WorkerCapabilities{
				GPUBackend: BackendNVIDIA,
				GPUCount:   4,
			},
			jobSpec: JobSpec{
				GPUCount: 2,
			},
			want: false, // 4 - (2+2) = 0 available
		},
	}

	runCanAdmitCases(t, h, tests)
}

// TestReconcileOrphans_TierGracePeriods verifies that reconcileOrphans
// requeues an assignment whose worker is absent only after the grace period
// for the job's tier has elapsed (short for data-processing, 2 minutes for
// evaluation, 10 minutes for training — per the expectations encoded in the
// cases below; the actual durations live in the scheduler implementation).
// A task is considered requeued when it no longer appears in
// pendingAcceptance after reconciliation.
func TestReconcileOrphans_TierGracePeriods(t *testing.T) {
	tests := []struct {
		name         string
		jobTier      JobTier
		accepted     bool
		assignedAt   time.Time
		wantRequeued bool
	}{
		{
			name:         "data_processing tier - short grace period",
			jobTier:      TierDataProcessing,
			accepted:     true,
			assignedAt:   time.Now().Add(-35 * time.Second), // Past 30s grace
			wantRequeued: true,
		},
		{
			name:         "training tier - long grace period",
			jobTier:      TierTraining,
			accepted:     true,
			assignedAt:   time.Now().Add(-5 * time.Minute), // Within 10min grace
			wantRequeued: false,
		},
		{
			name:         "training tier - past grace period",
			jobTier:      TierTraining,
			accepted:     true,
			assignedAt:   time.Now().Add(-11 * time.Minute), // Past 10min grace
			wantRequeued: true,
		},
		{
			name:         "evaluation tier - 2min grace",
			jobTier:      TierEvaluation,
			accepted:     true,
			assignedAt:   time.Now().Add(-3 * time.Minute), // Past 2min grace
			wantRequeued: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Fresh hub per case: no connected workers, so the assignment's
			// worker ("disconnected-worker") is always orphaned.
			h := &SchedulerHub{
				workers:           make(map[string]*WorkerConn),
				pendingAcceptance: make(map[string]*JobAssignment),
				batchQueue:        NewPriorityQueue(0.1),
				config: HubConfig{
					AcceptanceTimeoutSecs: 60,
				},
			}

			task := &Task{
				ID:     "test-task",
				Status: "assigned",
				Spec: JobSpec{
					JobTier: tt.jobTier,
				},
			}
			h.pendingAcceptance["test-task"] = &JobAssignment{
				TaskID:     "test-task",
				WorkerID:   "disconnected-worker",
				AssignedAt: tt.assignedAt,
				Accepted:   tt.accepted,
				Task:       task,
			}

			h.reconcileOrphans()

			// Check if task was requeued
			_, stillPending := h.pendingAcceptance["test-task"]
			wasRequeued := !stillPending
			if wasRequeued != tt.wantRequeued {
				t.Errorf("reconcileOrphans() requeued=%v, want=%v", wasRequeued, tt.wantRequeued)
			}
		})
	}
}