fetch_ml/internal/scheduler/hub_test_helpers.go
Jeremie Fraeys d0266c4a90
refactor: scheduler hub bug fix, test helpers, and orphan recovery tests
Fix bug in scheduler hub orphan reconciliation:
- Move delete(h.pendingAcceptance, taskID) inside the requeue success block
- Prevents premature cleanup when requeue fails

Add comprehensive test infrastructure:
- hub_test_helpers.go: New test helper utilities (78 lines)
  - Mock scheduler components for isolated testing
  - Test fixture setup and teardown helpers

Refactor and enhance hub capabilities tests:
- Significant restructuring of hub_capabilities_test.go (213 lines changed)
- Improved test coverage for worker capability matching

Add comprehensive orphan recovery tests:
- internal/scheduler/orphan_recovery_test.go (451 lines)
- Tests orphaned job detection and recovery
- Covers requeue logic, timeout handling, state cleanup
2026-03-12 16:38:33 -04:00

78 lines
2.5 KiB
Go

package scheduler
import "time"
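// Test helpers that expose internal functionality to tests living in the
// tests/ directory (i.e. outside this package).
// NOTE(review): Go only excludes files named *_test.go from regular builds;
// unless this file carries a build constraint, these helpers are compiled
// into non-test builds as well — confirm before relying on "test only".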
// CanAdmitForTest is a test-only shim that forwards candidate and worker to
// the unexported canAdmit method and returns its result unchanged.
func (h *SchedulerHub) CanAdmitForTest(candidate *Task, worker *WorkerConn) bool {
	admitted := h.canAdmit(candidate, worker)
	return admitted
}
// ReconcileOrphansForTest is a test-only shim that invokes the unexported
// reconcileOrphans method on this hub. It takes no lock of its own before
// making the call.
func (h *SchedulerHub) ReconcileOrphansForTest() {
	h.reconcileOrphans()
}
// SetPendingAcceptanceForTest stores assignment as the pending-acceptance
// entry for taskID, replacing any entry already recorded under that ID.
// The hub's write lock is held for the whole operation, and the map is
// allocated on demand so a hub whose map was never initialised does not
// panic on the write.
func (h *SchedulerHub) SetPendingAcceptanceForTest(taskID string, assignment *JobAssignment) {
	h.mu.Lock()
	defer h.mu.Unlock()

	pending := h.pendingAcceptance
	if pending == nil {
		pending = make(map[string]*JobAssignment)
		h.pendingAcceptance = pending
	}
	pending[taskID] = assignment
}
// GetPendingAcceptanceForTest looks up the pending-acceptance entry for
// taskID while holding the hub's read lock. The boolean reports whether an
// entry exists; when it is false the returned assignment is nil.
func (h *SchedulerHub) GetPendingAcceptanceForTest(taskID string) (*JobAssignment, bool) {
	h.mu.RLock()
	defer h.mu.RUnlock()

	assignment, found := h.pendingAcceptance[taskID]
	return assignment, found
}
// SetWorkerConnForTest overwrites the unexported capabilities and slots
// fields of an existing WorkerConn so tests outside the package can shape
// it. It does not allocate a new connection and takes no lock; wc must be
// non-nil.
func SetWorkerConnForTest(wc *WorkerConn, caps WorkerCapabilities, slots SlotStatus) {
	wc.capabilities, wc.slots = caps, slots
}
// NewWorkerConnForTest allocates a WorkerConn whose capabilities and slots
// are set to the given values; every other field keeps its zero value.
func NewWorkerConnForTest(caps WorkerCapabilities, slots SlotStatus) *WorkerConn {
	wc := new(WorkerConn)
	wc.capabilities = caps
	wc.slots = slots
	return wc
}
// SetReservationsForTest replaces the hub's reservations map with the one
// supplied, under the hub's write lock. The map is stored by reference (not
// copied), so later changes made by the caller are visible to the hub, and
// passing nil leaves the hub with a nil map.
func (h *SchedulerHub) SetReservationsForTest(reservations map[string]*Reservation) {
	h.mu.Lock()
	h.reservations = reservations
	h.mu.Unlock()
}
// NewTestSchedulerHub assembles a SchedulerHub for tests by filling in the
// struct directly: empty worker, reservation, multi-node, pending-acceptance
// and running-task maps, two priority queues built with NewPriorityQueue(0.1),
// a starvation tracker whose threshold is cfg.StarvationThresholdMins
// interpreted as minutes, an empty metrics slot map, and cfg stored as the
// hub's config.
//
// It panics if the backing state store cannot be created. The signature has
// no error return, and continuing with a nil store would only move the
// failure to a later, harder-to-diagnose call site.
func NewTestSchedulerHub(cfg HubConfig) *SchedulerHub {
	// NOTE(review): every hub built here uses this same fixed path, so state
	// may be shared between tests (and between parallel test runs) — confirm
	// that is intended.
	stateStore, err := NewStateStore("/tmp/test-scheduler.state")
	if err != nil {
		panic("scheduler: NewTestSchedulerHub: creating state store: " + err.Error())
	}

	return &SchedulerHub{
		workers:           make(map[string]*WorkerConn),
		readyWorkers:      make(map[string]*WorkerConn),
		batchQueue:        NewPriorityQueue(0.1),
		serviceQueue:      NewPriorityQueue(0.1),
		reservations:      make(map[string]*Reservation),
		multiNodePending:  make(map[string]*MultiNodeJob),
		pendingAcceptance: make(map[string]*JobAssignment),
		runningTasks:      make(map[string]*Task),
		state:             stateStore,
		starvation: &StarvationTracker{
			threshold: time.Duration(cfg.StarvationThresholdMins) * time.Minute,
		},
		metrics: &SchedulerMetrics{
			WorkerSlots: make(map[string]SlotStatus),
		},
		config: cfg,
	}
}