fetch_ml/tests/fixtures/scheduler_fixture.go
Jeremie Fraeys 43e6446587
feat(scheduler): implement multi-tenant job scheduler with gang scheduling
Add new scheduler component for distributed ML workload orchestration:
- Hub-based coordination for multi-worker clusters
- Pacing controller for rate limiting job submissions
- Priority queue with preemption support
- Port allocator for dynamic service discovery
- Protocol handlers for worker-scheduler communication
- Service manager with OS-specific implementations
- Connection management and state persistence
- Template system for service deployment

Includes comprehensive test suite:
- Unit tests for all core components
- Integration tests for distributed scenarios
- Benchmark tests for performance validation
- Mock fixtures for isolated testing

Refs: scheduler-architecture.md
2026-02-26 12:03:23 -05:00

118 lines
3.3 KiB
Go

// Package tests provides shared test utilities and fixtures for scheduler tests.
package tests
import (
"os"
"testing"
"time"
"github.com/jfraeys/fetch_ml/internal/scheduler"
"github.com/stretchr/testify/require"
)
// testStateDir is the directory passed to the hub as its StateDir by
// DefaultHubConfig. It is set exactly once, by init.
var testStateDir string

// init creates a fresh temporary directory matching "fetchml-test-*" in the
// default temp location and stores its path in testStateDir. It panics when
// the directory cannot be created.
//
// NOTE(review): nothing in this file removes the directory afterwards.
func init() {
	dir, mkErr := os.MkdirTemp("", "fetchml-test-*")
	if mkErr != nil {
		panic("failed to create test state dir: " + mkErr.Error())
	}
	testStateDir = dir
}
// SchedulerTestFixture bundles a scheduler hub with the mock workers created
// against it, so a test can drive both through a single value.
type SchedulerTestFixture struct {
	// T is the test or benchmark handle used for assertions.
	T testing.TB
	// Hub is the scheduler hub the fixture operates on.
	Hub *scheduler.SchedulerHub
	// Workers holds the mock workers added by CreateWorker, keyed by worker ID.
	Workers map[string]*MockWorker
}
// NewSchedulerTestFixture builds a hub from cfg, starts it, and returns a
// fixture wrapping that hub together with an empty worker map.
//
// An empty cfg.BindAddr is replaced with "localhost:0" (presumably so the OS
// picks a free port — confirm against the hub implementation). Any error from
// creating or starting the hub fails the test immediately through require.
// The function marks itself as a test helper so those failures are attributed
// to the calling test's line rather than to this file.
//
// Use Cleanup to close the workers and stop the hub when the test is done.
func NewSchedulerTestFixture(t testing.TB, cfg scheduler.HubConfig) *SchedulerTestFixture {
	t.Helper()

	if cfg.BindAddr == "" {
		cfg.BindAddr = "localhost:0"
	}

	hub, err := scheduler.NewHub(cfg, nil)
	require.NoError(t, err)

	// Start the scheduler before handing the fixture to the test.
	require.NoError(t, hub.Start())

	return &SchedulerTestFixture{
		T:       t,
		Hub:     hub,
		Workers: make(map[string]*MockWorker),
	}
}
// CreateWorker builds a mock worker bound to the fixture's hub, registers it
// with the given capabilities, records it in f.Workers under workerID, and
// returns it. An existing entry with the same workerID is overwritten.
func (f *SchedulerTestFixture) CreateWorker(workerID string, caps scheduler.WorkerCapabilities) *MockWorker {
	w := NewMockWorker(f.T, f.Hub, workerID)
	// Register before tracking: the worker is only stored once registration
	// has returned.
	w.Register(caps)
	f.Workers[workerID] = w
	return w
}
// SubmitJob hands spec to the hub and fails the test immediately if the hub
// returns an error. It marks itself as a test helper so the failure is
// reported at the calling test's line.
func (f *SchedulerTestFixture) SubmitJob(spec scheduler.JobSpec) {
	f.T.Helper()
	require.NoError(f.T, f.Hub.SubmitJob(spec))
}
// GetTask looks up taskID on the hub and returns whatever the hub's GetTask
// yields for it, unmodified.
func (f *SchedulerTestFixture) GetTask(taskID string) *scheduler.Task {
	task := f.Hub.GetTask(taskID)
	return task
}
// Cleanup tears the fixture down: every tracked worker is closed, and only
// then is the hub stopped. Workers are closed in map iteration order, which
// Go leaves unspecified.
func (f *SchedulerTestFixture) Cleanup() {
	// Workers first.
	for id := range f.Workers {
		f.Workers[id].Close()
	}

	// Hub last.
	f.Hub.Stop()
}
// DefaultHubConfig returns the hub configuration shared by the scheduler
// tests. It binds to "localhost:0", stores state under testStateDir, and
// pre-populates WorkerTokens for the worker IDs used across the test suite.
// A new map is built on every call, so callers may modify the result freely.
func DefaultHubConfig() scheduler.HubConfig {
	// Keys look like per-worker tokens and values like worker IDs —
	// presumably token -> worker ID; confirm against HubConfig.
	tokens := map[string]string{
		"test-token-worker-restart-1":     "worker-restart-1",
		"test-token-mode-switch-worker":   "mode-switch-worker",
		"test-token-mode-switch-worker-2": "mode-switch-worker-2",
		"test-token-e2e-worker-1":         "e2e-worker-1",
		"test-token-e2e-worker-2":         "e2e-worker-2",
		"test-token-worker-death-test":    "worker-death-test",
		"test-token-worker-split-1":       "worker-split-1",
		"test-token-worker-split-2":       "worker-split-2",
		"test-token-worker-split-3":       "worker-split-3",
		"test-token-worker-timeout":       "worker-timeout",
		"test-token-worker-gang":          "worker-gang",
		"test-token-bench-worker":         "bench-worker",
		"test-token-bench-hb-worker":      "bench-hb-worker",
		"test-token-bench-assign-worker":  "bench-assign-worker",
	}

	return scheduler.HubConfig{
		BindAddr:                "localhost:0",
		DefaultBatchSlots:       4,
		StarvationThresholdMins: 5,
		AcceptanceTimeoutSecs:   5,
		GangAllocTimeoutSecs:    10,
		StateDir:                testStateDir,
		WorkerTokens:            tokens,
	}
}
// WaitForTimeout polls condition until it returns true or duration has
// elapsed, and reports whether the condition was met.
//
// The condition is always evaluated at least once, so a zero or negative
// duration acts as a single immediate check. Between checks the function
// sleeps for 10ms, shortened to the time remaining so it never sleeps past
// the deadline; the condition is checked one last time when the deadline is
// reached.
func WaitForTimeout(duration time.Duration, condition func() bool) bool {
	const pollInterval = 10 * time.Millisecond

	deadline := time.Now().Add(duration)
	for {
		if condition() {
			return true
		}

		remaining := time.Until(deadline)
		if remaining <= 0 {
			return false
		}

		// Clamp the final sleep so the last check lands on the deadline.
		wait := pollInterval
		if remaining < wait {
			wait = remaining
		}
		time.Sleep(wait)
	}
}