fetch_ml/tests/unit/scheduler/service_templates_test.go
Jeremie Fraeys c5524562e9
test(scheduler): remove unused fields in service slot pool separation test
Remove ID and GPUCount fields from batchJob in TestServiceSlotPoolSeparation
that were assigned but never used. The test only validates SlotPool values.
2026-03-12 12:10:33 -04:00

262 lines
7.8 KiB
Go

package scheduler_test
import (
"testing"
"time"
"github.com/jfraeys/fetch_ml/internal/scheduler"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestJupyterLabTemplate pins the fields of scheduler.JupyterLabTemplate:
// job type, slot pool, GPU count, launch command, health-check settings and
// the single workspace mount.
func TestJupyterLabTemplate(t *testing.T) {
	tmpl := scheduler.JupyterLabTemplate

	assert.Equal(t, "service", tmpl.JobType)
	assert.Equal(t, "service", tmpl.SlotPool)
	assert.Equal(t, 0, tmpl.GPUCount)

	// Every one of these arguments must appear as an element of the command.
	require.NotEmpty(t, tmpl.Command)
	requiredArgs := []string{
		"jupyter",
		"lab",
		"--ip=0.0.0.0",
		"--port={{SERVICE_PORT}}",
		"--no-browser",
	}
	for _, arg := range requiredArgs {
		assert.Contains(t, tmpl.Command, arg)
	}

	// Health-check endpoints and timing values.
	hc := tmpl.HealthCheck
	assert.Equal(t, "http://localhost:{{SERVICE_PORT}}/api", hc.Liveness)
	assert.Equal(t, "http://localhost:{{SERVICE_PORT}}/api/kernels", hc.Readiness)
	assert.Equal(t, 15, hc.Interval)
	assert.Equal(t, 5, hc.Timeout)

	// Exactly one mount is expected; require stops the test before indexing
	// if the length is wrong.
	mounts := tmpl.Mounts
	require.Len(t, mounts, 1)
	assert.Equal(t, "{{WORKSPACE}}", mounts[0].Source)
	assert.Equal(t, "/workspace", mounts[0].Destination)
}
// TestJupyterNotebookTemplate checks scheduler.JupyterNotebookTemplate: it must
// be a GPU-less job in the "service" pool whose command contains "notebook".
func TestJupyterNotebookTemplate(t *testing.T) {
	nb := scheduler.JupyterNotebookTemplate

	assert.Equal(t, "service", nb.JobType)
	assert.Equal(t, "service", nb.SlotPool)
	assert.Equal(t, 0, nb.GPUCount)

	// The classic notebook is launched through the "notebook" subcommand.
	require.NotEmpty(t, nb.Command)
	assert.Contains(t, nb.Command, "notebook")
}
// TestVLLMTemplate checks scheduler.VLLMTemplate: a "service" job in the
// "service" pool that requests one GPU and whose command carries the vLLM
// entrypoint plus the model-name and service-port placeholders.
func TestVLLMTemplate(t *testing.T) {
	vllm := scheduler.VLLMTemplate

	assert.Equal(t, "service", vllm.JobType)
	assert.Equal(t, "service", vllm.SlotPool)
	assert.Equal(t, 1, vllm.GPUCount) // the template asks for a GPU

	require.NotEmpty(t, vllm.Command)
	for _, want := range []string{
		"vllm.entrypoints.openai.api_server",
		"{{MODEL_NAME}}",
		"{{SERVICE_PORT}}",
	} {
		assert.Contains(t, vllm.Command, want)
	}
}
// TestPortAllocatorForServices walks a PortAllocator through allocate, lookup,
// release and re-allocate, checking that ports stay inside the configured
// range and that a released port no longer maps to a task.
func TestPortAllocatorForServices(t *testing.T) {
	const (
		rangeStart = 10000
		rangeEnd   = 10010
	)
	allocator := scheduler.NewPortAllocator(rangeStart, rangeEnd)

	// First allocation: must land in range and be traceable back to its task.
	first, err := allocator.Allocate("jupyter-task-1")
	require.NoError(t, err)
	assert.True(t, first >= rangeStart && first <= rangeEnd)
	assert.Equal(t, "jupyter-task-1", allocator.GetAllocation(first))

	// Second allocation must not collide with the first.
	second, err := allocator.Allocate("jupyter-task-2")
	require.NoError(t, err)
	assert.NotEqual(t, first, second)

	// After release the port is expected to report no owning task.
	allocator.Release(first)
	assert.Equal(t, "", allocator.GetAllocation(first))

	// A further allocation still succeeds; whether it reuses the released
	// port is not asserted, only that the result is in range.
	third, err := allocator.Allocate("jupyter-task-3")
	require.NoError(t, err)
	assert.True(t, third >= rangeStart && third <= rangeEnd)
}
// TestPortAllocatorExhaustion verifies that Allocate fails with a
// "no ports available" error once every port in the range is taken.
//
// The test expects the range 20000-20002 to hold exactly three ports: three
// allocations must succeed and the fourth must fail.
func TestPortAllocatorExhaustion(t *testing.T) {
	// Small range so it can be exhausted quickly.
	pa := scheduler.NewPortAllocator(20000, 20002)

	// Consume every port in the range.
	for _, taskID := range []string{"task-1", "task-2", "task-3"} {
		_, err := pa.Allocate(taskID)
		require.NoError(t, err)
	}

	// The next allocation has nothing left to hand out.
	_, err := pa.Allocate("task-4")
	// require (not assert) so the test stops here when err is nil; otherwise
	// err.Error() below would panic with a nil dereference instead of
	// reporting a clean failure.
	require.Error(t, err)
	assert.Contains(t, err.Error(), "no ports available")
}
// TestPortAllocatorTTL exercises allocate -> release -> allocate on an
// allocator configured with a 50ms TTL.
//
// NOTE(review): the test neither waits for the TTL to elapse nor compares the
// two ports, so it only proves that an allocation made right after a release
// succeeds and stays inside the range. How the TTL affects reuse of the
// released port is not asserted here.
func TestPortAllocatorTTL(t *testing.T) {
	const (
		rangeStart = 30000
		rangeEnd   = 30010
	)
	allocator := scheduler.NewPortAllocator(rangeStart, rangeEnd)

	// Short TTL keeps the test fast.
	allocator.SetTTL(50 * time.Millisecond)

	// Take one port and hand it straight back.
	released, err := allocator.Allocate("test-task")
	require.NoError(t, err)
	allocator.Release(released)

	// Allocate again immediately; the result may or may not equal the
	// released port, so only the range is checked.
	next, err := allocator.Allocate("test-task-2")
	require.NoError(t, err)
	assert.True(t, next >= rangeStart && next <= rangeEnd)
}
// TestServiceSlotPoolSeparation checks that a service template and a batch job
// spec carry different SlotPool values.
//
// This only compares SlotPool values and the template's health-check
// interval; it does not drive the scheduler's queues.
func TestServiceSlotPoolSeparation(t *testing.T) {
	// JupyterLabTemplate is used because it has health checks configured.
	svc := scheduler.JupyterLabTemplate

	var batch scheduler.JobSpec
	batch.SlotPool = "batch"

	// The two kinds of work are labelled with distinct pools.
	assert.Equal(t, "service", svc.SlotPool)
	assert.Equal(t, "batch", batch.SlotPool)

	// The service template has a non-zero health-check interval. Nothing is
	// asserted about health checks on the batch spec.
	assert.NotZero(t, svc.HealthCheck.Interval)
}
// TestHealthCheckValidation runs a table of health-check configurations
// through a validity predicate: a check is valid when it has a liveness URL,
// a positive interval and a positive timeout.
//
// Each invalid case breaks exactly one of those conditions and leaves the
// others satisfied, so a case can only pass if the condition it names is
// actually evaluated.
//
// NOTE(review): the predicate is defined inline in this test rather than
// called from the scheduler package, so this pins the intended rule, not a
// production validator.
func TestHealthCheckValidation(t *testing.T) {
	tests := []struct {
		name     string
		template scheduler.ServiceTemplate
		valid    bool
	}{
		{
			name: "JupyterLab - valid",
			template: scheduler.ServiceTemplate{
				JobType:  "service",
				SlotPool: "service",
				HealthCheck: scheduler.ServiceHealthCheck{
					Liveness:  "http://localhost:8888/api",
					Readiness: "http://localhost:8888/api/kernels",
					Interval:  15,
					Timeout:   5,
				},
			},
			valid: true,
		},
		{
			name: "Missing liveness - invalid",
			template: scheduler.ServiceTemplate{
				JobType:  "service",
				SlotPool: "service",
				HealthCheck: scheduler.ServiceHealthCheck{
					Readiness: "http://localhost:8888/api",
					Interval:  15,
					// Timeout is set so the empty Liveness is the only violation.
					Timeout: 5,
				},
			},
			valid: false,
		},
		{
			name: "Zero interval - invalid",
			template: scheduler.ServiceTemplate{
				JobType:  "service",
				SlotPool: "service",
				HealthCheck: scheduler.ServiceHealthCheck{
					Liveness:  "http://localhost:8888/api",
					Readiness: "http://localhost:8888/api",
					Interval:  0,
					// Timeout is set so the zero Interval is the only violation.
					Timeout: 5,
				},
			},
			valid: false,
		},
		{
			name: "Zero timeout - invalid",
			template: scheduler.ServiceTemplate{
				JobType:  "service",
				SlotPool: "service",
				HealthCheck: scheduler.ServiceHealthCheck{
					Liveness:  "http://localhost:8888/api",
					Readiness: "http://localhost:8888/api",
					Interval:  15,
					Timeout:   0,
				},
			},
			valid: false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			hc := tt.template.HealthCheck
			isValid := hc.Liveness != "" && hc.Interval > 0 && hc.Timeout > 0
			assert.Equal(t, tt.valid, isValid)
		})
	}
}
// TestDefaultPortRange pins the default service port bounds to 8000 and 9000
// and checks that the span between them is at least 1000.
func TestDefaultPortRange(t *testing.T) {
	start, end := scheduler.DefaultServicePortStart, scheduler.DefaultServicePortEnd

	// The default range should be roomy enough for typical deployments.
	span := end - start
	assert.True(t, span >= 1000, "Default port range should be at least 1000 ports")

	assert.Equal(t, 8000, start)
	assert.Equal(t, 9000, end)
}
// TestTemplateVariableExpansion checks that the JupyterLab template still
// carries its unexpanded placeholders: the {{SERVICE_PORT}} port flag in the
// command and a {{TOKEN:...}} value for the JUPYTER_TOKEN environment variable.
func TestTemplateVariableExpansion(t *testing.T) {
	template := scheduler.JupyterLabTemplate

	// The port flag must appear verbatim as one element of the command.
	// assert.Contains on a slice tests element equality, which is what the
	// previous hand-written loop did.
	assert.Contains(t, template.Command, "--port={{SERVICE_PORT}}",
		"Command should contain {{SERVICE_PORT}} template variable")

	// The token env var must exist and use the token template (used for
	// secret generation).
	val, ok := template.Env["JUPYTER_TOKEN"]
	assert.True(t, ok, "Should have JUPYTER_TOKEN env var")
	assert.Contains(t, val, "{{TOKEN:", "Should use token template for secret generation")
}
// BenchmarkPortAllocation measures one Allocate followed by one Release per
// iteration on an allocator covering ports 40000-41000.
func BenchmarkPortAllocation(b *testing.B) {
	allocator := scheduler.NewPortAllocator(40000, 41000)

	// Keep allocator construction out of the timed region.
	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		p, allocErr := allocator.Allocate("bench-task")
		if allocErr != nil {
			b.Fatal(allocErr)
		}
		allocator.Release(p)
	}
}