Remove ID and GPUCount fields from batchJob in TestServiceSlotPoolSeparation that were assigned but never used. The test only validates SlotPool values.
262 lines
7.8 KiB
Go
262 lines
7.8 KiB
Go
package scheduler_test
|
|
|
|
import (
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/jfraeys/fetch_ml/internal/scheduler"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
// TestJupyterLabTemplate validates the JupyterLab template configuration
|
|
func TestJupyterLabTemplate(t *testing.T) {
|
|
template := scheduler.JupyterLabTemplate
|
|
|
|
assert.Equal(t, "service", template.JobType)
|
|
assert.Equal(t, "service", template.SlotPool)
|
|
assert.Equal(t, 0, template.GPUCount)
|
|
|
|
// Verify command includes required flags
|
|
require.NotEmpty(t, template.Command)
|
|
assert.Contains(t, template.Command, "jupyter")
|
|
assert.Contains(t, template.Command, "lab")
|
|
assert.Contains(t, template.Command, "--ip=0.0.0.0")
|
|
assert.Contains(t, template.Command, "--port={{SERVICE_PORT}}")
|
|
assert.Contains(t, template.Command, "--no-browser")
|
|
|
|
// Verify health checks
|
|
assert.Equal(t, "http://localhost:{{SERVICE_PORT}}/api", template.HealthCheck.Liveness)
|
|
assert.Equal(t, "http://localhost:{{SERVICE_PORT}}/api/kernels", template.HealthCheck.Readiness)
|
|
assert.Equal(t, 15, template.HealthCheck.Interval)
|
|
assert.Equal(t, 5, template.HealthCheck.Timeout)
|
|
|
|
// Verify mounts
|
|
require.Len(t, template.Mounts, 1)
|
|
assert.Equal(t, "{{WORKSPACE}}", template.Mounts[0].Source)
|
|
assert.Equal(t, "/workspace", template.Mounts[0].Destination)
|
|
}
|
|
|
|
// TestJupyterNotebookTemplate validates the classic notebook template
|
|
func TestJupyterNotebookTemplate(t *testing.T) {
|
|
template := scheduler.JupyterNotebookTemplate
|
|
|
|
assert.Equal(t, "service", template.JobType)
|
|
assert.Equal(t, "service", template.SlotPool)
|
|
assert.Equal(t, 0, template.GPUCount)
|
|
|
|
// Verify uses notebook subcommand
|
|
require.NotEmpty(t, template.Command)
|
|
assert.Contains(t, template.Command, "notebook")
|
|
}
|
|
|
|
// TestVLLMTemplate validates the vLLM inference template
|
|
func TestVLLMTemplate(t *testing.T) {
|
|
template := scheduler.VLLMTemplate
|
|
|
|
assert.Equal(t, "service", template.JobType)
|
|
assert.Equal(t, "service", template.SlotPool)
|
|
assert.Equal(t, 1, template.GPUCount) // Requires GPU
|
|
|
|
// Verify command
|
|
require.NotEmpty(t, template.Command)
|
|
assert.Contains(t, template.Command, "vllm.entrypoints.openai.api_server")
|
|
assert.Contains(t, template.Command, "{{MODEL_NAME}}")
|
|
assert.Contains(t, template.Command, "{{SERVICE_PORT}}")
|
|
}
|
|
|
|
// TestPortAllocatorForServices validates port allocation for service jobs
|
|
func TestPortAllocatorForServices(t *testing.T) {
|
|
pa := scheduler.NewPortAllocator(10000, 10010)
|
|
|
|
// Allocate a port for Jupyter service
|
|
port1, err := pa.Allocate("jupyter-task-1")
|
|
require.NoError(t, err)
|
|
assert.True(t, port1 >= 10000 && port1 <= 10010)
|
|
|
|
// Verify we can get the task for this port
|
|
taskID := pa.GetAllocation(port1)
|
|
assert.Equal(t, "jupyter-task-1", taskID)
|
|
|
|
// Allocate another port
|
|
port2, err := pa.Allocate("jupyter-task-2")
|
|
require.NoError(t, err)
|
|
assert.NotEqual(t, port1, port2)
|
|
|
|
// Release first port
|
|
pa.Release(port1)
|
|
|
|
// Verify port is now available
|
|
taskID = pa.GetAllocation(port1)
|
|
assert.Equal(t, "", taskID)
|
|
|
|
// Can reallocate the same port
|
|
port3, err := pa.Allocate("jupyter-task-3")
|
|
require.NoError(t, err)
|
|
// Should get first available (which might be port1)
|
|
assert.True(t, port3 >= 10000 && port3 <= 10010)
|
|
}
|
|
|
|
// TestPortAllocatorExhaustion validates behavior when no ports available
|
|
func TestPortAllocatorExhaustion(t *testing.T) {
|
|
// Small range for testing
|
|
pa := scheduler.NewPortAllocator(20000, 20002)
|
|
|
|
// Allocate all ports
|
|
_, err := pa.Allocate("task-1")
|
|
require.NoError(t, err)
|
|
_, err = pa.Allocate("task-2")
|
|
require.NoError(t, err)
|
|
_, err = pa.Allocate("task-3")
|
|
require.NoError(t, err)
|
|
|
|
// Fourth allocation should fail
|
|
_, err = pa.Allocate("task-4")
|
|
assert.Error(t, err)
|
|
assert.Contains(t, err.Error(), "no ports available")
|
|
}
|
|
|
|
// TestPortAllocatorTTL validates port TTL behavior
|
|
func TestPortAllocatorTTL(t *testing.T) {
|
|
pa := scheduler.NewPortAllocator(30000, 30010)
|
|
|
|
// Set short TTL for testing
|
|
pa.SetTTL(50 * time.Millisecond)
|
|
|
|
// Allocate a port
|
|
port1, err := pa.Allocate("test-task")
|
|
require.NoError(t, err)
|
|
|
|
// Release it (marks with expired timestamp due to short TTL)
|
|
pa.Release(port1)
|
|
|
|
// Immediately try to allocate - should get different port since released one is "expired"
|
|
port2, err := pa.Allocate("test-task-2")
|
|
require.NoError(t, err)
|
|
|
|
// Could be same or different depending on cleanup timing
|
|
assert.True(t, port2 >= 30000 && port2 <= 30010)
|
|
}
|
|
|
|
// TestServiceSlotPoolSeparation validates that service and batch use different pools
|
|
func TestServiceSlotPoolSeparation(t *testing.T) {
|
|
// This test validates the conceptual separation
|
|
// In practice, the scheduler maintains separate queues
|
|
|
|
// Use JupyterLabTemplate which has health checks configured
|
|
serviceJob := scheduler.JupyterLabTemplate
|
|
|
|
batchJob := scheduler.JobSpec{
|
|
SlotPool: "batch",
|
|
}
|
|
|
|
// Verify different slot pools
|
|
assert.Equal(t, "service", serviceJob.SlotPool)
|
|
assert.Equal(t, "batch", batchJob.SlotPool)
|
|
|
|
// Service job has health checks
|
|
assert.NotZero(t, serviceJob.HealthCheck.Interval)
|
|
|
|
// Batch job would typically not have health checks
|
|
// (it runs to completion)
|
|
}
|
|
|
|
// TestHealthCheckValidation validates health check configuration
|
|
func TestHealthCheckValidation(t *testing.T) {
|
|
tests := []struct {
|
|
name string
|
|
template scheduler.ServiceTemplate
|
|
valid bool
|
|
}{
|
|
{
|
|
name: "JupyterLab - valid",
|
|
template: scheduler.ServiceTemplate{
|
|
JobType: "service",
|
|
SlotPool: "service",
|
|
HealthCheck: scheduler.ServiceHealthCheck{
|
|
Liveness: "http://localhost:8888/api",
|
|
Readiness: "http://localhost:8888/api/kernels",
|
|
Interval: 15,
|
|
Timeout: 5,
|
|
},
|
|
},
|
|
valid: true,
|
|
},
|
|
{
|
|
name: "Missing liveness - invalid",
|
|
template: scheduler.ServiceTemplate{
|
|
JobType: "service",
|
|
SlotPool: "service",
|
|
HealthCheck: scheduler.ServiceHealthCheck{
|
|
Readiness: "http://localhost:8888/api",
|
|
Interval: 15,
|
|
},
|
|
},
|
|
valid: false,
|
|
},
|
|
{
|
|
name: "Zero interval - invalid",
|
|
template: scheduler.ServiceTemplate{
|
|
JobType: "service",
|
|
SlotPool: "service",
|
|
HealthCheck: scheduler.ServiceHealthCheck{
|
|
Liveness: "http://localhost:8888/api",
|
|
Readiness: "http://localhost:8888/api",
|
|
Interval: 0,
|
|
},
|
|
},
|
|
valid: false,
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
hc := tt.template.HealthCheck
|
|
isValid := hc.Liveness != "" && hc.Interval > 0 && hc.Timeout > 0
|
|
assert.Equal(t, tt.valid, isValid)
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestDefaultPortRange validates the default service port range
|
|
func TestDefaultPortRange(t *testing.T) {
|
|
// Default range should be large enough for typical deployments
|
|
rangeSize := scheduler.DefaultServicePortEnd - scheduler.DefaultServicePortStart
|
|
assert.True(t, rangeSize >= 1000, "Default port range should be at least 1000 ports")
|
|
assert.Equal(t, 8000, scheduler.DefaultServicePortStart)
|
|
assert.Equal(t, 9000, scheduler.DefaultServicePortEnd)
|
|
}
|
|
|
|
// TestTemplateVariableExpansion validates template variables are present
|
|
func TestTemplateVariableExpansion(t *testing.T) {
|
|
template := scheduler.JupyterLabTemplate
|
|
|
|
// Check command contains template variables
|
|
hasServicePort := false
|
|
for _, cmd := range template.Command {
|
|
if cmd == "--port={{SERVICE_PORT}}" {
|
|
hasServicePort = true
|
|
break
|
|
}
|
|
}
|
|
assert.True(t, hasServicePort, "Command should contain {{SERVICE_PORT}} template variable")
|
|
|
|
// Check env contains token template (used for secret generation)
|
|
val, ok := template.Env["JUPYTER_TOKEN"]
|
|
assert.True(t, ok, "Should have JUPYTER_TOKEN env var")
|
|
assert.Contains(t, val, "{{TOKEN:", "Should use token template for secret generation")
|
|
}
|
|
|
|
// BenchmarkPortAllocation benchmarks port allocation performance
|
|
func BenchmarkPortAllocation(b *testing.B) {
|
|
pa := scheduler.NewPortAllocator(40000, 41000)
|
|
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
port, err := pa.Allocate("bench-task")
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
pa.Release(port)
|
|
}
|
|
}
|