package scheduler_test import ( "testing" "time" "github.com/jfraeys/fetch_ml/internal/scheduler" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) // TestJupyterLabTemplate validates the JupyterLab template configuration func TestJupyterLabTemplate(t *testing.T) { template := scheduler.JupyterLabTemplate assert.Equal(t, "service", template.JobType) assert.Equal(t, "service", template.SlotPool) assert.Equal(t, 0, template.GPUCount) // Verify command includes required flags require.NotEmpty(t, template.Command) assert.Contains(t, template.Command, "jupyter") assert.Contains(t, template.Command, "lab") assert.Contains(t, template.Command, "--ip=0.0.0.0") assert.Contains(t, template.Command, "--port={{SERVICE_PORT}}") assert.Contains(t, template.Command, "--no-browser") // Verify health checks assert.Equal(t, "http://localhost:{{SERVICE_PORT}}/api", template.HealthCheck.Liveness) assert.Equal(t, "http://localhost:{{SERVICE_PORT}}/api/kernels", template.HealthCheck.Readiness) assert.Equal(t, 15, template.HealthCheck.Interval) assert.Equal(t, 5, template.HealthCheck.Timeout) // Verify mounts require.Len(t, template.Mounts, 1) assert.Equal(t, "{{WORKSPACE}}", template.Mounts[0].Source) assert.Equal(t, "/workspace", template.Mounts[0].Destination) } // TestJupyterNotebookTemplate validates the classic notebook template func TestJupyterNotebookTemplate(t *testing.T) { template := scheduler.JupyterNotebookTemplate assert.Equal(t, "service", template.JobType) assert.Equal(t, "service", template.SlotPool) assert.Equal(t, 0, template.GPUCount) // Verify uses notebook subcommand require.NotEmpty(t, template.Command) assert.Contains(t, template.Command, "notebook") } // TestVLLMTemplate validates the vLLM inference template func TestVLLMTemplate(t *testing.T) { template := scheduler.VLLMTemplate assert.Equal(t, "service", template.JobType) assert.Equal(t, "service", template.SlotPool) assert.Equal(t, 1, template.GPUCount) // Requires GPU // Verify command require.NotEmpty(t, template.Command) assert.Contains(t, template.Command, "vllm.entrypoints.openai.api_server") assert.Contains(t, template.Command, "{{MODEL_NAME}}") assert.Contains(t, template.Command, "{{SERVICE_PORT}}") } // TestPortAllocatorForServices validates port allocation for service jobs func TestPortAllocatorForServices(t *testing.T) { pa := scheduler.NewPortAllocator(10000, 10010) // Allocate a port for Jupyter service port1, err := pa.Allocate("jupyter-task-1") require.NoError(t, err) assert.True(t, port1 >= 10000 && port1 <= 10010) // Verify we can get the task for this port taskID := pa.GetAllocation(port1) assert.Equal(t, "jupyter-task-1", taskID) // Allocate another port port2, err := pa.Allocate("jupyter-task-2") require.NoError(t, err) assert.NotEqual(t, port1, port2) // Release first port pa.Release(port1) // Verify port is now available taskID = pa.GetAllocation(port1) assert.Equal(t, "", taskID) // Can reallocate the same port port3, err := pa.Allocate("jupyter-task-3") require.NoError(t, err) // Should get first available (which might be port1) assert.True(t, port3 >= 10000 && port3 <= 10010) } // TestPortAllocatorExhaustion validates behavior when no ports available func TestPortAllocatorExhaustion(t *testing.T) { // Small range for testing pa := scheduler.NewPortAllocator(20000, 20002) // Allocate all ports _, err := pa.Allocate("task-1") require.NoError(t, err) _, err = pa.Allocate("task-2") require.NoError(t, err) _, err = pa.Allocate("task-3") require.NoError(t, err) // Fourth allocation should fail _, err = pa.Allocate("task-4") assert.Error(t, err) assert.Contains(t, err.Error(), "no ports available") } // TestPortAllocatorTTL validates port TTL behavior func TestPortAllocatorTTL(t *testing.T) { pa := scheduler.NewPortAllocator(30000, 30010) // Set short TTL for testing pa.SetTTL(50 * time.Millisecond) // Allocate a port port1, err := pa.Allocate("test-task") require.NoError(t, err) // Release it (marks with expired timestamp due to short TTL) pa.Release(port1) // Immediately try to allocate - should get different port since released one is "expired" port2, err := pa.Allocate("test-task-2") require.NoError(t, err) // Could be same or different depending on cleanup timing assert.True(t, port2 >= 30000 && port2 <= 30010) } // TestServiceSlotPoolSeparation validates that service and batch use different pools func TestServiceSlotPoolSeparation(t *testing.T) { // This test validates the conceptual separation // In practice, the scheduler maintains separate queues // Use JupyterLabTemplate which has health checks configured serviceJob := scheduler.JupyterLabTemplate batchJob := scheduler.JobSpec{ ID: "batch-1", SlotPool: "batch", GPUCount: 1, } // Verify different slot pools assert.Equal(t, "service", serviceJob.SlotPool) assert.Equal(t, "batch", batchJob.SlotPool) // Service job has health checks assert.NotZero(t, serviceJob.HealthCheck.Interval) // Batch job would typically not have health checks // (it runs to completion) } // TestHealthCheckValidation validates health check configuration func TestHealthCheckValidation(t *testing.T) { tests := []struct { name string template scheduler.ServiceTemplate valid bool }{ { name: "JupyterLab - valid", template: scheduler.ServiceTemplate{ JobType: "service", SlotPool: "service", HealthCheck: scheduler.ServiceHealthCheck{ Liveness: "http://localhost:8888/api", Readiness: "http://localhost:8888/api/kernels", Interval: 15, Timeout: 5, }, }, valid: true, }, { name: "Missing liveness - invalid", template: scheduler.ServiceTemplate{ JobType: "service", SlotPool: "service", HealthCheck: scheduler.ServiceHealthCheck{ Readiness: "http://localhost:8888/api", Interval: 15, }, }, valid: false, }, { name: "Zero interval - invalid", template: scheduler.ServiceTemplate{ JobType: "service", SlotPool: "service", HealthCheck: scheduler.ServiceHealthCheck{ Liveness: "http://localhost:8888/api", Readiness: "http://localhost:8888/api", Interval: 0, }, }, valid: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { hc := tt.template.HealthCheck isValid := hc.Liveness != "" && hc.Interval > 0 && hc.Timeout > 0 assert.Equal(t, tt.valid, isValid) }) } } // TestDefaultPortRange validates the default service port range func TestDefaultPortRange(t *testing.T) { // Default range should be large enough for typical deployments rangeSize := scheduler.DefaultServicePortEnd - scheduler.DefaultServicePortStart assert.True(t, rangeSize >= 1000, "Default port range should be at least 1000 ports") assert.Equal(t, 8000, scheduler.DefaultServicePortStart) assert.Equal(t, 9000, scheduler.DefaultServicePortEnd) } // TestTemplateVariableExpansion validates template variables are present func TestTemplateVariableExpansion(t *testing.T) { template := scheduler.JupyterLabTemplate // Check command contains template variables hasServicePort := false for _, cmd := range template.Command { if cmd == "--port={{SERVICE_PORT}}" { hasServicePort = true break } } assert.True(t, hasServicePort, "Command should contain {{SERVICE_PORT}} template variable") // Check env contains token template (used for secret generation) val, ok := template.Env["JUPYTER_TOKEN"] assert.True(t, ok, "Should have JUPYTER_TOKEN env var") assert.Contains(t, val, "{{TOKEN:", "Should use token template for secret generation") } // BenchmarkPortAllocation benchmarks port allocation performance func BenchmarkPortAllocation(b *testing.B) { pa := scheduler.NewPortAllocator(40000, 41000) b.ResetTimer() for i := 0; i < b.N; i++ { port, err := pa.Allocate("bench-task") if err != nil { b.Fatal(err) } pa.Release(port) } }