// fetch_ml/tests/unit/scheduler/service_templates_test.go

package scheduler_test

import (
	"testing"
	"time"

	"github.com/jfraeys/fetch_ml/internal/scheduler"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// TestJupyterLabTemplate checks the fields of scheduler.JupyterLabTemplate:
// job classification, launch command arguments, health-check settings and the
// single workspace mount.
func TestJupyterLabTemplate(t *testing.T) {
	tmpl := scheduler.JupyterLabTemplate

	// Classified as a service job in the service pool, requesting zero GPUs.
	assert.Equal(t, "service", tmpl.JobType)
	assert.Equal(t, "service", tmpl.SlotPool)
	assert.Equal(t, 0, tmpl.GPUCount)

	// Every expected argument must be present in the launch command.
	require.NotEmpty(t, tmpl.Command)
	for _, arg := range []string{
		"jupyter",
		"lab",
		"--ip=0.0.0.0",
		"--port={{SERVICE_PORT}}",
		"--no-browser",
	} {
		assert.Contains(t, tmpl.Command, arg)
	}

	// Liveness/readiness endpoints and the probe timing values.
	probe := tmpl.HealthCheck
	assert.Equal(t, "http://localhost:{{SERVICE_PORT}}/api", probe.Liveness)
	assert.Equal(t, "http://localhost:{{SERVICE_PORT}}/api/kernels", probe.Readiness)
	assert.Equal(t, 15, probe.Interval)
	assert.Equal(t, 5, probe.Timeout)

	// Exactly one mount: the templated workspace mapped to /workspace.
	require.Len(t, tmpl.Mounts, 1)
	workspace := tmpl.Mounts[0]
	assert.Equal(t, "{{WORKSPACE}}", workspace.Source)
	assert.Equal(t, "/workspace", workspace.Destination)
}

// TestJupyterNotebookTemplate checks that scheduler.JupyterNotebookTemplate is
// a zero-GPU service job whose command includes the "notebook" argument.
func TestJupyterNotebookTemplate(t *testing.T) {
	tmpl := scheduler.JupyterNotebookTemplate

	// Both the job type and the slot pool are expected to be "service".
	for _, got := range []string{tmpl.JobType, tmpl.SlotPool} {
		assert.Equal(t, "service", got)
	}
	assert.Equal(t, 0, tmpl.GPUCount)

	// The command must be populated and select the notebook subcommand.
	require.NotEmpty(t, tmpl.Command)
	assert.Contains(t, tmpl.Command, "notebook")
}

// TestVLLMTemplate checks that scheduler.VLLMTemplate is a service job that
// requests one GPU and whose command references the vLLM OpenAI API server
// entrypoint plus the model-name and service-port placeholders.
func TestVLLMTemplate(t *testing.T) {
	tmpl := scheduler.VLLMTemplate

	assert.Equal(t, "service", tmpl.JobType)
	assert.Equal(t, "service", tmpl.SlotPool)
	// Unlike the Jupyter templates, this one asks for a GPU.
	assert.Equal(t, 1, tmpl.GPUCount)

	// Entrypoint module and both template placeholders must appear.
	require.NotEmpty(t, tmpl.Command)
	for _, want := range []string{
		"vllm.entrypoints.openai.api_server",
		"{{MODEL_NAME}}",
		"{{SERVICE_PORT}}",
	} {
		assert.Contains(t, tmpl.Command, want)
	}
}

// TestPortAllocatorForServices walks a PortAllocator through an
// allocate / look up / release / allocate-again cycle and checks the
// bookkeeping at each step.
func TestPortAllocatorForServices(t *testing.T) {
	allocator := scheduler.NewPortAllocator(10000, 10010)

	// First allocation must land inside the configured range.
	first, err := allocator.Allocate("jupyter-task-1")
	require.NoError(t, err)
	assert.True(t, first >= 10000 && first <= 10010)

	// The allocator reports which task owns the port.
	owner := allocator.GetAllocation(first)
	assert.Equal(t, "jupyter-task-1", owner)

	// A second task gets a port distinct from the first.
	second, err := allocator.Allocate("jupyter-task-2")
	require.NoError(t, err)
	assert.NotEqual(t, first, second)

	// After release the first port no longer maps to any task.
	allocator.Release(first)
	owner = allocator.GetAllocation(first)
	assert.Equal(t, "", owner)

	// A further allocation still succeeds; which port comes back is not
	// pinned here (it may or may not be the one just released).
	third, err := allocator.Allocate("jupyter-task-3")
	require.NoError(t, err)
	assert.True(t, third >= 10000 && third <= 10010)
}

// TestPortAllocatorExhaustion checks that Allocate returns an error mentioning
// "no ports available" once every port in the range has been handed out.
func TestPortAllocatorExhaustion(t *testing.T) {
	// Small range for testing: 20000, 20001 and 20002.
	pa := scheduler.NewPortAllocator(20000, 20002)

	// Allocate all ports
	_, err := pa.Allocate("task-1")
	require.NoError(t, err)
	_, err = pa.Allocate("task-2")
	require.NoError(t, err)
	_, err = pa.Allocate("task-3")
	require.NoError(t, err)

	// Fourth allocation should fail. require (not assert) is used so that a
	// nil error stops the test here instead of panicking on err.Error() below.
	_, err = pa.Allocate("task-4")
	require.Error(t, err)
	assert.Contains(t, err.Error(), "no ports available")
}

// TestPortAllocatorTTL exercises allocation after a release when a short TTL
// has been configured. It only asserts that the second allocation succeeds
// and stays inside the configured range.
func TestPortAllocatorTTL(t *testing.T) {
	allocator := scheduler.NewPortAllocator(30000, 30010)

	// Keep the TTL short so the test never has to wait on it.
	allocator.SetTTL(50 * time.Millisecond)

	released, err := allocator.Allocate("test-task")
	require.NoError(t, err)

	// NOTE(review): presumably Release keeps the port reserved until the TTL
	// elapses — confirm against the PortAllocator implementation.
	allocator.Release(released)

	// Allocate again straight away, without waiting for the TTL.
	next, err := allocator.Allocate("test-task-2")
	require.NoError(t, err)

	// Whether next equals the released port is deliberately not asserted;
	// only the range is checked.
	assert.True(t, next >= 30000 && next <= 30010)
}

// TestServiceSlotPoolSeparation checks, at the data level only, that a service
// template and a batch job spec name different slot pools and that the service
// template carries a health-check interval. No scheduler instance is involved.
func TestServiceSlotPoolSeparation(t *testing.T) {
	// A hand-built batch job placed in the "batch" pool.
	batch := scheduler.JobSpec{
		ID:       "batch-1",
		SlotPool: "batch",
		GPUCount: 1,
	}

	// JupyterLabTemplate is used as the service side because it has health
	// checks configured.
	service := scheduler.JupyterLabTemplate

	// The two jobs must name different pools.
	assert.Equal(t, "service", service.SlotPool)
	assert.Equal(t, "batch", batch.SlotPool)

	// The service job has a non-zero health-check interval. Nothing is
	// asserted about health checks on the batch job, which runs to
	// completion.
	assert.NotZero(t, service.HealthCheck.Interval)
}

// TestHealthCheckValidation runs a table of health-check configurations
// through a validity predicate and compares the outcome with the expectation.
//
// The predicate is defined inline in this test (non-empty Liveness, positive
// Interval, positive Timeout); it does not call a validator from the scheduler
// package. Each invalid case violates exactly one of those conditions so that
// a regression in any single condition is caught by the matching case.
func TestHealthCheckValidation(t *testing.T) {
	tests := []struct {
		name     string
		template scheduler.ServiceTemplate
		valid    bool
	}{
		{
			name: "JupyterLab - valid",
			template: scheduler.ServiceTemplate{
				JobType:  "service",
				SlotPool: "service",
				HealthCheck: scheduler.ServiceHealthCheck{
					Liveness:  "http://localhost:8888/api",
					Readiness: "http://localhost:8888/api/kernels",
					Interval:  15,
					Timeout:   5,
				},
			},
			valid: true,
		},
		{
			// Only Liveness is missing; Interval and Timeout are valid.
			name: "Missing liveness - invalid",
			template: scheduler.ServiceTemplate{
				JobType:  "service",
				SlotPool: "service",
				HealthCheck: scheduler.ServiceHealthCheck{
					Readiness: "http://localhost:8888/api",
					Interval:  15,
					Timeout:   5,
				},
			},
			valid: false,
		},
		{
			// Only Interval is invalid; Liveness and Timeout are valid.
			name: "Zero interval - invalid",
			template: scheduler.ServiceTemplate{
				JobType:  "service",
				SlotPool: "service",
				HealthCheck: scheduler.ServiceHealthCheck{
					Liveness:  "http://localhost:8888/api",
					Readiness: "http://localhost:8888/api",
					Interval:  0,
					Timeout:   5,
				},
			},
			valid: false,
		},
		{
			// Only Timeout is invalid; Liveness and Interval are valid.
			name: "Zero timeout - invalid",
			template: scheduler.ServiceTemplate{
				JobType:  "service",
				SlotPool: "service",
				HealthCheck: scheduler.ServiceHealthCheck{
					Liveness:  "http://localhost:8888/api",
					Readiness: "http://localhost:8888/api",
					Interval:  15,
					Timeout:   0,
				},
			},
			valid: false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			hc := tt.template.HealthCheck
			isValid := hc.Liveness != "" && hc.Interval > 0 && hc.Timeout > 0
			assert.Equal(t, tt.valid, isValid)
		})
	}
}

// TestDefaultPortRange pins the default service port bounds to 8000 and 9000
// and checks that the span between them is at least 1000.
func TestDefaultPortRange(t *testing.T) {
	lower := scheduler.DefaultServicePortStart
	upper := scheduler.DefaultServicePortEnd

	// The span must leave room for typical deployments.
	span := upper - lower
	assert.True(t, span >= 1000, "Default port range should be at least 1000 ports")

	// Exact bounds.
	assert.Equal(t, 8000, lower)
	assert.Equal(t, 9000, upper)
}

// TestTemplateVariableExpansion checks that scheduler.JupyterLabTemplate still
// carries its unexpanded placeholders: the {{SERVICE_PORT}} port argument in
// the command and a {{SECRET:...}} reference in the JUPYTER_TOKEN env var.
func TestTemplateVariableExpansion(t *testing.T) {
	template := scheduler.JupyterLabTemplate

	// The port argument must appear as its own command element.
	assert.Contains(t, template.Command, "--port={{SERVICE_PORT}}",
		"Command should contain {{SERVICE_PORT}} template variable")

	// Stop here if the variable is absent; checking the content of a missing
	// value would only add a second, redundant failure.
	val, ok := template.Env["JUPYTER_TOKEN"]
	require.True(t, ok, "Should have JUPYTER_TOKEN env var")
	assert.Contains(t, val, "{{SECRET:", "Should use secret template")
}

// BenchmarkPortAllocation times one Allocate call followed by a Release of the
// returned port, repeated b.N times against a single allocator.
func BenchmarkPortAllocation(b *testing.B) {
	allocator := scheduler.NewPortAllocator(40000, 41000)

	// Keep allocator construction out of the measured region.
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		port, allocErr := allocator.Allocate("bench-task")
		if allocErr != nil {
			// Any allocation failure aborts the benchmark.
			b.Fatal(allocErr)
		}
		allocator.Release(port)
	}
}