fetch_ml/internal/scheduler/service_templates.go
Jeremie Fraeys 43e6446587
feat(scheduler): implement multi-tenant job scheduler with gang scheduling
Add new scheduler component for distributed ML workload orchestration:
- Hub-based coordination for multi-worker clusters
- Pacing controller for rate limiting job submissions
- Priority queue with preemption support
- Port allocator for dynamic service discovery
- Protocol handlers for worker-scheduler communication
- Service manager with OS-specific implementations
- Connection management and state persistence
- Template system for service deployment

Includes comprehensive test suite:
- Unit tests for all core components
- Integration tests for distributed scenarios
- Benchmark tests for performance validation
- Mock fixtures for isolated testing

Refs: scheduler-architecture.md
2026-02-26 12:03:23 -05:00

145 lines
4.3 KiB
Go

// Package scheduler provides service plugin templates for fetch_ml.
// These templates define how long-running services like Jupyter are configured.
package scheduler
// ServiceTemplate defines a service job that runs indefinitely until stopped.
// This is used for Jupyter, vLLM, and similar interactive services.
//
// String fields (Command, Env, HealthCheck URLs, Mounts) may contain template
// variables such as {{SERVICE_PORT}} that are substituted by the scheduler
// before launch.
type ServiceTemplate struct {
	// JobType identifies this as a service job; always "service".
	JobType string `json:"job_type"` // Always "service"
	// SlotPool specifies which slot pool to use ("batch" or "service").
	SlotPool string `json:"slot_pool"`
	// GPUCount is the number of GPUs required (can be 0 for CPU-only services).
	GPUCount int `json:"gpu_count"`
	// Command is the service command and its arguments; entries may contain
	// template variables.
	Command []string `json:"command"`
	// Env defines environment variables for the service process; values may
	// contain template variables.
	Env map[string]string `json:"env"`
	// HealthCheck defines how to verify the service is alive and ready.
	HealthCheck ServiceHealthCheck `json:"health_check"`
	// Mounts defines volume mounts for the service. Optional.
	Mounts []ServiceMount `json:"mounts,omitempty"`
	// Ports lists fixed ports to expose when dynamic port allocation
	// ({{SERVICE_PORT}}) is not used. Optional.
	Ports []int `json:"ports,omitempty"`
}
// ServiceHealthCheck defines liveness and readiness probes for a service.
// Both probe fields are URLs and may contain template variables such as
// {{SERVICE_PORT}}.
type ServiceHealthCheck struct {
	// Liveness is the endpoint polled to check the service is still running.
	Liveness string `json:"liveness"`
	// Readiness is the endpoint polled to check the service can accept traffic.
	Readiness string `json:"readiness"`
	// Interval is the delay between successive health checks, in seconds.
	Interval int `json:"interval"`
	// Timeout is the per-check deadline, in seconds.
	Timeout int `json:"timeout"`
}
// ServiceMount defines a volume mount for a service container/process.
type ServiceMount struct {
	// Source is the host-side path to mount; may contain template
	// variables such as {{WORKSPACE}}.
	Source string `json:"source"`
	// Destination is the path inside the service where Source is mounted.
	Destination string `json:"destination"`
	// ReadOnly mounts the volume read-only when true. Optional;
	// defaults to read-write.
	ReadOnly bool `json:"readonly,omitempty"`
}
// Template variables available in ServiceTemplate:
// {{SERVICE_PORT}} - Dynamically allocated port for the service
// {{WORKER_ID}} - ID of the worker running the service
// {{TASK_ID}} - Unique task ID for this service instance
// {{WORKSPACE}} - Workspace path mounted into the service (used as a mount source)
// {{MODEL_NAME}} - Model identifier for inference templates (e.g. vLLM)
// {{SECRET:xxx}} - Secret value from scheduler's secret store
// JupyterLabTemplate is the default JupyterLab service configuration.
// Sysadmins can disable Jupyter by setting service_slots: 0 in worker config,
// or by not registering this template with the scheduler.
//
// The service binds to a dynamically allocated {{SERVICE_PORT}} and is
// authenticated via the jupyter_token secret from the scheduler's secret store.
var JupyterLabTemplate = ServiceTemplate{
	// Scheduling: service slot pool (not batch), no GPUs needed.
	JobType:  "service",
	SlotPool: "service",
	GPUCount: 0,

	// Launch command: JupyterLab on the allocated port, token auth only.
	Command: []string{
		"jupyter", "lab",
		"--ip=0.0.0.0",
		"--port={{SERVICE_PORT}}",
		"--no-browser",
		"--allow-root",
		"--NotebookApp.token='{{SECRET:jupyter_token}}'",
		"--NotebookApp.password=''",
	},

	// Environment: token mirrored into the process, config kept in the workspace.
	Env: map[string]string{
		"JUPYTER_TOKEN":      "{{SECRET:jupyter_token}}",
		"JUPYTER_CONFIG_DIR": "/workspace/.jupyter",
	},

	// Workspace volume mounted at /workspace inside the service.
	Mounts: []ServiceMount{
		{Source: "{{WORKSPACE}}", Destination: "/workspace"},
	},

	// Probes: /api proves the server is up; /api/kernels proves it can serve.
	HealthCheck: ServiceHealthCheck{
		Liveness:  "http://localhost:{{SERVICE_PORT}}/api",
		Readiness: "http://localhost:{{SERVICE_PORT}}/api/kernels",
		Interval:  15,
		Timeout:   5,
	},
}
// JupyterNotebookTemplate is an alternative using classic Jupyter Notebook.
// It mirrors JupyterLabTemplate's scheduling, mounts, and health checks but
// launches the classic notebook server instead of Lab.
var JupyterNotebookTemplate = ServiceTemplate{
	// Scheduling: service slot pool, CPU-only.
	JobType:  "service",
	SlotPool: "service",
	GPUCount: 0,

	// Launch command: classic notebook on the allocated port, token auth.
	Command: []string{
		"jupyter", "notebook",
		"--ip=0.0.0.0",
		"--port={{SERVICE_PORT}}",
		"--no-browser",
		"--allow-root",
		"--NotebookApp.token='{{SECRET:jupyter_token}}'",
	},

	// Token exposed to the process environment.
	Env: map[string]string{
		"JUPYTER_TOKEN": "{{SECRET:jupyter_token}}",
	},

	// Workspace volume mounted at /workspace inside the service.
	Mounts: []ServiceMount{
		{Source: "{{WORKSPACE}}", Destination: "/workspace"},
	},

	// Same probes as Lab: /api for liveness, /api/kernels for readiness.
	HealthCheck: ServiceHealthCheck{
		Liveness:  "http://localhost:{{SERVICE_PORT}}/api",
		Readiness: "http://localhost:{{SERVICE_PORT}}/api/kernels",
		Interval:  15,
		Timeout:   5,
	},
}
// VLLMTemplate is an example vLLM inference server template (future).
// Unlike the Jupyter templates it requires a GPU and expects {{MODEL_NAME}}
// to be substituted with the model to serve.
var VLLMTemplate = ServiceTemplate{
	// Scheduling: service slot pool, one GPU for inference.
	JobType:  "service",
	SlotPool: "service",
	GPUCount: 1,

	// Launch command: OpenAI-compatible vLLM API server.
	Command: []string{
		"python", "-m", "vllm.entrypoints.openai.api_server",
		"--model", "{{MODEL_NAME}}",
		"--port", "{{SERVICE_PORT}}",
	},

	// vLLM exposes a single /health endpoint; use it for both probes,
	// with longer spacing since model load/inference is heavyweight.
	HealthCheck: ServiceHealthCheck{
		Liveness:  "http://localhost:{{SERVICE_PORT}}/health",
		Readiness: "http://localhost:{{SERVICE_PORT}}/health",
		Interval:  30,
		Timeout:   10,
	},
}