fetch_ml/internal/scheduler/service_templates.go
Jeremie Fraeys 0b5e99f720
refactor(scheduler,worker): improve service management and GPU detection
Scheduler enhancements:
- auth.go: Group membership validation in authentication
- hub.go: Task distribution with group affinity
- port_allocator.go: Dynamic port allocation with conflict resolution
- scheduler_conn.go: Connection pooling and retry logic
- service_manager.go: Lifecycle management for scheduler services
- service_templates.go: Template-based service configuration
- state.go: Persistent state management with recovery

Worker improvements:
- config.go: Extended configuration for task visibility rules
- execution/setup.go: Sandboxed execution environment setup
- executor/container.go: Container runtime integration
- executor/runner.go: Task runner with visibility enforcement
- gpu_detector.go: Robust GPU detection (NVIDIA, AMD, Apple Silicon, CPU fallback)
- integrity/validate.go: Data integrity validation
- lifecycle/runloop.go: Improved runloop with graceful shutdown
- lifecycle/service_manager.go: Service lifecycle coordination
- process/isolation.go + isolation_unix.go: Process isolation with namespaces/cgroups
- tenant/manager.go: Multi-tenant resource isolation
- tenant/middleware.go: Tenant context propagation
- worker.go: Core worker with group-scoped task execution
2026-03-08 13:03:15 -04:00

147 lines
4.4 KiB
Go

// Package scheduler provides service plugin templates for fetch_ml.
// These templates define how long-running services like Jupyter are configured.
package scheduler
// ServiceTemplate describes a long-running service job (e.g. Jupyter, vLLM)
// that stays up until explicitly stopped, as opposed to a batch job that
// runs to completion. Command and Env values may contain template variables
// (see the template-variable comment below) that the scheduler expands at
// launch time.
type ServiceTemplate struct {
	// JobType marks the job class; for service templates it is always "service".
	JobType string `json:"job_type"` // Always "service"
	// SlotPool selects the slot pool the job is scheduled into
	// ("batch" or "service").
	SlotPool string `json:"slot_pool"`
	// GPUCount is how many GPUs the service needs; 0 means CPU-only.
	GPUCount int `json:"gpu_count"`
	// Command is the argv to launch the service; entries may contain
	// template variables.
	Command []string `json:"command"`
	// Env holds environment variables for the process; values may contain
	// template variables.
	Env map[string]string `json:"env"`
	// HealthCheck configures the liveness/readiness probes used to decide
	// whether the service is up and ready for traffic.
	HealthCheck ServiceHealthCheck `json:"health_check"`
	// Mounts lists volume mounts to attach to the service container.
	Mounts []ServiceMount `json:"mounts,omitempty"`
	// Ports lists statically exposed ports; omit to rely on dynamic
	// port allocation via {{SERVICE_PORT}}.
	Ports []int `json:"ports,omitempty"`
}
// ServiceHealthCheck configures the two HTTP probes used to monitor a
// service: a liveness probe (is the process alive?) and a readiness probe
// (can it take traffic?).
type ServiceHealthCheck struct {
	// Liveness is the URL probed to confirm the service process is running.
	Liveness string `json:"liveness"`
	// Readiness is the URL probed to confirm the service can accept traffic.
	Readiness string `json:"readiness"`
	// Interval is the number of seconds between consecutive probes.
	Interval int `json:"interval"`
	// Timeout is the per-probe timeout in seconds.
	Timeout int `json:"timeout"`
}
// ServiceMount describes a single volume mount attached to a service:
// a host-side source path mapped to a destination path inside the
// container, optionally read-only.
type ServiceMount struct {
	// Source is the host path (may contain template variables, e.g. {{WORKSPACE}}).
	Source string `json:"source"`
	// Destination is the mount point inside the service container.
	Destination string `json:"destination"`
	// ReadOnly makes the mount read-only when true; defaults to writable.
	ReadOnly bool `json:"readonly,omitempty"`
}
// Template variables available in ServiceTemplate:
// {{SERVICE_PORT}} - Dynamically allocated port for the service
// {{WORKER_ID}} - ID of the worker running the service
// {{TASK_ID}} - Unique task ID for this service instance
// {{TOKEN:xxx}} - Secret value from scheduler's secret store
// {{WORKSPACE}} - Workspace directory to mount into the service
// {{MODEL_NAME}} - Model identifier (used by inference-server templates)
// JupyterLabTemplate is the default JupyterLab service configuration.
// Users can disable Jupyter by setting service_slots: 0 in worker config,
// or by not registering this template with the scheduler.
//
// The access token is injected twice — once on the command line (what the
// server enforces) and once via JUPYTER_TOKEN (what tooling reads) — so
// both MUST reference the same secret name.
var JupyterLabTemplate = ServiceTemplate{
	JobType:  "service",
	SlotPool: "service", // Uses service slot pool, not batch
	GPUCount: 0,         // Jupyter typically runs CPU-only
	Command: []string{
		"jupyter", "lab",
		"--ip=0.0.0.0",
		"--port={{SERVICE_PORT}}",
		"--no-browser",
		"--allow-root",
		// Fixed: previously referenced {{TOKEN:jupyter_token}} while the
		// JUPYTER_TOKEN env var referenced {{TOKEN:jupyter}}, so the token
		// the server enforced differed from the one advertised to clients.
		// Both now resolve the same secret, matching JupyterNotebookTemplate.
		"--NotebookApp.token='{{TOKEN:jupyter}}'",
		"--NotebookApp.password=''",
	},
	Env: map[string]string{
		// #nosec G101 -- Template placeholder, not a real credential
		"JUPYTER_TOKEN":      "{{TOKEN:jupyter}}",
		"JUPYTER_CONFIG_DIR": "/workspace/.jupyter",
	},
	HealthCheck: ServiceHealthCheck{
		Liveness:  "http://localhost:{{SERVICE_PORT}}/api",
		Readiness: "http://localhost:{{SERVICE_PORT}}/api/kernels",
		Interval:  15,
		Timeout:   5,
	},
	Mounts: []ServiceMount{
		{Source: "{{WORKSPACE}}", Destination: "/workspace"},
	},
}
// JupyterNotebookTemplate configures the classic Jupyter Notebook server
// as an alternative to JupyterLab. It runs CPU-only in the service slot
// pool, authenticates via the "jupyter" token secret, and mounts the
// worker workspace at /workspace.
var JupyterNotebookTemplate = ServiceTemplate{
	SlotPool: "service",
	JobType:  "service",
	GPUCount: 0,
	// Readiness requires the kernels API, a stronger signal than the
	// bare /api liveness endpoint.
	HealthCheck: ServiceHealthCheck{
		Liveness:  "http://localhost:{{SERVICE_PORT}}/api",
		Readiness: "http://localhost:{{SERVICE_PORT}}/api/kernels",
		Interval:  15,
		Timeout:   5,
	},
	Command: []string{
		"jupyter", "notebook",
		"--ip=0.0.0.0",
		"--port={{SERVICE_PORT}}",
		"--no-browser",
		"--allow-root",
		"--NotebookApp.token='{{TOKEN:jupyter}}'",
	},
	Env: map[string]string{
		// #nosec G101 -- Template placeholder, not a real credential
		"JUPYTER_TOKEN": "{{TOKEN:jupyter}}",
	},
	Mounts: []ServiceMount{
		{Source: "{{WORKSPACE}}", Destination: "/workspace"},
	},
}
// VLLMTemplate is an example template for a vLLM OpenAI-compatible
// inference server (future use). Unlike the Jupyter templates it needs a
// GPU, uses the model name from the {{MODEL_NAME}} template variable, and
// probes the server's /health endpoint for both liveness and readiness.
var VLLMTemplate = ServiceTemplate{
	SlotPool: "service",
	JobType:  "service",
	// One GPU is required for inference.
	GPUCount: 1,
	Command: []string{
		"python", "-m", "vllm.entrypoints.openai.api_server",
		"--model", "{{MODEL_NAME}}",
		"--port", "{{SERVICE_PORT}}",
	},
	// vLLM exposes a single /health endpoint; model loading is slow, so
	// probe less frequently and with a longer timeout than Jupyter.
	HealthCheck: ServiceHealthCheck{
		Liveness:  "http://localhost:{{SERVICE_PORT}}/health",
		Readiness: "http://localhost:{{SERVICE_PORT}}/health",
		Interval:  30,
		Timeout:   10,
	},
}