---
# Scheduler Configuration Example
# Copy this to scheduler.yaml and replace placeholders with real values.
# DO NOT commit the actual scheduler.yaml with real tokens.
#
# NOTE(review): the nesting below was reconstructed from key order and the
# section comments; confirm against the scheduler's config loader that
# worker_tokens and plugin_quota belong under `scheduler:`.

scheduler:
  bind_addr: "0.0.0.0:7777"

  # Auto-generate self-signed certs if files don't exist
  auto_generate_certs: true
  cert_file: "/etc/fetch_ml/scheduler.crt"
  key_file: "/etc/fetch_ml/scheduler.key"

  state_dir: "/var/lib/fetch_ml"

  default_batch_slots: 3
  default_service_slots: 1

  starvation_threshold_mins: 5
  priority_aging_rate: 0.1

  gang_alloc_timeout_secs: 60
  acceptance_timeout_secs: 30

  metrics_addr: "0.0.0.0:9090"

  # Generate tokens using: openssl rand -hex 32
  # Example: wkr_abc123... (64 hex chars after wkr_)
  # Each worker should get its own freshly generated token; the values below
  # are placeholders only.
  worker_tokens:
    - id: "worker-01"
      token: "wkr_PLACEHOLDER_GENERATE_WITH_OPENSSL_RAND_HEX_32"
    - id: "worker-02"
      token: "wkr_PLACEHOLDER_GENERATE_WITH_OPENSSL_RAND_HEX_32"

  # Plugin GPU Quota Configuration
  # Controls GPU allocation for plugin-based services (Jupyter, vLLM, etc.)
  plugin_quota:
    enabled: false        # Enable quota enforcement (default: false)
    total_gpus: 16        # Global GPU limit across all plugins (0 = unlimited)
    per_user_gpus: 4      # Default per-user GPU limit (0 = unlimited)
    per_user_services: 2  # Default per-user service count limit (0 = unlimited)

    # Plugin-specific limits (optional)
    per_plugin_limits:
      vllm:
        max_gpus: 8       # Max GPUs for vLLM across all users
        max_services: 4   # Max vLLM service instances
      jupyter:
        max_gpus: 4       # Max GPUs for Jupyter across all users
        max_services: 10  # Max Jupyter service instances

    # Per-user overrides (optional)
    user_overrides:
      admin:
        max_gpus: 8       # Admin gets more GPUs
        max_services: 5   # Admin can run more services
        # Restrict which plugins user can use
        allowed_plugins: ["jupyter", "vllm"]
      researcher1:
        max_gpus: 2       # Limited GPU access
        max_services: 1   # Single service limit