fetch_ml/configs/scheduler/scheduler.yaml.example
Jeremie Fraeys b3a0c78903
config: add Plugin GPU Quota, plugins, and audit logging to configs
- Add Plugin GPU Quota config section to scheduler.yaml.example
- Add audit logging config to homelab-secure.yaml (HIPAA-compliant)
- Add Jupyter and vLLM plugin configs to all worker configs:
  - Security settings (passwords, trusted channels, blocked packages)
  - Resource limits (GPU, memory, CPU)
  - Model cache paths and quantization options for vLLM
- Disable plugins in HIPAA deployment mode for compliance
- Update deployments README with plugin services and GPU quotas
2026-02-26 14:34:42 -05:00


# Scheduler Configuration Example
# Copy this to scheduler.yaml and replace placeholders with real values
# DO NOT commit the actual scheduler.yaml with real tokens
scheduler:
  bind_addr: "0.0.0.0:7777"
  # Auto-generate self-signed certs if files don't exist
  auto_generate_certs: true
  cert_file: "/etc/fetch_ml/scheduler.crt"
  key_file: "/etc/fetch_ml/scheduler.key"
  state_dir: "/var/lib/fetch_ml"
  default_batch_slots: 3
  default_service_slots: 1
  starvation_threshold_mins: 5
  priority_aging_rate: 0.1
  gang_alloc_timeout_secs: 60
  acceptance_timeout_secs: 30
  metrics_addr: "0.0.0.0:9090"
  # Generate tokens using: openssl rand -hex 32
  # Example: wkr_abc123... (64 hex chars after wkr_)
  worker_tokens:
    - id: "worker-01"
      token: "wkr_PLACEHOLDER_GENERATE_WITH_OPENSSL_RAND_HEX_32"
    - id: "worker-02"
      token: "wkr_PLACEHOLDER_GENERATE_WITH_OPENSSL_RAND_HEX_32"
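The comments above suggest generating tokens with `openssl rand -hex 32`. For completeness, here is an equivalent sketch in Python using the standard-library `secrets` module; the `wkr_` prefix follows the example format shown in the comments (64 hex characters after the prefix):

```python
import secrets

# Equivalent to `openssl rand -hex 32`: 32 cryptographically random bytes,
# rendered as 64 lowercase hex characters. The "wkr_" prefix matches the
# token format in the example above.
token = "wkr_" + secrets.token_hex(32)
print(token)
```

Paste the printed value into the matching `token:` field for each worker, and keep the populated scheduler.yaml out of version control.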
# Plugin GPU Quota Configuration
# Controls GPU allocation for plugin-based services (Jupyter, vLLM, etc.)
plugin_quota:
  enabled: false       # Enable quota enforcement (default: false)
  total_gpus: 16       # Global GPU limit across all plugins (0 = unlimited)
  per_user_gpus: 4     # Default per-user GPU limit (0 = unlimited)
  per_user_services: 2 # Default per-user service count limit (0 = unlimited)
  # Plugin-specific limits (optional)
  per_plugin_limits:
    vllm:
      max_gpus: 8      # Max GPUs for vLLM across all users
      max_services: 4  # Max vLLM service instances
    jupyter:
      max_gpus: 4      # Max GPUs for Jupyter across all users
      max_services: 10 # Max Jupyter service instances
  # Per-user overrides (optional)
  user_overrides:
    admin:
      max_gpus: 8      # Admin gets more GPUs
      max_services: 5  # Admin can run more services
      allowed_plugins: ["jupyter", "vllm"] # Restrict which plugins this user can use
    researcher1:
      max_gpus: 2      # Limited GPU access
      max_services: 1  # Single service limit
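One plausible reading of how these fields combine (not confirmed by the config itself): a `user_overrides` entry takes precedence over the `per_user_gpus` default, `0` means unlimited, and limits only apply when `enabled` is true. The minimal Python sketch below illustrates that assumed precedence for the per-user GPU cap; the function name and dict layout are hypothetical:

```python
# Hypothetical sketch of quota resolution for the plugin_quota section above.
# Assumed precedence (an interpretation, not documented behavior):
#   user_overrides[user].max_gpus  >  per_user_gpus default
# with 0 treated as "unlimited" and None meaning "no enforcement".
def effective_user_gpu_limit(quota, user):
    if not quota.get("enabled", False):
        return None  # quota enforcement disabled
    override = quota.get("user_overrides", {}).get(user, {})
    limit = override.get("max_gpus", quota.get("per_user_gpus", 0))
    return None if limit == 0 else limit  # 0 = unlimited

quota = {
    "enabled": True,
    "per_user_gpus": 4,
    "user_overrides": {"admin": {"max_gpus": 8}},
}
print(effective_user_gpu_limit(quota, "admin"))        # 8 (override wins)
print(effective_user_gpu_limit(quota, "researcher2"))  # 4 (falls back to default)
```

Global caps such as `total_gpus` and `per_plugin_limits` would be checked separately against aggregate usage across all users, under the same assumptions.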