- Add Plugin GPU Quota config section to scheduler.yaml.example
- Add audit logging config to homelab-secure.yaml (HIPAA-compliant)
- Add Jupyter and vLLM plugin configs to all worker configs:
  - Security settings (passwords, trusted channels, blocked packages)
  - Resource limits (GPU, memory, CPU)
  - Model cache paths and quantization options for vLLM
- Disable plugins in HIPAA deployment mode for compliance
- Update deployments README with plugin services and GPU quotas
59 lines
2.1 KiB
Text
---
# Scheduler Configuration Example
# Copy this to scheduler.yaml and replace placeholders with real values
# DO NOT commit the actual scheduler.yaml with real tokens
#
# NOTE(review): original indentation was lost in extraction; nesting below is
# reconstructed (all keys placed under `scheduler:`) — verify against the
# consuming parser's expected schema.

scheduler:
  bind_addr: "0.0.0.0:7777"

  # Auto-generate self-signed certs if files don't exist
  auto_generate_certs: true
  cert_file: "/etc/fetch_ml/scheduler.crt"
  key_file: "/etc/fetch_ml/scheduler.key"

  state_dir: "/var/lib/fetch_ml"

  default_batch_slots: 3
  default_service_slots: 1

  starvation_threshold_mins: 5
  priority_aging_rate: 0.1

  gang_alloc_timeout_secs: 60
  acceptance_timeout_secs: 30

  metrics_addr: "0.0.0.0:9090"

  # Generate tokens using: openssl rand -hex 32
  # Example: wkr_abc123... (64 hex chars after wkr_)
  worker_tokens:
    - id: "worker-01"
      token: "wkr_PLACEHOLDER_GENERATE_WITH_OPENSSL_RAND_HEX_32"
    - id: "worker-02"
      token: "wkr_PLACEHOLDER_GENERATE_WITH_OPENSSL_RAND_HEX_32"

  # Plugin GPU Quota Configuration
  # Controls GPU allocation for plugin-based services (Jupyter, vLLM, etc.)
  plugin_quota:
    enabled: false        # Enable quota enforcement (default: false)
    total_gpus: 16        # Global GPU limit across all plugins (0 = unlimited)
    per_user_gpus: 4      # Default per-user GPU limit (0 = unlimited)
    per_user_services: 2  # Default per-user service count limit (0 = unlimited)

    # Plugin-specific limits (optional)
    per_plugin_limits:
      vllm:
        max_gpus: 8       # Max GPUs for vLLM across all users
        max_services: 4   # Max vLLM service instances
      jupyter:
        max_gpus: 4       # Max GPUs for Jupyter across all users
        max_services: 10  # Max Jupyter service instances

    # Per-user overrides (optional)
    user_overrides:
      admin:
        max_gpus: 8       # Admin gets more GPUs
        max_services: 5   # Admin can run more services
        allowed_plugins: ["jupyter", "vllm"]  # Restrict which plugins user can use
      researcher1:
        max_gpus: 2       # Limited GPU access
        max_services: 1   # Single service limit