# Worker configuration (docker-worker).
#
# Changes captured in this revision:
# - Add Plugin GPU Quota config section to scheduler.yaml.example
# - Add audit logging config to homelab-secure.yaml (HIPAA-compliant)
# - Add Jupyter and vLLM plugin configs to all worker configs:
#   - Security settings (passwords, trusted channels, blocked packages)
#   - Resource limits (GPU, memory, CPU)
#   - Model cache paths and quantization options for vLLM
# - Disable plugins in HIPAA deployment mode for compliance
# - Update deployments README with plugin services and GPU quotas
---
worker_id: "docker-worker"
base_path: "/data/experiments"
train_script: "train.py"

redis_url: "redis://redis:6379/0"

local_mode: true

prewarm_enabled: true

max_workers: 1
poll_interval_seconds: 2

auto_fetch_data: false

data_manager_path: "./data_manager"
dataset_cache_ttl: "30m"

data_dir: "/data/active"

snapshot_store:
  enabled: true
  endpoint: "minio:9000"
  secure: false
  bucket: "fetchml-snapshots"
  prefix: "snapshots"
  timeout: "2m"
  max_retries: 3

podman_image: "python:3.9-slim"
container_workspace: "/workspace"
container_results: "/results"
gpu_devices:
  - "/dev/dri"
gpu_vendor: "apple"
gpu_visible_devices: []

# Apple M-series GPU configuration
apple_gpu:
  enabled: true
  metal_device: "/dev/metal"
  mps_runtime: "/dev/mps"

resources:
  max_workers: 1
  desired_rps_per_worker: 2
  podman_cpus: "2"
  podman_memory: "4Gi"

metrics:
  enabled: true
  listen_addr: ":9100"
  metrics_flush_interval: "500ms"

# Plugin Configuration
plugins:
  # Jupyter Notebook/Lab Service
  jupyter:
    enabled: true
    image: "quay.io/jupyter/base-notebook:latest"
    default_port: 8888
    mode: "lab"
    # Security settings
    security:
      trusted_channels:
        - "conda-forge"
        - "defaults"
      blocked_packages: []  # Dev environment - less restrictive
      require_password: false  # No password for dev
    # Resource limits
    max_gpu_per_instance: 1
    max_memory_per_instance: "4Gi"

  # vLLM Inference Service
  vllm:
    enabled: true
    image: "vllm/vllm-openai:latest"
    default_port: 8000
    # Model cache location
    model_cache: "/models"
    # Supported quantization methods: awq, gptq, fp8, squeezellm
    default_quantization: ""  # No quantization for dev
    # Resource limits
    max_gpu_per_instance: 1
    max_model_len: 2048
    tensor_parallel_size: 1

task_lease_duration: "30m"
heartbeat_interval: "1m"
max_retries: 3
graceful_timeout: "5m"