fetch_ml/configs/worker/worker-prod.toml
Jeremie Fraeys b3a0c78903
config: add Plugin GPU Quota, plugins, and audit logging to configs
- Add Plugin GPU Quota config section to scheduler.yaml.example

- Add audit logging config to homelab-secure.yaml (HIPAA-compliant)

- Add Jupyter and vLLM plugin configs to all worker configs:

  - Security settings (passwords, trusted channels, blocked packages)

  - Resource limits (GPU, memory, CPU)

  - Model cache paths and quantization options for vLLM

- Disable plugins in HIPAA deployment mode for compliance

- Update deployments README with plugin services and GPU quotas
2026-02-26 14:34:42 -05:00

78 lines
1.7 KiB
TOML

# Worker identity and filesystem layout for the production deployment.
worker_id = "worker-prod-01"
base_path = "/data/ml-experiments"
max_workers = 4
# Redis connection
redis_addr = "localhost:6379"
# NOTE(review): placeholder credential — replace before deploying.
redis_password = "CHANGE_ME_REDIS_PASSWORD"
redis_db = 0
# SSH connection (for remote operations)
host = "localhost"
user = "ml-user"
port = 22
# NOTE(review): TOML does not expand "~" — presumably the worker expands it
# at load time; confirm, or use an absolute path.
ssh_key = "~/.ssh/id_rsa"
# Podman configuration
podman_image = "ml-training:latest"
# "none" disables GPU scheduling for this worker; device lists stay empty.
gpu_vendor = "none"
gpu_visible_devices = []
gpu_devices = []
container_workspace = "/workspace"
container_results = "/results"
train_script = "train.py"
# Dataset management
auto_fetch_data = true
data_dir = "/data/datasets"
data_manager_path = "/usr/local/bin/data_manager"
dataset_cache_ttl = "24h"
# Task management
# Durations look like Go-style strings ("1h", "30s") — confirm the parser;
# note poll_interval_seconds is a bare integer, unlike its neighbours.
task_lease_duration = "1h"
heartbeat_interval = "30s"
graceful_timeout = "5m"
poll_interval_seconds = 1
metrics_flush_interval = "10s"
# Concurrency and per-container resource caps for this worker.
[resources]
# NOTE(review): duplicates the root-level max_workers — confirm which key the
# worker actually reads, and keep the two in sync.
max_workers = 4
desired_rps_per_worker = 2
# Presumably forwarded verbatim to Podman's --cpus/--memory flags, hence the
# string values — confirm against the container launcher.
podman_cpus = "4"
podman_memory = "16g"
# Metrics exporter
[metrics]
enabled = true
# Go-style listen address — ":9100" presumably binds all interfaces on
# port 9100 (the conventional node-exporter port); confirm firewall rules.
listen_addr = ":9100"
# Plugin Configuration
[plugins]
# Jupyter notebook/lab plugin.
[plugins.jupyter]
enabled = true
image = "quay.io/jupyter/base-notebook:latest"
default_port = 8888
mode = "lab"
# Per-instance resource ceilings ("8Gi" is a Kubernetes-style quantity).
max_gpu_per_instance = 1
max_memory_per_instance = "8Gi"
[plugins.jupyter.security]
require_password = true
# Conda channels instances are allowed to install from.
trusted_channels = ["conda-forge", "defaults", "pytorch"]
# NOTE(review): blocking these HTTP client libraries looks like an
# exfiltration guard — confirm intent before loosening this list.
blocked_packages = ["requests", "urllib3", "httpx"]
# vLLM OpenAI-compatible inference server plugin.
[plugins.vllm]
enabled = true
image = "vllm/vllm-openai:latest"
default_port = 8000
# Host path where downloaded model weights are cached.
model_cache = "/models"
# Empty string means "no quantization" (TOML has no null value).
default_quantization = "" # Options: awq, gptq, fp8, squeezellm
max_gpu_per_instance = 2
# Maximum context length in tokens (vLLM --max-model-len).
max_model_len = 4096
tensor_parallel_size = 1
# Environment variables for vLLM
[plugins.vllm.env]
# Points Hugging Face downloads at the same path as model_cache above.
HF_HOME = "/models"
# "spawn" avoids fork-related CUDA issues in vLLM worker processes —
# TODO(review): confirm this matches the deployed vLLM version's guidance.
VLLM_WORKER_MULTIPROC_METHOD = "spawn"