# Example worker configuration (production worker "worker-prod-01").
#
# Changelog (from the originating commit):
# - Add Plugin GPU Quota config section to scheduler.yaml.example
# - Add audit logging config to homelab-secure.yaml (HIPAA-compliant)
# - Add Jupyter and vLLM plugin configs to all worker configs:
#   - Security settings (passwords, trusted channels, blocked packages)
#   - Resource limits (GPU, memory, CPU)
#   - Model cache paths and quantization options for vLLM
# - Disable plugins in HIPAA deployment mode for compliance
# - Update deployments README with plugin services and GPU quotas
# Worker identity and local storage.
worker_id = "worker-prod-01"
base_path = "/data/ml-experiments"
max_workers = 4  # NOTE(review): also set under [resources] below — confirm which one the worker reads

# Redis connection
redis_addr = "localhost:6379"
redis_password = "CHANGE_ME_REDIS_PASSWORD"  # placeholder — rotate before deploying; never commit a real secret
redis_db = 0

# SSH connection (for remote operations)
host = "localhost"
user = "ml-user"
port = 22
ssh_key = "~/.ssh/id_rsa"  # presumably expanded by the consumer; TOML itself does no ~ expansion

# Podman configuration
podman_image = "ml-training:latest"
gpu_vendor = "none"        # "none" appears to disable GPU passthrough — confirm accepted values (e.g. "nvidia")
gpu_visible_devices = []   # empty = no GPUs exposed to containers
gpu_devices = []
container_workspace = "/workspace"
container_results = "/results"
train_script = "train.py"

# Dataset management
auto_fetch_data = true
data_dir = "/data/datasets"
data_manager_path = "/usr/local/bin/data_manager"
dataset_cache_ttl = "24h"  # duration string (Go-style suffixes: h, m, s)

# Task management
task_lease_duration = "1h"   # how long a claimed task stays leased before it can be re-assigned
heartbeat_interval = "30s"
graceful_timeout = "5m"      # grace period for in-flight work on shutdown
poll_interval_seconds = 1    # integer seconds, unlike the duration strings above — TODO confirm intentional
metrics_flush_interval = "10s"
# Per-worker resource budget.
[resources]
max_workers = 4            # NOTE(review): duplicates the root-level max_workers — verify which one wins
desired_rps_per_worker = 2
podman_cpus = "4"          # string on purpose? podman --cpus accepts fractional values like "1.5" — confirm
podman_memory = "16g"
# Metrics exporter
[metrics]
enabled = true
listen_addr = ":9100"  # host omitted — binds all interfaces on port 9100
# Plugin Configuration
[plugins]

[plugins.jupyter]
enabled = true
image = "quay.io/jupyter/base-notebook:latest"
default_port = 8888
mode = "lab"                     # "lab" here; other accepted modes not shown — confirm against plugin docs
max_gpu_per_instance = 1
max_memory_per_instance = "8Gi"  # Kubernetes-style quantity suffix

# Hardening for notebook instances.
[plugins.jupyter.security]
require_password = true
trusted_channels = ["conda-forge", "defaults", "pytorch"]
# Packages the plugin refuses to install — presumably to limit arbitrary
# network egress from notebooks; verify the intent with the plugin owner.
blocked_packages = ["requests", "urllib3", "httpx"]
[plugins.vllm]
enabled = true
image = "vllm/vllm-openai:latest"
default_port = 8000
model_cache = "/models"
default_quantization = ""  # "" = no quantization. Options: awq, gptq, fp8, squeezellm
max_gpu_per_instance = 2
max_model_len = 4096       # maximum context length in tokens
tensor_parallel_size = 1   # should not exceed max_gpu_per_instance — TODO confirm enforced elsewhere

# Environment variables injected into the vLLM container.
# Keys are env-var names, hence the uppercase (quoting not needed: they are valid bare keys).
[plugins.vllm.env]
HF_HOME = "/models"  # points the Hugging Face cache at the shared model_cache volume
VLLM_WORKER_MULTIPROC_METHOD = "spawn"