fetch_ml/configs/worker/dev-local.yaml
Jeremie Fraeys b3a0c78903
config: add Plugin GPU Quota, plugins, and audit logging to configs
- Add Plugin GPU Quota config section to scheduler.yaml.example

- Add audit logging config to homelab-secure.yaml (HIPAA-compliant)

- Add Jupyter and vLLM plugin configs to all worker configs:

  - Security settings (passwords, trusted channels, blocked packages)

  - Resource limits (GPU, memory, CPU)

  - Model cache paths and quantization options for vLLM

- Disable plugins in HIPAA deployment mode for compliance

- Update deployments README with plugin services and GPU quotas
2026-02-26 14:34:42 -05:00

87 lines
1.8 KiB
YAML

---
# Local development worker configuration (dev-local.yaml).
# NOTE(review): indentation was lost in the source capture; nesting below is
# reconstructed from key semantics — confirm against the repository original.
worker_id: "local-worker"
base_path: "data/dev/experiments"
train_script: "train.py"
redis_url: "redis://localhost:6379/0"
local_mode: true
prewarm_enabled: false
max_workers: 2
poll_interval_seconds: 2
auto_fetch_data: false
data_manager_path: "./data_manager"
dataset_cache_ttl: "30m"
data_dir: "data/dev/active"

snapshot_store:
  enabled: false

podman_image: "python:3.9-slim"
container_workspace: "/workspace"
container_results: "/results"
gpu_devices: []
gpu_vendor: "apple"
gpu_visible_devices: []

# Apple M-series GPU configuration
apple_gpu:
  enabled: true
  metal_device: "/dev/metal"
  mps_runtime: "/dev/mps"

resources:
  max_workers: 2
  desired_rps_per_worker: 2
  podman_cpus: "2"
  podman_memory: "4Gi"

metrics:
  enabled: false

queue:
  type: "native"
  native:
    data_dir: "data/dev/queue"

# Plugin Configuration (for local development)
plugins:
  # Jupyter Notebook/Lab Service
  jupyter:
    enabled: true
    image: "quay.io/jupyter/base-notebook:latest"
    default_port: 8888
    mode: "lab"
    # Security settings
    security:
      trusted_channels:
        - "conda-forge"
        - "defaults"
      blocked_packages: []  # Less restrictive for local dev
      require_password: false  # No password for local dev
    # Resource limits
    max_gpu_per_instance: 1
    max_memory_per_instance: "4Gi"

  # vLLM Inference Service
  vllm:
    enabled: true
    image: "vllm/vllm-openai:latest"
    default_port: 8000
    # Model cache location
    model_cache: "data/dev/models"
    # Supported quantization methods: awq, gptq, fp8, squeezellm
    default_quantization: ""  # No quantization for dev (better quality)
    # Resource limits
    max_gpu_per_instance: 1
    max_model_len: 2048
    tensor_parallel_size: 1

# Worker lifecycle settings.
# NOTE(review): placed at top level (they are worker-scoped, not vLLM-scoped)
# — verify against the other worker configs mentioned in the commit message.
task_lease_duration: "30m"
heartbeat_interval: "1m"
max_retries: 3
graceful_timeout: "5m"