# Restructure configuration files for better organization:
# - Add scheduler configuration examples (scheduler.yaml.example)
# - Reorganize worker configs into subdirectories:
#   - distributed/ - Multi-node cluster configurations
#   - standalone/ - Single-node deployment configs
# - Add environment-specific configs:
#   - dev-local.yaml, docker-dev.yaml, docker-prod.yaml
#   - homelab-secure.yaml, worker-prod.toml
# - Add deployment configs for different security modes:
#   - docker-standard.yaml, docker-hipaa.yaml, docker-dev.yaml
#
# Add documentation:
# - configs/README.md with configuration guidelines
# - configs/SECURITY.md with security configuration best practices
54 lines
914 B
YAML
54 lines
914 B
YAML
# Top-level worker settings. All filesystem paths here are relative and
# the Redis URL points at localhost.
# NOTE(review): what the relative paths resolve against is not visible
# in this file - confirm against the worker's config loader.
worker_id: "local-worker"
base_path: "data/dev/experiments"
train_script: "train.py"

redis_url: "redis://localhost:6379/0"

local_mode: true

prewarm_enabled: false

max_workers: 2
poll_interval_seconds: 2

auto_fetch_data: false

data_manager_path: "./data_manager"
dataset_cache_ttl: "30m"

data_dir: "data/dev/active"

# `enabled` must be indented under snapshot_store: at column 0 it would be
# a separate top-level key and snapshot_store itself would parse as null.
snapshot_store:
  enabled: false

# Container image, in-container paths, and GPU selection.
# The two GPU lists are explicitly empty ([]), not null.
podman_image: "python:3.9-slim"
container_workspace: "/workspace"
container_results: "/results"
gpu_devices: []
gpu_vendor: "apple"
gpu_visible_devices: []

# Apple M-series GPU configuration
# The three settings are children of apple_gpu; left at column 0 they
# would become top-level keys and `enabled` would be duplicated.
apple_gpu:
  enabled: true
  metal_device: "/dev/metal"
  mps_runtime: "/dev/mps"

# Resource limits, grouped under `resources`. This max_workers is scoped
# to the resources mapping, so it must stay indented to remain distinct
# from any key of the same name at the top level.
# podman_cpus is quoted, so it is read as a string rather than an integer.
resources:
  max_workers: 2
  desired_rps_per_worker: 2
  podman_cpus: "2"
  podman_memory: "4Gi"

# `enabled` belongs to the metrics mapping; unindented it would be a
# top-level key and metrics would parse as null.
metrics:
  enabled: false

# Queue backend selection. `type` names the backend and the mapping with
# the matching name (`native`) carries that backend's own settings.
# NOTE(review): nesting of data_dir under queue.native is reconstructed
# from the key order - confirm against the worker's config schema.
queue:
  type: "native"
  native:
    data_dir: "data/dev/queue"

# Lease, heartbeat, retry, and shutdown settings. Durations are quoted
# strings with a unit suffix ("30m", "1m", "5m").
# NOTE(review): kept at top level because a blank line separates these
# keys from the queue stanza - confirm they are not meant to be nested
# under `queue`.
task_lease_duration: "30m"
heartbeat_interval: "1m"
max_retries: 3
graceful_timeout: "5m"