fetch_ml/configs/worker-prod.toml
Jeremie Fraeys ea15af1833 Fix multi-user authentication and clean up debug code
- Fix YAML tags in auth config struct (json -> yaml)
- Update CLI configs to use pre-hashed API keys
- Remove double hashing in WebSocket client
- Fix port mapping (9102 -> 9103) in CLI commands
- Update permission keys to use jobs:read, jobs:create, etc.
- Clean up all debug logging from CLI and server
- All user roles now authenticate correctly:
  * Admin: Can queue jobs and see all jobs
  * Researcher: Can queue jobs and see own jobs
  * Analyst: Can see status (read-only access)

Multi-user authentication is now fully functional.
2025-12-06 12:35:32 -05:00

45 lines
955 B
TOML

# Worker basics
worker_id = "worker-prod-01"
base_path = "/data/ml-experiments"
max_workers = 4

# Redis connection
# NOTE(review): redis_password is a plain-text credential stored in this file.
# If the loader can read it from the environment or a secret store, prefer
# that and rotate this value -- confirm what the loader supports.
redis_addr = "localhost:6379"
redis_db = 0
redis_password = "JZVd2Y6IDaLNaYLBOFgQ7ae4Ox5t37NTIyPMQlLJD4k="

# SSH connection (for remote operations)
# TOML performs no "~" expansion, so ssh_key reaches the consumer verbatim;
# presumably the consumer resolves the home directory itself -- confirm.
host = "localhost"
port = 22
user = "ml-user"
ssh_key = "~/.ssh/id_rsa"

# Podman configuration
podman_image = "ml-training:latest"
gpu_access = true
container_workspace = "/workspace"
container_results = "/results"
train_script = "train.py"
# Dataset management
# These keys must stay ABOVE the [resources] header: in TOML every key that
# follows a table header belongs to that table, so placing them below it
# would define resources.auto_fetch_data etc. instead of root-level settings.
auto_fetch_data = true
data_dir = "/data/datasets"
data_manager_path = "/usr/local/bin/data_manager"
dataset_cache_ttl = "24h"

# Task management (root-level; durations are written as strings, e.g. "30s")
task_lease_duration = "1h"
heartbeat_interval = "30s"
graceful_timeout = "5m"
poll_interval = "100ms"
metrics_flush_interval = "10s"

# Resource limits.
# max_workers is also set in the root table; which of the two the consumer
# honours is not visible from this file -- keep both values in sync.
[resources]
max_workers = 4
desired_rps_per_worker = 2
podman_cpus = "4"
podman_memory = "16g"
# Metrics exporter
[metrics]
# listen_addr carries no host part; presumably this binds the exporter on
# every interface -- confirm against the consumer before exposing the port.
listen_addr = ":9090"
enabled = true