---
# Prometheus configuration for ML experiments monitoring.
#
# Defines the global scrape/evaluation cadence and one scrape job per
# monitored service (API server, benchmark, worker, Loki, Prometheus itself).

global:
  # Default interval between scrapes; applies to every job that does not
  # set its own scrape_interval.
  scrape_interval: 15s
  # How often rules are evaluated.
  evaluation_interval: 15s

scrape_configs:
  # API Server metrics and health
  - job_name: 'api-server'
    scheme: http
    static_configs:
      - targets: ['api-server:9101']
        labels:
          service: 'api-server'
    metrics_path: /metrics  # Future: Prometheus metrics endpoint

  # Benchmark metrics from Pushgateway
  # No targets are listed, so this job scrapes nothing as written.
  # NOTE(review): presumably a placeholder until a Pushgateway target is
  # added - confirm, and consider honor_labels: true at that point.
  - job_name: 'benchmark'
    static_configs: []

  # Worker metrics (ResourceManager + task execution)
  # For docker-compose dev on macOS/Windows, Prometheus can reach a locally
  # running worker via host.docker.internal. The target configured below is
  # the 'worker' container name, not host.docker.internal.
  - job_name: 'worker'
    # Same value as the global default; kept explicit for this job.
    scrape_interval: 15s
    static_configs:
      - targets: ['worker:9100']
        labels:
          service: 'worker'
          target_type: 'container'
    metrics_path: /metrics

  # Loki metrics
  - job_name: 'loki'
    static_configs:
      - targets: ['loki:3100']
        labels:
          service: 'loki'
    metrics_path: /metrics

  # Prometheus self-monitoring
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']