- Fix YAML tags in auth config struct (json -> yaml)
- Update CLI configs to use pre-hashed API keys
- Remove double hashing in WebSocket client
- Fix port mapping (9102 -> 9103) in CLI commands
- Update permission keys to use jobs:read, jobs:create, etc.
- Clean up all debug logging from CLI and server
- All user roles now authenticate correctly:
  * Admin: Can queue jobs and see all jobs
  * Researcher: Can queue jobs and see own jobs
  * Analyst: Can see status (read-only access)

Multi-user authentication is now fully functional.
127 lines
3.3 KiB
YAML
127 lines
3.3 KiB
YAML
# Homelab Docker Compose with Centralized Monitoring
|
|
# Includes: API, Redis, Prometheus, Grafana, Loki
|
|
|
|
services:
|
|
# Redis started with append-only mode enabled; the redis_data named volume is
# mounted at /data.
redis:
  container_name: ml-experiments-redis
  image: redis:7-alpine
  restart: unless-stopped
  # List form of the command line. "yes" is quoted so YAML keeps it a string
  # rather than reading it as a boolean.
  command:
    - redis-server
    - "--appendonly"
    - "yes"
  ports:
    - "6379:6379"
  volumes:
    - redis_data:/data
  # Health probe: run `redis-cli ping` inside the container every 30s.
  healthcheck:
    test:
      - CMD
      - redis-cli
      - ping
    interval: 30s
    timeout: 10s
    retries: 3
|
|
|
|
# API server, built from the repository's Dockerfile. It is started only after
# the redis service reports healthy (see depends_on).
api-server:
  build:
    context: .
    dockerfile: build/docker/simple.Dockerfile
  container_name: ml-experiments-api
  ports:
    - "9101:9101"
    - "9100:9100"  # Prometheus metrics endpoint
  volumes:
    - ./data:/data/experiments
    - ./logs:/logs
    # The no-TLS config is mounted over the path the container reads as
    # config.yaml.
    - ./configs/config-no-tls.yaml:/app/configs/config.yaml
  depends_on:
    redis:
      condition: service_healthy
  restart: unless-stopped
  environment:
    # "redis" resolves to the redis service on the Compose network.
    - REDIS_URL=redis://redis:6379
    - LOG_LEVEL=info
  healthcheck:
    # -f makes curl exit non-zero on HTTP status >= 400. Without it, an error
    # response from /health still exits 0 and the container is reported
    # healthy.
    test: ["CMD", "curl", "-f", "http://localhost:9101/health"]
    interval: 30s
    timeout: 10s
    retries: 3
    # Probe failures during the first 40s do not count towards retries.
    start_period: 40s
  # NOTE(review): presumably these labels are what the Promtail config selects
  # on; confirm against monitoring/promtail-config.yml.
  labels:
    logging: "promtail"
    job: "api-server"
|
|
|
|
# Prometheus: metrics collection.
# Configuration comes from ./monitoring/prometheus.yml; TSDB data is kept on the
# prometheus_data named volume.
prometheus:
  container_name: ml-experiments-prometheus
  image: prom/prometheus:latest
  restart: unless-stopped
  ports:
    - "9090:9090"
  volumes:
    - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml
    - prometheus_data:/prometheus
  # Flags passed to the image's entrypoint. --web.enable-lifecycle turns on the
  # HTTP reload/quit endpoints, which are reachable through the published port.
  command:
    - "--config.file=/etc/prometheus/prometheus.yml"
    - "--storage.tsdb.path=/prometheus"
    - "--web.console.libraries=/etc/prometheus/console_libraries"
    - "--web.console.templates=/etc/prometheus/consoles"
    - "--web.enable-lifecycle"
|
|
|
|
# Grafana: dashboards and visualization.
# Started after prometheus and loki (start order only; no health condition).
grafana:
  container_name: ml-experiments-grafana
  image: grafana/grafana:latest
  restart: unless-stopped
  depends_on:
    - prometheus
    - loki
  ports:
    - "3000:3000"
  volumes:
    - grafana_data:/var/lib/grafana
    - ./monitoring/grafana/provisioning:/etc/grafana/provisioning
    - ./monitoring/dashboards:/var/lib/grafana/dashboards
  # Mapping form of the environment; values are quoted so they stay strings.
  environment:
    # Falls back to "admin" when GRAFANA_ADMIN_PASSWORD is unset or empty.
    # Set the variable for anything reachable beyond localhost.
    GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:-admin}"
    GF_USERS_ALLOW_SIGN_UP: "false"
    GF_SERVER_ROOT_URL: "http://localhost:3000"
    GF_AUTH_ANONYMOUS_ENABLED: "false"
|
|
|
|
# Loki: log aggregation.
# The local loki-config.yml is mounted at the path passed to -config.file.
loki:
  container_name: ml-experiments-loki
  image: grafana/loki:latest
  restart: unless-stopped
  command:
    - "-config.file=/etc/loki/local-config.yaml"
  ports:
    - "3100:3100"
  volumes:
    - ./monitoring/loki-config.yml:/etc/loki/local-config.yaml
    - loki_data:/loki
|
|
|
|
# Promtail: log collector.
# Started after loki; publishes no ports.
promtail:
  container_name: ml-experiments-promtail
  image: grafana/promtail:latest
  restart: unless-stopped
  depends_on:
    - loki
  command:
    - "-config.file=/etc/promtail/config.yml"
  volumes:
    - ./monitoring/promtail-config.yml:/etc/promtail/config.yml
    # Same host ./logs directory that the api-server mounts at /logs.
    - ./logs:/var/log/app
    # Host container log directory, mounted read-only.
    - /var/lib/docker/containers:/var/lib/docker/containers:ro
    # Docker socket from the host.
    # NOTE(review): presumably used for container discovery; confirm against
    # promtail-config.yml.
    - /var/run/docker.sock:/var/run/docker.sock
|
|
|
|
# Named volumes, all on the local driver, listed alphabetically.
volumes:
  # Mounted at /var/lib/grafana by the grafana service.
  grafana_data:
    driver: local
  # Mounted at /loki by the loki service.
  loki_data:
    driver: local
  # Mounted at /prometheus by the prometheus service.
  prometheus_data:
    driver: local
  # Mounted at /data by the redis service.
  redis_data:
    driver: local
|
|
|
|
# Networks declared by this stack.
networks:
  # Internal-only network: no external access.
  # NOTE(review): no service visible in this file attaches to it; confirm
  # whether it is still needed.
  backend:
    name: ml-backend-network
    internal: true
  # Default network, joined by every service that does not list networks
  # explicitly; given a fixed name here.
  default:
    name: ml-experiments-network
|