fetch_ml/deployments/docker-compose.prod.yml
Jeremie Fraeys c459285cab
chore(deploy): update deployment configs and TUI for scheduler
Update deployment and CLI tooling:
- TUI models (jobs, state) with scheduler data
- TUI store with scheduler endpoints
- TUI config with scheduler settings
- Deployment Makefile with scheduler targets
- Deploy script with scheduler registration
- Docker Compose files with scheduler services
- Remove obsolete Dockerfiles (api-server, full-prod, test)
- Update remaining Dockerfiles with scheduler integration
2026-02-26 12:08:31 -05:00

88 lines
2.8 KiB
YAML

# Full Production Docker Environment with Podman and SQLite
#
# Services:
#   redis      - Redis 7 with append-only persistence
#   api-server - API on port 9101, routed through Traefik
#   worker     - job worker, started once redis and api-server are healthy
#
# All persistent state lives in bind mounts under PROD_DATA_DIR
# (default: ./data/prod); no named volumes are declared.
#
# Variables read from the environment / .env file:
#   PROD_DATA_DIR      (default ./data/prod)
#   TRAEFIK_NETWORK    (default traefik)
#   TRAEFIK_ENTRYPOINT (default websecure)
#   FETCHML_DOMAIN     (no default)
#   AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, AWS_REGION
services:
  redis:
    image: redis:7-alpine
    container_name: ml-prod-redis
    # Run as an explicit non-root UID:GID.
    user: "999:999"
    # Reachable by other containers on the shared network only;
    # nothing is published on the host.
    expose:
      - "6379"
    volumes:
      - ${PROD_DATA_DIR:-./data/prod}/redis:/data
    restart: unless-stopped
    # Append-only file persistence, written to the /data bind mount.
    command: redis-server --appendonly yes
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 30s
      timeout: 10s
      retries: 3

  api-server:
    build:
      # NOTE(review): relative paths are resolved against the Compose project
      # directory - confirm the invocation sets it so this Dockerfile is found.
      context: .
      dockerfile: ./build/docker/secure-prod.Dockerfile
    container_name: ml-prod-api
    expose:
      - "9101"
      - "2222"
    volumes:
      - ${PROD_DATA_DIR:-./data/prod}/experiments:/app/data/experiments
      - ${PROD_DATA_DIR:-./data/prod}/active:/data/active
      - ${PROD_DATA_DIR:-./data/prod}/logs:/logs
      # Host-side multi-user.yaml is mounted read-only as the prod config.
      - ${PROD_DATA_DIR:-./data/prod}/configs/api/multi-user.yaml:/app/configs/api/prod.yaml:ro
    depends_on:
      redis:
        condition: service_healthy
    restart: unless-stopped
    environment:
      - LOG_LEVEL=info
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9101/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    # Create the dataset/snapshot directories inside the /data/active bind
    # mount, then replace the shell with the API server process.
    command:
      - /bin/sh
      - "-c"
      - >-
        mkdir -p /data/active/datasets /data/active/snapshots
        && exec /usr/local/bin/api-server -config /app/configs/api/prod.yaml
    labels:
      - "traefik.enable=true"
      - "traefik.docker.network=${TRAEFIK_NETWORK:-traefik}"
      - "traefik.http.services.fetchml.loadbalancer.server.port=9101"
      # NOTE(review): FETCHML_DOMAIN has no default; if it is unset the rule
      # is rendered with an empty Host() - confirm it is always provided.
      - "traefik.http.routers.fetchml.rule=Host(`${FETCHML_DOMAIN}`) && (PathPrefix(`/api`) || PathPrefix(`/ws`) || Path(`/health`))"
      - "traefik.http.routers.fetchml.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
      - "traefik.http.routers.fetchml.tls=true"
    networks:
      - default
      - traefik

  worker:
    build:
      context: .
      dockerfile: ./build/docker/simple.Dockerfile
    container_name: ml-prod-worker
    volumes:
      - ${PROD_DATA_DIR:-./data/prod}/experiments:/app/data/experiments
      - ${PROD_DATA_DIR:-./data/prod}/active:/data/active
      - ${PROD_DATA_DIR:-./data/prod}/logs:/logs
      - ${PROD_DATA_DIR:-./data/prod}/configs/worker/docker-prod.yaml:/app/configs/worker.yaml:ro
    depends_on:
      redis:
        condition: service_healthy
      api-server:
        condition: service_healthy
    restart: unless-stopped
    environment:
      - LOG_LEVEL=info
      # AWS credentials are forwarded from the environment running Compose.
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
      - AWS_SESSION_TOKEN=${AWS_SESSION_TOKEN}
      - AWS_REGION=${AWS_REGION}
    # NOTE(review): privileged mode gives the container extended host
    # privileges - confirm the worker needs it and narrow it if possible.
    privileged: true
    command: ["/usr/local/bin/worker", "-config", "/app/configs/worker.yaml"]

# No named volumes; every mount above is a host bind mount.
volumes: {}

networks:
  default:
    name: ml-prod-network
  # Pre-existing network managed outside this file.
  traefik:
    external: true
    name: ${TRAEFIK_NETWORK:-traefik}