fetch_ml/deployments/docker-compose.prod.yml
Jeremie Fraeys 98a0d42213
deploy: consolidate docker-compose files using profiles
- Merge logs-debug.yml into test.yml with 'debug' profile
- Merge local.yml into dev.yml with 'local' profile
- Merge prod.smoke.yml into prod.yml with 'smoke' profile
- Reduces compose files from 8 to 5, simplifies maintenance
- Update TEST_COMPOSE to use deployments/docker-compose.test.yml
2026-03-04 13:22:17 -05:00

128 lines
3.8 KiB
YAML

# Full Production Docker Environment with Podman and SQLite
services:
redis:
  # Redis 7 (Alpine image). The api-server and worker services both wait for
  # this container's healthcheck to pass before they start.
  image: redis:7-alpine
  container_name: ml-prod-redis
  restart: unless-stopped
  # Fixed uid:gid for the container process; the bind-mounted data directory
  # on the host must be writable by it.
  user: "999:999"
  # Start the server with append-only-file persistence switched on.
  command: ["redis-server", "--appendonly", "yes"]
  # Reachable from other containers on the compose networks only; `expose`
  # does not publish the port on the host.
  expose:
    - "6379"
  volumes:
    # Persistence files live under PROD_DATA_DIR (default ./data/prod).
    - ${PROD_DATA_DIR:-./data/prod}/redis:/data
  healthcheck:
    # Healthy when `redis-cli ping` exits successfully.
    test:
      - CMD
      - redis-cli
      - ping
    interval: 30s
    timeout: 10s
    retries: 3
api-server:
  # API service, built from the hardened production Dockerfile and routed
  # through Traefik via the labels below.
  # NOTE(review): relative paths resolve against the compose project directory;
  # confirm `context: .` is the intended root for build/docker/ — TODO confirm.
  build:
    context: .
    dockerfile: ./build/docker/secure-prod.Dockerfile
  container_name: ml-prod-api
  restart: unless-stopped
  depends_on:
    # Do not start until the redis healthcheck reports healthy.
    redis:
      condition: service_healthy
  environment:
    LOG_LEVEL: info
  # Container-only ports (not published on the host). Traefik is pointed at
  # 9101 by the loadbalancer label below.
  expose:
    - "9101"
    - "2222"
  volumes:
    - ${PROD_DATA_DIR:-./data/prod}/experiments:/app/data/experiments
    - ${PROD_DATA_DIR:-./data/prod}/active:/data/active
    - ${PROD_DATA_DIR:-./data/prod}/logs:/logs
    # The multi-user API config is mounted read-only as the prod config that
    # the command below passes to -config.
    - ${CONFIG_DIR:-../configs}/api/multi-user.yaml:/app/configs/api/prod.yaml:ro
  # Create the dataset/snapshot directories under /data/active, then replace
  # the shell with the server process (exec).
  command:
    - /bin/sh
    - "-c"
    - >-
      mkdir -p /data/active/datasets /data/active/snapshots
      && exec /usr/local/bin/api-server -config /app/configs/api/prod.yaml
  healthcheck:
    # NOTE(review): requires curl inside the image — TODO confirm.
    test:
      - CMD
      - curl
      - "-f"
      - http://localhost:9101/health
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 40s
  labels:
    traefik.enable: "true"
    traefik.docker.network: "${TRAEFIK_NETWORK:-traefik}"
    traefik.http.services.fetchml.loadbalancer.server.port: "9101"
    # Route /api, /ws and /health for FETCHML_DOMAIN to this service.
    traefik.http.routers.fetchml.rule: "Host(`${FETCHML_DOMAIN}`) && (PathPrefix(`/api`) || PathPrefix(`/ws`) || Path(`/health`))"
    traefik.http.routers.fetchml.entrypoints: "${TRAEFIK_ENTRYPOINT:-websecure}"
    traefik.http.routers.fetchml.tls: "true"
  networks:
    - default
    - traefik
worker:
  # Worker process; shares the experiments/active/logs directories with the
  # api-server and starts only once redis and the api-server are healthy.
  build:
    context: .
    dockerfile: ./build/docker/simple.Dockerfile
  container_name: ml-prod-worker
  restart: unless-stopped
  # NOTE(review): full privileged mode; presumably needed to launch nested
  # containers (the file header mentions Podman) — TODO confirm it is required.
  privileged: true
  depends_on:
    redis:
      condition: service_healthy
    api-server:
      condition: service_healthy
  environment:
    LOG_LEVEL: info
    # AWS credentials are interpolated from the environment Compose runs in;
    # an unset variable is substituted as an empty string.
    AWS_ACCESS_KEY_ID: "${AWS_ACCESS_KEY_ID}"
    AWS_SECRET_ACCESS_KEY: "${AWS_SECRET_ACCESS_KEY}"
    AWS_SESSION_TOKEN: "${AWS_SESSION_TOKEN}"
    AWS_REGION: "${AWS_REGION}"
  volumes:
    - ${PROD_DATA_DIR:-./data/prod}/experiments:/app/data/experiments
    - ${PROD_DATA_DIR:-./data/prod}/active:/data/active
    - ${PROD_DATA_DIR:-./data/prod}/logs:/logs
    # Worker config, mounted read-only at the path given to -config below.
    - ${CONFIG_DIR:-../configs}/worker/docker-prod.yaml:/app/configs/worker.yaml:ro
  command:
    - /usr/local/bin/worker
    - "-config"
    - /app/configs/worker.yaml
# Smoke test profile services
caddy:
  # Caddy front end for smoke tests. Only started when the `smoke` profile is
  # enabled (e.g. `--profile smoke`).
  image: caddy:2-alpine
  container_name: ml-smoke-caddy
  profiles:
    - smoke
  depends_on:
    # Long form of the plain list entry: wait for the api-server container to
    # be started, not for it to be healthy.
    api-server:
      condition: service_started
  environment:
    FETCHML_DOMAIN: localhost
  ports:
    # Host port 8080 -> container port 80.
    - "8080:80"
  volumes:
    # Smoke-specific Caddyfile, mounted read-only.
    - ./Caddyfile.prod.smoke:/etc/caddy/Caddyfile:ro
    # Caddy state is kept under SMOKE_TEST_DATA_DIR (default ./data/prod-smoke).
    - ${SMOKE_TEST_DATA_DIR:-./data/prod-smoke}/caddy/data:/data
    - ${SMOKE_TEST_DATA_DIR:-./data/prod-smoke}/caddy/config:/config
ssh-test-server:
  # OpenSSH server for smoke tests, with the TUI binary and its test config
  # mounted in. Only started when the `smoke` profile is enabled.
  image: linuxserver/openssh-server:latest
  container_name: ml-ssh-test
  profiles:
    - smoke
  depends_on:
    # Long form of the plain list entries: both dependencies only need to be
    # started, not healthy.
    caddy:
      condition: service_started
    api-server:
      condition: service_started
  environment:
    # Values are quoted so they reach the container as strings.
    PUID: "1000"
    PGID: "1000"
    TZ: America/New_York
    # Key-only login for user `test`; the public key comes from the /tmp mount.
    PUBLIC_KEY_FILE: /tmp/test_key.pub
    USER_NAME: test
    PASSWORD_ACCESS: "false"
  ports:
    - "2222:2222"
  volumes:
    # Test keys are mounted read-only over the container's /tmp.
    - ./deployments/test_keys:/tmp:ro
    - ./bin/tui:/usr/local/bin/tui:ro
    - ./deployments/tui-test-config.toml:/config/.ml/config.toml:ro
# No named volumes: every service uses bind mounts.
volumes: {}
networks:
  # Project network; services that declare no `networks` key join it.
  default:
    name: ml-prod-network
  # Pre-existing Traefik network. `external: true` means Compose expects it to
  # exist already and will neither create nor remove it.
  traefik:
    external: true
    name: "${TRAEFIK_NETWORK:-traefik}"
  # `profiles` is a service-level attribute only. The Compose schema rejects it
  # on a top-level network, which made the whole file fail validation, so it is
  # omitted here.
  # NOTE(review): no service in this file attaches to this network; confirm
  # whether the smoke services should join it.
  smoke-network:
    name: ml-prod-smoke-network