deploy: consolidate docker-compose files using profiles

- Merge logs-debug.yml into test.yml with 'debug' profile
- Merge local.yml into dev.yml with 'local' profile
- Merge prod.smoke.yml into prod.yml with 'smoke' profile
- Reduce compose files from 8 to 5 to simplify maintenance
- Update TEST_COMPOSE to use deployments/docker-compose.test.yml
This commit is contained in:
Jeremie Fraeys 2026-03-04 13:22:17 -05:00
parent 16343e6c2a
commit 98a0d42213
No known key found for this signature in database
3 changed files with 181 additions and 9 deletions

View file

@ -10,7 +10,7 @@ services:
- "8080:80"
- "8443:443"
volumes:
- ./deployments/Caddyfile.dev:/etc/caddy/Caddyfile:ro
- ./Caddyfile.dev:/etc/caddy/Caddyfile:ro
- ${DATA_DIR:-./data/smoke}/caddy/data:/data
- ${DATA_DIR:-./data/smoke}/caddy/config:/config
depends_on:
@ -33,7 +33,7 @@ services:
retries: 3
api-server:
build:
context: .
context: ..
dockerfile: build/docker/simple.Dockerfile
container_name: ml-experiments-api
user: "0:0"
@ -46,7 +46,7 @@ services:
- ${DATA_DIR:-./data/smoke}/experiments:/data/experiments
- ${DATA_DIR:-./data/smoke}/active:/data/active
- ${DATA_DIR:-./data/smoke}/workspaces:/data/active/workspaces:delegated
- ${DATA_DIR:-./data/smoke}/configs:/app/configs:ro
- ${CONFIG_DIR:-../configs}/api/dev.yaml:/app/configs/api/dev.yaml:ro
- ${DATA_DIR:-./data/smoke}/ssl:/app/ssl:ro
depends_on:
- redis
@ -100,18 +100,19 @@ services:
restart: "no"
worker:
build:
context: .
context: ..
dockerfile: build/docker/simple.Dockerfile
container_name: ml-experiments-worker
# Remove fixed container name to allow scaling
# container_name: ml-experiments-worker
user: "0:0"
ports:
- "8888:8888"
- "8888-8891:8888" # Port range for multiple workers
volumes:
- ${DATA_DIR:-./data/smoke}/logs:/logs
- ${DATA_DIR:-./data/smoke}/active:/data/active
- ${DATA_DIR:-./data/smoke}/experiments:/data/experiments
- ${DATA_DIR:-./data/smoke}/workspaces:/data/active/workspaces:delegated
- ${DATA_DIR:-./data/smoke}/configs/worker/docker-dev.yaml:/app/configs/worker.yaml:ro
- ${CONFIG_DIR:-../configs}/worker/docker-dev.yaml:/app/configs/worker.yaml:ro
- ${DATA_DIR:-./data/smoke}/ssl:/app/ssl:ro
- /sys/fs/cgroup:/sys/fs/cgroup:rw
depends_on:
@ -130,9 +131,66 @@ services:
- FETCHML_JUPYTER_CONDA_ENV=base
- FETCHML_JUPYTER_KERNEL_NAME=python
- FETCHML_PODMAN_CGROUPS=disabled
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://localhost:8888/health"]
interval: 10s
timeout: 5s
retries: 3
start_period: 30s
# Native libs enabled via build tag: -tags native_libs
privileged: true
command: ["/usr/local/bin/worker", "-config", "/app/configs/worker.yaml"]
deploy:
replicas: 2 # Scale to 2 workers for dev (adjust as needed)
# Local profile: runs the API server from the pre-built fetchml-api image
# rather than building it from source.
api-server-local:
  image: fetchml-api:latest
  # Started only when the "local" profile is enabled.
  profiles:
    - "local"
  ports:
    - "9101:9101"
  volumes:
    # Host data root defaults to ./data/dev unless LOCAL_DATA_DIR is set.
    - ${LOCAL_DATA_DIR:-./data/dev}/logs:/logs
    - ${LOCAL_DATA_DIR:-./data/dev}/experiments:/data/experiments
    - ${LOCAL_DATA_DIR:-./data/dev}/active:/data/active
    - ${LOCAL_DATA_DIR:-./data/dev}/workspaces:/data/active/workspaces:delegated
    # API config is mounted read-only; CONFIG_DIR defaults to ../configs.
    - ${CONFIG_DIR:-../configs}/api/dev.yaml:/app/configs/api/dev.yaml:ro
  environment:
    - LOG_LEVEL=info
  depends_on:
    # Wait until the redis service reports healthy.
    redis:
      condition: service_healthy
  healthcheck:
    # Probes the /health endpoint on port 9101 from inside the container.
    test:
      - "CMD"
      - "curl"
      - "-f"
      - "http://localhost:9101/health"
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 40s
# Local profile: runs the worker from the pre-built fetchml-worker image.
worker-local:
  image: fetchml-worker:latest
  # Started only when the "local" profile is enabled.
  profiles:
    - "local"
  privileged: true
  ports:
    # NOTE(review): fixed host port 8888 — confirm it cannot clash with other
    # services that publish 8888 when this profile is active.
    - "8888:8888"
  volumes:
    # Host data root defaults to ./data/dev unless LOCAL_DATA_DIR is set.
    - ${LOCAL_DATA_DIR:-./data/dev}/logs:/logs
    - ${LOCAL_DATA_DIR:-./data/dev}/active:/data/active
    - ${LOCAL_DATA_DIR:-./data/dev}/experiments:/data/experiments
    - ${LOCAL_DATA_DIR:-./data/dev}/workspaces:/data/active/workspaces:delegated
    - ${LOCAL_DATA_DIR:-./data/dev}/snapshots:/data/snapshots
    # Worker config is mounted read-only; CONFIG_DIR defaults to ../configs.
    - ${CONFIG_DIR:-../configs}/worker/docker-dev.yaml:/app/configs/worker.yaml:ro
    # Host cgroup hierarchy is mounted read-write into the container.
    - /sys/fs/cgroup:/sys/fs/cgroup:rw
  environment:
    - LOG_LEVEL=info
    # NOTE(review): credentials are hard-coded here; presumably dev-only
    # defaults — confirm they are never used outside local development.
    - MINIO_ROOT_USER=minioadmin
    - MINIO_ROOT_PASSWORD=minioadmin123
  depends_on:
    # Both dependencies must report healthy before the worker starts.
    redis:
      condition: service_healthy
    api-server-local:
      condition: service_healthy
volumes:
redis_data:
driver: local

View file

@ -28,7 +28,7 @@ services:
- ${PROD_DATA_DIR:-./data/prod}/experiments:/app/data/experiments
- ${PROD_DATA_DIR:-./data/prod}/active:/data/active
- ${PROD_DATA_DIR:-./data/prod}/logs:/logs
- ${PROD_DATA_DIR:-./data/prod}/configs/api/multi-user.yaml:/app/configs/api/prod.yaml:ro
- ${CONFIG_DIR:-../configs}/api/multi-user.yaml:/app/configs/api/prod.yaml:ro
depends_on:
redis:
condition: service_healthy
@ -62,7 +62,7 @@ services:
- ${PROD_DATA_DIR:-./data/prod}/experiments:/app/data/experiments
- ${PROD_DATA_DIR:-./data/prod}/active:/data/active
- ${PROD_DATA_DIR:-./data/prod}/logs:/logs
- ${PROD_DATA_DIR:-./data/prod}/configs/worker/docker-prod.yaml:/app/configs/worker.yaml:ro
- ${CONFIG_DIR:-../configs}/worker/docker-prod.yaml:/app/configs/worker.yaml:ro
depends_on:
redis:
condition: service_healthy
@ -78,6 +78,43 @@ services:
privileged: true
command: ["/usr/local/bin/worker", "-config", "/app/configs/worker.yaml"]
# Smoke test profile services
# Caddy for smoke tests; started only when the "smoke" profile is enabled.
caddy:
  image: caddy:2-alpine
  container_name: ml-smoke-caddy
  profiles:
    - "smoke"
  environment:
    - FETCHML_DOMAIN=localhost
  ports:
    # Host port 8080 is published to container port 80.
    - "8080:80"
  volumes:
    # Smoke-test Caddyfile is mounted read-only.
    - ./Caddyfile.prod.smoke:/etc/caddy/Caddyfile:ro
    # Caddy state directories; the host root defaults to ./data/prod-smoke
    # unless SMOKE_TEST_DATA_DIR is set.
    - ${SMOKE_TEST_DATA_DIR:-./data/prod-smoke}/caddy/data:/data
    - ${SMOKE_TEST_DATA_DIR:-./data/prod-smoke}/caddy/config:/config
  depends_on:
    - api-server
# SSH server for smoke tests; started only when the "smoke" profile is enabled.
ssh-test-server:
  image: linuxserver/openssh-server:latest
  container_name: ml-ssh-test
  profiles: ["smoke"]
  environment:
    - PUID=1000
    - PGID=1000
    - TZ=America/New_York
    # Key-based login only: the public key is read from the test_keys mount.
    - PUBLIC_KEY_FILE=/tmp/test_key.pub
    - USER_NAME=test
    - PASSWORD_ACCESS=false
  volumes:
    # Host paths are written relative to this compose file's directory,
    # matching ./Caddyfile.prod.smoke on the caddy service and the
    # CONFIG_DIR default of ../configs. A leading ./deployments/ would resolve
    # to a nested deployments/deployments/ path, and ./bin would resolve
    # inside deployments/ instead of the repository root.
    - ./test_keys:/tmp:ro
    - ../bin/tui:/usr/local/bin/tui:ro
    - ./tui-test-config.toml:/config/.ml/config.toml:ro
  ports:
    - "2222:2222"
  depends_on:
    - caddy
    - api-server
volumes: {}
networks:
@ -86,3 +123,6 @@ networks:
traefik:
external: true
name: ${TRAEFIK_NETWORK:-traefik}
smoke-network:
  # Network intended for the smoke-test services. `profiles` is a
  # service-level attribute and is not permitted on a top-level network
  # (Compose rejects it as an unknown property), so it is omitted here.
  name: ml-prod-smoke-network

View file

@ -0,0 +1,74 @@
---
# Docker Compose for test infrastructure (Redis + SSH test server), plus an
# optional API server that is started only with the "debug" profile.
# Used by make test-e2e to provide consistent test environment
services:
  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    healthcheck:
      # Healthy once redis-cli can ping the server.
      test: ["CMD", "redis-cli", "ping"]
      interval: 1s
      timeout: 3s
      retries: 10
      start_period: 2s

  ssh-test-server:
    image: linuxserver/openssh-server:latest
    ports:
      - "2222:2222"
    environment:
      - PUID=1000
      - PGID=1000
      - TZ=Etc/UTC
      # Password login with a fixed test account.
      - USER_NAME=testuser
      - USER_PASSWORD=testpass
      - SUDO_ACCESS=true
      - PASSWORD_ACCESS=true
    volumes:
      - ssh-config:/config
    healthcheck:
      # Healthy once something is listening on port 2222 in the container.
      test: ["CMD-SHELL", "nc -z localhost 2222 || exit 1"]
      interval: 2s
      timeout: 3s
      retries: 15
      start_period: 5s

  # Debug profile: API server for logs/debug e2e tests
  api-server:
    image: golang:1.25-bookworm
    profiles: ["debug"]
    working_dir: /app
    # Builds the API server from the mounted source tree, then runs it.
    command: >
      sh -c "
      go build -o api-server ./cmd/api-server/main.go &&
      ./api-server --config /app/configs/api/dev.yaml
      "
    ports:
      # Host port 9102 is published to container port 9101.
      - "9102:9101"
    environment:
      - LOG_LEVEL=debug
      - REDIS_ADDR=redis:6379
    volumes:
      # Repository root. Bind-mount paths resolve relative to this compose
      # file's directory (deployments/), so the root is one level up; ../../
      # would mount the parent of the repository and the build would not find
      # ./cmd/api-server.
      - ../:/app
      - api-logs:/logs
      - api-experiments:/data/experiments
      - api-active:/data/active
      # Named volume so the Go module cache persists across container runs.
      - go-mod-cache:/go/pkg/mod
    depends_on:
      redis:
        condition: service_healthy
    healthcheck:
      # start_period allows time for the go build step before probing.
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:9101/health"]
      interval: 5s
      timeout: 3s
      retries: 10
      start_period: 30s

volumes:
  ssh-config:
  api-logs:
  api-experiments:
  api-active:
  go-mod-cache: