From 98a0d42213c40de1b7afca1c2adf025b95fdb378 Mon Sep 17 00:00:00 2001
From: Jeremie Fraeys
Date: Wed, 4 Mar 2026 13:22:17 -0500
Subject: [PATCH] deploy: consolidate docker-compose files using profiles

- Merge logs-debug.yml into test.yml with 'debug' profile
- Merge local.yml into dev.yml with 'local' profile
- Merge prod.smoke.yml into prod.yml with 'smoke' profile
- Reduces compose files from 8 to 5, simplifies maintenance
- Update TEST_COMPOSE to use deployments/docker-compose.test.yml
---
Review notes (this section is not part of the commit message):
- Line structure and YAML indentation were reconstructed from a
  whitespace-collapsed copy; hunk line counts and the diffstat were
  re-checked. Apply with `git apply --ignore-whitespace` if the context
  indentation differs from the tree.
- prod.yml, smoke profile: ssh-test-server bind mounts are now written
  relative to deployments/, like ./Caddyfile.prod.smoke in the same file.
  This assumes Compose's default project directory (the directory of the
  compose file); confirm against how the Makefile invokes compose.
- prod.yml: `profiles` was removed from the top-level `smoke-network`
  entry; profiles are a service attribute, not a network attribute.
- test.yml, debug profile: the source mount is `..` (repository root)
  instead of `../../`, which pointed above the repository.

 deployments/docker-compose.dev.yml  | 72 +++++++++++++++++++++++++---
 deployments/docker-compose.prod.yml | 44 ++++++++++++++++-
 deployments/docker-compose.test.yml | 74 +++++++++++++++++++++++++++++
 3 files changed, 181 insertions(+), 9 deletions(-)
 create mode 100644 deployments/docker-compose.test.yml

diff --git a/deployments/docker-compose.dev.yml b/deployments/docker-compose.dev.yml
index d8095da..f23eb28 100644
--- a/deployments/docker-compose.dev.yml
+++ b/deployments/docker-compose.dev.yml
@@ -10,7 +10,7 @@ services:
       - "8080:80"
       - "8443:443"
     volumes:
-      - ./deployments/Caddyfile.dev:/etc/caddy/Caddyfile:ro
+      - ./Caddyfile.dev:/etc/caddy/Caddyfile:ro
       - ${DATA_DIR:-./data/smoke}/caddy/data:/data
       - ${DATA_DIR:-./data/smoke}/caddy/config:/config
     depends_on:
@@ -33,7 +33,7 @@ services:
       retries: 3
   api-server:
     build:
-      context: .
+      context: ..
       dockerfile: build/docker/simple.Dockerfile
     container_name: ml-experiments-api
     user: "0:0"
@@ -46,7 +46,7 @@ services:
       - ${DATA_DIR:-./data/smoke}/experiments:/data/experiments
       - ${DATA_DIR:-./data/smoke}/active:/data/active
       - ${DATA_DIR:-./data/smoke}/workspaces:/data/active/workspaces:delegated
-      - ${DATA_DIR:-./data/smoke}/configs:/app/configs:ro
+      - ${CONFIG_DIR:-../configs}/api/dev.yaml:/app/configs/api/dev.yaml:ro
       - ${DATA_DIR:-./data/smoke}/ssl:/app/ssl:ro
     depends_on:
       - redis
@@ -100,18 +100,19 @@ services:
     restart: "no"
   worker:
     build:
-      context: .
+      context: ..
       dockerfile: build/docker/simple.Dockerfile
-    container_name: ml-experiments-worker
+    # Remove fixed container name to allow scaling
+    # container_name: ml-experiments-worker
     user: "0:0"
     ports:
-      - "8888:8888"
+      - "8888-8891:8888"  # Port range for multiple workers
     volumes:
       - ${DATA_DIR:-./data/smoke}/logs:/logs
       - ${DATA_DIR:-./data/smoke}/active:/data/active
       - ${DATA_DIR:-./data/smoke}/experiments:/data/experiments
       - ${DATA_DIR:-./data/smoke}/workspaces:/data/active/workspaces:delegated
-      - ${DATA_DIR:-./data/smoke}/configs/worker/docker-dev.yaml:/app/configs/worker.yaml:ro
+      - ${CONFIG_DIR:-../configs}/worker/docker-dev.yaml:/app/configs/worker.yaml:ro
       - ${DATA_DIR:-./data/smoke}/ssl:/app/ssl:ro
       - /sys/fs/cgroup:/sys/fs/cgroup:rw
     depends_on:
@@ -130,9 +131,66 @@ services:
       - FETCHML_JUPYTER_CONDA_ENV=base
       - FETCHML_JUPYTER_KERNEL_NAME=python
       - FETCHML_PODMAN_CGROUPS=disabled
+    healthcheck:
+      test: ["CMD", "wget", "-q", "--spider", "http://localhost:8888/health"]
+      interval: 10s
+      timeout: 5s
+      retries: 3
+      start_period: 30s
     # Native libs enabled via build tag: -tags native_libs
     privileged: true
     command: ["/usr/local/bin/worker", "-config", "/app/configs/worker.yaml"]
+    deploy:
+      replicas: 2  # Scale to 2 workers for dev (adjust as needed)
+
+  # Local profile: use pre-built images instead of building from source
+  api-server-local:
+    image: fetchml-api:latest
+    profiles: ["local"]
+    ports:
+      - "9101:9101"
+    volumes:
+      - ${LOCAL_DATA_DIR:-./data/dev}/logs:/logs
+      - ${LOCAL_DATA_DIR:-./data/dev}/experiments:/data/experiments
+      - ${LOCAL_DATA_DIR:-./data/dev}/active:/data/active
+      - ${LOCAL_DATA_DIR:-./data/dev}/workspaces:/data/active/workspaces:delegated
+      - ${CONFIG_DIR:-../configs}/api/dev.yaml:/app/configs/api/dev.yaml:ro
+    environment:
+      - LOG_LEVEL=info
+    depends_on:
+      redis:
+        condition: service_healthy
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:9101/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 40s
+
+  worker-local:
+    image: fetchml-worker:latest
+    profiles: ["local"]
+    privileged: true
+    ports:
+      - "8888:8888"
+    volumes:
+      - ${LOCAL_DATA_DIR:-./data/dev}/logs:/logs
+      - ${LOCAL_DATA_DIR:-./data/dev}/active:/data/active
+      - ${LOCAL_DATA_DIR:-./data/dev}/experiments:/data/experiments
+      - ${LOCAL_DATA_DIR:-./data/dev}/workspaces:/data/active/workspaces:delegated
+      - ${LOCAL_DATA_DIR:-./data/dev}/snapshots:/data/snapshots
+      - ${CONFIG_DIR:-../configs}/worker/docker-dev.yaml:/app/configs/worker.yaml:ro
+      - /sys/fs/cgroup:/sys/fs/cgroup:rw
+    environment:
+      - LOG_LEVEL=info
+      - MINIO_ROOT_USER=minioadmin
+      - MINIO_ROOT_PASSWORD=minioadmin123
+    depends_on:
+      redis:
+        condition: service_healthy
+      api-server-local:
+        condition: service_healthy
+
 volumes:
   redis_data:
     driver: local
diff --git a/deployments/docker-compose.prod.yml b/deployments/docker-compose.prod.yml
index f8e2212..352079d 100644
--- a/deployments/docker-compose.prod.yml
+++ b/deployments/docker-compose.prod.yml
@@ -28,7 +28,7 @@ services:
       - ${PROD_DATA_DIR:-./data/prod}/experiments:/app/data/experiments
       - ${PROD_DATA_DIR:-./data/prod}/active:/data/active
       - ${PROD_DATA_DIR:-./data/prod}/logs:/logs
-      - ${PROD_DATA_DIR:-./data/prod}/configs/api/multi-user.yaml:/app/configs/api/prod.yaml:ro
+      - ${CONFIG_DIR:-../configs}/api/multi-user.yaml:/app/configs/api/prod.yaml:ro
     depends_on:
       redis:
         condition: service_healthy
@@ -62,7 +62,7 @@ services:
       - ${PROD_DATA_DIR:-./data/prod}/experiments:/app/data/experiments
       - ${PROD_DATA_DIR:-./data/prod}/active:/data/active
       - ${PROD_DATA_DIR:-./data/prod}/logs:/logs
-      - ${PROD_DATA_DIR:-./data/prod}/configs/worker/docker-prod.yaml:/app/configs/worker.yaml:ro
+      - ${CONFIG_DIR:-../configs}/worker/docker-prod.yaml:/app/configs/worker.yaml:ro
     depends_on:
       redis:
         condition: service_healthy
@@ -78,6 +78,43 @@ services:
     privileged: true
     command: ["/usr/local/bin/worker", "-config", "/app/configs/worker.yaml"]
 
+  # Smoke test profile services
+  caddy:
+    image: caddy:2-alpine
+    container_name: ml-smoke-caddy
+    profiles: ["smoke"]
+    environment:
+      - FETCHML_DOMAIN=localhost
+    ports:
+      - "8080:80"
+    volumes:
+      - ./Caddyfile.prod.smoke:/etc/caddy/Caddyfile:ro
+      - ${SMOKE_TEST_DATA_DIR:-./data/prod-smoke}/caddy/data:/data
+      - ${SMOKE_TEST_DATA_DIR:-./data/prod-smoke}/caddy/config:/config
+    depends_on:
+      - api-server
+
+  ssh-test-server:
+    image: linuxserver/openssh-server:latest
+    container_name: ml-ssh-test
+    profiles: ["smoke"]
+    environment:
+      - PUID=1000
+      - PGID=1000
+      - TZ=America/New_York
+      - PUBLIC_KEY_FILE=/tmp/test_key.pub
+      - USER_NAME=test
+      - PASSWORD_ACCESS=false
+    volumes:
+      - ./test_keys:/tmp:ro
+      - ../bin/tui:/usr/local/bin/tui:ro
+      - ./tui-test-config.toml:/config/.ml/config.toml:ro
+    ports:
+      - "2222:2222"
+    depends_on:
+      - caddy
+      - api-server
+
 volumes: {}
 
 networks:
@@ -86,3 +123,6 @@ networks:
   traefik:
     external: true
     name: ${TRAEFIK_NETWORK:-traefik}
+  smoke-network:
+    name: ml-prod-smoke-network
+    # No `profiles` key here: profiles apply to services, not to networks.
diff --git a/deployments/docker-compose.test.yml b/deployments/docker-compose.test.yml
new file mode 100644
index 0000000..c3ad169
--- /dev/null
+++ b/deployments/docker-compose.test.yml
@@ -0,0 +1,74 @@
+---
+# Docker Compose for test infrastructure (Redis + SSH test server)
+# Used by make test-e2e to provide consistent test environment
+
+services:
+  redis:
+    image: redis:7-alpine
+    ports:
+      - "6379:6379"
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 1s
+      timeout: 3s
+      retries: 10
+      start_period: 2s
+
+  ssh-test-server:
+    image: linuxserver/openssh-server:latest
+    ports:
+      - "2222:2222"
+    environment:
+      - PUID=1000
+      - PGID=1000
+      - TZ=Etc/UTC
+      - USER_NAME=testuser
+      - USER_PASSWORD=testpass
+      - SUDO_ACCESS=true
+      - PASSWORD_ACCESS=true
+    volumes:
+      - ssh-config:/config
+    healthcheck:
+      test: ["CMD-SHELL", "nc -z localhost 2222 || exit 1"]
+      interval: 2s
+      timeout: 3s
+      retries: 15
+      start_period: 5s
+
+  # Debug profile: API server for logs/debug e2e tests
+  api-server:
+    image: golang:1.25-bookworm
+    profiles: ["debug"]
+    working_dir: /app
+    command: >
+      sh -c "
+      go build -o api-server ./cmd/api-server/main.go &&
+      ./api-server --config /app/configs/api/dev.yaml
+      "
+    ports:
+      - "9102:9101"
+    environment:
+      - LOG_LEVEL=debug
+      - REDIS_ADDR=redis:6379
+    volumes:
+      - ..:/app
+      - api-logs:/logs
+      - api-experiments:/data/experiments
+      - api-active:/data/active
+      - go-mod-cache:/go/pkg/mod
+    depends_on:
+      redis:
+        condition: service_healthy
+    healthcheck:
+      test: ["CMD", "wget", "-q", "--spider", "http://localhost:9101/health"]
+      interval: 5s
+      timeout: 3s
+      retries: 10
+      start_period: 30s
+
+volumes:
+  ssh-config:
+  api-logs:
+  api-experiments:
+  api-active:
+  go-mod-cache: