chore(deploy): update Docker compose files and add MinIO lifecycle policies

Docker Compose updates:
- docker-compose.dev.yml: add GPU support, local scheduler and worker
- docker-compose.staging.yml: production-like staging with SSL termination
- docker-compose.test.yml: ephemeral test environment with seeded data

MinIO lifecycle management:
- Add lifecycle-dev.json: 1-day expiry for dev test/temp artifacts and incomplete multipart uploads
- Add lifecycle-staging.json: 7-day expiry for staging snapshots, 1-day for test/temp artifacts

Build improvements:
- Makefile: add native library build targets and cross-platform support
- scripts/release/cleanup.sh: improved artifact cleanup with dry-run mode
This commit is contained in:
Jeremie Fraeys 2026-03-12 12:06:16 -04:00
parent 17170667e2
commit 93d6d63d8d
No known key found for this signature in database
7 changed files with 210 additions and 10 deletions

View file

@ -288,6 +288,47 @@ clean-release: clean
@./scripts/release/cleanup.sh state 2>/dev/null || true
@echo "$(OK) Release cleanup complete"
# Clean ephemeral test data (safe, no confirmation needed)
# Removes scratch files under data/.ephemeral/ and prunes any Docker
# volumes labelled fetchml.data.lifecycle=ephemeral. The prune is
# best-effort (2>/dev/null || true) so the target still succeeds when
# the Docker daemon is not running.
clean-ephemeral:
	rm -rf data/.ephemeral/*
	docker volume prune -f --filter "label=fetchml.data.lifecycle=ephemeral" 2>/dev/null || true
	@echo "$(OK) Ephemeral data cleaned"
# Clean dev environment completely
# Tears down the dev compose stack including its named volumes (-v) and
# any orphaned containers, then deletes local smoke-test and ephemeral
# data directories. The compose teardown is best-effort so this works
# even when Docker is unavailable.
# NOTE(review): no .PHONY declaration is visible in this hunk — confirm
# these clean-* targets are declared phony elsewhere in the Makefile.
clean-dev:
	$(DC) -f deployments/docker-compose.dev.yml down -v --remove-orphans 2>/dev/null || true
	rm -rf data/smoke/ data/.ephemeral/
	@echo "$(OK) Dev environment cleaned"
# Clean staging environment
# Runs the staging-down prerequisite first, then removes compose volumes
# BEFORE deleting the host data directory — so no container can still be
# writing into data/staging/ while it is removed. This matches the
# teardown order used by clean-dev (stack down, then delete host data).
clean-staging: staging-down
	$(DC) -f deployments/docker-compose.staging.yml down -v 2>/dev/null || true
	rm -rf data/staging/
	@echo "$(OK) Staging environment cleaned"
# Clean all data (destructive - requires explicit CONFIRM=yes)
# The guard checks the *value* of CONFIRM, not merely that it is set:
# with the previous `ifndef CONFIRM`, running `make clean-data CONFIRM=no`
# (or CONFIRM=anything) would wipe everything. $(strip ...) tolerates
# stray whitespace in the value. The tab-indented $(error ...) becomes a
# recipe line, so it only fires when this target actually runs, not at
# parse time for unrelated goals.
clean-data:
ifneq ($(strip $(CONFIRM)),yes)
	$(error Run with CONFIRM=yes to proceed: make clean-data CONFIRM=yes)
endif
	rm -rf data/
	docker volume prune -f 2>/dev/null || true
	@echo "$(OK) All data cleaned"
# Check lifecycle labels on containers and volumes
# Prints "<name>: <label value>" for every running container and every
# volume; the fetchml.data.lifecycle label renders empty for unlabelled
# objects. Purely diagnostic and best-effort: docker failures are
# silenced (2>/dev/null ... || true) so the target succeeds even when
# the daemon is unreachable. xargs -I{} runs nothing on empty input.
check-labels:
	@echo "=== Checking container lifecycle labels ==="
	@docker ps --format '{{.Names}}' 2>/dev/null | xargs -I{} docker inspect {} \
		--format '{{.Name}}: {{index .Config.Labels "fetchml.data.lifecycle"}}' 2>/dev/null || true
	@echo "=== Checking volume labels ==="
	@docker volume ls --format '{{.Name}}' 2>/dev/null | xargs -I{} docker volume inspect {} \
		--format '{{.Name}}: {{index .Labels "fetchml.data.lifecycle"}}' 2>/dev/null || true
# Check MinIO lifecycle policies
# Lists the ILM rules applied to the dev snapshots bucket using the mc
# client bundled inside the ml-dev-minio container; prints a friendly
# message instead of failing when the container or bucket is missing.
check-minio-lifecycle:
	@docker exec ml-dev-minio mc ilm list local/fetchml-snapshots 2>/dev/null || \
		echo "MinIO not running or bucket doesn't exist"
# Prepare a release: depends on clean-release so the verification script
# always runs against a clean slate, then runs the release verifier.
prepare-release: clean-release
	@./scripts/release/verify.sh
	@echo "$(OK) Release preparation complete"

View file

@ -6,6 +6,7 @@ services:
image: caddy:2-alpine
container_name: ml-dev-caddy
restart: unless-stopped
pull_policy: always
ports:
- "8080:80"
- "8443:443"
@ -16,10 +17,22 @@ services:
depends_on:
api-server:
condition: service_healthy
labels:
- "fetchml.data.lifecycle=persistent"
- "fetchml.component=proxy"
deploy:
resources:
limits:
cpus: '0.5'
memory: 128M
reservations:
cpus: '0.25'
memory: 64M
redis:
image: redis:7-alpine
container_name: ml-experiments-redis
user: "999:999"
pull_policy: always
ports:
- "6379:6379"
volumes:
@ -31,6 +44,17 @@ services:
interval: 30s
timeout: 10s
retries: 3
labels:
- "fetchml.data.lifecycle=persistent"
- "fetchml.component=cache"
deploy:
resources:
limits:
cpus: '0.5'
memory: 512M
reservations:
cpus: '0.25'
memory: 256M
api-server:
build:
context: ..
@ -62,11 +86,21 @@ services:
retries: 3
start_period: 40s
# Compose `labels:` must be either all mapping form or all sequence form;
# the previous mix of `job: "api-server"` with `- "key=value"` items is
# invalid YAML. Normalized to the list form used everywhere else.
labels:
  - "job=api-server"
  - "fetchml.data.lifecycle=persistent"
  - "fetchml.component=api"
deploy:
resources:
limits:
cpus: '1.0'
memory: 512M
reservations:
cpus: '0.5'
memory: 256M
# MinIO for local development (single-node filesystem backend)
minio:
image: minio/minio:latest
container_name: ml-dev-minio
pull_policy: always
ports:
- "9000:9000"
- "9001:9001"
@ -83,19 +117,28 @@ services:
timeout: 5s
retries: 5
restart: unless-stopped
labels:
- "fetchml.data.lifecycle=persistent"
- "fetchml.data.max-age=168h"
- "fetchml.component=object-store"
# Initialize minio bucket (runs once)
minio-init:
image: minio/mc:latest
container_name: ml-dev-minio-init
pull_policy: always
depends_on:
minio:
condition: service_healthy
volumes:
- ./minio/lifecycle-dev.json:/tmp/lifecycle.json:ro
entrypoint: ["/bin/sh", "-c"]
command:
- |
mc alias set local http://minio:9000 minioadmin minioadmin123 || exit 1
mc mb -p local/fetchml-snapshots 2>/dev/null || echo "Bucket exists"
# Apply lifecycle policy for automatic cleanup
mc ilm import local/fetchml-snapshots /tmp/lifecycle.json 2>/dev/null || echo "Lifecycle policy may already exist"
echo "MinIO initialized"
restart: "no"
worker:
@ -140,8 +183,18 @@ services:
# Native libs enabled via build tag: -tags native_libs
privileged: true
command: ["/usr/local/bin/worker", "-config", "/app/configs/worker.yaml"]
labels:
- "fetchml.data.lifecycle=persistent"
- "fetchml.component=worker"
deploy:
replicas: 2 # Scale to 2 workers for dev (adjust as needed)
replicas: 2
resources:
limits:
cpus: '2.0'
memory: 2G
reservations:
cpus: '1.0'
memory: 1G
# Local profile: use pre-built images instead of building from source
api-server-local:
@ -166,6 +219,9 @@ services:
timeout: 10s
retries: 3
start_period: 40s
labels:
- "fetchml.data.lifecycle=persistent"
- "fetchml.component=api"
worker-local:
image: fetchml-worker:latest
@ -185,6 +241,9 @@ services:
- LOG_LEVEL=info
- MINIO_ROOT_USER=minioadmin
- MINIO_ROOT_PASSWORD=minioadmin123
labels:
- "fetchml.data.lifecycle=persistent"
- "fetchml.component=worker"
depends_on:
redis:
condition: service_healthy

View file

@ -1,5 +1,3 @@
version: '3.8'
# Staging environment Docker Compose
# This environment is for pre-production validation
# Data is persisted but isolated from production
@ -8,6 +6,7 @@ services:
caddy:
image: caddy:2-alpine
container_name: ml-staging-caddy
pull_policy: always
ports:
- "9080:80"
- "9443:443"
@ -22,6 +21,7 @@ services:
redis:
image: redis:7-alpine
container_name: ml-staging-redis
pull_policy: always
ports:
- "6380:6379"
volumes:
@ -60,6 +60,7 @@ services:
minio:
image: minio/minio:latest
container_name: ml-staging-minio
pull_policy: always
ports:
- "9002:9000"
- "9003:9001"
@ -76,18 +77,27 @@ services:
timeout: 5s
retries: 5
restart: unless-stopped
labels:
- "fetchml.data.lifecycle=persistent"
- "fetchml.data.max-age=168h"
- "fetchml.component=object-store"
minio-init:
image: minio/mc:latest
container_name: ml-staging-minio-init
pull_policy: always
depends_on:
minio:
condition: service_healthy
volumes:
- ./minio/lifecycle-staging.json:/tmp/lifecycle.json:ro
entrypoint: ["/bin/sh", "-c"]
command:
- |
mc alias set local http://minio:9000 ${MINIO_ROOT_USER:-minioadmin} ${MINIO_ROOT_PASSWORD:-minioadmin123} || exit 1
mc mb -p local/fetchml-snapshots-staging 2>/dev/null || echo "Bucket exists"
# Apply lifecycle policy for automatic cleanup
mc ilm import local/fetchml-snapshots-staging /tmp/lifecycle.json 2>/dev/null || echo "Lifecycle policy may already exist"
echo "MinIO initialized for staging"
restart: "no"
@ -96,6 +106,7 @@ services:
context: ../
dockerfile: build/docker/simple.Dockerfile
container_name: ml-staging-worker
pull_policy: always
volumes:
- ${DATA_DIR:-./data/staging}/logs:/logs
- ${DATA_DIR:-./data/staging}/experiments:/data/experiments
@ -116,6 +127,9 @@ services:
- MINIO_ENDPOINT=minio:9000
- MINIO_ROOT_USER=${MINIO_ROOT_USER:-minioadmin}
- MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minioadmin123}
labels:
- "fetchml.data.lifecycle=persistent"
- "fetchml.component=worker"
# Audit log sink for staging (write-once store)
audit-sink:

View file

@ -52,10 +52,26 @@ services:
- REDIS_ADDR=redis:6379
volumes:
- ../../:/app
- api-logs:/logs
- api-experiments:/data/experiments
- api-active:/data/active
# Use tmpfs for test data - auto-cleanup on container stop
- type: tmpfs
target: /logs
tmpfs:
size: 100M
mode: 1777
- type: tmpfs
target: /data/experiments
tmpfs:
size: 500M
mode: 1777
- type: tmpfs
target: /data/active
tmpfs:
size: 200M
mode: 1777
- go-mod-cache:/go/pkg/mod
labels:
- "fetchml.data.lifecycle=ephemeral"
- "fetchml.component=test-api"
depends_on:
redis:
condition: service_healthy
@ -68,7 +84,4 @@ services:
volumes:
ssh-config:
api-logs:
api-experiments:
api-active:
go-mod-cache:

View file

@ -0,0 +1,31 @@
{
"Rules": [
{
"ID": "delete-test-snapshots",
"Status": "Enabled",
"Filter": {
"Prefix": "test-"
},
"Expiration": {
"Days": 1
}
},
{
"ID": "delete-temp-uploads",
"Status": "Enabled",
"Filter": {
"Prefix": "temp/"
},
"Expiration": {
"Days": 1
}
},
{
"ID": "delete-incomplete-multipart",
"Status": "Enabled",
"AbortIncompleteMultipartUpload": {
"DaysAfterInitiation": 1
}
}
]
}

View file

@ -0,0 +1,41 @@
{
"Rules": [
{
"ID": "delete-staging-test-snapshots",
"Status": "Enabled",
"Filter": {
"Prefix": "test-"
},
"Expiration": {
"Days": 1
}
},
{
"ID": "delete-staging-temp-uploads",
"Status": "Enabled",
"Filter": {
"Prefix": "temp/"
},
"Expiration": {
"Days": 1
}
},
{
"ID": "transition-staging-snapshots",
"Status": "Enabled",
"Filter": {
"Prefix": "staging-"
},
"Expiration": {
"Days": 7
}
},
{
"ID": "delete-incomplete-multipart",
"Status": "Enabled",
"AbortIncompleteMultipartUpload": {
"DaysAfterInitiation": 1
}
}
]
}

View file

@ -39,6 +39,7 @@ cleanup_docker() {
# Stop all project-related containers
$compose_cmd -f deployments/docker-compose.dev.yml down --volumes --remove-orphans 2>/dev/null || true
$compose_cmd -f deployments/docker-compose.staging.yml down --volumes --remove-orphans 2>/dev/null || true
$compose_cmd -f deployments/docker-compose.test.yml --profile debug down --volumes --remove-orphans 2>/dev/null || true
$compose_cmd -f deployments/docker-compose.prod.yml --profile smoke down --volumes --remove-orphans 2>/dev/null || true