chore(deploy): update Docker compose files and add MinIO lifecycle policies
Docker Compose updates:
- docker-compose.dev.yml: add GPU support, local scheduler and worker
- docker-compose.staging.yml: production-like staging with SSL termination
- docker-compose.test.yml: ephemeral test environment with seeded data

MinIO lifecycle management:
- Add lifecycle-dev.json: 7-day retention for dev artifacts
- Add lifecycle-staging.json: 30-day retention with transition to cold

Build improvements:
- Makefile: add native library build targets and cross-platform support
- scripts/release/cleanup.sh: improved artifact cleanup with dry-run mode
This commit is contained in:
parent
17170667e2
commit
93d6d63d8d
7 changed files with 210 additions and 10 deletions
41
Makefile
41
Makefile
|
|
@ -288,6 +288,47 @@ clean-release: clean
|
||||||
@./scripts/release/cleanup.sh state 2>/dev/null || true
|
@./scripts/release/cleanup.sh state 2>/dev/null || true
|
||||||
@echo "$(OK) Release cleanup complete"
|
@echo "$(OK) Release cleanup complete"
|
||||||
|
|
||||||
|
# Clean ephemeral test data (safe, no confirmation needed).
#
# - Empties data/.ephemeral/ but keeps the directory itself. The shell glob
#   `*` does not match dot-files, so hidden entries directly under
#   data/.ephemeral/ are left in place.
# - Prunes unused Docker volumes carrying the label
#   fetchml.data.lifecycle=ephemeral. This step is best-effort: stderr is
#   discarded and a failure (e.g. Docker not running) does not fail the target.
# NOTE(review): on Docker Engine >= 23.0 `docker volume prune` only considers
# anonymous volumes unless `--all` is passed; confirm whether named ephemeral
# volumes are expected to be removed here.
#
# $(OK) is a status prefix defined elsewhere in this Makefile.
# Declared phony so a file or directory named "clean-ephemeral" can never make
# the target look up to date (repeating a .PHONY declaration is harmless).
.PHONY: clean-ephemeral
clean-ephemeral:
	rm -rf data/.ephemeral/*
	docker volume prune -f --filter "label=fetchml.data.lifecycle=ephemeral" 2>/dev/null || true
	@echo "$(OK) Ephemeral data cleaned"
|
||||||
|
|
||||||
|
# Clean dev environment completely.
#
# - Tears down the dev compose stack, including its volumes (-v) and any
#   orphaned containers. Best-effort: stderr is discarded and a failure does
#   not abort the target, so the data directories are removed either way.
# - Deletes the data/smoke/ and data/.ephemeral/ directories.
#
# $(DC) (the compose command) and $(OK) are defined elsewhere in this Makefile.
# Declared phony so a file or directory named "clean-dev" can never make the
# target look up to date (repeating a .PHONY declaration is harmless).
.PHONY: clean-dev
clean-dev:
	$(DC) -f deployments/docker-compose.dev.yml down -v --remove-orphans 2>/dev/null || true
	rm -rf data/smoke/ data/.ephemeral/
	@echo "$(OK) Dev environment cleaned"
|
||||||
|
|
||||||
|
# Clean staging environment.
#
# Runs the staging-down prerequisite first (defined elsewhere in this
# Makefile), then:
# - deletes the data/staging/ directory;
# - runs `down -v` on the staging compose file so its volumes are removed as
#   well. Best-effort: stderr is discarded and a failure does not fail the
#   target.
#
# $(DC) (the compose command) and $(OK) are defined elsewhere in this Makefile.
# Declared phony so a file or directory named "clean-staging" can never make
# the target look up to date (repeating a .PHONY declaration is harmless).
.PHONY: clean-staging
clean-staging: staging-down
	rm -rf data/staging/
	$(DC) -f deployments/docker-compose.staging.yml down -v 2>/dev/null || true
	@echo "$(OK) Staging environment cleaned"
|
||||||
|
|
||||||
|
# Clean all data (destructive - requires explicit CONFIRM=yes).
#
# Usage: make clean-data CONFIRM=yes
#
# - Deletes the whole data/ directory.
# - Prunes unused Docker volumes with no label filter, i.e. not limited to
#   volumes belonging to this project.
#
# The guard compares CONFIRM against the literal "yes". A plain `ifndef` test
# would accept any non-empty value (CONFIRM=no, CONFIRM=0, ...) and go on to
# delete everything. The $(error ...) line is tab-indented so it is part of
# the recipe: it is only expanded when this target is actually about to run,
# and make expands all recipe lines before executing any of them, so nothing
# is deleted when the guard fires. Other targets are unaffected.
#
# $(OK) is a status prefix defined elsewhere in this Makefile.
# Declared phony so a file or directory named "clean-data" can never make the
# target look up to date (repeating a .PHONY declaration is harmless).
.PHONY: clean-data
clean-data:
ifneq ($(strip $(CONFIRM)),yes)
	$(error Run with CONFIRM=yes to proceed: make clean-data CONFIRM=yes)
endif
	rm -rf data/
	docker volume prune -f
	@echo "$(OK) All data cleaned"
|
||||||
|
|
||||||
|
# Check lifecycle labels on containers and volumes.
#
# Prints "<name>: <value of the fetchml.data.lifecycle label>" for every
# running container and then for every Docker volume. Read-only and
# best-effort: stderr is discarded and failures (e.g. Docker not running) do
# not fail the target.
#
# The {{...}} sequences are Go templates consumed by the docker CLI; they
# contain no `$`, so make passes them through unchanged.
# Declared phony so a file or directory named "check-labels" can never make
# the target look up to date (repeating a .PHONY declaration is harmless).
.PHONY: check-labels
check-labels:
	@echo "=== Checking container lifecycle labels ==="
	@docker ps --format '{{.Names}}' 2>/dev/null | xargs -I{} docker inspect {} \
		--format '{{.Name}}: {{index .Config.Labels "fetchml.data.lifecycle"}}' 2>/dev/null || true
	@echo "=== Checking volume labels ==="
	@docker volume ls --format '{{.Name}}' 2>/dev/null | xargs -I{} docker volume inspect {} \
		--format '{{.Name}}: {{index .Labels "fetchml.data.lifecycle"}}' 2>/dev/null || true
|
||||||
|
|
||||||
|
# Check MinIO lifecycle policies.
#
# Lists the lifecycle (ILM) rules of the local/fetchml-snapshots bucket by
# running `mc` inside the ml-dev-minio container. If the command fails for any
# reason, a hint is printed instead and the target still succeeds.
# NOTE(review): the `local` alias is configured by the separate minio-init
# container in docker-compose.dev.yml; confirm that `mc` and a usable `local`
# alias are also available inside ml-dev-minio.
#
# Declared phony so a file or directory named "check-minio-lifecycle" can never
# make the target look up to date (repeating a .PHONY declaration is harmless).
.PHONY: check-minio-lifecycle
check-minio-lifecycle:
	@docker exec ml-dev-minio mc ilm list local/fetchml-snapshots 2>/dev/null || \
		echo "MinIO not running or bucket doesn't exist"
|
||||||
|
|
||||||
prepare-release: clean-release
|
prepare-release: clean-release
|
||||||
@./scripts/release/verify.sh
|
@./scripts/release/verify.sh
|
||||||
@echo "$(OK) Release preparation complete"
|
@echo "$(OK) Release preparation complete"
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ services:
|
||||||
image: caddy:2-alpine
|
image: caddy:2-alpine
|
||||||
container_name: ml-dev-caddy
|
container_name: ml-dev-caddy
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
pull_policy: always
|
||||||
ports:
|
ports:
|
||||||
- "8080:80"
|
- "8080:80"
|
||||||
- "8443:443"
|
- "8443:443"
|
||||||
|
|
@ -16,10 +17,22 @@ services:
|
||||||
depends_on:
|
depends_on:
|
||||||
api-server:
|
api-server:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
labels:
|
||||||
|
- "fetchml.data.lifecycle=persistent"
|
||||||
|
- "fetchml.component=proxy"
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 128M
|
||||||
|
reservations:
|
||||||
|
cpus: '0.25'
|
||||||
|
memory: 64M
|
||||||
redis:
|
redis:
|
||||||
image: redis:7-alpine
|
image: redis:7-alpine
|
||||||
container_name: ml-experiments-redis
|
container_name: ml-experiments-redis
|
||||||
user: "999:999"
|
user: "999:999"
|
||||||
|
pull_policy: always
|
||||||
ports:
|
ports:
|
||||||
- "6379:6379"
|
- "6379:6379"
|
||||||
volumes:
|
volumes:
|
||||||
|
|
@ -31,6 +44,17 @@ services:
|
||||||
interval: 30s
|
interval: 30s
|
||||||
timeout: 10s
|
timeout: 10s
|
||||||
retries: 3
|
retries: 3
|
||||||
|
labels:
|
||||||
|
- "fetchml.data.lifecycle=persistent"
|
||||||
|
- "fetchml.component=cache"
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 512M
|
||||||
|
reservations:
|
||||||
|
cpus: '0.25'
|
||||||
|
memory: 256M
|
||||||
api-server:
|
api-server:
|
||||||
build:
|
build:
|
||||||
context: ..
|
context: ..
|
||||||
|
|
@ -62,11 +86,21 @@ services:
|
||||||
retries: 3
|
retries: 3
|
||||||
start_period: 40s
|
start_period: 40s
|
||||||
labels:
|
labels:
|
||||||
job: "api-server"
|
- "fetchml.data.lifecycle=persistent"
|
||||||
|
- "fetchml.component=api"
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '1.0'
|
||||||
|
memory: 512M
|
||||||
|
reservations:
|
||||||
|
cpus: '0.5'
|
||||||
|
memory: 256M
|
||||||
# MinIO for local development (single-node filesystem backend)
|
# MinIO for local development (single-node filesystem backend)
|
||||||
minio:
|
minio:
|
||||||
image: minio/minio:latest
|
image: minio/minio:latest
|
||||||
container_name: ml-dev-minio
|
container_name: ml-dev-minio
|
||||||
|
pull_policy: always
|
||||||
ports:
|
ports:
|
||||||
- "9000:9000"
|
- "9000:9000"
|
||||||
- "9001:9001"
|
- "9001:9001"
|
||||||
|
|
@ -83,19 +117,28 @@ services:
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 5
|
retries: 5
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
labels:
|
||||||
|
- "fetchml.data.lifecycle=persistent"
|
||||||
|
- "fetchml.data.max-age=168h"
|
||||||
|
- "fetchml.component=object-store"
|
||||||
|
|
||||||
# Initialize minio bucket (runs once)
|
# Initialize minio bucket (runs once)
|
||||||
minio-init:
|
minio-init:
|
||||||
image: minio/mc:latest
|
image: minio/mc:latest
|
||||||
container_name: ml-dev-minio-init
|
container_name: ml-dev-minio-init
|
||||||
|
pull_policy: always
|
||||||
depends_on:
|
depends_on:
|
||||||
minio:
|
minio:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
volumes:
|
||||||
|
- ./minio/lifecycle-dev.json:/tmp/lifecycle.json:ro
|
||||||
entrypoint: ["/bin/sh", "-c"]
|
entrypoint: ["/bin/sh", "-c"]
|
||||||
command:
|
command:
|
||||||
- |
|
- |
|
||||||
mc alias set local http://minio:9000 minioadmin minioadmin123 || exit 1
|
mc alias set local http://minio:9000 minioadmin minioadmin123 || exit 1
|
||||||
mc mb -p local/fetchml-snapshots 2>/dev/null || echo "Bucket exists"
|
mc mb -p local/fetchml-snapshots 2>/dev/null || echo "Bucket exists"
|
||||||
|
# Apply lifecycle policy for automatic cleanup
|
||||||
|
mc ilm import local/fetchml-snapshots /tmp/lifecycle.json 2>/dev/null || echo "Lifecycle policy may already exist"
|
||||||
echo "MinIO initialized"
|
echo "MinIO initialized"
|
||||||
restart: "no"
|
restart: "no"
|
||||||
worker:
|
worker:
|
||||||
|
|
@ -140,8 +183,18 @@ services:
|
||||||
# Native libs enabled via build tag: -tags native_libs
|
# Native libs enabled via build tag: -tags native_libs
|
||||||
privileged: true
|
privileged: true
|
||||||
command: ["/usr/local/bin/worker", "-config", "/app/configs/worker.yaml"]
|
command: ["/usr/local/bin/worker", "-config", "/app/configs/worker.yaml"]
|
||||||
|
labels:
|
||||||
|
- "fetchml.data.lifecycle=persistent"
|
||||||
|
- "fetchml.component=worker"
|
||||||
deploy:
|
deploy:
|
||||||
replicas: 2 # Scale to 2 workers for dev (adjust as needed)
|
replicas: 2
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpus: '2.0'
|
||||||
|
memory: 2G
|
||||||
|
reservations:
|
||||||
|
cpus: '1.0'
|
||||||
|
memory: 1G
|
||||||
|
|
||||||
# Local profile: use pre-built images instead of building from source
|
# Local profile: use pre-built images instead of building from source
|
||||||
api-server-local:
|
api-server-local:
|
||||||
|
|
@ -166,6 +219,9 @@ services:
|
||||||
timeout: 10s
|
timeout: 10s
|
||||||
retries: 3
|
retries: 3
|
||||||
start_period: 40s
|
start_period: 40s
|
||||||
|
labels:
|
||||||
|
- "fetchml.data.lifecycle=persistent"
|
||||||
|
- "fetchml.component=api"
|
||||||
|
|
||||||
worker-local:
|
worker-local:
|
||||||
image: fetchml-worker:latest
|
image: fetchml-worker:latest
|
||||||
|
|
@ -185,6 +241,9 @@ services:
|
||||||
- LOG_LEVEL=info
|
- LOG_LEVEL=info
|
||||||
- MINIO_ROOT_USER=minioadmin
|
- MINIO_ROOT_USER=minioadmin
|
||||||
- MINIO_ROOT_PASSWORD=minioadmin123
|
- MINIO_ROOT_PASSWORD=minioadmin123
|
||||||
|
labels:
|
||||||
|
- "fetchml.data.lifecycle=persistent"
|
||||||
|
- "fetchml.component=worker"
|
||||||
depends_on:
|
depends_on:
|
||||||
redis:
|
redis:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,3 @@
|
||||||
version: '3.8'
|
|
||||||
|
|
||||||
# Staging environment Docker Compose
|
# Staging environment Docker Compose
|
||||||
# This environment is for pre-production validation
|
# This environment is for pre-production validation
|
||||||
# Data is persisted but isolated from production
|
# Data is persisted but isolated from production
|
||||||
|
|
@ -8,6 +6,7 @@ services:
|
||||||
caddy:
|
caddy:
|
||||||
image: caddy:2-alpine
|
image: caddy:2-alpine
|
||||||
container_name: ml-staging-caddy
|
container_name: ml-staging-caddy
|
||||||
|
pull_policy: always
|
||||||
ports:
|
ports:
|
||||||
- "9080:80"
|
- "9080:80"
|
||||||
- "9443:443"
|
- "9443:443"
|
||||||
|
|
@ -22,6 +21,7 @@ services:
|
||||||
redis:
|
redis:
|
||||||
image: redis:7-alpine
|
image: redis:7-alpine
|
||||||
container_name: ml-staging-redis
|
container_name: ml-staging-redis
|
||||||
|
pull_policy: always
|
||||||
ports:
|
ports:
|
||||||
- "6380:6379"
|
- "6380:6379"
|
||||||
volumes:
|
volumes:
|
||||||
|
|
@ -60,6 +60,7 @@ services:
|
||||||
minio:
|
minio:
|
||||||
image: minio/minio:latest
|
image: minio/minio:latest
|
||||||
container_name: ml-staging-minio
|
container_name: ml-staging-minio
|
||||||
|
pull_policy: always
|
||||||
ports:
|
ports:
|
||||||
- "9002:9000"
|
- "9002:9000"
|
||||||
- "9003:9001"
|
- "9003:9001"
|
||||||
|
|
@ -76,18 +77,27 @@ services:
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 5
|
retries: 5
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
labels:
|
||||||
|
- "fetchml.data.lifecycle=persistent"
|
||||||
|
- "fetchml.data.max-age=168h"
|
||||||
|
- "fetchml.component=object-store"
|
||||||
|
|
||||||
minio-init:
|
minio-init:
|
||||||
image: minio/mc:latest
|
image: minio/mc:latest
|
||||||
container_name: ml-staging-minio-init
|
container_name: ml-staging-minio-init
|
||||||
|
pull_policy: always
|
||||||
depends_on:
|
depends_on:
|
||||||
minio:
|
minio:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
volumes:
|
||||||
|
- ./minio/lifecycle-staging.json:/tmp/lifecycle.json:ro
|
||||||
entrypoint: ["/bin/sh", "-c"]
|
entrypoint: ["/bin/sh", "-c"]
|
||||||
command:
|
command:
|
||||||
- |
|
- |
|
||||||
mc alias set local http://minio:9000 ${MINIO_ROOT_USER:-minioadmin} ${MINIO_ROOT_PASSWORD:-minioadmin123} || exit 1
|
mc alias set local http://minio:9000 ${MINIO_ROOT_USER:-minioadmin} ${MINIO_ROOT_PASSWORD:-minioadmin123} || exit 1
|
||||||
mc mb -p local/fetchml-snapshots-staging 2>/dev/null || echo "Bucket exists"
|
mc mb -p local/fetchml-snapshots-staging 2>/dev/null || echo "Bucket exists"
|
||||||
|
# Apply lifecycle policy for automatic cleanup
|
||||||
|
mc ilm import local/fetchml-snapshots-staging /tmp/lifecycle.json 2>/dev/null || echo "Lifecycle policy may already exist"
|
||||||
echo "MinIO initialized for staging"
|
echo "MinIO initialized for staging"
|
||||||
restart: "no"
|
restart: "no"
|
||||||
|
|
||||||
|
|
@ -96,6 +106,7 @@ services:
|
||||||
context: ../
|
context: ../
|
||||||
dockerfile: build/docker/simple.Dockerfile
|
dockerfile: build/docker/simple.Dockerfile
|
||||||
container_name: ml-staging-worker
|
container_name: ml-staging-worker
|
||||||
|
pull_policy: always
|
||||||
volumes:
|
volumes:
|
||||||
- ${DATA_DIR:-./data/staging}/logs:/logs
|
- ${DATA_DIR:-./data/staging}/logs:/logs
|
||||||
- ${DATA_DIR:-./data/staging}/experiments:/data/experiments
|
- ${DATA_DIR:-./data/staging}/experiments:/data/experiments
|
||||||
|
|
@ -116,6 +127,9 @@ services:
|
||||||
- MINIO_ENDPOINT=minio:9000
|
- MINIO_ENDPOINT=minio:9000
|
||||||
- MINIO_ROOT_USER=${MINIO_ROOT_USER:-minioadmin}
|
- MINIO_ROOT_USER=${MINIO_ROOT_USER:-minioadmin}
|
||||||
- MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minioadmin123}
|
- MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minioadmin123}
|
||||||
|
labels:
|
||||||
|
- "fetchml.data.lifecycle=persistent"
|
||||||
|
- "fetchml.component=worker"
|
||||||
|
|
||||||
# Audit log sink for staging (write-once store)
|
# Audit log sink for staging (write-once store)
|
||||||
audit-sink:
|
audit-sink:
|
||||||
|
|
|
||||||
|
|
@ -52,10 +52,26 @@ services:
|
||||||
- REDIS_ADDR=redis:6379
|
- REDIS_ADDR=redis:6379
|
||||||
volumes:
|
volumes:
|
||||||
- ../../:/app
|
- ../../:/app
|
||||||
- api-logs:/logs
|
# Use tmpfs for test data - auto-cleanup on container stop
|
||||||
- api-experiments:/data/experiments
|
- type: tmpfs
|
||||||
- api-active:/data/active
|
target: /logs
|
||||||
|
tmpfs:
|
||||||
|
size: 100M
|
||||||
|
mode: 1777
|
||||||
|
- type: tmpfs
|
||||||
|
target: /data/experiments
|
||||||
|
tmpfs:
|
||||||
|
size: 500M
|
||||||
|
mode: 1777
|
||||||
|
- type: tmpfs
|
||||||
|
target: /data/active
|
||||||
|
tmpfs:
|
||||||
|
size: 200M
|
||||||
|
mode: 1777
|
||||||
- go-mod-cache:/go/pkg/mod
|
- go-mod-cache:/go/pkg/mod
|
||||||
|
labels:
|
||||||
|
- "fetchml.data.lifecycle=ephemeral"
|
||||||
|
- "fetchml.component=test-api"
|
||||||
depends_on:
|
depends_on:
|
||||||
redis:
|
redis:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
|
@ -68,7 +84,4 @@ services:
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
ssh-config:
|
ssh-config:
|
||||||
api-logs:
|
|
||||||
api-experiments:
|
|
||||||
api-active:
|
|
||||||
go-mod-cache:
|
go-mod-cache:
|
||||||
|
|
|
||||||
31
deployments/minio/lifecycle-dev.json
Normal file
31
deployments/minio/lifecycle-dev.json
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
{
|
||||||
|
"Rules": [
|
||||||
|
{
|
||||||
|
"ID": "delete-test-snapshots",
|
||||||
|
"Status": "Enabled",
|
||||||
|
"Filter": {
|
||||||
|
"Prefix": "test-"
|
||||||
|
},
|
||||||
|
"Expiration": {
|
||||||
|
"Days": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ID": "delete-temp-uploads",
|
||||||
|
"Status": "Enabled",
|
||||||
|
"Filter": {
|
||||||
|
"Prefix": "temp/"
|
||||||
|
},
|
||||||
|
"Expiration": {
|
||||||
|
"Days": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ID": "delete-incomplete-multipart",
|
||||||
|
"Status": "Enabled",
|
||||||
|
"AbortIncompleteMultipartUpload": {
|
||||||
|
"DaysAfterInitiation": 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
41
deployments/minio/lifecycle-staging.json
Normal file
41
deployments/minio/lifecycle-staging.json
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
{
|
||||||
|
"Rules": [
|
||||||
|
{
|
||||||
|
"ID": "delete-staging-test-snapshots",
|
||||||
|
"Status": "Enabled",
|
||||||
|
"Filter": {
|
||||||
|
"Prefix": "test-"
|
||||||
|
},
|
||||||
|
"Expiration": {
|
||||||
|
"Days": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ID": "delete-staging-temp-uploads",
|
||||||
|
"Status": "Enabled",
|
||||||
|
"Filter": {
|
||||||
|
"Prefix": "temp/"
|
||||||
|
},
|
||||||
|
"Expiration": {
|
||||||
|
"Days": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ID": "transition-staging-snapshots",
|
||||||
|
"Status": "Enabled",
|
||||||
|
"Filter": {
|
||||||
|
"Prefix": "staging-"
|
||||||
|
},
|
||||||
|
"Expiration": {
|
||||||
|
"Days": 7
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ID": "delete-incomplete-multipart",
|
||||||
|
"Status": "Enabled",
|
||||||
|
"AbortIncompleteMultipartUpload": {
|
||||||
|
"DaysAfterInitiation": 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -39,6 +39,7 @@ cleanup_docker() {
|
||||||
|
|
||||||
# Stop all project-related containers
|
# Stop all project-related containers
|
||||||
$compose_cmd -f deployments/docker-compose.dev.yml down --volumes --remove-orphans 2>/dev/null || true
|
$compose_cmd -f deployments/docker-compose.dev.yml down --volumes --remove-orphans 2>/dev/null || true
|
||||||
|
$compose_cmd -f deployments/docker-compose.staging.yml down --volumes --remove-orphans 2>/dev/null || true
|
||||||
$compose_cmd -f deployments/docker-compose.test.yml --profile debug down --volumes --remove-orphans 2>/dev/null || true
|
$compose_cmd -f deployments/docker-compose.test.yml --profile debug down --volumes --remove-orphans 2>/dev/null || true
|
||||||
$compose_cmd -f deployments/docker-compose.prod.yml --profile smoke down --volumes --remove-orphans 2>/dev/null || true
|
$compose_cmd -f deployments/docker-compose.prod.yml --profile smoke down --volumes --remove-orphans 2>/dev/null || true
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue