fetch_ml/deployments/docker-compose.dev.yml
Jeremie Fraeys 6d200b5ac2
fix(docker): Use named volume for Redis to fix permission errors
Replace bind mount with Docker named volume for Redis data

This fixes 'operation not permitted' errors on macOS Docker Desktop

where bind mounts fail due to file sharing restrictions
2026-02-23 14:20:23 -05:00

228 lines
7.7 KiB
YAML

---
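# Homelab Docker Compose (dev) with optional centralized monitoring
# Includes: Caddy, Redis, API server, MinIO (+ one-shot init job), worker, Promtail
# Prometheus, Grafana, and Loki are defined further down but are currently commented out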
services:
  # Caddy - Front door for the stack; only starts once the API reports healthy
  caddy:
    image: caddy:2-alpine
    container_name: ml-dev-caddy
    restart: unless-stopped
    ports:
      - "8080:80"
      - "8443:443"
    volumes:
      - ${FETCHML_REPO_ROOT:-..}/deployments/Caddyfile.dev:/etc/caddy/Caddyfile:ro
      - ${FETCHML_REPO_ROOT:-..}/data/dev/caddy/data:/data
      - ${FETCHML_REPO_ROOT:-..}/data/dev/caddy/config:/config
    depends_on:
      api-server:
        condition: service_healthy

  # Redis - Persists to the named volume `redis_data` (append-only file enabled)
  redis:
    image: redis:7-alpine
    container_name: ml-experiments-redis
    user: "999:999"
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    restart: unless-stopped
    command: redis-server --appendonly yes
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      # Short interval: api-server and worker are gated on this check, and the
      # first probe only runs one full interval after the container starts.
      interval: 5s
      timeout: 3s
      retries: 5

  # API server - Built from the repo root with build/docker/simple.Dockerfile
  api-server:
    build:
      context: ..
      dockerfile: build/docker/simple.Dockerfile
    container_name: ml-experiments-api
    user: "0:0"
    # Published on the host too, so the API is reachable directly on :9101
    # in addition to going through Caddy.
    ports:
      - "9101:9101"
    expose:
      - "9101"  # API and health endpoints for other containers (e.g. Caddy)
    volumes:
      - ${FETCHML_REPO_ROOT:-..}/data/dev/logs:/logs
      - ${FETCHML_REPO_ROOT:-..}/data/dev/experiments:/data/experiments
      - ${FETCHML_REPO_ROOT:-..}/data/dev/active:/data/active
      - ${FETCHML_REPO_ROOT:-..}/data/dev/workspaces:/data/active/workspaces:delegated
      - ${FETCHML_REPO_ROOT:-..}/configs/api/dev.yaml:/app/configs/api/dev.yaml
      - ${FETCHML_REPO_ROOT:-..}/ssl:/app/ssl
    # Wait for a healthy Redis (not merely a started container), matching how
    # caddy and worker gate on their dependencies.
    depends_on:
      redis:
        condition: service_healthy
    restart: unless-stopped
    # Create the data directories before handing the process over to the server.
    command:
      - /bin/sh
      - "-c"
      - >-
        mkdir -p /data/experiments /data/active/datasets /data/active/snapshots &&
        exec /usr/local/bin/api-server -config /app/configs/api/dev.yaml
    environment:
      - LOG_LEVEL=info
      # Native libs enabled via build tag: -tags native_libs
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9101/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    labels:
      logging: "promtail"
      job: "api-server"

  # MinIO - S3-compatible object store (API on 9000, console on 9001)
  # NOTE(review): credentials are hard-coded dev defaults and must stay in sync
  # with the minio-init script and the worker environment.
  minio:
    image: minio/minio:latest
    container_name: ml-experiments-minio
    ports:
      - "9000:9000"
      - "9001:9001"
    volumes:
      - ${FETCHML_REPO_ROOT:-..}/data/dev/minio:/data
    environment:
      - MINIO_ROOT_USER=minioadmin
      - MINIO_ROOT_PASSWORD=minioadmin123
    command: ["server", "/data", "--console-address", ":9001"]
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
      interval: 10s
      timeout: 5s
      retries: 10
    restart: unless-stopped

  # MinIO init - One-shot job: installs the `mc` client, waits for MinIO,
  # creates the fetchml-snapshots bucket and uploads a sample snapshot.
  # Exits early when the bucket already exists. `$$` is Compose's escape for a
  # literal `$`, so the shell (not Compose) expands these variables.
  minio-init:
    image: alpine:3.19
    container_name: ml-experiments-minio-init
    depends_on:
      minio:
        condition: service_healthy
    entrypoint: ["/bin/sh", "-c"]
    command:
      - |
        set -eu
        apk add --no-cache ca-certificates curl tar gzip
        ARCH=$$(uname -m)
        MC_ARCH=amd64
        if [ "$$ARCH" = "aarch64" ] || [ "$$ARCH" = "arm64" ]; then
          MC_ARCH=arm64
        fi
        curl -fsSL -o /usr/local/bin/mc "https://dl.min.io/client/mc/release/linux-$$MC_ARCH/mc"
        chmod +x /usr/local/bin/mc
        i=0
        while ! mc alias set local http://minio:9000 minioadmin minioadmin123; do
          i=$$((i+1))
          if [ $$i -ge 30 ]; then
            echo "minio not ready after 30 attempts" >&2
            exit 1
          fi
          echo "waiting for minio... ($$i/30)"
          sleep 1
        done
        # Skip if bucket already exists
        if mc ls local/fetchml-snapshots 2>/dev/null; then
          echo "Bucket fetchml-snapshots already exists, skipping init"
          exit 0
        fi
        mc mb -p local/fetchml-snapshots || true
        mkdir -p /tmp/snapshots/snap-1
        echo -n "hello" > /tmp/snapshots/snap-1/hello.txt
        tar -C /tmp/snapshots/snap-1 -czf /tmp/snap-1.tar.gz .
        mc cp /tmp/snap-1.tar.gz local/fetchml-snapshots/snapshots/snap-1.tar.gz
        FILE_SHA=$$(sha256sum /tmp/snapshots/snap-1/hello.txt | cut -d' ' -f1)
        SNAP_SHA=$$(echo -n "$$FILE_SHA" | sha256sum | cut -d' ' -f1)
        echo "snapshot_id=snap-1 snapshot_sha256=$$SNAP_SHA"
    restart: "no"

  # Worker - Same image as the API server, started with the worker binary.
  # Starts only after Redis and the API are healthy and minio-init has succeeded.
  worker:
    build:
      context: ..
      dockerfile: build/docker/simple.Dockerfile
    container_name: ml-experiments-worker
    user: "0:0"
    ports:
      - "8888:8888"
    volumes:
      - ${FETCHML_REPO_ROOT:-..}/data/dev/logs:/logs
      - ${FETCHML_REPO_ROOT:-..}/data/dev/active:/data/active
      - ${FETCHML_REPO_ROOT:-..}/data/dev/experiments:/data/experiments
      - ${FETCHML_REPO_ROOT:-..}/data/dev/workspaces:/data/active/workspaces:delegated
      - ${FETCHML_REPO_ROOT:-..}/configs/workers/docker-dev.yaml:/app/configs/worker.yaml
      - /sys/fs/cgroup:/sys/fs/cgroup:rw
    depends_on:
      redis:
        condition: service_healthy
      api-server:
        condition: service_healthy
      minio-init:
        condition: service_completed_successfully
    restart: unless-stopped
    environment:
      - LOG_LEVEL=info
      - MINIO_ROOT_USER=minioadmin
      - MINIO_ROOT_PASSWORD=minioadmin123
      - FETCHML_JUPYTER_DEFAULT_IMAGE=quay.io/jupyter/minimal-notebook:latest
      - FETCHML_JUPYTER_CONDA_ENV=base
      - FETCHML_JUPYTER_KERNEL_NAME=python
      - FETCHML_PODMAN_CGROUPS=disabled
      # Native libs enabled via build tag: -tags native_libs
    # NOTE(review): privileged mode plus the writable cgroup mount presumably
    # exist so the worker can launch nested containers - confirm before reuse
    # outside a dev machine.
    privileged: true
    command: ["/usr/local/bin/worker", "-config", "/app/configs/worker.yaml"]

  # NOTE(review): the disabled services below default FETCHML_REPO_ROOT to `.`
  # while every active service defaults it to `..`; reconcile before enabling.
  # # Prometheus - Metrics collection
  # prometheus:
  #   image: prom/prometheus:latest
  #   container_name: ml-experiments-prometheus
  #   ports:
  #     - "9090:9090"
  #   volumes:
  #     - ${FETCHML_REPO_ROOT:-.}/monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
  #     - prometheus_data:/prometheus
  #   command:
  #     - '--config.file=/etc/prometheus/prometheus.yml'
  #     - '--storage.tsdb.path=/prometheus'
  #     - '--web.console.libraries=/etc/prometheus/console_libraries'
  #     - '--web.console.templates=/etc/prometheus/consoles'
  #     - '--web.enable-lifecycle'
  #   restart: unless-stopped
  #
  # # Grafana - Visualization
  # grafana:
  #   image: grafana/grafana:latest
  #   container_name: ml-experiments-grafana
  #   ports:
  #     - "3000:3000"
  #   volumes:
  #     - grafana_data:/var/lib/grafana
  #     - ${FETCHML_REPO_ROOT:-.}/monitoring/grafana/provisioning:/etc/grafana/provisioning
  #     - ${FETCHML_REPO_ROOT:-.}/monitoring/grafana/dashboards:/var/lib/grafana/dashboards
  #   environment:
  #     - GF_SECURITY_ADMIN_PASSWORD=admin123
  #     - GF_USERS_ALLOW_SIGN_UP=false
  #   restart: unless-stopped
  #   depends_on:
  #     - prometheus
  #     - loki
  #
  # # Loki - Log aggregation
  # loki:
  #   image: grafana/loki:latest
  #   container_name: ml-experiments-loki
  #   ports:
  #     - "3100:3100"
  #   volumes:
  #     - ${FETCHML_REPO_ROOT:-.}/monitoring/loki-config.yml:/etc/loki/local-config.yaml
  #     - loki_data:/loki
  #   command: -config.file=/etc/loki/local-config.yaml
  #   restart: unless-stopped

  # Promtail - Log collector
  # Every mount that Promtail only reads is read-only, including its own
  # config and the Docker socket.
  promtail:
    image: grafana/promtail:latest
    container_name: ml-experiments-promtail
    volumes:
      - ${FETCHML_REPO_ROOT:-..}/monitoring/promtail-config.yml:/etc/promtail/config.yml:ro
      - ${FETCHML_REPO_ROOT:-..}/data/dev/logs:/var/log/app
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
    command: -config.file=/etc/promtail/config.yml
    restart: unless-stopped
    # depends_on:
    #   - loki
# Named volumes managed by Docker (local driver).
volumes:
  # Mounted at /data by the redis service; holds the append-only file.
  redis_data:
    driver: local
  # The three volumes below are referenced only by the commented-out
  # prometheus, grafana, and loki service definitions.
  prometheus_data:
    driver: local
  grafana_data:
    driver: local
  loki_data:
    driver: local