Replace all .. with proper relative paths: - Build context: Use '.' (current directory = project root when using --project-directory) - Volume mounts: Use './data/...' instead of '../data/...' - Config mounts: Use './configs/...' instead of '../configs/...' The '..' fallback was incorrect - when --project-directory is set to repo root, '..' would point to parent of repo instead of repo itself. Using '.' or './path' correctly resolves relative to project root. Environment variables for data directories (SMOKE_TEST_DATA_DIR, PROD_DATA_DIR, HOMELAB_DATA_DIR, LOCAL_DATA_DIR) are preserved for runtime customization.
228 lines
7.6 KiB
YAML
228 lines
7.6 KiB
YAML
---
|
|
# Homelab Docker Compose with Centralized Monitoring
|
|
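# Includes: Caddy, API server, Redis, MinIO (plus one-shot init), worker, Promtail
# (the Prometheus, Grafana, and Loki services are currently commented out)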
|
|
services:
|
|
# Caddy front end: host ports 8080/8443 map to container ports 80/443.
caddy:
  container_name: ml-dev-caddy
  image: caddy:2-alpine
  restart: unless-stopped
  # Started only after api-server passes its healthcheck.
  depends_on:
    api-server:
      condition: service_healthy
  ports:
    - "8080:80"
    - "8443:443"
  volumes:
    # Caddyfile comes from the repository and is mounted read-only.
    - "./deployments/Caddyfile.dev:/etc/caddy/Caddyfile:ro"
    # Caddy state follows SMOKE_TEST_DATA_DIR (default: ./data/dev).
    - "${SMOKE_TEST_DATA_DIR:-./data/dev}/caddy/data:/data"
    - "${SMOKE_TEST_DATA_DIR:-./data/dev}/caddy/config:/config"
|
|
# Redis with append-only persistence, stored in the redis_data named volume.
redis:
  container_name: ml-experiments-redis
  image: redis:7-alpine
  user: "999:999"
  restart: unless-stopped
  command: redis-server --appendonly yes
  ports:
    - "6379:6379"
  volumes:
    - redis_data:/data
  # The worker service waits on this check (condition: service_healthy).
  healthcheck:
    test:
      - "CMD"
      - "redis-cli"
      - "ping"
    interval: 30s
    timeout: 10s
    retries: 3
|
|
# API server, built from the project directory (context: .); listens on 9101.
api-server:
  container_name: ml-experiments-api
  build:
    context: .
    dockerfile: build/docker/simple.Dockerfile
  user: "0:0"
  restart: unless-stopped
  # NOTE(review): short-form depends_on only orders container start; it does
  # not wait for the Redis healthcheck the way the worker's long form does.
  depends_on:
    - redis
  # Create the data directories, then exec the server in place of the shell.
  command:
    - "/bin/sh"
    - "-c"
    - >-
      mkdir -p /data/experiments /data/active/datasets /data/active/snapshots &&
      exec /usr/local/bin/api-server -config /app/configs/api/dev.yaml
  environment:
    LOG_LEVEL: "info"
    # Native libs enabled via build tag: -tags native_libs
  # NOTE(review): 9101 is published on the host here even though the expose
  # comment below describes it as internal-only behind Caddy - confirm intent.
  ports:
    - "9101:9101"
  expose:
    - "9101"  # API and health endpoints (internal; external access via Caddy)
  volumes:
    # Runtime data follows SMOKE_TEST_DATA_DIR (default: ./data/dev).
    - "${SMOKE_TEST_DATA_DIR:-./data/dev}/logs:/logs"
    - "${SMOKE_TEST_DATA_DIR:-./data/dev}/experiments:/data/experiments"
    - "${SMOKE_TEST_DATA_DIR:-./data/dev}/active:/data/active"
    - "${SMOKE_TEST_DATA_DIR:-./data/dev}/workspaces:/data/active/workspaces:delegated"
    # Repository-relative config and TLS material.
    - "./configs/api/dev.yaml:/app/configs/api/dev.yaml"
    - "./ssl:/app/ssl"
  # Caddy and the worker wait for this check to pass before starting.
  # NOTE(review): assumes curl is present in the image - confirm.
  healthcheck:
    test:
      - "CMD"
      - "curl"
      - "-f"
      - "http://localhost:9101/health"
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 40s
  labels:
    logging: "promtail"
    job: "api-server"
|
|
# MinIO server: API (and health endpoint) on 9000, console on 9001.
minio:
  container_name: ml-experiments-minio
  image: minio/minio:latest
  restart: unless-stopped
  command:
    - "server"
    - "/data"
    - "--console-address"
    - ":9001"
  # Hard-coded root credentials; minio-init and the worker use the same values.
  environment:
    MINIO_ROOT_USER: "minioadmin"
    MINIO_ROOT_PASSWORD: "minioadmin123"
  ports:
    - "9000:9000"
    - "9001:9001"
  volumes:
    - "${SMOKE_TEST_DATA_DIR:-./data/dev}/minio:/data"
  # minio-init waits on this liveness check before running.
  healthcheck:
    test:
      - "CMD"
      - "curl"
      - "-f"
      - "http://localhost:9000/minio/health/live"
    interval: 10s
    timeout: 5s
    retries: 10
|
|
# One-shot bootstrap for MinIO: installs the mc client, waits for the server,
# then creates the fetchml-snapshots bucket and uploads a sample snapshot.
minio-init:
  container_name: ml-experiments-minio-init
  image: alpine:3.19
  # Never restarted; the worker depends on this container exiting successfully.
  restart: "no"
  depends_on:
    minio:
      condition: service_healthy
  entrypoint:
    - "/bin/sh"
    - "-c"
  # `$$` is Compose's escape for a literal `$`, so the shell sees `$VAR`.
  # The script exits 0 early when the bucket already exists, and fails after
  # 30 unsuccessful one-second attempts to register the MinIO alias.
  command:
    - |
      set -eu
      apk add --no-cache ca-certificates curl tar gzip
      ARCH=$$(uname -m)
      MC_ARCH=amd64
      if [ "$$ARCH" = "aarch64" ] || [ "$$ARCH" = "arm64" ]; then
        MC_ARCH=arm64
      fi
      curl -fsSL -o /usr/local/bin/mc "https://dl.min.io/client/mc/release/linux-$$MC_ARCH/mc"
      chmod +x /usr/local/bin/mc
      i=0
      while ! mc alias set local http://minio:9000 minioadmin minioadmin123; do
        i=$$((i+1))
        if [ $$i -ge 30 ]; then
          echo "minio not ready after 30 attempts" >&2
          exit 1
        fi
        echo "waiting for minio... ($$i/30)"
        sleep 1
      done
      # Skip if bucket already exists
      if mc ls local/fetchml-snapshots 2>/dev/null; then
        echo "Bucket fetchml-snapshots already exists, skipping init"
        exit 0
      fi
      mc mb -p local/fetchml-snapshots || true
      mkdir -p /tmp/snapshots/snap-1
      echo -n "hello" > /tmp/snapshots/snap-1/hello.txt
      tar -C /tmp/snapshots/snap-1 -czf /tmp/snap-1.tar.gz .
      mc cp /tmp/snap-1.tar.gz local/fetchml-snapshots/snapshots/snap-1.tar.gz
      FILE_SHA=$$(sha256sum /tmp/snapshots/snap-1/hello.txt | cut -d' ' -f1)
      SNAP_SHA=$$(echo -n "$$FILE_SHA" | sha256sum | cut -d' ' -f1)
      echo "snapshot_id=snap-1 snapshot_sha256=$$SNAP_SHA"
|
|
# Worker, built from the same Dockerfile as the API server.
worker:
  container_name: ml-experiments-worker
  build:
    context: .
    dockerfile: build/docker/simple.Dockerfile
  # Runs as root in privileged mode with the host cgroup tree mounted
  # read-write (see the /sys/fs/cgroup volume below).
  user: "0:0"
  privileged: true
  restart: unless-stopped
  command:
    - "/usr/local/bin/worker"
    - "-config"
    - "/app/configs/worker.yaml"
  # Waits for healthy Redis and API server, and for the MinIO bootstrap
  # container to finish successfully.
  depends_on:
    redis:
      condition: service_healthy
    api-server:
      condition: service_healthy
    minio-init:
      condition: service_completed_successfully
  ports:
    - "8888:8888"
  environment:
    LOG_LEVEL: "info"
    MINIO_ROOT_USER: "minioadmin"
    MINIO_ROOT_PASSWORD: "minioadmin123"
    FETCHML_JUPYTER_DEFAULT_IMAGE: "quay.io/jupyter/minimal-notebook:latest"
    FETCHML_JUPYTER_CONDA_ENV: "base"
    FETCHML_JUPYTER_KERNEL_NAME: "python"
    FETCHML_PODMAN_CGROUPS: "disabled"
    # Native libs enabled via build tag: -tags native_libs
  volumes:
    # Runtime data follows SMOKE_TEST_DATA_DIR (default: ./data/dev); these
    # are the same host directories the api-server service mounts.
    - "${SMOKE_TEST_DATA_DIR:-./data/dev}/logs:/logs"
    - "${SMOKE_TEST_DATA_DIR:-./data/dev}/active:/data/active"
    - "${SMOKE_TEST_DATA_DIR:-./data/dev}/experiments:/data/experiments"
    - "${SMOKE_TEST_DATA_DIR:-./data/dev}/workspaces:/data/active/workspaces:delegated"
    # Repository-relative worker config, mounted at the path given to -config.
    - "./configs/workers/docker-dev.yaml:/app/configs/worker.yaml"
    - "/sys/fs/cgroup:/sys/fs/cgroup:rw"
|
|
# # Prometheus - Metrics collection
|
|
# prometheus:
|
|
# image: prom/prometheus:latest
|
|
# container_name: ml-experiments-prometheus
|
|
# ports:
|
|
# - "9090:9090"
|
|
# volumes:
|
|
# - ${FETCHML_REPO_ROOT:-.}/monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
|
|
# - prometheus_data:/prometheus
|
|
# command:
|
|
# - '--config.file=/etc/prometheus/prometheus.yml'
|
|
# - '--storage.tsdb.path=/prometheus'
|
|
# - '--web.console.libraries=/etc/prometheus/console_libraries'
|
|
# - '--web.console.templates=/etc/prometheus/consoles'
|
|
# - '--web.enable-lifecycle'
|
|
# restart: unless-stopped
|
|
#
|
|
# # Grafana - Visualization
|
|
# grafana:
|
|
# image: grafana/grafana:latest
|
|
# container_name: ml-experiments-grafana
|
|
# ports:
|
|
# - "3000:3000"
|
|
# volumes:
|
|
# - grafana_data:/var/lib/grafana
|
|
# - ${FETCHML_REPO_ROOT:-.}/monitoring/grafana/provisioning:/etc/grafana/provisioning
|
|
# - ${FETCHML_REPO_ROOT:-.}/monitoring/grafana/dashboards:/var/lib/grafana/dashboards
|
|
# environment:
|
|
# - GF_SECURITY_ADMIN_PASSWORD=admin123
|
|
# - GF_USERS_ALLOW_SIGN_UP=false
|
|
# restart: unless-stopped
|
|
# depends_on:
|
|
# - prometheus
|
|
# - loki
|
|
#
|
|
# # Loki - Log aggregation
|
|
# loki:
|
|
# image: grafana/loki:latest
|
|
# container_name: ml-experiments-loki
|
|
# ports:
|
|
# - "3100:3100"
|
|
# volumes:
|
|
# - ${FETCHML_REPO_ROOT:-.}/monitoring/loki-config.yml:/etc/loki/local-config.yaml
|
|
# - loki_data:/loki
|
|
# command: -config.file=/etc/loki/local-config.yaml
|
|
# restart: unless-stopped
|
|
# Promtail - Log collector
# Reads application logs from the shared logs directory and has access to the
# Docker container log directory and the Docker socket.
promtail:
  image: grafana/promtail:latest
  container_name: ml-experiments-promtail
  volumes:
    # The config is a repository file, not runtime data, so its path must not
    # depend on SMOKE_TEST_DATA_DIR: with that variable set, the previous
    # `${SMOKE_TEST_DATA_DIR:-./monitoring}/promtail-config.yml` resolved
    # inside the data directory instead of ./monitoring.
    - ./monitoring/promtail-config.yml:/etc/promtail/config.yml
    # Application logs follow SMOKE_TEST_DATA_DIR (default: ./data/dev).
    - ${SMOKE_TEST_DATA_DIR:-./data/dev}/logs:/var/log/app
    - /var/lib/docker/containers:/var/lib/docker/containers:ro
    - /var/run/docker.sock:/var/run/docker.sock
  command: -config.file=/etc/promtail/config.yml
  restart: unless-stopped
  # depends_on:
  #   - loki
|
|
# Named volumes, all on the local driver.
volumes:
  # Mounted at /data by the redis service.
  redis_data:
    driver: local

  # Referenced only by the commented-out prometheus/grafana/loki services.
  prometheus_data:
    driver: local

  grafana_data:
    driver: local

  loki_data:
    driver: local
|