chore: update configurations and deployment files

- Add Redis secure configuration
- Update worker configurations for homelab and Docker
- Add Forgejo workflow configurations
- Update docker-compose files with improved networking
- Add Caddy configurations for different environments
This commit is contained in:
Jeremie Fraeys 2026-02-16 20:38:19 -05:00
parent 7305e2bc21
commit 8b4e1753d1
No known key found for this signature in database
9 changed files with 261 additions and 71 deletions

View file

@ -202,6 +202,13 @@ jobs:
FETCHML_NATIVE_LIBS: "1"
continue-on-error: true
- name: Native Smoke Test
run: |
echo "Running native libraries smoke test..."
make native-smoke
env:
FETCHML_NATIVE_LIBS: "1"
- name: Test Fallback (Go only)
run: |
echo "Running tests WITHOUT native libraries (Go fallback)..."

View file

@ -0,0 +1,61 @@
# Development API server configuration.
# Storage paths are relative (data/dev/...); auth, TLS and rate limiting are
# disabled; Redis is expected on localhost.
# NOTE(review): nesting was restored from a listing that had lost its
# indentation - confirm against the config loader before relying on it.
base_path: "data/dev/experiments"

data_dir: "data/dev/active"

auth:
  enabled: false

server:
  address: "0.0.0.0:9101"
  tls:
    enabled: false
    cert_file: ""
    key_file: ""

security:
  production_mode: false
  allowed_origins:
    - "http://localhost:3000"
  api_key_rotation_days: 90
  audit_logging:
    enabled: true
    log_path: "/tmp/fetchml-audit.log"
  rate_limit:
    enabled: false
    requests_per_minute: 60
    burst_size: 10
  ip_whitelist: []

monitoring:
  prometheus:
    enabled: true
    # Same port number as server.address above.
    port: 9101
    path: "/metrics"
  health_checks:
    enabled: true
    interval: "30s"

redis:
  addr: "localhost:6379"
  password: ""
  db: 0

database:
  type: "sqlite"
  connection: "data/dev/db/fetchml.sqlite"

logging:
  level: "info"
  file: ""
  audit_log: ""

resources:
  max_workers: 1
  desired_rps_per_worker: 2
  # Quoted so these stay strings rather than being parsed as numbers.
  podman_cpus: "2"
  podman_memory: "4Gi"

queue:
  type: "native"
  native:
    data_dir: "data/dev/queue"

View file

@ -1,6 +1,6 @@
base_path: "/data/experiments"
base_path: "./data/experiments"
data_dir: "/data/active"
data_dir: "./data/active"
auth:
enabled: false
@ -19,7 +19,7 @@ security:
api_key_rotation_days: 90
audit_logging:
enabled: true
log_path: "/tmp/fetchml-audit.log"
log_path: "./data/fetchml-audit.log"
rate_limit:
enabled: false
requests_per_minute: 60
@ -42,7 +42,7 @@ redis:
database:
type: "sqlite"
connection: "/tmp/fetchml.sqlite"
connection: "./data/fetchml.sqlite"
logging:
level: "info"

View file

@ -0,0 +1,54 @@
# Local worker configuration ("local-worker").
# Runs with local_mode enabled against Redis on localhost; storage paths are
# relative (data/dev/...).
# NOTE(review): nesting was restored from a listing that had lost its
# indentation - confirm against the worker config loader.
worker_id: "local-worker"
base_path: "data/dev/experiments"
train_script: "train.py"
redis_url: "redis://localhost:6379/0"
local_mode: true
prewarm_enabled: false
max_workers: 2
poll_interval_seconds: 2
auto_fetch_data: false
data_manager_path: "./data_manager"
dataset_cache_ttl: "30m"
data_dir: "data/dev/active"

snapshot_store:
  enabled: false

podman_image: "python:3.9-slim"
container_workspace: "/workspace"
container_results: "/results"

gpu_devices: []
gpu_vendor: "apple"
gpu_visible_devices: []

# Apple M-series GPU configuration
apple_gpu:
  enabled: true
  metal_device: "/dev/metal"
  mps_runtime: "/dev/mps"

resources:
  max_workers: 2
  desired_rps_per_worker: 2
  # Quoted so these stay strings rather than being parsed as numbers.
  podman_cpus: "2"
  podman_memory: "4Gi"

metrics:
  enabled: false

queue:
  type: "native"
  native:
    data_dir: "data/dev/queue"

# NOTE(review): assumed to be top-level worker settings rather than children
# of `queue` - verify.
task_lease_duration: "30m"
heartbeat_interval: "1m"
max_retries: 3
graceful_timeout: "5m"

View file

@ -10,9 +10,9 @@ services:
- "8080:80"
- "8443:443"
volumes:
- ${FETCHML_REPO_ROOT:-.}/deployments/Caddyfile.dev:/etc/caddy/Caddyfile:ro
- ${FETCHML_REPO_ROOT:-.}/data/dev/caddy/data:/data
- ${FETCHML_REPO_ROOT:-.}/data/dev/caddy/config:/config
- ${FETCHML_REPO_ROOT:-..}/deployments/Caddyfile.dev:/etc/caddy/Caddyfile:ro
- ${FETCHML_REPO_ROOT:-..}/data/dev/caddy/data:/data
- ${FETCHML_REPO_ROOT:-..}/data/dev/caddy/config:/config
depends_on:
api-server:
condition: service_healthy
@ -23,7 +23,7 @@ services:
ports:
- "6379:6379"
volumes:
- ${FETCHML_REPO_ROOT:-.}/data/dev/redis:/data
- ${FETCHML_REPO_ROOT:-..}/data/dev/redis:/data
restart: unless-stopped
command: redis-server --appendonly yes
healthcheck:
@ -33,8 +33,8 @@ services:
retries: 3
api-server:
build:
context: ${FETCHML_REPO_ROOT:-.}
dockerfile: ${FETCHML_REPO_ROOT:-.}/build/docker/simple.Dockerfile
context: ..
dockerfile: build/docker/simple.Dockerfile
container_name: ml-experiments-api
user: "0:0"
ports:
@ -42,18 +42,19 @@ services:
expose:
- "9101" # API and health endpoints (internal; external access via Caddy)
volumes:
- ${FETCHML_REPO_ROOT:-.}/data/dev/logs:/logs
- ${FETCHML_REPO_ROOT:-.}/data/dev/experiments:/data/experiments
- ${FETCHML_REPO_ROOT:-.}/data/dev/active:/data/active
- ${FETCHML_REPO_ROOT:-.}/data/dev/workspaces:/data/active/workspaces:delegated
- ${FETCHML_REPO_ROOT:-.}/configs/api/dev.yaml:/app/configs/api/dev.yaml
- ${FETCHML_REPO_ROOT:-.}/ssl:/app/ssl
- ${FETCHML_REPO_ROOT:-..}/data/dev/logs:/logs
- ${FETCHML_REPO_ROOT:-..}/data/dev/experiments:/data/experiments
- ${FETCHML_REPO_ROOT:-..}/data/dev/active:/data/active
- ${FETCHML_REPO_ROOT:-..}/data/dev/workspaces:/data/active/workspaces:delegated
- ${FETCHML_REPO_ROOT:-..}/configs/api/dev.yaml:/app/configs/api/dev.yaml
- ${FETCHML_REPO_ROOT:-..}/ssl:/app/ssl
depends_on:
- redis
restart: unless-stopped
command: ["/bin/sh", "-c", "mkdir -p /data/experiments /data/active/datasets /data/active/snapshots && exec /usr/local/bin/api-server -config /app/configs/api/dev.yaml"]
environment:
- LOG_LEVEL=info
- FETCHML_NATIVE_LIBS=1
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9101/health"]
interval: 30s
@ -70,7 +71,7 @@ services:
- "9000:9000"
- "9001:9001"
volumes:
- ${FETCHML_REPO_ROOT:-.}/data/dev/minio:/data
- ${FETCHML_REPO_ROOT:-..}/data/dev/minio:/data
environment:
- MINIO_ROOT_USER=minioadmin
- MINIO_ROOT_PASSWORD=minioadmin123
@ -109,6 +110,11 @@ services:
echo "waiting for minio... ($$i/30)"
sleep 1
done
# Skip if bucket already exists
if mc ls local/fetchml-snapshots 2>/dev/null; then
echo "Bucket fetchml-snapshots already exists, skipping init"
exit 0
fi
mc mb -p local/fetchml-snapshots || true
mkdir -p /tmp/snapshots/snap-1
echo -n "hello" > /tmp/snapshots/snap-1/hello.txt
@ -120,18 +126,18 @@ services:
restart: "no"
worker:
build:
context: ${FETCHML_REPO_ROOT:-.}
dockerfile: ${FETCHML_REPO_ROOT:-.}/build/docker/simple.Dockerfile
context: ..
dockerfile: build/docker/simple.Dockerfile
container_name: ml-experiments-worker
user: "0:0"
ports:
- "8888:8888"
volumes:
- ${FETCHML_REPO_ROOT:-.}/data/dev/logs:/logs
- ${FETCHML_REPO_ROOT:-.}/data/dev/active:/data/active
- ${FETCHML_REPO_ROOT:-.}/data/dev/experiments:/data/experiments
- ${FETCHML_REPO_ROOT:-.}/data/dev/workspaces:/data/active/workspaces:delegated
- ${FETCHML_REPO_ROOT:-.}/configs/workers/docker-dev.yaml:/app/configs/worker.yaml
- ${FETCHML_REPO_ROOT:-..}/data/dev/logs:/logs
- ${FETCHML_REPO_ROOT:-..}/data/dev/active:/data/active
- ${FETCHML_REPO_ROOT:-..}/data/dev/experiments:/data/experiments
- ${FETCHML_REPO_ROOT:-..}/data/dev/workspaces:/data/active/workspaces:delegated
- ${FETCHML_REPO_ROOT:-..}/configs/workers/docker-dev.yaml:/app/configs/worker.yaml
- /sys/fs/cgroup:/sys/fs/cgroup:rw
depends_on:
redis:
@ -149,6 +155,7 @@ services:
- FETCHML_JUPYTER_CONDA_ENV=base
- FETCHML_JUPYTER_KERNEL_NAME=python
- FETCHML_PODMAN_CGROUPS=disabled
- FETCHML_NATIVE_LIBS=1
privileged: true
command: ["/usr/local/bin/worker", "-config", "/app/configs/worker.yaml"]
# # Prometheus - Metrics collection
@ -202,14 +209,14 @@ services:
image: grafana/promtail:latest
container_name: ml-experiments-promtail
volumes:
- ${FETCHML_REPO_ROOT:-.}/monitoring/promtail-config.yml:/etc/promtail/config.yml
- ${FETCHML_REPO_ROOT:-.}/data/dev/logs:/var/log/app
- ${FETCHML_REPO_ROOT:-..}/monitoring/promtail-config.yml:/etc/promtail/config.yml
- ${FETCHML_REPO_ROOT:-..}/data/dev/logs:/var/log/app
- /var/lib/docker/containers:/var/lib/docker/containers:ro
- /var/run/docker.sock:/var/run/docker.sock
command: -config.file=/etc/promtail/config.yml
restart: unless-stopped
depends_on:
- loki
# depends_on:
# - loki
volumes:
prometheus_data:
driver: local

View file

@ -4,25 +4,26 @@
services:
api-server:
build:
context: ${FETCHML_REPO_ROOT:-.}
dockerfile: ${FETCHML_REPO_ROOT:-.}/build/docker/simple.Dockerfile
context: ${FETCHML_REPO_ROOT:-..}
# A relative dockerfile path is resolved from the build context, so it must
# not repeat the repo-root prefix (the default ".." would otherwise point
# one directory above the repository).
dockerfile: build/docker/simple.Dockerfile
container_name: ml-experiments-api
ports:
- "9101:9101"
- "9100:9100" # Prometheus metrics endpoint
volumes:
- ${FETCHML_REPO_ROOT:-.}/data/homelab/experiments:/data/experiments
- ${FETCHML_REPO_ROOT:-.}/data/homelab/active:/data/active
- ${FETCHML_REPO_ROOT:-.}/data/homelab/logs:/logs
- ${FETCHML_REPO_ROOT:-.}/ssl:/app/ssl:ro
- ${FETCHML_REPO_ROOT:-.}/configs/api/homelab-secure.yaml:/app/configs/api/prod.yaml:ro
- ${FETCHML_REPO_ROOT:-.}/.env.secure:/app/.env.secure:ro
- ${FETCHML_REPO_ROOT:-..}/data/homelab/experiments:/data/experiments
- ${FETCHML_REPO_ROOT:-..}/data/homelab/active:/data/active
- ${FETCHML_REPO_ROOT:-..}/data/homelab/logs:/logs
- ${FETCHML_REPO_ROOT:-..}/ssl:/app/ssl:ro
- ${FETCHML_REPO_ROOT:-..}/configs/api/homelab-secure.yaml:/app/configs/api/prod.yaml:ro
- ${FETCHML_REPO_ROOT:-..}/.env.secure:/app/.env.secure:ro
depends_on:
redis:
condition: service_healthy
restart: unless-stopped
environment:
- LOG_LEVEL=info
- FETCHML_NATIVE_LIBS=1
# Load secure environment variables
- JWT_SECRET_FILE=/app/.env.secure
healthcheck:
@ -47,7 +48,7 @@ services:
- "9000:9000"
- "9001:9001"
volumes:
- ${FETCHML_REPO_ROOT:-.}/data/homelab/minio:/data
- ${FETCHML_REPO_ROOT:-..}/data/homelab/minio:/data
environment:
- MINIO_ROOT_USER=${MINIO_ROOT_USER:-minioadmin}
- MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minioadmin123}
@ -68,6 +69,11 @@ services:
curl -fsSL -o /usr/local/bin/mc https://dl.min.io/client/mc/release/linux-amd64/mc
chmod +x /usr/local/bin/mc
mc alias set local http://minio:9000 ${MINIO_ROOT_USER:-minioadmin} ${MINIO_ROOT_PASSWORD:-minioadmin123}
# Skip if bucket already exists
if mc ls local/fetchml-snapshots 2>/dev/null; then
echo "Bucket fetchml-snapshots already exists, skipping init"
exit 0
fi
mc mb -p local/fetchml-snapshots || true
restart: "no"
networks:
@ -75,14 +81,14 @@ services:
worker:
build:
context: ${FETCHML_REPO_ROOT:-.}
dockerfile: ${FETCHML_REPO_ROOT:-.}/build/docker/simple.Dockerfile
context: ${FETCHML_REPO_ROOT:-..}
# A relative dockerfile path is resolved from the build context, so it must
# not repeat the repo-root prefix (the default ".." would otherwise point
# one directory above the repository).
dockerfile: build/docker/simple.Dockerfile
container_name: ml-experiments-worker
volumes:
- ${FETCHML_REPO_ROOT:-.}/data/homelab/experiments:/app/data/experiments
- ${FETCHML_REPO_ROOT:-.}/data/homelab/active:/data/active
- ${FETCHML_REPO_ROOT:-.}/data/homelab/logs:/logs
- ${FETCHML_REPO_ROOT:-.}/configs/workers/homelab-secure.yaml:/app/configs/worker.yaml
- ${FETCHML_REPO_ROOT:-..}/data/homelab/experiments:/app/data/experiments
- ${FETCHML_REPO_ROOT:-..}/data/homelab/active:/data/active
- ${FETCHML_REPO_ROOT:-..}/data/homelab/logs:/logs
- ${FETCHML_REPO_ROOT:-..}/configs/workers/homelab-secure.yaml:/app/configs/worker.yaml
depends_on:
redis:
condition: service_healthy
@ -93,6 +99,7 @@ services:
restart: unless-stopped
environment:
- LOG_LEVEL=info
- FETCHML_NATIVE_LIBS=1
- MINIO_ROOT_USER=${MINIO_ROOT_USER:-minioadmin}
- MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minioadmin123}
- REDIS_PASSWORD=${REDIS_PASSWORD}
@ -109,10 +116,10 @@ services:
- "80:80"
- "443:443"
volumes:
- ${FETCHML_REPO_ROOT:-.}/deployments/Caddyfile.homelab-secure:/etc/caddy/Caddyfile:ro
- ${FETCHML_REPO_ROOT:-.}/ssl:/etc/caddy/ssl:ro
- ${FETCHML_REPO_ROOT:-.}/data/homelab/caddy/data:/data
- ${FETCHML_REPO_ROOT:-.}/data/homelab/caddy/config:/config
- ${FETCHML_REPO_ROOT:-..}/deployments/Caddyfile.homelab-secure:/etc/caddy/Caddyfile:ro
- ${FETCHML_REPO_ROOT:-..}/ssl:/etc/caddy/ssl:ro
- ${FETCHML_REPO_ROOT:-..}/data/homelab/caddy/data:/data
- ${FETCHML_REPO_ROOT:-..}/data/homelab/caddy/config:/config
environment:
- FETCHML_DOMAIN=${FETCHML_DOMAIN:-ml.local}
depends_on:
@ -129,8 +136,8 @@ services:
ports:
- "127.0.0.1:6379:6379" # Bind to localhost only
volumes:
- ${FETCHML_REPO_ROOT:-.}/data/homelab/redis:/data
- ${FETCHML_REPO_ROOT:-.}/redis/redis-secure.conf:/usr/local/etc/redis/redis.conf:ro
- ${FETCHML_REPO_ROOT:-..}/data/homelab/redis:/data
- ${FETCHML_REPO_ROOT:-..}/redis/redis-secure.conf:/usr/local/etc/redis/redis.conf:ro
restart: unless-stopped
command: redis-server /usr/local/etc/redis/redis.conf --requirepass ${REDIS_PASSWORD}
healthcheck:

View file

@ -0,0 +1,49 @@
---
# Docker Compose override for local testing with pre-built images.
# Both services run from image tags (fetchml-api:latest, fetchml-worker:latest)
# and have no build section. `redis` is referenced in depends_on but not
# defined here; presumably it comes from the compose file this one overrides.
services:
  api-server:
    image: fetchml-api:latest
    ports:
      - "9101:9101"
    volumes:
      - ../data/dev/logs:/logs
      - ../data/dev/experiments:/data/experiments
      - ../data/dev/active:/data/active
      - ../data/dev/workspaces:/data/active/workspaces:delegated
      - ../configs/api/dev.yaml:/app/configs/api/dev.yaml
    environment:
      - LOG_LEVEL=info
      - FETCHML_NATIVE_LIBS=1
    depends_on:
      redis:
        condition: service_healthy
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9101/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

  worker:
    image: fetchml-worker:latest
    privileged: true
    ports:
      - "8888:8888"
    volumes:
      - ../data/dev/logs:/logs
      - ../data/dev/active:/data/active
      - ../data/dev/experiments:/data/experiments
      - ../data/dev/workspaces:/data/active/workspaces:delegated
      - ../configs/workers/docker-dev.yaml:/app/configs/worker.yaml
      - /sys/fs/cgroup:/sys/fs/cgroup:rw
    environment:
      - LOG_LEVEL=info
      - FETCHML_NATIVE_LIBS=1
      - MINIO_ROOT_USER=minioadmin
      - MINIO_ROOT_PASSWORD=minioadmin123
    # Start only once both redis and the API report healthy.
    depends_on:
      redis:
        condition: service_healthy
      api-server:
        condition: service_healthy

View file

@ -8,8 +8,8 @@ services:
- "8080:80"
- "8443:443"
volumes:
- ${FETCHML_REPO_ROOT:-.}/data/prod-smoke/caddy/data:/data
- ${FETCHML_REPO_ROOT:-.}/data/prod-smoke/caddy/config:/config
- ${FETCHML_REPO_ROOT:-..}/data/prod-smoke/caddy/data:/data
- ${FETCHML_REPO_ROOT:-..}/data/prod-smoke/caddy/config:/config
command:
- /bin/sh
- -c
@ -41,7 +41,7 @@ services:
expose:
- "6379"
volumes:
- ${FETCHML_REPO_ROOT:-.}/data/prod-smoke/redis:/data
- ${FETCHML_REPO_ROOT:-..}/data/prod-smoke/redis:/data
command: redis-server --appendonly yes
healthcheck:
test: [ "CMD", "redis-cli", "ping" ]
@ -51,8 +51,8 @@ services:
api-server:
build:
context: ${FETCHML_REPO_ROOT:-.}
dockerfile: ${FETCHML_REPO_ROOT:-.}/build/docker/simple.Dockerfile
context: ${FETCHML_REPO_ROOT:-..}
# A relative dockerfile path is resolved from the build context, so it must
# not repeat the repo-root prefix (the default ".." would otherwise point
# one directory above the repository).
dockerfile: build/docker/simple.Dockerfile
user: "0:0"
restart: unless-stopped
expose:
@ -61,11 +61,14 @@ services:
redis:
condition: service_healthy
volumes:
- ${FETCHML_REPO_ROOT:-.}/data/prod-smoke/experiments:/data/experiments
- ${FETCHML_REPO_ROOT:-.}/data/prod-smoke/active:/data/active
- ${FETCHML_REPO_ROOT:-.}/data/prod-smoke/logs:/logs
- ${FETCHML_REPO_ROOT:-.}/configs/api/dev.yaml:/app/configs/api/dev.yaml:ro
- ${FETCHML_REPO_ROOT:-..}/data/prod-smoke/experiments:/data/experiments
- ${FETCHML_REPO_ROOT:-..}/data/prod-smoke/active:/data/active
- ${FETCHML_REPO_ROOT:-..}/data/prod-smoke/logs:/logs
- ${FETCHML_REPO_ROOT:-..}/configs/api/dev.yaml:/app/configs/api/dev.yaml:ro
command: ["/bin/sh", "-c", "mkdir -p /data/experiments /data/active/datasets /data/active/snapshots && exec /usr/local/bin/api-server -config /app/configs/api/dev.yaml"]
environment:
- LOG_LEVEL=info
- FETCHML_NATIVE_LIBS=1
healthcheck:
test: [ "CMD", "curl", "-f", "http://localhost:9101/health" ]
interval: 10s

View file

@ -7,7 +7,7 @@ services:
expose:
- "6379"
volumes:
- ${FETCHML_REPO_ROOT:-.}/data/prod/redis:/data
- ${FETCHML_REPO_ROOT:-..}/data/prod/redis:/data
restart: unless-stopped
command: redis-server --appendonly yes
healthcheck:
@ -18,23 +18,24 @@ services:
api-server:
build:
context: ${FETCHML_REPO_ROOT:-.}
dockerfile: ${FETCHML_REPO_ROOT:-.}/build/docker/secure-prod.Dockerfile
context: ${FETCHML_REPO_ROOT:-..}
# A relative dockerfile path is resolved from the build context, so it must
# not repeat the repo-root prefix (the default ".." would otherwise point
# one directory above the repository).
dockerfile: build/docker/secure-prod.Dockerfile
container_name: ml-prod-api
expose:
- "9101"
- "2222"
volumes:
- ${FETCHML_REPO_ROOT:-.}/data/prod/experiments:/app/data/experiments
- ${FETCHML_REPO_ROOT:-.}/data/prod/active:/data/active
- ${FETCHML_REPO_ROOT:-.}/data/prod/logs:/logs
- ${FETCHML_REPO_ROOT:-.}/configs/api/multi-user.yaml:/app/configs/api/prod.yaml
- ${FETCHML_REPO_ROOT:-..}/data/prod/experiments:/app/data/experiments
- ${FETCHML_REPO_ROOT:-..}/data/prod/active:/data/active
- ${FETCHML_REPO_ROOT:-..}/data/prod/logs:/logs
- ${FETCHML_REPO_ROOT:-..}/configs/api/multi-user.yaml:/app/configs/api/prod.yaml
depends_on:
redis:
condition: service_healthy
restart: unless-stopped
environment:
- LOG_LEVEL=info
- FETCHML_NATIVE_LIBS=1
healthcheck:
test: [ "CMD", "curl", "-f", "http://localhost:9101/health" ]
interval: 30s
@ -55,14 +56,14 @@ services:
worker:
build:
context: ${FETCHML_REPO_ROOT:-.}
dockerfile: ${FETCHML_REPO_ROOT:-.}/build/docker/simple.Dockerfile
context: ${FETCHML_REPO_ROOT:-..}
# A relative dockerfile path is resolved from the build context, so it must
# not repeat the repo-root prefix (the default ".." would otherwise point
# one directory above the repository).
dockerfile: build/docker/simple.Dockerfile
container_name: ml-prod-worker
volumes:
- ${FETCHML_REPO_ROOT:-.}/data/prod/experiments:/app/data/experiments
- ${FETCHML_REPO_ROOT:-.}/data/prod/active:/data/active
- ${FETCHML_REPO_ROOT:-.}/data/prod/logs:/logs
- ${FETCHML_REPO_ROOT:-.}/configs/workers/docker-prod.yaml:/app/configs/worker.yaml
- ${FETCHML_REPO_ROOT:-..}/data/prod/experiments:/app/data/experiments
- ${FETCHML_REPO_ROOT:-..}/data/prod/active:/data/active
- ${FETCHML_REPO_ROOT:-..}/data/prod/logs:/logs
- ${FETCHML_REPO_ROOT:-..}/configs/workers/docker-prod.yaml:/app/configs/worker.yaml
depends_on:
redis:
condition: service_healthy
@ -71,6 +72,7 @@ services:
restart: unless-stopped
environment:
- LOG_LEVEL=info
- FETCHML_NATIVE_LIBS=1
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
- AWS_SESSION_TOKEN=${AWS_SESSION_TOKEN}