chore(config): update configurations and deployment scripts

- Update API server and worker config schemas
- Refine Docker Compose configurations (dev/prod)
- Update deployment scripts and documentation
2026-02-12 12:05:37 -05:00 · 2026-02-12 12:05:37 -05:00 · 2209ae24c6
commit 2209ae24c6
parent 5144d291cb
7 changed files with 342 additions and 402 deletions
--- a/configs/schema/api_server_config.yaml
+++ b/configs/schema/api_server_config.yaml
@ -1,227 +1,233 @@
-# Fetch ML Configuration Schema (JSON Schema expressed as YAML)

-$schema: "http://json-schema.org/draft-07/schema#"
-title: "Fetch ML API Server Configuration"
-type: object
-additionalProperties: false
-required:
-  - auth
-  - server
-properties:
-  base_path:
-    type: string
-    description: Base path for experiment data
-    default: "/tmp/ml-experiments"
-  data_dir:
-    type: string
-    description: Data directory (datasets/snapshots) for integrity validation
-    default: "/data/active"
-  auth:
-    type: object
-    additionalProperties: false
-    required:
-      - enabled
-    properties:
-      enabled:
-        type: boolean
-        description: Enable or disable authentication
-      api_keys:
-        type: object
-        description: API key registry
-        additionalProperties:
-          type: object
-          additionalProperties: false
-          required:
-            - hash
-          properties:
-            hash:
-              type: string
-              description: SHA256 hash of the API key
-            admin:
-              type: boolean
-              default: false
-            roles:
-              type: array
-              items:
-                type: string
-            permissions:
-              type: object
-              additionalProperties:
-                type: boolean
-  server:
-    type: object
-    additionalProperties: false
-    required: [address]
-    properties:
-      address:
-        type: string
-        description: Listen address, e.g. ":9101"
-      tls:
-        type: object
-        additionalProperties: false
-        properties:
-          enabled:
-            type: boolean
-            default: false
-          cert_file:
-            type: string
-          key_file:
-            type: string
-  monitoring:
-    type: object
-    additionalProperties: false
-    properties:
-      prometheus:
-        type: object
-        additionalProperties: false
-        properties:
-          enabled:
-            type: boolean
-          port:
-            type: integer
-            minimum: 1
-            maximum: 65535
-          path:
-            type: string
-      health_checks:
-        type: object
-        additionalProperties: false
-        properties:
-          enabled:
-            type: boolean
-          interval:
-            type: string
-  database:
-    type: object
-    additionalProperties: false
-    properties:
-      type:
-        type: string
-        enum: [sqlite, postgres, mysql]
-        default: sqlite
-      connection:
-        type: string
-      host:
-        type: string
-      port:
-        type: integer
-        minimum: 1
-        maximum: 65535
-      username:
-        type: string
-      password:
-        type: string
-      database:
-        type: string
-  redis:
-    type: object
-    additionalProperties: false
-    properties:
-      url:
-        type: string
-        pattern: "^redis://"
-      addr:
-        type: string
-        description: Optional host:port shorthand for Redis
-      password:
-        type: string
-      db:
-        type: integer
-        minimum: 0
-        default: 0
-  queue:
-    type: object
-    additionalProperties: false
-    properties:
-      backend:
-        type: string
-        enum: [redis, sqlite]
-        default: redis
-      sqlite_path:
-        type: string
-  logging:
-    type: object
-    additionalProperties: false
-    properties:
-      level:
-        type: string
-        enum: [debug, info, warn, error]
-        default: "info"
-      file:
-        type: string
-      audit_log:
-        type: string
-  security:
-    type: object
-    additionalProperties: false
-    properties:
-      production_mode:
-        type: boolean
-        default: false
-      allowed_origins:
-        type: array
-        items:
-          type: string
-      api_key_rotation_days:
-        type: integer
-        minimum: 0
-      audit_logging:
-        type: object
-        additionalProperties: false
-        properties:
-          enabled:
-            type: boolean
-          log_path:
-            type: string
-      ip_whitelist:
-        type: array
-        items:
-          type: string
-      failed_login_lockout:
-        type: object
-        additionalProperties: false
-        properties:
-          enabled:
-            type: boolean
-          max_attempts:
-            type: integer
-            minimum: 1
-          lockout_duration:
-            type: string
-            description: Duration string, e.g. "15m"
-      rate_limit:
-        type: object
-        additionalProperties: false
-        properties:
-          enabled:
-            type: boolean
-            default: false
-          requests_per_minute:
-            type: integer
-            minimum: 1
-            default: 60
-          burst_size:
-            type: integer
-            minimum: 1
-  resources:
-    type: object
-    description: Resource configuration
-    additionalProperties: false
-    properties:
-      max_workers:
-        type: integer
-        minimum: 1
-        default: 1
-      desired_rps_per_worker:
-        type: integer
-        minimum: 1
-      requests_per_sec:
-        type: integer
-        minimum: 1
-      podman_cpus:
-        type: string
-      podman_memory:
-        type: string
-      request_burst:
-        type: integer
-        minimum: 0
+ # Fetch ML Configuration Schema (JSON Schema expressed as YAML)
+ 
+ $schema: "http://json-schema.org/draft-07/schema#"
+ title: "Fetch ML API Server Configuration"
+ type: object
+ additionalProperties: false
+ required:
+   - auth
+   - server
+ properties:
+   base_path:
+     type: string
+     description: Base path for experiment data
+     default: "/tmp/ml-experiments"
+   data_dir:
+     type: string
+     description: Data directory (datasets/snapshots) for integrity validation
+     default: "/data/active"
+   auth:
+     type: object
+     additionalProperties: false
+     required:
+       - enabled
+     properties:
+       enabled:
+         type: boolean
+         description: Enable or disable authentication
+       api_keys:
+         type: object
+         description: API key registry
+         additionalProperties:
+           type: object
+           additionalProperties: false
+           required:
+             - hash
+           properties:
+             hash:
+               type: string
+               description: SHA256 hash of the API key
+             admin:
+               type: boolean
+               default: false
+             roles:
+               type: array
+               items:
+                 type: string
+             permissions:
+               type: object
+               additionalProperties:
+                 type: boolean
+   server:
+     type: object
+     additionalProperties: false
+     required: [address]
+     properties:
+       address:
+         type: string
+         description: Listen address, e.g. ":9101"
+       tls:
+         type: object
+         additionalProperties: false
+         properties:
+           enabled:
+             type: boolean
+             default: false
+           cert_file:
+             type: string
+           key_file:
+             type: string
+   monitoring:
+     type: object
+     additionalProperties: false
+     properties:
+       prometheus:
+         type: object
+         additionalProperties: false
+         properties:
+           enabled:
+             type: boolean
+           port:
+             type: integer
+             minimum: 1
+             maximum: 65535
+           path:
+             type: string
+       health_checks:
+         type: object
+         additionalProperties: false
+         properties:
+           enabled:
+             type: boolean
+           interval:
+             type: string
+   database:
+     type: object
+     additionalProperties: false
+     properties:
+       type:
+         type: string
+         enum: [sqlite, postgres, mysql]
+         default: sqlite
+       connection:
+         type: string
+       host:
+         type: string
+       port:
+         type: integer
+         minimum: 1
+         maximum: 65535
+       username:
+         type: string
+       password:
+         type: string
+       database:
+         type: string
+   redis:
+     type: object
+     additionalProperties: false
+     properties:
+       url:
+         type: string
+         pattern: "^redis://"
+       addr:
+         type: string
+         description: Optional host:port shorthand for Redis
+       password:
+         type: string
+       db:
+         type: integer
+         minimum: 0
+         default: 0
+   queue:
+     type: object
+     description: Queue backend configuration
+     additionalProperties: false
+     properties:
+       backend:
+         type: string
+         enum: [redis, sqlite, filesystem]
+         default: redis
+       sqlite_path:
+         type: string
+         description: Path to queue.db (sqlite backend only)
+       filesystem_path:
+         type: string
+         description: Base directory for filesystem queue state
+       fallback_to_filesystem:
+         type: boolean
+         default: false
+         description: If true, fall back to filesystem queue when primary backend is unavailable
+   logging:
+     type: object
+     additionalProperties: false
+     properties:
+       level:
+         type: string
+         enum: [debug, info, warn, error]
+         default: "info"
+       file:
+         type: string
+       audit_log:
+         type: string
+   security:
+     type: object
+     additionalProperties: false
+     properties:
+       production_mode:
+         type: boolean
+         default: false
+       allowed_origins:
+         type: array
+         items:
+           type: string
+       api_key_rotation_days:
+         type: integer
+         minimum: 0
+       audit_logging:
+         type: object
+         additionalProperties: false
+         properties:
+           enabled:
+             type: boolean
+           log_path:
+             type: string
+       ip_whitelist:
+         type: array
+         items:
+           type: string
+       failed_login_lockout:
+         type: object
+         additionalProperties: false
+         properties:
+           enabled:
+             type: boolean
+           max_attempts:
+             type: integer
+             minimum: 1
+           lockout_duration:
+             type: string
+             description: Duration string, e.g. "15m"
+       rate_limit:
+         type: object
+         additionalProperties: false
+         properties:
+           enabled:
+             type: boolean
+             default: false
+           requests_per_minute:
+             type: integer
+             minimum: 1
+             default: 60
+           burst_size:
+             type: integer
+             minimum: 1
+   resources:
+     type: object
+     description: Resource configuration
+     additionalProperties: false
+     properties:
+       max_workers:
+         type: integer
+         minimum: 1
+         default: 1
+       desired_rps_per_worker:
+         type: integer
+         minimum: 1
+       requests_per_sec:
+         type: integer
+         minimum: 1
+       podman_cpus:
+         type: string
+       podman_memory:
+         type: string
+       request_burst:
+         type: integer
+         minimum: 0
--- a/configs/schema/worker_config_schema.yaml
+++ b/configs/schema/worker_config_schema.yaml
@ -17,6 +17,31 @@ allOf:
      properties:
        queue:
          required: [sqlite_path]
+  # Filesystem backend needs its state directory. `required: [backend]` keeps
+  # this from matching configs that omit `backend` (which defaults to redis).
+  - if:
+      properties:
+        queue:
+          properties:
+            backend:
+              const: filesystem
+          required: [backend]
+      required: [queue]
+    then:
+      properties:
+        queue:
+          required: [filesystem_path]
+  # Redis settings are only mandatory when the backend is neither sqlite nor
+  # filesystem. This item has no `then`; it exists so the `else` below keeps
+  # excluding sqlite instead of binding to the filesystem check alone.
+  - if:
+      properties:
+        queue:
+          properties:
+            backend:
+              enum: [sqlite, filesystem]
+          required: [backend]
+      required: [queue]
    else:
      anyOf:
        - required: [redis_addr]
@ -70,11 +81,18 @@ properties:
    properties:
      backend:
        type: string
-        enum: [redis, sqlite]
+        enum: [redis, sqlite, filesystem]
        default: redis
      sqlite_path:
        type: string
        description: Path to queue.db (sqlite backend only)
+      filesystem_path:
+        type: string
+        description: Base directory for filesystem queue state
+      fallback_to_filesystem:
+        type: boolean
+        default: false
+        description: If true, fall back to filesystem queue when primary backend is unavailable
  known_hosts:
    type: string
    description: Path to SSH known hosts file
--- a/configs/workers/docker-prod.yaml
+++ b/configs/workers/docker-prod.yaml
@ -18,8 +18,8 @@ data_dir: "/data/active"

 snapshot_store:
  enabled: true
-  endpoint: "minio:9000"
-  secure: false
+  endpoint: "blizzard.jfraeys.com"
+  secure: true
  bucket: "fetchml-snapshots"
  prefix: "snapshots"
  timeout: "5m"
--- a/deployments/Caddyfile.prod
+++ b/deployments/Caddyfile.prod
@ -1,47 +0,0 @@
-{
-	email {$CADDY_EMAIL}
-	admin off
-	servers {
-		protocols h1 h2
-	}
-}
-
-{$FETCHML_DOMAIN} {
-	encode gzip
-
-	request_body {
-		max_size 10MB
-	}
-
-	header {
-		-Server
-		X-Frame-Options "DENY"
-		X-Content-Type-Options "nosniff"
-		Referrer-Policy "strict-origin-when-cross-origin"
-		Strict-Transport-Security "max-age=31536000; includeSubDomains; preload"
-		Content-Security-Policy "default-src 'self'; base-uri 'self'; frame-ancestors 'none'"
-	}
-
-	@admin path /admin/*
-	@admin_private remote_ip private_ranges
-	handle @admin {
-		respond @admin_private 404
-		respond 404
-	}
-
-	handle /health {
-		reverse_proxy api-server:9101
-	}
-
-	handle /ws* {
-		reverse_proxy api-server:9101
-	}
-
-	handle /api/* {
-		reverse_proxy api-server:9101
-	}
-
-	handle {
-		respond 404
-	}
-}
--- a/deployments/deploy.sh
+++ b/deployments/deploy.sh
@ -3,6 +3,10 @@

 set -e

+ SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+ REPO_ROOT=$(cd "${SCRIPT_DIR}/.." && pwd)
+ export FETCHML_REPO_ROOT="${FETCHML_REPO_ROOT:-${REPO_ROOT}}"
+
 # Colors for output
 RED='\033[0;31m'
 GREEN='\033[0;32m'
@ -56,13 +60,13 @@ check_compose_file() {
    
    case $env in
        "dev")
-            compose_file="deployments/docker-compose.dev.yml"
+            compose_file="${FETCHML_REPO_ROOT}/deployments/docker-compose.dev.yml"
            ;;
        "secure")
-            compose_file="deployments/docker-compose.homelab-secure.yml"
+            compose_file="${FETCHML_REPO_ROOT}/deployments/docker-compose.homelab-secure.yml"
            ;;
        "prod")
-            compose_file="deployments/docker-compose.prod.yml"
+            compose_file="${FETCHML_REPO_ROOT}/deployments/docker-compose.prod.yml"
            ;;
        *)
            print_error "Unknown environment: $env"
@ -83,14 +87,14 @@ check_compose_file() {
 check_env_file() {
    local env=$1
    
-    if [ ! -f ".env" ]; then
+    if [ ! -f "${FETCHML_REPO_ROOT}/.env" ]; then
        print_warning ".env file not found. Creating from example..."
        if [ "$env" = "dev" ]; then
-            cp deployments/env.dev.example .env
+            cp "${FETCHML_REPO_ROOT}/deployments/env.dev.example" "${FETCHML_REPO_ROOT}/.env"
        elif [ "$env" = "prod" ]; then
-            cp deployments/env.prod.example .env
+            cp "${FETCHML_REPO_ROOT}/deployments/env.prod.example" "${FETCHML_REPO_ROOT}/.env"
        else
-            cp deployments/env.dev.example .env
+            cp "${FETCHML_REPO_ROOT}/deployments/env.dev.example" "${FETCHML_REPO_ROOT}/.env"
        fi
        print_warning "Please edit .env file with your configuration"
    fi
@ -120,7 +124,7 @@ main() {
    case $action in
        "up")
            print_status "Starting $environment environment..."
-            docker-compose -f "$compose_file" up -d
+            docker-compose --project-directory "${FETCHML_REPO_ROOT}" -f "$compose_file" up -d
            print_success "$environment environment started successfully!"
            
            # Show service URLs
@ -134,21 +138,21 @@ main() {
            ;;
        "down")
            print_status "Stopping $environment environment..."
-            docker-compose -f "$compose_file" down
+            docker-compose --project-directory "${FETCHML_REPO_ROOT}" -f "$compose_file" down
            print_success "$environment environment stopped successfully!"
            ;;
        "restart")
            print_status "Restarting $environment environment..."
-            docker-compose -f "$compose_file" restart
+            docker-compose --project-directory "${FETCHML_REPO_ROOT}" -f "$compose_file" restart
            print_success "$environment environment restarted successfully!"
            ;;
        "logs")
            print_status "Showing logs for $environment environment..."
-            docker-compose -f "$compose_file" logs -f
+            docker-compose --project-directory "${FETCHML_REPO_ROOT}" -f "$compose_file" logs -f
            ;;
        "status")
            print_status "Status of $environment environment:"
-            docker-compose -f "$compose_file" ps
+            docker-compose --project-directory "${FETCHML_REPO_ROOT}" -f "$compose_file" ps
            ;;
        *)
            print_error "Unknown action: $action"
--- a/deployments/docker-compose.dev.yml
+++ b/deployments/docker-compose.dev.yml
@ -1,6 +1,6 @@
+---
 # Homelab Docker Compose with Centralized Monitoring
 # Includes: API, Redis, Prometheus, Grafana, Loki
-
 services:
  caddy:
    image: caddy:2-alpine
@ -16,7 +16,6 @@ services:
    depends_on:
      api-server:
        condition: service_healthy
-
  redis:
    image: redis:7-alpine
    container_name: ml-experiments-redis
@ -28,19 +27,20 @@ services:
    restart: unless-stopped
    command: redis-server --appendonly yes
    healthcheck:
-      test: [ "CMD", "redis-cli", "ping" ]
+      test: ["CMD", "redis-cli", "ping"]
      interval: 30s
      timeout: 10s
      retries: 3
-
  api-server:
    build:
      context: ${FETCHML_REPO_ROOT:-.}
      dockerfile: ${FETCHML_REPO_ROOT:-.}/build/docker/simple.Dockerfile
    container_name: ml-experiments-api
    user: "0:0"
+    ports:
+      - "9101:9101"
    expose:
-      - "9101"  # API and health endpoints (internal; external access via Caddy)
+      - "9101" # API and health endpoints (external access via Caddy, or directly on host port 9101 via ports above)
    volumes:
      - ${FETCHML_REPO_ROOT:-.}/data/dev/logs:/logs
      - ${FETCHML_REPO_ROOT:-.}/data/dev/experiments:/data/experiments
@ -55,7 +55,7 @@ services:
    environment:
      - LOG_LEVEL=info
    healthcheck:
-      test: [ "CMD", "curl", "-f", "http://localhost:9101/health" ]
+      test: ["CMD", "curl", "-f", "http://localhost:9101/health"]
      interval: 30s
      timeout: 10s
      retries: 3
@ -63,7 +63,6 @@ services:
    labels:
      logging: "promtail"
      job: "api-server"
-
  minio:
    image: minio/minio:latest
    container_name: ml-experiments-minio
@ -82,7 +81,6 @@ services:
      timeout: 5s
      retries: 10
    restart: unless-stopped
-
  minio-init:
    image: alpine:3.19
    container_name: ml-experiments-minio-init
@ -120,7 +118,6 @@ services:
        SNAP_SHA=$$(echo -n "$$FILE_SHA" | sha256sum | cut -d' ' -f1)
        echo "snapshot_id=snap-1 snapshot_sha256=$$SNAP_SHA"
    restart: "no"
-
  worker:
    build:
      context: ${FETCHML_REPO_ROOT:-.}
@ -148,60 +145,58 @@ services:
      - LOG_LEVEL=info
      - MINIO_ROOT_USER=minioadmin
      - MINIO_ROOT_PASSWORD=minioadmin123
-      - FETCHML_JUPYTER_DEFAULT_IMAGE=quay.io/jupyter/base-notebook:latest
+      - FETCHML_JUPYTER_DEFAULT_IMAGE=quay.io/jupyter/minimal-notebook:latest
      - FETCHML_JUPYTER_CONDA_ENV=base
      - FETCHML_JUPYTER_KERNEL_NAME=python
      - FETCHML_PODMAN_CGROUPS=disabled
    privileged: true
    command: ["/usr/local/bin/worker", "-config", "/app/configs/worker.yaml"]
-
-  # Prometheus - Metrics collection
-  prometheus:
-    image: prom/prometheus:latest
-    container_name: ml-experiments-prometheus
-    ports:
-      - "9090:9090"
-    volumes:
-      - ${FETCHML_REPO_ROOT:-.}/monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
-      - prometheus_data:/prometheus
-    command:
-      - '--config.file=/etc/prometheus/prometheus.yml'
-      - '--storage.tsdb.path=/prometheus'
-      - '--web.console.libraries=/etc/prometheus/console_libraries'
-      - '--web.console.templates=/etc/prometheus/consoles'
-      - '--web.enable-lifecycle'
-    restart: unless-stopped
-
-  # Grafana - Visualization
-  grafana:
-    image: grafana/grafana:latest
-    container_name: ml-experiments-grafana
-    ports:
-      - "3000:3000"
-    volumes:
-      - grafana_data:/var/lib/grafana
-      - ${FETCHML_REPO_ROOT:-.}/monitoring/grafana/provisioning:/etc/grafana/provisioning
-      - ${FETCHML_REPO_ROOT:-.}/monitoring/grafana/dashboards:/var/lib/grafana/dashboards
-    environment:
-      - GF_SECURITY_ADMIN_PASSWORD=admin123
-      - GF_USERS_ALLOW_SIGN_UP=false
-    restart: unless-stopped
-    depends_on:
-      - prometheus
-      - loki
-
-  # Loki - Log aggregation
-  loki:
-    image: grafana/loki:latest
-    container_name: ml-experiments-loki
-    ports:
-      - "3100:3100"
-    volumes:
-      - ${FETCHML_REPO_ROOT:-.}/monitoring/loki-config.yml:/etc/loki/local-config.yaml
-      - loki_data:/loki
-    command: -config.file=/etc/loki/local-config.yaml
-    restart: unless-stopped
-
+    # # Prometheus - Metrics collection
+    # prometheus:
+    #   image: prom/prometheus:latest
+    #   container_name: ml-experiments-prometheus
+    #   ports:
+    #     - "9090:9090"
+    #   volumes:
+    #     - ${FETCHML_REPO_ROOT:-.}/monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
+    #     - prometheus_data:/prometheus
+    #   command:
+    #     - '--config.file=/etc/prometheus/prometheus.yml'
+    #     - '--storage.tsdb.path=/prometheus'
+    #     - '--web.console.libraries=/etc/prometheus/console_libraries'
+    #     - '--web.console.templates=/etc/prometheus/consoles'
+    #     - '--web.enable-lifecycle'
+    #   restart: unless-stopped
+    #
+    # # Grafana - Visualization
+    # grafana:
+    #   image: grafana/grafana:latest
+    #   container_name: ml-experiments-grafana
+    #   ports:
+    #     - "3000:3000"
+    #   volumes:
+    #     - grafana_data:/var/lib/grafana
+    #     - ${FETCHML_REPO_ROOT:-.}/monitoring/grafana/provisioning:/etc/grafana/provisioning
+    #     - ${FETCHML_REPO_ROOT:-.}/monitoring/grafana/dashboards:/var/lib/grafana/dashboards
+    #   environment:
+    #     - GF_SECURITY_ADMIN_PASSWORD=admin123
+    #     - GF_USERS_ALLOW_SIGN_UP=false
+    #   restart: unless-stopped
+    #   depends_on:
+    #     - prometheus
+    #     - loki
+    #
+    # # Loki - Log aggregation
+    # loki:
+    #   image: grafana/loki:latest
+    #   container_name: ml-experiments-loki
+    #   ports:
+    #     - "3100:3100"
+    #   volumes:
+    #     - ${FETCHML_REPO_ROOT:-.}/monitoring/loki-config.yml:/etc/loki/local-config.yaml
+    #     - loki_data:/loki
+    #   command: -config.file=/etc/loki/local-config.yaml
+    #   restart: unless-stopped
  # Promtail - Log collector
  promtail:
    image: grafana/promtail:latest
@ -215,7 +210,6 @@ services:
    restart: unless-stopped
-    depends_on:
-      - loki
-
+    # No depends_on: loki is commented out above, and Compose rejects a
+    # dependency on a service that is not defined.
 volumes:
  prometheus_data:
    driver: local
--- a/deployments/docker-compose.prod.yml
+++ b/deployments/docker-compose.prod.yml
@ -1,23 +1,5 @@
 # Full Production Docker Environment with Podman and SQLite
 services:
-  caddy:
-    image: caddy:2-alpine
-    container_name: ml-prod-caddy
-    restart: unless-stopped
-    ports:
-      - "80:80"
-      - "443:443"
-    volumes:
-      - ./Caddyfile.prod:/etc/caddy/Caddyfile:ro
-      - ${FETCHML_REPO_ROOT:-.}/data/prod/caddy/data:/data
-      - ${FETCHML_REPO_ROOT:-.}/data/prod/caddy/config:/config
-    environment:
-      - FETCHML_DOMAIN=${FETCHML_DOMAIN}
-      - CADDY_EMAIL=${CADDY_EMAIL}
-    depends_on:
-      api-server:
-        condition: service_healthy
-
  redis:
    image: redis:7-alpine
    container_name: ml-prod-redis
@ -40,8 +22,8 @@ services:
      dockerfile: ${FETCHML_REPO_ROOT:-.}/build/docker/secure-prod.Dockerfile
    container_name: ml-prod-api
    expose:
-      - "9101"  # API server port (internal; external access via Caddy)
-      - "2222"  # Secure SSH port for Podman communication (internal)
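+      - "9101"  # API server port (internal; external access via Traefik)
+      - "2222"  # Secure SSH port for Podman communication (internal)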
    volumes:
      - ${FETCHML_REPO_ROOT:-.}/data/prod/experiments:/app/data/experiments
      - ${FETCHML_REPO_ROOT:-.}/data/prod/active:/data/active
@ -59,37 +41,17 @@ services:
      timeout: 10s
      retries: 3
      start_period: 40s
-    # Start API server (ensure data_dir exists for snapshot/dataset validation)
    command: ["/bin/sh", "-c", "mkdir -p /data/active/datasets /data/active/snapshots && exec /usr/local/bin/api-server -config /app/configs/api/prod.yaml"]
-
-  minio:
-    image: minio/minio:latest
-    container_name: ml-prod-minio
-    expose:
-      - "9000"
-      - "9001"
-    volumes:
-      - ${FETCHML_REPO_ROOT:-.}/data/prod/minio:/data
-    environment:
-      - MINIO_ROOT_USER=${MINIO_ROOT_USER:-minioadmin}
-      - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minioadmin123}
-    command: ["server", "/data", "--console-address", ":9001"]
-    restart: unless-stopped
-
-  minio-init:
-    image: alpine:3.19
-    container_name: ml-prod-minio-init
-    depends_on:
-      - minio
-    entrypoint: ["/bin/sh", "-c"]
-    command:
-      - |
-        apk add --no-cache ca-certificates curl >/dev/null
-        curl -fsSL -o /usr/local/bin/mc https://dl.min.io/client/mc/release/linux-amd64/mc
-        chmod +x /usr/local/bin/mc
-        mc alias set local http://minio:9000 ${MINIO_ROOT_USER:-minioadmin} ${MINIO_ROOT_PASSWORD:-minioadmin123}
-        mc mb -p local/fetchml-snapshots || true
-    restart: "no"
+    labels:
+      - "traefik.enable=true"
+      - "traefik.docker.network=${TRAEFIK_NETWORK:-traefik}"
+      - "traefik.http.services.fetchml.loadbalancer.server.port=9101"
+      - "traefik.http.routers.fetchml.rule=Host(`${FETCHML_DOMAIN}`) && (PathPrefix(`/api`) || PathPrefix(`/ws`) || Path(`/health`))"
+      - "traefik.http.routers.fetchml.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
+      - "traefik.http.routers.fetchml.tls=true"
+    networks:
+      - default
+      - traefik

  worker:
    build:
@ -106,14 +68,14 @@ services:
        condition: service_healthy
      api-server:
        condition: service_healthy
-      minio-init:
-        condition: service_started
    restart: unless-stopped
    environment:
      - LOG_LEVEL=info
-      - MINIO_ROOT_USER=${MINIO_ROOT_USER:-minioadmin}
-      - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minioadmin123}
-    privileged: true  # Required for Podman to work in Docker
+      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
+      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
+      - AWS_SESSION_TOKEN=${AWS_SESSION_TOKEN}
+      - AWS_REGION=${AWS_REGION}
+    privileged: true  # Required for Podman to work in Docker
    command: ["/usr/local/bin/worker", "-config", "/app/configs/worker.yaml"]

 volumes: {}
@ -121,3 +83,6 @@ volumes: {}
 networks:
  default:
    name: ml-prod-network
+  traefik:
+    external: true
+    name: ${TRAEFIK_NETWORK:-traefik}