chore(config): update configurations and deployment scripts
- Update API server and worker config schemas
- Refine Docker Compose configurations (dev/prod)
- Update deployment scripts and documentation
This commit is contained in:
parent
5144d291cb
commit
2209ae24c6
7 changed files with 342 additions and 402 deletions
|
|
@@ -1,227 +1,233 @@
|
|||
# Fetch ML Configuration Schema (JSON Schema expressed as YAML)
|
||||
|
||||
$schema: "http://json-schema.org/draft-07/schema#"
|
||||
title: "Fetch ML API Server Configuration"
|
||||
type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- auth
|
||||
- server
|
||||
properties:
|
||||
base_path:
|
||||
type: string
|
||||
description: Base path for experiment data
|
||||
default: "/tmp/ml-experiments"
|
||||
data_dir:
|
||||
type: string
|
||||
description: Data directory (datasets/snapshots) for integrity validation
|
||||
default: "/data/active"
|
||||
auth:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- enabled
|
||||
properties:
|
||||
enabled:
|
||||
type: boolean
|
||||
description: Enable or disable authentication
|
||||
api_keys:
|
||||
type: object
|
||||
description: API key registry
|
||||
additionalProperties:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- hash
|
||||
properties:
|
||||
hash:
|
||||
type: string
|
||||
description: SHA256 hash of the API key
|
||||
admin:
|
||||
type: boolean
|
||||
default: false
|
||||
roles:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
permissions:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: boolean
|
||||
server:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
required: [address]
|
||||
properties:
|
||||
address:
|
||||
type: string
|
||||
description: Listen address, e.g. ":9101"
|
||||
tls:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
enabled:
|
||||
type: boolean
|
||||
default: false
|
||||
cert_file:
|
||||
type: string
|
||||
key_file:
|
||||
type: string
|
||||
monitoring:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
prometheus:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
enabled:
|
||||
type: boolean
|
||||
port:
|
||||
type: integer
|
||||
minimum: 1
|
||||
maximum: 65535
|
||||
path:
|
||||
type: string
|
||||
health_checks:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
enabled:
|
||||
type: boolean
|
||||
interval:
|
||||
type: string
|
||||
database:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
enum: [sqlite, postgres, mysql]
|
||||
default: sqlite
|
||||
connection:
|
||||
type: string
|
||||
host:
|
||||
type: string
|
||||
port:
|
||||
type: integer
|
||||
minimum: 1
|
||||
maximum: 65535
|
||||
username:
|
||||
type: string
|
||||
password:
|
||||
type: string
|
||||
database:
|
||||
type: string
|
||||
redis:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
url:
|
||||
type: string
|
||||
pattern: "^redis://"
|
||||
addr:
|
||||
type: string
|
||||
description: Optional host:port shorthand for Redis
|
||||
password:
|
||||
type: string
|
||||
db:
|
||||
type: integer
|
||||
minimum: 0
|
||||
default: 0
|
||||
queue:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
backend:
|
||||
type: string
|
||||
enum: [redis, sqlite]
|
||||
default: redis
|
||||
sqlite_path:
|
||||
type: string
|
||||
logging:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
level:
|
||||
type: string
|
||||
enum: [debug, info, warn, error]
|
||||
default: "info"
|
||||
file:
|
||||
type: string
|
||||
audit_log:
|
||||
type: string
|
||||
security:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
production_mode:
|
||||
type: boolean
|
||||
default: false
|
||||
allowed_origins:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
api_key_rotation_days:
|
||||
type: integer
|
||||
minimum: 0
|
||||
audit_logging:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
enabled:
|
||||
type: boolean
|
||||
log_path:
|
||||
type: string
|
||||
ip_whitelist:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
failed_login_lockout:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
enabled:
|
||||
type: boolean
|
||||
max_attempts:
|
||||
type: integer
|
||||
minimum: 1
|
||||
lockout_duration:
|
||||
type: string
|
||||
description: Duration string, e.g. "15m"
|
||||
rate_limit:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
enabled:
|
||||
type: boolean
|
||||
default: false
|
||||
requests_per_minute:
|
||||
type: integer
|
||||
minimum: 1
|
||||
default: 60
|
||||
burst_size:
|
||||
type: integer
|
||||
minimum: 1
|
||||
resources:
|
||||
type: object
|
||||
description: Resource configuration
|
||||
additionalProperties: false
|
||||
properties:
|
||||
max_workers:
|
||||
type: integer
|
||||
minimum: 1
|
||||
default: 1
|
||||
desired_rps_per_worker:
|
||||
type: integer
|
||||
minimum: 1
|
||||
requests_per_sec:
|
||||
type: integer
|
||||
minimum: 1
|
||||
podman_cpus:
|
||||
type: string
|
||||
podman_memory:
|
||||
type: string
|
||||
request_burst:
|
||||
type: integer
|
||||
minimum: 0
|
||||
# Fetch ML Configuration Schema (JSON Schema expressed as YAML)
|
||||
|
||||
$schema: "http://json-schema.org/draft-07/schema#"
|
||||
title: "Fetch ML API Server Configuration"
|
||||
type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- auth
|
||||
- server
|
||||
properties:
|
||||
base_path:
|
||||
type: string
|
||||
description: Base path for experiment data
|
||||
default: "/tmp/ml-experiments"
|
||||
data_dir:
|
||||
type: string
|
||||
description: Data directory (datasets/snapshots) for integrity validation
|
||||
default: "/data/active"
|
||||
auth:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- enabled
|
||||
properties:
|
||||
enabled:
|
||||
type: boolean
|
||||
description: Enable or disable authentication
|
||||
api_keys:
|
||||
type: object
|
||||
description: API key registry
|
||||
additionalProperties:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- hash
|
||||
properties:
|
||||
hash:
|
||||
type: string
|
||||
description: SHA256 hash of the API key
|
||||
admin:
|
||||
type: boolean
|
||||
default: false
|
||||
roles:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
permissions:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: boolean
|
||||
server:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
required: [address]
|
||||
properties:
|
||||
address:
|
||||
type: string
|
||||
description: Listen address, e.g. ":9101"
|
||||
tls:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
enabled:
|
||||
type: boolean
|
||||
default: false
|
||||
cert_file:
|
||||
type: string
|
||||
key_file:
|
||||
type: string
|
||||
monitoring:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
prometheus:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
enabled:
|
||||
type: boolean
|
||||
port:
|
||||
type: integer
|
||||
minimum: 1
|
||||
maximum: 65535
|
||||
path:
|
||||
type: string
|
||||
health_checks:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
enabled:
|
||||
type: boolean
|
||||
interval:
|
||||
type: string
|
||||
database:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
enum: [sqlite, postgres, mysql]
|
||||
default: sqlite
|
||||
connection:
|
||||
type: string
|
||||
host:
|
||||
type: string
|
||||
port:
|
||||
type: integer
|
||||
minimum: 1
|
||||
maximum: 65535
|
||||
username:
|
||||
type: string
|
||||
password:
|
||||
type: string
|
||||
database:
|
||||
type: string
|
||||
redis:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
url:
|
||||
type: string
|
||||
pattern: "^redis://"
|
||||
addr:
|
||||
type: string
|
||||
description: Optional host:port shorthand for Redis
|
||||
password:
|
||||
type: string
|
||||
db:
|
||||
type: integer
|
||||
minimum: 0
|
||||
default: 0
|
||||
queue:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
backend:
|
||||
type: string
|
||||
enum: [redis, sqlite, filesystem]
|
||||
default: redis
|
||||
sqlite_path:
|
||||
type: string
|
||||
filesystem_path:
|
||||
type: string
|
||||
fallback_to_filesystem:
|
||||
type: boolean
|
||||
default: false
|
||||
logging:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
level:
|
||||
type: string
|
||||
enum: [debug, info, warn, error]
|
||||
default: "info"
|
||||
file:
|
||||
type: string
|
||||
audit_log:
|
||||
type: string
|
||||
security:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
production_mode:
|
||||
type: boolean
|
||||
default: false
|
||||
allowed_origins:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
api_key_rotation_days:
|
||||
type: integer
|
||||
minimum: 0
|
||||
audit_logging:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
enabled:
|
||||
type: boolean
|
||||
log_path:
|
||||
type: string
|
||||
ip_whitelist:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
failed_login_lockout:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
enabled:
|
||||
type: boolean
|
||||
max_attempts:
|
||||
type: integer
|
||||
minimum: 1
|
||||
lockout_duration:
|
||||
type: string
|
||||
description: Duration string, e.g. "15m"
|
||||
rate_limit:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
enabled:
|
||||
type: boolean
|
||||
default: false
|
||||
requests_per_minute:
|
||||
type: integer
|
||||
minimum: 1
|
||||
default: 60
|
||||
burst_size:
|
||||
type: integer
|
||||
minimum: 1
|
||||
resources:
|
||||
type: object
|
||||
description: Resource configuration
|
||||
additionalProperties: false
|
||||
properties:
|
||||
max_workers:
|
||||
type: integer
|
||||
minimum: 1
|
||||
default: 1
|
||||
desired_rps_per_worker:
|
||||
type: integer
|
||||
minimum: 1
|
||||
requests_per_sec:
|
||||
type: integer
|
||||
minimum: 1
|
||||
podman_cpus:
|
||||
type: string
|
||||
podman_memory:
|
||||
type: string
|
||||
request_burst:
|
||||
type: integer
|
||||
minimum: 0
|
||||
|
|
|
|||
|
|
@@ -17,6 +17,17 @@ allOf:
|
|||
properties:
|
||||
queue:
|
||||
required: [sqlite_path]
|
||||
- if:
|
||||
properties:
|
||||
queue:
|
||||
properties:
|
||||
backend:
|
||||
const: filesystem
|
||||
required: [queue]
|
||||
then:
|
||||
properties:
|
||||
queue:
|
||||
required: [filesystem_path]
|
||||
else:
|
||||
anyOf:
|
||||
- required: [redis_addr]
|
||||
|
|
@@ -70,11 +81,18 @@ properties:
|
|||
properties:
|
||||
backend:
|
||||
type: string
|
||||
enum: [redis, sqlite]
|
||||
enum: [redis, sqlite, filesystem]
|
||||
default: redis
|
||||
sqlite_path:
|
||||
type: string
|
||||
description: Path to queue.db (sqlite backend only)
|
||||
filesystem_path:
|
||||
type: string
|
||||
description: Base directory for filesystem queue state
|
||||
fallback_to_filesystem:
|
||||
type: boolean
|
||||
default: false
|
||||
description: If true, fall back to filesystem queue when primary backend is unavailable
|
||||
known_hosts:
|
||||
type: string
|
||||
description: Path to SSH known hosts file
|
||||
|
|
|
|||
|
|
@@ -18,8 +18,8 @@ data_dir: "/data/active"
|
|||
|
||||
snapshot_store:
|
||||
enabled: true
|
||||
endpoint: "minio:9000"
|
||||
secure: false
|
||||
endpoint: "blizzard.jfraeys.com"
|
||||
secure: true
|
||||
bucket: "fetchml-snapshots"
|
||||
prefix: "snapshots"
|
||||
timeout: "5m"
|
||||
|
|
|
|||
|
|
@@ -1,47 +0,0 @@
|
|||
{
|
||||
email {$CADDY_EMAIL}
|
||||
admin off
|
||||
servers {
|
||||
protocols h1 h2
|
||||
}
|
||||
}
|
||||
|
||||
{$FETCHML_DOMAIN} {
|
||||
encode gzip
|
||||
|
||||
request_body {
|
||||
max_size 10MB
|
||||
}
|
||||
|
||||
header {
|
||||
-Server
|
||||
X-Frame-Options "DENY"
|
||||
X-Content-Type-Options "nosniff"
|
||||
Referrer-Policy "strict-origin-when-cross-origin"
|
||||
Strict-Transport-Security "max-age=31536000; includeSubDomains; preload"
|
||||
Content-Security-Policy "default-src 'self'; base-uri 'self'; frame-ancestors 'none'"
|
||||
}
|
||||
|
||||
@admin path /admin/*
|
||||
@admin_private remote_ip private_ranges
|
||||
handle @admin {
|
||||
respond @admin_private 404
|
||||
respond 404
|
||||
}
|
||||
|
||||
handle /health {
|
||||
reverse_proxy api-server:9101
|
||||
}
|
||||
|
||||
handle /ws* {
|
||||
reverse_proxy api-server:9101
|
||||
}
|
||||
|
||||
handle /api/* {
|
||||
reverse_proxy api-server:9101
|
||||
}
|
||||
|
||||
handle {
|
||||
respond 404
|
||||
}
|
||||
}
|
||||
|
|
@@ -3,6 +3,10 @@
|
|||
|
||||
set -e
|
||||
|
||||
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
REPO_ROOT=$(cd "${SCRIPT_DIR}/.." && pwd)
|
||||
export FETCHML_REPO_ROOT="${FETCHML_REPO_ROOT:-${REPO_ROOT}}"
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
|
|
@@ -56,13 +60,13 @@ check_compose_file() {
|
|||
|
||||
case $env in
|
||||
"dev")
|
||||
compose_file="deployments/docker-compose.dev.yml"
|
||||
compose_file="${FETCHML_REPO_ROOT}/deployments/docker-compose.dev.yml"
|
||||
;;
|
||||
"secure")
|
||||
compose_file="deployments/docker-compose.homelab-secure.yml"
|
||||
compose_file="${FETCHML_REPO_ROOT}/deployments/docker-compose.homelab-secure.yml"
|
||||
;;
|
||||
"prod")
|
||||
compose_file="deployments/docker-compose.prod.yml"
|
||||
compose_file="${FETCHML_REPO_ROOT}/deployments/docker-compose.prod.yml"
|
||||
;;
|
||||
*)
|
||||
print_error "Unknown environment: $env"
|
||||
|
|
@@ -83,14 +87,14 @@ check_compose_file() {
|
|||
check_env_file() {
|
||||
local env=$1
|
||||
|
||||
if [ ! -f ".env" ]; then
|
||||
if [ ! -f "${FETCHML_REPO_ROOT}/.env" ]; then
|
||||
print_warning ".env file not found. Creating from example..."
|
||||
if [ "$env" = "dev" ]; then
|
||||
cp deployments/env.dev.example .env
|
||||
cp "${FETCHML_REPO_ROOT}/deployments/env.dev.example" "${FETCHML_REPO_ROOT}/.env"
|
||||
elif [ "$env" = "prod" ]; then
|
||||
cp deployments/env.prod.example .env
|
||||
cp "${FETCHML_REPO_ROOT}/deployments/env.prod.example" "${FETCHML_REPO_ROOT}/.env"
|
||||
else
|
||||
cp deployments/env.dev.example .env
|
||||
cp "${FETCHML_REPO_ROOT}/deployments/env.dev.example" "${FETCHML_REPO_ROOT}/.env"
|
||||
fi
|
||||
print_warning "Please edit .env file with your configuration"
|
||||
fi
|
||||
|
|
@@ -120,7 +124,7 @@ main() {
|
|||
case $action in
|
||||
"up")
|
||||
print_status "Starting $environment environment..."
|
||||
docker-compose -f "$compose_file" up -d
|
||||
docker-compose --project-directory "${FETCHML_REPO_ROOT}" -f "$compose_file" up -d
|
||||
print_success "$environment environment started successfully!"
|
||||
|
||||
# Show service URLs
|
||||
|
|
@@ -134,21 +138,21 @@ main() {
|
|||
;;
|
||||
"down")
|
||||
print_status "Stopping $environment environment..."
|
||||
docker-compose -f "$compose_file" down
|
||||
docker-compose --project-directory "${FETCHML_REPO_ROOT}" -f "$compose_file" down
|
||||
print_success "$environment environment stopped successfully!"
|
||||
;;
|
||||
"restart")
|
||||
print_status "Restarting $environment environment..."
|
||||
docker-compose -f "$compose_file" restart
|
||||
docker-compose --project-directory "${FETCHML_REPO_ROOT}" -f "$compose_file" restart
|
||||
print_success "$environment environment restarted successfully!"
|
||||
;;
|
||||
"logs")
|
||||
print_status "Showing logs for $environment environment..."
|
||||
docker-compose -f "$compose_file" logs -f
|
||||
docker-compose --project-directory "${FETCHML_REPO_ROOT}" -f "$compose_file" logs -f
|
||||
;;
|
||||
"status")
|
||||
print_status "Status of $environment environment:"
|
||||
docker-compose -f "$compose_file" ps
|
||||
docker-compose --project-directory "${FETCHML_REPO_ROOT}" -f "$compose_file" ps
|
||||
;;
|
||||
*)
|
||||
print_error "Unknown action: $action"
|
||||
|
|
|
|||
|
|
@@ -1,6 +1,6 @@
|
|||
---
|
||||
# Homelab Docker Compose with Centralized Monitoring
|
||||
# Includes: API, Redis, Prometheus, Grafana, Loki
|
||||
|
||||
services:
|
||||
caddy:
|
||||
image: caddy:2-alpine
|
||||
|
|
@@ -16,7 +16,6 @@ services:
|
|||
depends_on:
|
||||
api-server:
|
||||
condition: service_healthy
|
||||
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
container_name: ml-experiments-redis
|
||||
|
|
@@ -28,19 +27,20 @@ services:
|
|||
restart: unless-stopped
|
||||
command: redis-server --appendonly yes
|
||||
healthcheck:
|
||||
test: [ "CMD", "redis-cli", "ping" ]
|
||||
test: ["CMD", "redis-cli", "ping"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
|
||||
api-server:
|
||||
build:
|
||||
context: ${FETCHML_REPO_ROOT:-.}
|
||||
dockerfile: ${FETCHML_REPO_ROOT:-.}/build/docker/simple.Dockerfile
|
||||
container_name: ml-experiments-api
|
||||
user: "0:0"
|
||||
ports:
|
||||
- "9101:9101"
|
||||
expose:
|
||||
- "9101" # API and health endpoints (internal; external access via Caddy)
|
||||
- "9101" # API and health endpoints (internal; external access via Caddy)
|
||||
volumes:
|
||||
- ${FETCHML_REPO_ROOT:-.}/data/dev/logs:/logs
|
||||
- ${FETCHML_REPO_ROOT:-.}/data/dev/experiments:/data/experiments
|
||||
|
|
@@ -55,7 +55,7 @@ services:
|
|||
environment:
|
||||
- LOG_LEVEL=info
|
||||
healthcheck:
|
||||
test: [ "CMD", "curl", "-f", "http://localhost:9101/health" ]
|
||||
test: ["CMD", "curl", "-f", "http://localhost:9101/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
|
|
@@ -63,7 +63,6 @@ services:
|
|||
labels:
|
||||
logging: "promtail"
|
||||
job: "api-server"
|
||||
|
||||
minio:
|
||||
image: minio/minio:latest
|
||||
container_name: ml-experiments-minio
|
||||
|
|
@@ -82,7 +81,6 @@ services:
|
|||
timeout: 5s
|
||||
retries: 10
|
||||
restart: unless-stopped
|
||||
|
||||
minio-init:
|
||||
image: alpine:3.19
|
||||
container_name: ml-experiments-minio-init
|
||||
|
|
@@ -120,7 +118,6 @@ services:
|
|||
SNAP_SHA=$$(echo -n "$$FILE_SHA" | sha256sum | cut -d' ' -f1)
|
||||
echo "snapshot_id=snap-1 snapshot_sha256=$$SNAP_SHA"
|
||||
restart: "no"
|
||||
|
||||
worker:
|
||||
build:
|
||||
context: ${FETCHML_REPO_ROOT:-.}
|
||||
|
|
@@ -148,60 +145,58 @@ services:
|
|||
- LOG_LEVEL=info
|
||||
- MINIO_ROOT_USER=minioadmin
|
||||
- MINIO_ROOT_PASSWORD=minioadmin123
|
||||
- FETCHML_JUPYTER_DEFAULT_IMAGE=quay.io/jupyter/base-notebook:latest
|
||||
- FETCHML_JUPYTER_DEFAULT_IMAGE=quay.io/jupyter/minimal-notebook:latest
|
||||
- FETCHML_JUPYTER_CONDA_ENV=base
|
||||
- FETCHML_JUPYTER_KERNEL_NAME=python
|
||||
- FETCHML_PODMAN_CGROUPS=disabled
|
||||
privileged: true
|
||||
command: ["/usr/local/bin/worker", "-config", "/app/configs/worker.yaml"]
|
||||
|
||||
# Prometheus - Metrics collection
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
container_name: ml-experiments-prometheus
|
||||
ports:
|
||||
- "9090:9090"
|
||||
volumes:
|
||||
- ${FETCHML_REPO_ROOT:-.}/monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
|
||||
- prometheus_data:/prometheus
|
||||
command:
|
||||
- '--config.file=/etc/prometheus/prometheus.yml'
|
||||
- '--storage.tsdb.path=/prometheus'
|
||||
- '--web.console.libraries=/etc/prometheus/console_libraries'
|
||||
- '--web.console.templates=/etc/prometheus/consoles'
|
||||
- '--web.enable-lifecycle'
|
||||
restart: unless-stopped
|
||||
|
||||
# Grafana - Visualization
|
||||
grafana:
|
||||
image: grafana/grafana:latest
|
||||
container_name: ml-experiments-grafana
|
||||
ports:
|
||||
- "3000:3000"
|
||||
volumes:
|
||||
- grafana_data:/var/lib/grafana
|
||||
- ${FETCHML_REPO_ROOT:-.}/monitoring/grafana/provisioning:/etc/grafana/provisioning
|
||||
- ${FETCHML_REPO_ROOT:-.}/monitoring/grafana/dashboards:/var/lib/grafana/dashboards
|
||||
environment:
|
||||
- GF_SECURITY_ADMIN_PASSWORD=admin123
|
||||
- GF_USERS_ALLOW_SIGN_UP=false
|
||||
restart: unless-stopped
|
||||
depends_on:
|
||||
- prometheus
|
||||
- loki
|
||||
|
||||
# Loki - Log aggregation
|
||||
loki:
|
||||
image: grafana/loki:latest
|
||||
container_name: ml-experiments-loki
|
||||
ports:
|
||||
- "3100:3100"
|
||||
volumes:
|
||||
- ${FETCHML_REPO_ROOT:-.}/monitoring/loki-config.yml:/etc/loki/local-config.yaml
|
||||
- loki_data:/loki
|
||||
command: -config.file=/etc/loki/local-config.yaml
|
||||
restart: unless-stopped
|
||||
|
||||
# # Prometheus - Metrics collection
|
||||
# prometheus:
|
||||
# image: prom/prometheus:latest
|
||||
# container_name: ml-experiments-prometheus
|
||||
# ports:
|
||||
# - "9090:9090"
|
||||
# volumes:
|
||||
# - ${FETCHML_REPO_ROOT:-.}/monitoring/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
|
||||
# - prometheus_data:/prometheus
|
||||
# command:
|
||||
# - '--config.file=/etc/prometheus/prometheus.yml'
|
||||
# - '--storage.tsdb.path=/prometheus'
|
||||
# - '--web.console.libraries=/etc/prometheus/console_libraries'
|
||||
# - '--web.console.templates=/etc/prometheus/consoles'
|
||||
# - '--web.enable-lifecycle'
|
||||
# restart: unless-stopped
|
||||
#
|
||||
# # Grafana - Visualization
|
||||
# grafana:
|
||||
# image: grafana/grafana:latest
|
||||
# container_name: ml-experiments-grafana
|
||||
# ports:
|
||||
# - "3000:3000"
|
||||
# volumes:
|
||||
# - grafana_data:/var/lib/grafana
|
||||
# - ${FETCHML_REPO_ROOT:-.}/monitoring/grafana/provisioning:/etc/grafana/provisioning
|
||||
# - ${FETCHML_REPO_ROOT:-.}/monitoring/grafana/dashboards:/var/lib/grafana/dashboards
|
||||
# environment:
|
||||
# - GF_SECURITY_ADMIN_PASSWORD=admin123
|
||||
# - GF_USERS_ALLOW_SIGN_UP=false
|
||||
# restart: unless-stopped
|
||||
# depends_on:
|
||||
# - prometheus
|
||||
# - loki
|
||||
#
|
||||
# # Loki - Log aggregation
|
||||
# loki:
|
||||
# image: grafana/loki:latest
|
||||
# container_name: ml-experiments-loki
|
||||
# ports:
|
||||
# - "3100:3100"
|
||||
# volumes:
|
||||
# - ${FETCHML_REPO_ROOT:-.}/monitoring/loki-config.yml:/etc/loki/local-config.yaml
|
||||
# - loki_data:/loki
|
||||
# command: -config.file=/etc/loki/local-config.yaml
|
||||
# restart: unless-stopped
|
||||
# Promtail - Log collector
|
||||
promtail:
|
||||
image: grafana/promtail:latest
|
||||
|
|
@@ -215,7 +210,6 @@ services:
|
|||
restart: unless-stopped
|
||||
depends_on:
|
||||
- loki
|
||||
|
||||
volumes:
|
||||
prometheus_data:
|
||||
driver: local
|
||||
|
|
|
|||
|
|
@@ -1,23 +1,5 @@
|
|||
# Full Production Docker Environment with Podman and SQLite
|
||||
services:
|
||||
caddy:
|
||||
image: caddy:2-alpine
|
||||
container_name: ml-prod-caddy
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "80:80"
|
||||
- "443:443"
|
||||
volumes:
|
||||
- ./Caddyfile.prod:/etc/caddy/Caddyfile:ro
|
||||
- ${FETCHML_REPO_ROOT:-.}/data/prod/caddy/data:/data
|
||||
- ${FETCHML_REPO_ROOT:-.}/data/prod/caddy/config:/config
|
||||
environment:
|
||||
- FETCHML_DOMAIN=${FETCHML_DOMAIN}
|
||||
- CADDY_EMAIL=${CADDY_EMAIL}
|
||||
depends_on:
|
||||
api-server:
|
||||
condition: service_healthy
|
||||
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
container_name: ml-prod-redis
|
||||
|
|
@@ -40,8 +22,8 @@ services:
|
|||
dockerfile: ${FETCHML_REPO_ROOT:-.}/build/docker/secure-prod.Dockerfile
|
||||
container_name: ml-prod-api
|
||||
expose:
|
||||
- "9101" # API server port (internal; external access via Caddy)
|
||||
- "2222" # Secure SSH port for Podman communication (internal)
|
||||
- "9101"
|
||||
- "2222"
|
||||
volumes:
|
||||
- ${FETCHML_REPO_ROOT:-.}/data/prod/experiments:/app/data/experiments
|
||||
- ${FETCHML_REPO_ROOT:-.}/data/prod/active:/data/active
|
||||
|
|
@@ -59,37 +41,17 @@ services:
|
|||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
# Start API server (ensure data_dir exists for snapshot/dataset validation)
|
||||
command: ["/bin/sh", "-c", "mkdir -p /data/active/datasets /data/active/snapshots && exec /usr/local/bin/api-server -config /app/configs/api/prod.yaml"]
|
||||
|
||||
minio:
|
||||
image: minio/minio:latest
|
||||
container_name: ml-prod-minio
|
||||
expose:
|
||||
- "9000"
|
||||
- "9001"
|
||||
volumes:
|
||||
- ${FETCHML_REPO_ROOT:-.}/data/prod/minio:/data
|
||||
environment:
|
||||
- MINIO_ROOT_USER=${MINIO_ROOT_USER:-minioadmin}
|
||||
- MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minioadmin123}
|
||||
command: ["server", "/data", "--console-address", ":9001"]
|
||||
restart: unless-stopped
|
||||
|
||||
minio-init:
|
||||
image: alpine:3.19
|
||||
container_name: ml-prod-minio-init
|
||||
depends_on:
|
||||
- minio
|
||||
entrypoint: ["/bin/sh", "-c"]
|
||||
command:
|
||||
- |
|
||||
apk add --no-cache ca-certificates curl >/dev/null
|
||||
curl -fsSL -o /usr/local/bin/mc https://dl.min.io/client/mc/release/linux-amd64/mc
|
||||
chmod +x /usr/local/bin/mc
|
||||
mc alias set local http://minio:9000 ${MINIO_ROOT_USER:-minioadmin} ${MINIO_ROOT_PASSWORD:-minioadmin123}
|
||||
mc mb -p local/fetchml-snapshots || true
|
||||
restart: "no"
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.docker.network=${TRAEFIK_NETWORK:-traefik}"
|
||||
- "traefik.http.services.fetchml.loadbalancer.server.port=9101"
|
||||
- "traefik.http.routers.fetchml.rule=Host(`${FETCHML_DOMAIN}`) && (PathPrefix(`/api`) || PathPrefix(`/ws`) || Path(`/health`))"
|
||||
- "traefik.http.routers.fetchml.entrypoints=${TRAEFIK_ENTRYPOINT:-websecure}"
|
||||
- "traefik.http.routers.fetchml.tls=true"
|
||||
networks:
|
||||
- default
|
||||
- traefik
|
||||
|
||||
worker:
|
||||
build:
|
||||
|
|
@@ -106,14 +68,14 @@ services:
|
|||
condition: service_healthy
|
||||
api-server:
|
||||
condition: service_healthy
|
||||
minio-init:
|
||||
condition: service_started
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
- LOG_LEVEL=info
|
||||
- MINIO_ROOT_USER=${MINIO_ROOT_USER:-minioadmin}
|
||||
- MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minioadmin123}
|
||||
privileged: true # Required for Podman to work in Docker
|
||||
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
|
||||
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
|
||||
- AWS_SESSION_TOKEN=${AWS_SESSION_TOKEN}
|
||||
- AWS_REGION=${AWS_REGION}
|
||||
privileged: true
|
||||
command: ["/usr/local/bin/worker", "-config", "/app/configs/worker.yaml"]
|
||||
|
||||
volumes: {}
|
||||
|
|
@@ -121,3 +83,6 @@ volumes: {}
|
|||
networks:
|
||||
default:
|
||||
name: ml-prod-network
|
||||
traefik:
|
||||
external: true
|
||||
name: ${TRAEFIK_NETWORK:-traefik}
|
||||
|
|
|
|||
Loading…
Reference in a new issue