chore(tools): update scripts, native libs, and documentation
Update tooling and documentation:
- Smoke test script with scheduler health checks
- Release cleanup script
- Native test scripts with Redis integration
- TUI SSH test script
- Performance regression detector with scheduler metrics
- Profiler with distributed tracing
- Native CMake with test targets
- Dataset hash tests
- Storage symlink resistance tests
- Configuration reference documentation updates
This commit is contained in:
parent
d87c556afa
commit
dddc2913e1
12 changed files with 771 additions and 354 deletions
|
|
@ -10,34 +10,70 @@ This document provides a comprehensive reference for all configuration options i
|
||||||
**File:** `configs/api/dev.yaml`
|
**File:** `configs/api/dev.yaml`
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
|
base_path: "./data/dev/experiments"
|
||||||
|
data_dir: "./data/dev/active"
|
||||||
|
|
||||||
auth:
|
auth:
|
||||||
enabled: true
|
enabled: false
|
||||||
api_keys:
|
|
||||||
dev_user:
|
|
||||||
hash: "CHANGE_ME_SHA256_DEV_USER_KEY"
|
|
||||||
admin: true
|
|
||||||
roles: ["admin"]
|
|
||||||
permissions:
|
|
||||||
"*": true
|
|
||||||
|
|
||||||
server:
|
server:
|
||||||
address: ":9101"
|
address: "0.0.0.0:9101"
|
||||||
tls:
|
tls:
|
||||||
enabled: false
|
enabled: false
|
||||||
|
cert_file: "/app/ssl/cert.pem"
|
||||||
|
key_file: "/app/ssl/key.pem"
|
||||||
|
|
||||||
security:
|
security:
|
||||||
|
production_mode: false
|
||||||
|
allowed_origins:
|
||||||
|
- "http://localhost:3000"
|
||||||
|
api_key_rotation_days: 90
|
||||||
|
audit_logging:
|
||||||
|
enabled: true
|
||||||
|
log_path: "./data/dev/logs/fetchml-audit.log"
|
||||||
rate_limit:
|
rate_limit:
|
||||||
enabled: false
|
enabled: false
|
||||||
ip_whitelist:
|
requests_per_minute: 60
|
||||||
- "127.0.0.1"
|
burst_size: 10
|
||||||
- "::1"
|
ip_whitelist: []
|
||||||
- "localhost"
|
|
||||||
|
monitoring:
|
||||||
|
prometheus:
|
||||||
|
enabled: true
|
||||||
|
port: 9101
|
||||||
|
path: "/metrics"
|
||||||
|
health_checks:
|
||||||
|
enabled: true
|
||||||
|
interval: "30s"
|
||||||
|
|
||||||
|
redis:
|
||||||
|
addr: "redis:6379"
|
||||||
|
password: ""
|
||||||
|
db: 0
|
||||||
|
|
||||||
|
database:
|
||||||
|
type: "sqlite"
|
||||||
|
connection: "./data/dev/fetchml.sqlite"
|
||||||
|
|
||||||
|
logging:
|
||||||
|
level: "info"
|
||||||
|
file: "./data/dev/logs/fetchml.log"
|
||||||
|
audit_log: "./data/dev/logs/fetchml-audit.log"
|
||||||
|
|
||||||
|
resources:
|
||||||
|
max_workers: 1
|
||||||
|
desired_rps_per_worker: 2
|
||||||
|
podman_cpus: "2"
|
||||||
|
podman_memory: "4Gi"
|
||||||
```
|
```
|
||||||
|
|
||||||
### Multi-User Setup
|
### Multi-User Setup
|
||||||
**File:** `configs/api/multi-user.yaml`
|
**File:** `configs/api/multi-user.yaml`
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
|
base_path: "/app/data/experiments"
|
||||||
|
data_dir: "/data/active"
|
||||||
|
|
||||||
auth:
|
auth:
|
||||||
enabled: true
|
enabled: true
|
||||||
api_keys:
|
api_keys:
|
||||||
|
|
@ -46,39 +82,87 @@ auth:
|
||||||
admin: true
|
admin: true
|
||||||
roles: ["user", "admin"]
|
roles: ["user", "admin"]
|
||||||
permissions:
|
permissions:
|
||||||
read: true
|
"*": true
|
||||||
write: true
|
|
||||||
delete: true
|
|
||||||
|
|
||||||
researcher1:
|
researcher1:
|
||||||
hash: "CHANGE_ME_SHA256_RESEARCHER1_KEY"
|
hash: "CHANGE_ME_SHA256_RESEARCHER1_KEY"
|
||||||
admin: false
|
admin: false
|
||||||
roles: ["user", "researcher"]
|
roles: ["user", "researcher"]
|
||||||
permissions:
|
permissions:
|
||||||
jobs:read: true
|
"jobs:read": true
|
||||||
jobs:create: true
|
"jobs:create": true
|
||||||
jobs:update: true
|
"jobs:update": true
|
||||||
jobs:delete: false
|
"jobs:delete": false
|
||||||
|
|
||||||
analyst1:
|
analyst1:
|
||||||
hash: "CHANGE_ME_SHA256_ANALYST1_KEY"
|
hash: "CHANGE_ME_SHA256_ANALYST1_KEY"
|
||||||
admin: false
|
admin: false
|
||||||
roles: ["user", "analyst"]
|
roles: ["user", "analyst"]
|
||||||
permissions:
|
permissions:
|
||||||
jobs:read: true
|
"jobs:read": true
|
||||||
jobs:create: false
|
"jobs:create": false
|
||||||
jobs:update: false
|
"jobs:update": false
|
||||||
jobs:delete: false
|
"jobs:delete": false
|
||||||
|
|
||||||
|
server:
|
||||||
|
address: ":9101"
|
||||||
|
tls:
|
||||||
|
enabled: false
|
||||||
|
|
||||||
|
security:
|
||||||
|
production_mode: false
|
||||||
|
allowed_origins: []
|
||||||
|
rate_limit:
|
||||||
|
enabled: true
|
||||||
|
requests_per_minute: 60
|
||||||
|
burst_size: 20
|
||||||
|
ip_whitelist: []
|
||||||
|
|
||||||
|
monitoring:
|
||||||
|
prometheus:
|
||||||
|
enabled: true
|
||||||
|
port: 9101
|
||||||
|
path: "/metrics"
|
||||||
|
health_checks:
|
||||||
|
enabled: true
|
||||||
|
interval: "30s"
|
||||||
|
|
||||||
|
redis:
|
||||||
|
url: "redis://redis:6379"
|
||||||
|
password: ""
|
||||||
|
db: 0
|
||||||
|
|
||||||
|
database:
|
||||||
|
type: "sqlite"
|
||||||
|
connection: "/app/data/experiments/fetch_ml.sqlite"
|
||||||
|
|
||||||
|
logging:
|
||||||
|
level: "info"
|
||||||
|
file: "/logs/app.log"
|
||||||
|
audit_log: ""
|
||||||
|
|
||||||
|
resources:
|
||||||
|
max_workers: 3
|
||||||
|
desired_rps_per_worker: 3
|
||||||
|
podman_cpus: "2"
|
||||||
|
podman_memory: "4Gi"
|
||||||
```
|
```
|
||||||
|
|
||||||
### Production
|
### Production
|
||||||
**File:** `configs/api/prod.yaml`
|
**File:** `configs/api/prod.yaml`
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
|
base_path: "/app/data/prod/experiments"
|
||||||
|
data_dir: "/app/data/prod/active"
|
||||||
|
|
||||||
auth:
|
auth:
|
||||||
enabled: true
|
enabled: true
|
||||||
api_keys:
|
api_keys:
|
||||||
# Production users configured here
|
admin:
|
||||||
|
hash: "replace-with-sha256-of-your-api-key"
|
||||||
|
admin: true
|
||||||
|
roles:
|
||||||
|
- admin
|
||||||
|
permissions:
|
||||||
|
"*": true
|
||||||
|
|
||||||
server:
|
server:
|
||||||
address: ":9101"
|
address: ":9101"
|
||||||
|
|
@ -88,29 +172,270 @@ server:
|
||||||
key_file: "/app/ssl/key.pem"
|
key_file: "/app/ssl/key.pem"
|
||||||
|
|
||||||
security:
|
security:
|
||||||
|
production_mode: false
|
||||||
|
allowed_origins: []
|
||||||
rate_limit:
|
rate_limit:
|
||||||
enabled: true
|
enabled: true
|
||||||
requests_per_minute: 30
|
requests_per_minute: 60
|
||||||
ip_whitelist:
|
burst_size: 10
|
||||||
- "127.0.0.1"
|
ip_whitelist: []
|
||||||
- "::1"
|
|
||||||
- "192.168.0.0/16"
|
monitoring:
|
||||||
- "10.0.0.0/8"
|
prometheus:
|
||||||
|
enabled: true
|
||||||
|
port: 9101
|
||||||
|
path: "/metrics"
|
||||||
|
health_checks:
|
||||||
|
enabled: true
|
||||||
|
interval: "30s"
|
||||||
|
|
||||||
redis:
|
redis:
|
||||||
addr: "redis:6379"
|
addr: "redis:6379"
|
||||||
password: ""
|
password: ""
|
||||||
db: 0
|
db: 0
|
||||||
|
|
||||||
|
database:
|
||||||
|
type: "sqlite"
|
||||||
|
connection: "/app/data/prod/fetch_ml.sqlite"
|
||||||
|
|
||||||
logging:
|
logging:
|
||||||
level: "info"
|
level: "info"
|
||||||
file: "/app/logs/app.log"
|
file: "/app/data/prod/logs/fetch_ml.log"
|
||||||
audit_log: "/app/logs/audit.log"
|
audit_log: "/app/data/prod/logs/audit.log"
|
||||||
|
|
||||||
|
resources:
|
||||||
|
max_workers: 2
|
||||||
|
desired_rps_per_worker: 5
|
||||||
|
podman_cpus: "2"
|
||||||
|
podman_memory: "4Gi"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Homelab Secure
|
||||||
|
**File:** `configs/api/homelab-secure.yaml`
|
||||||
|
|
||||||
|
Secure configuration for homelab deployments with production-grade security settings:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
base_path: "/data/experiments"
|
||||||
|
data_dir: "/data/active"
|
||||||
|
|
||||||
|
auth:
|
||||||
|
enabled: true
|
||||||
|
api_keys:
|
||||||
|
homelab_admin:
|
||||||
|
hash: "CHANGE_ME_SHA256_HOMELAB_ADMIN_KEY"
|
||||||
|
admin: true
|
||||||
|
roles:
|
||||||
|
- admin
|
||||||
|
permissions:
|
||||||
|
"*": true
|
||||||
|
homelab_user:
|
||||||
|
hash: "CHANGE_ME_SHA256_HOMELAB_USER_KEY"
|
||||||
|
admin: false
|
||||||
|
roles:
|
||||||
|
- researcher
|
||||||
|
permissions:
|
||||||
|
experiments: true
|
||||||
|
datasets: true
|
||||||
|
jupyter: true
|
||||||
|
|
||||||
|
server:
|
||||||
|
address: ":9101"
|
||||||
|
tls:
|
||||||
|
enabled: false
|
||||||
|
cert_file: "/app/ssl/cert.pem"
|
||||||
|
key_file: "/app/ssl/key.pem"
|
||||||
|
|
||||||
|
security:
|
||||||
|
production_mode: true
|
||||||
|
allowed_origins:
|
||||||
|
- "https://ml-experiments.example.com"
|
||||||
|
rate_limit:
|
||||||
|
enabled: true
|
||||||
|
requests_per_minute: 60
|
||||||
|
burst_size: 10
|
||||||
|
ip_whitelist:
|
||||||
|
- "127.0.0.1"
|
||||||
|
- "192.168.0.0/16"
|
||||||
|
|
||||||
|
monitoring:
|
||||||
|
prometheus:
|
||||||
|
enabled: true
|
||||||
|
port: 9101
|
||||||
|
path: "/metrics"
|
||||||
|
health_checks:
|
||||||
|
enabled: true
|
||||||
|
interval: "30s"
|
||||||
|
|
||||||
|
redis:
|
||||||
|
url: "redis://:CHANGE_ME_REDIS_PASSWORD@redis:6379"
|
||||||
|
password: ""
|
||||||
|
db: 0
|
||||||
|
|
||||||
|
database:
|
||||||
|
type: "sqlite"
|
||||||
|
connection: "/data/experiments/fetch_ml.sqlite"
|
||||||
|
|
||||||
|
logging:
|
||||||
|
level: "info"
|
||||||
|
file: "/logs/fetch_ml.log"
|
||||||
|
audit_log: ""
|
||||||
|
|
||||||
|
resources:
|
||||||
|
max_workers: 1
|
||||||
|
desired_rps_per_worker: 2
|
||||||
|
podman_cpus: "2"
|
||||||
|
podman_memory: "4Gi"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Worker Configurations
|
## Worker Configurations
|
||||||
|
|
||||||
### Production Worker
|
### Local Development Worker
|
||||||
|
**File:** `configs/workers/dev-local.yaml`
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
worker_id: "local-worker"
|
||||||
|
base_path: "data/dev/experiments"
|
||||||
|
train_script: "train.py"
|
||||||
|
|
||||||
|
redis_url: "redis://localhost:6379/0"
|
||||||
|
|
||||||
|
local_mode: true
|
||||||
|
|
||||||
|
prewarm_enabled: false
|
||||||
|
|
||||||
|
max_workers: 2
|
||||||
|
poll_interval_seconds: 2
|
||||||
|
|
||||||
|
auto_fetch_data: false
|
||||||
|
|
||||||
|
data_manager_path: "./data_manager"
|
||||||
|
dataset_cache_ttl: "30m"
|
||||||
|
|
||||||
|
data_dir: "data/dev/active"
|
||||||
|
|
||||||
|
snapshot_store:
|
||||||
|
enabled: false
|
||||||
|
|
||||||
|
podman_image: "python:3.9-slim"
|
||||||
|
container_workspace: "/workspace"
|
||||||
|
container_results: "/results"
|
||||||
|
gpu_devices: []
|
||||||
|
gpu_vendor: "apple"
|
||||||
|
gpu_visible_devices: []
|
||||||
|
|
||||||
|
# Apple M-series GPU configuration
|
||||||
|
apple_gpu:
|
||||||
|
enabled: true
|
||||||
|
metal_device: "/dev/metal"
|
||||||
|
mps_runtime: "/dev/mps"
|
||||||
|
|
||||||
|
resources:
|
||||||
|
max_workers: 2
|
||||||
|
desired_rps_per_worker: 2
|
||||||
|
podman_cpus: "2"
|
||||||
|
podman_memory: "4Gi"
|
||||||
|
|
||||||
|
metrics:
|
||||||
|
enabled: false
|
||||||
|
|
||||||
|
queue:
|
||||||
|
type: "native"
|
||||||
|
native:
|
||||||
|
data_dir: "data/dev/queue"
|
||||||
|
|
||||||
|
task_lease_duration: "30m"
|
||||||
|
heartbeat_interval: "1m"
|
||||||
|
max_retries: 3
|
||||||
|
graceful_timeout: "5m"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Homelab Secure Worker
|
||||||
|
**File:** `configs/workers/homelab-secure.yaml`
|
||||||
|
|
||||||
|
Secure worker configuration with snapshot store and Redis authentication:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
worker_id: "homelab-worker"
|
||||||
|
base_path: "/tmp/fetchml-jobs"
|
||||||
|
train_script: "train.py"
|
||||||
|
|
||||||
|
redis_url: "redis://:${REDIS_PASSWORD}@redis:6379/0"
|
||||||
|
|
||||||
|
local_mode: true
|
||||||
|
|
||||||
|
max_workers: 1
|
||||||
|
poll_interval_seconds: 2
|
||||||
|
|
||||||
|
auto_fetch_data: false
|
||||||
|
|
||||||
|
data_manager_path: "./data_manager"
|
||||||
|
dataset_cache_ttl: "30m"
|
||||||
|
|
||||||
|
data_dir: "/data/active"
|
||||||
|
|
||||||
|
snapshot_store:
|
||||||
|
enabled: true
|
||||||
|
endpoint: "minio:9000"
|
||||||
|
secure: false
|
||||||
|
bucket: "fetchml-snapshots"
|
||||||
|
prefix: "snapshots"
|
||||||
|
timeout: "5m"
|
||||||
|
max_retries: 3
|
||||||
|
|
||||||
|
podman_image: "python:3.9-slim"
|
||||||
|
container_workspace: "/workspace"
|
||||||
|
container_results: "/results"
|
||||||
|
gpu_devices: []
|
||||||
|
|
||||||
|
resources:
|
||||||
|
max_workers: 1
|
||||||
|
desired_rps_per_worker: 2
|
||||||
|
podman_cpus: "2"
|
||||||
|
podman_memory: "4Gi"
|
||||||
|
|
||||||
|
metrics:
|
||||||
|
enabled: true
|
||||||
|
listen_addr: ":9100"
|
||||||
|
metrics_flush_interval: "500ms"
|
||||||
|
|
||||||
|
task_lease_duration: "30m"
|
||||||
|
heartbeat_interval: "1m"
|
||||||
|
max_retries: 3
|
||||||
|
graceful_timeout: "5m"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Docker Development Worker
|
||||||
|
**File:** `configs/workers/docker.yaml`
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
worker_id: "docker-worker"
|
||||||
|
base_path: "/tmp/fetchml-jobs"
|
||||||
|
train_script: "train.py"
|
||||||
|
|
||||||
|
redis_addr: "redis:6379"
|
||||||
|
redis_password: ""
|
||||||
|
redis_db: 0
|
||||||
|
|
||||||
|
local_mode: true
|
||||||
|
|
||||||
|
max_workers: 1
|
||||||
|
poll_interval_seconds: 5
|
||||||
|
|
||||||
|
podman_image: "python:3.9-slim"
|
||||||
|
container_workspace: "/workspace"
|
||||||
|
container_results: "/results"
|
||||||
|
gpu_devices: []
|
||||||
|
gpu_vendor: "none"
|
||||||
|
gpu_visible_devices: []
|
||||||
|
|
||||||
|
metrics:
|
||||||
|
enabled: true
|
||||||
|
listen_addr: ":9100"
|
||||||
|
metrics_flush_interval: "500ms"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Legacy TOML Worker (Deprecated)
|
||||||
**File:** `configs/workers/worker-prod.toml`
|
**File:** `configs/workers/worker-prod.toml`
|
||||||
|
|
||||||
```toml
|
```toml
|
||||||
|
|
@ -146,48 +471,57 @@ enabled = true
|
||||||
listen_addr = ":9100"
|
listen_addr = ":9100"
|
||||||
```
|
```
|
||||||
|
|
||||||
```toml
|
## Security Hardening
|
||||||
# Production Worker (NVIDIA, UUID-based GPU selection)
|
|
||||||
worker_id = "worker-prod-01"
|
|
||||||
base_path = "/data/ml-experiments"
|
|
||||||
|
|
||||||
podman_image = "ml-training:latest"
|
### Seccomp Profiles
|
||||||
gpu_vendor = "nvidia"
|
|
||||||
gpu_visible_device_ids = ["GPU-REPLACE_WITH_REAL_UUID"]
|
FetchML includes a hardened seccomp profile for container sandboxing at `configs/seccomp/default-hardened.json`.
|
||||||
gpu_devices = ["/dev/dri"]
|
|
||||||
container_workspace = "/workspace"
|
**Features:**
|
||||||
container_results = "/results"
|
- **Default-deny policy**: `SCMP_ACT_ERRNO` blocks all syscalls by default
|
||||||
train_script = "train.py"
|
- **Allowlist approach**: Only explicitly permitted syscalls are allowed
|
||||||
|
- **Multi-architecture support**: x86_64, x86, aarch64
|
||||||
|
- **Blocked dangerous syscalls**: ptrace, mount, umount2, reboot, kexec_load, open_by_handle_at, perf_event_open
|
||||||
|
|
||||||
|
**Usage with Docker/Podman:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Docker with seccomp
|
||||||
|
docker run --security-opt seccomp=configs/seccomp/default-hardened.json \
|
||||||
|
-v /data:/data:ro \
|
||||||
|
my-image:latest
|
||||||
|
|
||||||
|
# Podman with seccomp
|
||||||
|
podman run --security-opt seccomp=configs/seccomp/default-hardened.json \
|
||||||
|
--read-only \
|
||||||
|
--no-new-privileges \
|
||||||
|
my-image:latest
|
||||||
```
|
```
|
||||||
|
|
||||||
### Docker Worker
|
**Key Allowed Syscalls:**
|
||||||
**File:** `configs/workers/docker.yaml`
|
- File operations: `open`, `openat`, `read`, `write`, `close`
|
||||||
|
- Memory: `mmap`, `munmap`, `mprotect`, `brk`
|
||||||
|
- Process: `clone`, `fork`, `execve`, `exit`, `wait4`
|
||||||
|
- Network: `socket`, `bind`, `listen`, `accept`, `connect`, `sendto`, `recvfrom`
|
||||||
|
- Signals: `rt_sigaction`, `rt_sigprocmask`, `kill`, `tkill`
|
||||||
|
- Time: `clock_gettime`, `gettimeofday`, `nanosleep`
|
||||||
|
- I/O: `epoll_create`, `epoll_ctl`, `epoll_wait`, `poll`, `select`
|
||||||
|
|
||||||
```yaml
|
**Customization:**
|
||||||
worker_id: "docker-worker"
|
|
||||||
base_path: "/tmp/fetchml-jobs"
|
|
||||||
train_script: "train.py"
|
|
||||||
|
|
||||||
redis_addr: "redis:6379"
|
Copy the default profile and modify for your needs:
|
||||||
redis_password: ""
|
|
||||||
redis_db: 0
|
|
||||||
|
|
||||||
local_mode: true
|
```bash
|
||||||
|
cp configs/seccomp/default-hardened.json configs/seccomp/custom-profile.json
|
||||||
|
# Edit to add/remove syscalls
|
||||||
|
```
|
||||||
|
|
||||||
max_workers: 1
|
**Testing Seccomp:**
|
||||||
poll_interval_seconds: 5
|
|
||||||
|
|
||||||
podman_image: "python:3.9-slim"
|
```bash
|
||||||
container_workspace: "/workspace"
|
# Test with a simple container
|
||||||
container_results: "/results"
|
docker run --rm --security-opt seccomp=configs/seccomp/default-hardened.json \
|
||||||
gpu_devices: []
|
alpine:latest echo "Seccomp test passed"
|
||||||
gpu_vendor: "none"
|
|
||||||
gpu_visible_devices: []
|
|
||||||
|
|
||||||
metrics:
|
|
||||||
enabled: true
|
|
||||||
listen_addr: ":9100"
|
|
||||||
metrics_flush_interval: "500ms"
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## CLI Configuration
|
## CLI Configuration
|
||||||
|
|
@ -274,15 +608,70 @@ api_key = "<analyst-api-key>"
|
||||||
|
|
||||||
| Option | Type | Default | Description |
|
| Option | Type | Default | Description |
|
||||||
|--------|------|---------|-------------|
|
|--------|------|---------|-------------|
|
||||||
|
| `security.production_mode` | bool | false | Enable production hardening |
|
||||||
|
| `security.allowed_origins` | array | [] | Allowed CORS origins |
|
||||||
|
| `security.api_key_rotation_days` | int | 90 | Days until API key rotation required |
|
||||||
|
| `security.audit_logging.enabled` | bool | false | Enable audit logging |
|
||||||
|
| `security.audit_logging.log_path` | string | - | Audit log file path |
|
||||||
| `security.rate_limit.enabled` | bool | true | Enable rate limiting |
|
| `security.rate_limit.enabled` | bool | true | Enable rate limiting |
|
||||||
| `security.rate_limit.requests_per_minute` | int | 60 | Rate limit |
|
| `security.rate_limit.requests_per_minute` | int | 60 | Requests per minute limit |
|
||||||
| `security.ip_whitelist` | array | [] | Allowed IP addresses |
|
| `security.rate_limit.burst_size` | int | 10 | Burst request allowance |
|
||||||
|
| `security.ip_whitelist` | array | [] | Allowed IP addresses/CIDR ranges |
|
||||||
|
| `security.failed_login_lockout.enabled` | bool | false | Enable login lockout |
|
||||||
|
| `security.failed_login_lockout.max_attempts` | int | 5 | Max failed attempts before lockout |
|
||||||
|
| `security.failed_login_lockout.lockout_duration` | string | "15m" | Lockout duration (e.g., "15m") |
|
||||||
|
|
||||||
|
### Monitoring
|
||||||
|
|
||||||
|
| Option | Type | Default | Description |
|
||||||
|
|--------|------|---------|-------------|
|
||||||
|
| `monitoring.prometheus.enabled` | bool | true | Enable Prometheus metrics |
|
||||||
|
| `monitoring.prometheus.port` | int | 9101 | Prometheus metrics port |
|
||||||
|
| `monitoring.prometheus.path` | string | "/metrics" | Metrics endpoint path |
|
||||||
|
| `monitoring.health_checks.enabled` | bool | true | Enable health checks |
|
||||||
|
| `monitoring.health_checks.interval` | string | "30s" | Health check interval |
|
||||||
|
|
||||||
|
### Database
|
||||||
|
|
||||||
|
| Option | Type | Default | Description |
|
||||||
|
|--------|------|---------|-------------|
|
||||||
|
| `database.type` | string | "sqlite" | Database type (sqlite, postgres, mysql) |
|
||||||
|
| `database.connection` | string | - | Connection string or path |
|
||||||
|
| `database.host` | string | - | Database host (for postgres/mysql) |
|
||||||
|
| `database.port` | int | - | Database port (for postgres/mysql) |
|
||||||
|
| `database.username` | string | - | Database username |
|
||||||
|
| `database.password` | string | - | Database password |
|
||||||
|
| `database.database` | string | - | Database name |
|
||||||
|
|
||||||
|
### Queue
|
||||||
|
|
||||||
|
| Option | Type | Default | Description |
|
||||||
|
|--------|------|---------|-------------|
|
||||||
|
| `queue.type` | string | "native" | Queue backend type (native, redis, sqlite, filesystem) |
|
||||||
|
| `queue.native.data_dir` | string | - | Data directory for native queue |
|
||||||
|
| `queue.sqlite_path` | string | - | SQLite database path for queue |
|
||||||
|
| `queue.filesystem_path` | string | - | Filesystem queue path |
|
||||||
|
| `queue.fallback_to_filesystem` | bool | false | Fall back to filesystem on Redis failure |
|
||||||
|
|
||||||
|
### Resources
|
||||||
|
|
||||||
|
| Option | Type | Default | Description |
|
||||||
|
|--------|------|---------|-------------|
|
||||||
|
| `resources.max_workers` | int | 1 | Maximum concurrent workers |
|
||||||
|
| `resources.desired_rps_per_worker` | int | 2 | Desired requests per second per worker |
|
||||||
|
| `resources.requests_per_sec` | int | - | Global request rate limit |
|
||||||
|
| `resources.request_burst` | int | - | Request burst allowance |
|
||||||
|
| `resources.podman_cpus` | string | "2" | CPU limit for Podman containers |
|
||||||
|
| `resources.podman_memory` | string | "4Gi" | Memory limit for Podman containers |
|
||||||
|
|
||||||
### Redis
|
### Redis
|
||||||
|
|
||||||
| Option | Type | Default | Description |
|
| Option | Type | Default | Description |
|
||||||
|--------|------|---------|-------------|
|
|--------|------|---------|-------------|
|
||||||
| `redis.url` | string | "redis://localhost:6379" | Redis connection URL |
|
| `redis.url` | string | "redis://localhost:6379" | Redis connection URL |
|
||||||
|
| `redis.addr` | string | - | Redis host:port shorthand |
|
||||||
|
| `redis.password` | string | - | Redis password |
|
||||||
|
| `redis.db` | int | 0 | Redis database number |
|
||||||
| `redis.max_connections` | int | 10 | Max Redis connections |
|
| `redis.max_connections` | int | 10 | Max Redis connections |
|
||||||
|
|
||||||
### Logging
|
### Logging
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,9 @@ set(COMMON_SOURCES
|
||||||
|
|
||||||
add_library(fetchml_common STATIC ${COMMON_SOURCES})
|
add_library(fetchml_common STATIC ${COMMON_SOURCES})
|
||||||
|
|
||||||
|
# Required for linking into shared libraries on Alpine Linux
|
||||||
|
set_property(TARGET fetchml_common PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||||
|
|
||||||
target_include_directories(fetchml_common PUBLIC
|
target_include_directories(fetchml_common PUBLIC
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/include
|
${CMAKE_CURRENT_SOURCE_DIR}/include
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -248,8 +248,8 @@ int test_performance() {
|
||||||
auto create_time = duration_cast<milliseconds>(create_end - start);
|
auto create_time = duration_cast<milliseconds>(create_end - start);
|
||||||
auto hash_time = duration_cast<milliseconds>(hash_end - create_end);
|
auto hash_time = duration_cast<milliseconds>(hash_end - create_end);
|
||||||
|
|
||||||
printf(" Created %d files in %lld ms\n", num_files, create_time.count());
|
printf(" Created %d files in %ld ms\n", num_files, create_time.count());
|
||||||
printf(" Hashed %d files in %lld ms\n", num_files, hash_time.count());
|
printf(" Hashed %d files in %ld ms\n", num_files, hash_time.count());
|
||||||
printf(" Throughput: %.1f files/sec\n", num_files * 1000.0 / hash_time.count());
|
printf(" Throughput: %.1f files/sec\n", num_files * 1000.0 / hash_time.count());
|
||||||
|
|
||||||
fh_free_string(hash);
|
fh_free_string(hash);
|
||||||
|
|
|
||||||
|
|
@ -8,79 +8,71 @@
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
#include <filesystem>
|
||||||
#include "../native/queue_index/storage/index_storage.h"
|
#include "../native/queue_index/storage/index_storage.h"
|
||||||
|
|
||||||
// Get absolute path of current working directory
|
namespace fs = std::filesystem;
|
||||||
static std::string get_cwd() {
|
|
||||||
char buf[PATH_MAX];
|
|
||||||
if (getcwd(buf, sizeof(buf)) != nullptr) {
|
|
||||||
return std::string(buf);
|
|
||||||
}
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
// Test: Verify O_EXCL prevents symlink attacks on .tmp file (CVE-2024-45339)
|
// Test: Verify O_EXCL prevents symlink attacks on .tmp file (CVE-2024-45339)
|
||||||
static int test_symlink_attack_prevention() {
|
static int test_symlink_attack_prevention() {
|
||||||
printf(" Testing symlink attack prevention (CVE-2024-45339)...\n");
|
printf(" Testing symlink attack prevention (CVE-2024-45339)...\n");
|
||||||
|
|
||||||
std::string cwd = get_cwd();
|
// Create temp directory using mkdtemp for security
|
||||||
char base_dir[4096];
|
char base_dir_template[] = "/tmp/test_symlink_XXXXXX";
|
||||||
snprintf(base_dir, sizeof(base_dir), "%s/test_symlink_XXXXXX", cwd.c_str());
|
char* base_dir_ptr = mkdtemp(base_dir_template);
|
||||||
|
if (base_dir_ptr == nullptr) {
|
||||||
if (mkdtemp(base_dir) == nullptr) {
|
|
||||||
printf(" ERROR: mkdtemp failed\n");
|
printf(" ERROR: mkdtemp failed\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
fs::path base_dir(base_dir_ptr);
|
||||||
|
|
||||||
// Create a fake index.bin file
|
// Create paths using std::filesystem
|
||||||
char index_path[4096];
|
fs::path index_path = base_dir / "index.bin";
|
||||||
snprintf(index_path, sizeof(index_path), "%s/index.bin", base_dir);
|
fs::path decoy_path = base_dir / "decoy.txt";
|
||||||
|
fs::path tmp_path = base_dir / "index.bin.tmp";
|
||||||
|
|
||||||
// Create a decoy file that a symlink attack would try to overwrite
|
// Create a decoy file that a symlink attack would try to overwrite
|
||||||
char decoy_path[4096];
|
FILE* f = fopen(decoy_path.c_str(), "w");
|
||||||
snprintf(decoy_path, sizeof(decoy_path), "%s/decoy.txt", base_dir);
|
|
||||||
FILE* f = fopen(decoy_path, "w");
|
|
||||||
if (!f) {
|
if (!f) {
|
||||||
printf(" ERROR: failed to create decoy file\n");
|
printf(" ERROR: failed to create decoy file\n");
|
||||||
rmdir(base_dir);
|
rmdir(base_dir.c_str());
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
fprintf(f, "sensitive data that should not be overwritten\n");
|
fprintf(f, "sensitive data that should not be overwritten\n");
|
||||||
fclose(f);
|
fclose(f);
|
||||||
|
|
||||||
// Create a symlink at index.bin.tmp pointing to the decoy
|
// Create a symlink at index.bin.tmp pointing to the decoy
|
||||||
char tmp_path[4096];
|
if (symlink(decoy_path.c_str(), tmp_path.c_str()) != 0) {
|
||||||
snprintf(tmp_path, sizeof(tmp_path), "%s/index.bin.tmp", base_dir);
|
|
||||||
if (symlink(decoy_path, tmp_path) != 0) {
|
|
||||||
printf(" ERROR: failed to create symlink\n");
|
printf(" ERROR: failed to create symlink\n");
|
||||||
unlink(decoy_path);
|
unlink(decoy_path.c_str());
|
||||||
rmdir(base_dir);
|
rmdir(base_dir.c_str());
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now try to initialize storage - it should fail or not follow the symlink
|
// Now try to initialize storage - it should fail or not follow the symlink
|
||||||
IndexStorage storage;
|
IndexStorage storage;
|
||||||
if (!storage_init(&storage, base_dir)) {
|
if (!storage_init(&storage, base_dir.c_str())) {
|
||||||
printf(" ERROR: storage_init failed\n");
|
printf(" ERROR: storage_init failed\n");
|
||||||
unlink(tmp_path);
|
unlink(tmp_path.c_str());
|
||||||
unlink(decoy_path);
|
unlink(decoy_path.c_str());
|
||||||
rmdir(base_dir);
|
rmdir(base_dir.c_str());
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try to open storage - this will attempt to write to .tmp file
|
// Try to open storage - this will attempt to write to .tmp file
|
||||||
// With O_EXCL, it should fail because the symlink exists
|
// With O_EXCL, it should fail because the symlink exists
|
||||||
bool open_result = storage_open(&storage);
|
bool open_result = storage_open(&storage);
|
||||||
|
(void)open_result; // Suppress unused warning - we're testing side effects
|
||||||
|
|
||||||
// Clean up
|
// Clean up
|
||||||
storage_cleanup(&storage);
|
storage_cleanup(&storage);
|
||||||
unlink(tmp_path);
|
unlink(tmp_path.c_str());
|
||||||
unlink(decoy_path);
|
unlink(decoy_path.c_str());
|
||||||
unlink(index_path);
|
unlink(index_path.c_str());
|
||||||
rmdir(base_dir);
|
rmdir(base_dir.c_str());
|
||||||
|
|
||||||
// Verify the decoy file was NOT overwritten (symlink attack failed)
|
// Verify the decoy file was NOT overwritten (symlink attack failed)
|
||||||
FILE* check = fopen(decoy_path, "r");
|
FILE* check = fopen(decoy_path.c_str(), "r");
|
||||||
if (check) {
|
if (check) {
|
||||||
char buf[256];
|
char buf[256];
|
||||||
if (fgets(buf, sizeof(buf), check) != nullptr) {
|
if (fgets(buf, sizeof(buf), check) != nullptr) {
|
||||||
|
|
@ -103,22 +95,24 @@ static int test_symlink_attack_prevention() {
|
||||||
static int test_stale_temp_file_handling() {
|
static int test_stale_temp_file_handling() {
|
||||||
printf(" Testing stale temp file handling...\n");
|
printf(" Testing stale temp file handling...\n");
|
||||||
|
|
||||||
std::string cwd = get_cwd();
|
// Create temp directory using mkdtemp
|
||||||
char base_dir[4096];
|
char base_dir_template[] = "/tmp/test_stale_XXXXXX";
|
||||||
snprintf(base_dir, sizeof(base_dir), "%s/test_stale_XXXXXX", cwd.c_str());
|
char* base_dir_ptr = mkdtemp(base_dir_template);
|
||||||
|
if (base_dir_ptr == nullptr) {
|
||||||
if (mkdtemp(base_dir) == nullptr) {
|
|
||||||
printf(" ERROR: mkdtemp failed\n");
|
printf(" ERROR: mkdtemp failed\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
fs::path base_dir(base_dir_ptr);
|
||||||
|
|
||||||
|
// Create paths using std::filesystem
|
||||||
|
fs::path tmp_path = base_dir / "index.bin.tmp";
|
||||||
|
fs::path index_path = base_dir / "index.bin";
|
||||||
|
|
||||||
// Create a stale temp file
|
// Create a stale temp file
|
||||||
char tmp_path[4096];
|
FILE* f = fopen(tmp_path.c_str(), "w");
|
||||||
snprintf(tmp_path, sizeof(tmp_path), "%s/index.bin.tmp", base_dir);
|
|
||||||
FILE* f = fopen(tmp_path, "w");
|
|
||||||
if (!f) {
|
if (!f) {
|
||||||
printf(" ERROR: failed to create stale temp file\n");
|
printf(" ERROR: failed to create stale temp file\n");
|
||||||
rmdir(base_dir);
|
rmdir(base_dir.c_str());
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
fprintf(f, "stale data\n");
|
fprintf(f, "stale data\n");
|
||||||
|
|
@ -126,18 +120,18 @@ static int test_stale_temp_file_handling() {
|
||||||
|
|
||||||
// Initialize and open storage - should remove stale file and succeed
|
// Initialize and open storage - should remove stale file and succeed
|
||||||
IndexStorage storage;
|
IndexStorage storage;
|
||||||
if (!storage_init(&storage, base_dir)) {
|
if (!storage_init(&storage, base_dir.c_str())) {
|
||||||
printf(" ERROR: storage_init failed\n");
|
printf(" ERROR: storage_init failed\n");
|
||||||
unlink(tmp_path);
|
unlink(tmp_path.c_str());
|
||||||
rmdir(base_dir);
|
rmdir(base_dir.c_str());
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!storage_open(&storage)) {
|
if (!storage_open(&storage)) {
|
||||||
printf(" ERROR: storage_open failed to handle stale temp file\n");
|
printf(" ERROR: storage_open failed to handle stale temp file\n");
|
||||||
unlink(tmp_path);
|
unlink(tmp_path.c_str());
|
||||||
storage_cleanup(&storage);
|
storage_cleanup(&storage);
|
||||||
rmdir(base_dir);
|
rmdir(base_dir.c_str());
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -152,17 +146,15 @@ static int test_stale_temp_file_handling() {
|
||||||
if (!storage_write_entries(&storage, entries, 1)) {
|
if (!storage_write_entries(&storage, entries, 1)) {
|
||||||
printf(" ERROR: storage_write_entries failed\n");
|
printf(" ERROR: storage_write_entries failed\n");
|
||||||
storage_cleanup(&storage);
|
storage_cleanup(&storage);
|
||||||
rmdir(base_dir);
|
rmdir(base_dir.c_str());
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clean up
|
// Clean up
|
||||||
storage_cleanup(&storage);
|
storage_cleanup(&storage);
|
||||||
char index_path[4096];
|
unlink(index_path.c_str());
|
||||||
snprintf(index_path, sizeof(index_path), "%s/index.bin", base_dir);
|
unlink(tmp_path.c_str());
|
||||||
unlink(index_path);
|
rmdir(base_dir.c_str());
|
||||||
unlink(tmp_path);
|
|
||||||
rmdir(base_dir);
|
|
||||||
|
|
||||||
printf(" Stale temp file handling: PASSED\n");
|
printf(" Stale temp file handling: PASSED\n");
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
||||||
165
scripts/check-audit-sink.sh
Normal file
165
scripts/check-audit-sink.sh
Normal file
|
|
@ -0,0 +1,165 @@
|
||||||
|
#!/bin/bash
# Pre-deployment audit sink gate script
# Verifies the write-once audit sink is reachable and writable
#
# Exit codes:
#   0 - no check failed (individual checks may have been skipped; see summary)
#   1 - invalid usage, unknown environment, sink unreachable, or write rejected

set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Default values
ENV="staging"
TIMEOUT=10
AUDIT_SINK_HOST=""
AUDIT_SINK_PORT=""
DEFAULT_PORT="6379"

# Track which checks actually ran, so the summary never claims more than was verified
REACHABILITY_VERIFIED=false
WRITE_VERIFIED=false

# Abort with a clear message when an option is missing its value.
# Without this, `shift 2` fails under `set -e` and the script exits silently.
#   $1 - option name, $2 - number of arguments remaining (including the option)
require_value() {
    if [[ $2 -lt 2 ]]; then
        echo "Error: $1 requires a value" >&2
        exit 1
    fi
}

# Parse arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        --env)
            require_value "$1" "$#"
            ENV="$2"
            shift 2
            ;;
        --timeout)
            require_value "$1" "$#"
            TIMEOUT="$2"
            shift 2
            ;;
        --host)
            require_value "$1" "$#"
            AUDIT_SINK_HOST="$2"
            shift 2
            ;;
        --port)
            require_value "$1" "$#"
            AUDIT_SINK_PORT="$2"
            shift 2
            ;;
        --help)
            echo "Usage: $0 [OPTIONS]"
            echo ""
            echo "Options:"
            echo "  --env ENV           Environment (staging|prod) [default: staging]"
            echo "  --timeout SECONDS   Timeout in seconds [default: 10]"
            echo "  --host HOST         Audit sink host (auto-detected if not set)"
            echo "  --port PORT         Audit sink port (auto-detected if not set)"
            echo "  --help              Show this help message"
            exit 0
            ;;
        *)
            echo "Unknown option: $1" >&2
            exit 1
            ;;
    esac
done

# Auto-detect audit sink host based on environment (only when --host was not given)
if [ -z "$AUDIT_SINK_HOST" ]; then
    case $ENV in
        staging)
            AUDIT_SINK_HOST="ml-staging-audit-sink"
            ;;
        prod)
            AUDIT_SINK_HOST="ml-prod-audit-sink"
            ;;
        *)
            echo -e "${RED}Error: Unknown environment '$ENV'${NC}"
            exit 1
            ;;
    esac
fi

# Default the port independently of the host: an explicit --port must not be
# overwritten, and an explicit --host without --port must not leave it empty.
AUDIT_SINK_PORT="${AUDIT_SINK_PORT:-$DEFAULT_PORT}"

echo "Checking audit sink for environment: $ENV"
echo "Host: $AUDIT_SINK_HOST"
echo "Port: $AUDIT_SINK_PORT"
echo "Timeout: ${TIMEOUT}s"

# Check if we can reach the audit sink
echo ""
echo "Step 1: Checking network reachability..."

if command -v nc &> /dev/null; then
    if timeout "$TIMEOUT" nc -z "$AUDIT_SINK_HOST" "$AUDIT_SINK_PORT" 2>/dev/null; then
        echo -e "${GREEN}✓ Audit sink is reachable on port $AUDIT_SINK_PORT${NC}"
        REACHABILITY_VERIFIED=true
    else
        echo -e "${RED}✗ Audit sink is NOT reachable on $AUDIT_SINK_HOST:$AUDIT_SINK_PORT${NC}"
        echo "This is a HARD STOP for HIPAA deployments."
        exit 1
    fi
elif command -v redis-cli &> /dev/null; then
    # Try to ping via redis-cli if available
    if timeout "$TIMEOUT" redis-cli -h "$AUDIT_SINK_HOST" -p "$AUDIT_SINK_PORT" ping 2>/dev/null | grep -q "PONG"; then
        echo -e "${GREEN}✓ Audit sink responded to Redis ping${NC}"
        REACHABILITY_VERIFIED=true
    else
        echo -e "${RED}✗ Audit sink did not respond to Redis ping${NC}"
        echo "This is a HARD STOP for HIPAA deployments."
        exit 1
    fi
else
    echo -e "${YELLOW}⚠ Neither nc nor redis-cli available - skipping reachability check${NC}"
    echo "For production, ensure one of these tools is installed."
fi

# Check if audit sink is writable (append-only test)
echo ""
echo "Step 2: Checking write capability..."

# For a proper audit sink, we should be able to write but not modify
# This is typically implemented with Redis append-only file (AOF) persistence
# and restricted commands

if command -v docker &> /dev/null; then
    # Check if the audit sink container is running
    CONTAINER_NAME="ml-${ENV}-audit-sink"

    if docker ps | grep -Fq -- "$CONTAINER_NAME"; then
        echo -e "${GREEN}✓ Audit sink container '$CONTAINER_NAME' is running${NC}"

        # Test write capability with a short-lived key (expires after 60s even if cleanup fails)
        TEST_KEY="audit_test_$(date +%s)"
        TEST_VALUE="test_$(uuidgen 2>/dev/null || echo "$RANDOM")"

        if docker exec "$CONTAINER_NAME" redis-cli SET "$TEST_KEY" "$TEST_VALUE" EX 60 > /dev/null 2>&1; then
            echo -e "${GREEN}✓ Audit sink accepts writes${NC}"
            WRITE_VERIFIED=true

            # Verify we can read it back
            READ_VALUE=$(docker exec "$CONTAINER_NAME" redis-cli GET "$TEST_KEY" 2>/dev/null)
            if [ "$READ_VALUE" = "$TEST_VALUE" ]; then
                echo -e "${GREEN}✓ Audit sink read-after-write successful${NC}"
            else
                echo -e "${YELLOW}⚠ Audit sink read-after-write mismatch${NC}"
            fi

            # Clean up (best effort: the sink may legitimately reject DEL)
            docker exec "$CONTAINER_NAME" redis-cli DEL "$TEST_KEY" > /dev/null 2>&1 || true
        else
            echo -e "${RED}✗ Audit sink does not accept writes${NC}"
            exit 1
        fi
    else
        echo -e "${YELLOW}⚠ Audit sink container '$CONTAINER_NAME' not found${NC}"
        echo "Container may not be running or may have a different name."
    fi
else
    echo -e "${YELLOW}⚠ Docker not available - skipping container check${NC}"
fi

# Final summary: report only what was actually verified. Skipped checks keep the
# historical exit status of 0 but are no longer reported as a success.
echo ""
echo "==================================="
echo "Audit Sink Check Summary"
echo "==================================="
if [ "$REACHABILITY_VERIFIED" = true ] && [ "$WRITE_VERIFIED" = true ]; then
    echo -e "${GREEN}✓ Audit sink is reachable and writable${NC}"
    echo ""
    echo "Deployment can proceed."
else
    if [ "$REACHABILITY_VERIFIED" = true ]; then
        echo -e "${GREEN}✓ Audit sink reachability verified${NC}"
    else
        echo -e "${YELLOW}⚠ Audit sink reachability was NOT verified (check skipped)${NC}"
    fi
    if [ "$WRITE_VERIFIED" = true ]; then
        echo -e "${GREEN}✓ Audit sink write capability verified${NC}"
    else
        echo -e "${YELLOW}⚠ Audit sink write capability was NOT verified (check skipped)${NC}"
    fi
    echo ""
    echo "No hard failures detected, but skipped checks must be verified manually before deploying."
fi
echo "Note: This check does NOT verify:"
echo "  - Append-only configuration"
echo "  - Log retention policies"
echo "  - Chain integrity"
echo "  - Tamper resistance"
echo ""
echo "These must be verified separately for full HIPAA compliance."

exit 0
|
||||||
|
|
@ -1,222 +1,83 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
set -euo pipefail;
|
# Development smoke test for FetchML
|
||||||
|
#
|
||||||
|
# NOTE: If using Colima on macOS, ensure the repo directory is mounted:
|
||||||
|
# colima stop
|
||||||
|
# colima start --mount "/Users/jfraeys/Documents/dev/fetch_ml:w"
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# ./scripts/dev/smoke-test.sh # Standard run
|
||||||
|
# BUILD_PROGRESS=plain ./scripts/dev/smoke-test.sh # Show full build logs
|
||||||
|
# KEEP_STACK=1 ./scripts/dev/smoke-test.sh # Keep containers after test
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
||||||
export FETCHML_REPO_ROOT="$repo_root"
|
export FETCHML_REPO_ROOT="$repo_root"
|
||||||
|
|
||||||
# Parse arguments
|
# Use workspace-relative data directory (Colima-compatible)
|
||||||
env="dev"
|
# Avoid $HOME/.fetchml - Colima can't create directories through Docker volumes there
|
||||||
native_mode=false
|
DEFAULT_DATA_DIR="$repo_root/data/smoke"
|
||||||
|
DATA_DIR="${FETCHML_DATA_DIR:-$DEFAULT_DATA_DIR}"
|
||||||
|
|
||||||
while [[ $# -gt 0 ]]; do
|
echo "Using DATA_DIR: $DATA_DIR"
|
||||||
case "$1" in
|
rm -rf "$DATA_DIR"
|
||||||
--native)
|
|
||||||
native_mode=true
|
|
||||||
shift
|
|
||||||
;;
|
|
||||||
dev|prod)
|
|
||||||
env="$1"
|
|
||||||
shift
|
|
||||||
;;
|
|
||||||
--help|-h)
|
|
||||||
echo "Usage: $0 [dev|prod] [--native]"
|
|
||||||
echo ""
|
|
||||||
echo "Options:"
|
|
||||||
echo " dev|prod Environment to test (default: dev)"
|
|
||||||
echo " --native Also test native libraries (C++ integration)"
|
|
||||||
echo " --help Show this help"
|
|
||||||
exit 0
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
echo "Unknown option: $1" >&2
|
|
||||||
echo "Usage: $0 [dev|prod] [--native]" >&2
|
|
||||||
exit 2
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
done
|
|
||||||
|
|
||||||
# Native library smoke test (merged from smoke-test-native.sh)
|
# Create parent directory first with explicit permissions (for Colima compatibility)
|
||||||
if [[ "$native_mode" == true ]]; then
|
mkdir -p "$(dirname "$DATA_DIR")"
|
||||||
echo "=== FetchML Native Libraries Smoke Test ==="
|
chmod 755 "$(dirname "$DATA_DIR")"
|
||||||
echo ""
|
|
||||||
|
|
||||||
cd "$repo_root"
|
# Create data directory structure
|
||||||
|
mkdir -p "$DATA_DIR"/{redis,minio,logs,experiments,active,workspaces,caddy/data,caddy/config,ssl,configs}
|
||||||
|
chmod -R 777 "$DATA_DIR"
|
||||||
|
|
||||||
# Build native libraries
|
# Copy configs to DATA_DIR for mounting
|
||||||
echo "1. Building native libraries..."
|
cp -r "$repo_root/configs/"* "$DATA_DIR/configs/"
|
||||||
if [[ -d native/build ]]; then
|
|
||||||
cd native/build
|
|
||||||
cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_ASAN=OFF >/dev/null 2>&1 || true
|
|
||||||
make -j4 2>&1 | grep -E "(Built|Error|error)" || true
|
|
||||||
cd ../..
|
|
||||||
echo " Native libraries built"
|
|
||||||
else
|
|
||||||
echo " ⚠ native/build not found, skipping native build"
|
|
||||||
fi
|
|
||||||
echo ""
|
|
||||||
|
|
||||||
# Run C++ unit tests
|
# Build arguments
|
||||||
echo "2. Running C++ smoke tests..."
|
BUILD_PROGRESS="${BUILD_PROGRESS:-auto}" # Set to 'plain' for full logs
|
||||||
local tests_run=0
|
|
||||||
for test_bin in ./native/build/test_*; do
|
|
||||||
if [[ -x "$test_bin" ]]; then
|
|
||||||
local test_name=$(basename "$test_bin")
|
|
||||||
echo " Running $test_name..."
|
|
||||||
"$test_bin" 2>/dev/null && echo " ✓ $test_name passed" || echo " ⚠ $test_name skipped/failed"
|
|
||||||
((tests_run++))
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
if [[ $tests_run -eq 0 ]]; then
|
|
||||||
echo " ⚠ No C++ tests found"
|
|
||||||
else
|
|
||||||
echo " Ran $tests_run C++ test(s)"
|
|
||||||
fi
|
|
||||||
echo ""
|
|
||||||
|
|
||||||
echo "3. Building Go applications with native libs..."
|
compose_cmd=$(command -v docker-compose >/dev/null 2>&1 && echo "docker-compose" || echo "docker compose")
|
||||||
go build -tags native_libs -o /dev/null ./cmd/api-server 2>&1 | grep -v "ignoring duplicate" || true
|
|
||||||
echo " api-server builds"
|
|
||||||
go build -tags native_libs -o /dev/null ./cmd/worker 2>&1 | grep -v "ignoring duplicate" || true 2>/dev/null || echo " (worker optional)"
|
|
||||||
echo ""
|
|
||||||
fi
|
|
||||||
|
|
||||||
probe_https_health_openssl() {
|
# Determine build flags
|
||||||
host="$1"
|
build_flags=""
|
||||||
port="$2"
|
if [[ "$BUILD_PROGRESS" == "plain" ]]; then
|
||||||
path="$3"
|
# For docker compose v2+ with buildkit
|
||||||
|
export BUILDKIT_PROGRESS=plain
|
||||||
req="GET ${path} HTTP/1.1\r\nHost: ${host}\r\nConnection: close\r\n\r\n"
|
build_flags="--progress=plain"
|
||||||
resp=$(printf "%b" "$req" | openssl s_client -connect "127.0.0.1:${port}" -servername "${host}" -tls1_2 -quiet 2>/dev/null || true)
|
|
||||||
printf "%s" "$resp" | tr -d '\r' | head -n 1 | grep -Eq '^HTTP/1\.[01] 200'
|
|
||||||
}
|
|
||||||
|
|
||||||
compose_cmd="docker-compose";
|
|
||||||
if ! command -v docker-compose >/dev/null 2>&1; then
|
|
||||||
compose_cmd="docker compose";
|
|
||||||
fi
|
|
||||||
|
|
||||||
compose_files=()
|
|
||||||
compose_project_args=("--project-directory" "$repo_root")
|
|
||||||
api_base=""
|
|
||||||
prometheus_base=""
|
|
||||||
stack_name=""
|
|
||||||
api_wait_seconds=90
|
|
||||||
prometheus_wait_seconds=90
|
|
||||||
|
|
||||||
if [ "$env" = "dev" ]; then
|
|
||||||
# Use temp directory for smoke test data to avoid file sharing issues on macOS/Colima
|
|
||||||
SMOKE_TEST_DATA_DIR="${SMOKE_TEST_DATA_DIR:-$(mktemp -d /tmp/fetch_ml_smoke.XXXXXX)}"
|
|
||||||
echo "Using temp directory: $SMOKE_TEST_DATA_DIR"
|
|
||||||
|
|
||||||
mkdir -p \
|
|
||||||
"$SMOKE_TEST_DATA_DIR/redis" \
|
|
||||||
"$SMOKE_TEST_DATA_DIR/minio" \
|
|
||||||
"$SMOKE_TEST_DATA_DIR/prometheus" \
|
|
||||||
"$SMOKE_TEST_DATA_DIR/grafana" \
|
|
||||||
"$SMOKE_TEST_DATA_DIR/loki" \
|
|
||||||
"$SMOKE_TEST_DATA_DIR/logs" \
|
|
||||||
"$SMOKE_TEST_DATA_DIR/experiments" \
|
|
||||||
"$SMOKE_TEST_DATA_DIR/active" \
|
|
||||||
"$SMOKE_TEST_DATA_DIR/workspaces"
|
|
||||||
|
|
||||||
# Copy monitoring config to temp directory (required for promtail)
|
|
||||||
cp "$repo_root/monitoring/promtail-config.yml" "$SMOKE_TEST_DATA_DIR/"
|
|
||||||
|
|
||||||
# Export for docker-compose to use
|
|
||||||
export SMOKE_TEST_DATA_DIR
|
|
||||||
|
|
||||||
# Create env file for docker-compose (process substitution doesn't work)
|
|
||||||
env_file="$SMOKE_TEST_DATA_DIR/.env"
|
|
||||||
echo "SMOKE_TEST_DATA_DIR=$SMOKE_TEST_DATA_DIR" > "$env_file"
|
|
||||||
echo "FETCHML_REPO_ROOT=$repo_root" >> "$env_file"
|
|
||||||
|
|
||||||
# Update compose project args to include env file
|
|
||||||
compose_project_args=("--project-directory" "$repo_root" "--env-file" "$env_file")
|
|
||||||
|
|
||||||
stack_name="dev"
|
|
||||||
api_wait_seconds=180
|
|
||||||
prometheus_wait_seconds=180
|
|
||||||
compose_files=("-f" "$repo_root/deployments/docker-compose.dev.yml")
|
|
||||||
api_base="https://localhost:9101"
|
|
||||||
if ! curl -skf "$api_base/health" >/dev/null 2>&1; then
|
|
||||||
api_base="http://localhost:9101"
|
|
||||||
fi
|
|
||||||
prometheus_base="http://localhost:9090"
|
|
||||||
else
|
|
||||||
# Use temp directory for prod smoke test too
|
|
||||||
SMOKE_TEST_DATA_DIR="${SMOKE_TEST_DATA_DIR:-$(mktemp -d /tmp/fetch_ml_smoke_prod.XXXXXX)}"
|
|
||||||
echo "Using temp directory: $SMOKE_TEST_DATA_DIR"
|
|
||||||
|
|
||||||
mkdir -p \
|
|
||||||
"$SMOKE_TEST_DATA_DIR/caddy/data" \
|
|
||||||
"$SMOKE_TEST_DATA_DIR/caddy/config" \
|
|
||||||
"$SMOKE_TEST_DATA_DIR/redis" \
|
|
||||||
"$SMOKE_TEST_DATA_DIR/logs" \
|
|
||||||
"$SMOKE_TEST_DATA_DIR/experiments" \
|
|
||||||
"$SMOKE_TEST_DATA_DIR/active"
|
|
||||||
|
|
||||||
# Copy monitoring config to temp directory (required for promtail)
|
|
||||||
cp "$repo_root/monitoring/promtail-config.yml" "$SMOKE_TEST_DATA_DIR/"
|
|
||||||
|
|
||||||
# Export for docker-compose to use
|
|
||||||
export SMOKE_TEST_DATA_DIR
|
|
||||||
|
|
||||||
# Create env file for docker-compose (process substitution doesn't work)
|
|
||||||
env_file="$SMOKE_TEST_DATA_DIR/.env"
|
|
||||||
echo "SMOKE_TEST_DATA_DIR=$SMOKE_TEST_DATA_DIR" > "$env_file"
|
|
||||||
echo "FETCHML_REPO_ROOT=$repo_root" >> "$env_file"
|
|
||||||
|
|
||||||
# Update compose project args to include env file
|
|
||||||
compose_project_args=("--project-directory" "$repo_root" "--env-file" "$env_file")
|
|
||||||
|
|
||||||
stack_name="prod"
|
|
||||||
compose_files=("-f" "$repo_root/deployments/docker-compose.prod.smoke.yml")
|
|
||||||
api_base="https://localhost:8443"
|
|
||||||
export FETCHML_DOMAIN=localhost
|
|
||||||
export CADDY_EMAIL=smoke@example.invalid
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
cleanup() {
|
cleanup() {
|
||||||
status=$?;
|
status=$?
|
||||||
if [ "$status" -ne 0 ]; then
|
if [[ $status -ne 0 ]]; then
|
||||||
$compose_cmd "${compose_project_args[@]}" "${compose_files[@]}" logs --no-color || true;
|
$compose_cmd -f "$repo_root/deployments/docker-compose.dev.yml" logs --no-color 2>/dev/null || true
|
||||||
fi
|
fi
|
||||||
if [ "${KEEP_STACK:-0}" != "1" ]; then
|
if [[ "${KEEP_STACK:-0}" != "1" ]]; then
|
||||||
$compose_cmd "${compose_project_args[@]}" "${compose_files[@]}" down -v >/dev/null 2>&1 || true;
|
$compose_cmd -f "$repo_root/deployments/docker-compose.dev.yml" down -v >/dev/null 2>&1 || true
|
||||||
|
rm -rf "$DATA_DIR"
|
||||||
fi
|
fi
|
||||||
exit "$status";
|
exit $status
|
||||||
}
|
}
|
||||||
|
|
||||||
trap cleanup EXIT;
|
trap cleanup EXIT
|
||||||
echo "Starting $stack_name stack for smoke test...";
|
|
||||||
|
|
||||||
$compose_cmd "${compose_project_args[@]}" "${compose_files[@]}" up -d --build >/dev/null;
|
echo "Starting dev stack for smoke test..."
|
||||||
echo "Waiting for API to become healthy...";
|
export DATA_DIR="$DATA_DIR"
|
||||||
|
|
||||||
deadline=$(($(date +%s) + $api_wait_seconds));
|
# Build first with progress option if specified, then up
|
||||||
while true; do
|
if [[ "$BUILD_PROGRESS" == "plain" ]]; then
|
||||||
if [ "$env" = "dev" ]; then
|
$compose_cmd -f "$repo_root/deployments/docker-compose.dev.yml" --project-directory "$repo_root" build --progress=plain
|
||||||
if curl -skf "$api_base/health" >/dev/null 2>&1; then break; fi;
|
|
||||||
else
|
|
||||||
if probe_https_health_openssl "localhost" "8443" "/health"; then break; fi;
|
|
||||||
fi
|
|
||||||
if [ $(date +%s) -ge $deadline ]; then echo "Timed out waiting for $api_base/health"; exit 1; fi;
|
|
||||||
sleep 2;
|
|
||||||
done;
|
|
||||||
|
|
||||||
if [ "$env" = "dev" ]; then
|
|
||||||
echo "Checking metrics endpoint...";
|
|
||||||
curl -skf "$api_base/metrics" >/dev/null;
|
|
||||||
|
|
||||||
echo "Waiting for Prometheus target api-server to be up...";
|
|
||||||
deadline=$(($(date +%s) + $prometheus_wait_seconds));
|
|
||||||
query_url="$prometheus_base/api/v1/query?query=up%7Bjob%3D%22api-server%22%7D";
|
|
||||||
|
|
||||||
while true; do
|
|
||||||
resp=$(curl -sf "$query_url" || true);
|
|
||||||
resp_compact=$(printf "%s" "$resp" | tr -d '\n' | tr -d '\r');
|
|
||||||
if echo "$resp_compact" | grep -Fq '"instance":"api-server:9101"' && echo "$resp_compact" | grep -Fq ',"1"]'; then break; fi;
|
|
||||||
if [ $(date +%s) -ge $deadline ]; then echo "Timed out waiting for Prometheus api-server target to be up"; echo "$resp"; exit 1; fi;
|
|
||||||
sleep 2;
|
|
||||||
done;
|
|
||||||
fi
|
fi
|
||||||
|
$compose_cmd -f "$repo_root/deployments/docker-compose.dev.yml" --project-directory "$repo_root" up -d --build
|
||||||
|
|
||||||
|
echo "Waiting for API to become healthy..."
|
||||||
|
deadline=$(( $(date +%s) + 180 ))
|
||||||
|
while ! curl -sf http://localhost:9101/health >/dev/null 2>&1; do
|
||||||
|
if [[ $(date +%s) -ge $deadline ]]; then
|
||||||
|
echo "Timed out waiting for API"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
sleep 2
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "API is healthy"
|
||||||
|
|
@ -35,10 +35,12 @@ print_error() {
|
||||||
cleanup_docker() {
|
cleanup_docker() {
|
||||||
print_header "Docker Compose Cleanup"
|
print_header "Docker Compose Cleanup"
|
||||||
|
|
||||||
|
local compose_cmd=$(command -v docker-compose >/dev/null 2>&1 && echo "docker-compose" || echo "docker compose")
|
||||||
|
|
||||||
# Stop all project-related containers
|
# Stop all project-related containers
|
||||||
docker-compose -f deployments/docker-compose.dev.yml down --volumes --remove-orphans 2>/dev/null || true
|
$compose_cmd -f deployments/docker-compose.dev.yml down --volumes --remove-orphans 2>/dev/null || true
|
||||||
docker-compose -f deployments/docker-compose.local.yml down --volumes --remove-orphans 2>/dev/null || true
|
$compose_cmd -f deployments/docker-compose.local.yml down --volumes --remove-orphans 2>/dev/null || true
|
||||||
docker-compose -f tests/e2e/docker-compose.logs-debug.yml down --volumes --remove-orphans 2>/dev/null || true
|
$compose_cmd -f tests/e2e/docker-compose.logs-debug.yml down --volumes --remove-orphans 2>/dev/null || true
|
||||||
|
|
||||||
# Remove project-specific images (keep base images)
|
# Remove project-specific images (keep base images)
|
||||||
docker images --filter "reference=fetchml*" --format "{{.ID}}" | xargs -r docker rmi -f 2>/dev/null || true
|
docker images --filter "reference=fetchml*" --format "{{.ID}}" | xargs -r docker rmi -f 2>/dev/null || true
|
||||||
|
|
|
||||||
|
|
@ -13,10 +13,12 @@ if [ ! -f "native/build/libqueue_index.so" ] && [ ! -f "native/build/libqueue_in
|
||||||
make native-build
|
make native-build
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
compose_cmd=$(command -v docker-compose >/dev/null 2>&1 && echo "docker-compose" || echo "docker compose")
|
||||||
|
|
||||||
# Start Redis via docker-compose
|
# Start Redis via docker-compose
|
||||||
echo "Starting Redis..."
|
echo "Starting Redis..."
|
||||||
cd deployments
|
cd deployments
|
||||||
docker-compose -f docker-compose.dev.yml up -d redis
|
$compose_cmd -f docker-compose.dev.yml up -d redis
|
||||||
cd ..
|
cd ..
|
||||||
|
|
||||||
# Wait for Redis to be ready
|
# Wait for Redis to be ready
|
||||||
|
|
@ -47,7 +49,7 @@ fi
|
||||||
echo ""
|
echo ""
|
||||||
echo "Stopping Redis..."
|
echo "Stopping Redis..."
|
||||||
cd deployments
|
cd deployments
|
||||||
docker-compose -f docker-compose.dev.yml stop redis
|
$compose_cmd -f docker-compose.dev.yml stop redis
|
||||||
cd ..
|
cd ..
|
||||||
|
|
||||||
exit $TEST_EXIT
|
exit $TEST_EXIT
|
||||||
|
|
|
||||||
|
|
@ -5,9 +5,11 @@ set -e
|
||||||
|
|
||||||
echo "Starting Full Production Test Environment with Podman and SQLite..."
|
echo "Starting Full Production Test Environment with Podman and SQLite..."
|
||||||
|
|
||||||
|
compose_cmd=$(command -v docker-compose >/dev/null 2>&1 && echo "docker-compose" || echo "docker compose")
|
||||||
|
|
||||||
# Clean up any existing containers
|
# Clean up any existing containers
|
||||||
echo "Cleaning up existing containers..."
|
echo "Cleaning up existing containers..."
|
||||||
docker-compose -f deployments/docker-compose.prod.yml down -v
|
$compose_cmd -f deployments/docker-compose.prod.yml down -v
|
||||||
|
|
||||||
# Create necessary directories
|
# Create necessary directories
|
||||||
echo "Creating directories..."
|
echo "Creating directories..."
|
||||||
|
|
@ -15,7 +17,7 @@ mkdir -p data logs
|
||||||
|
|
||||||
# Build and start services
|
# Build and start services
|
||||||
echo "Building and starting services..."
|
echo "Building and starting services..."
|
||||||
docker-compose -f deployments/docker-compose.prod.yml up --build -d
|
$compose_cmd -f deployments/docker-compose.prod.yml up --build -d
|
||||||
|
|
||||||
# Wait for services to be healthy
|
# Wait for services to be healthy
|
||||||
echo "Waiting for services to be healthy..."
|
echo "Waiting for services to be healthy..."
|
||||||
|
|
@ -23,7 +25,7 @@ sleep 15
|
||||||
|
|
||||||
# Check service health
|
# Check service health
|
||||||
echo "Checking service health..."
|
echo "Checking service health..."
|
||||||
docker-compose -f deployments/docker-compose.prod.yml ps
|
$compose_cmd -f deployments/docker-compose.prod.yml ps
|
||||||
|
|
||||||
# Test API server
|
# Test API server
|
||||||
echo "Testing API server..."
|
echo "Testing API server..."
|
||||||
|
|
@ -59,8 +61,7 @@ echo " ./cli/zig-out/bin/ml queue prod-test-job"
|
||||||
echo " ./cli/zig-out/bin/ml status"
|
echo " ./cli/zig-out/bin/ml status"
|
||||||
echo ""
|
echo ""
|
||||||
echo "To view logs:"
|
echo "To view logs:"
|
||||||
echo " docker-compose -f deployments/docker-compose.prod.yml logs -f worker"
|
echo " $compose_cmd -f deployments/docker-compose.prod.yml logs -f worker"
|
||||||
echo " docker-compose -f deployments/docker-compose.prod.yml down"
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "To stop:"
|
echo "To stop:"
|
||||||
echo " docker-compose -f deployments/docker-compose.prod.yml down"
|
echo " $compose_cmd -f deployments/docker-compose.prod.yml down"
|
||||||
|
|
|
||||||
|
|
@ -25,22 +25,24 @@ if [[ ! -f "$SSH_KEY" ]]; then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Check if docker-compose services are running
|
# Check if docker-compose services are running
|
||||||
echo "=== Checking Docker Compose Services ==="
|
compose_cmd=$(command -v docker-compose >/dev/null 2>&1 && echo "docker-compose" || echo "docker compose")
|
||||||
cd "$REPO_ROOT/deployments"
|
|
||||||
|
|
||||||
if docker-compose -f docker-compose.prod.smoke.yml ps | grep -q "ml-smoke-caddy"; then
|
echo "=== Checking Docker Compose Services ==="
|
||||||
|
cd "$REPO_ROOT"
|
||||||
|
|
||||||
|
if $compose_cmd -f deployments/docker-compose.prod.smoke.yml ps | grep -q "ml-smoke-caddy"; then
|
||||||
echo "Caddy container running"
|
echo "Caddy container running"
|
||||||
else
|
else
|
||||||
echo "✗ Caddy container not running"
|
echo "✗ Caddy container not running"
|
||||||
echo "Start services: docker-compose -f docker-compose.prod.smoke.yml up -d"
|
echo "Start services: $compose_cmd -f deployments/docker-compose.prod.smoke.yml up -d"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if docker-compose -f docker-compose.prod.smoke.yml ps | grep -q "ml-ssh-test"; then
|
if $compose_cmd -f deployments/docker-compose.prod.smoke.yml ps | grep -q "ml-ssh-test"; then
|
||||||
echo "SSH test container running"
|
echo "SSH test container running"
|
||||||
else
|
else
|
||||||
echo "✗ SSH test container not running"
|
echo "✗ SSH test container not running"
|
||||||
echo "Start services: docker-compose -f docker-compose.prod.smoke.yml up -d"
|
echo "Start services: $compose_cmd -f deployments/docker-compose.prod.smoke.yml up -d"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -86,26 +86,26 @@ func ParseGoBenchOutput(r io.Reader) ([]BenchmarkResult, error) {
|
||||||
|
|
||||||
// BenchmarkResult represents a single benchmark result
|
// BenchmarkResult represents a single benchmark result
|
||||||
type BenchmarkResult struct {
|
type BenchmarkResult struct {
|
||||||
Name string `json:"name"`
|
|
||||||
Value float64 `json:"value"`
|
|
||||||
Unit string `json:"unit"`
|
|
||||||
Timestamp time.Time `json:"timestamp"`
|
Timestamp time.Time `json:"timestamp"`
|
||||||
|
Name string `json:"name"`
|
||||||
|
Unit string `json:"unit"`
|
||||||
|
Value float64 `json:"value"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// RegressionReport contains regression analysis results
|
// RegressionReport contains regression analysis results
|
||||||
type RegressionReport struct {
|
type RegressionReport struct {
|
||||||
|
Summary string `json:"summary"`
|
||||||
Regressions []Regression `json:"regressions"`
|
Regressions []Regression `json:"regressions"`
|
||||||
Improvements []Improvement `json:"improvements"`
|
Improvements []Improvement `json:"improvements"`
|
||||||
Summary string `json:"summary"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Regression represents a performance regression
|
// Regression represents a performance regression
|
||||||
type Regression struct {
|
type Regression struct {
|
||||||
Benchmark string `json:"benchmark"`
|
Benchmark string `json:"benchmark"`
|
||||||
|
Severity string `json:"severity"`
|
||||||
CurrentValue float64 `json:"current_value"`
|
CurrentValue float64 `json:"current_value"`
|
||||||
BaselineValue float64 `json:"baseline_value"`
|
BaselineValue float64 `json:"baseline_value"`
|
||||||
PercentChange float64 `json:"percent_change"`
|
PercentChange float64 `json:"percent_change"`
|
||||||
Severity string `json:"severity"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Improvement represents a performance improvement
|
// Improvement represents a performance improvement
|
||||||
|
|
|
||||||
|
|
@ -15,13 +15,13 @@ import (
|
||||||
|
|
||||||
// Profiler provides performance profiling capabilities
|
// Profiler provides performance profiling capabilities
|
||||||
type Profiler struct {
|
type Profiler struct {
|
||||||
|
startTime time.Time
|
||||||
cpuProfile string
|
cpuProfile string
|
||||||
memProfile string
|
memProfile string
|
||||||
traceProfile string
|
traceProfile string
|
||||||
blockProfile string
|
blockProfile string
|
||||||
mutexProfile string
|
mutexProfile string
|
||||||
enabled bool
|
enabled bool
|
||||||
startTime time.Time
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ProfileConfig defines profiling configuration
|
// ProfileConfig defines profiling configuration
|
||||||
|
|
@ -151,12 +151,12 @@ func (p *Profiler) Stop() error {
|
||||||
|
|
||||||
// ProfileAnalysis contains analysis results from profiling data
|
// ProfileAnalysis contains analysis results from profiling data
|
||||||
type ProfileAnalysis struct {
|
type ProfileAnalysis struct {
|
||||||
|
GCStats GCStats `json:"gc_stats"`
|
||||||
TopFunctions []FunctionInfo `json:"top_functions"`
|
TopFunctions []FunctionInfo `json:"top_functions"`
|
||||||
|
Recommendations []string `json:"recommendations"`
|
||||||
MemoryUsage MemoryInfo `json:"memory_usage"`
|
MemoryUsage MemoryInfo `json:"memory_usage"`
|
||||||
GoroutineCount int `json:"goroutine_count"`
|
GoroutineCount int `json:"goroutine_count"`
|
||||||
HeapSize uint64 `json:"heap_size"`
|
HeapSize uint64 `json:"heap_size"`
|
||||||
GCStats GCStats `json:"gc_stats"`
|
|
||||||
Recommendations []string `json:"recommendations"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// FunctionInfo represents profiling information for a function
|
// FunctionInfo represents profiling information for a function
|
||||||
|
|
@ -179,10 +179,10 @@ type MemoryInfo struct {
|
||||||
|
|
||||||
// GCStats contains garbage collection statistics
|
// GCStats contains garbage collection statistics
|
||||||
type GCStats struct {
|
type GCStats struct {
|
||||||
NumGC uint32 `json:"num_gc"`
|
Pause []time.Duration `json:"pauses_ns"`
|
||||||
GCCPUFraction float64 `json:"gc_cpu_fraction"`
|
GCCPUFraction float64 `json:"gc_cpu_fraction"`
|
||||||
PauseTotal time.Duration `json:"pause_total_ns"`
|
PauseTotal time.Duration `json:"pause_total_ns"`
|
||||||
Pause []time.Duration `json:"pauses_ns"`
|
NumGC uint32 `json:"num_gc"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// AnalyzeProfiles analyzes generated profile files and returns insights
|
// AnalyzeProfiles analyzes generated profile files and returns insights
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue