feat: add comprehensive configuration and deployment infrastructure

- Add development and production configuration templates
- Include Docker build files for containerized deployment
- Add Nginx configuration with SSL/TLS setup
- Include environment configuration examples
- Add SSL certificate setup and management
- Configure application schemas and validation
- Support for both local and production deployment scenarios

Provides flexible deployment options from development to production
with proper security, monitoring, and configuration management.
This commit is contained in:
Jeremie Fraeys 2025-12-04 16:54:02 -05:00
parent d225ea1f00
commit 3de1e6e9ab
21 changed files with 1514 additions and 0 deletions

6
.env.dev Normal file
View file

@ -0,0 +1,6 @@
# Development environment variables
# NOTE(review): these values are committed to version control. Treat them as
# development-only and rotate them before reusing them in any other environment.
REDIS_PASSWORD=JZVd2Y6IDaLNaYLBOFgQ7ae4Ox5t37NTIyPMQlLJD4k=
# The secret must stay on ONE line: a wrapped continuation line is not a valid
# KEY=VALUE entry and truncates the value that dotenv loaders read.
JWT_SECRET=M/11uD5waf4glbTmFQiqSJaMCtCXTFwxvxRiFZL3GuFQO82PoURsIfFbmzyxrbPJL5uc9Qj3Gd3Ijw7/kRMhwA==
GRAFANA_USER=admin
GRAFANA_PASSWORD=pd/UiVYlS+wmXlMmvh6mTw==

63
.env.example Normal file
View file

@ -0,0 +1,63 @@
# Fetch ML Environment Variables
# Copy this file to .env and modify as needed
# Server Configuration
FETCH_ML_HOST=localhost
FETCH_ML_PORT=8080
FETCH_ML_LOG_LEVEL=info
FETCH_ML_LOG_FILE=logs/fetch_ml.log
# Database Configuration
FETCH_ML_DB_TYPE=sqlite
FETCH_ML_DB_PATH=db/fetch_ml.db
# Redis Configuration
FETCH_ML_REDIS_URL=redis://localhost:6379
FETCH_ML_REDIS_PASSWORD=
FETCH_ML_REDIS_DB=0
# Authentication
FETCH_ML_AUTH_ENABLED=true
FETCH_ML_AUTH_CONFIG=configs/config-local.yaml
# Security
FETCH_ML_SECRET_KEY=your-secret-key-here
FETCH_ML_JWT_EXPIRY=24h
# Container Runtime
FETCH_ML_CONTAINER_RUNTIME=podman
FETCH_ML_CONTAINER_REGISTRY=docker.io
# Storage
FETCH_ML_STORAGE_PATH=data
FETCH_ML_RESULTS_PATH=results
FETCH_ML_TEMP_PATH=/tmp/fetch_ml
# Development
FETCH_ML_DEBUG=false
FETCH_ML_DEV_MODE=false
# CLI Configuration (overrides ~/.ml/config.toml)
FETCH_ML_CLI_HOST=localhost
FETCH_ML_CLI_USER=mluser
FETCH_ML_CLI_BASE=/opt/ml
FETCH_ML_CLI_PORT=22
FETCH_ML_CLI_API_KEY=your-api-key-here
# TUI Configuration (overrides TUI config file)
FETCH_ML_TUI_HOST=localhost
FETCH_ML_TUI_USER=mluser
FETCH_ML_TUI_SSH_KEY=~/.ssh/id_rsa
FETCH_ML_TUI_PORT=22
FETCH_ML_TUI_BASE_PATH=/opt/ml
FETCH_ML_TUI_TRAIN_SCRIPT=train.py
FETCH_ML_TUI_REDIS_ADDR=localhost:6379
FETCH_ML_TUI_REDIS_PASSWORD=
FETCH_ML_TUI_REDIS_DB=0
FETCH_ML_TUI_KNOWN_HOSTS=~/.ssh/known_hosts
# Monitoring Security
# Generate with: openssl rand -base64 32
GRAFANA_ADMIN_PASSWORD=changeme-generate-secure-password
REDIS_PASSWORD=changeme-generate-secure-password

30
build/README.md Normal file
View file

@ -0,0 +1,30 @@
# Build Configuration
This directory contains build configurations for containerization.
## Docker
**Location**: `build/docker/`
### Dockerfiles
- **`simple.Dockerfile`** - Lightweight API server image
- **`api-server.Dockerfile`** - Full-featured API server
### Usage
```bash
# Build from project root
docker build -f build/docker/simple.Dockerfile -t fetchml:latest .
# Or use Makefile
make docker-build
```
## Podman
**Location**: `../podman/`
Podman configurations for running ML experiments with GPU support.
**Note**: Not for building - these are runtime configs for experiment execution.

View file

@ -0,0 +1,39 @@
# Development files
.git/
.github/
.windsurf/
*.md
# IDE files
.vscode/
.idea/
*.swp
# Test files
*_test.go
tests/
# Build artifacts
bin/
cli/zig-out/
.zig-cache/
# Runtime data
data/
logs/
experiments/
dump.rdb
# Secrets
# Environment files hold credentials (Redis/JWT/Grafana); keep them out of the
# build context so `COPY . .` cannot bake them into an image layer.
.env
.env.*
*.key
*.pem
secrets/
# Python
__pycache__/
*.pyc
.venv/
# Documentation
docs/
examples/

View file

@ -0,0 +1,71 @@
# Multi-stage build for ML Experiment Manager
# Stage 1 (go-builder): compiles the Go binaries via `make build`.
# The final stage copies the build output from /app/bin/.
FROM golang:1.25-alpine AS go-builder
# Install dependencies
# NOTE(review): podman and redis are installed in the build stage as well;
# confirm `make build` actually needs them, otherwise they only slow the build.
RUN apk add --no-cache git make podman redis
# Set working directory
WORKDIR /app
# Copy go mod files
# Copied before the rest of the source so the module download layer below stays
# cached until go.mod/go.sum change.
COPY go.mod go.sum ./
# Download dependencies
RUN go mod download
# Copy source code
COPY . .
# Build Go binaries
RUN make build
# Zig CLI stage
# Builds the `ml` CLI from cli/ with a pinned Zig toolchain. The final stage
# copies the result from /app/cli/zig-out/bin/ml.
FROM alpine:3.19 AS zig-builder

# Zig release to install; override with `--build-arg ZIG_VERSION=<x.y.z>`.
ARG ZIG_VERSION=0.15.2

# Make a failed download fail the build instead of being masked by `tar`.
SHELL ["/bin/ash", "-o", "pipefail", "-c"]

# Install dependencies
RUN apk add --no-cache curl xz

# Install Zig
# The toolchain is selected from the architecture this stage runs on rather
# than being hard-coded to aarch64, so the image also builds on x86_64 hosts.
# Zig release tarballs are named zig-<arch>-<os>-<version>; the extracted
# directory is symlinked to /opt/zig so PATH does not depend on the arch.
RUN zig_arch="$(uname -m)" && \
    case "${zig_arch}" in \
      x86_64|aarch64) ;; \
      *) echo "unsupported architecture for Zig: ${zig_arch}" >&2; exit 1 ;; \
    esac && \
    zig_dir="zig-${zig_arch}-linux-${ZIG_VERSION}" && \
    curl -fsSL "https://ziglang.org/download/${ZIG_VERSION}/${zig_dir}.tar.xz" | tar -xJ -C /opt && \
    ln -s "/opt/${zig_dir}" /opt/zig
ENV PATH="/opt/zig:${PATH}"

# Copy CLI source
COPY cli/ /app/cli/

# Build Zig CLI
WORKDIR /app/cli
RUN zig build cross
# Final stage
# Runtime image: contains only the binaries copied from the two builder stages
# plus the runtime packages installed below.
FROM alpine:3.19
# Install runtime dependencies
RUN apk add --no-cache ca-certificates rsync openssh-client redis
# Create app user
# Fixed UID/GID 1001 so ownership of mounted volumes is predictable.
RUN addgroup -g 1001 -S appgroup && \
    adduser -u 1001 -S appuser -G appgroup
# Set working directory
WORKDIR /app
# Copy binaries from builders
COPY --from=go-builder /app/bin/ /usr/local/bin/
COPY --from=zig-builder /app/cli/zig-out/bin/ml /usr/local/bin/
# Create directories
# Both paths are handed to appuser because the process runs unprivileged below.
RUN mkdir -p /data/ml-experiments /home/appuser/.ml && \
    chown -R appuser:appgroup /data /home/appuser
# Switch to app user
USER appuser
# Expose ports
EXPOSE 9100 9101
# Health check
# NOTE(review): probes plain HTTP on port 9100; confirm the server exposes
# /health there (the bundled configs listen on 9101/9102 for the API).
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD wget --no-verbose --tries=1 --spider http://localhost:9100/health || exit 1
# Default command
CMD ["/usr/local/bin/api-server"]

View file

@ -0,0 +1,61 @@
# Simple Dockerfile for homelab use
# Stage 1 (builder): compiles the Go binaries via `make build`; the final
# stage copies /app/bin/, /app/configs/ and /app/nginx/ out of this stage.
FROM golang:1.25-alpine AS builder
# Install dependencies
RUN apk add --no-cache git make
# Set working directory
WORKDIR /app
# Copy go mod files
# Copied first so the dependency download layer is reused until they change.
COPY go.mod go.sum ./
# Download dependencies
RUN go mod download
# Copy source code
COPY . .
# Build Go binaries
RUN make build
# Final stage
# Runtime image for the homelab setup: API server binary, bundled configs and a
# self-signed certificate so the server can serve HTTPS on 9101 out of the box.
FROM alpine:3.19

# Install runtime dependencies
# curl is required by the HEALTHCHECK below; openssl generates the certificate.
RUN apk add --no-cache ca-certificates curl openssl redis

# Create app user
RUN addgroup -g 1001 -S appgroup && \
    adduser -u 1001 -S appuser -G appgroup

# Set working directory
WORKDIR /app

# Copy binaries from builder
COPY --from=builder /app/bin/ /usr/local/bin/

# Copy configs and templates
COPY --from=builder /app/configs/ /app/configs/
COPY --from=builder /app/nginx/ /app/nginx/

# Create necessary directories and generate SSL certificates for container use.
# The directories are handed to appuser because the server runs unprivileged and
# must be able to write experiment data and log files. The private key is
# readable by its owner only (600) instead of world-readable.
# NOTE(review): the key pair is baked into the image, so every container
# started from it shares the same key; mount real certificates for anything
# beyond local/homelab use.
RUN mkdir -p /app/data/experiments /app/logs /app/ssl && \
    openssl req -x509 -newkey rsa:2048 -keyout /app/ssl/key.pem -out /app/ssl/cert.pem -days 365 -nodes \
      -subj "/C=US/ST=Homelab/L=Local/O=ML/OU=Experiments/CN=localhost" && \
    chmod 644 /app/ssl/cert.pem && \
    chmod 600 /app/ssl/key.pem && \
    chown -R appuser:appgroup /app/data /app/logs /app/ssl

# Switch to app user
USER appuser

# Expose ports
EXPOSE 9101

# Health check
# -k: the certificate generated above is self-signed.
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
    CMD curl -k -f https://localhost:9101/health || exit 1

# Default command
CMD ["/usr/local/bin/api-server", "-config", "/app/configs/config.yaml"]

36
configs/config-dev.yaml Normal file
View file

@ -0,0 +1,36 @@
base_path: "./data/experiments"
auth:
enabled: true
apikeys:
test_user:
hash: "02d4e2b0d8b4869a34511cc01ff1ebbc3cac581a6b361988106eaedca9886a38"
admin: true
roles: ["data_scientist", "admin"]
permissions:
read: true
write: true
delete: true
server:
address: ":9102"
tls:
enabled: false
security:
rate_limit:
enabled: true
requests_per_minute: 60
burst_size: 10
ip_whitelist:
- "127.0.0.1"
- "::1"
- "localhost"
redis:
url: "redis://localhost:6379"
password: "${REDIS_PASSWORD}"
logging:
level: "info"
file: "" # Empty = stderr only (dev mode)

View file

@ -0,0 +1,39 @@
base_path: "/app/data/experiments"
auth:
  enabled: true
  # The key registry is spelled `apikeys` (no underscore), matching the
  # configuration schema and the dev/prod configs; `api_keys` is not a
  # recognised key.
  apikeys:
    homelab_user:
      # NOTE(review): this is the SHA-256 of the literal string "password".
      # Replace it with the hash of a real API key before exposing the server.
      hash: "5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8" # "password"
      admin: true
      # "user" is not a role the schema allows (admin, data_scientist,
      # data_engineer, viewer, operator); admin already grants full access.
      roles: ["admin"]
      permissions:
        read: true
        write: true
        delete: true
server:
address: ":9101"
tls:
enabled: true
cert_file: "/app/ssl/cert.pem"
key_file: "/app/ssl/key.pem"
security:
rate_limit:
enabled: true
requests_per_minute: 30
ip_whitelist:
- "127.0.0.1"
- "::1"
- "192.168.0.0/16"
- "10.0.0.0/8"
redis:
  # `redis` is the service hostname on the container network.
  url: "redis://redis:6379"
  # Connection pool size. The schema names this setting `pool_size`;
  # `max_connections` is not a recognised key.
  pool_size: 10
logging:
  level: "info"
  file: "/app/logs/app.log"
  # The audit log path key is `audit_log` (as in config-prod.yaml and the
  # schema); `audit_file` is not a recognised key.
  audit_log: "/app/logs/audit.log"

View file

@ -0,0 +1,6 @@
worker_host = "127.0.0.1"
worker_user = "dev_user"
worker_base = "/tmp/ml-experiments"
worker_port = 9101
api_key = "dev_test_api_key_12345"
protocol = "http"

33
configs/config-local.yaml Normal file
View file

@ -0,0 +1,33 @@
auth:
enabled: true
apikeys:
dev_user:
hash: 2baf1f40105d9501fe319a8ec463fdf4325a2a5df445adf3f572f626253678c9
admin: true
roles:
- admin
permissions:
'*': true
server:
address: ":9101"
tls:
enabled: false
security:
rate_limit:
enabled: false
ip_whitelist:
- "127.0.0.1"
- "::1"
- "localhost"
- "10.0.0.0/8"
- "192.168.0.0/16"
- "172.16.0.0/12"
# Prometheus metrics
metrics:
enabled: true
listen_addr: ":9100"
tls:
enabled: false

View file

@ -0,0 +1,27 @@
base_path: "./data/experiments"
auth:
enabled: true
server:
address: ":9102"
tls:
enabled: false
security:
rate_limit:
enabled: true
requests_per_minute: 60
burst_size: 10
ip_whitelist:
- "127.0.0.1"
- "::1"
- "localhost"
redis:
url: "redis://localhost:6379"
password: "${REDIS_PASSWORD}"
logging:
level: "info"
file: "./logs/fetch_ml.log"

53
configs/config-prod.yaml Normal file
View file

@ -0,0 +1,53 @@
base_path: "./data/ml-experiments"
auth:
enabled: true
apikeys:
homelab_user:
hash: "5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8" # "password"
admin: true
roles: ["admin"]
permissions:
read: true
write: true
delete: true
server:
address: ":9101"
tls:
enabled: false # Disabled for local testing
cert_file: "./ssl/cert.pem"
key_file: "./ssl/key.pem"
min_version: "1.3"
security:
rate_limit:
enabled: true
requests_per_minute: 60
burst_size: 10
ip_whitelist:
- "127.0.0.1"
- "::1"
- "localhost"
- "10.0.0.0/8"
- "192.168.0.0/16"
- "172.16.0.0/12"
failed_login_lockout:
enabled: true
max_attempts: 5
lockout_duration: "15m"
# SQLite database for production
database:
type: "sqlite"
connection: "data/fetch_ml.db"
redis:
  url: "redis://localhost:6379"
  addr: "localhost:6379"
  # Read the password from the environment instead of committing it, using the
  # same ${REDIS_PASSWORD} placeholder as the development config.
  # NOTE(review): the previously committed value should be rotated.
  password: "${REDIS_PASSWORD}"
logging:
level: "info"
file: "logs/fetch_ml.log"
audit_log: "logs/audit.log"

View file

@ -0,0 +1,64 @@
# Fetch ML Configuration Example for PostgreSQL
# This example shows how to configure Fetch ML to use PostgreSQL as the database
auth:
  enabled: true
  apikeys:
    admin:
      # Example only: SHA-256 of the literal string "password". The previous
      # value did not match that string, so the documented key could not
      # authenticate. Replace with the hash of a real API key.
      hash: "5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8" # "password"
      admin: true
      roles: ["admin"]
server:
address: ":9101"
tls:
enabled: false
database:
type: "postgres"
host: "localhost"
port: 5432
username: "fetchml"
password: "your_password_here"
database: "fetchml"
# Alternatively, you can use a full connection string:
# connection: "postgres://fetchml:your_password_here@localhost:5432/fetchml?sslmode=disable"
redis:
host: "localhost"
port: 6379
password: ""
db: 0
pool_size: 10
max_retries: 3
logging:
level: "info"
console: true
format: "text"
security:
secret_key: "your-secret-key-here-at-least-16-characters"
jwt_expiry: "24h"
rate_limit:
enabled: false
requests_per_minute: 60
burst_size: 10
containers:
runtime: "podman"
registry: "docker.io"
pull_policy: "missing"
resources:
cpu_limit: "2"
memory_limit: "4Gi"
gpu_limit: 1
storage:
data_path: "data"
results_path: "results"
temp_path: "/tmp/fetch_ml"
cleanup:
enabled: true
max_age_hours: 168
max_size_gb: 10

View file

@ -0,0 +1,66 @@
# Fetch ML Configuration Example
# Copy this file to config.yaml and customize for your environment
auth:
  enabled: true
  # The key registry is spelled `apikeys` (no underscore), matching the
  # configuration schema; `api_keys` is not a recognised key.
  apikeys:
    # Example API key (replace with real hashed keys)
    admin:
      hash: "5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8" # "password"
      admin: true
      roles: ["admin"]
      permissions:
        "*": true
server:
  # The schema requires a single `address` (host:port) and does not allow
  # separate `host`/`port` keys under `server`.
  address: "localhost:8080"
database:
type: "sqlite"
connection: "data/fetch_ml.db"
host: ""
port: 5432
username: ""
password: ""
database: "fetch_ml"
redis:
url: "redis://localhost:6379"
host: "localhost"
port: 6379
password: ""
db: 0
pool_size: 10
max_retries: 3
logging:
level: "info"
file: "logs/fetch_ml.log"
format: "text"
console: true
security:
secret_key: "your-secret-key-at-least-16-chars"
jwt_expiry: "24h"
rate_limit:
enabled: false
requests_per_minute: 60
containers:
runtime: "podman"
registry: "docker.io"
pull_policy: "missing"
resources:
cpu_limit: "2"
memory_limit: "4Gi"
gpu_limit: 1
storage:
data_path: "data"
results_path: "results"
temp_path: "/tmp/fetch_ml"
cleanup:
enabled: true
max_age_hours: 168
max_size_gb: 10

View file

@ -0,0 +1,238 @@
# Fetch ML Configuration Schema (JSON Schema expressed as YAML)
$schema: "http://json-schema.org/draft-07/schema#"
title: "Fetch ML Configuration"
type: object
additionalProperties: false
required:
- auth
- server
properties:
base_path:
type: string
description: Base path for experiment data
auth:
type: object
additionalProperties: false
required:
- enabled
properties:
enabled:
type: boolean
description: Enable or disable authentication
apikeys:
type: object
description: API key registry
additionalProperties:
type: object
additionalProperties: false
required:
- hash
properties:
hash:
type: string
description: SHA256 hash of the API key
admin:
type: boolean
default: false
roles:
type: array
items:
type: string
enum: [admin, data_scientist, data_engineer, viewer, operator]
permissions:
type: object
additionalProperties:
type: boolean
server:
type: object
additionalProperties: false
required: [address]
properties:
address:
type: string
description: Listen address, e.g. ":9101"
tls:
type: object
additionalProperties: false
properties:
enabled:
type: boolean
default: false
cert_file:
type: string
key_file:
type: string
min_version:
type: string
description: Minimum TLS version (e.g. "1.3")
database:
type: object
additionalProperties: false
properties:
type:
type: string
enum: [sqlite, postgres, mysql]
default: sqlite
connection:
type: string
host:
type: string
port:
type: integer
minimum: 1
maximum: 65535
username:
type: string
password:
type: string
database:
type: string
redis:
type: object
additionalProperties: false
properties:
url:
type: string
pattern: "^redis://"
addr:
type: string
description: Optional host:port shorthand for Redis
host:
type: string
default: "localhost"
port:
type: integer
minimum: 1
maximum: 65535
default: 6379
password:
type: string
db:
type: integer
minimum: 0
default: 0
pool_size:
type: integer
minimum: 1
default: 10
max_retries:
type: integer
minimum: 0
default: 3
logging:
type: object
additionalProperties: false
properties:
level:
type: string
enum: [debug, info, warn, error, fatal]
default: "info"
file:
type: string
audit_log:
type: string
format:
type: string
enum: [text, json]
default: "text"
console:
type: boolean
default: true
security:
type: object
additionalProperties: false
properties:
secret_key:
type: string
minLength: 16
jwt_expiry:
type: string
pattern: "^\\d+[smhd]$"
default: "24h"
ip_whitelist:
type: array
items:
type: string
failed_login_lockout:
type: object
additionalProperties: false
properties:
enabled:
type: boolean
max_attempts:
type: integer
minimum: 1
lockout_duration:
type: string
description: Duration string, e.g. "15m"
rate_limit:
type: object
additionalProperties: false
properties:
enabled:
type: boolean
default: false
requests_per_minute:
type: integer
minimum: 1
default: 60
burst_size:
type: integer
minimum: 1
containers:
type: object
additionalProperties: false
properties:
runtime:
type: string
enum: [podman, docker]
default: "podman"
registry:
type: string
default: "docker.io"
pull_policy:
type: string
enum: [always, missing, never]
default: "missing"
resources:
type: object
additionalProperties: false
properties:
cpu_limit:
type: string
description: CPU limit (e.g., "2" or "500m")
memory_limit:
type: string
description: Memory limit (e.g., "1Gi" or "512Mi")
gpu_limit:
type: integer
minimum: 0
storage:
type: object
additionalProperties: false
properties:
data_path:
type: string
default: "data"
results_path:
type: string
default: "results"
temp_path:
type: string
default: "/tmp/fetch_ml"
cleanup:
type: object
additionalProperties: false
properties:
enabled:
type: boolean
default: true
max_age_hours:
type: integer
minimum: 1
default: 168
max_size_gb:
type: integer
minimum: 1
default: 10

View file

@ -0,0 +1,139 @@
# Role-based permissions configuration
# Defines what each role can do in the system
# Permission format: resource:action
# Examples: jobs:create, data:read, users:manage
roles:
admin:
description: "Full system access"
permissions:
- "*"
data_scientist:
description: "ML experiment management"
permissions:
- "jobs:create"
- "jobs:read"
- "jobs:update"
- "jobs:delete:own"
- "data:read"
- "data:create"
- "models:read"
- "models:create"
- "models:update:own"
- "metrics:read"
data_engineer:
description: "Data pipeline and infrastructure"
permissions:
- "data:create"
- "data:read"
- "data:update"
- "data:delete"
- "jobs:read"
- "jobs:update"
- "pipelines:create"
- "pipelines:read"
- "pipelines:update"
- "storage:read"
- "storage:write"
viewer:
description: "Read-only access"
permissions:
- "jobs:read"
- "data:read"
- "models:read"
- "metrics:read"
- "pipelines:read"
operator:
description: "System operations and monitoring"
permissions:
- "jobs:read"
- "jobs:update"
- "jobs:restart"
- "metrics:read"
- "system:read"
- "system:status"
- "logs:read"
# Permission groups for easier management
groups:
ml_developer:
description: "Combined data scientist and data engineer"
inherits:
- data_scientist
- data_engineer
read_only:
description: "Read access to all resources"
permissions:
- "jobs:read"
- "data:read"
- "models:read"
- "pipelines:read"
- "metrics:read"
- "system:read"
# Resource hierarchy for permission inheritance
hierarchy:
jobs:
children:
create: true
read: true
update: true
delete: true
restart: true
special:
own: "User can only access their own resources"
data:
children:
create: true
read: true
update: true
delete: true
upload: true
download: true
models:
children:
create: true
read: true
update: true
delete: true
deploy: true
special:
own: "User can only access their own models"
system:
children:
read: true
status: true
manage: true
config: true
metrics:
children:
read: true
export: true
delete: true
pipelines:
children:
create: true
read: true
update: true
delete: true
run: true
stop: true
# Default permissions for new users
defaults:
new_user_role: "viewer"
admin_users:
- "admin"
- "root"
- "system"

39
configs/worker-prod.toml Normal file
View file

@ -0,0 +1,39 @@
worker_id = "worker-prod-01"
base_path = "/data/ml-experiments"
max_workers = 4
# Redis connection
# NOTE(review): redis_password is a real-looking secret committed in plain
# text (the same value appears in .env.dev). Rotate it and supply it from a
# non-committed source if the worker supports that — TODO confirm.
redis_addr = "localhost:6379"
redis_password = "JZVd2Y6IDaLNaYLBOFgQ7ae4Ox5t37NTIyPMQlLJD4k="
redis_db = 0
# SSH connection (for remote operations)
host = "localhost"
user = "ml-user"
port = 22
ssh_key = "~/.ssh/id_rsa"
# Podman configuration
podman_image = "ml-training:latest"
gpu_access = true
container_workspace = "/workspace"
container_results = "/results"
train_script = "train.py"
# Dataset management
auto_fetch_data = true
data_dir = "/data/datasets"
data_manager_path = "/usr/local/bin/data_manager"
dataset_cache_ttl = "24h"
# Task management
task_lease_duration = "1h"
heartbeat_interval = "30s"
graceful_timeout = "5m"
poll_interval = "100ms"
metrics_flush_interval = "10s"
# Metrics exporter
[metrics]
enabled = true
listen_addr = ":9090"

138
nginx/README.md Normal file
View file

@ -0,0 +1,138 @@
# Nginx Configuration for FetchML
This directory contains nginx configurations for FetchML.
## Files
- **`fetchml-site.conf`** - Ready-to-use site configuration (recommended)
- **`nginx-secure.conf`** - Full standalone nginx config (advanced)
- **`setup-nginx.sh`** - Helper script for easy installation
## Quick Setup
### Option 1: Automated (Recommended)
```bash
sudo ./nginx/setup-nginx.sh
```
This will:
- Detect your nginx setup (Debian or RHEL style)
- Prompt for your domain and SSL certificates
- Install the configuration
- Test and reload nginx
### Option 2: Manual
**For Debian/Ubuntu:**
```bash
# 1. Edit fetchml-site.conf and change:
# - ml.example.com to your domain
# - SSL certificate paths
# - Port if not using 9102
# 2. Install
sudo cp nginx/fetchml-site.conf /etc/nginx/sites-available/fetchml
sudo ln -s /etc/nginx/sites-available/fetchml /etc/nginx/sites-enabled/
# 3. Test and reload
sudo nginx -t
sudo systemctl reload nginx
```
**For RHEL/Rocky/CentOS:**
```bash
# 1. Edit fetchml-site.conf (same as above)
# 2. Install
sudo cp nginx/fetchml-site.conf /etc/nginx/conf.d/fetchml.conf
# 3. Test and reload
sudo nginx -t
sudo systemctl reload nginx
```
## Configuration Details
### Endpoints
- `/ws` - WebSocket API (rate limited: 5 req/s)
- `/api/` - REST API (rate limited: 10 req/s)
- `/health` - Health check
- `/grafana/` - Grafana (commented out by default)
### Security Features
- TLSv1.2 and TLSv1.3 only
- Security headers (HSTS, X-Frame-Options, X-Content-Type-Options, Referrer-Policy; CSP is set only in `nginx-secure.conf`)
- Rate limiting per endpoint
- Request size limits (10MB)
- Version hiding
### What to Change
Before using, update these values in `fetchml-site.conf`:
1. **Domain**: Replace `ml.example.com` with your domain
2. **SSL Certificates**: Update paths to your actual certificates
3. **Port**: Change `9102` if using a different port
4. **Grafana**: Uncomment if you want to expose it
## SSL Certificates
### Self-Signed (Dev/Testing)
```bash
sudo openssl req -x509 -nodes -days 365 -newkey rsa:2048 \
-keyout /etc/ssl/private/fetchml.key \
-out /etc/ssl/certs/fetchml.crt \
-subj "/CN=ml.example.com"
```
### Let's Encrypt (Production)
```bash
sudo apt-get install certbot python3-certbot-nginx
sudo certbot --nginx -d ml.example.com
```
## Troubleshooting
### Test Configuration
```bash
sudo nginx -t
```
### Check Logs
```bash
sudo tail -f /var/log/nginx/fetchml_error.log
sudo tail -f /var/log/nginx/fetchml_access.log
```
### Verify Proxy
```bash
curl -I https://ml.example.com/health
```
### Common Issues
**"Permission denied" error**: Check that nginx user can access SSL certificates
```bash
sudo chmod 644 /etc/ssl/certs/fetchml.crt
sudo chmod 600 /etc/ssl/private/fetchml.key
```
**WebSocket not working**: Ensure your firewall allows the connection and backend is running
```bash
# Check backend
curl http://localhost:9102/health
# Check firewall
sudo firewall-cmd --list-all
```
## Integration with Existing Nginx
If you already have nginx running, just drop `fetchml-site.conf` into your sites directory. It won't conflict with other sites.
The configuration is self-contained and only handles the specified `server_name`.

100
nginx/fetchml-site.conf Normal file
View file

@ -0,0 +1,100 @@
# FetchML Nginx Site Configuration
# Drop this file into /etc/nginx/sites-available/fetchml
# Then: sudo ln -s /etc/nginx/sites-available/fetchml /etc/nginx/sites-enabled/
# Test: sudo nginx -t
# Reload: sudo systemctl reload nginx
server {
listen 80;
server_name ml.example.com; # CHANGE THIS to your domain
# Redirect HTTP to HTTPS
return 301 https://$server_name$request_uri;
}
# Rate limiting for API
# limit_req_zone is only valid in the http context. This file is included from
# http {}, so the zones are declared here, outside the server block; declaring
# them inside server {} makes `nginx -t` reject the configuration.
limit_req_zone $binary_remote_addr zone=fetchml_api:10m rate=10r/s;
limit_req_zone $binary_remote_addr zone=fetchml_ws:10m rate=5r/s;

# HTTPS virtual host: terminates TLS and proxies /ws, /api/ and /health to the
# FetchML API server on localhost.
server {
    listen 443 ssl http2;
    server_name ml.example.com; # CHANGE THIS to your domain

    # SSL Configuration
    # CHANGE THESE paths to your actual SSL certificates
    ssl_certificate /etc/ssl/certs/ml.example.com.crt;
    ssl_certificate_key /etc/ssl/private/ml.example.com.key;

    # Modern SSL settings
    ssl_protocols TLSv1.3 TLSv1.2;
    ssl_prefer_server_ciphers on;
    ssl_ciphers ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305;
    ssl_session_timeout 1d;
    ssl_session_cache shared:MozSSL:10m;
    ssl_session_tickets off;

    # Security headers
    add_header X-Frame-Options DENY always;
    add_header X-Content-Type-Options nosniff always;
    add_header X-XSS-Protection "1; mode=block" always;
    add_header Referrer-Policy "strict-origin-when-cross-origin" always;
    add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;

    # Hide nginx version
    server_tokens off;

    # Client limits
    client_max_body_size 10M;
    client_body_timeout 12s;
    client_header_timeout 12s;

    # Logging
    access_log /var/log/nginx/fetchml_access.log;
    error_log /var/log/nginx/fetchml_error.log warn;

    # WebSocket endpoint
    location /ws {
        limit_req zone=fetchml_ws burst=10 nodelay;
        proxy_pass http://localhost:9102; # CHANGE PORT if needed
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection "upgrade";
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # WebSocket timeouts
        # Only send/read need to be long for idle WebSocket connections; the
        # connect timeout covers establishing the upstream connection and
        # cannot usefully exceed about 75 seconds.
        proxy_connect_timeout 60s;
        proxy_send_timeout 7d;
        proxy_read_timeout 7d;
    }

    # API endpoints
    location /api/ {
        limit_req zone=fetchml_api burst=20 nodelay;
        proxy_pass http://localhost:9102; # CHANGE PORT if needed
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header X-API-Key $http_x_api_key;
    }

    # Health check
    location /health {
        proxy_pass http://localhost:9102; # CHANGE PORT if needed
        proxy_set_header Host $host;
        access_log off;
    }

    # Grafana (optional - only if you want to expose it)
    # Uncomment if you want Grafana accessible via nginx
    # location /grafana/ {
    #     proxy_pass http://localhost:3000/;
    #     proxy_set_header Host $host;
    #     proxy_set_header X-Real-IP $remote_addr;
    # }
}

157
nginx/nginx-secure.conf Normal file
View file

@ -0,0 +1,157 @@
events {
worker_connections 1024;
}
http {
# Security headers
add_header X-Frame-Options DENY always;
add_header X-Content-Type-Options nosniff always;
add_header X-XSS-Protection "1; mode=block" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'" always;
add_header Strict-Transport-Security "max-age=31536000; includeSubDomains; preload" always;
# Hide server version
server_tokens off;
# Rate limiting
limit_req_zone $binary_remote_addr zone=api:10m rate=10r/s;
limit_req_zone $binary_remote_addr zone=ws:10m rate=5r/s;
# Connection limiting
limit_conn_zone $binary_remote_addr zone=conn_limit_per_ip:10m;
# Logging
log_format security '$remote_addr - $remote_user [$time_local] '
'"$request" $status $body_bytes_sent '
'"$http_referer" "$http_user_agent" '
'$request_time $upstream_response_time';
access_log /var/log/nginx/security.log security;
error_log /var/log/nginx/error.log warn;
# Redirect HTTP to HTTPS
server {
listen 80;
server_name _;
return 301 https://$host$request_uri;
}
# HTTPS server
server {
listen 443 ssl http2;
server_name ml-experiments.example.com;
# SSL configuration
ssl_certificate /etc/nginx/ssl/cert.pem;
ssl_certificate_key /etc/nginx/ssl/key.pem;
ssl_trusted_certificate /etc/nginx/ssl/ca.pem;
# Modern SSL configuration
ssl_protocols TLSv1.3;
ssl_prefer_server_ciphers on;
ssl_ciphers ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305;
ssl_session_timeout 1d;
ssl_session_cache shared:SSL:50m;
ssl_session_tickets off;
# OCSP stapling
ssl_stapling on;
ssl_stapling_verify on;
# Security limits
client_max_body_size 10M;
client_body_timeout 12s;
client_header_timeout 12s;
keepalive_timeout 15s;
send_timeout 10s;
limit_conn conn_limit_per_ip 20;
# API endpoints
location /health {
proxy_pass https://api-server:9101;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_connect_timeout 5s;
proxy_send_timeout 10s;
proxy_read_timeout 10s;
}
# WebSocket endpoint with special rate limiting
location /ws {
    limit_req zone=ws burst=10 nodelay;
    proxy_pass https://api-server:9101;
    proxy_http_version 1.1;
    proxy_set_header Upgrade $http_upgrade;
    proxy_set_header Connection "upgrade";
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;

    # Long send/read timeouts keep idle WebSocket connections open. The connect
    # timeout only covers establishing the upstream connection and cannot
    # usefully exceed about 75 seconds, so it stays short.
    proxy_connect_timeout 60s;
    proxy_send_timeout 7d;
    proxy_read_timeout 7d;

    # WebSocket specific headers
    proxy_set_header Sec-WebSocket-Key $http_sec_websocket_key;
    proxy_set_header Sec-WebSocket-Protocol $http_sec_websocket_protocol;
    proxy_set_header Sec-WebSocket-Version $http_sec_websocket_version;
}
# API endpoints with rate limiting
location /api/ {
limit_req zone=api burst=20 nodelay;
proxy_pass https://api-server:9101;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-API-Key $http_x_api_key;
proxy_connect_timeout 5s;
proxy_send_timeout 10s;
proxy_read_timeout 10s;
}
# Deny all other locations
location / {
return 404;
}
# Security monitoring endpoints (admin only)
location /admin/ {
# IP whitelist for admin access
allow 10.0.0.0/8;
allow 192.168.0.0/16;
allow 172.16.0.0/12;
deny all;
proxy_pass https://api-server:9101;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
# Health check for load balancers
# Answers directly from nginx without contacting the backend.
location /lb-health {
    access_log off;
    # default_type sets the Content-Type of the `return` body. Using
    # `add_header Content-Type` instead would emit a second Content-Type header
    # and stop the add_header directives of the enclosing levels from being
    # inherited by this location.
    default_type text/plain;
    return 200 "healthy\n";
}
}
# Default server to catch unknown hosts
server {
listen 443 ssl http2 default_server;
server_name _;
ssl_certificate /etc/nginx/ssl/cert.pem;
ssl_certificate_key /etc/nginx/ssl/key.pem;
return 444;
}
}

109
nginx/setup-nginx.sh Executable file
View file

@ -0,0 +1,109 @@
#!/bin/bash
# Nginx Setup Helper for FetchML
# This script helps integrate FetchML into an existing nginx setup
#
# It prompts for the domain, backend port and SSL certificate/key paths,
# renders fetchml-site.conf with those values, installs the result into the
# detected nginx config directory, runs `nginx -t` and optionally reloads nginx.
# Exits non-zero if nginx is missing, input is invalid, or the config test fails.
set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SITE_CONFIG="$SCRIPT_DIR/fetchml-site.conf"

# Colors
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Escape a value so it can be used as the replacement side of `s|..|..|`:
# backslash, '&' and the '|' delimiter are special there.
escape_sed_replacement() {
    printf '%s' "$1" | sed -e 's/[\\&|]/\\&/g'
}

echo -e "${BLUE}FetchML Nginx Setup Helper${NC}"
echo ""

# Check if nginx is installed
if ! command -v nginx &>/dev/null; then
    echo -e "${YELLOW}Nginx is not installed.${NC}"
    echo "Install with:"
    echo " Ubuntu/Debian: sudo apt-get install nginx"
    echo " RHEL/Rocky: sudo dnf install nginx"
    exit 1
fi

# The template must exist next to this script
if [ ! -f "$SITE_CONFIG" ]; then
    echo -e "${YELLOW}Site template not found: $SITE_CONFIG${NC}"
    exit 1
fi

# Detect nginx config structure
if [ -d "/etc/nginx/sites-available" ]; then
    # Debian/Ubuntu style
    SITES_AVAILABLE="/etc/nginx/sites-available"
    SITES_ENABLED="/etc/nginx/sites-enabled"
    STYLE="debian"
elif [ -d "/etc/nginx/conf.d" ]; then
    # RHEL/CentOS style
    SITES_AVAILABLE="/etc/nginx/conf.d"
    SITES_ENABLED=""
    STYLE="rhel"
else
    echo -e "${YELLOW}Could not detect nginx configuration directory.${NC}"
    echo "Please manually copy $SITE_CONFIG to your nginx config directory."
    exit 1
fi

echo "Detected nginx style: $STYLE"
echo ""

# Read values (-r keeps backslashes in the input literal)
read -r -p "Enter your domain name (e.g., ml.example.com): " domain
read -r -p "Enter API server port [9102]: " port
port=${port:-9102}
read -r -p "Enter SSL certificate path: " cert_path
read -r -p "Enter SSL key path: " key_path

# Refuse to render a config with empty or malformed values
if [ -z "$domain" ] || [ -z "$cert_path" ] || [ -z "$key_path" ]; then
    echo -e "${YELLOW}Domain, SSL certificate path and SSL key path are all required.${NC}"
    exit 1
fi
if ! [[ "$port" =~ ^[0-9]+$ ]]; then
    echo -e "${YELLOW}Port must be a number: $port${NC}"
    exit 1
fi

# Create temp config with substitutions
# mktemp avoids a predictable, attacker-creatable path in /tmp; the trap removes
# the file on every exit path.
temp_config="$(mktemp)"
trap 'rm -f "$temp_config"' EXIT

domain_esc="$(escape_sed_replacement "$domain")"
cert_esc="$(escape_sed_replacement "$cert_path")"
key_esc="$(escape_sed_replacement "$key_path")"

# Order matters: the certificate and key paths contain "ml.example.com", so they
# must be replaced BEFORE the domain substitution, otherwise their patterns no
# longer match and the entered paths are silently ignored.
sed -e "s|/etc/ssl/certs/ml\.example\.com\.crt|$cert_esc|g" \
    -e "s|/etc/ssl/private/ml\.example\.com\.key|$key_esc|g" \
    -e "s|localhost:9102|localhost:$port|g" \
    -e "s|ml\.example\.com|$domain_esc|g" \
    "$SITE_CONFIG" > "$temp_config"

# Install config
# `install -m 644` keeps the installed file world-readable regardless of the
# restrictive mode mktemp gives the temporary file.
echo ""
echo -e "${BLUE}Installing nginx configuration...${NC}"
if [ "$STYLE" = "debian" ]; then
    sudo install -m 644 "$temp_config" "$SITES_AVAILABLE/fetchml"
    sudo ln -sf "$SITES_AVAILABLE/fetchml" "$SITES_ENABLED/fetchml"
    echo -e "${GREEN}${NC} Config installed to $SITES_AVAILABLE/fetchml"
    echo -e "${GREEN}${NC} Symlink created in $SITES_ENABLED/"
else
    sudo install -m 644 "$temp_config" "$SITES_AVAILABLE/fetchml.conf"
    echo -e "${GREEN}${NC} Config installed to $SITES_AVAILABLE/fetchml.conf"
fi

# Test nginx config
echo ""
echo -e "${BLUE}Testing nginx configuration...${NC}"
if sudo nginx -t; then
    echo -e "${GREEN}${NC} Nginx configuration is valid"
    # Offer to reload
    read -p "Reload nginx now? [y/N]: " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        sudo systemctl reload nginx
        echo -e "${GREEN}${NC} Nginx reloaded"
    else
        echo "Reload later with: sudo systemctl reload nginx"
    fi
else
    echo -e "${YELLOW}!${NC} Nginx configuration test failed"
    echo "Please fix the errors and run: sudo nginx -t"
    # Do not report success when the installed configuration is invalid.
    exit 1
fi

echo ""
echo -e "${GREEN}Setup complete!${NC}"
echo ""
echo "Your site is configured for: https://$domain"
echo ""
echo "Next steps:"
echo " 1. Ensure your DNS points to this server"
echo " 2. Start FetchML API server on port $port"
echo " 3. Visit https://$domain/health to test"