diff --git a/.env.dev b/.env.dev
new file mode 100644
index 0000000..dd9b9bc
--- /dev/null
+++ b/.env.dev
@@ -0,0 +1,6 @@
+# Development environment variables (dev-only values; do not reuse elsewhere)
+REDIS_PASSWORD=JZVd2Y6IDaLNaYLBOFgQ7ae4Ox5t37NTIyPMQlLJD4k=
+# Keep JWT_SECRET on one line: a wrapped base64 value is not a valid KEY=VALUE entry
+JWT_SECRET=M/11uD5waf4glbTmFQiqSJaMCtCXTFwxvxRiFZL3GuFQO82PoURsIfFbmzyxrbPJL5uc9Qj3Gd3Ijw7/kRMhwA==
+GRAFANA_USER=admin
+GRAFANA_PASSWORD=pd/UiVYlS+wmXlMmvh6mTw==
diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..89ee812
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,63 @@
+# Fetch ML Environment Variables
+# Copy this file to .env and modify as needed
+
+# Server Configuration
+FETCH_ML_HOST=localhost
+FETCH_ML_PORT=8080
+FETCH_ML_LOG_LEVEL=info
+FETCH_ML_LOG_FILE=logs/fetch_ml.log
+
+# Database Configuration
+FETCH_ML_DB_TYPE=sqlite
+FETCH_ML_DB_PATH=db/fetch_ml.db
+
+# Redis Configuration
+FETCH_ML_REDIS_URL=redis://localhost:6379
+FETCH_ML_REDIS_PASSWORD=
+FETCH_ML_REDIS_DB=0
+
+# Authentication
+FETCH_ML_AUTH_ENABLED=true
+FETCH_ML_AUTH_CONFIG=configs/config-local.yaml
+
+# Security
+FETCH_ML_SECRET_KEY=your-secret-key-here
+FETCH_ML_JWT_EXPIRY=24h
+
+# Container Runtime
+FETCH_ML_CONTAINER_RUNTIME=podman
+FETCH_ML_CONTAINER_REGISTRY=docker.io
+
+# Storage
+FETCH_ML_STORAGE_PATH=data
+FETCH_ML_RESULTS_PATH=results
+FETCH_ML_TEMP_PATH=/tmp/fetch_ml
+
+# Development
+FETCH_ML_DEBUG=false
+FETCH_ML_DEV_MODE=false
+
+# CLI Configuration (overrides ~/.ml/config.toml)
+FETCH_ML_CLI_HOST=localhost
+FETCH_ML_CLI_USER=mluser
+FETCH_ML_CLI_BASE=/opt/ml
+FETCH_ML_CLI_PORT=22
+FETCH_ML_CLI_API_KEY=your-api-key-here
+
+# TUI Configuration (overrides TUI config file)
+FETCH_ML_TUI_HOST=localhost
+FETCH_ML_TUI_USER=mluser
+FETCH_ML_TUI_SSH_KEY=~/.ssh/id_rsa
+FETCH_ML_TUI_PORT=22
+FETCH_ML_TUI_BASE_PATH=/opt/ml
+FETCH_ML_TUI_TRAIN_SCRIPT=train.py
+FETCH_ML_TUI_REDIS_ADDR=localhost:6379
+FETCH_ML_TUI_REDIS_PASSWORD=
+FETCH_ML_TUI_REDIS_DB=0
+FETCH_ML_TUI_KNOWN_HOSTS=~/.ssh/known_hosts
+
+# Monitoring Security
+# Generate with: openssl 
rand -base64 32
+GRAFANA_ADMIN_PASSWORD=changeme-generate-secure-password
+REDIS_PASSWORD=changeme-generate-secure-password
+
diff --git a/build/README.md b/build/README.md
new file mode 100644
index 0000000..f5c18fc
--- /dev/null
+++ b/build/README.md
@@ -0,0 +1,30 @@
+# Build Configuration
+
+This directory contains build configurations for containerization.
+
+## Docker
+
+**Location**: `build/docker/`
+
+### Dockerfiles
+
+- **`simple.Dockerfile`** - Lightweight API server image
+- **`api-server.Dockerfile`** - Full-featured API server
+
+### Usage
+
+```bash
+# Build from project root
+docker build -f build/docker/simple.Dockerfile -t fetchml:latest .
+
+# Or use Makefile
+make docker-build
+```
+
+## Podman
+
+**Location**: `../podman/`
+
+Podman configurations for running ML experiments with GPU support.
+
+**Note**: Not for building - these are runtime configs for experiment execution.
diff --git a/build/docker/.dockerignore b/build/docker/.dockerignore
new file mode 100644
index 0000000..4707837
--- /dev/null
+++ b/build/docker/.dockerignore
@@ -0,0 +1,46 @@
+# NOTE(review): Docker reads ignore rules from `.dockerignore` at the build-context
+# root, or from `<Dockerfile name>.dockerignore` next to the Dockerfile. When building
+# from the project root as the README documents, this path is presumably not picked
+# up - confirm, then move it to the root or rename it (simple.Dockerfile.dockerignore).
+
+# Development files
+.git/
+.github/
+.windsurf/
+*.md
+
+# IDE files
+.vscode/
+.idea/
+*.swp
+
+# Test files (patterns are rooted at the context; **/ matches at any depth)
+**/*_test.go
+tests/
+
+# Build artifacts
+bin/
+cli/zig-out/
+.zig-cache/
+
+# Runtime data
+data/
+logs/
+experiments/
+dump.rdb
+
+# Secrets (env files must never reach an image layer via `COPY . .`)
+.env
+.env.*
+**/*.key
+**/*.pem
+secrets/
+
+# Python
+**/__pycache__/
+**/*.pyc
+.venv/
+
+# Documentation
+docs/
+examples/
diff --git a/build/docker/api-server.Dockerfile b/build/docker/api-server.Dockerfile
new file mode 100644
index 0000000..f9e93c5
--- /dev/null
+++ b/build/docker/api-server.Dockerfile
@@ -0,0 +1,88 @@
+# Multi-stage build for ML Experiment Manager
+FROM golang:1.25-alpine AS go-builder
+
+# Install dependencies
+RUN apk add --no-cache git make podman redis
+
+# Set working directory
+WORKDIR /app
+
+# Copy go mod files
+COPY go.mod go.sum ./
+
+# Download dependencies
+RUN go mod download
+
+# Copy source code
+COPY . .
+
+# Build Go binaries
+RUN make build
+
+# Zig CLI stage
+FROM alpine:3.19 AS zig-builder
+
+# Zig release to install (override with --build-arg ZIG_VERSION=...)
+ARG ZIG_VERSION=0.15.2
+# Target architecture; BuildKit sets this automatically (amd64, arm64, ...)
+ARG TARGETARCH
+
+# Install dependencies
+RUN apk add --no-cache curl xz
+
+# Install Zig for the image's architecture instead of hard-coding aarch64.
+# Tarballs are named zig-<arch>-linux-<version> (NOTE(review): confirm against the Zig
+# download index); -f makes curl fail on an HTTP error instead of feeding tar an error page.
+RUN set -eu; \
+    case "${TARGETARCH:-$(apk --print-arch)}" in \
+        amd64|x86_64) zig_arch=x86_64 ;; \
+        arm64|aarch64) zig_arch=aarch64 ;; \
+        *) echo "unsupported architecture for Zig: ${TARGETARCH:-unknown}" >&2; exit 1 ;; \
+    esac; \
+    zig_dir="zig-${zig_arch}-linux-${ZIG_VERSION}"; \
+    curl -fsSL -o /tmp/zig.tar.xz "https://ziglang.org/download/${ZIG_VERSION}/${zig_dir}.tar.xz"; \
+    tar -xJf /tmp/zig.tar.xz -C /opt; \
+    rm /tmp/zig.tar.xz; \
+    ln -s "/opt/${zig_dir}" /opt/zig
+ENV PATH="/opt/zig:${PATH}"
+
+# Copy CLI source
+COPY cli/ /app/cli/
+
+# Build Zig CLI
+WORKDIR /app/cli
+RUN zig build cross
+
+# Final stage
+FROM alpine:3.19
+
+# Install runtime dependencies
+RUN apk add --no-cache ca-certificates rsync openssh-client redis
+
+# Create app user
+RUN addgroup -g 1001 -S appgroup && \
+    adduser -u 1001 -S appuser -G appgroup
+
+# Set working directory
+WORKDIR /app
+
+# Copy binaries from builders
+COPY --from=go-builder /app/bin/ /usr/local/bin/
+COPY --from=zig-builder /app/cli/zig-out/bin/ml /usr/local/bin/
+
+# Create directories
+RUN mkdir -p /data/ml-experiments /home/appuser/.ml && \
+    chown -R appuser:appgroup /data /home/appuser
+
+# Switch to app user
+USER appuser
+
+# Expose ports
+EXPOSE 9100 9101
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD wget --no-verbose --tries=1 --spider http://localhost:9100/health || exit 1
+
+# Default command
+CMD ["/usr/local/bin/api-server"]
diff --git a/build/docker/simple.Dockerfile b/build/docker/simple.Dockerfile
new file mode 100644
index 0000000..1364e08
--- /dev/null
+++ b/build/docker/simple.Dockerfile
@@ -0,0 +1,70 @@
+# Simple Dockerfile for homelab use
+FROM golang:1.25-alpine AS builder
+
+# Install dependencies
+RUN apk add --no-cache git make
+
+# Set working directory
+WORKDIR /app
+
+# Copy go mod files
+COPY go.mod go.sum ./
+
+# Download dependencies
+RUN go mod download
+
+# Copy source code
+COPY . .
+
+# Build Go binaries
+RUN make build
+
+# Final stage
+FROM alpine:3.19
+
+# Install runtime dependencies (curl is needed by the HEALTHCHECK below)
+RUN apk add --no-cache ca-certificates curl openssl redis
+
+# Create app user
+RUN addgroup -g 1001 -S appgroup && \
+    adduser -u 1001 -S appuser -G appgroup
+
+# Set working directory
+WORKDIR /app
+
+# Copy binaries from builder
+COPY --from=builder /app/bin/ /usr/local/bin/
+
+# Copy configs and templates
+COPY --from=builder /app/configs/ /app/configs/
+COPY --from=builder /app/nginx/ /app/nginx/
+
+# Create necessary directories and hand them to the runtime user; the
+# container runs as appuser, which cannot write to root-owned directories
+RUN mkdir -p /app/data/experiments /app/logs /app/ssl && \
+    chown -R appuser:appgroup /app/data /app/logs /app/ssl
+
+# Generate SSL certificates for container use.
+# The private key is owned by appuser and owner-readable only (0600).
+# NOTE(review): the key is generated at build time and stored in an image layer,
+# so anyone with the image has it - mount real certificates for non-homelab use.
+RUN openssl req -x509 -newkey rsa:2048 -keyout /app/ssl/key.pem -out /app/ssl/cert.pem -days 365 -nodes \
+    -subj "/C=US/ST=Homelab/L=Local/O=ML/OU=Experiments/CN=localhost" && \
+    chown appuser:appgroup /app/ssl/cert.pem /app/ssl/key.pem && \
+    chmod 644 /app/ssl/cert.pem && \
+    chmod 600 /app/ssl/key.pem
+
+# Switch to app user
+USER appuser
+
+# Expose ports
+EXPOSE 9101
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
+    CMD curl -k -f https://localhost:9101/health || exit 1
+
+# Default command
+# NOTE(review): no configs/config.yaml is visible in this change; config-docker.yaml
+# matches this image's paths (/app/ssl, :9101) - confirm which file is intended.
+CMD ["/usr/local/bin/api-server", "-config", "/app/configs/config.yaml"]
diff --git a/configs/config-dev.yaml b/configs/config-dev.yaml
new file mode 100644
index 0000000..a32b6d8
--- /dev/null
+++ b/configs/config-dev.yaml
@@ -0,0 +1,36 @@
+base_path: "./data/experiments"
+
+auth:
+  enabled: true
+  apikeys:
+    test_user:
+      hash: "02d4e2b0d8b4869a34511cc01ff1ebbc3cac581a6b361988106eaedca9886a38"
+      admin: true
+      roles: ["data_scientist", "admin"]
+      permissions:
+        read: true
+        write: true
+        delete: true
+
+server:
+  address: ":9102"
+  tls:
+    enabled: false
+
+security:
+  rate_limit:
+    enabled: true
+    requests_per_minute: 60
+    burst_size: 10
+  ip_whitelist:
+    - "127.0.0.1"
+    - "::1"
+    - "localhost"
+
+redis:
+  url: "redis://localhost:6379"
+  password: "${REDIS_PASSWORD}"
+
+logging:
+  level: "info"
+  file: ""  # Empty = stderr only (dev mode)
diff --git 
a/configs/config-docker.yaml b/configs/config-docker.yaml
new file mode 100644
index 0000000..5583d29
--- /dev/null
+++ b/configs/config-docker.yaml
@@ -0,0 +1,39 @@
+base_path: "/app/data/experiments"
+
+auth:
+  enabled: true
+  apikeys:  # schema key is `apikeys` (auth allows no additional properties)
+    homelab_user:
+      hash: "5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8" # "password" - replace before use
+      admin: true
+      roles: ["admin"]  # "user" is not a role defined by the schema or permissions.yaml
+      permissions:
+        read: true
+        write: true
+        delete: true
+
+server:
+  address: ":9101"
+  tls:
+    enabled: true
+    cert_file: "/app/ssl/cert.pem"
+    key_file: "/app/ssl/key.pem"
+
+security:
+  rate_limit:
+    enabled: true
+    requests_per_minute: 30
+  ip_whitelist:
+    - "127.0.0.1"
+    - "::1"
+    - "192.168.0.0/16"
+    - "10.0.0.0/8"
+
+redis:
+  url: "redis://redis:6379"
+  pool_size: 10  # schema name for the connection-pool limit
+
+logging:
+  level: "info"
+  file: "/app/logs/app.log"
+  audit_log: "/app/logs/audit.log"  # schema key is `audit_log`
diff --git a/configs/config-local.toml b/configs/config-local.toml
new file mode 100644
index 0000000..d30a2c2
--- /dev/null
+++ b/configs/config-local.toml
@@ -0,0 +1,6 @@
+worker_host = "127.0.0.1"
+worker_user = "dev_user"
+worker_base = "/tmp/ml-experiments"
+worker_port = 9101
+api_key = "dev_test_api_key_12345"
+protocol = "http"
diff --git a/configs/config-local.yaml b/configs/config-local.yaml
new file mode 100644
index 0000000..4cca3a8
--- /dev/null
+++ b/configs/config-local.yaml
@@ -0,0 +1,33 @@
+auth:
+  enabled: true
+  apikeys:
+    dev_user:
+      hash: 2baf1f40105d9501fe319a8ec463fdf4325a2a5df445adf3f572f626253678c9
+      admin: true
+      roles:
+        - admin
+      permissions:
+        '*': true
+
+server:
+  address: ":9101"
+  tls:
+    enabled: false
+
+security:
+  rate_limit:
+    enabled: false
+  ip_whitelist:
+    - "127.0.0.1"
+    - "::1"
+    - "localhost"
+    - "10.0.0.0/8"
+    - "192.168.0.0/16"
+    - "172.16.0.0/12"
+
+# Prometheus metrics
+metrics:
+  enabled: true
+  listen_addr: ":9100"
+  tls:
+    enabled: false
diff --git a/configs/config-no-tls.yaml b/configs/config-no-tls.yaml
new file mode 100644
index 0000000..b796844
--- /dev/null
+++ 
b/configs/config-no-tls.yaml
@@ -0,0 +1,27 @@
+base_path: "./data/experiments"
+
+auth:
+  enabled: true
+
+server:
+  address: ":9102"
+  tls:
+    enabled: false
+
+security:
+  rate_limit:
+    enabled: true
+    requests_per_minute: 60
+    burst_size: 10
+  ip_whitelist:
+    - "127.0.0.1"
+    - "::1"
+    - "localhost"
+
+redis:
+  url: "redis://localhost:6379"
+  password: "${REDIS_PASSWORD}"
+
+logging:
+  level: "info"
+  file: "./logs/fetch_ml.log"
diff --git a/configs/config-prod.yaml b/configs/config-prod.yaml
new file mode 100644
index 0000000..c1ec0bd
--- /dev/null
+++ b/configs/config-prod.yaml
@@ -0,0 +1,53 @@
+base_path: "./data/ml-experiments"
+
+auth:
+  enabled: true
+  apikeys:
+    homelab_user:
+      hash: "5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8" # "password" - replace before deploying
+      admin: true
+      roles: ["admin"]
+      permissions:
+        read: true
+        write: true
+        delete: true
+
+server:
+  address: ":9101"
+  tls:
+    enabled: false # Disabled for local testing
+    cert_file: "./ssl/cert.pem"
+    key_file: "./ssl/key.pem"
+    min_version: "1.3"
+
+security:
+  rate_limit:
+    enabled: true
+    requests_per_minute: 60
+    burst_size: 10
+  ip_whitelist:
+    - "127.0.0.1"
+    - "::1"
+    - "localhost"
+    - "10.0.0.0/8"
+    - "192.168.0.0/16"
+    - "172.16.0.0/12"
+  failed_login_lockout:
+    enabled: true
+    max_attempts: 5
+    lockout_duration: "15m"
+
+# SQLite database for production
+database:
+  type: "sqlite"
+  connection: "data/fetch_ml.db"
+
+redis:
+  url: "redis://localhost:6379"
+  addr: "localhost:6379"
+  password: "${REDIS_PASSWORD}"  # taken from the environment, as in config-dev.yaml; never commit the value
+
+logging:
+  level: "info"
+  file: "logs/fetch_ml.log"
+  audit_log: "logs/audit.log"
diff --git a/configs/examples/config-postgres.yaml b/configs/examples/config-postgres.yaml
new file mode 100644
index 0000000..35cf082
--- /dev/null
+++ b/configs/examples/config-postgres.yaml
@@ -0,0 +1,64 @@
+# Fetch ML Configuration Example for PostgreSQL
+# This example shows how to configure Fetch ML to use PostgreSQL as the database
+
+auth:
+  enabled: true
+  apikeys:
+    admin:
+      hash: "5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8" # "password"
+      admin: true
+      roles: ["admin"]
+
+server:
+  address: ":9101"
+  tls:
+    enabled: false
+
+database:
+  type: "postgres"
+  host: "localhost"
+  port: 5432
+  username: "fetchml"
+  password: "your_password_here"
+  database: "fetchml"
+  # Alternatively, you can use a full connection string:
+  # connection: "postgres://fetchml:your_password_here@localhost:5432/fetchml?sslmode=disable"
+
+redis:
+  host: "localhost"
+  port: 6379
+  password: ""
+  db: 0
+  pool_size: 10
+  max_retries: 3
+
+logging:
+  level: "info"
+  console: true
+  format: "text"
+
+security:
+  secret_key: "your-secret-key-here-at-least-16-characters"
+  jwt_expiry: "24h"
+  rate_limit:
+    enabled: false
+    requests_per_minute: 60
+    burst_size: 10
+
+containers:
+  runtime: "podman"
+  registry: "docker.io"
+  pull_policy: "missing"
+  resources:
+    cpu_limit: "2"
+    memory_limit: "4Gi"
+    gpu_limit: 1
+
+storage:
+  data_path: "data"
+  results_path: "results"
+  temp_path: "/tmp/fetch_ml"
+  cleanup:
+    enabled: true
+    max_age_hours: 168
+    max_size_gb: 10
diff --git a/configs/examples/config.yaml.example b/configs/examples/config.yaml.example
new file mode 100644
index 0000000..c467c5a
--- /dev/null
+++ b/configs/examples/config.yaml.example
@@ -0,0 +1,66 @@
+# Fetch ML Configuration Example
+# Copy this file to config.yaml and customize for your environment
+
+auth:
+  enabled: true
+  apikeys:
+    # Example API key (replace with real hashed keys)
+    admin:
+      hash: "5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8" # "password"
+      admin: true
+      roles: ["admin"]
+      permissions:
+        "*": true
+
+server:
+  # Listen address as host:port (the schema requires `address`, not host/port)
+  address: "localhost:8080"
+
+database:
+  type: "sqlite"
+  connection: "data/fetch_ml.db"
+  host: ""
+  port: 5432
+  username: ""
+  password: ""
+  database: "fetch_ml"
+
+redis:
+  url: "redis://localhost:6379"
+  host: "localhost"
+  port: 6379
+  password: ""
+  db: 0
+  pool_size: 10
+  max_retries: 3
+
+logging:
+ level: "info" + file: "logs/fetch_ml.log" + format: "text" + console: true + +security: + secret_key: "your-secret-key-at-least-16-chars" + jwt_expiry: "24h" + rate_limit: + enabled: false + requests_per_minute: 60 + +containers: + runtime: "podman" + registry: "docker.io" + pull_policy: "missing" + resources: + cpu_limit: "2" + memory_limit: "4Gi" + gpu_limit: 1 + +storage: + data_path: "data" + results_path: "results" + temp_path: "/tmp/fetch_ml" + cleanup: + enabled: true + max_age_hours: 168 + max_size_gb: 10 diff --git a/configs/schema/config_schema.yaml b/configs/schema/config_schema.yaml new file mode 100644 index 0000000..feef868 --- /dev/null +++ b/configs/schema/config_schema.yaml @@ -0,0 +1,238 @@ +# Fetch ML Configuration Schema (JSON Schema expressed as YAML) + +$schema: "http://json-schema.org/draft-07/schema#" +title: "Fetch ML Configuration" +type: object +additionalProperties: false +required: + - auth + - server +properties: + base_path: + type: string + description: Base path for experiment data + auth: + type: object + additionalProperties: false + required: + - enabled + properties: + enabled: + type: boolean + description: Enable or disable authentication + apikeys: + type: object + description: API key registry + additionalProperties: + type: object + additionalProperties: false + required: + - hash + properties: + hash: + type: string + description: SHA256 hash of the API key + admin: + type: boolean + default: false + roles: + type: array + items: + type: string + enum: [admin, data_scientist, data_engineer, viewer, operator] + permissions: + type: object + additionalProperties: + type: boolean + server: + type: object + additionalProperties: false + required: [address] + properties: + address: + type: string + description: Listen address, e.g. 
":9101" + tls: + type: object + additionalProperties: false + properties: + enabled: + type: boolean + default: false + cert_file: + type: string + key_file: + type: string + min_version: + type: string + description: Minimum TLS version (e.g. "1.3") + database: + type: object + additionalProperties: false + properties: + type: + type: string + enum: [sqlite, postgres, mysql] + default: sqlite + connection: + type: string + host: + type: string + port: + type: integer + minimum: 1 + maximum: 65535 + username: + type: string + password: + type: string + database: + type: string + redis: + type: object + additionalProperties: false + properties: + url: + type: string + pattern: "^redis://" + addr: + type: string + description: Optional host:port shorthand for Redis + host: + type: string + default: "localhost" + port: + type: integer + minimum: 1 + maximum: 65535 + default: 6379 + password: + type: string + db: + type: integer + minimum: 0 + default: 0 + pool_size: + type: integer + minimum: 1 + default: 10 + max_retries: + type: integer + minimum: 0 + default: 3 + logging: + type: object + additionalProperties: false + properties: + level: + type: string + enum: [debug, info, warn, error, fatal] + default: "info" + file: + type: string + audit_log: + type: string + format: + type: string + enum: [text, json] + default: "text" + console: + type: boolean + default: true + security: + type: object + additionalProperties: false + properties: + secret_key: + type: string + minLength: 16 + jwt_expiry: + type: string + pattern: "^\\d+[smhd]$" + default: "24h" + ip_whitelist: + type: array + items: + type: string + failed_login_lockout: + type: object + additionalProperties: false + properties: + enabled: + type: boolean + max_attempts: + type: integer + minimum: 1 + lockout_duration: + type: string + description: Duration string, e.g. 
"15m" + rate_limit: + type: object + additionalProperties: false + properties: + enabled: + type: boolean + default: false + requests_per_minute: + type: integer + minimum: 1 + default: 60 + burst_size: + type: integer + minimum: 1 + containers: + type: object + additionalProperties: false + properties: + runtime: + type: string + enum: [podman, docker] + default: "podman" + registry: + type: string + default: "docker.io" + pull_policy: + type: string + enum: [always, missing, never] + default: "missing" + resources: + type: object + additionalProperties: false + properties: + cpu_limit: + type: string + description: CPU limit (e.g., "2" or "500m") + memory_limit: + type: string + description: Memory limit (e.g., "1Gi" or "512Mi") + gpu_limit: + type: integer + minimum: 0 + storage: + type: object + additionalProperties: false + properties: + data_path: + type: string + default: "data" + results_path: + type: string + default: "results" + temp_path: + type: string + default: "/tmp/fetch_ml" + cleanup: + type: object + additionalProperties: false + properties: + enabled: + type: boolean + default: true + max_age_hours: + type: integer + minimum: 1 + default: 168 + max_size_gb: + type: integer + minimum: 1 + default: 10 diff --git a/configs/schema/permissions.yaml b/configs/schema/permissions.yaml new file mode 100644 index 0000000..ffcaaf6 --- /dev/null +++ b/configs/schema/permissions.yaml @@ -0,0 +1,139 @@ +# Role-based permissions configuration +# Defines what each role can do in the system + +# Permission format: resource:action +# Examples: jobs:create, data:read, users:manage + +roles: + admin: + description: "Full system access" + permissions: + - "*" + + data_scientist: + description: "ML experiment management" + permissions: + - "jobs:create" + - "jobs:read" + - "jobs:update" + - "jobs:delete:own" + - "data:read" + - "data:create" + - "models:read" + - "models:create" + - "models:update:own" + - "metrics:read" + + data_engineer: + description: "Data 
pipeline and infrastructure" + permissions: + - "data:create" + - "data:read" + - "data:update" + - "data:delete" + - "jobs:read" + - "jobs:update" + - "pipelines:create" + - "pipelines:read" + - "pipelines:update" + - "storage:read" + - "storage:write" + + viewer: + description: "Read-only access" + permissions: + - "jobs:read" + - "data:read" + - "models:read" + - "metrics:read" + - "pipelines:read" + + operator: + description: "System operations and monitoring" + permissions: + - "jobs:read" + - "jobs:update" + - "jobs:restart" + - "metrics:read" + - "system:read" + - "system:status" + - "logs:read" + +# Permission groups for easier management +groups: + ml_developer: + description: "Combined data scientist and data engineer" + inherits: + - data_scientist + - data_engineer + + read_only: + description: "Read access to all resources" + permissions: + - "jobs:read" + - "data:read" + - "models:read" + - "pipelines:read" + - "metrics:read" + - "system:read" + +# Resource hierarchy for permission inheritance +hierarchy: + jobs: + children: + create: true + read: true + update: true + delete: true + restart: true + special: + own: "User can only access their own resources" + + data: + children: + create: true + read: true + update: true + delete: true + upload: true + download: true + + models: + children: + create: true + read: true + update: true + delete: true + deploy: true + special: + own: "User can only access their own models" + + system: + children: + read: true + status: true + manage: true + config: true + + metrics: + children: + read: true + export: true + delete: true + + pipelines: + children: + create: true + read: true + update: true + delete: true + run: true + stop: true + +# Default permissions for new users +defaults: + new_user_role: "viewer" + admin_users: + - "admin" + - "root" + - "system" diff --git a/configs/worker-prod.toml b/configs/worker-prod.toml new file mode 100644 index 0000000..b7ff9c9 --- /dev/null +++ b/configs/worker-prod.toml 
@@ -0,0 +1,39 @@ +worker_id = "worker-prod-01" +base_path = "/data/ml-experiments" +max_workers = 4 + +# Redis connection +redis_addr = "localhost:6379" +redis_password = "JZVd2Y6IDaLNaYLBOFgQ7ae4Ox5t37NTIyPMQlLJD4k=" +redis_db = 0 + +# SSH connection (for remote operations) +host = "localhost" +user = "ml-user" +port = 22 +ssh_key = "~/.ssh/id_rsa" + +# Podman configuration +podman_image = "ml-training:latest" +gpu_access = true +container_workspace = "/workspace" +container_results = "/results" +train_script = "train.py" + +# Dataset management +auto_fetch_data = true +data_dir = "/data/datasets" +data_manager_path = "/usr/local/bin/data_manager" +dataset_cache_ttl = "24h" + +# Task management +task_lease_duration = "1h" +heartbeat_interval = "30s" +graceful_timeout = "5m" +poll_interval = "100ms" +metrics_flush_interval = "10s" + +# Metrics exporter +[metrics] +enabled = true +listen_addr = ":9090" diff --git a/nginx/README.md b/nginx/README.md new file mode 100644 index 0000000..d9419f9 --- /dev/null +++ b/nginx/README.md @@ -0,0 +1,138 @@ +# Nginx Configuration for FetchML + +This directory contains nginx configurations for FetchML. + +## Files + +- **`fetchml-site.conf`** - Ready-to-use site configuration (recommended) +- **`nginx-secure.conf`** - Full standalone nginx config (advanced) +- **`setup-nginx.sh`** - Helper script for easy installation + +## Quick Setup + +### Option 1: Automated (Recommended) + +```bash +sudo ./nginx/setup-nginx.sh +``` + +This will: +- Detect your nginx setup (Debian or RHEL style) +- Prompt for your domain and SSL certificates +- Install the configuration +- Test and reload nginx + +### Option 2: Manual + +**For Debian/Ubuntu:** +```bash +# 1. Edit fetchml-site.conf and change: +# - ml.example.com to your domain +# - SSL certificate paths +# - Port if not using 9102 + +# 2. Install +sudo cp nginx/fetchml-site.conf /etc/nginx/sites-available/fetchml +sudo ln -s /etc/nginx/sites-available/fetchml /etc/nginx/sites-enabled/ + +# 3. 
Test and reload +sudo nginx -t +sudo systemctl reload nginx +``` + +**For RHEL/Rocky/CentOS:** +```bash +# 1. Edit fetchml-site.conf (same as above) + +# 2. Install +sudo cp nginx/fetchml-site.conf /etc/nginx/conf.d/fetchml.conf + +# 3. Test and reload +sudo nginx -t +sudo systemctl reload nginx +``` + +## Configuration Details + +### Endpoints + +- `/ws` - WebSocket API (rate limited: 5 req/s) +- `/api/` - REST API (rate limited: 10 req/s) +- `/health` - Health check +- `/grafana/` - Grafana (commented out by default) + +### Security Features + +- TLSv1.2 and TLSv1.3 only +- Security headers (HSTS, CSP, etc.) +- Rate limiting per endpoint +- Request size limits (10MB) +- Version hiding + +### What to Change + +Before using, update these values in `fetchml-site.conf`: + +1. **Domain**: Replace `ml.example.com` with your domain +2. **SSL Certificates**: Update paths to your actual certificates +3. **Port**: Change `9102` if using a different port +4. **Grafana**: Uncomment if you want to expose it + +## SSL Certificates + +### Self-Signed (Dev/Testing) + +```bash +sudo openssl req -x509 -nodes -days 365 -newkey rsa:2048 \ + -keyout /etc/ssl/private/fetchml.key \ + -out /etc/ssl/certs/fetchml.crt \ + -subj "/CN=ml.example.com" +``` + +### Let's Encrypt (Production) + +```bash +sudo apt-get install certbot python3-certbot-nginx +sudo certbot --nginx -d ml.example.com +``` + +## Troubleshooting + +### Test Configuration +```bash +sudo nginx -t +``` + +### Check Logs +```bash +sudo tail -f /var/log/nginx/fetchml_error.log +sudo tail -f /var/log/nginx/fetchml_access.log +``` + +### Verify Proxy +```bash +curl -I https://ml.example.com/health +``` + +### Common Issues + +**"Permission denied" error**: Check that nginx user can access SSL certificates +```bash +sudo chmod 644 /etc/ssl/certs/fetchml.crt +sudo chmod 600 /etc/ssl/private/fetchml.key +``` + +**WebSocket not working**: Ensure your firewall allows the connection and backend is running +```bash +# Check backend 
+curl http://localhost:9102/health
+
+# Check firewall
+sudo firewall-cmd --list-all
+```
+
+## Integration with Existing Nginx
+
+If you already have nginx running, just drop `fetchml-site.conf` into your sites directory. It won't conflict with other sites.
+
+The configuration is self-contained and only handles the specified `server_name`.
diff --git a/nginx/fetchml-site.conf b/nginx/fetchml-site.conf
new file mode 100644
index 0000000..6824ee8
--- /dev/null
+++ b/nginx/fetchml-site.conf
@@ -0,0 +1,101 @@
+# FetchML Nginx Site Configuration
+# Drop this file into /etc/nginx/sites-available/fetchml
+# Then: sudo ln -s /etc/nginx/sites-available/fetchml /etc/nginx/sites-enabled/
+# Test: sudo nginx -t
+# Reload: sudo systemctl reload nginx
+
+# Rate-limit zones. limit_req_zone is only valid in the http context, so it is declared
+# at file level (site files are included inside http {}), not inside a server block.
+limit_req_zone $binary_remote_addr zone=fetchml_api:10m rate=10r/s;
+limit_req_zone $binary_remote_addr zone=fetchml_ws:10m rate=5r/s;
+
+server {
+    listen 80;
+    server_name ml.example.com; # CHANGE THIS to your domain
+
+    # Redirect HTTP to HTTPS
+    return 301 https://$server_name$request_uri;
+}
+
+server {
+    listen 443 ssl http2;
+    server_name ml.example.com; # CHANGE THIS to your domain
+
+    # SSL Configuration
+    # CHANGE THESE paths to your actual SSL certificates
+    ssl_certificate /etc/ssl/certs/ml.example.com.crt;
+    ssl_certificate_key /etc/ssl/private/ml.example.com.key;
+
+    # Modern SSL settings
+    ssl_protocols TLSv1.3 TLSv1.2;
+    ssl_prefer_server_ciphers on;
+    ssl_ciphers ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305;
+    ssl_session_timeout 1d;
+    ssl_session_cache shared:MozSSL:10m;
+    ssl_session_tickets off;
+
+    # Security headers
+    add_header X-Frame-Options DENY always;
+    add_header X-Content-Type-Options nosniff always;
+    add_header X-XSS-Protection "1; mode=block" always;
+    add_header Referrer-Policy "strict-origin-when-cross-origin" always;
+    add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
+
+    # Hide nginx version
+    server_tokens off;
+
+    # Client limits
+    client_max_body_size 10M;
+    client_body_timeout 12s;
+    client_header_timeout 12s;
+
+    # Logging
+    access_log /var/log/nginx/fetchml_access.log;
+    error_log /var/log/nginx/fetchml_error.log warn;
+
+    # WebSocket endpoint
+    location /ws {
+        limit_req zone=fetchml_ws burst=10 nodelay;
+
+        proxy_pass http://localhost:9102; # CHANGE PORT if needed
+        proxy_http_version 1.1;
+        proxy_set_header Upgrade $http_upgrade;
+        proxy_set_header Connection "upgrade";
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+
+        # WebSocket timeouts
+        proxy_connect_timeout 7d;
+        proxy_send_timeout 7d;
+        proxy_read_timeout 7d;
+    }
+
+    # API endpoints
+    location /api/ {
+        limit_req zone=fetchml_api burst=20 nodelay;
+
+        proxy_pass http://localhost:9102; # CHANGE PORT if needed
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        proxy_set_header X-API-Key $http_x_api_key;
+    }
+
+    # Health check
+    location /health {
+        proxy_pass http://localhost:9102; # CHANGE PORT if needed
+        proxy_set_header Host $host;
+        access_log off;
+    }
+
+    # Grafana (optional - only if you want to expose it)
+    # Uncomment if you want Grafana accessible via nginx
+    # location /grafana/ {
+    #     proxy_pass http://localhost:3000/;
+    #     proxy_set_header Host $host;
+    #     proxy_set_header X-Real-IP $remote_addr;
+    # }
+}
diff --git a/nginx/nginx-secure.conf b/nginx/nginx-secure.conf
new file mode 100644
index 0000000..00cc20b
--- /dev/null
+++ b/nginx/nginx-secure.conf
@@ -0,0 +1,157 @@
+events {
+    worker_connections 1024;
+}
+
+http {
+    # Security headers
+    add_header X-Frame-Options DENY always;
+    add_header X-Content-Type-Options nosniff always;
+    add_header X-XSS-Protection "1; 
mode=block" always;
+    add_header Referrer-Policy "strict-origin-when-cross-origin" always;
+    add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'" always;
+    add_header Strict-Transport-Security "max-age=31536000; includeSubDomains; preload" always;
+
+    # Hide the nginx version in error pages and the Server response header
+    server_tokens off;
+
+    # Rate limiting: per-client-IP request-rate zones, applied by limit_req in the /api/ and /ws locations
+    limit_req_zone $binary_remote_addr zone=api:10m rate=10r/s;
+    limit_req_zone $binary_remote_addr zone=ws:10m rate=5r/s;
+
+    # Connection limiting: per-client-IP zone, applied by limit_conn in the HTTPS server
+    limit_conn_zone $binary_remote_addr zone=conn_limit_per_ip:10m;
+
+    # Logging: access-log format that also records request time and upstream response time
+    log_format security '$remote_addr - $remote_user [$time_local] '
+                        '"$request" $status $body_bytes_sent '
+                        '"$http_referer" "$http_user_agent" '
+                        '$request_time $upstream_response_time';
+
+    access_log /var/log/nginx/security.log security;
+    error_log /var/log/nginx/error.log warn;
+
+    # Redirect every plain-HTTP request to the same host and URI over HTTPS
+    server {
+        listen 80;
+        server_name _;
+        return 301 https://$host$request_uri;
+    }
+
+    # HTTPS server for ml-experiments.example.com
+    server {
+        listen 443 ssl http2;
+        server_name ml-experiments.example.com;
+
+        # SSL configuration
+        ssl_certificate /etc/nginx/ssl/cert.pem;
+        ssl_certificate_key /etc/nginx/ssl/key.pem;
+        ssl_trusted_certificate /etc/nginx/ssl/ca.pem;
+
+        # Modern SSL configuration (TLS 1.3 only). NOTE(review): ssl_ciphers names TLS 1.2 suites; confirm it is still wanted
+        ssl_protocols TLSv1.3;
+        ssl_prefer_server_ciphers on;
+        ssl_ciphers ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305;
+        ssl_session_timeout 1d;
+        ssl_session_cache shared:SSL:50m;
+        ssl_session_tickets off;
+
+        # OCSP stapling. NOTE(review): stapling needs a "resolver" to reach the OCSP responder; none is set in this section - confirm
+        ssl_stapling on;
+        ssl_stapling_verify on;
+
+        # Security limits: small request bodies, short client timeouts, at most 20 concurrent connections per IP
+        client_max_body_size 10M;
+        client_body_timeout 12s;
+        client_header_timeout 12s;
+        keepalive_timeout 15s;
+        send_timeout 10s;
+        limit_conn conn_limit_per_ip 20;
+
+        # Health endpoint, proxied to the API server without request-rate limiting
+        location /health {
+            proxy_pass https://api-server:9101;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+            proxy_connect_timeout 5s;
+            proxy_send_timeout 10s;
+            proxy_read_timeout 10s;
+        }
+
+        # WebSocket endpoint with special rate limiting
+        location /ws {
+            limit_req zone=ws burst=10 nodelay;
+
+            proxy_pass https://api-server:9101;
+            proxy_http_version 1.1;
+            proxy_set_header Upgrade $http_upgrade;
+            proxy_set_header Connection "upgrade";
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+            proxy_connect_timeout 5s;  # connect phase only (cannot usually exceed 75s); matches the other locations
+            proxy_send_timeout 7d;  # long send/read timeouts keep idle WebSocket connections open
+            proxy_read_timeout 7d;
+
+            # WebSocket specific headers
+            proxy_set_header Sec-WebSocket-Key $http_sec_websocket_key;
+            proxy_set_header Sec-WebSocket-Protocol $http_sec_websocket_protocol;
+            proxy_set_header Sec-WebSocket-Version $http_sec_websocket_version;
+        }
+
+        # API endpoints with rate limiting
+        location /api/ {
+            limit_req zone=api burst=20 nodelay;
+
+            proxy_pass https://api-server:9101;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+            proxy_set_header X-API-Key $http_x_api_key;
+            proxy_connect_timeout 5s;
+            proxy_send_timeout 10s;
+            proxy_read_timeout 10s;
+        }
+
+        # Deny all other locations
+        location / {
+            return 404;
+        }
+
+        # Security monitoring endpoints (admin only)
+        location /admin/ {
+            # Only the RFC 1918 private ranges below may reach the admin endpoints
+            allow 10.0.0.0/8;
+            allow 192.168.0.0/16;
+            allow 172.16.0.0/12;
+            deny all;
+
+            proxy_pass https://api-server:9101;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+        }
+
+        # Health check for load balancers; default_type sets the body's Content-Type without a location-level add_header
+        location /lb-health {
+            access_log off;
+            default_type text/plain;
+            return 200 "healthy\n";
+        }
+    }
+
+    # Default server to catch unknown hosts: 444 closes the connection without sending a response
+    server {
+        listen 443 ssl http2 default_server;
+        server_name _;
+
+        ssl_certificate /etc/nginx/ssl/cert.pem;
+        ssl_certificate_key /etc/nginx/ssl/key.pem;
+
+        return 444;
+    }
+}
diff --git a/nginx/setup-nginx.sh b/nginx/setup-nginx.sh
new file mode 100755
index 0000000..29df6c4
--- /dev/null
+++ b/nginx/setup-nginx.sh
@@ -0,0 +1,170 @@
+#!/bin/bash
+# Nginx Setup Helper for FetchML
+#
+# Interactively installs fetchml-site.conf (expected next to this script) into an
+# existing nginx setup:
+#   1. detects the Debian (sites-available/sites-enabled) or RHEL (conf.d) layout
+#   2. prompts for the domain, API server port and SSL certificate/key paths
+#   3. substitutes those values into a temporary copy and installs it with sudo
+#   4. runs "nginx -t" and, on success, offers to reload nginx
+#
+# Exits non-zero when nginx or the site template is missing, the config layout
+# cannot be detected, the port is invalid, or the nginx configuration test fails.
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+SITE_CONFIG="$SCRIPT_DIR/fetchml-site.conf"
+
+# Colors
+GREEN='\033[0;32m'
+BLUE='\033[0;34m'
+YELLOW='\033[1;33m'
+NC='\033[0m'
+
+# Path of the temporary config; empty until mktemp has created it.
+temp_config=""
+
+# Remove the temporary config. Registered as an EXIT trap so it runs on every
+# exit path, including when "set -e" aborts the script.
+cleanup() {
+    if [ -n "$temp_config" ]; then
+        rm -f "$temp_config"
+    fi
+}
+trap cleanup EXIT
+
+# Print $1 with the characters that are special on the replacement side of the
+# "s|...|...|" commands below (backslash, "&" and the "|" delimiter) escaped.
+escape_sed_replacement() {
+    printf '%s' "$1" | sed -e 's/[\\&|]/\\&/g'
+}
+
+# Prompt with $2 until a non-empty line is entered, then store it in the
+# variable named by $1. EOF on stdin makes "read" fail and "set -e" exit.
+prompt_required() {
+    local value=""
+    while [ -z "$value" ]; do
+        read -r -p "$2" value
+    done
+    printf -v "$1" '%s' "$value"
+}
+
+echo -e "${BLUE}FetchML Nginx Setup Helper${NC}"
+echo ""
+
+# Check if nginx is installed
+if ! command -v nginx &>/dev/null; then
+    echo -e "${YELLOW}Nginx is not installed.${NC}"
+    echo "Install with:"
+    echo " Ubuntu/Debian: sudo apt-get install nginx"
+    echo " RHEL/Rocky: sudo dnf install nginx"
+    exit 1
+fi
+
+# The site template must exist before any values are prompted for
+if [ ! -f "$SITE_CONFIG" ]; then
+    echo -e "${YELLOW}Site config template not found: $SITE_CONFIG${NC}"
+    exit 1
+fi
+
+# Detect nginx config structure
+if [ -d "/etc/nginx/sites-available" ]; then
+    # Debian/Ubuntu style
+    SITES_AVAILABLE="/etc/nginx/sites-available"
+    SITES_ENABLED="/etc/nginx/sites-enabled"
+    STYLE="debian"
+elif [ -d "/etc/nginx/conf.d" ]; then
+    # RHEL/CentOS style
+    SITES_AVAILABLE="/etc/nginx/conf.d"
+    SITES_ENABLED=""
+    STYLE="rhel"
+else
+    echo -e "${YELLOW}Could not detect nginx configuration directory.${NC}"
+    echo "Please manually copy $SITE_CONFIG to your nginx config directory."
+    exit 1
+fi
+
+echo "Detected nginx style: $STYLE"
+echo ""
+
+# Read values (-r keeps backslashes in the input literal)
+prompt_required domain "Enter your domain name (e.g., ml.example.com): "
+read -r -p "Enter API server port [9102]: " port
+port=${port:-9102}
+if ! [[ "$port" =~ ^[0-9]+$ ]] || (( 10#$port < 1 || 10#$port > 65535 )); then
+    echo -e "${YELLOW}Invalid port: $port${NC}"
+    exit 1
+fi
+
+prompt_required cert_path "Enter SSL certificate path: "
+prompt_required key_path "Enter SSL key path: "
+
+# Create temp config with substitutions. mktemp gives an unpredictable, private
+# file instead of a fixed /tmp name that another local user could pre-create.
+temp_config="$(mktemp "${TMPDIR:-/tmp}/fetchml-site.XXXXXX")"
+domain_sed="$(escape_sed_replacement "$domain")"
+cert_sed="$(escape_sed_replacement "$cert_path")"
+key_sed="$(escape_sed_replacement "$key_path")"
+
+# The certificate/key placeholders contain "ml.example.com", so they are swapped
+# for neutral tokens before the domain substitution; otherwise the domain rule
+# would rewrite them first and the user-supplied paths would never be applied.
+sed -e "s|/etc/ssl/certs/ml\.example\.com\.crt|@FETCHML_CERT_PATH@|g" \
+    -e "s|/etc/ssl/private/ml\.example\.com\.key|@FETCHML_KEY_PATH@|g" \
+    -e "s|ml\.example\.com|$domain_sed|g" \
+    -e "s|localhost:9102|localhost:$port|g" \
+    -e "s|@FETCHML_CERT_PATH@|$cert_sed|g" \
+    -e "s|@FETCHML_KEY_PATH@|$key_sed|g" \
+    "$SITE_CONFIG" > "$temp_config"
+
+# mktemp creates the file with mode 0600; widen it so the copy made by "cp"
+# below is not left readable by its owner only.
+chmod 644 "$temp_config"
+
+# Install config
+echo ""
+echo -e "${BLUE}Installing nginx configuration...${NC}"
+
+if [ "$STYLE" = "debian" ]; then
+    sudo cp "$temp_config" "$SITES_AVAILABLE/fetchml"
+    sudo ln -sf "$SITES_AVAILABLE/fetchml" "$SITES_ENABLED/fetchml"
+    echo -e "${GREEN}✓${NC} Config installed to $SITES_AVAILABLE/fetchml"
+    echo -e "${GREEN}✓${NC} Symlink created in $SITES_ENABLED/"
+else
+    sudo cp "$temp_config" "$SITES_AVAILABLE/fetchml.conf"
+    echo -e "${GREEN}✓${NC} Config installed to $SITES_AVAILABLE/fetchml.conf"
+fi
+
+# Test nginx config
+echo ""
+echo -e "${BLUE}Testing nginx configuration...${NC}"
+if sudo nginx -t; then
+    echo -e "${GREEN}✓${NC} Nginx configuration is valid"
+
+    # Offer to reload
+    read -p "Reload nginx now? [y/N]: " -n 1 -r
+    echo
+    if [[ $REPLY =~ ^[Yy]$ ]]; then
+        sudo systemctl reload nginx
+        echo -e "${GREEN}✓${NC} Nginx reloaded"
+    else
+        echo "Reload later with: sudo systemctl reload nginx"
+    fi
+else
+    echo -e "${YELLOW}!${NC} Nginx configuration test failed"
+    echo "Please fix the errors and run: sudo nginx -t"
+    # The installed config is left in place for fixing; stop here with a
+    # failure status instead of reporting a successful setup.
+    exit 1
+fi
+
+echo ""
+echo -e "${GREEN}Setup complete!${NC}"
+echo ""
+echo "Your site is configured for: https://$domain"
+echo ""
+echo "Next steps:"
+echo " 1. Ensure your DNS points to this server"
+echo " 2. Start FetchML API server on port $port"
+echo " 3. Visit https://$domain/health to test"