deploy: remove obsolete deployment files
- Delete Caddyfile.smoke (merged into prod.yml smoke profile)
- Remove deployments/Makefile (use root Makefile instead)
- Remove deploy.sh (use scripts/deploy/deploy.sh)
- Clean up nested configs/worker/ in deployments/
- Update homelab-secure and staging compose files
This commit is contained in:
parent
8a7e7695f4
commit
fd5f2ad672
8 changed files with 8 additions and 658 deletions
|
|
@ -1,23 +0,0 @@
|
|||
{
|
||||
auto_https off
|
||||
}
|
||||
|
||||
localhost {
|
||||
tls internal
|
||||
|
||||
handle /health {
|
||||
reverse_proxy api-server:9101
|
||||
}
|
||||
|
||||
handle /ws* {
|
||||
reverse_proxy api-server:9101
|
||||
}
|
||||
|
||||
handle /api/* {
|
||||
reverse_proxy api-server:9101
|
||||
}
|
||||
|
||||
handle {
|
||||
respond 404
|
||||
}
|
||||
}
|
||||
|
|
@ -1,202 +0,0 @@
|
|||
# Docker Compose Deployment Management
|
||||
.PHONY: help dev-up dev-down dev-logs dev-restart staging-up staging-down staging-logs staging-restart staging-status homelab-secure-up homelab-secure-down prod-up prod-down prod-logs prod-restart prod-status status clean rollback security-mode check-audit-sink health-check security-scan
|
||||
|
||||
# Default target
|
||||
help: ## Show this help message
|
||||
@echo "Available commands:"
|
||||
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-25s\033[0m %s\n", $$1, $$2}'
|
||||
|
||||
# Development environment
|
||||
dev-up: ## Start development environment
|
||||
@echo "Starting development environment..."
|
||||
docker-compose -f docker-compose.dev.yml up -d
|
||||
@echo "Services: Caddy (8080/8443), Redis (6379), Prometheus (9090), Grafana (3000)"
|
||||
|
||||
dev-down: ## Stop development environment
|
||||
@echo "Stopping development environment..."
|
||||
docker-compose -f docker-compose.dev.yml down
|
||||
|
||||
dev-logs: ## Show development logs
|
||||
docker-compose -f docker-compose.dev.yml logs -f
|
||||
|
||||
dev-restart: ## Restart development environment
|
||||
@echo "Restarting development environment..."
|
||||
docker-compose -f docker-compose.dev.yml restart
|
||||
|
||||
# Staging environment
|
||||
staging-up: ## Start staging environment
|
||||
@echo "Starting staging environment..."
|
||||
@if [ ! -f .env.staging ]; then \
|
||||
echo "Creating staging environment file..."; \
|
||||
echo "DATA_DIR=./data/staging" > .env.staging; \
|
||||
echo "LOG_LEVEL=info" >> .env.staging; \
|
||||
echo "COMPLIANCE_MODE=standard" >> .env.staging; \
|
||||
fi
|
||||
docker-compose -f docker-compose.staging.yml up -d
|
||||
@echo "Staging services: Caddy (9080/9443), Redis (6380), API (9102), MinIO (9002/9003)"
|
||||
|
||||
staging-down: ## Stop staging environment
|
||||
@echo "Stopping staging environment..."
|
||||
docker-compose -f docker-compose.staging.yml down
|
||||
|
||||
staging-logs: ## Show staging logs
|
||||
docker-compose -f docker-compose.staging.yml logs -f
|
||||
|
||||
staging-restart: ## Restart staging environment
|
||||
@echo "Restarting staging environment..."
|
||||
docker-compose -f docker-compose.staging.yml restart
|
||||
|
||||
staging-status: ## Show staging status
|
||||
docker-compose -f docker-compose.staging.yml ps
|
||||
|
||||
|
||||
# Homelab environment
|
||||
homelab-secure-up: ## Start secure homelab environment
|
||||
@echo "Starting secure homelab environment..."
|
||||
docker-compose -f docker-compose.homelab-secure.yml up -d
|
||||
|
||||
homelab-secure-down: ## Stop secure homelab environment
|
||||
@echo "Stopping secure homelab environment..."
|
||||
docker-compose -f docker-compose.homelab-secure.yml down
|
||||
|
||||
# Production environment
|
||||
prod-up: ## Start production environment
|
||||
@echo "Starting production environment..."
|
||||
@echo "⚠ WARNING: This is production! Ensure you have proper backups."
|
||||
@read -p "Continue? [y/N] " confirm && [ "$$confirm" = "y" ] || exit 1
|
||||
docker-compose -f docker-compose.prod.yml up -d
|
||||
|
||||
prod-down: ## Stop production environment
|
||||
@echo "Stopping production environment..."
|
||||
docker-compose -f docker-compose.prod.yml down
|
||||
|
||||
prod-logs: ## Show production logs
|
||||
docker-compose -f docker-compose.prod.yml logs -f
|
||||
|
||||
prod-restart: ## Restart production environment
|
||||
@echo "Restarting production environment..."
|
||||
@read -p "Restart production? [y/N] " confirm && [ "$$confirm" = "y" ] || exit 1
|
||||
docker-compose -f docker-compose.prod.yml restart
|
||||
|
||||
prod-status: ## Show production status
|
||||
docker-compose -f docker-compose.prod.yml ps
|
||||
|
||||
# Utility commands
|
||||
status: ## Show status of all environments
|
||||
@echo "=== Development Status ==="
|
||||
@if [ -f docker-compose.dev.yml ]; then \
|
||||
docker-compose -f docker-compose.dev.yml ps 2>/dev/null || echo "Not running"; \
|
||||
fi
|
||||
@echo ""
|
||||
@echo "=== Staging Status ==="
|
||||
@if [ -f docker-compose.staging.yml ]; then \
|
||||
docker-compose -f docker-compose.staging.yml ps 2>/dev/null || echo "Not running"; \
|
||||
fi
|
||||
@echo ""
|
||||
@echo "=== Homelab Secure Status ==="
|
||||
@if [ -f docker-compose.homelab-secure.yml ]; then \
|
||||
docker-compose -f docker-compose.homelab-secure.yml ps 2>/dev/null || echo "Not running"; \
|
||||
fi
|
||||
@echo ""
|
||||
@echo "=== Production Status ==="
|
||||
@if [ -f docker-compose.prod.yml ]; then \
|
||||
docker-compose -f docker-compose.prod.yml ps 2>/dev/null || echo "Not running"; \
|
||||
fi
|
||||
|
||||
clean: ## Clean up all containers and volumes
|
||||
@echo "Cleaning up all Docker resources..."
|
||||
@echo "This will remove all containers and volumes. Continue? [y/N]"
|
||||
@read -r confirm && [ "$$confirm" = "y" ] || exit 1
|
||||
docker-compose -f docker-compose.dev.yml down -v 2>/dev/null || true
|
||||
docker-compose -f docker-compose.staging.yml down -v 2>/dev/null || true
|
||||
docker-compose -f docker-compose.homelab-secure.yml down -v 2>/dev/null || true
|
||||
docker-compose -f docker-compose.prod.yml down -v 2>/dev/null || true
|
||||
docker system prune -f
|
||||
@echo "Cleanup complete."
|
||||
|
||||
# Security mode targets
|
||||
security-mode-dev: ## Run worker in dev security mode
|
||||
@echo "Running with dev security mode (relaxed validation)..."
|
||||
COMPLIANCE_MODE=dev docker-compose -f docker-compose.dev.yml up -d worker
|
||||
|
||||
security-mode-standard: ## Run worker in standard security mode
|
||||
@echo "Running with standard security mode..."
|
||||
COMPLIANCE_MODE=standard docker-compose -f docker-compose.dev.yml up -d worker
|
||||
|
||||
security-mode-hipaa: ## Run worker in HIPAA security mode
|
||||
@echo "Running with HIPAA security mode (strict compliance)..."
|
||||
@echo "✓ Network mode: none"
|
||||
@echo "✓ Seccomp profile: default-hardened"
|
||||
@echo "✓ No new privileges: enforced"
|
||||
@echo "✓ Audit sink: required"
|
||||
@read -p "Confirm HIPAA mode deployment? [y/N] " confirm && [ "$$confirm" = "y" ] || exit 1
|
||||
COMPLIANCE_MODE=hipaa docker-compose -f docker-compose.dev.yml up -d worker
|
||||
|
||||
# Rollback targets
|
||||
rollback-staging: ## Rollback staging deployment
|
||||
@echo "Rolling back staging deployment..."
|
||||
@echo "⚠ This rolls back the image only - queue state and audit log are NOT rolled back"
|
||||
@read -p "Continue with rollback? [y/N] " confirm && [ "$$confirm" = "y" ] || exit 1
|
||||
docker-compose -f docker-compose.staging.yml down
|
||||
@if [ -f .staging-deployment.log ]; then \
|
||||
PREVIOUS_TAG=$$(tail -2 .staging-deployment.log | head -1 | grep -o 'tag=[^ ]*' | cut -d'=' -f2 || echo "latest"); \
|
||||
echo "Previous tag: $$PREVIOUS_TAG"; \
|
||||
docker-compose -f docker-compose.staging.yml up -d; \
|
||||
fi
|
||||
@echo "$$(date -Iseconds) | rollback | staging | actor=$$(whoami)" >> .staging-audit.log
|
||||
|
||||
rollback-prod: ## Rollback production deployment
|
||||
@echo "Rolling back production deployment..."
|
||||
@echo "⚠ CRITICAL: This rolls back the image only"
|
||||
@echo "⚠ Queue state is NOT rolled back"
|
||||
@echo "⚠ Audit log chain is NOT rolled back (must never break chain)"
|
||||
@echo "⚠ New artifacts remain in storage"
|
||||
@read -p "CONFIRM PRODUCTION ROLLBACK? [yes/N] " confirm && [ "$$confirm" = "yes" ] || exit 1
|
||||
docker-compose -f docker-compose.prod.yml down
|
||||
@if [ -f .prod-audit.log ]; then \
|
||||
PREVIOUS_SHA=$$(tail -2 .prod-audit.log | head -1 | grep -o 'sha-[a-f0-9]*' || echo "previous"); \
|
||||
echo "Rolling back to: $$PREVIOUS_SHA"; \
|
||||
docker-compose -f docker-compose.prod.yml up -d; \
|
||||
fi
|
||||
@echo "$$(date -Iseconds) | rollback | prod | actor=$$(whoami)" >> .prod-audit.log
|
||||
@echo "Rollback complete. Verify health: make prod-status"
|
||||
|
||||
check-audit-sink: ## Check audit sink reachability
|
||||
@echo "Checking audit sink..."
|
||||
@if [ -f ../scripts/check-audit-sink.sh ]; then \
|
||||
../scripts/check-audit-sink.sh --env staging; \
|
||||
else \
|
||||
echo "Audit sink check script not found"; \
|
||||
fi
|
||||
|
||||
health-check: ## Run health checks on all environments
|
||||
@echo "=== Health Checks ==="
|
||||
@echo "Development (localhost:9101):"
|
||||
@curl -fsS http://localhost:9101/health 2>/dev/null && echo "✓ Healthy" || echo "✗ Not responding"
|
||||
@echo ""
|
||||
@echo "Staging (localhost:9102):"
|
||||
@curl -fsS http://localhost:9102/health 2>/dev/null && echo "✓ Healthy" || echo "✗ Not responding"
|
||||
@echo ""
|
||||
@echo "Production (localhost:9101):"
|
||||
@curl -fsS http://localhost:9101/health 2>/dev/null && echo "✓ Healthy" || echo "✗ Not responding"
|
||||
|
||||
security-scan: ## Run security scanners locally
|
||||
@echo "Running security scanners..."
|
||||
@if command -v gosec >/dev/null 2>&1; then \
|
||||
echo "Running gosec..."; \
|
||||
cd .. && gosec ./... 2>/dev/null || echo "gosec found issues"; \
|
||||
else \
|
||||
echo "gosec not installed - skipping"; \
|
||||
fi
|
||||
@if command -v nancy >/dev/null 2>&1; then \
|
||||
echo "Running nancy..."; \
|
||||
cd .. && go list -json -deps ./... 2>/dev/null | nancy sleuth 2>/dev/null || echo "nancy found issues"; \
|
||||
else \
|
||||
echo "nancy not installed - skipping"; \
|
||||
fi
|
||||
|
||||
# Quick aliases
|
||||
up: dev-up ## Alias for dev-up
|
||||
down: dev-down ## Alias for dev-down
|
||||
logs: dev-logs ## Alias for dev-logs
|
||||
restart: dev-restart ## Alias for dev-restart
|
||||
|
|
@ -1,58 +0,0 @@
|
|||
# Development mode worker configuration
|
||||
# Relaxed validation for fast iteration
|
||||
host: localhost
|
||||
port: 22
|
||||
user: dev-user
|
||||
base_path: /tmp/fetchml_dev
|
||||
train_script: train.py
|
||||
|
||||
# Redis configuration
|
||||
redis_url: redis://redis:6379
|
||||
|
||||
# Development mode - relaxed security
|
||||
compliance_mode: dev
|
||||
max_workers: 4
|
||||
|
||||
# Sandbox settings (relaxed for development)
|
||||
sandbox:
|
||||
network_mode: bridge
|
||||
seccomp_profile: ""
|
||||
no_new_privileges: false
|
||||
allowed_secrets: [] # All secrets allowed in dev
|
||||
|
||||
# GPU configuration
|
||||
gpu_vendor: none
|
||||
|
||||
# Artifact handling (relaxed limits)
|
||||
max_artifact_files: 10000
|
||||
max_artifact_total_bytes: 1073741824 # 1GB
|
||||
|
||||
# Provenance (disabled in dev for speed)
|
||||
provenance_best_effort: false
|
||||
|
||||
# Plugin Configuration (development mode)
|
||||
plugins:
|
||||
# Jupyter Notebook/Lab Service
|
||||
jupyter:
|
||||
enabled: true
|
||||
image: "quay.io/jupyter/base-notebook:latest"
|
||||
default_port: 8888
|
||||
mode: "lab"
|
||||
security:
|
||||
trusted_channels:
|
||||
- "conda-forge"
|
||||
- "defaults"
|
||||
blocked_packages: [] # No restrictions in dev
|
||||
require_password: false # No password for dev
|
||||
max_gpu_per_instance: 1
|
||||
max_memory_per_instance: "4Gi"
|
||||
|
||||
# vLLM Inference Service
|
||||
vllm:
|
||||
enabled: true
|
||||
image: "vllm/vllm-openai:latest"
|
||||
default_port: 8000
|
||||
model_cache: "/tmp/models" # Temp location for dev
|
||||
default_quantization: "" # No quantization for dev
|
||||
max_gpu_per_instance: 1
|
||||
max_model_len: 2048
|
||||
|
|
@ -1,62 +0,0 @@
|
|||
# HIPAA compliance mode worker configuration
|
||||
# Strict validation, no network, PHI protection
|
||||
host: localhost
|
||||
port: 22
|
||||
user: hipaa-worker
|
||||
base_path: /var/lib/fetchml/secure
|
||||
train_script: train.py
|
||||
|
||||
# Redis configuration (must use env var for password)
|
||||
redis_url: redis://redis:6379
|
||||
redis_password: ${REDIS_PASSWORD}
|
||||
|
||||
# HIPAA mode - strict compliance
|
||||
compliance_mode: hipaa
|
||||
max_workers: 1
|
||||
|
||||
# Sandbox settings (strict isolation required by HIPAA)
|
||||
sandbox:
|
||||
# Network must be disabled for HIPAA compliance
|
||||
network_mode: none
|
||||
# Seccomp profile must be set
|
||||
seccomp_profile: default-hardened
|
||||
# No new privileges must be enforced
|
||||
no_new_privileges: true
|
||||
# Only approved secrets allowed (no PHI fields)
|
||||
allowed_secrets:
|
||||
- HF_TOKEN
|
||||
- WANDB_API_KEY
|
||||
- AWS_ACCESS_KEY_ID
|
||||
- AWS_SECRET_ACCESS_KEY
|
||||
# PHI fields are EXPLICITLY DENIED:
|
||||
# - PATIENT_ID
|
||||
# - SSN
|
||||
# - MEDICAL_RECORD_NUMBER
|
||||
# - DIAGNOSIS_CODE
|
||||
# - DOB
|
||||
# - INSURANCE_ID
|
||||
|
||||
# GPU configuration
|
||||
gpu_vendor: none
|
||||
|
||||
# Artifact handling (strict limits for HIPAA)
|
||||
max_artifact_files: 100
|
||||
max_artifact_total_bytes: 104857600 # 100MB
|
||||
|
||||
# Provenance (strictly required for HIPAA)
|
||||
provenance_best_effort: false
|
||||
|
||||
# SSH key must use environment variable
|
||||
ssh_key: ${SSH_KEY_PATH}
|
||||
|
||||
# Config hash computation enabled (required for audit)
|
||||
# This is automatically computed by Validate()
|
||||
|
||||
# Plugin Configuration (DISABLED for HIPAA compliance)
|
||||
# Jupyter and vLLM services are disabled in HIPAA mode to ensure
|
||||
# no unauthorized network access or data processing
|
||||
plugins:
|
||||
jupyter:
|
||||
enabled: false # Disabled: HIPAA requires strict network isolation
|
||||
vllm:
|
||||
enabled: false # Disabled: External model downloads violate PHI controls
|
||||
|
|
@ -1,64 +0,0 @@
|
|||
# Standard security mode worker configuration
|
||||
# Normal sandbox, network isolation
|
||||
host: localhost
|
||||
port: 22
|
||||
user: worker-user
|
||||
base_path: /var/lib/fetchml
|
||||
train_script: train.py
|
||||
|
||||
# Redis configuration
|
||||
redis_url: redis://redis:6379
|
||||
|
||||
# Standard mode - normal security
|
||||
compliance_mode: standard
|
||||
max_workers: 2
|
||||
|
||||
# Sandbox settings (standard isolation)
|
||||
sandbox:
|
||||
network_mode: none
|
||||
seccomp_profile: default
|
||||
no_new_privileges: true
|
||||
allowed_secrets:
|
||||
- HF_TOKEN
|
||||
- WANDB_API_KEY
|
||||
- AWS_ACCESS_KEY_ID
|
||||
- AWS_SECRET_ACCESS_KEY
|
||||
|
||||
# GPU configuration
|
||||
gpu_vendor: none
|
||||
|
||||
# Artifact handling (reasonable limits)
|
||||
max_artifact_files: 1000
|
||||
max_artifact_total_bytes: 536870912 # 512MB
|
||||
|
||||
# Provenance (enabled)
|
||||
provenance_best_effort: true
|
||||
|
||||
# Plugin Configuration
|
||||
plugins:
|
||||
# Jupyter Notebook/Lab Service
|
||||
jupyter:
|
||||
enabled: true
|
||||
image: "quay.io/jupyter/base-notebook:latest"
|
||||
default_port: 8888
|
||||
mode: "lab"
|
||||
security:
|
||||
trusted_channels:
|
||||
- "conda-forge"
|
||||
- "defaults"
|
||||
blocked_packages:
|
||||
- "requests"
|
||||
- "urllib3"
|
||||
require_password: true
|
||||
max_gpu_per_instance: 1
|
||||
max_memory_per_instance: "8Gi"
|
||||
|
||||
# vLLM Inference Service
|
||||
vllm:
|
||||
enabled: true
|
||||
image: "vllm/vllm-openai:latest"
|
||||
default_port: 8000
|
||||
model_cache: "/models"
|
||||
default_quantization: ""
|
||||
max_gpu_per_instance: 1
|
||||
max_model_len: 4096
|
||||
|
|
@ -1,241 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Quick deployment script for fetch_ml
|
||||
|
||||
set -e
|
||||
|
||||
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
REPO_ROOT=$(cd "${SCRIPT_DIR}/.." && pwd)
|
||||
export FETCHML_REPO_ROOT="${FETCHML_REPO_ROOT:-${REPO_ROOT}}"
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Function to print colored output
|
||||
print_status() {
|
||||
echo -e "${BLUE}[INFO]${NC} $1"
|
||||
}
|
||||
|
||||
print_success() {
|
||||
echo -e "${GREEN}[SUCCESS]${NC} $1"
|
||||
}
|
||||
|
||||
print_warning() {
|
||||
echo -e "${YELLOW}[WARNING]${NC} $1"
|
||||
}
|
||||
|
||||
print_error() {
|
||||
echo -e "${RED}[ERROR]${NC} $1"
|
||||
}
|
||||
|
||||
# Function to show usage
|
||||
show_usage() {
|
||||
echo "Usage: $0 [ENVIRONMENT] [ACTION]"
|
||||
echo ""
|
||||
echo "Environments:"
|
||||
echo " dev Development environment"
|
||||
echo " staging Staging environment (pre-production)"
|
||||
echo " secure Secure homelab environment"
|
||||
echo " prod Production environment"
|
||||
echo ""
|
||||
echo "Actions:"
|
||||
echo " up Start services"
|
||||
echo " down Stop services"
|
||||
echo " restart Restart services"
|
||||
echo " logs Show logs"
|
||||
echo " status Show status"
|
||||
echo " rollback Rollback to previous deployment (image only)"
|
||||
echo " health-check Check service health and compliance mode"
|
||||
echo " check-audit-sink Verify audit sink reachability"
|
||||
echo ""
|
||||
echo "Examples:"
|
||||
echo " $0 dev up # Start development environment"
|
||||
echo " $0 staging up # Start staging environment"
|
||||
echo " $0 prod down # Stop production environment"
|
||||
echo " $0 staging rollback # Rollback staging deployment"
|
||||
echo " $0 prod health-check # Check production health"
|
||||
echo " $0 prod check-audit-sink # Verify audit sink before deploy"
|
||||
}
|
||||
|
||||
# Function to check if docker-compose file exists
|
||||
check_compose_file() {
|
||||
local env=$1
|
||||
local compose_file=""
|
||||
|
||||
case $env in
|
||||
"dev")
|
||||
compose_file="${FETCHML_REPO_ROOT}/deployments/docker-compose.dev.yml"
|
||||
;;
|
||||
"staging")
|
||||
compose_file="${FETCHML_REPO_ROOT}/deployments/docker-compose.staging.yml"
|
||||
;;
|
||||
"secure")
|
||||
compose_file="${FETCHML_REPO_ROOT}/deployments/docker-compose.homelab-secure.yml"
|
||||
;;
|
||||
"prod")
|
||||
compose_file="${FETCHML_REPO_ROOT}/deployments/docker-compose.prod.yml"
|
||||
;;
|
||||
*)
|
||||
print_error "Unknown environment: $env"
|
||||
show_usage
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
if [ ! -f "$compose_file" ]; then
|
||||
print_error "Docker Compose file not found: $compose_file"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "$compose_file"
|
||||
}
|
||||
|
||||
# Function to check if .env file exists
|
||||
check_env_file() {
|
||||
local env=$1
|
||||
|
||||
if [ ! -f "${FETCHML_REPO_ROOT}/.env" ]; then
|
||||
print_warning ".env file not found. Creating from example..."
|
||||
if [ "$env" = "dev" ]; then
|
||||
cp "${FETCHML_REPO_ROOT}/deployments/env.dev.example" "${FETCHML_REPO_ROOT}/.env"
|
||||
elif [ "$env" = "prod" ]; then
|
||||
cp "${FETCHML_REPO_ROOT}/deployments/env.prod.example" "${FETCHML_REPO_ROOT}/.env"
|
||||
else
|
||||
cp "${FETCHML_REPO_ROOT}/deployments/env.dev.example" "${FETCHML_REPO_ROOT}/.env"
|
||||
fi
|
||||
print_warning "Please edit .env file with your configuration"
|
||||
fi
|
||||
}
|
||||
|
||||
# Main script
|
||||
main() {
|
||||
if [ $# -ne 2 ]; then
|
||||
show_usage
|
||||
exit 1
|
||||
fi
|
||||
|
||||
local environment=$1
|
||||
local action=$2
|
||||
|
||||
print_status "Environment: $environment"
|
||||
print_status "Action: $action"
|
||||
|
||||
# Check compose file
|
||||
compose_file=$(check_compose_file "$environment")
|
||||
print_status "Using: $compose_file"
|
||||
|
||||
# Check .env file
|
||||
check_env_file "$environment"
|
||||
|
||||
# Execute action
|
||||
case $action in
|
||||
"up")
|
||||
print_status "Starting $environment environment..."
|
||||
docker-compose --project-directory "${FETCHML_REPO_ROOT}" -f "$compose_file" up -d
|
||||
print_success "$environment environment started successfully!"
|
||||
|
||||
# Show service URLs
|
||||
echo ""
|
||||
print_status "Service URLs:"
|
||||
echo " API Server: http://localhost:9101"
|
||||
if [ "$environment" = "dev" ]; then
|
||||
echo " Grafana: http://localhost:3000 (admin/admin123)"
|
||||
echo " Prometheus: http://localhost:9090"
|
||||
fi
|
||||
;;
|
||||
"down")
|
||||
print_status "Stopping $environment environment..."
|
||||
docker-compose --project-directory "${FETCHML_REPO_ROOT}" -f "$compose_file" down
|
||||
print_success "$environment environment stopped successfully!"
|
||||
;;
|
||||
"restart")
|
||||
print_status "Restarting $environment environment..."
|
||||
docker-compose --project-directory "${FETCHML_REPO_ROOT}" -f "$compose_file" restart
|
||||
print_success "$environment environment restarted successfully!"
|
||||
;;
|
||||
"logs")
|
||||
print_status "Showing logs for $environment environment..."
|
||||
docker-compose --project-directory "${FETCHML_REPO_ROOT}" -f "$compose_file" logs -f
|
||||
;;
|
||||
"status")
|
||||
print_status "Status of $environment environment:"
|
||||
docker-compose --project-directory "${FETCHML_REPO_ROOT}" -f "$compose_file" ps
|
||||
;;
|
||||
"rollback")
|
||||
print_warning "Rolling back $environment environment..."
|
||||
print_warning "⚠ This rolls back the image only - queue state and audit log are NOT rolled back"
|
||||
|
||||
if [ "$environment" = "prod" ]; then
|
||||
print_warning "⚠ CRITICAL: Production rollback"
|
||||
print_warning "⚠ Queue state is NOT rolled back"
|
||||
print_warning "⚠ Audit log chain is NOT rolled back (must never break chain)"
|
||||
read -p "CONFIRM PRODUCTION ROLLBACK? [yes/N] " confirm
|
||||
if [ "$confirm" != "yes" ]; then
|
||||
print_error "Rollback cancelled"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Get previous deployment info
|
||||
LOG_FILE="${FETCHML_REPO_ROOT}/deployments/.${environment}-audit.log"
|
||||
if [ -f "$LOG_FILE" ]; then
|
||||
PREVIOUS_SHA=$(tail -2 "$LOG_FILE" | head -1 | grep -o 'sha-[a-f0-9]*' || echo "")
|
||||
if [ -n "$PREVIOUS_SHA" ]; then
|
||||
print_status "Rolling back to: $PREVIOUS_SHA"
|
||||
fi
|
||||
fi
|
||||
|
||||
docker-compose --project-directory "${FETCHML_REPO_ROOT}" -f "$compose_file" down
|
||||
docker-compose --project-directory "${FETCHML_REPO_ROOT}" -f "$compose_file" up -d
|
||||
|
||||
# Write rollback entry to audit log
|
||||
echo "$(date -Iseconds) | rollback | $environment | actor=$(whoami)" >> "$LOG_FILE" 2>/dev/null || true
|
||||
|
||||
print_success "$environment rollback complete!"
|
||||
print_status "Verify health with: $0 $environment health-check"
|
||||
;;
|
||||
"health-check"|"health")
|
||||
print_status "Health check for $environment environment..."
|
||||
|
||||
# Determine port based on environment
|
||||
case $environment in
|
||||
dev) PORT=9101 ;;
|
||||
staging) PORT=9102 ;;
|
||||
prod) PORT=9101 ;;
|
||||
*) PORT=9101 ;;
|
||||
esac
|
||||
|
||||
# Check API health
|
||||
if curl -fsS "http://localhost:${PORT}/health" > /dev/null 2>&1; then
|
||||
print_success "API is healthy (port $PORT)"
|
||||
|
||||
# Check compliance_mode
|
||||
COMPLIANCE_MODE=$(curl -fsS "http://localhost:${PORT}/health" 2>/dev/null | grep -o '"compliance_mode":"[^"]*"' | cut -d'"' -f4 || echo "unknown")
|
||||
print_status "Compliance mode: $COMPLIANCE_MODE"
|
||||
else
|
||||
print_error "API health check failed (port $PORT)"
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
"check-audit-sink")
|
||||
print_status "Checking audit sink for $environment..."
|
||||
|
||||
if [ -f "${FETCHML_REPO_ROOT}/scripts/check-audit-sink.sh" ]; then
|
||||
"${FETCHML_REPO_ROOT}/scripts/check-audit-sink.sh" --env "$environment"
|
||||
else
|
||||
print_warning "Audit sink check script not found"
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
print_error "Unknown action: $action"
|
||||
show_usage
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
# Run main function
|
||||
main "$@"
|
||||
|
|
@ -4,8 +4,8 @@
|
|||
services:
|
||||
api-server:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: ./build/docker/simple.Dockerfile
|
||||
context: ..
|
||||
dockerfile: build/docker/simple.Dockerfile
|
||||
container_name: ml-experiments-api
|
||||
ports:
|
||||
- "9101:9101"
|
||||
|
|
@ -15,7 +15,7 @@ services:
|
|||
- ${HOMELAB_DATA_DIR:-./data/homelab}/active:/data/active
|
||||
- ${HOMELAB_DATA_DIR:-./data/homelab}/logs:/logs
|
||||
- ${HOMELAB_DATA_DIR:-./data/homelab}/ssl:/app/ssl:ro
|
||||
- ${HOMELAB_DATA_DIR:-./data/homelab}/configs/api/homelab-secure.yaml:/app/configs/api/prod.yaml:ro
|
||||
- ${CONFIG_DIR:-../configs}/api/homelab-secure.yaml:/app/configs/api/prod.yaml:ro
|
||||
- ${FETCHML_REPO_ROOT:-..}/.env.secure:/app/.env.secure:ro
|
||||
depends_on:
|
||||
redis:
|
||||
|
|
@ -78,14 +78,14 @@ services:
|
|||
|
||||
worker:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: ./build/docker/simple.Dockerfile
|
||||
context: ..
|
||||
dockerfile: build/docker/simple.Dockerfile
|
||||
container_name: ml-experiments-worker
|
||||
volumes:
|
||||
- ${HOMELAB_DATA_DIR:-./data/homelab}/experiments:/app/data/experiments
|
||||
- ${HOMELAB_DATA_DIR:-./data/homelab}/active:/data/active
|
||||
- ${HOMELAB_DATA_DIR:-./data/homelab}/logs:/logs
|
||||
- ${HOMELAB_DATA_DIR:-./data/homelab}/configs/worker/homelab-secure.yaml:/app/configs/worker.yaml:ro
|
||||
- ${CONFIG_DIR:-../configs}/worker/homelab-secure.yaml:/app/configs/worker.yaml:ro
|
||||
depends_on:
|
||||
redis:
|
||||
condition: service_healthy
|
||||
|
|
@ -133,7 +133,7 @@ services:
|
|||
- "127.0.0.1:6379:6379" # Bind to localhost only
|
||||
volumes:
|
||||
- ${HOMELAB_DATA_DIR:-./data/homelab}/redis:/data
|
||||
- ${HOMELAB_DATA_DIR:-./data/homelab}/configs/redis/redis-secure.conf:/usr/local/etc/redis/redis.conf:ro
|
||||
- ${CONFIG_DIR:-../configs}/redis/redis-secure.conf:/usr/local/etc/redis/redis.conf:ro
|
||||
restart: unless-stopped
|
||||
command: redis-server /usr/local/etc/redis/redis.conf --requirepass ${REDIS_PASSWORD}
|
||||
healthcheck:
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ services:
|
|||
- ${DATA_DIR:-./data/staging}/experiments:/data/experiments
|
||||
- ${DATA_DIR:-./data/staging}/active:/data/active
|
||||
- ${DATA_DIR:-./data/staging}/workspaces:/data/active/workspaces:delegated
|
||||
- ${DATA_DIR:-./data/staging}/configs/worker:/app/configs:ro
|
||||
- ${CONFIG_DIR:-../configs}/worker/docker-staging.yaml:/app/configs/worker.yaml:ro
|
||||
- ${DATA_DIR:-./data/staging}/ssh:/root/.ssh:ro
|
||||
depends_on:
|
||||
redis:
|
||||
|
|
|
|||
Loading…
Reference in a new issue