---
# Production deployment workflow (manual trigger only).
#
# Jobs run strictly in sequence:
#   1. manual-approval      - prints the pre-deployment checklist
#   2. pre-deployment-gates - provenance / HIPAA signature / audit sink / digest
#   3. deploy               - compose up, health check, smoke tests, audit log
#
# All user-controlled values (workflow inputs, actor, secrets) are handed to
# the shell through `env:` and referenced as quoted shell variables; they are
# never interpolated into script text with `${{ }}`, which would allow shell
# injection through a crafted input.
name: Deploy to Production

on:
  workflow_dispatch:
    inputs:
      deploy_tag:
        description: 'Image tag to deploy (default: staging)'
        required: false
        default: 'staging'
      confirm_hipaa:
        description: 'Confirm HIPAA compliance verification (required for HIPAA mode)'
        required: false
        default: 'false'

concurrency:
  group: deploy-prod-${{ gitea.workflow }}-${{ gitea.ref }}
  cancel-in-progress: false

permissions:
  contents: read
  actions: read

env:
  DEPLOY_ENV: prod
  COMPOSE_FILE: deployments/docker-compose.prod.yml
  # Single source of truth for the tag. The fallback matches the documented
  # input default ('staging') and only applies when the input is empty.
  DEPLOY_TAG: "${{ gitea.event.inputs.deploy_tag || 'staging' }}"

jobs:
  # Informational only: this job prints a checklist and then succeeds. It does
  # not pause the run or wait for a reviewer.
  manual-approval:
    name: Manual Approval Gate
    runs-on: self-hosted
    timeout-minutes: 1
    steps:
      - name: Verify manual trigger
        env:
          ACTOR: ${{ gitea.actor }}
        run: |
          echo "=== Production Deployment Approval ==="
          echo "This deployment requires manual approval."
          echo "Triggered by: $ACTOR"
          echo "Deploy tag: $DEPLOY_TAG"
          echo ""
          echo "Please verify:"
          echo "  ✓ Staging deployment was successful"
          echo "  ✓ Smoke tests passed in staging"
          echo "  ✓ SLSA provenance is verified"
          echo "  ✓ HIPAA config signature is valid (if HIPAA mode)"
          echo ""
          echo "If all checks pass, this deployment will proceed."

  pre-deployment-gates:
    name: Pre-Deployment Gates
    runs-on: self-hosted
    needs: manual-approval
    timeout-minutes: 15
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # Placeholder: prints what a real verification would do; verifies nothing.
      - name: Verify SLSA provenance
        run: |
          echo "=== Verifying SLSA provenance ==="
          # In production, verify the provenance file
          # For now, this is a placeholder
          echo "Provenance verification (placeholder)"
          echo "In production, this would:"
          echo "  - Download provenance artifact from build workflow"
          echo "  - Verify signature and chain"
          echo "  - Confirm build source and materials"
          # Example verification with slsa-verifier:
          # slsa-verifier verify-artifact fetchml-worker \
          #   --provenance-path fetchml-worker.intoto.jsonl \
          #   --source-uri forgejo.example.com/jfraeysd/fetch_ml \
          #   --source-tag ${{ gitea.sha }}

      # Blocks the deployment when the prod worker config selects HIPAA mode
      # and the HIPAA config signature is missing or invalid. When cosign or
      # the public key is unavailable, the confirm_hipaa input must be "true".
      - name: Verify HIPAA config signature
        env:
          COSIGN_PUBLIC_KEY: ${{ secrets.COSIGN_PUBLIC_KEY }}
          CONFIRM_HIPAA: ${{ gitea.event.inputs.confirm_hipaa }}
        run: |
          echo "=== Verifying HIPAA config signature ==="
          PROD_CONFIG="deployments/configs/worker/docker-prod.yaml"
          HIPAA_CONFIG="deployments/configs/worker/docker-hipaa.yaml"
          HIPAA_SIG="${HIPAA_CONFIG}.sig"

          # Check if we're deploying in HIPAA mode
          if [ ! -f "$PROD_CONFIG" ]; then
            echo "⚠ $PROD_CONFIG not found - skipping HIPAA signature check"
            exit 0
          fi
          if ! grep -q "compliance_mode.*hipaa" "$PROD_CONFIG"; then
            echo "Not in HIPAA mode - skipping signature verification"
            exit 0
          fi

          echo "HIPAA mode detected - signature verification REQUIRED"

          # Check if signature file exists
          if [ ! -f "$HIPAA_SIG" ]; then
            echo "✗ HIPAA config signature file NOT FOUND"
            echo "Deployment BLOCKED - HIPAA mode requires signed config"
            exit 1
          fi
          echo "✓ HIPAA config signature file exists"

          # Verify signature with cosign
          if command -v cosign > /dev/null 2>&1 && [ -n "${COSIGN_PUBLIC_KEY:-}" ]; then
            # PEM content cannot be passed as a bare argument (it contains
            # spaces and newlines); let cosign read it from the environment.
            # Anything else is treated as a key reference (file path, KMS URI).
            case "$COSIGN_PUBLIC_KEY" in
              "-----BEGIN"*) KEY_REF="env://COSIGN_PUBLIC_KEY" ;;
              *) KEY_REF="$COSIGN_PUBLIC_KEY" ;;
            esac
            cosign verify-blob \
              --key "$KEY_REF" \
              --signature "$HIPAA_SIG" \
              "$HIPAA_CONFIG" || {
                echo "✗ HIPAA config signature verification FAILED"
                exit 1
              }
            echo "✓ HIPAA config signature verified"
          else
            echo "⚠ cosign or COSIGN_PUBLIC_KEY not available"
            echo "Manual verification required - confirm with: ${CONFIRM_HIPAA:-}"
            if [ "${CONFIRM_HIPAA:-}" != "true" ]; then
              echo "✗ HIPAA mode deployment requires explicit confirmation"
              exit 1
            fi
          fi

      # A missing check script is only a warning; a failing check blocks.
      - name: Check audit sink reachability
        run: |
          echo "=== Checking audit sink reachability ==="
          # Check if audit sink check script exists
          if [ -f "scripts/check-audit-sink.sh" ]; then
            chmod +x scripts/check-audit-sink.sh
            ./scripts/check-audit-sink.sh --env prod --timeout 10s || {
              echo "✗ Audit sink check FAILED"
              echo "Deployment BLOCKED - audit sink must be reachable"
              exit 1
            }
            echo "✓ Audit sink is reachable"
          else
            echo "⚠ Audit sink check script not found"
            echo "This is a WARNING - audit logging may be unavailable"
          fi

      # Placeholder: prints the tag; no digest comparison is performed yet.
      - name: Verify image digest
        run: |
          echo "=== Verifying image digest ==="
          echo "Deploy tag: $DEPLOY_TAG"
          # In production, verify the image digest
          # This ensures we're deploying the exact image that was built and tested
          echo "Image digest verification (placeholder)"
          echo "Expected digest: (from build artifacts)"
          echo "Actual digest: (would be fetched from registry)"
          # Example:
          # EXPECTED_DIGEST=$(cat .forgejo/artifacts/image-digest.txt)
          # ACTUAL_DIGEST=$(docker inspect --format='{{index .RepoDigests 0}}' fetchml-worker:$DEPLOY_TAG)
          # [ "$EXPECTED_DIGEST" = "$ACTUAL_DIGEST" ] || exit 1

  deploy:
    name: Deploy to Production
    runs-on: self-hosted
    needs: pre-deployment-gates
    timeout-minutes: 30
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up environment
        run: |
          echo "DEPLOY_ENV=$DEPLOY_ENV"
          echo "COMPOSE_FILE=$COMPOSE_FILE"
          echo "DEPLOY_TAG=$DEPLOY_TAG"
          # Ensure environment file exists
          if [ ! -f "deployments/.env.prod" ]; then
            echo "Creating production environment file..."
            printf '%s\n' \
              'DATA_DIR=./data/prod' \
              'LOG_LEVEL=warn' \
              'COMPLIANCE_MODE=standard' \
              > deployments/.env.prod
          fi

      - name: Deploy to production
        run: |
          echo "=== Deploying to production environment ==="
          # Change to deployments directory
          cd deployments

          # Source the environment file (set -a exports everything it defines)
          set -a
          source .env.prod
          set +a

          # Record current deployment for potential rollback
          docker compose -f docker-compose.prod.yml ps > .prod-previous-state.txt 2>/dev/null || true

          # Pull specified image tag. REGISTRY and IMAGE_NAME are not defined
          # by this workflow; they are honoured when supplied by .env.prod or
          # the runner environment. Without them the image reference would be
          # "/:<tag>", so the pull is skipped instead.
          echo "Pulling image tag: $DEPLOY_TAG"
          if [ -n "${REGISTRY:-}" ] && [ -n "${IMAGE_NAME:-}" ]; then
            docker pull "${REGISTRY}/${IMAGE_NAME}:${DEPLOY_TAG}" || {
              echo "⚠ Image pull failed - may need to build locally or use different tag"
            }
          else
            echo "⚠ REGISTRY/IMAGE_NAME not set - skipping explicit image pull"
          fi

          # Deploy the stack (DEPLOY_TAG is exported, so the compose file can
          # interpolate it if it references that variable)
          docker compose -f docker-compose.prod.yml up -d
          echo "✓ Production deployment initiated"

      - name: Post-deployment health check
        run: |
          echo "=== Running post-deployment health checks ==="
          # Wait for services to start
          sleep 15

          # Check if services are running
          cd deployments
          docker compose -f docker-compose.prod.yml ps

          # Check health endpoints with retries
          MAX_RETRIES=5
          RETRY_DELAY=10
          for i in $(seq 1 "$MAX_RETRIES"); do
            echo "Health check attempt $i/$MAX_RETRIES..."
            if curl -fsS http://localhost:9101/health > /dev/null 2>&1; then
              echo "✓ API health check passed"
              break
            fi
            if [ "$i" -eq "$MAX_RETRIES" ]; then
              echo "✗ API health check failed after $MAX_RETRIES attempts"
              exit 1
            fi
            echo "Retrying in ${RETRY_DELAY}s..."
            sleep "$RETRY_DELAY"
          done

          # Check compliance_mode. Defaults are applied with ${VAR:-default}
          # so they work whether or not the shell runs with pipefail.
          echo "Checking compliance_mode..."
          HEALTH_JSON="$(curl -fsS http://localhost:9101/health 2>/dev/null || true)"
          COMPLIANCE_MODE="$(printf '%s' "$HEALTH_JSON" \
            | grep -o '"compliance_mode":"[^"]*"' \
            | cut -d'"' -f4 || true)"
          COMPLIANCE_MODE="${COMPLIANCE_MODE:-unknown}"
          echo "Compliance mode reported: $COMPLIANCE_MODE"

          # Verify it matches expected. The working directory is already
          # deployments/, so the config path must be relative to it.
          EXPECTED_MODE="$(grep "compliance_mode" configs/worker/docker-prod.yaml 2>/dev/null \
            | head -1 \
            | sed -e 's/.*: *//' -e "s/[\"']//g" -e 's/[[:space:]]*$//' || true)"
          EXPECTED_MODE="${EXPECTED_MODE:-standard}"
          if [ "$COMPLIANCE_MODE" = "$EXPECTED_MODE" ]; then
            echo "✓ compliance_mode matches expected: $EXPECTED_MODE"
          else
            echo "⚠ compliance_mode mismatch: expected $EXPECTED_MODE, got $COMPLIANCE_MODE"
            # Don't fail here - log for monitoring
          fi

      # Each probe fails the step explicitly: `cmd && echo ok` would not trip
      # `set -e` when cmd fails, letting broken deployments report success.
      - name: Run smoke tests
        run: |
          echo "=== Running production smoke tests ==="
          # Wait for services to be fully ready
          sleep 20

          # Basic connectivity test
          if curl -fsS http://localhost:9101/health; then
            echo "✓ API is responding"
          else
            echo "✗ API is not responding"
            exit 1
          fi

          # Check Redis
          if docker exec ml-prod-redis redis-cli ping; then
            echo "✓ Redis is responding"
          else
            echo "✗ Redis is not responding"
            exit 1
          fi

          # Check worker (if running)
          if [ -n "$(docker ps -q --filter "name=ml-prod-worker")" ]; then
            echo "✓ Worker container is running"
          fi

          echo "✓ Production smoke tests passed"

      - name: Send deployment notification
        if: always()
        env:
          JOB_STATUS: ${{ job.status }}
          RUN_ID: ${{ gitea.run_id }}
          ACTOR: ${{ gitea.actor }}
          COMMIT_SHA: ${{ gitea.sha }}
        run: |
          echo "=== Deployment Notification ==="
          if [ "$JOB_STATUS" = "success" ]; then
            echo "✓ Production deployment $RUN_ID SUCCESSFUL"
            echo "Deployed by: $ACTOR"
            echo "Tag: $DEPLOY_TAG"
            echo "SHA: $COMMIT_SHA"
          else
            echo "✗ Production deployment $RUN_ID FAILED"
            echo "Deployed by: $ACTOR"
            echo "Check logs for details"
          fi
          # In production, integrate with notification system:
          # - Slack webhook
          # - Email notification
          # - PagerDuty (for failures)

      # Appends one line per run (success or failure) to a log file inside
      # the workspace.
      - name: Write audit log
        if: always()
        env:
          JOB_STATUS: ${{ job.status }}
          RUN_ID: ${{ gitea.run_id }}
          ACTOR: ${{ gitea.actor }}
        run: |
          echo "=== Writing Audit Log Entry ==="
          AUDIT_LOG="deployments/.prod-audit.log"
          TIMESTAMP="$(date -Iseconds)"
          # The directory may be absent if the run failed before checkout.
          mkdir -p "$(dirname "$AUDIT_LOG")"
          echo "$TIMESTAMP | deployment | $JOB_STATUS | run_id=$RUN_ID | actor=$ACTOR | tag=$DEPLOY_TAG" >> "$AUDIT_LOG"
          echo "✓ Audit log entry written"

      # Prints manual rollback instructions only; nothing is rolled back
      # automatically.
      - name: Rollback on failure
        if: failure()
        run: |
          echo "=== Production deployment failed ==="
          echo "Rollback procedure:"
          echo "1. Identify previous working image tag from .prod-audit.log"
          echo "2. Run: cd deployments && docker compose -f docker-compose.prod.yml down"
          echo "3. Deploy previous tag: docker compose -f docker-compose.prod.yml up -d"
          echo "4. Verify health endpoints"
          echo ""
          echo "Note: Audit log chain is NOT rolled back - chain integrity preserved"
          echo "Note: Redis queue state is NOT rolled back - may need manual cleanup"
          exit 1