fetch_ml/scripts/setup-prod.sh
Jeremie Fraeys bb25743b0f feat: add comprehensive setup scripts and management tools
- Add production setup scripts for automated deployment
- Include monitoring setup and configuration validation
- Add legacy setup scripts for various Linux distributions
- Implement Bitwarden integration for secure credential management
- Add development and production environment setup
- Include comprehensive management tools and utilities
- Add shell script library with common functions

Provides complete automation for setup, deployment, and management
of FetchML platform in development and production environments.
2025-12-04 16:55:04 -05:00

229 lines
6.5 KiB
Bash
Executable file

#!/bin/bash
# Production Setup Script for Rocky Linux (Bare Metal)
# This script sets up the complete FetchML environment on bare metal.
#
# Usage: setup-prod.sh [BASE_PATH] [ML_USER] [ML_GROUP]
#   BASE_PATH  root of the experiment data tree (default: /data/ml-experiments)
#   ML_USER    account that owns the data and runs the services (default: ml-user)
#   ML_GROUP   group that owns the data and runs the services (default: ml-group)

# Abort on any failing command, on use of an unset variable, and when any
# stage of a pipeline fails.
set -euo pipefail

# ANSI escape sequences; they are interpreted by `echo -e` at print time.
BOLD='\033[1m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
NC='\033[0m'

echo -e "${BOLD}=== FetchML Production Setup (Rocky Linux Bare Metal) ===${NC}\n"

# Configuration
BASE_PATH="${1:-/data/ml-experiments}"
ML_USER="${2:-ml-user}"
ML_GROUP="${3:-ml-group}"

# BASE_PATH is later chown'ed recursively as root and substituted into the
# systemd ReadWritePaths= setting, which expects absolute paths. Refuse the
# filesystem root and relative paths before anything is modified.
if [[ "$BASE_PATH" != /* || "$BASE_PATH" == "/" ]]; then
  echo "Error: base path must be an absolute path other than '/' (got: $BASE_PATH)" >&2
  exit 1
fi

echo "Configuration:"
echo " Base path: $BASE_PATH"
echo " ML user: $ML_USER"
echo " ML group: $ML_GROUP"
echo ""
# 1. Create the service group and system user if they don't exist
echo -e "${BLUE}[1/8]${NC} Creating system user..."

# The group has to exist before `useradd -g` and before the later
# `chown $ML_USER:$ML_GROUP` calls; useradd on its own does not create a
# group named $ML_GROUP (unless the user and group names coincide).
if getent group "$ML_GROUP" &>/dev/null; then
  echo " Group $ML_GROUP already exists"
else
  sudo groupadd -r "$ML_GROUP"
  echo -e "${GREEN}${NC} Created group: $ML_GROUP"
fi

if id "$ML_USER" &>/dev/null; then
  echo " User $ML_USER already exists"
else
  # -r: system account, -m/-d: create the home directory,
  # -g: use $ML_GROUP as the primary group.
  sudo useradd -r -s /bin/bash -m -d "/home/$ML_USER" -c "ML System User" \
    -g "$ML_GROUP" "$ML_USER"
  echo -e "${GREEN}${NC} Created user: $ML_USER"
fi
# 2. Create directory structure
printf '%b\n' "${BLUE}[2/8]${NC} Creating directory structure..."

# Sub-directories that live directly under the base path.
data_subdirs=(experiments pending running finished failed datasets)
data_paths=()
for sub in "${data_subdirs[@]}"; do
  data_paths+=("${BASE_PATH}/${sub}")
done

# One mkdir invocation for the data tree, then the log and config directories.
sudo mkdir -p "${data_paths[@]}"
sudo mkdir -p /var/log/fetch_ml
sudo mkdir -p /etc/fetch_ml

# Report everything that was created.
printf '%b\n' "${GREEN}${NC} Created directories:"
for sub in "${data_subdirs[@]}"; do
  echo " $BASE_PATH/$sub/"
done
echo " /var/log/fetch_ml/"
echo " /etc/fetch_ml/"
# 3. Set ownership and permissions
echo -e "${BLUE}[3/8]${NC} Setting permissions..."

# All expansions are quoted and preceded by `--`: these commands run as root,
# so a path containing spaces, glob characters or a leading dash must never be
# split into extra arguments or parsed as an option.
sudo chown -R -- "$ML_USER:$ML_GROUP" "$BASE_PATH"
sudo chmod 755 -- "$BASE_PATH"
sudo chmod 700 -- "$BASE_PATH/experiments" # Restrict experiment data
sudo chown -R -- "$ML_USER:$ML_GROUP" /var/log/fetch_ml
sudo chmod 755 /var/log/fetch_ml

echo -e "${GREEN}${NC} Permissions set"
# 4. Install system dependencies (Rocky Linux)
echo -e "${BLUE}[4/8]${NC} Installing system dependencies..."

# pciutils provides lspci, which the GPU detection in step 5 relies on.
required_packages=(golang podman redis git make gcc pciutils)

# `dnf install` already exits 0 for packages that are installed, so a non-zero
# status is a genuine failure (no network, unknown package, broken repo).
# Stop here instead of reporting success and failing in a later step.
if ! sudo dnf install -y "${required_packages[@]}"; then
  echo "Error: failed to install system dependencies: ${required_packages[*]}" >&2
  exit 1
fi

echo -e "${GREEN}${NC} Dependencies installed"
# 5. Configure Podman for GPU access (if NVIDIA GPU present)
echo -e "${BLUE}[5/8]${NC} Configuring Podman..."

if ! command -v lspci &>/dev/null; then
  # Without lspci the GPU cannot be detected; say so instead of silently
  # reporting that no GPU is present.
  echo " lspci not found (install pciutils), skipping GPU setup"
elif lspci | grep -i nvidia &>/dev/null; then
  # grep deliberately reads all of lspci's output (no -q) so that lspci is
  # never cut short by a closed pipe.
  echo " NVIDIA GPU detected, configuring GPU access..."

  # The CDI spec below is generated with nvidia-ctk, so probe for that exact
  # command when deciding whether the toolkit still has to be installed.
  if ! command -v nvidia-ctk &>/dev/null; then
    echo " Installing nvidia-container-toolkit..."
    sudo dnf config-manager --add-repo \
      https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo
    sudo dnf install -y nvidia-container-toolkit
  fi

  # Configure Podman CDI. Creating the target directory first is harmless if
  # nvidia-ctk would create it itself.
  sudo mkdir -p /etc/cdi
  sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
  echo -e "${GREEN}${NC} GPU support configured"
else
  echo " No NVIDIA GPU detected, skipping GPU setup"
fi
# 6. Configure Redis
echo -e "${BLUE}[6/8]${NC} Configuring Redis..."
sudo systemctl enable redis

# `systemctl start` succeeds when the unit is already active, so a failure
# here means Redis really could not be started.
if ! sudo systemctl start redis; then
  echo "Error: failed to start redis (see: journalctl -u redis)" >&2
  exit 1
fi

# Locate the packaged configuration file. Its location differs between
# distribution releases (/etc/redis/redis.conf vs. the older /etc/redis.conf),
# and appending to a path Redis does not read would leave it unprotected.
redis_conf=""
for candidate in /etc/redis/redis.conf /etc/redis.conf; do
  if sudo test -f "$candidate"; then
    redis_conf="$candidate"
    break
  fi
done
if [[ -z "$redis_conf" ]]; then
  echo "Error: could not locate redis.conf under /etc/redis/ or /etc/" >&2
  exit 1
fi

# Set Redis password if not already configured
if sudo grep -Eq '^[[:space:]]*requirepass[[:space:]]' "$redis_conf"; then
  echo " Redis password already configured"
else
  REDIS_PASSWORD=$(openssl rand -base64 32)
  # The leading newline guards against a config file whose last line lacks a
  # terminating newline; printf is a builtin, so the secret never appears in
  # a process argument list.
  printf '\nrequirepass %s\n' "$REDIS_PASSWORD" | sudo tee -a "$redis_conf" >/dev/null
  sudo systemctl restart redis
  # The password is shown once on the terminal so the operator can copy it
  # into the FetchML configuration files.
  echo " Generated Redis password: $REDIS_PASSWORD"
  echo " Save this password for your configuration!"
fi
echo -e "${GREEN}${NC} Redis configured"
# 7. Setup systemd services
echo -e "${BLUE}[7/8]${NC} Creating systemd services..."

# Both units declare WorkingDirectory=/opt/fetch_ml. systemd treats a missing
# working directory as a fatal start-up error, so make sure it exists.
sudo mkdir -p /opt/fetch_ml

# NOTE(review): if the worker launches rootless Podman containers as
# $ML_USER, NoNewPrivileges=/ProtectHome= may interfere - confirm on a host.

# API Server service
# The heredoc delimiter is unquoted on purpose: $ML_USER, $ML_GROUP and
# $BASE_PATH are expanded now and written into the unit file as literals.
sudo tee /etc/systemd/system/fetchml-api.service >/dev/null <<EOF
[Unit]
Description=FetchML API Server
After=network.target redis.service
Wants=redis.service
[Service]
Type=simple
User=$ML_USER
Group=$ML_GROUP
WorkingDirectory=/opt/fetch_ml
ExecStart=/usr/local/bin/fetchml-api -config /etc/fetch_ml/config.yaml
Restart=always
RestartSec=10
StandardOutput=append:/var/log/fetch_ml/api.log
StandardError=append:/var/log/fetch_ml/api-error.log
# Security hardening
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=$BASE_PATH /var/log/fetch_ml
[Install]
WantedBy=multi-user.target
EOF

# Worker service (ordered after the API server)
sudo tee /etc/systemd/system/fetchml-worker.service >/dev/null <<EOF
[Unit]
Description=FetchML Worker
After=network.target redis.service fetchml-api.service
Wants=redis.service
[Service]
Type=simple
User=$ML_USER
Group=$ML_GROUP
WorkingDirectory=/opt/fetch_ml
ExecStart=/usr/local/bin/fetchml-worker -config /etc/fetch_ml/worker.toml
Restart=always
RestartSec=10
StandardOutput=append:/var/log/fetch_ml/worker.log
StandardError=append:/var/log/fetch_ml/worker-error.log
# Security hardening
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=$BASE_PATH /var/log/fetch_ml
[Install]
WantedBy=multi-user.target
EOF

# Make systemd pick up the new unit files.
sudo systemctl daemon-reload
echo -e "${GREEN}${NC} Systemd services created"
# 8. Setup logrotate
echo -e "${BLUE}[8/8]${NC} Configuring log rotation..."

# The services write their logs through descriptors opened by
# StandardOutput=/StandardError=append:..., and the unit files define no
# ExecReload=, so `systemctl reload` cannot make them reopen a renamed log.
# With rename-based rotation they would keep writing into the rotated file.
# copytruncate copies the log and truncates it in place instead; because the
# descriptors are in append mode, writes continue at the new end of file.
sudo tee /etc/logrotate.d/fetchml >/dev/null <<'EOF'
/var/log/fetch_ml/*.log {
    daily
    rotate 14
    compress
    delaycompress
    notifempty
    missingok
    copytruncate
}
EOF

echo -e "${GREEN}${NC} Log rotation configured"
# Summary
# Closing banner, framed by blank lines.
printf '\n%b\n\n' "${BOLD}=== Setup Complete! ===${NC}"

# Locations used by this run; only $BASE_PATH is expanded here.
cat <<EOF
Directory structure created at: $BASE_PATH
Logs will be written to: /var/log/fetch_ml/
Configuration directory: /etc/fetch_ml/

EOF

printf '%b\n' "${BOLD}Next steps:${NC}"

# Static follow-up instructions; the quoted delimiter keeps the text literal.
cat <<'EOF'
1. Copy your config files:
 sudo cp configs/config-prod.yaml /etc/fetch_ml/config.yaml
 sudo cp configs/worker-prod.toml /etc/fetch_ml/worker.toml

2. Build and install binaries:
 make build
 sudo cp bin/api-server /usr/local/bin/fetchml-api
 sudo cp bin/worker /usr/local/bin/fetchml-worker

3. Update config files with your settings (Redis password, API keys, etc.)

4. Start services:
 sudo systemctl start fetchml-api
 sudo systemctl start fetchml-worker

5. Enable services to start on boot:
 sudo systemctl enable fetchml-api
 sudo systemctl enable fetchml-worker

6. Check status:
 sudo systemctl status fetchml-api
 sudo systemctl status fetchml-worker
 sudo journalctl -u fetchml-api -f

EOF