#!/bin/bash
# Production Setup Script for Rocky Linux (Bare Metal)
# This script sets up the complete FetchML environment on bare metal.
#
# Usage: <this-script> [BASE_PATH] [ML_USER] [ML_GROUP]
#   BASE_PATH  root of the experiment tree     (default: /data/ml-experiments)
#   ML_USER    system account owning the data  (default: ml-user)
#   ML_GROUP   group owning the data           (default: ml-group)
#
# Privileged operations are run through sudo.

set -euo pipefail

readonly BOLD='\033[1m'
readonly GREEN='\033[0;32m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m'

# Configuration (positional arguments, each with a default)
readonly BASE_PATH="${1:-/data/ml-experiments}"
readonly ML_USER="${2:-ml-user}"
readonly ML_GROUP="${3:-ml-group}"

# Print an error to stderr and abort.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Print a "[n/8] message" step header. $1 = step number, $2 = message.
step() {
  printf '%b[%s/8]%b %s\n' "$BLUE" "$1" "$NC" "$2"
}

# Print a green check mark followed by a message. $1 = message.
ok() {
  printf '%b✓%b %s\n' "$GREEN" "$NC" "$1"
}

# 1. Create the system group and user if they don't exist.
create_user() {
  step 1 "Creating system user..."

  # The group must exist before step 3 runs chown USER:GROUP; useradd alone
  # does not create a group with an arbitrary name.
  if ! getent group "$ML_GROUP" >/dev/null; then
    sudo groupadd -r "$ML_GROUP"
    ok "Created group: $ML_GROUP"
  fi

  if id "$ML_USER" &>/dev/null; then
    echo " User $ML_USER already exists"
  else
    # -g makes ML_GROUP the primary group; this also works when ML_USER and
    # ML_GROUP are the same name.
    sudo useradd -r -s /bin/bash -m -d "/home/$ML_USER" -g "$ML_GROUP" \
      -c "ML System User" "$ML_USER"
    ok "Created user: $ML_USER"
  fi
}

# 2. Create the experiment tree, log directory and config directory.
create_directories() {
  step 2 "Creating directory structure..."

  local -a subdirs=(experiments pending running finished failed datasets)
  local -a paths=()
  local d
  for d in "${subdirs[@]}"; do
    paths+=("$BASE_PATH/$d")
  done

  sudo mkdir -p -- "${paths[@]}"
  sudo mkdir -p /var/log/fetch_ml
  sudo mkdir -p /etc/fetch_ml

  ok "Created directories:"
  for d in "${subdirs[@]}"; do
    echo " $BASE_PATH/$d/"
  done
  echo " /var/log/fetch_ml/"
  echo " /etc/fetch_ml/"
}

# 3. Set ownership and permissions on the data and log directories.
set_permissions() {
  step 3 "Setting permissions..."

  sudo chown -R -- "$ML_USER:$ML_GROUP" "$BASE_PATH"
  sudo chmod 755 -- "$BASE_PATH"
  sudo chmod 700 -- "$BASE_PATH/experiments" # Restrict experiment data
  sudo chown -R -- "$ML_USER:$ML_GROUP" /var/log/fetch_ml
  sudo chmod 755 /var/log/fetch_ml

  ok "Permissions set"
}

# 4. Install system dependencies (Rocky Linux).
install_dependencies() {
  step 4 "Installing system dependencies..."

  # dnf exits 0 for packages that are already installed, so a non-zero status
  # is a real failure and must not be ignored. pciutils (lspci), openssl and
  # dnf-plugins-core (dnf config-manager) are needed by later steps.
  sudo dnf install -y \
    golang \
    podman \
    redis \
    git \
    make \
    gcc \
    pciutils \
    openssl \
    dnf-plugins-core ||
    die "dnf could not install the required packages"

  ok "Dependencies installed"
}

# 5. Configure Podman for GPU access (if NVIDIA GPU present).
configure_podman() {
  step 5 "Configuring Podman..."

  # grep deliberately runs without -q so it drains lspci's output; an early
  # exit could fail the pipeline under pipefail via SIGPIPE.
  if lspci | grep -i nvidia &>/dev/null; then
    echo " NVIDIA GPU detected, configuring GPU access..."

    # Install nvidia-container-toolkit if not present. Probe for nvidia-ctk,
    # since that is the binary invoked below.
    if ! command -v nvidia-ctk &>/dev/null; then
      echo " Installing nvidia-container-toolkit..."
      sudo dnf config-manager --add-repo \
        https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo
      sudo dnf install -y nvidia-container-toolkit
    fi

    # Configure Podman CDI
    sudo mkdir -p /etc/cdi
    sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
    ok "GPU support configured"
  else
    echo " No NVIDIA GPU detected, skipping GPU setup"
  fi
}

# Print the path of the first Redis config file that exists.
# Returns non-zero when none is found. sudo is used for the existence test
# in case the directory is not readable by the invoking user.
find_redis_conf() {
  local candidate
  for candidate in /etc/redis/redis.conf /etc/redis.conf; do
    if sudo test -f "$candidate"; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 1
}

# 6. Enable Redis and set a password if none is configured.
configure_redis() {
  step 6 "Configuring Redis..."

  sudo systemctl enable redis
  # Kept best-effort as in the original; a real start failure surfaces at the
  # restart below or when the services connect.
  sudo systemctl start redis || echo "Redis may already be running" >&2

  local redis_conf
  redis_conf=$(find_redis_conf) ||
    die "Redis configuration file not found (looked in /etc/redis/redis.conf and /etc/redis.conf)"

  # Set Redis password if not already configured
  if ! sudo grep -q "^requirepass" "$redis_conf"; then
    local redis_password
    redis_password=$(openssl rand -base64 32)
    # The leading newline keeps the directive on its own line even when the
    # file does not end with one.
    printf '\nrequirepass %s\n' "$redis_password" |
      sudo tee -a "$redis_conf" >/dev/null
    sudo systemctl restart redis
    echo " Generated Redis password: $redis_password"
    echo " Save this password for your configuration!"
  else
    echo " Redis password already configured"
  fi

  ok "Redis configured"
}

# 7. Create systemd services for the API server and the worker.
#
# NOTE(review): the unit definitions in the reviewed copy of this script were
# lost (only the "tee /etc/systemd/system/fetchml-api.service" target
# survived). The units below are a reconstruction from the binary and config
# paths printed under "Next steps"; the ExecStart flags, restart policy and
# worker unit path are assumptions - confirm against the project before use.
create_services() {
  step 7 "Creating systemd services..."

  # API Server service
  sudo tee /etc/systemd/system/fetchml-api.service >/dev/null <<EOF
[Unit]
Description=FetchML API Server
After=network.target redis.service

[Service]
Type=simple
User=${ML_USER}
Group=${ML_GROUP}
WorkingDirectory=${BASE_PATH}
ExecStart=/usr/local/bin/fetchml-api -config /etc/fetch_ml/config.yaml
Restart=on-failure
RestartSec=5

[Install]
WantedBy=multi-user.target
EOF

  # Worker service
  sudo tee /etc/systemd/system/fetchml-worker.service >/dev/null <<EOF
[Unit]
Description=FetchML Worker
After=network.target redis.service

[Service]
Type=simple
User=${ML_USER}
Group=${ML_GROUP}
WorkingDirectory=${BASE_PATH}
ExecStart=/usr/local/bin/fetchml-worker -config /etc/fetch_ml/worker.toml
Restart=on-failure
RestartSec=5

[Install]
WantedBy=multi-user.target
EOF

  # Make systemd pick up the new unit files.
  sudo systemctl daemon-reload

  ok "Systemd services created"
}

# 8. Configure log rotation for /var/log/fetch_ml.
#
# NOTE(review): only the tail of the original logrotate stanza survived (the
# fetchml-worker reload line and "endscript }"). The target file name, the
# rotation policy and the fetchml-api reload line are assumptions - confirm.
configure_logrotate() {
  step 8 "Setting up log rotation..."

  sudo tee /etc/logrotate.d/fetchml >/dev/null <<EOF
/var/log/fetch_ml/*.log {
    daily
    rotate 14
    compress
    delaycompress
    missingok
    notifempty
    create 0640 ${ML_USER} ${ML_GROUP}
    sharedscripts
    postrotate
        systemctl reload fetchml-api >/dev/null 2>&1 || true
        systemctl reload fetchml-worker >/dev/null 2>&1 || true
    endscript
}
EOF

  ok "Log rotation configured"
}

# Print the closing summary and the manual follow-up steps.
print_summary() {
  echo ""
  printf '%b=== Setup Complete! ===%b\n' "$BOLD" "$NC"
  echo ""
  echo "Directory structure created at: $BASE_PATH"
  echo "Logs will be written to: /var/log/fetch_ml/"
  echo "Configuration directory: /etc/fetch_ml/"
  echo ""
  printf '%bNext steps:%b\n' "$BOLD" "$NC"
  cat <<'EOF'
1. Copy your config files:
 sudo cp configs/config-prod.yaml /etc/fetch_ml/config.yaml
 sudo cp configs/worker-prod.toml /etc/fetch_ml/worker.toml

2. Build and install binaries:
 make build
 sudo cp bin/api-server /usr/local/bin/fetchml-api
 sudo cp bin/worker /usr/local/bin/fetchml-worker

3. Update config files with your settings (Redis password, API keys, etc.)

4. Start services:
 sudo systemctl start fetchml-api
 sudo systemctl start fetchml-worker

5. Enable services to start on boot:
 sudo systemctl enable fetchml-api
 sudo systemctl enable fetchml-worker

6. Check status:
 sudo systemctl status fetchml-api
 sudo systemctl status fetchml-worker
 sudo journalctl -u fetchml-api -f

EOF
}

# Run every setup step in order.
main() {
  printf '%b=== FetchML Production Setup (Rocky Linux Bare Metal) ===%b\n\n' \
    "$BOLD" "$NC"

  echo "Configuration:"
  echo " Base path: $BASE_PATH"
  echo " ML user: $ML_USER"
  echo " ML group: $ML_GROUP"
  echo ""

  create_user
  create_directories
  set_permissions
  install_dependencies
  configure_podman
  configure_redis
  create_services
  configure_logrotate
  print_summary
}

main