- Add production setup scripts for automated deployment
- Include monitoring setup and configuration validation
- Add legacy setup scripts for various Linux distributions
- Implement Bitwarden integration for secure credential management
- Add development and production environment setup
- Include comprehensive management tools and utilities
- Add shell script library with common functions

Provides complete automation for the setup, deployment, and management of the FetchML platform in development and production environments.
229 lines
6.5 KiB
Bash
Executable file
229 lines
6.5 KiB
Bash
Executable file
#!/bin/bash
# Production Setup Script for Rocky Linux (Bare Metal)
# This script sets up the complete FetchML environment on bare metal

# Abort on any failing command, on use of an unset variable, and when any
# stage of a pipeline fails (not only the last one).
set -euo pipefail

# ANSI escape sequences for coloured progress output; they are stored as
# literal backslash sequences and rendered by `echo -e`.
readonly BOLD='\033[1m'
readonly GREEN='\033[0;32m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # resets colour/attributes

echo -e "${BOLD}=== FetchML Production Setup (Rocky Linux Bare Metal) ===${NC}\n"

# Configuration
# Optional positional arguments, each falling back to a default when
# omitted or empty:
#   $1 - base directory for experiment data
#   $2 - system user that owns the data and runs the services
#   $3 - group used for ownership and for the services
BASE_PATH="${1:-/data/ml-experiments}"
ML_USER="${2:-ml-user}"
ML_GROUP="${3:-ml-group}"

# The base path is later handed to `chown -R` and written verbatim into the
# systemd units' space-separated ReadWritePaths= setting, so it has to be an
# absolute path without whitespace.
if [[ "$BASE_PATH" != /* ]]; then
  echo "Error: base path must be an absolute path: $BASE_PATH" >&2
  exit 1
fi
if [[ "$BASE_PATH" == *[[:space:]]* ]]; then
  echo "Error: base path must not contain whitespace: $BASE_PATH" >&2
  exit 1
fi

echo "Configuration:"
echo " Base path: $BASE_PATH"
echo " ML user: $ML_USER"
echo " ML group: $ML_GROUP"
echo ""

# 1. Create system group and user if they don't exist
echo -e "${BLUE}[1/8]${NC} Creating system user..."

# The group must exist before the user is created with it as primary group,
# and before any later `chown user:group` call; useradd on its own would only
# create a group named after the user, not $ML_GROUP.
if getent group "$ML_GROUP" >/dev/null; then
  echo " Group $ML_GROUP already exists"
else
  sudo groupadd -r "$ML_GROUP"
  echo -e "${GREEN}✓${NC} Created group: $ML_GROUP"
fi

if id "$ML_USER" &>/dev/null; then
  echo " User $ML_USER already exists"
else
  # -r: system account, -m/-d: create the home directory,
  # -g: use $ML_GROUP as the primary group.
  sudo useradd -r -s /bin/bash -m -d "/home/$ML_USER" \
    -c "ML System User" -g "$ML_GROUP" "$ML_USER"
  echo -e "${GREEN}✓${NC} Created user: $ML_USER"
fi

# 2. Create directory structure
echo -e "${BLUE}[2/8]${NC} Creating directory structure..."

# Sub-directories created beneath the base path, and the fixed system
# locations used for logs and configuration.
data_subdirs=(experiments pending running finished failed datasets)
system_dirs=(/var/log/fetch_ml /etc/fetch_ml)

data_dirs=()
for sub in "${data_subdirs[@]}"; do
  data_dirs+=("${BASE_PATH}/${sub}")
done

# One mkdir call for the data tree, then one per system directory.
sudo mkdir -p "${data_dirs[@]}"
for dir in "${system_dirs[@]}"; do
  sudo mkdir -p "$dir"
done

# Report everything that was created, data directories first.
echo -e "${GREEN}✓${NC} Created directories:"
for dir in "${data_dirs[@]}" "${system_dirs[@]}"; do
  echo " ${dir}/"
done

# 3. Set ownership and permissions
echo -e "${BLUE}[3/8]${NC} Setting permissions..."

# All expansions are quoted so that a path or name can never be split into
# several arguments or glob-expanded before reaching chown/chmod.
sudo chown -R -- "$ML_USER:$ML_GROUP" "$BASE_PATH"
sudo chmod 755 -- "$BASE_PATH"
sudo chmod 700 -- "$BASE_PATH/experiments" # Restrict experiment data

# The services run as $ML_USER and append their logs here.
sudo chown -R -- "$ML_USER:$ML_GROUP" /var/log/fetch_ml
sudo chmod 755 /var/log/fetch_ml

echo -e "${GREEN}✓${NC} Permissions set"

# 4. Install system dependencies (Rocky Linux)
echo -e "${BLUE}[4/8]${NC} Installing system dependencies..."

packages=(golang podman redis git make gcc)

# dnf exits successfully when a requested package is already installed, so a
# non-zero status here is a real failure (missing repo, no network, unknown
# package). Stop instead of reporting success and failing in a later step.
if ! sudo dnf install -y "${packages[@]}"; then
  echo "Error: failed to install system dependencies: ${packages[*]}" >&2
  exit 1
fi

echo -e "${GREEN}✓${NC} Dependencies installed"

# 5. Configure Podman for GPU access (if NVIDIA GPU present)
echo -e "${BLUE}[5/8]${NC} Configuring Podman..."

# A case-insensitive "nvidia" match in the PCI device list is treated as
# "GPU present". If lspci is unavailable nothing matches and GPU setup is
# skipped.
if lspci | grep -i nvidia &>/dev/null; then
  echo " NVIDIA GPU detected, configuring GPU access..."

  # Install nvidia-container-toolkit if not present. The check looks for
  # nvidia-ctk because that is the command invoked below.
  if ! command -v nvidia-ctk &>/dev/null; then
    echo " Installing nvidia-container-toolkit..."
    sudo dnf config-manager --add-repo \
      https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo
    sudo dnf install -y nvidia-container-toolkit
  fi

  # Configure Podman CDI: make sure the output directory exists, then write
  # the generated device specification into it.
  sudo mkdir -p /etc/cdi
  sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
  echo -e "${GREEN}✓${NC} GPU support configured"
else
  echo " No NVIDIA GPU detected, skipping GPU setup"
fi

# 6. Configure Redis
echo -e "${BLUE}[6/8]${NC} Configuring Redis..."
sudo systemctl enable redis
sudo systemctl start redis || echo "Redis may already be running"

# Locate the Redis configuration file instead of assuming a single path.
# NOTE(review): the packaged location is believed to differ between
# releases (/etc/redis/redis.conf vs. /etc/redis.conf) - confirm for the
# target Rocky Linux version.
redis_conf=""
for candidate in /etc/redis/redis.conf /etc/redis.conf; do
  if sudo test -f "$candidate"; then
    redis_conf="$candidate"
    break
  fi
done
if [[ -z "$redis_conf" ]]; then
  echo "Error: Redis configuration file not found (checked /etc/redis/redis.conf and /etc/redis.conf)" >&2
  exit 1
fi

# Set Redis password if not already configured
if ! sudo grep -q "^requirepass" "$redis_conf"; then
  REDIS_PASSWORD=$(openssl rand -base64 32)
  # The leading newline keeps the directive on its own line even when the
  # existing file does not end with a newline.
  printf '\nrequirepass %s\n' "$REDIS_PASSWORD" | sudo tee -a "$redis_conf" >/dev/null
  # Restart so the new password takes effect.
  sudo systemctl restart redis
  # The password is shown once here; it is not stored anywhere else by this
  # script.
  echo " Generated Redis password: $REDIS_PASSWORD"
  echo " Save this password for your configuration!"
else
  echo " Redis password already configured"
fi

echo -e "${GREEN}✓${NC} Redis configured"

# 7. Setup systemd services
echo -e "${BLUE}[7/8]${NC} Creating systemd services..."

#######################################
# Write one FetchML systemd unit file.
# Globals:   ML_USER, ML_GROUP, BASE_PATH (read)
# Arguments: $1 - unit name without ".service" (e.g. fetchml-api); the part
#                 after "fetchml-" is also used as the log file prefix
#            $2 - Description= text
#            $3 - After= dependency list
#            $4 - ExecStart= command line
# Outputs:   /etc/systemd/system/<unit>.service (written via sudo tee)
#######################################
write_fetchml_unit() {
  local unit=$1
  local description=$2
  local after=$3
  local exec_start=$4
  local log_prefix=${unit#fetchml-}

  # Unquoted here-document: the variables are expanded now, so the unit file
  # contains the concrete user, group and paths.
  sudo tee "/etc/systemd/system/${unit}.service" >/dev/null <<EOF
[Unit]
Description=${description}
After=${after}
Wants=redis.service

[Service]
Type=simple
User=$ML_USER
Group=$ML_GROUP
WorkingDirectory=/opt/fetch_ml
ExecStart=${exec_start}
Restart=always
RestartSec=10
StandardOutput=append:/var/log/fetch_ml/${log_prefix}.log
StandardError=append:/var/log/fetch_ml/${log_prefix}-error.log

# Security hardening
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=$BASE_PATH /var/log/fetch_ml

[Install]
WantedBy=multi-user.target
EOF
}

# Both units set WorkingDirectory=/opt/fetch_ml; a service cannot start when
# its working directory is missing, so create it here.
sudo mkdir -p /opt/fetch_ml

# API Server service
write_fetchml_unit fetchml-api "FetchML API Server" \
  "network.target redis.service" \
  "/usr/local/bin/fetchml-api -config /etc/fetch_ml/config.yaml"

# Worker service (ordered after the API server)
write_fetchml_unit fetchml-worker "FetchML Worker" \
  "network.target redis.service fetchml-api.service" \
  "/usr/local/bin/fetchml-worker -config /etc/fetch_ml/worker.toml"

# Make systemd pick up the new unit files.
sudo systemctl daemon-reload
echo -e "${GREEN}✓${NC} Systemd services created"

# 8. Setup logrotate
echo -e "${BLUE}[8/8]${NC} Configuring log rotation..."

# The services write their logs through descriptors opened by systemd
# (StandardOutput=append:...), and the units define no ExecReload=, so a
# "systemctl reload" in postrotate cannot make them reopen their log files.
# copytruncate copies the log and truncates the original in place, so the
# already-open descriptors keep writing to the live file after rotation.
# Quoted delimiter: the logrotate configuration contains no shell variables.
sudo tee /etc/logrotate.d/fetchml >/dev/null <<'EOF'
/var/log/fetch_ml/*.log {
    daily
    rotate 14
    compress
    delaycompress
    notifempty
    missingok
    copytruncate
}
EOF

echo -e "${GREEN}✓${NC} Log rotation configured"

# Summary
# The two headings go through `echo -e` so the colour escapes are rendered;
# everything else is emitted verbatim from here-documents.
echo ""
echo -e "${BOLD}=== Setup Complete! ===${NC}"

# Unquoted delimiter: $BASE_PATH is expanded in this part.
cat <<EOF

Directory structure created at: $BASE_PATH
Logs will be written to: /var/log/fetch_ml/
Configuration directory: /etc/fetch_ml/

EOF

echo -e "${BOLD}Next steps:${NC}"

# Quoted delimiter: the remaining instructions are fixed text.
cat <<'EOF'
1. Copy your config files:
 sudo cp configs/config-prod.yaml /etc/fetch_ml/config.yaml
 sudo cp configs/worker-prod.toml /etc/fetch_ml/worker.toml

2. Build and install binaries:
 make build
 sudo cp bin/api-server /usr/local/bin/fetchml-api
 sudo cp bin/worker /usr/local/bin/fetchml-worker

3. Update config files with your settings (Redis password, API keys, etc.)

4. Start services:
 sudo systemctl start fetchml-api
 sudo systemctl start fetchml-worker

5. Enable services to start on boot:
 sudo systemctl enable fetchml-api
 sudo systemctl enable fetchml-worker

6. Check status:
 sudo systemctl status fetchml-api
 sudo systemctl status fetchml-worker
 sudo journalctl -u fetchml-api -f

EOF