fetch_ml/scripts/legacy/setup_ubuntu.sh
Jeremie Fraeys bb25743b0f feat: add comprehensive setup scripts and management tools
- Add production setup scripts for automated deployment
- Include monitoring setup and configuration validation
- Add legacy setup scripts for various Linux distributions
- Implement Bitwarden integration for secure credential management
- Add development and production environment setup
- Include comprehensive management tools and utilities
- Add shell script library with common functions

Provides complete automation for setup, deployment, and management
of FetchML platform in development and production environments.
2025-12-04 16:55:04 -05:00

294 lines
8.7 KiB
Bash
Executable file

#!/usr/bin/env bash
# Fetch ML Ubuntu Server Setup Script
# Optimized for ML experiments on Ubuntu 20.04/22.04
#
# Strict mode: abort on any unhandled command failure (-e), on use of an
# unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Absolute path of the directory holding this script, resolved from
# BASH_SOURCE so the shared library below is found regardless of the
# caller's working directory.
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
# Load the shared setup library. Helpers used later in this file but not
# defined in it (log_info, log_error, secure_download, ensure_user, ...) are
# presumably provided here - TODO confirm.
# NOTE(review): the directive below points ShellCheck at
# scripts/setup_common.sh, while the runtime path is the directory of this
# script - confirm both refer to the same file.
# shellcheck source=scripts/setup_common.sh
source "$SCRIPT_DIR/setup_common.sh"
#######################################
# Refuse to continue unless the script runs with root privileges.
# Globals:   EUID (read)
# Outputs:   an error through log_error when the effective UID is not 0
# Returns:   0 when running as root; otherwise exits the script with 1
#######################################
check_root() {
  if (( EUID == 0 )); then
    return 0
  fi

  log_error "This script must be run as root"
  exit 1
}
#######################################
# Verify the host looks like a supported Ubuntu system.
#
# Exits when apt-get is unavailable. Otherwise logs the detected release
# and warns when it is older than 20.04. The version comparison is done
# with shell integer arithmetic so it does not depend on `bc`, which is not
# guaranteed to be installed at the point this check runs.
#
# Globals:   none
# Outputs:   messages through log_info / log_warning / log_error
# Returns:   0 on a usable system; exits the script with 1 without apt-get
#######################################
check_ubuntu() {
  if ! command -v apt-get &>/dev/null; then
    log_error "This script is designed for Ubuntu systems"
    exit 1
  fi

  # Declaration is kept separate from the assignments so a failing command
  # substitution is not hidden behind the exit status of `local`.
  local ubuntu_version=""
  if command -v lsb_release &>/dev/null; then
    ubuntu_version=$(lsb_release -rs) || ubuntu_version=""
  fi
  # lsb_release may not be installed yet on minimal images; fall back to the
  # os-release file, read in a subshell so its variables do not leak here.
  if [[ -z "$ubuntu_version" && -r /etc/os-release ]]; then
    ubuntu_version=$(. /etc/os-release && printf '%s' "${VERSION_ID:-}") \
      || ubuntu_version=""
  fi
  log_info "Ubuntu version: $ubuntu_version"

  if [[ "$ubuntu_version" =~ ^([0-9]+)\.([0-9]+) ]]; then
    # Force base 10 so a component such as "04" is not parsed as octal.
    local major=$((10#${BASH_REMATCH[1]}))
    local minor=$((10#${BASH_REMATCH[2]}))
    if (( major < 20 || (major == 20 && minor < 4) )); then
      log_warning "Ubuntu version < 20.04 may not support all features"
    fi
  else
    log_warning "Could not determine Ubuntu version; skipping version check"
  fi
}
#######################################
# Refresh the package index, upgrade installed packages, and install the
# base tools the remaining setup steps rely on.
# Globals:   none
# Outputs:   progress through log_info plus apt-get output
# Returns:   status of the final apt-get invocation
#######################################
update_system() {
  local -a base_packages=(
    curl
    wget
    gnupg
    lsb-release
    software-properties-common
  )

  log_info "Updating system packages..."

  local action
  for action in update upgrade; do
    apt-get "$action" -y
  done
  apt-get install -y "${base_packages[@]}"
}
#######################################
# Install the Go 1.25.0 toolchain under /usr/local/go unless a `go` command
# is already on PATH.
#
# Compared with a plain "cd /tmp && tar" flow this version:
#   - never changes the caller's working directory (absolute paths only);
#   - removes any previous /usr/local/go tree before extracting, so files
#     from an older release cannot linger next to the new ones;
#   - adds the PATH lines to /etc/profile only when they are not present,
#     so repeated runs do not accumulate duplicates.
#
# Globals:   TMP_FILES (written; presumably read by a cleanup routine in the
#            sourced common library - TODO confirm), PATH (extended)
# Outputs:   progress through log_info / log_success
# Returns:   0 on success or when Go is already installed
#######################################
install_go() {
  log_info "Installing Go 1.25..."

  if command -v go &>/dev/null; then
    local go_version
    # Best effort: a failure to parse the version must not abort the setup.
    go_version=$(go version | awk '{print $3}' | sed 's/go//') \
      || go_version="unknown"
    log_info "Go already installed: $go_version"
    return
  fi

  local -r tarball="/tmp/go1.25.0.linux-amd64.tar.gz"
  TMP_FILES="$tarball"
  secure_download "https://go.dev/dl/go1.25.0.linux-amd64.tar.gz" "b5b98c784d53115553848114fd3c74e565643b4e4c8e8db0c3bea3478fd8c345" "$tarball"

  # Extracting over an existing tree can leave a broken mixed installation.
  rm -rf /usr/local/go
  tar -C /usr/local -xzf "$tarball"

  # Add to PATH for login shells. The single quotes are intentional: the
  # variables must be expanded when /etc/profile is read, not now.
  local profile_line
  # shellcheck disable=SC2016
  for profile_line in \
    'export PATH=$PATH:/usr/local/go/bin' \
    'export PATH=$PATH:$HOME/go/bin'; do
    if ! grep -qxF -- "$profile_line" /etc/profile 2>/dev/null; then
      printf '%s\n' "$profile_line" >>/etc/profile
    fi
  done

  # Make the toolchain usable for the remainder of this script run.
  export PATH="$PATH:/usr/local/go/bin"
  log_success "Go 1.25 installed"
}
#######################################
# Install Podman and podman-compose from the Kubic libcontainers repository
# unless a `podman` command is already on PATH.
#
# The repository signing key is stored in a dedicated keyring referenced via
# `signed-by` instead of being imported with the deprecated `apt-key`, and
# it is fetched with `curl -f` so an HTTP error aborts the run instead of
# feeding an error page to the key import.
#
# Globals:   none
# Outputs:   progress through log_info / log_success; tee echoes the
#            written sources entry to stdout
# Returns:   0 on success or when Podman is already installed
#######################################
install_podman() {
  log_info "Installing Podman..."

  if command -v podman &>/dev/null; then
    log_info "Podman already installed"
    return
  fi

  # Query the release once; it is needed for every repository URL.
  local release
  release=$(lsb_release -rs)

  local -r repo_url="https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_${release}"
  local -r keyring="/etc/apt/keyrings/devel_kubic_libcontainers_stable.gpg"
  local -r list_file="/etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list"

  # Add official Podman repository
  install -d -m 0755 /etc/apt/keyrings
  curl -fsSL "${repo_url}/Release.key" \
    | gpg --batch --yes --dearmor -o "$keyring"
  # apt's unprivileged fetch user has to be able to read the keyring.
  chmod 0644 "$keyring"
  echo "deb [signed-by=${keyring}] ${repo_url}/ /" | tee "$list_file"

  apt-get update -y
  apt-get install -y podman podman-compose

  # Configure Podman for rootless operation. Each setting is appended only
  # when missing so repeated runs do not duplicate lines.
  # NOTE(review): these keys are appended at the end of the file, outside
  # any explicit TOML table - confirm against containers.conf(5) that they
  # are honoured there.
  local -r containers_conf="/etc/containers/containers.conf"
  mkdir -p /etc/containers
  local conf_line
  for conf_line in 'user_namespace_enable = 1' 'runtime = "crun"'; do
    if ! grep -qxF -- "$conf_line" "$containers_conf" 2>/dev/null; then
      printf '%s\n' "$conf_line" >>"$containers_conf"
    fi
  done

  log_success "Podman installed"
}
#######################################
# Install redis-server and adjust its configuration, unless a
# `redis-server` command is already on PATH.
#
# The service is restarted (not merely started) after the configuration is
# edited: the package normally starts the daemon during installation, and
# `systemctl start` on an already running unit would leave the old
# configuration in effect.
#
# Globals:   none
# Outputs:   progress through log_info / log_success
# Returns:   0 on success or when Redis is already installed
#######################################
install_redis() {
  log_info "Installing Redis..."

  if command -v redis-server &>/dev/null; then
    log_info "Redis already installed"
    return
  fi

  apt-get install -y redis-server

  # Configure Redis for production. Patterns are anchored to the start of
  # the line so only the active directives change, not commented examples.
  local -r redis_conf="/etc/redis/redis.conf"
  sed -i \
    -e 's/^supervised no/supervised systemd/' \
    -e 's/^bind 127\.0\.0\.1 ::1/bind 127.0.0.1/' \
    "$redis_conf"

  systemctl enable redis-server
  systemctl restart redis-server
  log_success "Redis installed and configured"
}
#######################################
# Install NVIDIA drivers and the CUDA toolkit when an NVIDIA GPU is present
# and `nvidia-smi` is not yet available.
#
# The CUDA repository path is derived from the full Ubuntu release with the
# dot removed (22.04 -> ubuntu2204), which is the directory naming NVIDIA
# uses; using only the major number ("ubuntu22") yields a URL that does not
# exist.
#
# Globals:   TMP_FILES (written; presumably read by a cleanup routine in the
#            sourced common library - TODO confirm)
# Outputs:   progress through log_info / log_warning / log_success, plus
#            nvidia-smi output when drivers are already installed
# Returns:   status of the last command run
#######################################
install_nvidia_drivers() {
  log_info "Checking for NVIDIA GPU..."

  if command -v nvidia-smi &>/dev/null; then
    log_info "NVIDIA drivers already installed"
    nvidia-smi
    return
  fi

  # grep deliberately consumes all of lspci's output (no -q): an early exit
  # could make lspci fail on a closed pipe and, with pipefail, turn a
  # positive match into a false negative.
  if ! lspci | grep -i nvidia &>/dev/null; then
    log_info "No NVIDIA GPU detected, skipping driver installation"
    return
  fi

  log_info "NVIDIA GPU detected, installing drivers..."

  # Add NVIDIA repository
  local release
  release=$(lsb_release -rs)
  local -r distro="ubuntu${release//./}"
  local -r keyring_deb="/tmp/cuda-keyring_1.1-1_all.deb"
  TMP_FILES="$keyring_deb"
  secure_download "https://developer.download.nvidia.com/compute/cuda/repos/${distro}/x86_64/cuda-keyring_1.1-1_all.deb" "cfa6b4109e7e3d9be060a016b7dc07e8edcd5356c0eabcc0c537a76e6c603d76" "$keyring_deb"
  dpkg -i "$keyring_deb"
  apt-get update -y

  # Install drivers
  apt-get install -y nvidia-driver-535 nvidia-cuda-toolkit

  # Smoke-test GPU access from a container. Failure is expected until the
  # new driver is loaded, so it is reported as a warning, not an error.
  if ! podman run --rm --device nvidia.com/gpu=all alpine echo "NVIDIA GPU access configured" 2>/dev/null; then
    log_warning "NVIDIA GPU access test failed, you may need to reboot"
  else
    log_success "NVIDIA drivers installed and GPU access verified"
  fi
}
#######################################
# Install Python, native build dependencies, and a common set of ML
# libraries. PyTorch is pulled from the CPU wheel index.
# Globals:   none
# Outputs:   progress through log_info / log_success, plus apt-get and pip3
#            output
# Returns:   status of log_success
#######################################
install_ml_tools() {
  log_info "Installing ML tools and dependencies..."

  # One entry per apt-get invocation: Python first, then the system
  # libraries needed to build ML packages.
  local -a apt_batches=(
    "python3 python3-pip python3-venv"
    "build-essential cmake git pkg-config"
    "libjpeg-dev libpng-dev libtiff-dev"
    "libavcodec-dev libavformat-dev libswscale-dev"
    "libgtk2.0-dev libcanberra-gtk-module"
    "libxvidcore-dev libx264-dev"
    "libatlas-base-dev gfortran"
  )
  # One entry per pip3 invocation; pip itself is upgraded before anything
  # else is installed with it.
  local -a pip_batches=(
    "--upgrade pip"
    "numpy scipy scikit-learn pandas"
    "jupyter matplotlib seaborn"
    "torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu"
  )

  local batch
  local -a words

  for batch in "${apt_batches[@]}"; do
    read -ra words <<<"$batch"
    apt-get install -y "${words[@]}"
  done

  for batch in "${pip_batches[@]}"; do
    read -ra words <<<"$batch"
    pip3 install "${words[@]}"
  done

  log_success "ML tools installed"
}
#######################################
# Create the service account and its directories by delegating to
# ensure_user and create_directories (both defined outside this file).
# Globals:   FETCH_ML_USER (read, after the helpers have run)
# Outputs:   progress through log_info / log_success
# Returns:   status of log_success
#######################################
create_user() {
  log_info "Creating fetchml user..."

  ensure_user
  create_directories

  # Built only after the helpers ran, so the message reflects the value of
  # FETCH_ML_USER at that point.
  local summary="User $FETCH_ML_USER and directories created"
  log_success "$summary"
}
#######################################
# Configure UFW for the Fetch ML services when UFW is installed.
#
# All allow rules are registered BEFORE the firewall is enabled, so SSH
# access is already permitted at the moment filtering becomes active and a
# remote administrator is not locked out.
#
# Globals:   none
# Outputs:   progress through log_info / log_warning, plus `ufw status`
# Returns:   status of the last command run
#######################################
setup_firewall() {
  log_info "Configuring firewall..."

  if ! command -v ufw &>/dev/null; then
    log_warning "UFW not available, skipping firewall configuration"
    return
  fi

  ufw allow ssh
  ufw allow 8080/tcp # Worker API
  ufw allow 8081/tcp # Data manager API
  # NOTE(review): this opens the Redis port to every remote host. Confirm
  # that remote Redis access is really required before keeping this rule.
  ufw allow 6379/tcp # Redis

  ufw --force enable
  ufw status
}
#######################################
# Register the worker and data-manager units through setup_systemd_service
# (defined outside this file), reload systemd, and enable both units.
# Globals:   FETCH_ML_HOME (read)
# Outputs:   progress through log_info / log_success
# Returns:   status of log_success
#######################################
setup_systemd_services() {
  log_info "Setting up systemd services..."

  # Both daemons are started against the same local configuration file.
  local config_path="$FETCH_ML_HOME/configs/config-local.yaml"
  local bin_dir="$FETCH_ML_HOME/bin"

  setup_systemd_service "fetch_ml_worker" "$bin_dir/worker --config $config_path"
  setup_systemd_service "fetch_ml_data_manager" "$bin_dir/data_manager --config $config_path"

  # Enable services
  systemctl daemon-reload
  local unit
  for unit in fetch_ml_worker fetch_ml_data_manager; do
    systemctl enable "$unit"
  done

  log_success "Systemd services configured"
}
#######################################
# Thin wrapper that delegates to setup_logrotate (defined outside this
# file) and reports progress before and after.
# Globals:   none
# Outputs:   progress through log_info / log_success
# Returns:   status of log_success
#######################################
setup_log_rotation() {
  local -r start_msg="Setting up log rotation..."
  local -r done_msg="Log rotation configured"

  log_info "$start_msg"
  setup_logrotate
  log_success "$done_msg"
}
#######################################
# Apply system hardening (via hardening_steps, defined outside this file)
# and kernel tuning for ML workloads.
#
# The tuning block is appended to /etc/sysctl.conf only when its marker
# line is not already present, so running the setup repeatedly does not
# pile up duplicate entries.
#
# Globals:   none
# Outputs:   progress through log_info / log_warning / log_success, plus
#            sysctl output
# Returns:   status of log_success
#######################################
optimize_system() {
  log_info "Optimizing system for ML workloads..."
  hardening_steps

  # Optimize kernel parameters for ML
  local -r sysctl_conf="/etc/sysctl.conf"
  local -r marker="# ML Optimization"
  if ! grep -qxF -- "$marker" "$sysctl_conf" 2>/dev/null; then
    cat >>"$sysctl_conf" <<'EOF'
# ML Optimization
net.core.rmem_max = 134217728
net.core.wmem_max = 134217728
vm.swappiness = 10
vm.dirty_ratio = 15
vm.dirty_background_ratio = 5
EOF
  fi
  sysctl -p

  # Configure GPU persistence mode if NVIDIA available. Best effort: a
  # failure here is reported but does not abort the setup.
  if command -v nvidia-smi &>/dev/null; then
    nvidia-smi -pm 1 || log_warning "Could not enable GPU persistence mode"
  fi

  log_success "System optimized for ML workloads"
}
#######################################
# Build Fetch ML from the checkout at $FETCH_ML_HOME/fetch_ml and install
# its binaries and default configuration under $FETCH_ML_HOME.
#
# Differences from a naive "cd and copy" flow:
#   - every path expansion is quoted, so a home directory containing spaces
#     or glob characters is handled correctly;
#   - the build runs in a subshell, leaving the caller's working directory
#     untouched;
#   - an existing config-local.yaml is kept, so re-running the setup does
#     not discard local edits.
#
# Globals:   FETCH_ML_HOME (read), FETCH_ML_USER (read), PATH (extended)
# Outputs:   progress through log_info / log_warning / log_success /
#            log_error
# Returns:   0 on success or when the checkout is missing (a warning is
#            logged); exits the script with 1 when FETCH_ML_HOME is not a
#            directory
#######################################
install_fetch_ml() {
  log_info "Installing Fetch ML..."

  if [[ ! -d "$FETCH_ML_HOME" ]]; then
    log_error "Fetch ML home directory does not exist: $FETCH_ML_HOME"
    exit 1
  fi

  # Clone or copy Fetch ML
  local -r src_dir="$FETCH_ML_HOME/fetch_ml"
  if [[ ! -d "$src_dir" ]]; then
    # This would be replaced with actual repository URL
    log_warning "Please clone Fetch ML repository manually to $FETCH_ML_HOME/fetch_ml"
    log_info "Example: git clone https://github.com/your-org/fetch_ml.git"
    return
  fi

  # Build
  export PATH="$PATH:/usr/local/go/bin"
  (cd -- "$src_dir" && make build)

  # Copy binaries
  mkdir -p -- "$FETCH_ML_HOME/bin"
  cp -- "$src_dir"/bin/* "$FETCH_ML_HOME/bin/"
  chmod +x -- "$FETCH_ML_HOME"/bin/*

  # Copy configs, but never clobber a configuration that already exists.
  mkdir -p -- "$FETCH_ML_HOME/configs"
  local -r config_file="$FETCH_ML_HOME/configs/config-local.yaml"
  if [[ -e "$config_file" ]]; then
    log_info "Keeping existing configuration: $config_file"
  else
    cp -- "$src_dir/configs/config-local.yaml.example" "$config_file"
  fi

  # Set permissions
  chown -R "$FETCH_ML_USER:$FETCH_ML_USER" "$FETCH_ML_HOME"
  log_success "Fetch ML installed"
}
#######################################
# Run the complete server setup in order, then print follow-up
# instructions and the service URLs.
# Globals:   FETCH_ML_HOME (read)
# Arguments: ignored
# Outputs:   progress through log_info / log_success; next steps and URLs
#            on stdout
# Returns:   status of the final printf
#######################################
main() {
  log_info "Starting Fetch ML Ubuntu server setup..."

  # Provisioning steps, executed strictly in this order.
  local -a steps=(
    check_root
    check_ubuntu
    update_system
    install_go
    install_podman
    install_redis
    install_nvidia_drivers
    install_ml_tools
    ensure_user
    create_directories
    setup_firewall
    setup_systemd_services
    setup_logrotate
    hardening_steps
    install_fetch_ml
  )
  local step
  for step in "${steps[@]}"; do
    "$step"
  done

  log_success "Fetch ML setup complete!"
  printf '\n'

  log_info "Next steps:"
  printf '%s\n' \
    "1. Clone Fetch ML repository: git clone https://github.com/your-org/fetch_ml.git $FETCH_ML_HOME/fetch_ml" \
    "2. Configure: $FETCH_ML_HOME/configs/config-local.yaml" \
    "3. Start services: systemctl start fetch_ml_worker fetch_ml_data_manager" \
    "4. Check status: systemctl status fetch_ml_worker fetch_ml_data_manager" \
    "5. View logs: journalctl -u fetch_ml_worker -f" \
    ""

  log_info "Services will be available at:"
  # First address reported by `hostname -I`. A lookup failure must not
  # abort the script; the URLs are then printed with an empty host.
  local primary_ip
  primary_ip=$(hostname -I | awk '{print $1}') || true
  printf '%s\n' \
    "- Worker API: http://${primary_ip}:8080" \
    "- Data Manager: http://${primary_ip}:8081"
}
# Run main function, forwarding every command-line argument unchanged.
main "$@"