fetch_ml/scripts/setup-monitoring-prod.sh
Jeremie Fraeys bb25743b0f feat: add comprehensive setup scripts and management tools
- Add production setup scripts for automated deployment
- Include monitoring setup and configuration validation
- Add legacy setup scripts for various Linux distributions
- Implement Bitwarden integration for secure credential management
- Add development and production environment setup
- Include comprehensive management tools and utilities
- Add shell script library with common functions

Provides complete automation for setup, deployment, and management
of FetchML platform in development and production environments.
2025-12-04 16:55:04 -05:00

275 lines
8 KiB
Bash
Executable file

#!/bin/bash
# Production Monitoring Stack Setup for Linux
# Deploys Prometheus/Grafana/Loki/Promtail as Podman containers with systemd
# Compatible with: Rocky/RHEL/CentOS, Ubuntu/Debian, Arch, SUSE, etc.
#
# Usage: setup-monitoring-prod.sh [DATA_PATH] [ML_USER] [ML_GROUP]

# Abort on the first failing command. pipefail makes a pipeline fail when ANY
# stage fails (not only the last), so a failed producer cannot silently feed
# empty output into a trailing `sudo tee`.
set -eo pipefail

# ANSI escape sequences for coloured status output (interpreted by `echo -e`).
BOLD='\033[1m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[0;33m'
NC='\033[0m' # reset ("no colour")

echo -e "${BOLD}=== FetchML Monitoring Stack Setup (Linux) ===${NC}\n"
#######################################
# Detect Linux distribution and package manager.
# Globals written:
#   DISTRO          ID from /etc/os-release; "rhel" when only
#                   /etc/redhat-release exists; otherwise "unknown"
#   DISTRO_VERSION  VERSION_ID from /etc/os-release, or empty when unavailable
#   PKG_MANAGER     dnf | yum | apt | pacman | zypper | unknown
# Globals read: YELLOW, NC (colour codes for the warning line)
# Outputs: a one-line summary on stdout, preceded by a warning when no known
#          package manager is on PATH.
#######################################
detect_distro() {
  local candidate

  DISTRO="unknown"
  DISTRO_VERSION=""
  if [ -r /etc/os-release ]; then
    # Source os-release inside command substitutions (subshells) so the many
    # variables it defines (NAME, VERSION, ID, ...) do not overwrite globals
    # of this script. Pre-unsetting guards against inherited values.
    DISTRO=$(unset ID; . /etc/os-release; printf '%s' "${ID:-unknown}")
    DISTRO_VERSION=$(unset VERSION_ID; . /etc/os-release; printf '%s' "${VERSION_ID:-}")
  elif [ -f /etc/redhat-release ]; then
    DISTRO="rhel"
  fi

  # Detect package manager: first match wins, in this priority order.
  PKG_MANAGER="unknown"
  for candidate in dnf yum apt-get pacman zypper; do
    if command -v "$candidate" &>/dev/null; then
      # The apt-get binary is reported under the name "apt".
      PKG_MANAGER=${candidate%-get}
      break
    fi
  done
  if [ "$PKG_MANAGER" = "unknown" ]; then
    echo -e "${YELLOW}Warning: No known package manager found${NC}"
  fi

  echo "Detected distribution: $DISTRO (using $PKG_MANAGER)"
}
detect_distro
# Settings. Positional arguments override the built-in defaults:
#   $1  directory holding monitoring data
#   $2  account the services run as
#   $3  group the services run as
# Name of the Podman pod that hosts the monitoring stack.
POD_NAME="monitoring"
DATA_PATH=${1:-/data/monitoring}
ML_USER=${2:-ml-user}
ML_GROUP=${3:-ml-group}

# Report the effective settings, followed by one blank line.
printf '%s\n' \
  "Configuration:" \
  " Monitoring data path: $DATA_PATH" \
  " User: $ML_USER" \
  " Group: $ML_GROUP" \
  ""
# 1. Create directories
# Creates the per-service data directories under DATA_PATH plus the config and
# dashboard directories, then hands the data tree to the service account.
# All expansions are quoted so a path, user or group containing whitespace or
# glob characters is passed through as a single argument.
echo -e "${BLUE}[1/6]${NC} Creating directory structure..."
sudo mkdir -p -- "${DATA_PATH}"/{prometheus,grafana,loki,promtail-config}
sudo mkdir -p /etc/fetch_ml/monitoring
sudo mkdir -p /var/lib/grafana/dashboards
sudo chown -R -- "${ML_USER}:${ML_GROUP}" "${DATA_PATH}"
sudo chmod 755 -- "${DATA_PATH}"
echo -e "${GREEN}${NC} Directories created"
# 2. Copy configuration files
# Source paths are relative, so the script must be run from the directory that
# contains monitoring/.
echo -e "${BLUE}[2/6]${NC} Copying configuration files..."
sudo cp -- monitoring/prometheus.yml /etc/fetch_ml/monitoring/
sudo cp -- monitoring/loki-config.yml /etc/fetch_ml/monitoring/
sudo cp -- monitoring/promtail-config.yml /etc/fetch_ml/monitoring/
# grafana.service bind-mounts /etc/fetch_ml/monitoring/grafana/provisioning,
# so the provisioning tree has to land under grafana/, not directly under
# /etc/fetch_ml/monitoring/.
sudo mkdir -p /etc/fetch_ml/monitoring/grafana
sudo cp -r -- monitoring/grafana/provisioning /etc/fetch_ml/monitoring/grafana/
sudo cp -- monitoring/grafana-dashboard.json /var/lib/grafana/dashboards/ml-queue.json
sudo cp -- monitoring/logs-dashboard.json /var/lib/grafana/dashboards/logs.json
sudo chown -R -- "${ML_USER}:${ML_GROUP}" /etc/fetch_ml/monitoring
sudo chown -R -- "${ML_USER}:${ML_GROUP}" /var/lib/grafana
echo -e "${GREEN}${NC} Configuration copied"
# 3. Create Podman pod
echo -e "${BLUE}[3/6]${NC} Creating Podman pod..."
# Publishes Grafana (3000), Prometheus (9090) and Loki (3100) from the pod.
# A single trailing backslash continues the command; a doubled one would be
# passed to podman as a literal "\" argument and end the command early.
# Best effort: a failure (typically "pod already exists") must not abort setup.
sudo -u "$ML_USER" podman pod create \
  --name "$POD_NAME" \
  -p 3000:3000 \
  -p 9090:9090 \
  -p 3100:3100 \
  || echo "Pod may already exist"
echo -e "${GREEN}${NC} Pod created"
# 4. Create systemd service for monitoring pod
echo -e "${BLUE}[4/6]${NC} Creating systemd services..."
# Prometheus service
# The heredoc delimiter is unquoted, so $POD_NAME, $ML_USER, $ML_GROUP and
# ${DATA_PATH} are expanded while the unit is written, and each "\\" emits one
# backslash, which systemd reads as a line continuation.
# systemd does not run command lines through a shell, so the "exists || create"
# fallback is wrapped in /bin/sh -c. The fallback publishes the same three
# ports as the pod created in step 3, so the pod is complete whichever service
# happens to create it.
sudo tee /etc/systemd/system/prometheus.service >/dev/null <<EOF
[Unit]
Description=Prometheus Monitoring
After=network.target
PartOf=$POD_NAME-pod.service
[Service]
Type=simple
User=$ML_USER
Group=$ML_GROUP
Restart=always
RestartSec=10
ExecStartPre=/bin/sh -c '/usr/bin/podman pod exists $POD_NAME || /usr/bin/podman pod create --name $POD_NAME -p 3000:3000 -p 9090:9090 -p 3100:3100'
ExecStart=/usr/bin/podman run --rm --name prometheus \\
--pod $POD_NAME \\
-v /etc/fetch_ml/monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro \\
-v ${DATA_PATH}/prometheus:/prometheus \\
docker.io/prom/prometheus:latest \\
--config.file=/etc/prometheus/prometheus.yml \\
--storage.tsdb.path=/prometheus \\
--web.enable-lifecycle
ExecStop=/usr/bin/podman stop -t 10 prometheus
[Install]
WantedBy=multi-user.target
EOF
# Loki service
# Unquoted heredoc: shell variables are expanded while the unit is written and
# each "\\" becomes a single backslash (systemd line continuation).
# systemd does not run command lines through a shell, so the "exists || create"
# fallback is wrapped in /bin/sh -c; it publishes the same three ports as the
# pod created in step 3.
sudo tee /etc/systemd/system/loki.service >/dev/null <<EOF
[Unit]
Description=Loki Log Aggregation
After=network.target
PartOf=$POD_NAME-pod.service
[Service]
Type=simple
User=$ML_USER
Group=$ML_GROUP
Restart=always
RestartSec=10
ExecStartPre=/bin/sh -c '/usr/bin/podman pod exists $POD_NAME || /usr/bin/podman pod create --name $POD_NAME -p 3000:3000 -p 9090:9090 -p 3100:3100'
ExecStart=/usr/bin/podman run --rm --name loki \\
--pod $POD_NAME \\
-v /etc/fetch_ml/monitoring/loki-config.yml:/etc/loki/local-config.yaml:ro \\
-v ${DATA_PATH}/loki:/loki \\
docker.io/grafana/loki:latest \\
-config.file=/etc/loki/local-config.yaml
ExecStop=/usr/bin/podman stop -t 10 loki
[Install]
WantedBy=multi-user.target
EOF
# Grafana service
# The admin password is resolved while this script runs (GRAFANA_ADMIN_PASSWORD
# from the caller's environment, otherwise a random value) and stored in an
# env file that only the service account can read. The container receives it
# through --env-file, which
#   - avoids relying on shell-style \${VAR:-default} expansion inside the unit
#     (systemd documents only \$VAR / \${VAR} substitution on command lines), and
#   - keeps the secret out of the world-readable unit file and out of argv.
GRAFANA_ENV_FILE=/etc/fetch_ml/monitoring/grafana.env
sudo mkdir -p /etc/fetch_ml/monitoring
# Keep an already stored password on re-runs unless the caller supplies a new
# one, so repeated setup runs do not silently rotate the secret.
if [ -n "${GRAFANA_ADMIN_PASSWORD:-}" ] || ! sudo test -s "$GRAFANA_ENV_FILE"; then
  grafana_admin_password="${GRAFANA_ADMIN_PASSWORD:-$(openssl rand -base64 32)}"
  # Create the file with its final owner and mode before any secret is written.
  sudo install -m 600 -o "$ML_USER" -g "$ML_GROUP" /dev/null "$GRAFANA_ENV_FILE"
  printf 'GF_SECURITY_ADMIN_PASSWORD=%s\n' "$grafana_admin_password" \
    | sudo tee "$GRAFANA_ENV_FILE" >/dev/null
  unset grafana_admin_password
fi
# Unquoted heredoc: shell variables are expanded while the unit is written and
# each "\\" becomes a single backslash (systemd line continuation).
# systemd does not run command lines through a shell, so the "exists || create"
# fallback is wrapped in /bin/sh -c; it publishes the same three ports as the
# pod created in step 3.
sudo tee /etc/systemd/system/grafana.service >/dev/null <<EOF
[Unit]
Description=Grafana Visualization
After=network.target prometheus.service loki.service
PartOf=$POD_NAME-pod.service
[Service]
Type=simple
User=$ML_USER
Group=$ML_GROUP
Restart=always
RestartSec=10
ExecStartPre=/bin/sh -c '/usr/bin/podman pod exists $POD_NAME || /usr/bin/podman pod create --name $POD_NAME -p 3000:3000 -p 9090:9090 -p 3100:3100'
ExecStart=/usr/bin/podman run --rm --name grafana \\
--pod $POD_NAME \\
--env-file $GRAFANA_ENV_FILE \\
-v ${DATA_PATH}/grafana:/var/lib/grafana \\
-v /etc/fetch_ml/monitoring/grafana/provisioning:/etc/grafana/provisioning:ro \\
-v /var/lib/grafana/dashboards:/var/lib/grafana/dashboards:ro \\
-e GF_USERS_ALLOW_SIGN_UP=false \\
-e GF_AUTH_ANONYMOUS_ENABLED=false \\
docker.io/grafana/grafana:latest
ExecStop=/usr/bin/podman stop -t 10 grafana
[Install]
WantedBy=multi-user.target
EOF
echo " Grafana admin password is stored in $GRAFANA_ENV_FILE"
# Promtail service
# The unit bind-mounts /var/log/fetch_ml read-only; make sure the directory
# exists so the container can start before the application has written logs.
sudo mkdir -p /var/log/fetch_ml
# Unquoted heredoc: shell variables are expanded while the unit is written and
# each "\\" becomes a single backslash (systemd line continuation).
# systemd does not run command lines through a shell, so the "exists || create"
# fallback is wrapped in /bin/sh -c; it publishes the same three ports as the
# pod created in step 3.
sudo tee /etc/systemd/system/promtail.service >/dev/null <<EOF
[Unit]
Description=Promtail Log Collector
After=network.target loki.service
PartOf=$POD_NAME-pod.service
[Service]
Type=simple
User=$ML_USER
Group=$ML_GROUP
Restart=always
RestartSec=10
ExecStartPre=/bin/sh -c '/usr/bin/podman pod exists $POD_NAME || /usr/bin/podman pod create --name $POD_NAME -p 3000:3000 -p 9090:9090 -p 3100:3100'
ExecStart=/usr/bin/podman run --rm --name promtail \\
--pod $POD_NAME \\
-v /etc/fetch_ml/monitoring/promtail-config.yml:/etc/promtail/config.yml:ro \\
-v /var/log/fetch_ml:/var/log/app:ro \\
docker.io/grafana/promtail:latest \\
-config.file=/etc/promtail/config.yml
ExecStop=/usr/bin/podman stop -t 10 promtail
[Install]
WantedBy=multi-user.target
EOF
# Make systemd pick up the unit files written above.
sudo systemctl daemon-reload
echo -e "${GREEN}${NC} Systemd services created"
# 5. Create monitoring pod service
echo -e "${BLUE}[5/6]${NC} Creating pod management service..."
# Capture the generated unit first: under `set -e` a failing podman aborts the
# script here instead of piping empty output into tee and leaving a blank unit
# file behind.
pod_unit=$(sudo -u "$ML_USER" podman generate systemd --new --name "$POD_NAME")
printf '%s\n' "$pod_unit" \
  | sudo tee "/etc/systemd/system/${POD_NAME}-pod.service" >/dev/null
unset pod_unit
sudo systemctl daemon-reload
echo -e "${GREEN}${NC} Pod service created"
# 6. Setup firewall rules
echo -e "${BLUE}[6/6]${NC} Configuring firewall..."
# Pick the first firewall front-end found on PATH; firewalld wins over ufw.
firewall_tool=""
for candidate in firewall-cmd ufw; do
  if command -v "$candidate" &>/dev/null; then
    firewall_tool=$candidate
    break
  fi
done

case "$firewall_tool" in
  firewall-cmd)
    # RHEL/Rocky/Fedora (firewalld): persist the rules, then reload to apply.
    sudo firewall-cmd --permanent --add-port=3000/tcp # Grafana
    sudo firewall-cmd --permanent --add-port=9090/tcp # Prometheus
    sudo firewall-cmd --reload
    echo -e "${GREEN}${NC} Firewall configured (firewalld)"
    ;;
  ufw)
    # Ubuntu/Debian (ufw)
    sudo ufw allow 3000/tcp comment 'Grafana'
    sudo ufw allow 9090/tcp comment 'Prometheus'
    echo -e "${GREEN}${NC} Firewall configured (ufw)"
    ;;
  *)
    # Neither tool is available: leave the ports to the operator.
    echo -e "${YELLOW}!${NC} No firewall detected. You may need to manually open ports 3000 and 9090"
    ;;
esac
# Summary
# Prints the created units and the manual follow-up steps; nothing is started
# or enabled by this script itself.
echo ""
echo -e "${BOLD}=== Monitoring Stack Setup Complete! ===${NC}"
echo ""
echo "Services created:"
echo " - prometheus.service (Metrics collection)"
echo " - loki.service (Log aggregation)"
echo " - grafana.service (Visualization)"
echo " - promtail.service (Log shipping)"
echo ""
echo -e "${BOLD}Next steps:${NC}"
echo "1. Start services:"
echo " sudo systemctl start prometheus"
echo " sudo systemctl start loki"
echo " sudo systemctl start promtail"
echo " sudo systemctl start grafana"
echo ""
echo "2. Enable on boot:"
echo " sudo systemctl enable prometheus loki promtail grafana"
echo ""
echo "3. Access Grafana:"
echo " http://YOUR_SERVER_IP:3000"
echo " Username: admin"
# grafana.service sets GF_SECURITY_ADMIN_PASSWORD, so Grafana's stock
# admin/admin login does not apply; point the operator at the configured value.
echo " Password: the GF_SECURITY_ADMIN_PASSWORD value configured for grafana.service"
echo "           (not Grafana's default; see /etc/systemd/system/grafana.service)"
echo ""
echo "4. Check logs:"
echo " sudo journalctl -u prometheus -f"
echo " sudo journalctl -u grafana -f"
echo ""