fetch_ml/scripts/validate-prod-config.sh
Jeremie Fraeys bb25743b0f feat: add comprehensive setup scripts and management tools
- Add production setup scripts for automated deployment
- Include monitoring setup and configuration validation
- Add legacy setup scripts for various Linux distributions
- Implement Bitwarden integration for secure credential management
- Add development and production environment setup
- Include comprehensive management tools and utilities
- Add shell script library with common functions

Provides complete automation for setup, deployment, and management
of FetchML platform in development and production environments.
2025-12-04 16:55:04 -05:00

204 lines
6.1 KiB
Bash
Executable file

#!/bin/bash
# Production Configuration Validator
#
# Verifies all paths and configs are consistent for the experiment lifecycle.
#
# Usage: validate-prod-config.sh [API_CONFIG] [WORKER_CONFIG]
#   API_CONFIG     API server config (default: configs/config-prod.yaml)
#   WORKER_CONFIG  worker config     (default: configs/worker-prod.toml)
set -e

# ANSI escape sequences used to style terminal output. They are stored with a
# literal backslash and only become real escapes when printed with
# backslash interpretation enabled.
NC='\033[0m' # No Color (reset)
BOLD='\033[1m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'

# Banner; the trailing "\n" inside the argument yields one blank line after it.
printf '%b\n' "${BOLD}=== FetchML Production Configuration Validator ===${NC}\n"

# Configuration file paths: optional positional arguments with defaults.
API_CONFIG="${1:-configs/config-prod.yaml}"
WORKER_CONFIG="${2:-configs/worker-prod.toml}"

# Running totals of failed checks and warnings.
errors=0
warnings=0
#######################################
# Report a passed check.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message to display
# Outputs:   one line on stdout; backslash escapes in the line are interpreted
#######################################
check_pass() {
  # NOTE(review): nothing is printed between the colour code and the reset;
  # a status glyph may be missing here - confirm against the intended output.
  printf '%b\n' "${GREEN}${NC} ${1}"
}
#######################################
# Report a failed check and count it.
# Globals:   RED, NC (read); errors (incremented)
# Arguments: $1 - message to display
# Outputs:   one line on stdout; backslash escapes in the line are interpreted
# Returns:   0, so reporting a failure never aborts a `set -e` script
#######################################
check_fail() {
  # NOTE(review): nothing is printed between the colour code and the reset;
  # a status glyph may be missing here - confirm against the intended output.
  printf '%b\n' "${RED}${NC} ${1}"
  # An assignment is used instead of ((errors++)): the post-increment
  # evaluates to the old value, so going from 0 to 1 returns status 1 and
  # would terminate the script under `set -e` on the first failure.
  errors=$((errors + 1))
}
#######################################
# Report a warning and count it.
# Globals:   YELLOW, NC (read); warnings (incremented)
# Arguments: $1 - message to display
# Outputs:   one line on stdout; backslash escapes in the line are interpreted
# Returns:   0, so reporting a warning never aborts a `set -e` script
#######################################
check_warn() {
  # NOTE(review): nothing is printed between the colour code and the reset;
  # a status glyph may be missing here - confirm against the intended output.
  printf '%b\n' "${YELLOW}${NC} ${1}"
  # An assignment is used instead of ((warnings++)): the post-increment
  # evaluates to the old value, so going from 0 to 1 returns status 1 and
  # would terminate the script under `set -e` on the first warning.
  warnings=$((warnings + 1))
}
# 1. Check API server config exists
printf '%b\n' "${BOLD}Checking API Server Configuration${NC}"
if [ -f "$API_CONFIG" ]; then
  check_pass "API config found: $API_CONFIG"

  # base_path is the second whitespace-separated field of the first line that
  # contains "base_path:", with double quotes removed.
  API_BASE_PATH=$(awk '/base_path:/ { print $2; exit }' "$API_CONFIG" | tr -d '"')
  echo " Base path: $API_BASE_PATH"

  # The path must start with "/" to count as absolute.
  case "$API_BASE_PATH" in
    /*) check_pass "base_path is absolute" ;;
    *) check_fail "base_path must be absolute: $API_BASE_PATH" ;;
  esac

  # A "redis:" entry anywhere in the file counts as Redis being configured.
  if grep -q 'redis:' "$API_CONFIG"; then
    check_pass "Redis configuration present"
  else
    check_fail "Redis configuration missing"
  fi

  # NOTE(review): this matches any "enabled: true" line in the file, not only
  # one belonging to the auth settings - confirm that is intended.
  if grep -q 'enabled: true' "$API_CONFIG"; then
    check_pass "Authentication enabled"
  else
    check_warn "Authentication disabled (not recommended for production)"
  fi
else
  check_fail "API config not found: $API_CONFIG"
fi
echo ""
# 2. Check Worker config (if provided)

#######################################
# Print the value of the first active "key = value" line of a worker config.
# Lines whose first non-blank character is '#' are skipped, the key name only
# has to appear left of the first '=', and everything right of that '=' is
# printed with spaces and double quotes removed. Stopping at the first match
# keeps the result a single line even when several lines mention the key.
# Arguments: $1 - config file path
#            $2 - key name to look for
# Outputs:   the value on stdout; nothing when no line matches
#######################################
worker_config_value() {
  awk -v key="$2" '
    /^[ \t]*#/ { next }
    {
      eq = index($0, "=")
      if (eq > 0 && index(substr($0, 1, eq - 1), key) > 0) {
        print substr($0, eq + 1)
        exit
      }
    }
  ' "$1" | tr -d ' "'
}

if [ -f "$WORKER_CONFIG" ]; then
  echo -e "${BOLD}Checking Worker Configuration${NC}"
  check_pass "Worker config found: $WORKER_CONFIG"

  # Extract base_path from worker config
  WORKER_BASE_PATH=$(worker_config_value "$WORKER_CONFIG" base_path)
  echo " Base path: $WORKER_BASE_PATH"

  # Compare paths
  if [ "$API_BASE_PATH" = "$WORKER_BASE_PATH" ]; then
    check_pass "API and Worker base_path match"
  else
    check_fail "base_path mismatch! API: $API_BASE_PATH, Worker: $WORKER_BASE_PATH"
  fi

  # Check podman_image configured: a commented-out or empty entry does not
  # count. PODMAN_IMAGE stays global because the Podman section reads it.
  PODMAN_IMAGE=$(worker_config_value "$WORKER_CONFIG" podman_image)
  if [ -n "$PODMAN_IMAGE" ]; then
    check_pass "Podman image configured: $PODMAN_IMAGE"
  else
    check_fail "podman_image not configured"
  fi
else
  check_warn "Worker config not found: $WORKER_CONFIG (optional for API server only)"
fi
echo ""
# 3. Check directory structure (if base_path exists)
# Nothing is reported here when no base path was extracted from the API config.
if [ -n "$API_BASE_PATH" ]; then
  if [ -d "$API_BASE_PATH" ]; then
    printf '%b\n' "${BOLD}Checking Directory Structure${NC}"
    check_pass "Base directory exists: $API_BASE_PATH"

    # A missing subdirectory is only a warning, as the message below states.
    for subdir in experiments pending running finished failed; do
      if [ ! -d "$API_BASE_PATH/$subdir" ]; then
        check_warn "$subdir/ directory missing (will be created automatically)"
      else
        check_pass "$subdir/ directory exists"
      fi
    done

    # The base directory must be writable by the user running this script.
    if [ ! -w "$API_BASE_PATH" ]; then
      check_fail "Base directory is not writable (check permissions)"
    else
      check_pass "Base directory is writable"
    fi
  else
    check_warn "Base directory does not exist: $API_BASE_PATH (will need to be created)"
  fi
fi
echo ""
# 4. Check Redis connectivity (if server is running)
# NOTE(review): redis-cli is invoked without host/port options, so the Redis
# address from the API config is not consulted here - confirm that is intended.
printf '%b\n' "${BOLD}Checking Redis Connectivity${NC}"
if ! command -v redis-cli &> /dev/null; then
  check_warn "redis-cli not installed (cannot verify Redis connectivity)"
elif ! redis-cli ping &> /dev/null; then
  check_warn "Redis server not accessible (start with: redis-server)"
else
  check_pass "Redis server is running and accessible"
  # Length of the task queue list; reported as 0 if the query itself fails.
  QUEUE_SIZE=$(redis-cli llen fetchml:tasks:queue 2>/dev/null || echo "0")
  echo " Queue size: $QUEUE_SIZE tasks"
fi
echo ""
# 5. Check Podman (if worker config exists)
if [ -f "$WORKER_CONFIG" ]; then
  printf '%b\n' "${BOLD}Checking Podman${NC}"
  if ! command -v podman &> /dev/null; then
    check_fail "Podman not installed (required for worker)"
  else
    check_pass "Podman is installed"

    # The image lookup only runs when PODMAN_IMAGE was set earlier in the script.
    if [ -n "$PODMAN_IMAGE" ]; then
      if ! podman image exists "$PODMAN_IMAGE" 2>/dev/null; then
        check_warn "Podman image not found: $PODMAN_IMAGE (needs to be built)"
      else
        check_pass "Podman image exists: $PODMAN_IMAGE"
      fi
    fi

    # GPU probe, only when the worker config has a line matching
    # "gpu_access.*true": run nvidia-smi in a throwaway container.
    # NOTE(review): confirm the nvidia/cuda:11.8.0-base tag is still published;
    # this step may also pull the image if it is not available locally.
    if grep -q 'gpu_access.*true' "$WORKER_CONFIG" 2>/dev/null; then
      if ! podman run --rm --device nvidia.com/gpu=all nvidia/cuda:11.8.0-base nvidia-smi &>/dev/null; then
        check_warn "GPU access configured but not working (check nvidia-container-toolkit)"
      else
        check_pass "GPU access working"
      fi
    fi
  fi
fi
echo ""
# 6. Check CLI config consistency

#######################################
# Print the value of the first active "key = value" line of the CLI config.
# Lines whose first non-blank character is '#' are skipped, the key name only
# has to appear left of the first '=', and everything right of that '=' is
# printed with spaces and double quotes removed. Stopping at the first match
# keeps the result a single line, so an extra line that merely mentions the
# key can no longer cause a spurious mismatch warning.
# Arguments: $1 - config file path
#            $2 - key name to look for
# Outputs:   the value on stdout; nothing when no line matches
#######################################
cli_config_value() {
  awk -v key="$2" '
    /^[ \t]*#/ { next }
    {
      eq = index($0, "=")
      if (eq > 0 && index(substr($0, 1, eq - 1), key) > 0) {
        print substr($0, eq + 1)
        exit
      }
    }
  ' "$1" | tr -d ' "'
}

echo -e "${BOLD}Checking CLI Configuration${NC}"
CLI_CONFIG="$HOME/.ml/config.toml"
if [ -f "$CLI_CONFIG" ]; then
  check_pass "CLI config found: $CLI_CONFIG"
  CLI_BASE=$(cli_config_value "$CLI_CONFIG" worker_base)
  # A difference is only a warning; it does not fail validation.
  if [ "$CLI_BASE" = "$API_BASE_PATH" ]; then
    check_pass "CLI worker_base matches server base_path"
  else
    check_warn "CLI worker_base ($CLI_BASE) differs from server ($API_BASE_PATH)"
  fi
else
  check_warn "CLI config not found (run: ml init)"
fi
echo ""
# Summary
# Exit status: 0 when no check failed (warnings alone do not fail the run),
# 1 when at least one check failed.
printf '%b\n' "${BOLD}=== Summary ===${NC}"
if [ "$errors" -eq 0 ]; then
  if [ "$warnings" -eq 0 ]; then
    printf '%b\n' "${GREEN}All checks passed! Configuration is ready for production.${NC}"
    exit 0
  fi
  printf '%b\n' "${YELLOW}Configuration has $warnings warning(s). Review before deployment.${NC}"
  exit 0
fi
printf '%b\n' "${RED}Configuration has $errors error(s) and $warnings warning(s). Fix before deployment.${NC}"
exit 1