#!/bin/bash
# Production Configuration Validator
# Verifies all paths and configs are consistent for experiment lifecycle
#
# Usage: <this script> [API_CONFIG] [WORKER_CONFIG]
#   API_CONFIG     API server config, YAML (default: configs/config-prod.yaml)
#   WORKER_CONFIG  worker config, TOML     (default: configs/worker-prod.toml)
#
# Exit status: 0 when no check failed (warnings are allowed), 1 otherwise.

# NOTE: pipefail is deliberately NOT enabled. The value-extraction pipeline
# for the API config relies on "grep matched nothing" producing an empty
# string rather than aborting the whole validator.
set -e

# ANSI escape sequences; stored literally and expanded by printf's %b.
readonly BOLD='\033[1m'
readonly GREEN='\033[0;32m'
readonly RED='\033[0;31m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # No Color

# Counters updated by check_fail / check_warn and read by the summary.
errors=0
warnings=0

# Values extracted from the config files. Initialised to empty so that later
# sections can test them safely even when a config file was missing.
API_BASE_PATH=""
WORKER_BASE_PATH=""
PODMAN_IMAGE=""

#######################################
# Print a bold section heading.
# Arguments: $1 - heading text
#######################################
section() {
  printf '%b%s%b\n' "$BOLD" "$1" "$NC"
}

#######################################
# Report a passed check.
# Arguments: $1 - message (printed verbatim, backslashes are not interpreted)
#######################################
check_pass() {
  printf '%b✓%b %s\n' "$GREEN" "$NC" "$1"
}

#######################################
# Report a failed check and count it as an error.
# Globals:   errors (written)
# Arguments: $1 - message
#######################################
check_fail() {
  printf '%b✗%b %s\n' "$RED" "$NC" "$1"
  # Not ((errors++)): a post-increment from 0 evaluates to 0, which makes
  # (( )) return status 1 and would terminate the script under `set -e`.
  errors=$((errors + 1))
}

#######################################
# Report a warning and count it.
# Globals:   warnings (written)
# Arguments: $1 - message
#######################################
check_warn() {
  printf '%b⚠%b %s\n' "$YELLOW" "$NC" "$1"
  # See check_fail for why ((warnings++)) is avoided.
  warnings=$((warnings + 1))
}

#######################################
# Print the value of the first `key = value` line in a TOML-like file.
# Comment lines are skipped, only the first match is used, and spaces and
# double quotes are stripped from the value. The value is the text between
# the first and second "=" on the line. Prints nothing when no line matches.
# Arguments: $1 - key to look for (substring match left of the "=")
#            $2 - file to read
# Outputs:   the value on stdout (possibly empty)
#######################################
toml_value() {
  local key=$1 file=$2
  # `|| true`: an unreadable file must yield an empty value, not abort the
  # validator under `set -e`; awk's own error message still reaches stderr.
  awk -F '=' -v key="$key" '
    /^[ \t]*#/ { next }
    NF >= 2 && index($1, key) {
      value = $2
      gsub(/[ "]/, "", value)
      print value
      exit
    }
  ' "$file" || true
}

#######################################
# 1. Check API server config exists and looks sane.
# Globals: API_CONFIG (read), API_BASE_PATH (written)
#######################################
check_api_config() {
  section "Checking API Server Configuration"

  if [[ ! -f "$API_CONFIG" ]]; then
    check_fail "API config not found: $API_CONFIG"
    return 0
  fi
  check_pass "API config found: $API_CONFIG"

  # Extract base_path from API config: second whitespace-separated field of
  # the first line containing "base_path:", with double quotes removed.
  API_BASE_PATH=$(grep 'base_path:' "$API_CONFIG" \
    | head -n 1 \
    | awk '{print $2}' \
    | tr -d '"')
  printf ' Base path: %s\n' "$API_BASE_PATH"

  # Check if path is absolute
  if [[ "$API_BASE_PATH" != /* ]]; then
    check_fail "base_path must be absolute: $API_BASE_PATH"
  else
    check_pass "base_path is absolute"
  fi

  # Check Redis config
  if grep -q 'redis:' "$API_CONFIG"; then
    check_pass "Redis configuration present"
  else
    check_fail "Redis configuration missing"
  fi

  # Check auth enabled
  # NOTE(review): this matches ANY "enabled: true" in the file, not only one
  # under an auth section -- confirm against the config schema.
  if grep -q 'enabled: true' "$API_CONFIG"; then
    check_pass "Authentication enabled"
  else
    check_warn "Authentication disabled (not recommended for production)"
  fi
}

#######################################
# 2. Check Worker config (if provided).
# Globals: WORKER_CONFIG, API_BASE_PATH (read);
#          WORKER_BASE_PATH, PODMAN_IMAGE (written)
#######################################
check_worker_config() {
  if [[ ! -f "$WORKER_CONFIG" ]]; then
    check_warn "Worker config not found: $WORKER_CONFIG (optional for API server only)"
    return 0
  fi

  section "Checking Worker Configuration"
  check_pass "Worker config found: $WORKER_CONFIG"

  # Extract base_path from worker config
  WORKER_BASE_PATH=$(toml_value 'base_path' "$WORKER_CONFIG")
  printf ' Base path: %s\n' "$WORKER_BASE_PATH"

  # Compare paths
  if [[ "$API_BASE_PATH" == "$WORKER_BASE_PATH" ]]; then
    check_pass "API and Worker base_path match"
  else
    check_fail "base_path mismatch! API: $API_BASE_PATH, Worker: $WORKER_BASE_PATH"
  fi

  # Check podman_image configured (a commented-out or empty entry does not
  # count as configured).
  PODMAN_IMAGE=$(toml_value 'podman_image' "$WORKER_CONFIG")
  if [[ -n "$PODMAN_IMAGE" ]]; then
    check_pass "Podman image configured: $PODMAN_IMAGE"
  else
    check_fail "podman_image not configured"
  fi
}

#######################################
# 3. Check directory structure (if base_path exists).
# Globals: API_BASE_PATH (read)
#######################################
check_directory_structure() {
  local dir

  # Nothing to inspect when no base_path could be extracted.
  [[ -n "$API_BASE_PATH" ]] || return 0

  if [[ ! -d "$API_BASE_PATH" ]]; then
    check_warn "Base directory does not exist: $API_BASE_PATH (will need to be created)"
    return 0
  fi

  section "Checking Directory Structure"
  check_pass "Base directory exists: $API_BASE_PATH"

  # Check subdirectories
  for dir in experiments pending running finished failed; do
    if [[ -d "$API_BASE_PATH/$dir" ]]; then
      check_pass "$dir/ directory exists"
    else
      check_warn "$dir/ directory missing (will be created automatically)"
    fi
  done

  # Check permissions
  if [[ -w "$API_BASE_PATH" ]]; then
    check_pass "Base directory is writable"
  else
    check_fail "Base directory is not writable (check permissions)"
  fi
}

#######################################
# 4. Check Redis connectivity (if server is running).
# NOTE(review): redis-cli is invoked without host/port arguments, so it
# probes redis-cli's default target rather than whatever address the API
# config declares -- confirm this is intended.
#######################################
check_redis() {
  local queue_size

  section "Checking Redis Connectivity"

  if ! command -v redis-cli &> /dev/null; then
    check_warn "redis-cli not installed (cannot verify Redis connectivity)"
    return 0
  fi

  if redis-cli ping &> /dev/null; then
    check_pass "Redis server is running and accessible"
    # Check queue; fall back to "0" when the query itself fails.
    queue_size=$(redis-cli llen fetchml:tasks:queue 2> /dev/null || echo "0")
    printf ' Queue size: %s tasks\n' "$queue_size"
  else
    check_warn "Redis server not accessible (start with: redis-server)"
  fi
}

#######################################
# 5. Check Podman (if worker config exists).
# Globals: WORKER_CONFIG, PODMAN_IMAGE (read)
#######################################
check_podman() {
  [[ -f "$WORKER_CONFIG" ]] || return 0

  section "Checking Podman"

  if ! command -v podman &> /dev/null; then
    check_fail "Podman not installed (required for worker)"
    return 0
  fi
  check_pass "Podman is installed"

  # Check if image exists
  if [[ -n "$PODMAN_IMAGE" ]]; then
    if podman image exists "$PODMAN_IMAGE" 2> /dev/null; then
      check_pass "Podman image exists: $PODMAN_IMAGE"
    else
      check_warn "Podman image not found: $PODMAN_IMAGE (needs to be built)"
    fi
  fi

  # Check GPU access (if configured) by running nvidia-smi in a throwaway
  # container.
  if grep -q 'gpu_access.*true' "$WORKER_CONFIG" 2> /dev/null; then
    if podman run --rm --device nvidia.com/gpu=all \
      nvidia/cuda:11.8.0-base nvidia-smi &> /dev/null; then
      check_pass "GPU access working"
    else
      check_warn "GPU access configured but not working (check nvidia-container-toolkit)"
    fi
  fi
}

#######################################
# 6. Check CLI config consistency.
# Globals: HOME, API_BASE_PATH (read)
#######################################
check_cli_config() {
  local cli_config="$HOME/.ml/config.toml"
  local cli_base

  section "Checking CLI Configuration"

  if [[ ! -f "$cli_config" ]]; then
    check_warn "CLI config not found (run: ml init)"
    return 0
  fi
  check_pass "CLI config found: $cli_config"

  cli_base=$(toml_value 'worker_base' "$cli_config")
  if [[ "$cli_base" == "$API_BASE_PATH" ]]; then
    check_pass "CLI worker_base matches server base_path"
  else
    check_warn "CLI worker_base ($cli_base) differs from server ($API_BASE_PATH)"
  fi
}

#######################################
# Print the summary and terminate the script.
# Globals: errors, warnings (read)
# Exits:   0 when there were no errors (warnings allowed), 1 otherwise
#######################################
summarize_and_exit() {
  section "=== Summary ==="

  if ((errors == 0 && warnings == 0)); then
    printf '%b%s%b\n' "$GREEN" \
      "All checks passed! Configuration is ready for production." "$NC"
    exit 0
  fi

  if ((errors == 0)); then
    printf '%b%s%b\n' "$YELLOW" \
      "Configuration has $warnings warning(s). Review before deployment." "$NC"
    exit 0
  fi

  printf '%b%s%b\n' "$RED" \
    "Configuration has $errors error(s) and $warnings warning(s). Fix before deployment." "$NC"
  exit 1
}

#######################################
# Entry point: run every check section in order, separated by blank lines.
# Arguments: $1 - API config path (optional)
#            $2 - worker config path (optional)
#######################################
main() {
  printf '%b%s%b\n\n' "$BOLD" \
    "=== FetchML Production Configuration Validator ===" "$NC"

  # Configuration file paths
  API_CONFIG="${1:-configs/config-prod.yaml}"
  WORKER_CONFIG="${2:-configs/worker-prod.toml}"

  check_api_config
  printf '\n'
  check_worker_config
  printf '\n'
  check_directory_structure
  printf '\n'
  check_redis
  printf '\n'
  check_podman
  printf '\n'
  check_cli_config
  printf '\n'

  summarize_and_exit
}

main "$@"