From dc722848c509f3d3d838cd98770b2f125c4569af Mon Sep 17 00:00:00 2001
From: Jeremie Fraeys
Date: Fri, 6 Mar 2026 14:27:29 -0500
Subject: [PATCH] feat(backups): add backup verification automation

- Add systemd service and timer for backup verification
- Add backup-verify.sh script for integrity checks
- Schedule periodic verification of backup archives

Implements: Automated backup integrity validation
---
Review note: the service and script hunks were edited after this patch was
generated, so their blob "index" lines were dropped. Regenerate the patch
with git format-patch once the commit has been amended.

 .../templates/backup-verify.service.j2        |  14 +++
 roles/backups/templates/backup-verify.sh.j2   | 115 ++++++++++++++++++
 .../backups/templates/backup-verify.timer.j2  |   9 ++
 3 files changed, 138 insertions(+)
 create mode 100644 roles/backups/templates/backup-verify.service.j2
 create mode 100644 roles/backups/templates/backup-verify.sh.j2
 create mode 100644 roles/backups/templates/backup-verify.timer.j2

diff --git a/roles/backups/templates/backup-verify.service.j2 b/roles/backups/templates/backup-verify.service.j2
new file mode 100644
--- /dev/null
+++ b/roles/backups/templates/backup-verify.service.j2
@@ -0,0 +1,14 @@
+[Unit]
+Description=Backup Restore Verification
+# network.target does not wait for connectivity; network-online.target does.
+# The verify script exports AWS credentials, so the repository is presumably
+# remote and needs the network to be up.
+Wants=network-online.target
+After=network-online.target
+
+[Service]
+Type=oneshot
+ExecStart=/usr/local/sbin/backup-verify
+StandardOutput=journal
+StandardError=journal
+SyslogIdentifier=backup-verify
diff --git a/roles/backups/templates/backup-verify.sh.j2 b/roles/backups/templates/backup-verify.sh.j2
new file mode 100644
--- /dev/null
+++ b/roles/backups/templates/backup-verify.sh.j2
@@ -0,0 +1,115 @@
+#!/bin/bash
+set -euo pipefail
+
+# Backup Verification Script
+#
+# Checks the restic repository, lists recent snapshots, then restores a small
+# subset of the newest snapshot into a scratch directory to confirm that a
+# restore works. Progress is written to a timestamped report in REPORT_DIR and
+# echoed to stdout.
+#
+# Exit status: 0 when the repository check and test restore succeed, 1 when the
+# check, the snapshot lookup or the restore fails.
+
+# Values are shell-quoted at template time so secrets containing characters
+# such as $, ", \ or a backtick reach restic unchanged.
+export RESTIC_REPOSITORY={{ restic_repository | quote }}
+export RESTIC_PASSWORD={{ restic_password | quote }}
+export AWS_ACCESS_KEY_ID={{ restic_aws_access_key_id | quote }}
+export AWS_SECRET_ACCESS_KEY={{ restic_aws_secret_access_key | quote }}
+export AWS_DEFAULT_REGION={{ restic_aws_default_region | quote }}
+export AWS_EC2_METADATA_DISABLED="true"
+
+readonly REPORT_DIR="/var/log/backup-verification"
+# Path inside the snapshot that the test restore pulls out (config files only).
+readonly RESTORE_SUBSET="/opt/traefik/dynamic"
+TIMESTAMP=$(date +%Y%m%d_%H%M%S)
+readonly REPORT_FILE="$REPORT_DIR/verify-$TIMESTAMP.log"
+RESTORE_TEST_DIR=""
+
+# Append one line to the report and echo it to stdout.
+log() {
+  printf '%s\n' "$*" | tee -a "$REPORT_FILE"
+}
+
+# Remove the scratch restore directory, if one was created.
+cleanup() {
+  if [[ -n "$RESTORE_TEST_DIR" ]]; then
+    rm -rf -- "$RESTORE_TEST_DIR"
+  fi
+}
+trap cleanup EXIT
+
+mkdir -p "$REPORT_DIR"
+: > "$REPORT_FILE"
+
+log "Starting backup verification at $(date)"
+log "Repository: $RESTIC_REPOSITORY"
+log ""
+
+# Check repository health
+log "=== Checking repository health ==="
+if ! restic check --read-data-subset=10% >> "$REPORT_FILE" 2>&1; then
+  log "ERROR: Repository check failed"
+  exit 1
+fi
+log "Repository check passed"
+log ""
+
+# List recent snapshots
+log "=== Recent snapshots ==="
+if ! restic snapshots --latest 5 >> "$REPORT_FILE" 2>&1; then
+  log "ERROR: Failed to list snapshots"
+  exit 1
+fi
+log ""
+
+# Get latest snapshot for test restore. "// empty" is required: for an empty
+# snapshot list jq -r would otherwise print the literal string "null", which
+# would pass the emptiness check below.
+LATEST_SNAPSHOT=$(restic snapshots --json 2>> "$REPORT_FILE" \
+  | jq -r '.[-1].short_id // empty' 2>> "$REPORT_FILE" || true)
+
+if [[ -z "$LATEST_SNAPSHOT" ]]; then
+  log "ERROR: No snapshots found"
+  exit 1
+fi
+
+log "Testing restore from snapshot: $LATEST_SNAPSHOT"
+
+# Create temp directory for test restore (removed by the EXIT trap)
+RESTORE_TEST_DIR=$(mktemp -d)
+
+# Perform test restore of a small subset (config files only)
+log "=== Performing test restore ==="
+if ! restic restore "$LATEST_SNAPSHOT" --target "$RESTORE_TEST_DIR" \
+  --include "$RESTORE_SUBSET" 2>> "$REPORT_FILE"; then
+  log "ERROR: Test restore failed"
+  exit 1
+fi
+log "Test restore completed successfully"
+
+# A zero exit status alone does not show that the subset was in the snapshot,
+# so check the directory on disk and carry the outcome into the summary.
+if [[ -d "$RESTORE_TEST_DIR$RESTORE_SUBSET" ]]; then
+  log "Restored files verified"
+  restore_status="SUCCESS"
+else
+  log "WARNING: Expected files not found in restore"
+  restore_status="WARNING (expected files not found)"
+fi
+
+log ""
+log "=== Verification Summary ==="
+log "Repository: Healthy"
+log "Latest snapshot: $LATEST_SNAPSHOT"
+log "Test restore: $restore_status"
+log "Completed at: $(date)"
+
+# Cleanup old reports (keep last 12 months); best effort, never fatal
+find "$REPORT_DIR" -type f -name "verify-*.log" -mtime +365 -delete 2>/dev/null || true
+
+log ""
+log "Verification complete. Report: $REPORT_FILE"
+
+exit 0
diff --git a/roles/backups/templates/backup-verify.timer.j2 b/roles/backups/templates/backup-verify.timer.j2
new file mode 100644
index 0000000..80405f7
--- /dev/null
+++ b/roles/backups/templates/backup-verify.timer.j2
@@ -0,0 +1,9 @@
+[Unit]
+Description=Monthly Backup Verification
+
+[Timer]
+OnCalendar=monthly
+Persistent=true
+
+[Install]
+WantedBy=timers.target