feat(backups): add backup verification automation
- Add systemd service and timer for backup verification
- Add backup-verify.sh script for integrity checks
- Schedule periodic verification of backup archives

Implements: Automated backup integrity validation
This commit is contained in:
parent
0eb8c1b139
commit
dc722848c5
3 changed files with 100 additions and 0 deletions
10
roles/backups/templates/backup-verify.service.j2
Normal file
10
roles/backups/templates/backup-verify.service.j2
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
# One-shot unit that runs the backup verification script; stdout and stderr
# go to the journal tagged "backup-verify".
[Unit]
Description=Backup Restore Verification
# network.target only marks the network stack as started, not as usable.
# The script exports AWS credentials, so the repository is presumably remote:
# wait for network-online.target so a run triggered at boot has connectivity.
Wants=network-online.target
After=network-online.target

[Service]
Type=oneshot
ExecStart=/usr/local/sbin/backup-verify
StandardOutput=journal
StandardError=journal
SyslogIdentifier=backup-verify
|
||||
81
roles/backups/templates/backup-verify.sh.j2
Normal file
81
roles/backups/templates/backup-verify.sh.j2
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
#!/bin/bash
#
# Backup verification script (rendered from a Jinja2 template).
#
# Steps, in order:
#   1. Run "restic check" reading a 10% sample of the repository data.
#   2. Append a listing of recent snapshots to the report.
#   3. Restore one small path from the newest snapshot into a throw-away
#      temp directory and confirm that the directory came back.
#   4. Write a summary and prune reports older than 365 days.
#
# Progress lines are echoed to stdout and appended to a timestamped report
# under /var/log/backup-verification. Exits 0 when every step succeeds and
# non-zero as soon as a step fails.
#
# NOTE(review): the rendered file embeds repository credentials; it should
# presumably be installed readable by root only - confirm in the role tasks.
set -euo pipefail

# Templated values go through Ansible's "quote" filter instead of being
# wrapped in double quotes, so a dollar sign, backtick, backslash or double
# quote inside a secret is taken literally rather than expanded by bash.
export RESTIC_REPOSITORY={{ restic_repository | quote }}
export RESTIC_PASSWORD={{ restic_password | quote }}
export AWS_ACCESS_KEY_ID={{ restic_aws_access_key_id | quote }}
export AWS_SECRET_ACCESS_KEY={{ restic_aws_secret_access_key | quote }}
export AWS_DEFAULT_REGION={{ restic_aws_default_region | quote }}
export AWS_EC2_METADATA_DISABLED="true"

readonly REPORT_DIR="/var/log/backup-verification"
# Path restored as a small probe (config files only).
readonly RESTORE_PROBE_PATH="/opt/traefik/dynamic"

TIMESTAMP=$(date +%Y%m%d_%H%M%S)
readonly TIMESTAMP
readonly REPORT_FILE="$REPORT_DIR/verify-$TIMESTAMP.log"

# Set once the scratch directory exists; read by cleanup().
RESTORE_TEST_DIR=""

# Print one line to stdout and append it to the report.
log() {
  printf '%s\n' "$*" | tee -a "$REPORT_FILE"
}

# Record an error line in the report and abort with status 1.
die() {
  log "ERROR: $*"
  exit 1
}

# EXIT handler: remove the scratch restore directory if it was created.
# The path is expanded when the handler runs, and quoted.
cleanup() {
  if [[ -n "$RESTORE_TEST_DIR" ]]; then
    rm -rf -- "$RESTORE_TEST_DIR"
  fi
}

main() {
  local snapshots_json latest_snapshot
  local restore_result="SUCCESS"

  mkdir -p "$REPORT_DIR"
  : > "$REPORT_FILE"
  trap cleanup EXIT

  log "Starting backup verification at $(date)"
  log "Repository: $RESTIC_REPOSITORY"
  log ""

  # Check repository health
  log "=== Checking repository health ==="
  if ! restic check --read-data-subset=10% >> "$REPORT_FILE" 2>&1; then
    die "Repository check failed"
  fi
  log "Repository check passed"
  log ""

  # List recent snapshots
  log "=== Recent snapshots ==="
  if ! restic snapshots --latest 5 >> "$REPORT_FILE" 2>&1; then
    die "Could not list snapshots"
  fi
  log ""

  # Get latest snapshot for test restore. restic and jq diagnostics go to
  # the report instead of being discarded, and a restic failure is reported
  # as such rather than as "no snapshots".
  snapshots_json=$(restic snapshots --json 2>> "$REPORT_FILE") \
    || die "Could not query snapshots"
  # "// empty" makes jq print nothing for an empty list; without it jq
  # prints the literal string "null", which would pass the emptiness check.
  latest_snapshot=$(jq -r '.[-1].short_id // empty' \
    <<<"$snapshots_json" 2>> "$REPORT_FILE") \
    || die "Could not parse snapshot list"

  if [[ -z "$latest_snapshot" ]]; then
    die "No snapshots found"
  fi

  log "Testing restore from snapshot: $latest_snapshot"

  # Create temp directory for test restore
  RESTORE_TEST_DIR=$(mktemp -d)

  # Perform test restore of a small subset (config files only)
  log "=== Performing test restore ==="
  if ! restic restore "$latest_snapshot" \
    --target "$RESTORE_TEST_DIR" \
    --include "$RESTORE_PROBE_PATH" 2>> "$REPORT_FILE"; then
    die "Test restore failed"
  fi
  log "Test restore completed successfully"

  # Verify restored files exist. A missing directory stays non-fatal, but
  # it is carried into the summary so the report does not claim SUCCESS
  # for a restore that produced nothing.
  if [[ -d "$RESTORE_TEST_DIR$RESTORE_PROBE_PATH" ]]; then
    log "Restored files verified"
  else
    log "WARNING: Expected files not found in restore"
    restore_result="WARNING (expected files not found)"
  fi

  log ""
  log "=== Verification Summary ==="
  log "Repository: Healthy"
  log "Latest snapshot: $latest_snapshot"
  log "Test restore: $restore_result"
  log "Completed at: $(date)"

  # Cleanup old reports (keep last 12 months). Best effort on purpose: a
  # failed prune must not turn a good verification into a failed unit.
  find "$REPORT_DIR" -type f -name "verify-*.log" -mtime +365 -delete \
    2>/dev/null || true

  log ""
  log "Verification complete. Report: $REPORT_FILE"
}

main "$@"
|
||||
9
roles/backups/templates/backup-verify.timer.j2
Normal file
9
roles/backups/templates/backup-verify.timer.j2
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
# Schedules the monthly backup verification run. No Unit= is given, so
# systemd activates the service that shares this timer's name.
[Unit]
Description=Monthly Backup Verification

[Timer]
# Fires once a month.
OnCalendar=monthly
# If the machine was off at the scheduled time, run at the next start-up.
Persistent=true

[Install]
WantedBy=timers.target
|
||||
Loading…
Reference in a new issue