---
# Prometheus alerting rules covering authentication failures, connection and
# request-rate anomalies, certificate expiry, host resources (memory, CPU,
# disk), target availability and HTTP error ratio.
#
# rate() returns a per-second average over the range window. Rules whose
# description states a per-minute threshold multiply the rate by 60 so the
# expression and the human-readable text agree.
groups:
  - name: security.rules
    rules:
      # High rate of failed authentication attempts.
      # Per-second rate scaled to per-minute to match the description.
      - alert: HighFailedAuthRate
        expr: rate(failed_auth_total[5m]) * 60 > 10
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High rate of failed authentication attempts"
          description: "More than 10 failed auth attempts per minute for the last 2 minutes"

      # Potential brute force attack.
      # Per-second rate scaled to per-minute to match the description.
      - alert: BruteForceAttack
        expr: rate(failed_auth_total[1m]) * 60 > 30
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Potential brute force attack detected"
          description: "More than 30 failed auth attempts per minute"

      # Unusual WebSocket connection patterns.
      # The threshold is compared against the per-second rate; the description
      # states no unit, so the expression is left as written.
      - alert: UnusualWebSocketActivity
        expr: rate(websocket_connections_total[5m]) > 100
        for: 3m
        labels:
          severity: warning
        annotations:
          summary: "Unusual WebSocket connection activity"
          description: "WebSocket connection rate is unusually high"

      # Rate limit breaches.
      # Per-second rate scaled to per-minute to match the description.
      - alert: RateLimitBreached
        expr: rate(rate_limit_exceeded_total[5m]) * 60 > 5
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "Rate limits being exceeded"
          description: "Rate limit exceeded more than 5 times per minute"

      # SSL certificate expiration warning.
      - alert: SSLCertificateExpiring
        expr: ssl_certificate_expiry_days < 30
        for: 1h
        labels:
          severity: warning
        annotations:
          summary: "SSL certificate expiring soon"
          description: "SSL certificate will expire in less than 30 days"

      # High memory usage: fraction of total memory that is not available.
      - alert: HighMemoryUsage
        expr: >-
          (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes)
          / node_memory_MemTotal_bytes > 0.9
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High memory usage detected"
          description: "Memory usage is above 90%"

      # High CPU usage: 100 minus the average idle percentage per instance.
      # rate() is used instead of irate(): irate() only looks at the last two
      # samples in the window, which makes an alert condition flap.
      - alert: HighCPUUsage
        expr: >-
          100 - (avg by (instance)
          (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High CPU usage detected"
          description: "CPU usage is above 80%"

      # Disk space running low: available bytes as a percentage of size.
      - alert: LowDiskSpace
        expr: >-
          (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 < 10
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Low disk space"
          description: "Disk space is below 10%"

      # Service down: the scrape target's `up` series reports 0.
      - alert: ServiceDown
        expr: up == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Service is down"
          description: "{{ $labels.instance }} service has been down for more than 1 minute"

      # Unexpected error rates: share of requests answered with a 5xx status.
      # Both sides are summed per job/instance before dividing. Without the
      # aggregation, one-to-one label matching pairs every 5xx series only
      # with itself, so the ratio is always 1 and the alert fires whenever
      # any 5xx traffic exists.
      - alert: HighErrorRate
        expr: >-
          sum by (job, instance) (rate(http_requests_total{status=~"5.."}[5m]))
          / sum by (job, instance) (rate(http_requests_total[5m])) > 0.1
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High error rate detected"
          description: "Error rate is above 10%"

      # Suspicious IP activity.
      # The threshold is compared against the per-second rate of each series.
      # NOTE(review): presumably one series per client IP - confirm the label
      # name and consider adding it to the description.
      - alert: SuspiciousIPActivity
        expr: rate(requests_by_ip[5m]) > 1000
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Suspicious IP activity"
          description: "IP address making unusually many requests"