refactor(monitoring): update Alertmanager and exporter configurations
- Simplify Alertmanager to use localhost:25 by default (Postfix)
- Update node-exporter and cadvisor compose configurations
- Bump Loki, Grafana, Prometheus image versions
This commit is contained in:
parent
1a7cde2939
commit
0a85b23a33
7 changed files with 32 additions and 78 deletions
|
|
@ -1,57 +1,16 @@
|
|||
---
|
||||
|
||||
- name: Read Alertmanager Slack webhook URL
|
||||
- name: Read Alertmanager email settings
|
||||
set_fact:
|
||||
alertmanager_slack_webhook_url: "{{ ALERTMANAGER_SLACK_WEBHOOK_URL | default(lookup('env', 'ALERTMANAGER_SLACK_WEBHOOK_URL') | default('', true), true) }}"
|
||||
alertmanager_smtp_host: "{{ ALERTMANAGER_SMTP_HOST | default(lookup('env', 'ALERTMANAGER_SMTP_HOST') | default('postfix:25', true), true) }}"
|
||||
alertmanager_smtp_from: "{{ ALERTMANAGER_SMTP_FROM | default(lookup('env', 'ALERTMANAGER_SMTP_FROM') | default('no-reply@' ~ (inventory_hostname | default('localhost')), true), true) }}"
|
||||
alertmanager_email_to: "{{ ALERTMANAGER_EMAIL_TO | default(lookup('env', 'ALERTMANAGER_EMAIL_TO') | default('admin@localhost', true), true) }}"
|
||||
no_log: true
|
||||
|
||||
- name: Read Alertmanager Discord webhook URL
|
||||
set_fact:
|
||||
alertmanager_discord_webhook_url: "{{ ALERTMANAGER_DISCORD_WEBHOOK_URL | default(lookup('env', 'ALERTMANAGER_DISCORD_WEBHOOK_URL') | default('', true), true) }}"
|
||||
no_log: true
|
||||
|
||||
- name: Fail if Slack webhook URL is configured in Discord webhook variable
|
||||
assert:
|
||||
that:
|
||||
- not (alertmanager_discord_webhook_url is match('^https://hooks\\.slack\\.com/'))
|
||||
fail_msg: >-
|
||||
ALERTMANAGER_DISCORD_WEBHOOK_URL appears to be a Slack webhook (hooks.slack.com).
|
||||
Move it to ALERTMANAGER_SLACK_WEBHOOK_URL and clear ALERTMANAGER_DISCORD_WEBHOOK_URL.
|
||||
when: alertmanager_discord_webhook_url | length > 0
|
||||
|
||||
- name: Fail if Discord webhook URL is configured in Slack webhook variable
|
||||
assert:
|
||||
that:
|
||||
- not (alertmanager_slack_webhook_url is match('^https://((ptb|canary)\\.)?discord(app)?\\.com/api/webhooks/'))
|
||||
fail_msg: >-
|
||||
ALERTMANAGER_SLACK_WEBHOOK_URL appears to be a Discord webhook.
|
||||
Move it to ALERTMANAGER_DISCORD_WEBHOOK_URL and clear ALERTMANAGER_SLACK_WEBHOOK_URL.
|
||||
when: alertmanager_slack_webhook_url | length > 0
|
||||
|
||||
- name: Fail if no Alertmanager webhook is configured
|
||||
assert:
|
||||
that:
|
||||
- (alertmanager_slack_webhook_url | length > 0) or (alertmanager_discord_webhook_url | length > 0)
|
||||
fail_msg: "Set ALERTMANAGER_SLACK_WEBHOOK_URL or ALERTMANAGER_DISCORD_WEBHOOK_URL"
|
||||
|
||||
- name: Fail if both Slack and Discord webhooks are configured
|
||||
assert:
|
||||
that:
|
||||
- not ((alertmanager_slack_webhook_url | length > 0) and (alertmanager_discord_webhook_url | length > 0))
|
||||
fail_msg: "Configure only one of ALERTMANAGER_SLACK_WEBHOOK_URL or ALERTMANAGER_DISCORD_WEBHOOK_URL"
|
||||
|
||||
- name: Determine Alertmanager receiver type
|
||||
set_fact:
|
||||
alertmanager_receiver_type: "{{ 'slack' if (alertmanager_slack_webhook_url | length > 0) else 'discord' }}"
|
||||
|
||||
- name: Fail if selected receiver has an invalid webhook URL
|
||||
assert:
|
||||
that:
|
||||
- (alertmanager_receiver_type != 'slack') or (alertmanager_slack_webhook_url is match('^https://hooks\\.slack\\.com/'))
|
||||
- (alertmanager_receiver_type != 'discord') or (alertmanager_discord_webhook_url is match('^https://((ptb|canary)\\.)?discord(app)?\\.com/api/webhooks/'))
|
||||
fail_msg: >-
|
||||
Alertmanager webhook URL does not match expected format for receiver type '{{ alertmanager_receiver_type }}'.
|
||||
Slack expects https://hooks.slack.com/... and Discord expects https://discord.com/api/webhooks/....
|
||||
- name: Fail if Alertmanager email recipient is not configured
|
||||
fail:
|
||||
msg: "ALERTMANAGER_EMAIL_TO is required"
|
||||
when: alertmanager_email_to | length == 0
|
||||
|
||||
- name: Create Alertmanager directory
|
||||
file:
|
||||
|
|
@ -68,6 +27,16 @@
|
|||
command: docker network create monitoring
|
||||
when: monitoring_network.rc != 0
|
||||
|
||||
- name: Ensure proxy network exists
|
||||
command: docker network inspect proxy
|
||||
register: proxy_network
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Create proxy network if missing
|
||||
command: docker network create proxy
|
||||
when: proxy_network.rc != 0
|
||||
|
||||
- name: Copy Alertmanager configuration
|
||||
template:
|
||||
src: alertmanager.yml.j2
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
global:
|
||||
resolve_timeout: 5m
|
||||
smtp_smarthost: "{{ alertmanager_smtp_host }}"
|
||||
smtp_from: "{{ alertmanager_smtp_from }}"
|
||||
|
||||
route:
|
||||
group_by: ['alertname']
|
||||
|
|
@ -10,14 +12,7 @@ route:
|
|||
|
||||
receivers:
|
||||
- name: primary
|
||||
{% if alertmanager_receiver_type == 'slack' %}
|
||||
slack_configs:
|
||||
- api_url: "{{ alertmanager_slack_webhook_url }}"
|
||||
email_configs:
|
||||
- to: "{{ alertmanager_email_to }}"
|
||||
send_resolved: true
|
||||
channel: "{{ ALERTMANAGER_SLACK_CHANNEL | default(lookup('env', 'ALERTMANAGER_SLACK_CHANNEL') | default('#alerts', true), true) }}"
|
||||
username: "{{ ALERTMANAGER_SLACK_USERNAME | default(lookup('env', 'ALERTMANAGER_SLACK_USERNAME') | default('alertmanager', true), true) }}"
|
||||
{% else %}
|
||||
webhook_configs:
|
||||
- url: http://alertmanager-discord:9094
|
||||
send_resolved: true
|
||||
{% endif %}
|
||||
require_tls: false
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
services:
|
||||
alertmanager:
|
||||
image: prom/alertmanager:v0.27.0
|
||||
image: prom/alertmanager:v0
|
||||
command:
|
||||
- --config.file=/etc/alertmanager/alertmanager.yml
|
||||
- --storage.path=/alertmanager
|
||||
|
|
@ -9,24 +9,11 @@ services:
|
|||
- alertmanager_data:/alertmanager
|
||||
networks:
|
||||
- monitoring
|
||||
- proxy
|
||||
restart: unless-stopped
|
||||
labels:
|
||||
- com.centurylinklabs.watchtower.enable=true
|
||||
|
||||
{% if alertmanager_receiver_type == 'discord' %}
|
||||
alertmanager-discord:
|
||||
image: rogerrum/alertmanager-discord:latest
|
||||
environment:
|
||||
DISCORD_WEBHOOK: "{{ alertmanager_discord_webhook_url }}"
|
||||
DISCORD_USERNAME: "alertmanager"
|
||||
LISTEN_ADDRESS: 0.0.0.0:9094
|
||||
networks:
|
||||
- monitoring
|
||||
restart: unless-stopped
|
||||
labels:
|
||||
- com.centurylinklabs.watchtower.enable=true
|
||||
{% endif %}
|
||||
|
||||
volumes:
|
||||
alertmanager_data:
|
||||
|
||||
|
|
@ -34,3 +21,6 @@ networks:
|
|||
monitoring:
|
||||
external: true
|
||||
name: monitoring
|
||||
proxy:
|
||||
external: true
|
||||
name: proxy
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
services:
|
||||
node-exporter:
|
||||
image: prom/node-exporter:v1.7.0
|
||||
image: prom/node-exporter:v1.10.2
|
||||
command:
|
||||
- --path.rootfs=/host
|
||||
pid: host
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
services:
|
||||
grafana:
|
||||
image: grafana/grafana:10.2.3
|
||||
image: grafana/grafana:11
|
||||
environment:
|
||||
GF_SECURITY_ADMIN_USER: admin
|
||||
GF_SECURITY_ADMIN_PASSWORD: "{{ grafana_admin_password }}"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
services:
|
||||
loki:
|
||||
image: grafana/loki:2.9.4
|
||||
image: grafana/loki:3
|
||||
command: -config.file=/etc/loki/config.yml
|
||||
ports:
|
||||
- "3100:3100"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
services:
|
||||
prometheus:
|
||||
image: prom/prometheus:v2.49.1
|
||||
image: prom/prometheus:v3
|
||||
command:
|
||||
- --config.file=/etc/prometheus/prometheus.yml
|
||||
- --storage.tsdb.path=/prometheus
|
||||
|
|
|
|||
Loading…
Reference in a new issue