refactor(monitoring): update Alertmanager and exporter configurations

- Simplify Alertmanager to email-only delivery over SMTP, defaulting to postfix:25 (Postfix), replacing the Slack/Discord webhook receivers
- Update node-exporter and cadvisor compose configurations
- Bump Alertmanager, Loki, Grafana, Prometheus image versions (now major-version tags: v0, 3, 11, v3)
This commit is contained in:
Jeremie Fraeys 2026-03-06 10:31:52 -05:00
parent 1a7cde2939
commit 0a85b23a33
No known key found for this signature in database
7 changed files with 32 additions and 78 deletions

View file

@ -1,57 +1,16 @@
---
- name: Read Alertmanager Slack webhook URL
- name: Read Alertmanager email settings
set_fact:
alertmanager_slack_webhook_url: "{{ ALERTMANAGER_SLACK_WEBHOOK_URL | default(lookup('env', 'ALERTMANAGER_SLACK_WEBHOOK_URL') | default('', true), true) }}"
alertmanager_smtp_host: "{{ ALERTMANAGER_SMTP_HOST | default(lookup('env', 'ALERTMANAGER_SMTP_HOST') | default('postfix:25', true), true) }}"
alertmanager_smtp_from: "{{ ALERTMANAGER_SMTP_FROM | default(lookup('env', 'ALERTMANAGER_SMTP_FROM') | default('no-reply@' ~ (inventory_hostname | default('localhost')), true), true) }}"
alertmanager_email_to: "{{ ALERTMANAGER_EMAIL_TO | default(lookup('env', 'ALERTMANAGER_EMAIL_TO') | default('admin@localhost', true), true) }}"
no_log: true
- name: Read Alertmanager Discord webhook URL
set_fact:
alertmanager_discord_webhook_url: "{{ ALERTMANAGER_DISCORD_WEBHOOK_URL | default(lookup('env', 'ALERTMANAGER_DISCORD_WEBHOOK_URL') | default('', true), true) }}"
no_log: true
- name: Fail if Slack webhook URL is configured in Discord webhook variable
assert:
that:
- not (alertmanager_discord_webhook_url is match('^https://hooks\\.slack\\.com/'))
fail_msg: >-
ALERTMANAGER_DISCORD_WEBHOOK_URL appears to be a Slack webhook (hooks.slack.com).
Move it to ALERTMANAGER_SLACK_WEBHOOK_URL and clear ALERTMANAGER_DISCORD_WEBHOOK_URL.
when: alertmanager_discord_webhook_url | length > 0
- name: Fail if Discord webhook URL is configured in Slack webhook variable
assert:
that:
- not (alertmanager_slack_webhook_url is match('^https://((ptb|canary)\\.)?discord(app)?\\.com/api/webhooks/'))
fail_msg: >-
ALERTMANAGER_SLACK_WEBHOOK_URL appears to be a Discord webhook.
Move it to ALERTMANAGER_DISCORD_WEBHOOK_URL and clear ALERTMANAGER_SLACK_WEBHOOK_URL.
when: alertmanager_slack_webhook_url | length > 0
- name: Fail if no Alertmanager webhook is configured
assert:
that:
- (alertmanager_slack_webhook_url | length > 0) or (alertmanager_discord_webhook_url | length > 0)
fail_msg: "Set ALERTMANAGER_SLACK_WEBHOOK_URL or ALERTMANAGER_DISCORD_WEBHOOK_URL"
- name: Fail if both Slack and Discord webhooks are configured
assert:
that:
- not ((alertmanager_slack_webhook_url | length > 0) and (alertmanager_discord_webhook_url | length > 0))
fail_msg: "Configure only one of ALERTMANAGER_SLACK_WEBHOOK_URL or ALERTMANAGER_DISCORD_WEBHOOK_URL"
- name: Determine Alertmanager receiver type
set_fact:
alertmanager_receiver_type: "{{ 'slack' if (alertmanager_slack_webhook_url | length > 0) else 'discord' }}"
- name: Fail if selected receiver has an invalid webhook URL
assert:
that:
- (alertmanager_receiver_type != 'slack') or (alertmanager_slack_webhook_url is match('^https://hooks\\.slack\\.com/'))
- (alertmanager_receiver_type != 'discord') or (alertmanager_discord_webhook_url is match('^https://((ptb|canary)\\.)?discord(app)?\\.com/api/webhooks/'))
fail_msg: >-
Alertmanager webhook URL does not match expected format for receiver type '{{ alertmanager_receiver_type }}'.
Slack expects https://hooks.slack.com/... and Discord expects https://discord.com/api/webhooks/....
- name: Fail if Alertmanager email recipient is not configured
fail:
msg: "ALERTMANAGER_EMAIL_TO is required"
when: alertmanager_email_to | length == 0
- name: Create Alertmanager directory
file:
@ -68,6 +27,16 @@
command: docker network create monitoring
when: monitoring_network.rc != 0
- name: Ensure proxy network exists
command: docker network inspect proxy
register: proxy_network
changed_when: false
failed_when: false
- name: Create proxy network if missing
command: docker network create proxy
when: proxy_network.rc != 0
- name: Copy Alertmanager configuration
template:
src: alertmanager.yml.j2

View file

@ -1,5 +1,7 @@
global:
resolve_timeout: 5m
smtp_smarthost: "{{ alertmanager_smtp_host }}"
smtp_from: "{{ alertmanager_smtp_from }}"
route:
group_by: ['alertname']
@ -10,14 +12,7 @@ route:
receivers:
- name: primary
{% if alertmanager_receiver_type == 'slack' %}
slack_configs:
- api_url: "{{ alertmanager_slack_webhook_url }}"
email_configs:
- to: "{{ alertmanager_email_to }}"
send_resolved: true
channel: "{{ ALERTMANAGER_SLACK_CHANNEL | default(lookup('env', 'ALERTMANAGER_SLACK_CHANNEL') | default('#alerts', true), true) }}"
username: "{{ ALERTMANAGER_SLACK_USERNAME | default(lookup('env', 'ALERTMANAGER_SLACK_USERNAME') | default('alertmanager', true), true) }}"
{% else %}
webhook_configs:
- url: http://alertmanager-discord:9094
send_resolved: true
{% endif %}
require_tls: false

View file

@ -1,6 +1,6 @@
services:
alertmanager:
image: prom/alertmanager:v0.27.0
image: prom/alertmanager:v0
command:
- --config.file=/etc/alertmanager/alertmanager.yml
- --storage.path=/alertmanager
@ -9,24 +9,11 @@ services:
- alertmanager_data:/alertmanager
networks:
- monitoring
- proxy
restart: unless-stopped
labels:
- com.centurylinklabs.watchtower.enable=true
{% if alertmanager_receiver_type == 'discord' %}
alertmanager-discord:
image: rogerrum/alertmanager-discord:latest
environment:
DISCORD_WEBHOOK: "{{ alertmanager_discord_webhook_url }}"
DISCORD_USERNAME: "alertmanager"
LISTEN_ADDRESS: 0.0.0.0:9094
networks:
- monitoring
restart: unless-stopped
labels:
- com.centurylinklabs.watchtower.enable=true
{% endif %}
volumes:
alertmanager_data:
@ -34,3 +21,6 @@ networks:
monitoring:
external: true
name: monitoring
proxy:
external: true
name: proxy

View file

@ -1,6 +1,6 @@
services:
node-exporter:
image: prom/node-exporter:v1.7.0
image: prom/node-exporter:v1.10.2
command:
- --path.rootfs=/host
pid: host

View file

@ -1,6 +1,6 @@
services:
grafana:
image: grafana/grafana:10.2.3
image: grafana/grafana:11
environment:
GF_SECURITY_ADMIN_USER: admin
GF_SECURITY_ADMIN_PASSWORD: "{{ grafana_admin_password }}"

View file

@ -1,6 +1,6 @@
services:
loki:
image: grafana/loki:2.9.4
image: grafana/loki:3
command: -config.file=/etc/loki/config.yml
ports:
- "3100:3100"

View file

@ -1,6 +1,6 @@
services:
prometheus:
image: prom/prometheus:v2.49.1
image: prom/prometheus:v3
command:
- --config.file=/etc/prometheus/prometheus.yml
- --storage.tsdb.path=/prometheus