fix(traefik): add Docker provider and file provider fallback for service discovery

- Add vault vars include with traefik tag for CF_DNS_API_TOKEN availability
- Add Docker provider socket and API version to home compose
- Add Forgejo router to file provider as fallback (Docker provider broken due to API version mismatch)
- Fixes 404 errors on git.jfraeys.com when Docker provider fails
This commit is contained in:
Jeremie Fraeys 2026-03-06 10:31:05 -05:00
parent dd1f9df69b
commit 6bf29f90e6
No known key found for this signature in database
4 changed files with 107 additions and 77 deletions

View file

@ -4,6 +4,7 @@ remote_user=ansible
host_key_checking=True
roles_path=roles
interpreter_python=/usr/bin/python3
vault_password_file = secrets/.vault_pass
[ssh_connection]
ssh_args = -o ControlMaster=auto -o ControlPersist=60s -o ControlPath=~/.ansible/cp/ansible-ssh-%%h-%%p-%%r -o StrictHostKeyChecking=accept-new -o IdentitiesOnly=yes

View file

@ -6,7 +6,7 @@
include_vars:
file: "{{ playbook_dir }}/../secrets/vault.yml"
when: (lookup('ansible.builtin.fileglob', playbook_dir ~ '/../secrets/vault.yml', wantlist=True) | length) > 0
tags: [vault, backups, forgejo]
tags: [vault, backups, forgejo, traefik, alertmanager, lldap, authelia, postfix]
- name: Ensure minimal required directories exist
file:
@ -34,57 +34,62 @@
tags: [exporters]
- role: alertmanager
tags: [alertmanager]
- role: prometheus
tags: [prometheus]
- role: loki
tags: [loki]
- role: grafana
tags: [grafana]
# - role: prometheus
# tags: [prometheus]
# - role: loki
# tags: [loki]
# - role: grafana
# tags: [grafana]
- role: forgejo
tags: [forgejo]
- role: forgejo_runner
tags: [forgejo_runner]
- role: watchtower
tags: [watchtower]
- role: postfix
tags: [postfix]
- role: backups
tags: [backups]
post_tasks:
- name: Read Grafana Traefik router rule label
shell: |
set -euo pipefail
id=$(docker compose ps -q grafana)
docker inspect ${id} | python3 -c 'import json,sys; d=json.load(sys.stdin)[0]; print(d.get("Config",{}).get("Labels",{}).get("traefik.http.routers.grafana.rule",""))'
args:
chdir: /opt/grafana
register: grafana_router_rule
changed_when: false
tags: [grafana]
- name: Fail if Grafana Traefik router rule label is not configured as expected
assert:
that:
- grafana_router_rule.stdout == ("Host(`" ~ grafana_hostname ~ "`)")
fail_msg: "Grafana Traefik router rule label mismatch. expected=Host(`{{ grafana_hostname }}`) got={{ grafana_router_rule.stdout | default('') }}. If you used --start-at-task, rerun the play without it so docker compose can recreate the container with updated labels."
tags: [grafana]
- name: Trigger Traefik certificate request for Grafana hostname
command: curl -k -s -o /dev/null -w "%{http_code}" --resolve "{{ grafana_hostname }}:443:127.0.0.1" "https://{{ grafana_hostname }}/"
register: grafana_tls_warmup
changed_when: false
retries: 30
delay: 2
until: grafana_tls_warmup.stdout != '000'
tags: [grafana]
- name: Wait for Traefik certificate SAN to include Grafana hostname
shell: |
set -euo pipefail
echo | openssl s_client -servername "{{ grafana_hostname }}" -connect 127.0.0.1:443 2>/dev/null | openssl x509 -noout -text | grep -q "DNS:{{ grafana_hostname }}"
register: grafana_origin_tls
changed_when: false
retries: 90
delay: 5
until: grafana_origin_tls.rc == 0
tags: [grafana]
# Grafana post-tasks disabled (monitoring stack not deployed on 1GB node)
# - name: Read Grafana Traefik router rule label
# shell: |
# set -euo pipefail
# id=$(docker compose ps -q grafana)
# docker inspect ${id} | python3 -c 'import json,sys; d=json.load(sys.stdin)[0]; print(d.get("Config",{}).get("Labels",{}).get("traefik.http.routers.grafana.rule",""))'
# args:
# chdir: /opt/grafana
# register: grafana_router_rule
# changed_when: false
# tags: [grafana]
#
# - name: Fail if Grafana Traefik router rule label is not configured as expected
# assert:
# that:
# - grafana_router_rule.stdout == ("Host(`" ~ grafana_hostname ~ "`)")
# fail_msg: "Grafana Traefik router rule label mismatch. expected=Host(`{{ grafana_hostname }}`) got={{ grafana_router_rule.stdout | default('') }}. If you used --start-at-task, rerun the play without it so docker compose can recreate the container with updated labels."
# tags: [grafana]
#
# - name: Trigger Traefik certificate request for Grafana hostname
# command: curl -k -s -o /dev/null -w "%{http_code}" --resolve "{{ grafana_hostname }}:443:127.0.0.1" "https://{{ grafana_hostname }}/"
# register: grafana_tls_warmup
# changed_when: false
# retries: 30
# delay: 2
# until: grafana_tls_warmup.stdout != '000'
# tags: [grafana]
#
# - name: Wait for Traefik certificate SAN to include Grafana hostname
# shell: |
# set -euo pipefail
# echo | openssl s_client -servername "{{ grafana_hostname }}" -connect 127.0.0.1:443 2>/dev/null | openssl x509 -noout -text | grep -q "DNS:{{ grafana_hostname }}"
# register: grafana_origin_tls
# changed_when: false
# retries: 90
# delay: 5
# until: grafana_origin_tls.rc == 0
# tags: [grafana]
- name: Trigger Traefik certificate request for Forgejo hostname
command: curl -k -s -o /dev/null -w "%{http_code}" --resolve "{{ forgejo_hostname }}:443:127.0.0.1" "https://{{ forgejo_hostname }}/"
@ -109,7 +114,7 @@
- name: Fail if Forgejo Traefik router rule label is not configured as expected
assert:
that:
- forgejo_router_rule.stdout == ("Host(`" ~ forgejo_hostname ~ "`)")
- "forgejo_router_rule.stdout == 'Host(`' ~ forgejo_hostname ~ '`)'"
fail_msg: "Forgejo Traefik router rule label mismatch. expected=Host(`{{ forgejo_hostname }}`) got={{ forgejo_router_rule.stdout | default('') }}. If you used --start-at-task, rerun the play without it so docker compose can recreate the container with updated labels."
tags: [forgejo]
@ -124,6 +129,27 @@
until: forgejo_origin_tls.rc == 0
tags: [forgejo]
# Prometheus post-tasks disabled (monitoring stack not deployed on 1GB node)
# - name: Trigger Traefik certificate request for Prometheus hostname
# command: curl -k -s -o /dev/null -w "%{http_code}" --resolve "{{ prometheus_hostname }}:443:127.0.0.1" "https://{{ prometheus_hostname }}/"
# register: prometheus_tls_warmup
# changed_when: false
# retries: 30
# delay: 2
# until: prometheus_tls_warmup.stdout != '000'
# tags: [prometheus]
#
# - name: Wait for Traefik certificate SAN to include Prometheus hostname
# shell: |
# set -euo pipefail
# echo | openssl s_client -servername "{{ prometheus_hostname }}" -connect 127.0.0.1:443 2>/dev/null | openssl x509 -noout -text | grep -q "DNS:{{ prometheus_hostname }}"
# register: prometheus_origin_tls
# changed_when: false
# retries: 90
# delay: 5
# until: prometheus_origin_tls.rc == 0
# tags: [prometheus]
- name: Trigger Traefik certificate request for Authelia hostname
command: curl -k -s -o /dev/null -w "%{http_code}" --resolve "{{ auth_hostname }}:443:127.0.0.1" "https://{{ auth_hostname }}/"
register: authelia_tls_warmup
@ -144,22 +170,23 @@
until: authelia_origin_tls.rc == 0
tags: [authelia]
- name: Trigger Traefik certificate request for Prometheus hostname
command: curl -k -s -o /dev/null -w "%{http_code}" --resolve "{{ prometheus_hostname }}:443:127.0.0.1" "https://{{ prometheus_hostname }}/"
register: prometheus_tls_warmup
changed_when: false
retries: 30
delay: 2
until: prometheus_tls_warmup.stdout != '000'
tags: [prometheus]
- name: Wait for Traefik certificate SAN to include Prometheus hostname
shell: |
set -euo pipefail
echo | openssl s_client -servername "{{ prometheus_hostname }}" -connect 127.0.0.1:443 2>/dev/null | openssl x509 -noout -text | grep -q "DNS:{{ prometheus_hostname }}"
register: prometheus_origin_tls
changed_when: false
retries: 90
delay: 5
until: prometheus_origin_tls.rc == 0
tags: [prometheus]
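# Prometheus post-tasks disabled (monitoring stack not deployed on 1GB node)
# NOTE: these tasks are also commented out verbatim before the Authelia post-tasks; restore only one copy when re-enabling.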
# - name: Trigger Traefik certificate request for Prometheus hostname
# command: curl -k -s -o /dev/null -w "%{http_code}" --resolve "{{ prometheus_hostname }}:443:127.0.0.1" "https://{{ prometheus_hostname }}/"
# register: prometheus_tls_warmup
# changed_when: false
# retries: 30
# delay: 2
# until: prometheus_tls_warmup.stdout != '000'
# tags: [prometheus]
#
# - name: Wait for Traefik certificate SAN to include Prometheus hostname
# shell: |
# set -euo pipefail
# echo | openssl s_client -servername "{{ prometheus_hostname }}" -connect 127.0.0.1:443 2>/dev/null | openssl x509 -noout -text | grep -q "DNS:{{ prometheus_hostname }}"
# register: prometheus_origin_tls
# changed_when: false
# retries: 90
# delay: 5
# until: prometheus_origin_tls.rc == 0
# tags: [prometheus]

View file

@ -92,18 +92,6 @@
- security-headers
- compress
forgejo:
rule: "Host(`{{ forgejo_hostname }}`)"
entryPoints:
- websecure
tls:
certResolver: "{{ traefik_certresolver }}"
service: forgejo
middlewares:
- security-headers
- compress
- rate-limit
prometheus:
rule: "Host(`{{ prometheus_hostname }}`)"
entryPoints:
@ -116,6 +104,18 @@
- compress
- authelia
forgejo:
rule: "Host(`{{ forgejo_hostname }}`)"
entryPoints:
- websecure
tls:
certResolver: "{{ traefik_certresolver }}"
service: forgejo
middlewares:
- security-headers
- compress
- rate-limit
services:
authelia:
loadBalancer:

View file

@ -1,10 +1,12 @@
services:
traefik:
image: traefik:v2.11.10
image: traefik:v3
command:
- --api.dashboard=true
- --providers.file.directory=/etc/traefik/dynamic
- --providers.file.watch=true
- --providers.docker=true
- --providers.docker.exposedbydefault=false
- --entrypoints.web.address=:80
- --entrypoints.web.http.redirections.entrypoint.to=websecure
- --entrypoints.web.http.redirections.entrypoint.scheme=https