Restructure playbooks and update main deployment workflows

- Delete playbooks/app.yml (replaced by deploy-app.yml)
- Delete playbooks/test_config.yml (moved to playbooks/tests/)
- Delete setup.sh (renamed to setup)
- Update deploy.yml with improved deployment orchestration
- Update services.yml to include new infrastructure roles
This commit is contained in:
Jeremie Fraeys 2026-02-21 18:31:53 -05:00
parent 2610b904a2
commit dd1f9df69b
No known key found for this signature in database
5 changed files with 28 additions and 651 deletions

View file

@@ -1,18 +0,0 @@
---
# Deployment playbook for web hosts: provisions Docker, the Traefik
# reverse proxy, the core application stack, and a Forgejo CI runner.
# NOTE(review): leading indentation was stripped by the diff rendering;
# structure below reconstructed from YAML syntax — verify against the
# original playbooks/app.yml.
- hosts: web_hosts
  become: true
  pre_tasks:
    # The vault file is optional: the fileglob lookup returns an empty
    # list when it is absent, so the include is skipped instead of failing.
    - name: Load vault vars if present
      include_vars:
        file: "{{ playbook_dir }}/../secrets/vault.yml"
      when: (lookup('ansible.builtin.fileglob', playbook_dir ~ '/../secrets/vault.yml', wantlist=True) | length) > 0
      tags: always
  roles:
    - role: docker
      tags: [docker]
    - role: traefik
      tags: [traefik]
    - role: app_core
      tags: [app_core]
    - role: forgejo_runner
      tags: [forgejo_runner]

View file

@@ -11,6 +11,7 @@
roles:
- docker
- fail2ban
- traefik
- lldap
- authelia

View file

@@ -6,7 +6,7 @@
include_vars:
file: "{{ playbook_dir }}/../secrets/vault.yml"
when: (lookup('ansible.builtin.fileglob', playbook_dir ~ '/../secrets/vault.yml', wantlist=True) | length) > 0
tags: always
tags: [vault, backups, forgejo]
- name: Ensure minimal required directories exist
file:
@@ -18,6 +18,8 @@
roles:
- role: docker
tags: [docker]
- role: firewall
tags: [firewall]
- role: traefik
tags: [traefik]
- role: app_ssh_access
@@ -30,6 +32,8 @@
tags: [authelia]
- role: exporters
tags: [exporters]
- role: alertmanager
tags: [alertmanager]
- role: prometheus
tags: [prometheus]
- role: loki
@@ -40,6 +44,8 @@
tags: [forgejo]
- role: watchtower
tags: [watchtower]
- role: backups
tags: [backups]
post_tasks:
- name: Read Grafana Traefik router rule label
@@ -137,3 +143,23 @@
delay: 5
until: authelia_origin_tls.rc == 0
tags: [authelia]
- name: Trigger Traefik certificate request for Prometheus hostname
command: curl -k -s -o /dev/null -w "%{http_code}" --resolve "{{ prometheus_hostname }}:443:127.0.0.1" "https://{{ prometheus_hostname }}/"
register: prometheus_tls_warmup
changed_when: false
retries: 30
delay: 2
until: prometheus_tls_warmup.stdout != '000'
tags: [prometheus]
- name: Wait for Traefik certificate SAN to include Prometheus hostname
shell: |
set -euo pipefail
echo | openssl s_client -servername "{{ prometheus_hostname }}" -connect 127.0.0.1:443 2>/dev/null | openssl x509 -noout -text | grep -q "DNS:{{ prometheus_hostname }}"
register: prometheus_origin_tls
changed_when: false
retries: 90
delay: 5
until: prometheus_origin_tls.rc == 0
tags: [prometheus]

View file

@@ -1,434 +0,0 @@
---
- name: Test Deployment Configuration
hosts: all
become: true
tasks:
- name: Load vault vars if present
include_vars:
file: "{{ playbook_dir }}/../secrets/vault.yml"
no_log: true
when: (lookup('ansible.builtin.fileglob', playbook_dir ~ '/../secrets/vault.yml', wantlist=True) | length) > 0
- name: Check SSH service status
command: systemctl is-active sshd
register: ssh_status
changed_when: false
- debug:
msg: "SSH service is {{ ssh_status.stdout | default('') }}"
- name: Check SSH Port Configuration
command: sshd -T
register: ssh_port
changed_when: false
failed_when: false
- debug:
msg: "SSH port configured as {{ (ssh_port.stdout | default('') | regex_search('(?m)^port\\s+([0-9]+)$', '\\1')) | default('Unknown') }}"
- name: Check Docker version
command: docker --version
register: docker_version
changed_when: false
- debug:
msg: "Docker Version: {{ docker_version.stdout }}"
- name: Check Docker Compose version (hyphen)
command: docker-compose --version
register: docker_compose_version_hyphen
failed_when: false
changed_when: false
- name: Check Docker Compose version (docker compose)
command: docker compose version
register: docker_compose_version_space
failed_when: false
changed_when: false
- name: Display Docker Compose version
debug:
msg: >
{% if docker_compose_version_hyphen.stdout %}
Docker Compose version (docker-compose): {{ docker_compose_version_hyphen.stdout }}
{% elif docker_compose_version_space.stdout %}
Docker Compose version (docker compose): {{ docker_compose_version_space.stdout }}
{% else %}
Docker Compose not found
{% endif %}
- name: Check Ansible version
command: ansible --version
register: ansible_version
changed_when: false
failed_when: false
- debug:
msg: "Ansible Version: {{ (ansible_version.stdout | default('')) .split('\n')[0] if (ansible_version.stdout | default('') | length) > 0 else 'Not installed' }}"
- name: Check UFW status
command: ufw status verbose
register: ufw_status
changed_when: false
- debug:
msg: "UFW Status: {{ ufw_status.stdout }}"
- name: Check Fail2ban service status
command: systemctl is-active fail2ban
register: fail2ban_status
changed_when: false
failed_when: false
- debug:
msg: "Fail2ban is {{ fail2ban_status.stdout }}"
- name: Display logrotate custom config
command: cat /etc/logrotate.d/custom
register: logrotate_config
changed_when: false
failed_when: false
- debug:
msg: "Logrotate custom config:\n{{ logrotate_config.stdout | default('No custom logrotate config found') }}"
- name: Check running Docker containers
command: docker ps
register: docker_ps
changed_when: false
- debug:
msg: "Docker containers:\n{{ docker_ps.stdout }}"
- name: Determine host role
set_fact:
is_services_host: "{{ 'services_hosts' in group_names }}"
is_web_host: "{{ 'web_hosts' in group_names }}"
- name: Define expected stacks for services host
set_fact:
expected_stacks:
- { name: traefik, dir: /opt/traefik }
- { name: lldap, dir: /opt/lldap }
- { name: authelia, dir: /opt/authelia }
- { name: exporters, dir: /opt/exporters }
- { name: prometheus, dir: /opt/prometheus }
- { name: loki, dir: /opt/loki }
- { name: grafana, dir: /opt/grafana }
- { name: forgejo, dir: /opt/forgejo }
- { name: watchtower, dir: /opt/watchtower }
when: is_services_host
- name: Define expected stacks for web host
set_fact:
expected_stacks:
- { name: traefik, dir: /opt/traefik }
- { name: app_core, dir: /opt/app }
- { name: forgejo_runner, dir: /opt/forgejo-runner }
when: is_web_host
- name: Check minimal infra-controller directories exist on services host
stat:
path: "{{ item }}"
register: infra_dirs
loop:
- /var/run/active-apps
- /var/lib/infra-controller
changed_when: false
when: is_services_host
- name: Fail if any minimal infra-controller directory is missing on services host
assert:
that:
- item.stat.exists
- item.stat.isdir
fail_msg: "Missing required directory on services host: {{ item.item }}. This typically means the services playbook has not been applied yet. Run ./setup.sh (or ansible-playbook playbooks/services.yml) and re-run this test."
loop: "{{ infra_dirs.results | default([]) }}"
when: is_services_host
- name: Read deployer authorized_keys on services host
slurp:
src: /home/deployer/.ssh/authorized_keys
register: deployer_authorized_keys
changed_when: false
when: is_services_host
- name: Fail if deployer authorized_keys is missing forced-command restrictions
assert:
that:
- (deployer_authorized_keys.content | b64decode) is search('command="/usr/local/sbin/infra-register-stdin"')
- (deployer_authorized_keys.content | b64decode) is search('command="/usr/local/sbin/infra-deregister"')
fail_msg: "deployer authorized_keys does not include forced-command keys for infra-register-stdin/infra-deregister"
when: is_services_host
- name: Check that expected compose directories exist
stat:
path: "{{ item.dir }}/docker-compose.yml"
register: compose_files
loop: "{{ expected_stacks | default([]) }}"
changed_when: false
- name: Fail if any compose file is missing
assert:
that:
- item.stat.exists
fail_msg: "Missing docker-compose.yml for {{ item.item.name }} at {{ item.item.dir }}/docker-compose.yml"
loop: "{{ compose_files.results | default([]) }}"
when: expected_stacks is defined
- name: Read expected services per stack
command: docker compose config --services
args:
chdir: "{{ item.dir }}"
register: stack_expected
loop: "{{ expected_stacks | default([]) }}"
changed_when: false
- name: Read service status/health per stack (docker inspect)
shell: |
set -euo pipefail
ids=$(docker compose ps -q)
if [ -z "${ids}" ]; then
exit 0
fi
{% raw %}docker inspect --format '{{ index .Config.Labels "com.docker.compose.service" }} {{ .State.Status }} {{ if .State.Health }}{{ .State.Health.Status }}{{ else }}none{{ end }}' ${ids}{% endraw %}
args:
chdir: "{{ item.dir }}"
register: stack_status
loop: "{{ expected_stacks | default([]) }}"
changed_when: false
failed_when: false
- name: Assert all services in each stack are running (and healthy if healthcheck exists)
assert:
that:
- (expected | difference(running_services)) | length == 0
- bad_health_services | length == 0
fail_msg: >-
Stack {{ stack.name }} service status unhealthy.
Missing running={{ expected | difference(running_services) }}.
Bad health={{ bad_health_services }}.
Expected={{ expected }}
Inspect={{ status_lines }}
loop: "{{ (expected_stacks | default([])) | zip(stack_expected.results, stack_status.results) | list }}"
vars:
stack: "{{ item.0 }}"
expected: "{{ item.1.stdout_lines | default([]) }}"
status_lines: "{{ item.2.stdout_lines | default([]) }}"
running_services: >-
{{ status_lines
| map('regex_findall', '^(\S+)\s+running\s+')
| select('truthy')
| map('first')
| list }}
ok_services: >-
{{ status_lines
| map('regex_findall', '^(\S+)\s+running\s+(?:healthy|none)\s*$')
| select('truthy')
| map('first')
| list }}
bad_health_services: >-
{{ (running_services | default([])) | difference(ok_services | default([])) }}
when: expected_stacks is defined
- name: Ensure proxy network exists
command: docker network inspect proxy
register: proxy_network
changed_when: false
- name: Ensure monitoring network exists on services host
command: docker network inspect monitoring
register: monitoring_network
changed_when: false
when: is_services_host
- name: Check Prometheus readiness on services host
command: docker compose exec -T prometheus wget -qO- http://127.0.0.1:9090/-/ready
args:
chdir: /opt/prometheus
register: prometheus_ready
changed_when: false
when: is_services_host
- name: Fail if Prometheus is not ready
assert:
that:
- prometheus_ready.stdout | default('') in ['Prometheus is Ready.', 'Prometheus Server is Ready.']
fail_msg: "Prometheus readiness check failed. Output={{ prometheus_ready.stdout | default('') }}"
when: is_services_host
- name: Check Grafana health on services host
command: docker compose exec -T grafana wget -qO- http://127.0.0.1:3000/api/health
args:
chdir: /opt/grafana
register: grafana_health
changed_when: false
failed_when: false
when: is_services_host
- name: Fail if Grafana health endpoint is not reachable
assert:
that:
- grafana_health.rc == 0
fail_msg: "Grafana health endpoint check failed (inside container). rc={{ grafana_health.rc }} output={{ grafana_health.stdout | default('') }}"
when: is_services_host
- name: Check Loki readiness on services host
uri:
url: http://127.0.0.1:3100/ready
method: GET
status_code: [200, 503]
register: loki_ready
until: loki_ready.status == 200
retries: 30
delay: 2
changed_when: false
when: is_services_host
- name: Check Traefik dynamic config contains Grafana router rule
shell: |
set -euo pipefail
grep -Fq 'Host(`{{ grafana_hostname }}`)' /opt/traefik/dynamic/base.yml
register: grafana_router_rule
changed_when: false
failed_when: false
when: is_services_host
- name: Fail if Grafana Traefik router rule is not configured as expected
assert:
that:
- grafana_router_rule.rc == 0
fail_msg: "Grafana Traefik router rule mismatch in /opt/traefik/dynamic/base.yml. expected=Host(`{{ grafana_hostname }}`)"
when: is_services_host
- name: Check Traefik dynamic config contains Forgejo router rule
shell: |
set -euo pipefail
grep -Fq 'Host(`{{ forgejo_hostname }}`)' /opt/traefik/dynamic/base.yml
register: forgejo_router_rule
changed_when: false
failed_when: false
when: is_services_host
- name: Fail if Forgejo Traefik router rule is not configured as expected
assert:
that:
- forgejo_router_rule.rc == 0
fail_msg: "Forgejo Traefik router rule mismatch in /opt/traefik/dynamic/base.yml. expected=Host(`{{ forgejo_hostname }}`)"
when: is_services_host
- name: Check Traefik dynamic config contains Authelia router rule
shell: |
set -euo pipefail
grep -Fq 'Host(`{{ auth_hostname }}`)' /opt/traefik/dynamic/base.yml
register: authelia_router_rule
changed_when: false
failed_when: false
when: is_services_host
- name: Fail if Authelia Traefik router rule is not configured as expected
assert:
that:
- authelia_router_rule.rc == 0
fail_msg: "Authelia Traefik router rule mismatch in /opt/traefik/dynamic/base.yml. expected=Host(`{{ auth_hostname }}`)"
when: is_services_host
- name: Check Traefik serves a valid TLS certificate for Grafana hostname (origin)
shell: |
set -euo pipefail
echo | openssl s_client -servername "{{ grafana_hostname }}" -connect 127.0.0.1:443 2>/dev/null | grep -q "Verify return code: 0 (ok)"
register: grafana_origin_tls
changed_when: false
retries: 30
delay: 2
until: grafana_origin_tls.rc == 0
when: is_services_host
- name: Check Traefik serves a valid TLS certificate for Forgejo hostname (origin)
shell: |
set -euo pipefail
echo | openssl s_client -servername "{{ forgejo_hostname }}" -connect 127.0.0.1:443 2>/dev/null | grep -q "Verify return code: 0 (ok)"
register: forgejo_origin_tls
changed_when: false
retries: 30
delay: 2
until: forgejo_origin_tls.rc == 0
when: is_services_host
- name: Check Traefik serves a valid TLS certificate for Authelia hostname (origin)
shell: |
set -euo pipefail
echo | openssl s_client -servername "{{ auth_hostname }}" -connect 127.0.0.1:443 2>/dev/null | grep -q "Verify return code: 0 (ok)"
register: authelia_origin_tls
changed_when: false
retries: 30
delay: 2
until: authelia_origin_tls.rc == 0
when: is_services_host
- name: Check Authelia OIDC discovery issuer (origin)
shell: |
set -euo pipefail
curl -k -sS --resolve "{{ auth_hostname }}:443:127.0.0.1" "https://{{ auth_hostname }}/.well-known/openid-configuration" \
| python3 -c 'import json,sys; print(json.load(sys.stdin).get("issuer",""))'
register: authelia_oidc_issuer
changed_when: false
retries: 30
delay: 2
until: authelia_oidc_issuer.stdout | default('') | length > 0
when: is_services_host
- name: Fail if Authelia OIDC discovery issuer is not configured as expected
assert:
that:
- authelia_oidc_issuer.stdout == ("https://" ~ auth_hostname)
fail_msg: "Authelia OIDC issuer mismatch. expected=https://{{ auth_hostname }} got={{ authelia_oidc_issuer.stdout | default('') }}"
when: is_services_host
- name: Check LLDAP web UI is reachable on services host
uri:
url: http://127.0.0.1:17170/
method: GET
status_code: [200, 302]
register: lldap_web
changed_when: false
when: is_services_host
- name: Read object storage configuration from controller environment
set_fact:
s3_bucket: "{{ lookup('env', 'S3_BUCKET') | default('', true) }}"
s3_region: "{{ lookup('env', 'S3_REGION') | default(lookup('env', 'TF_VAR_object_storage_region'), true) | default('us-east-1', true) }}"
changed_when: false
- name: Compute object storage endpoint from controller environment
set_fact:
s3_endpoint: "{{ lookup('env', 'S3_ENDPOINT') | default('https://' ~ s3_region ~ '.linodeobjects.com', true) }}"
changed_when: false
- name: Smoke test Linode Object Storage credentials (head-bucket)
command: >-
docker run --rm
-e AWS_ACCESS_KEY_ID
-e AWS_SECRET_ACCESS_KEY
-e AWS_DEFAULT_REGION
-e AWS_EC2_METADATA_DISABLED=true
amazon/aws-cli:2.15.57
s3api head-bucket --bucket {{ s3_bucket | quote }} --endpoint-url {{ s3_endpoint | quote }}
environment:
AWS_ACCESS_KEY_ID: "{{ S3_ACCESS_KEY_ID | default('') }}"
AWS_SECRET_ACCESS_KEY: "{{ S3_SECRET_ACCESS_KEY | default('') }}"
AWS_DEFAULT_REGION: "{{ s3_region }}"
register: s3_head_bucket
changed_when: false
no_log: true
when:
- (s3_bucket | default('') | length) > 0
- (S3_ACCESS_KEY_ID | default('') | length) > 0
- (S3_SECRET_ACCESS_KEY | default('') | length) > 0
- name: Fail if object storage smoke test failed
assert:
that:
- s3_head_bucket.rc == 0
fail_msg: "Object storage smoke test failed (head-bucket). Check S3_BUCKET/S3_REGION/S3_ENDPOINT and S3_ACCESS_KEY_ID/S3_SECRET_ACCESS_KEY in vault."
when:
- (s3_bucket | default('') | length) > 0
- (S3_ACCESS_KEY_ID | default('') | length) > 0
- (S3_SECRET_ACCESS_KEY | default('') | length) > 0
- name: Check Loki is reachable from web host (allowlist)
uri:
url: "http://{{ hostvars['services'].ansible_host }}:3100/ready"
method: GET
status_code: 200
register: loki_from_web_ready
when: is_web_host

setup.sh — 198 deletions
View file

@@ -1,198 +0,0 @@
#! /usr/bin/env bash
set -euo pipefail
vault_args=()
temp_vault_pass_file=""
# Print the command-line usage/help text to stdout.
# The quoted heredoc ('EOF') suppresses all expansion, so the text is
# emitted verbatim; callers pair this with `exit 0` for --help/-h.
usage() {
cat <<'EOF'
Usage: ./setup.sh [--no-ansible] [--no-terraform|--ansible-only] [--] [terraform <args>]
Defaults:
- Runs Terraform (plan/apply) in terraform/
- Generates Ansible inventory from Terraform outputs
- Runs Ansible playbooks
Options:
--no-ansible Run Terraform only (no Ansible).
--no-terraform Skip Terraform; requires existing inventory/hosts.yml.
--ansible-only Alias for --no-terraform.
--help Show this help.
Terraform passthrough:
./setup.sh -- terraform <cmd> [args]
./setup.sh -- <terraform-subcommand> [args]
EOF
}
# Remove the temporary vault-password file created when the password was
# read interactively; installed as the EXIT handler via `trap cleanup EXIT`.
cleanup() {
# Guard: only delete when a temp file was actually created and still exists.
if [[ -n "${temp_vault_pass_file}" ]] && [[ -f "${temp_vault_pass_file}" ]]; then
rm -f "${temp_vault_pass_file}"
fi
}
trap cleanup EXIT
ansible_extra_args=()
terraform_apply_args=()
terraform_passthrough=()
run_ansible=true
run_terraform=true
if [[ "${1:-}" == "--help" ]] || [[ "${1:-}" == "-h" ]]; then
usage
exit 0
fi
if [[ "${1:-}" == "--no-ansible" ]]; then
run_ansible=false
shift
fi
if [[ "${1:-}" == "--no-terraform" ]] || [[ "${1:-}" == "--ansible-only" ]]; then
run_terraform=false
shift
fi
if [[ "${1:-}" == "--" ]]; then
shift
if [[ "${1:-}" == "terraform" ]]; then
shift
terraform_passthrough=("$@")
else
case "${1:-}" in
output|state|workspace|providers|version|validate|fmt|taint|untaint|graph|show|console|import)
terraform_passthrough=("$@")
;;
*)
terraform_apply_args=("$@")
;;
esac
fi
fi
if [[ -f ".env" ]]; then
set -a
source .env
set +a
fi
if [[ "${run_terraform}" == "true" ]]; then
if ! command -v terraform >/dev/null 2>&1; then
echo "terraform is required (install terraform or run with --no-terraform)" >&2
exit 2
fi
fi
if [[ "${run_ansible}" == "true" ]]; then
if ! command -v ansible-playbook >/dev/null 2>&1; then
echo "ansible-playbook is required (install ansible or run with --no-ansible)" >&2
exit 2
fi
fi
if [[ -f "secrets/vault.yml" ]]; then
if ! command -v ansible-vault >/dev/null 2>&1; then
echo "ansible-vault is required to read secrets/vault.yml" >&2
exit 2
fi
if [[ -f "secrets/.vault_pass" ]]; then
vault_args+=(--vault-password-file "secrets/.vault_pass")
elif [[ -f ".vault_pass" ]]; then
vault_args+=(--vault-password-file ".vault_pass")
else
read -rsp "Vault password: " vault_password
echo
temp_vault_pass_file=$(mktemp)
chmod 600 "${temp_vault_pass_file}"
printf '%s' "${vault_password}" > "${temp_vault_pass_file}"
unset vault_password
vault_args+=(--vault-password-file "${temp_vault_pass_file}")
fi
if (( ${#vault_args[@]} )); then
vault_plain=$(ansible-vault view secrets/vault.yml "${vault_args[@]}")
else
vault_plain=$(ansible-vault view secrets/vault.yml)
fi
while IFS= read -r line; do
[[ -z "${line}" ]] && continue
[[ "${line}" == "---" ]] && continue
[[ "${line}" != TF_VAR_*:* ]] && [[ "${line}" != CF_DNS_API_TOKEN:* ]] && [[ "${line}" != CF_ZONE_API_TOKEN:* ]] && [[ "${line}" != S3_ACCESS_KEY_ID:* ]] && [[ "${line}" != S3_SECRET_ACCESS_KEY:* ]] && continue
key="${line%%:*}"
value="${line#*:}"
value="${value# }"
[[ -z "${value}" ]] && continue
escaped=$(printf '%q' "${value}")
eval "export ${key}=${escaped}"
done <<< "${vault_plain}"
if [[ -z "${TF_VAR_cloudflare_api_token:-}" ]] && [[ -n "${CF_DNS_API_TOKEN:-}" ]]; then
export TF_VAR_cloudflare_api_token="${CF_DNS_API_TOKEN}"
fi
if [[ -z "${TF_VAR_cloudflare_zone_id:-}" ]] && [[ -n "${CF_ZONE_API_TOKEN:-}" ]]; then
export TF_VAR_cloudflare_zone_id="${CF_ZONE_API_TOKEN}"
fi
fi
if [[ "${run_terraform}" == "true" ]]; then
terraform -chdir=terraform init
if (( ${#terraform_passthrough[@]} )); then
terraform -chdir=terraform "${terraform_passthrough[@]}"
exit 0
fi
if (( ${#terraform_apply_args[@]} )); then
terraform -chdir=terraform apply "${terraform_apply_args[@]}"
else
terraform -chdir=terraform plan -out=tfplan
terraform -chdir=terraform apply tfplan
fi
rm -f terraform/tfplan
web_ipv4=$(terraform -chdir=terraform output -raw web_ip)
web_ipv6=$(terraform -chdir=terraform output -raw web_ipv6)
services_ipv4=$(terraform -chdir=terraform output -raw services_ip)
ssh_user=${TF_VAR_user:-ansible}
mkdir -p inventory/host_vars
cat > inventory/hosts.yml <<EOF
all:
children:
web_hosts:
hosts:
web:
ansible_host: ${web_ipv4}
ansible_port: ${TF_VAR_ssh_port:-22}
ansible_user: ${ssh_user}
services_hosts:
hosts:
services:
ansible_host: ${services_ipv4}
ansible_port: ${TF_VAR_ssh_port:-22}
ansible_user: ${ssh_user}
EOF
cat > inventory/host_vars/web.yml <<EOF
public_ipv4: ${web_ipv4}
public_ipv6: ${web_ipv6%%/*}
EOF
else
if [[ ! -f inventory/hosts.yml ]]; then
echo "inventory/hosts.yml is missing; run without --no-terraform at least once to generate it" >&2
exit 2
fi
fi
if [[ "${run_ansible}" == "true" ]]; then
if [[ -n "${vault_args+x}" ]] && (( ${#vault_args[@]} )); then
ansible_extra_args=("${vault_args[@]}")
fi
ansible-playbook playbooks/services.yml ${ansible_extra_args[@]+"${ansible_extra_args[@]}"}
ansible-playbook playbooks/app.yml ${ansible_extra_args[@]+"${ansible_extra_args[@]}"}
fi