Review notes (text before the first "diff --git" header is ignored by git-apply and patch).

Layout: the patch was restored to one diff line per line. Leading whitespace of
context lines (deploy.yml, install.sh, README.md, controller.py, service_manager.py)
was reconstructed from syntax; check it against the target tree or apply with
--ignore-whitespace. "index" blob ids are carried over unchanged and are stale
for every file edited below; regenerate the patch with git diff after applying.

Changes relative to the submitted patch:
  * scripts/infra-register-stdin: temp file was "$DST.toml.tmp", i.e.
    <app>.toml.toml.tmp; now "$DST.tmp".
  * scripts/infra-register-stdin, scripts/infra-deregister: fall back to
    SSH_ORIGINAL_COMMAND when started without arguments, so the forced-command
    setup recommended in README.md actually receives the app name.
  * scripts/*: usage messages now name the expected arguments.
  * controller.py run_once: state is saved in a finally block so grace timers
    survive a failing stop; narrowed the float() except clause and removed a
    dead assignment.
  * controller.py _load_state: unreadable state files are logged instead of
    being ignored silently.
  * reusable_test.yml: file now ends with a newline.

diff --git a/.forgejo/workflows/deploy.yml b/.forgejo/workflows/deploy.yml
index 9a69152..70e3eac 100644
--- a/.forgejo/workflows/deploy.yml
+++ b/.forgejo/workflows/deploy.yml
@@ -49,7 +49,8 @@ jobs:
           set -euo pipefail
           APP_NAME="${{ github.event.repository.name }}"
           echo "Registering app $APP_NAME with infra-controller..."
-          test -f .infra.toml
-          ssh -i ~/.ssh/id_ed25519 "$SERVICE_USER@$SERVICE_HOST" \
-            "cat > /var/run/active-apps/$APP_NAME.toml.tmp && mv /var/run/active-apps/$APP_NAME.toml.tmp /var/run/active-apps/$APP_NAME.toml" \
-            < .infra.toml
+          if [[ -f .infra.toml ]]; then
+            ssh -i ~/.ssh/id_ed25519 "$SERVICE_USER@$SERVICE_HOST" infra-register-stdin "$APP_NAME" < .infra.toml
+          else
+            ssh -i ~/.ssh/id_ed25519 "$SERVICE_USER@$SERVICE_HOST" infra-deregister "$APP_NAME"
+          fi
diff --git a/.forgejo/workflows/reusable_test.yml b/.forgejo/workflows/reusable_test.yml
new file mode 100644
index 0000000..3608098
--- /dev/null
+++ b/.forgejo/workflows/reusable_test.yml
@@ -0,0 +1,11 @@
+name: Reusable
+
+on:
+  workflow_call:
+
+jobs:
+  test:
+    runs-on: docker
+    steps:
+      - name: Hello
+        run: echo "Hello from reusable"
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c056bd0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,15 @@
+__pycache__/
+*.py[cod]
+*$py.class
+
+.pytest_cache/
+.ruff_cache/
+
+*.egg-info/
+.eggs/
+
+build/
+dist/
+
+.venv/
+venv/
diff --git a/.python-version b/.python-version
new file mode 100644
index 0000000..2c07333
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.11
diff --git a/README.md b/README.md
index 553595f..0518ea3 100644
--- a/README.md
+++ b/README.md
@@ -34,6 +34,8 @@ To avoid running a daemon or polling timer, you can trigger a one-shot run whene
 - enable path trigger: `sudo systemctl enable --now infra-controller.path`
 - view logs: `journalctl -u infra-controller-once.service -f`
 
+Services that are no longer required are stopped after `grace_period_minutes` (see config) using `docker compose down`.
+
 ## Remote app registration
 
 Run `infra-controller` on the service server. When you deploy, create/update a registration file in `/var/run/active-apps/` (this triggers the path unit).
@@ -51,3 +53,17 @@ ssh infractl@service-host \
   "cat > /var/run/active-apps/$APP_NAME.toml.tmp && mv /var/run/active-apps/$APP_NAME.toml.tmp /var/run/active-apps/$APP_NAME.toml" \
   < .infra.toml
 ```
+
+## Restricted SSH keys (recommended)
+
+If you want to avoid giving CI a general shell on the services server, install the helper scripts to `/usr/local/sbin` (see `install.sh`) and restrict the runner key in `authorized_keys`.
+
+Example (services server, `~infractl/.ssh/authorized_keys`):
+
+```text
+command="/usr/local/sbin/infra-register-stdin",no-pty,no-agent-forwarding,no-port-forwarding,no-X11-forwarding ssh-ed25519 AAAA... runner
+```
+
+With a forced `command=`, sshd ignores the command line sent by the client and only exposes it in `SSH_ORIGINAL_COMMAND`; the helper scripts read the app name from there when they are started without arguments.
+
+For deregistration, use a separate key restricted to `/usr/local/sbin/infra-deregister`.
diff --git a/install.sh b/install.sh
index 6340d08..644275e 100644
--- a/install.sh
+++ b/install.sh
@@ -27,6 +27,13 @@ sudo python3 -m venv /opt/infra-controller/venv
 sudo /opt/infra-controller/venv/bin/pip install --upgrade pip
 sudo /opt/infra-controller/venv/bin/pip install -e .
 
+echo "Installing helper scripts..."
+sudo install -d /usr/local/sbin
+sudo install -m 0755 scripts/deploy-app /usr/local/sbin/deploy-app
+sudo install -m 0755 scripts/sync-infra /usr/local/sbin/sync-infra
+sudo install -m 0755 scripts/infra-register-stdin /usr/local/sbin/infra-register-stdin
+sudo install -m 0755 scripts/infra-deregister /usr/local/sbin/infra-deregister
+
 if [ ! -f /etc/infra-controller/config.toml ]; then
     echo "Installing default configuration..."
     sudo cp config/controller.toml.example /etc/infra-controller/config.toml
diff --git a/pyproject.toml b/pyproject.toml
index 56cb0b3..6c8fa11 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,6 +37,7 @@ infra-controller = "infra_controller.__main__:main"
 infra-register = "infra_controller.cli:register"
 infra-deregister = "infra_controller.cli:deregister"
 infra-status = "infra_controller.cli:status"
+infra-ensure = "infra_controller.cli:ensure_service_cli"
 
 [tool.setuptools]
 package-dir = {"" = "src"}
@@ -46,3 +47,6 @@ where = ["src"]
 
 [tool.ruff]
 line-length = 100
+
+[tool.pytest.ini_options]
+asyncio_default_fixture_loop_scope = "function"
diff --git a/scripts/deploy-app b/scripts/deploy-app
new file mode 100644
index 0000000..7c87ee9
--- /dev/null
+++ b/scripts/deploy-app
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+# Check out <git-sha> in /srv/apps/<app-name>, run its deploy.sh, then call sync-infra.
+set -euo pipefail
+
+APP_NAME="${1:-}"
+GIT_REF="${2:-}"
+
+if [[ -z "$APP_NAME" || -z "$GIT_REF" ]]; then
+  echo "usage: deploy-app <app-name> <git-sha>" >&2
+  exit 2
+fi
+
+if ! [[ "$APP_NAME" =~ ^[A-Za-z0-9._-]+$ ]]; then
+  echo "invalid app name: $APP_NAME" >&2
+  exit 2
+fi
+
+if ! [[ "$GIT_REF" =~ ^[0-9a-fA-F]{7,40}$ ]]; then
+  echo "invalid git ref: $GIT_REF" >&2
+  exit 2
+fi
+
+APP_DIR="/srv/apps/$APP_NAME"
+
+if [[ ! -d "$APP_DIR/.git" ]]; then
+  echo "app repo not present at $APP_DIR; clone it first (or extend deploy-app to clone)" >&2
+  exit 1
+fi
+
+cd "$APP_DIR"
+git fetch --all --prune
+
+git checkout -f "$GIT_REF"
+
+git submodule update --init --recursive
+
+if [[ -x "./deploy.sh" ]]; then
+  ./deploy.sh
+else
+  echo "ERROR: deploy.sh missing or not executable" >&2
+  exit 1
+fi
+
+/usr/local/sbin/sync-infra "$APP_NAME" "$APP_DIR"
diff --git a/scripts/infra-deregister b/scripts/infra-deregister
new file mode 100644
index 0000000..af4f7d0
--- /dev/null
+++ b/scripts/infra-deregister
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+# Remove every registration file for <app-name> from /var/run/active-apps.
+set -euo pipefail
+
+APP_NAME="${1:-}"
+
+# Under an authorized_keys forced command sshd drops the client's arguments
+# and exposes the requested command line in SSH_ORIGINAL_COMMAND instead.
+if [[ -z "$APP_NAME" && -n "${SSH_ORIGINAL_COMMAND:-}" ]]; then
+  read -r REQUESTED APP_NAME _ <<<"$SSH_ORIGINAL_COMMAND"
+  if [[ "${REQUESTED##*/}" != "infra-deregister" ]]; then
+    echo "this key only permits infra-deregister" >&2
+    exit 2
+  fi
+fi
+
+if [[ -z "$APP_NAME" ]]; then
+  echo "usage: infra-deregister <app-name>" >&2
+  exit 2
+fi
+
+if ! [[ "$APP_NAME" =~ ^[A-Za-z0-9._-]+$ ]]; then
+  echo "invalid app name: $APP_NAME" >&2
+  exit 2
+fi
+
+rm -f "/var/run/active-apps/$APP_NAME.toml" "/var/run/active-apps/$APP_NAME.yml" "/var/run/active-apps/$APP_NAME.yaml"
diff --git a/scripts/infra-register-stdin b/scripts/infra-register-stdin
new file mode 100644
index 0000000..efe96ce
--- /dev/null
+++ b/scripts/infra-register-stdin
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# Write the registration read from stdin to /var/run/active-apps/<app-name>.toml
+# via a temp file in the same directory followed by mv.
+set -euo pipefail
+
+APP_NAME="${1:-}"
+
+# Under an authorized_keys forced command sshd drops the client's arguments
+# and exposes the requested command line in SSH_ORIGINAL_COMMAND instead.
+if [[ -z "$APP_NAME" && -n "${SSH_ORIGINAL_COMMAND:-}" ]]; then
+  read -r REQUESTED APP_NAME _ <<<"$SSH_ORIGINAL_COMMAND"
+  if [[ "${REQUESTED##*/}" != "infra-register-stdin" ]]; then
+    echo "this key only permits infra-register-stdin" >&2
+    exit 2
+  fi
+fi
+
+if [[ -z "$APP_NAME" ]]; then
+  echo "usage: infra-register-stdin <app-name> < registration.toml" >&2
+  exit 2
+fi
+
+if ! [[ "$APP_NAME" =~ ^[A-Za-z0-9._-]+$ ]]; then
+  echo "invalid app name: $APP_NAME" >&2
+  exit 2
+fi
+
+DST_DIR="/var/run/active-apps"
+DST="$DST_DIR/$APP_NAME.toml"
+# DST already ends in .toml, so only .tmp is appended here.
+TMP="$DST.tmp"
+
+mkdir -p "$DST_DIR"
+
+cat > "$TMP"
+
+mv "$TMP" "$DST"
diff --git a/scripts/sync-infra b/scripts/sync-infra
new file mode 100644
index 0000000..2d3a68e
--- /dev/null
+++ b/scripts/sync-infra
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+# Register <app-dir>/.infra.toml on the services server, or deregister the app if it is absent.
+set -euo pipefail
+
+APP_NAME="${1:-}"
+APP_DIR="${2:-}"
+
+if [[ -z "$APP_NAME" || -z "$APP_DIR" ]]; then
+  echo "usage: sync-infra <app-name> <app-dir>" >&2
+  exit 2
+fi
+
+if ! [[ "$APP_NAME" =~ ^[A-Za-z0-9._-]+$ ]]; then
+  echo "invalid app name: $APP_NAME" >&2
+  exit 2
+fi
+
+INFRA_FILE="$APP_DIR/.infra.toml"
+
+if [[ -f "$INFRA_FILE" ]]; then
+  ssh infra@services-server infra-register-stdin "$APP_NAME" < "$INFRA_FILE"
+else
+  ssh infra@services-server infra-deregister "$APP_NAME"
+fi
diff --git a/src/infra_controller/controller.py b/src/infra_controller/controller.py
index 319ab63..2a5e8b5 100644
--- a/src/infra_controller/controller.py
+++ b/src/infra_controller/controller.py
@@ -1,7 +1,10 @@
 from __future__ import annotations
 
+import json
 import logging
 import time
+from pathlib import Path
+
 from infra_controller.config import ControllerConfig
 from infra_controller.discovery import AppRegistration, DiscoveryManager
 from infra_controller.service_manager import ServiceManager
@@ -10,10 +13,15 @@ from infra_controller.service_manager import ServiceManager
 logger = logging.getLogger(__name__)
 
 class InfraController:
-    def __init__(self, cfg: ControllerConfig):
+    def __init__(
+        self,
+        cfg: ControllerConfig,
+        discovery: DiscoveryManager | None = None,
+        services: ServiceManager | None = None,
+    ):
         self._cfg = cfg
-        self._discovery = DiscoveryManager(cfg.discovery)
-        self._services = ServiceManager(cfg.docker)
+        self._discovery = discovery or DiscoveryManager(cfg.discovery)
+        self._services = services or ServiceManager(cfg.docker)
 
     def run(self) -> None:
         while True:
@@ -23,10 +31,58 @@ class InfraController:
     def run_once(self) -> None:
         discovered = self._discovery.discover_all()
         required = self._required_services(discovered)
+        state = self._load_state(self._cfg.services.state_file)
+        unused_since = state.get("unused_since")
+        if not isinstance(unused_since, dict):
+            unused_since = {}
+
+        known_services_val = state.get("known_services")
+        if isinstance(known_services_val, list):
+            known_services = {str(s) for s in known_services_val if isinstance(s, str) and s.strip()}
+        else:
+            known_services = set()
+
+        now = time.time()
 
         for service in sorted(required):
             logger.info("Ensuring service: %s", service)
             self.ensure_service(service)
+            unused_since.pop(service, None)
+            known_services.add(service)
+
+        known_services |= set(unused_since.keys())
+        grace_seconds = int(self._cfg.services.grace_period_minutes) * 60
+
+        # Persist state even when a stop fails part-way through, so grace
+        # timers started during this run are not lost; the error still propagates.
+        try:
+            for service in sorted(known_services - set(required)):
+                since = unused_since.get(service)
+                if since is None:
+                    unused_since[service] = now
+                    logger.info("Service no longer required (grace period started): %s", service)
+                    continue
+
+                try:
+                    since_ts = float(since)
+                except (TypeError, ValueError, OverflowError):
+                    # Unusable timestamp in the state file: restart the grace period.
+                    unused_since[service] = now
+                    continue
+
+                if (now - since_ts) < grace_seconds:
+                    continue
+
+                logger.info("Stopping unused service: %s", service)
+                res = self._services.stop_service(service)
+                if res.returncode != 0:
+                    raise RuntimeError(res.stderr or res.stdout)
+                unused_since.pop(service, None)
+                known_services.discard(service)
+        finally:
+            state["unused_since"] = unused_since
+            state["known_services"] = sorted(known_services)
+            self._save_state(self._cfg.services.state_file, state)
 
     def ensure_service(self, service_name: str) -> None:
         res = self._services.apply_service(service_name)
@@ -50,3 +106,25 @@ class InfraController:
                 required.add(services.strip())
 
         return required
+
+    def _load_state(self, path: Path) -> dict:
+        """Return the persisted controller state, or ``{}`` if absent or unreadable."""
+        try:
+            with open(path, "r", encoding="utf-8") as f:
+                data = json.load(f)
+        except FileNotFoundError:
+            return {}
+        except (OSError, ValueError) as exc:
+            # A corrupt state file must not wedge the controller, but say so:
+            # starting from {} forgets which services were previously started.
+            logger.warning("Ignoring unreadable state file %s: %s", path, exc)
+            return {}
+        return data if isinstance(data, dict) else {}
+
+    def _save_state(self, path: Path, state: dict) -> None:
+        """Write *state* as JSON to a sibling ``.tmp`` file, then rename it over *path*."""
+        path.parent.mkdir(parents=True, exist_ok=True)
+        tmp = path.with_suffix(path.suffix + ".tmp")
+        with open(tmp, "w", encoding="utf-8") as f:
+            json.dump(state, f)
+        tmp.replace(path)
diff --git a/src/infra_controller/service_manager.py b/src/infra_controller/service_manager.py
index 2cd692f..fee9b9a 100644
--- a/src/infra_controller/service_manager.py
+++ b/src/infra_controller/service_manager.py
@@ -64,3 +64,22 @@ class ServiceManager:
 
         proc = subprocess.run(cmd, capture_output=True, text=True, cwd=str(service_dir))
         return ServiceResult(returncode=proc.returncode, stdout=proc.stdout, stderr=proc.stderr)
+
+    def stop_service(self, service_name: str) -> ServiceResult:
+        """Stop *service_name* via ``docker compose down`` and return the command result."""
+        service_dir = self.service_dir_for_service(service_name)
+        if not service_dir.exists():
+            raise FileNotFoundError(f"Service directory not found: {service_dir}")
+
+        compose_file = self._resolve_compose_file(service_dir)
+
+        cmd = [
+            "docker",
+            "compose",
+            "-f",
+            str(compose_file),
+            "down",
+        ]
+
+        proc = subprocess.run(cmd, capture_output=True, text=True, cwd=str(service_dir))
+        return ServiceResult(returncode=proc.returncode, stdout=proc.stdout, stderr=proc.stderr)
diff --git a/tests/test_controller.py b/tests/test_controller.py
new file mode 100644
index 0000000..2e34550
--- /dev/null
+++ b/tests/test_controller.py
@@ -0,0 +1,94 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import datetime
+from pathlib import Path
+
+import pytest
+
+from infra_controller.config import ControllerConfig
+from infra_controller.controller import InfraController
+from infra_controller.discovery import AppRegistration, InfraMetadata
+
+
+@dataclass
+class FakeServiceResult:
+    returncode: int = 0
+    stdout: str = ""
+    stderr: str = ""
+
+
+class FakeServiceManager:
+    def __init__(self):
+        self.applied: list[str] = []
+        self.stopped: list[str] = []
+
+    def apply_service(self, service_name: str) -> FakeServiceResult:
+        self.applied.append(service_name)
+        return FakeServiceResult()
+
+    def stop_service(self, service_name: str) -> FakeServiceResult:
+        self.stopped.append(service_name)
+        return FakeServiceResult()
+
+
+class FakeDiscoveryManager:
+    def __init__(self, apps: dict[str, AppRegistration]):
+        self._apps = apps
+
+    def set_apps(self, apps: dict[str, AppRegistration]) -> None:
+        self._apps = apps
+
+    def discover_all(self) -> dict[str, AppRegistration]:
+        return dict(self._apps)
+
+
+def _app(name: str, services: list[str]) -> AppRegistration:
+    md = InfraMetadata(project=name, requires={"services": services})
+    return AppRegistration(name=name, metadata=md, last_seen=datetime.now(), discovery_method="test")
+
+
+def test_controller_stops_unused_services_after_grace_period(tmp_path: Path, monkeypatch):
+    cfg = ControllerConfig()
+    cfg.services.grace_period_minutes = 0
+    cfg.services.state_file = tmp_path / "state.json"
+
+    discovery = FakeDiscoveryManager({"a": _app("a", ["svc1"])})
+    services = FakeServiceManager()
+
+    c = InfraController(cfg, discovery=discovery, services=services)
+
+    monkeypatch.setattr("infra_controller.controller.time.time", lambda: 0.0)
+    c.run_once()
+    assert services.applied == ["svc1"]
+    assert services.stopped == []
+
+    discovery.set_apps({})
+    monkeypatch.setattr("infra_controller.controller.time.time", lambda: 10.0)
+    c.run_once()
+    assert services.stopped == []
+
+    monkeypatch.setattr("infra_controller.controller.time.time", lambda: 20.0)
+    c.run_once()
+    assert services.stopped == ["svc1"]
+
+
+def test_controller_does_not_stop_service_within_grace_period(tmp_path: Path, monkeypatch):
+    cfg = ControllerConfig()
+    cfg.services.grace_period_minutes = 1
+    cfg.services.state_file = tmp_path / "state.json"
+
+    discovery = FakeDiscoveryManager({"a": _app("a", ["svc1"])})
+    services = FakeServiceManager()
+    c = InfraController(cfg, discovery=discovery, services=services)
+
+    monkeypatch.setattr("infra_controller.controller.time.time", lambda: 0.0)
+    c.run_once()
+
+    discovery.set_apps({})
+    monkeypatch.setattr("infra_controller.controller.time.time", lambda: 10.0)
+    c.run_once()
+
+    monkeypatch.setattr("infra_controller.controller.time.time", lambda: 20.0)
+    c.run_once()
+    assert services.stopped == []