Option B: stop unused services; add restricted SSH scripts
Some checks failed
Deploy / deploy (push) Failing after 7s
Some checks failed
Deploy / deploy (push) Failing after 7s
This commit is contained in:
parent
4cd7e72e2b
commit
0291800ef5
14 changed files with 350 additions and 7 deletions
|
|
@ -49,7 +49,8 @@ jobs:
|
|||
set -euo pipefail
|
||||
APP_NAME="${{ github.event.repository.name }}"
|
||||
echo "Registering app $APP_NAME with infra-controller..."
|
||||
test -f .infra.toml
|
||||
ssh -i ~/.ssh/id_ed25519 "$SERVICE_USER@$SERVICE_HOST" \
|
||||
"cat > /var/run/active-apps/$APP_NAME.toml.tmp && mv /var/run/active-apps/$APP_NAME.toml.tmp /var/run/active-apps/$APP_NAME.toml" \
|
||||
< .infra.toml
|
||||
if [[ -f .infra.toml ]]; then
|
||||
ssh -i ~/.ssh/id_ed25519 "$SERVICE_USER@$SERVICE_HOST" infra-register-stdin "$APP_NAME" < .infra.toml
|
||||
else
|
||||
ssh -i ~/.ssh/id_ed25519 "$SERVICE_USER@$SERVICE_HOST" infra-deregister "$APP_NAME"
|
||||
fi
|
||||
|
|
|
|||
11
.forgejo/workflows/reusable_test.yml
Normal file
11
.forgejo/workflows/reusable_test.yml
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
name: Reusable
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: docker
|
||||
steps:
|
||||
- name: Hello
|
||||
run: echo "Hello from reusable"
|
||||
15
.gitignore
vendored
Normal file
15
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
.pytest_cache/
|
||||
.ruff_cache/
|
||||
|
||||
*.egg-info/
|
||||
.eggs/
|
||||
|
||||
build/
|
||||
dist/
|
||||
|
||||
.venv/
|
||||
venv/
|
||||
1
.python-version
Normal file
1
.python-version
Normal file
|
|
@ -0,0 +1 @@
|
|||
3.11
|
||||
14
README.md
14
README.md
|
|
@ -34,6 +34,8 @@ To avoid running a daemon or polling timer, you can trigger a one-shot run whene
|
|||
- enable path trigger: `sudo systemctl enable --now infra-controller.path`
|
||||
- view logs: `journalctl -u infra-controller-once.service -f`
|
||||
|
||||
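Services that are no longer required are stopped with `docker compose down` once `grace_period_minutes` (see config) have passed. The check only happens when the controller runs: the first run that sees a service as unused starts its grace period, and the first run after that period has expired stops it, so with the path-triggered one-shot setup a stop can be delayed until the next trigger.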
|
||||
|
||||
## Remote app registration
|
||||
|
||||
Run `infra-controller` on the service server. When you deploy, create/update a registration file in `/var/run/active-apps/` (this triggers the path unit).
|
||||
|
|
@ -51,3 +53,15 @@ ssh infractl@service-host \
|
|||
"cat > /var/run/active-apps/$APP_NAME.toml.tmp && mv /var/run/active-apps/$APP_NAME.toml.tmp /var/run/active-apps/$APP_NAME.toml" \
|
||||
< .infra.toml
|
||||
```
|
||||
|
||||
## Restricted SSH keys (recommended)
|
||||
|
||||
If you want to avoid giving CI a general shell on the services server, install the helper scripts to `/usr/local/sbin` (see `install.sh`) and restrict the runner key in `authorized_keys`.
|
||||
|
||||
Example (services server, `~infractl/.ssh/authorized_keys`):
|
||||
|
||||
```text
|
||||
command="/usr/local/sbin/infra-register-stdin",no-pty,no-agent-forwarding,no-port-forwarding,no-X11-forwarding ssh-ed25519 AAAA... runner
|
||||
```
|
||||
|
||||
For deregistration, use a separate key restricted to `/usr/local/sbin/infra-deregister`.
|
||||
|
|
|
|||
|
|
@ -27,6 +27,13 @@ sudo python3 -m venv /opt/infra-controller/venv
|
|||
sudo /opt/infra-controller/venv/bin/pip install --upgrade pip
|
||||
sudo /opt/infra-controller/venv/bin/pip install -e .
|
||||
|
||||
echo "Installing helper scripts..."
|
||||
sudo install -d /usr/local/sbin
|
||||
sudo install -m 0755 scripts/deploy-app /usr/local/sbin/deploy-app
|
||||
sudo install -m 0755 scripts/sync-infra /usr/local/sbin/sync-infra
|
||||
sudo install -m 0755 scripts/infra-register-stdin /usr/local/sbin/infra-register-stdin
|
||||
sudo install -m 0755 scripts/infra-deregister /usr/local/sbin/infra-deregister
|
||||
|
||||
if [ ! -f /etc/infra-controller/config.toml ]; then
|
||||
echo "Installing default configuration..."
|
||||
sudo cp config/controller.toml.example /etc/infra-controller/config.toml
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ infra-controller = "infra_controller.__main__:main"
|
|||
infra-register = "infra_controller.cli:register"
|
||||
infra-deregister = "infra_controller.cli:deregister"
|
||||
infra-status = "infra_controller.cli:status"
|
||||
infra-ensure = "infra_controller.cli:ensure_service_cli"
|
||||
|
||||
[tool.setuptools]
|
||||
package-dir = {"" = "src"}
|
||||
|
|
@ -46,3 +47,6 @@ where = ["src"]
|
|||
|
||||
[tool.ruff]
|
||||
line-length = 100
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
asyncio_default_fixture_loop_scope = "function"
|
||||
|
|
|
|||
43
scripts/deploy-app
Normal file
43
scripts/deploy-app
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
#!/usr/bin/env bash
# deploy-app: check out a specific commit of an already-cloned app, run its
# deploy.sh, then sync its infra registration.
#
# Usage: deploy-app <app_name> <git_ref>
#   app_name  directory name under /srv/apps (letters, digits, ".", "_", "-")
#   git_ref   abbreviated or full hexadecimal commit id (7-40 hex digits)
#
# Exit status: 2 for usage/validation errors, 1 when the checkout or an
# executable deploy.sh is missing; any other failing command aborts the
# script with that command's status (set -e).
set -euo pipefail

APP_NAME="${1:-}"
GIT_REF="${2:-}"

if [[ -z "$APP_NAME" || -z "$GIT_REF" ]]; then
  echo "usage: deploy-app <app_name> <git_ref>" >&2
  exit 2
fi

# "." and ".." satisfy the character class but would make APP_DIR resolve to
# /srv/apps itself or to /srv, so they are rejected explicitly.
if ! [[ "$APP_NAME" =~ ^[A-Za-z0-9._-]+$ ]] || [[ "$APP_NAME" == "." || "$APP_NAME" == ".." ]]; then
  echo "invalid app name: $APP_NAME" >&2
  exit 2
fi

# Only hexadecimal commit ids are accepted; branch and tag names are refused.
if ! [[ "$GIT_REF" =~ ^[0-9a-fA-F]{7,40}$ ]]; then
  echo "invalid git ref: $GIT_REF" >&2
  exit 2
fi

APP_DIR="/srv/apps/$APP_NAME"

if [[ ! -d "$APP_DIR/.git" ]]; then
  echo "app repo not present at $APP_DIR; clone it first (or extend deploy-app to clone)" >&2
  exit 1
fi

cd "$APP_DIR"

git fetch --all --prune

# -f: local modifications in the work tree must not block the deployment.
git checkout -f "$GIT_REF"

git submodule update --init --recursive

if [[ -x "./deploy.sh" ]]; then
  ./deploy.sh
else
  echo "ERROR: deploy.sh missing or not executable" >&2
  exit 1
fi

# Register (or deregister) the app's infra requirements for the checked-out commit.
/usr/local/sbin/sync-infra "$APP_NAME" "$APP_DIR"
|
||||
16
scripts/infra-deregister
Normal file
16
scripts/infra-deregister
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
#!/usr/bin/env bash
# infra-deregister: remove an app's registration file(s) from
# /var/run/active-apps.
#
# Usage: infra-deregister <app_name>
#
# When this script is installed as an SSH forced command (command="..." in
# authorized_keys), sshd runs it without arguments and exposes the command
# line requested by the client in SSH_ORIGINAL_COMMAND. In that case the app
# name is taken from there; the request must have the form
# "infra-deregister <app_name>".
#
# Exit status: 2 for usage/validation errors.
set -euo pipefail

APP_NAME="${1:-}"

if [[ -z "$APP_NAME" && -n "${SSH_ORIGINAL_COMMAND:-}" ]]; then
  # Plain whitespace split: read -a performs no globbing or expansion.
  read -r -a requested <<< "$SSH_ORIGINAL_COMMAND"
  if [[ ${#requested[@]} -eq 2 && "${requested[0]##*/}" == "infra-deregister" ]]; then
    APP_NAME="${requested[1]}"
  fi
fi

if [[ -z "$APP_NAME" ]]; then
  echo "usage: infra-deregister <app_name>" >&2
  exit 2
fi

# "." and ".." match the character class but are never valid app names.
if ! [[ "$APP_NAME" =~ ^[A-Za-z0-9._-]+$ ]] || [[ "$APP_NAME" == "." || "$APP_NAME" == ".." ]]; then
  echo "invalid app name: $APP_NAME" >&2
  exit 2
fi

# Remove every supported registration format; rm -f ignores missing files.
rm -f "/var/run/active-apps/$APP_NAME.toml" "/var/run/active-apps/$APP_NAME.yml" "/var/run/active-apps/$APP_NAME.yaml"
|
||||
24
scripts/infra-register-stdin
Normal file
24
scripts/infra-register-stdin
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
#!/usr/bin/env bash
# infra-register-stdin: install the registration read from stdin as
# /var/run/active-apps/<app_name>.toml.
#
# Usage: infra-register-stdin <app_name> < registration.toml
#
# When this script is installed as an SSH forced command (command="..." in
# authorized_keys), sshd runs it without arguments and exposes the command
# line requested by the client in SSH_ORIGINAL_COMMAND. In that case the app
# name is taken from there; the request must have the form
# "infra-register-stdin <app_name>".
#
# The content is written to <app_name>.toml.tmp first and then renamed, so a
# partially uploaded file never appears under the final name.
#
# Exit status: 2 for usage/validation errors.
set -euo pipefail

APP_NAME="${1:-}"

if [[ -z "$APP_NAME" && -n "${SSH_ORIGINAL_COMMAND:-}" ]]; then
  # Plain whitespace split: read -a performs no globbing or expansion.
  read -r -a requested <<< "$SSH_ORIGINAL_COMMAND"
  if [[ ${#requested[@]} -eq 2 && "${requested[0]##*/}" == "infra-register-stdin" ]]; then
    APP_NAME="${requested[1]}"
  fi
fi

if [[ -z "$APP_NAME" ]]; then
  echo "usage: infra-register-stdin <app_name>" >&2
  exit 2
fi

# "." and ".." match the character class but are never valid app names.
if ! [[ "$APP_NAME" =~ ^[A-Za-z0-9._-]+$ ]] || [[ "$APP_NAME" == "." || "$APP_NAME" == ".." ]]; then
  echo "invalid app name: $APP_NAME" >&2
  exit 2
fi

DST_DIR="/var/run/active-apps"
DST="$DST_DIR/$APP_NAME.toml"
# DST already ends in ".toml"; append only ".tmp" (was "$DST.toml.tmp").
TMP="$DST.tmp"

mkdir -p "$DST_DIR"

# Do not leave a stale temp file behind when the upload is interrupted.
# After a successful mv the file no longer exists and rm -f is a no-op.
trap 'rm -f -- "$TMP"' EXIT

cat > "$TMP"

mv "$TMP" "$DST"
|
||||
23
scripts/sync-infra
Normal file
23
scripts/sync-infra
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
#!/usr/bin/env bash
# sync-infra: mirror an app's .infra.toml to the services server.
#
# Usage: sync-infra <app_name> <app_dir>
#
# If <app_dir>/.infra.toml exists it is streamed to infra-register-stdin on
# the services server; otherwise the app is deregistered there.
#
# Environment:
#   INFRA_SSH_TARGET  ssh destination (default: infra@services-server)
#
# Exit status: 2 for usage/validation errors, otherwise the status of ssh.
set -euo pipefail

APP_NAME="${1:-}"
APP_DIR="${2:-}"

# The default keeps the previously hard-coded destination.
SSH_TARGET="${INFRA_SSH_TARGET:-infra@services-server}"

if [[ -z "$APP_NAME" || -z "$APP_DIR" ]]; then
  echo "usage: sync-infra <app_name> <app_dir>" >&2
  exit 2
fi

# "." and ".." match the character class but are never valid app names.
if ! [[ "$APP_NAME" =~ ^[A-Za-z0-9._-]+$ ]] || [[ "$APP_NAME" == "." || "$APP_NAME" == ".." ]]; then
  echo "invalid app name: $APP_NAME" >&2
  exit 2
fi

INFRA_FILE="$APP_DIR/.infra.toml"

if [[ -f "$INFRA_FILE" ]]; then
  ssh "$SSH_TARGET" infra-register-stdin "$APP_NAME" < "$INFRA_FILE"
else
  ssh "$SSH_TARGET" infra-deregister "$APP_NAME"
fi
|
||||
|
|
@ -1,7 +1,10 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from infra_controller.config import ControllerConfig
|
||||
from infra_controller.discovery import AppRegistration, DiscoveryManager
|
||||
from infra_controller.service_manager import ServiceManager
|
||||
|
|
@ -10,10 +13,15 @@ from infra_controller.service_manager import ServiceManager
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
class InfraController:
|
||||
def __init__(self, cfg: ControllerConfig):
|
||||
def __init__(
|
||||
self,
|
||||
cfg: ControllerConfig,
|
||||
discovery: DiscoveryManager | None = None,
|
||||
services: ServiceManager | None = None,
|
||||
):
|
||||
self._cfg = cfg
|
||||
self._discovery = DiscoveryManager(cfg.discovery)
|
||||
self._services = ServiceManager(cfg.docker)
|
||||
self._discovery = discovery or DiscoveryManager(cfg.discovery)
|
||||
self._services = services or ServiceManager(cfg.docker)
|
||||
|
||||
def run(self) -> None:
|
||||
while True:
|
||||
|
|
@ -23,10 +31,55 @@ class InfraController:
|
|||
def run_once(self) -> None:
    """Run one reconciliation pass: ensure required services, stop stale ones.

    Every service required by a discovered app is ensured. A service that an
    earlier pass ensured but that is no longer required gets an
    ``unused_since`` timestamp on the first pass that sees it unused, and is
    stopped by a later pass once ``grace_period_minutes`` have elapsed.

    The bookkeeping (``unused_since`` and ``known_services``) is written to
    ``cfg.services.state_file`` even when ensuring or stopping a service
    fails, so one broken service does not reset the grace timers of the
    others. A failed stop no longer aborts the loop: the remaining services
    are still processed and the failure is reported at the end.

    Raises:
        RuntimeError: if stopping one or more services returned a non-zero
            exit status; the message carries their stderr (or stdout),
            one entry per line.
    """
    discovered = self._discovery.discover_all()
    required = self._required_services(discovered)
    state = self._load_state(self._cfg.services.state_file)

    # The state file is external input: tolerate missing or mistyped entries.
    unused_since = state.get("unused_since")
    if not isinstance(unused_since, dict):
        unused_since = {}

    known_services_val = state.get("known_services")
    if isinstance(known_services_val, list):
        known_services = {s for s in known_services_val if isinstance(s, str) and s.strip()}
    else:
        known_services = set()

    now = time.time()
    stop_failures: list[str] = []

    try:
        for service in sorted(required):
            logger.info("Ensuring service: %s", service)
            self.ensure_service(service)
            # Required again: cancel any grace period that was running.
            unused_since.pop(service, None)
            known_services.add(service)

        # Anything with a running grace period is a stop candidate too.
        known_services |= set(unused_since)
        grace_seconds = int(self._cfg.services.grace_period_minutes) * 60

        for service in sorted(known_services - set(required)):
            since = unused_since.get(service)
            if since is None:
                unused_since[service] = now
                logger.info("Service no longer required (grace period started): %s", service)
                continue

            try:
                since_ts = float(since)
            except (TypeError, ValueError, OverflowError):
                # Unreadable timestamp in the state file: restart the grace period.
                unused_since[service] = now
                continue

            if (now - since_ts) < grace_seconds:
                continue

            logger.info("Stopping unused service: %s", service)
            # NOTE(review): ServiceManager.stop_service raises FileNotFoundError
            # when the service directory is gone; that still propagates.
            res = self._services.stop_service(service)
            if res.returncode != 0:
                # Keep the bookkeeping entry so the stop is retried next pass.
                stop_failures.append(str(res.stderr or res.stdout))
                continue
            unused_since.pop(service, None)
            known_services.discard(service)
    finally:
        state["unused_since"] = unused_since
        state["known_services"] = sorted(known_services)
        self._save_state(self._cfg.services.state_file, state)

    if stop_failures:
        raise RuntimeError("\n".join(stop_failures))
|
||||
|
||||
def ensure_service(self, service_name: str) -> None:
|
||||
res = self._services.apply_service(service_name)
|
||||
|
|
@ -50,3 +103,22 @@ class InfraController:
|
|||
required.add(services.strip())
|
||||
|
||||
return required
|
||||
|
||||
def _load_state(self, path: Path) -> dict:
    """Return the JSON object stored at ``path``, or ``{}`` when unavailable.

    A missing file, unreadable or malformed content, and a top-level JSON
    value that is not an object all yield an empty dict; no ``Exception``
    is propagated to the caller.
    """
    try:
        if not path.exists():
            return {}
        with open(path, "r", encoding="utf-8") as handle:
            loaded = json.load(handle)
    except Exception:
        return {}
    return loaded if isinstance(loaded, dict) else {}
|
||||
|
||||
def _save_state(self, path: Path, state: dict) -> None:
    """Write ``state`` as JSON to ``path`` by way of a sibling ``.tmp`` file.

    The parent directory is created when missing. The data is first written
    to ``path`` with ``.tmp`` appended to its suffix, and that file is then
    moved over ``path`` with ``Path.replace``.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    staging = path.with_suffix(f"{path.suffix}.tmp")
    with staging.open("w", encoding="utf-8") as out:
        json.dump(state, out)
    staging.replace(path)
|
||||
|
|
|
|||
|
|
@ -64,3 +64,21 @@ class ServiceManager:
|
|||
|
||||
proc = subprocess.run(cmd, capture_output=True, text=True, cwd=str(service_dir))
|
||||
return ServiceResult(returncode=proc.returncode, stdout=proc.stdout, stderr=proc.stderr)
|
||||
|
||||
def stop_service(self, service_name: str) -> ServiceResult:
    """Run ``docker compose down`` for the named service.

    The command is executed with the service directory as working directory
    and the compose file resolved for that directory passed via ``-f``.

    Returns:
        A ``ServiceResult`` carrying the exit code and the captured
        stdout/stderr text of the compose command.

    Raises:
        FileNotFoundError: if the service directory does not exist.
    """
    service_dir = self.service_dir_for_service(service_name)
    if not service_dir.exists():
        raise FileNotFoundError(f"Service directory not found: {service_dir}")

    compose_file = self._resolve_compose_file(service_dir)
    completed = subprocess.run(
        ["docker", "compose", "-f", str(compose_file), "down"],
        capture_output=True,
        text=True,
        cwd=str(service_dir),
    )
    return ServiceResult(
        returncode=completed.returncode,
        stdout=completed.stdout,
        stderr=completed.stderr,
    )
|
||||
|
|
|
|||
94
tests/test_controller.py
Normal file
94
tests/test_controller.py
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from infra_controller.config import ControllerConfig
|
||||
from infra_controller.controller import InfraController
|
||||
from infra_controller.discovery import AppRegistration, InfraMetadata
|
||||
|
||||
|
||||
@dataclass
class FakeServiceResult:
    """Test double for a service command result; defaults describe a success."""

    # Exit status of the simulated command.
    returncode: int = 0
    # Captured standard output of the simulated command.
    stdout: str = ""
    # Captured standard error of the simulated command.
    stderr: str = ""
|
||||
|
||||
|
||||
class FakeServiceManager:
    """In-memory service manager double that records every call it receives."""

    def __init__(self):
        # Service names in the order apply_service / stop_service saw them.
        self.applied: list[str] = []
        self.stopped: list[str] = []

    def apply_service(self, service_name: str) -> FakeServiceResult:
        """Record an apply request and report success."""
        result = FakeServiceResult()
        self.applied.append(service_name)
        return result

    def stop_service(self, service_name: str) -> FakeServiceResult:
        """Record a stop request and report success."""
        result = FakeServiceResult()
        self.stopped.append(service_name)
        return result
|
||||
|
||||
|
||||
class FakeDiscoveryManager:
    """Discovery double that serves a caller-controlled set of registrations."""

    def __init__(self, apps: dict[str, AppRegistration]):
        self._apps = apps

    def set_apps(self, apps: dict[str, AppRegistration]) -> None:
        """Replace the registrations returned by later ``discover_all`` calls."""
        self._apps = apps

    def discover_all(self) -> dict[str, AppRegistration]:
        """Return a shallow copy of the current registrations."""
        return {**self._apps}
|
||||
|
||||
|
||||
def _app(name: str, services: list[str]) -> AppRegistration:
    """Build an ``AppRegistration`` called ``name`` that requires ``services``."""
    return AppRegistration(
        name=name,
        metadata=InfraMetadata(project=name, requires={"services": services}),
        last_seen=datetime.now(),
        discovery_method="test",
    )
|
||||
|
||||
|
||||
def test_controller_stops_unused_services_after_grace_period(tmp_path: Path, monkeypatch):
    """A service no app requires any more is stopped once its grace period is over.

    With a zero-minute grace period the first pass without the app only
    records when the service became unused; the following pass stops it.
    """

    def freeze_clock(ts: float) -> None:
        monkeypatch.setattr("infra_controller.controller.time.time", lambda: ts)

    cfg = ControllerConfig()
    cfg.services.state_file = tmp_path / "state.json"
    cfg.services.grace_period_minutes = 0

    services = FakeServiceManager()
    discovery = FakeDiscoveryManager({"a": _app("a", ["svc1"])})
    controller = InfraController(cfg, discovery=discovery, services=services)

    # Pass 1: the app is registered, so its service is ensured.
    freeze_clock(0.0)
    controller.run_once()
    assert services.applied == ["svc1"]
    assert services.stopped == []

    # Pass 2: the app is gone; this pass only starts the grace period.
    discovery.set_apps({})
    freeze_clock(10.0)
    controller.run_once()
    assert services.stopped == []

    # Pass 3: the (zero-length) grace period has elapsed, so the service stops.
    freeze_clock(20.0)
    controller.run_once()
    assert services.stopped == ["svc1"]
|
||||
|
||||
|
||||
def test_controller_does_not_stop_service_within_grace_period(tmp_path: Path, monkeypatch):
    """A service stays up while less than the grace period has elapsed.

    The grace period is one minute and only ten seconds pass between the
    pass that records the service as unused and the final pass.
    """

    def freeze_clock(ts: float) -> None:
        monkeypatch.setattr("infra_controller.controller.time.time", lambda: ts)

    cfg = ControllerConfig()
    cfg.services.state_file = tmp_path / "state.json"
    cfg.services.grace_period_minutes = 1

    services = FakeServiceManager()
    discovery = FakeDiscoveryManager({"a": _app("a", ["svc1"])})
    controller = InfraController(cfg, discovery=discovery, services=services)

    # Pass 1: the app is registered.
    freeze_clock(0.0)
    controller.run_once()

    # Pass 2: the app is gone; the grace period starts at t=10.
    discovery.set_apps({})
    freeze_clock(10.0)
    controller.run_once()

    # Pass 3: only ten seconds into a sixty-second grace period.
    freeze_clock(20.0)
    controller.run_once()
    assert services.stopped == []
|
||||
Loading…
Reference in a new issue