From 6646f3a382b64ef86307ae71e77e149884be92e6 Mon Sep 17 00:00:00 2001 From: Jeremie Fraeys Date: Thu, 12 Mar 2026 14:05:53 -0400 Subject: [PATCH] ci(docker): add test workflow and container architecture docs - Create docker-tests.yml for merge-to-main CI pipeline - Add mock GPU test matrix (NVIDIA, Metal, CPU-only) - Add AGENTS.md with container architecture rules: * Docker for CI/CD testing and deployments * Podman for ML experiment isolation only - Update .gitignore to track AGENTS.md --- .forgejo/workflows/docker-tests.yml | 160 ++++++++++++++++++++++++++++ .gitignore | 2 +- AGENTS.md | 126 ++++++++++++++++++++++ 3 files changed, 287 insertions(+), 1 deletion(-) create mode 100644 .forgejo/workflows/docker-tests.yml create mode 100644 AGENTS.md diff --git a/.forgejo/workflows/docker-tests.yml b/.forgejo/workflows/docker-tests.yml new file mode 100644 index 0000000..711cb7d --- /dev/null +++ b/.forgejo/workflows/docker-tests.yml @@ -0,0 +1,160 @@ +name: Docker Tests + +on: + push: + branches: [main] + paths-ignore: + - 'docs/**' + - 'README.md' + - 'CHANGELOG.md' + - '.forgejo/ISSUE_TEMPLATE/**' + - '**/*.md' + workflow_dispatch: + +concurrency: + group: ${{ gitea.workflow }}-${{ gitea.ref }} + cancel-in-progress: true + +permissions: + contents: read + security-events: write + +env: + GO_VERSION: '1.25.0' + ZIG_VERSION: '0.15.2' + +jobs: + docker-tests: + name: Docker Container Tests + runs-on: self-hosted + timeout-minutes: 45 + + services: + redis: + image: redis:7 + ports: + - 6379:6379 + options: >- + --health-cmd "redis-cli ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Go + run: | + REQUIRED_GO="${{ env.GO_VERSION }}" + if command -v go &> /dev/null && go version | grep -q "go${REQUIRED_GO}"; then + echo "Go ${REQUIRED_GO} already installed" + else + echo "Installing Go ${REQUIRED_GO}..." 
+ curl -fsSL --retry 3 "https://go.dev/dl/go${REQUIRED_GO}.linux-amd64.tar.gz" -o /tmp/go.tar.gz && sudo rm -rf /usr/local/go && sudo tar -C /usr/local -xzf /tmp/go.tar.gz || exit 1 # download first, then replace the old tree; never untar over an existing Go install + export PATH="/usr/local/go/bin:$PATH" + echo "/usr/local/go/bin" >> "$GITHUB_PATH" + fi + go version + + - name: Set up Zig + run: | + ZIG_VERSION="${{ env.ZIG_VERSION }}" + if command -v zig &> /dev/null && zig version | grep -qxF "${ZIG_VERSION}"; then + echo "Zig ${ZIG_VERSION} already installed" + else + echo "Installing Zig ${ZIG_VERSION}..." + ZIG_DIR="/usr/local/zig-${ZIG_VERSION}" + curl -fsSL --retry 3 "https://ziglang.org/download/${ZIG_VERSION}/zig-x86_64-linux-${ZIG_VERSION}.tar.xz" -o /tmp/zig.tar.xz + sudo mkdir -p "${ZIG_DIR}" + sudo tar -C "${ZIG_DIR}" --strip-components=1 -xJf /tmp/zig.tar.xz + sudo ln -sf "${ZIG_DIR}/zig" /usr/local/bin/zig + echo "${ZIG_DIR}" >> "$GITHUB_PATH" + fi + zig version + + - name: Run unit tests + run: go test -v -race ./... -tags=unit -timeout 15m + env: + LOG_LEVEL: warn + + - name: Run integration tests + run: go test -v -race ./tests/integration/... -timeout 20m + env: + LOG_LEVEL: warn + + - name: Run E2E tests + run: go test -v ./tests/e2e/... -timeout 20m + env: + LOG_LEVEL: warn + + - name: Run chaos tests + run: go test -v ./tests/chaos/... -timeout 10m + env: + LOG_LEVEL: warn + + - name: Run stress tests + run: go test -v ./tests/stress/... -timeout 5m + env: + LOG_LEVEL: warn + + - name: Run mock GPU tests - NVIDIA 4x A100 + run: go test -v ./tests/e2e/... -run TestCapability -timeout 10m + env: + FETCH_ML_MOCK_GPU_TYPE: NVIDIA + FETCH_ML_MOCK_GPU_COUNT: "4" + FETCH_ML_MOCK_VRAM_GB: "80" + LOG_LEVEL: warn + + - name: Run mock GPU tests - Metal 8x + run: go test -v ./tests/e2e/... -run TestCapability -timeout 10m + env: + FETCH_ML_MOCK_GPU_TYPE: Apple + FETCH_ML_MOCK_GPU_COUNT: "8" + FETCH_ML_MOCK_VRAM_GB: "128" + LOG_LEVEL: warn + + - name: Run mock GPU tests - CPU only + run: go test -v ./tests/e2e/... 
-run TestCapability -timeout 10m + env: + FETCH_ML_MOCK_GPU_TYPE: "" + FETCH_ML_MOCK_GPU_COUNT: "0" + FETCH_ML_MOCK_CPU_COUNT: "32" + LOG_LEVEL: warn + + - name: Build Docker image + run: | + docker build -t fetchml-test:latest -f build/docker/simple.Dockerfile . + + - name: Run tests in Docker container + run: | + docker run --rm \ + --network host \ + -e REDIS_ADDR=localhost:6379 \ + fetchml-test:latest \ + go test -v ./tests/integration/... -timeout 10m + + - name: Upload test results + if: always() + uses: actions/upload-artifact@v4 + with: + name: test-results-docker + path: | + *.out + *.log + retention-days: 7 + + - name: Generate coverage report + run: | + go test -coverprofile=coverage.out ./... + go tool cover -html=coverage.out -o coverage.html + + - name: Upload coverage + if: always() + uses: actions/upload-artifact@v4 + with: + name: coverage-report-docker + path: | + coverage.out + coverage.html + retention-days: 30 diff --git a/.gitignore b/.gitignore index 84ea91e..a1a0c37 100644 --- a/.gitignore +++ b/.gitignore @@ -291,7 +291,7 @@ ssl/ .api-keys # AI assitant files -AGENTS.md +# AGENTS.md .windsurf/* # Scheduler/worker config files with tokens (examples are allowed) diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..52c259c --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,126 @@ +# AGENTS.md - FetchML + +## Architecture + +``` +┌─────────┐ ┌─────────┐ ┌──────────┐ ┌─────────┐ ┌──────────┐ +│ CLI │────▶│ API │────▶│ Scheduler│────▶│ Worker │────▶│ Storage │ +│ (Zig) │◄────│(Go/HTTP)│◄────│ (Go) │◄────│ (Go) │◄────│ (MinIO) │ +└─────────┘ └─────────┘ └──────────┘ └─────────┘ └──────────┘ + │ + ▼ + ┌──────────┐ + │ Redis │ + │ (Queue) │ + └──────────┘ +``` + +**CLI ↔ Server**: HTTP (default) or Unix socket (local). `execution_mode` config: +`direct` (bypass scheduler) or `queue` (full flow). Auth via API key in header. 
+ +--- + +## Container Architecture + +**Docker** - Used for: +- CI/CD testing pipelines (`.forgejo/workflows/docker-tests.yml`) +- Application deployments (staging/production) +- Build environments + +**Podman** - Used for: +- ML experiment isolation only +- Running untrusted/third-party ML workloads +- Rootless container execution for security + +**Rule**: Never use Podman for CI testing or deployments. Never use Docker for experiment isolation. + +--- + +## Critical Invariants + +### Audit Log — never break these + +- **Append-only** — entries are never modified or deleted +- **Hash chain** — every entry includes the SHA-256 hash of the previous entry +- **All mutations** to tasks/groups/tokens must produce an audit entry +- Write the audit entry before the storage write — partial failures must be audited + +### Auth + +- `TokenFromContext(ctx)` is the only authorised way to extract auth in handlers +- Group visibility enforced at DB query level — never filter in application code +- API keys hashed with bcrypt before storage — never log raw keys + +### Storage + +- All DB access through repository types in `internal/db/repository/` +- Transactions via `WithTx(ctx, db, func(tx *sql.Tx) error)` — never manage tx manually +- Migrations: additive only — new columns must be nullable or have defaults, + never drop columns (mark deprecated, remove later) + +### CGO / Native Libs + +Use `-tags native_libs` when building with C++ extensions. This has broken twice — +always check build tags when touching GPU detection or native code. 
+ +--- + +## Build Commands + +```bash +make build # all components +make dev # fast, no LTO +make prod # production-optimized +make prod-with-native # production + C++ libs +make cross-platform # Linux/macOS/Windows + +cd cli && make dev # Zig: fast compile + format +cd cli && make prod # Zig: release=fast, LTO +cd cli && make debug # Zig: no optimizations +cd cli && zig build test +``` + +## Test Commands + +```bash +make test # all tests (Docker) +make test-unit +make test-integration +make test-e2e +make test-coverage + +go test -v ./path/to/package -run TestName +go test -race ./path/to/package/... +LOG_LEVEL=debug go test -v ./path/to/package +FETCH_ML_E2E_PODMAN=1 go test ./tests/e2e/... +``` + +## Lint / Security + +```bash +make lint +make security-scan +make configlint +make openapi-validate +go vet ./... +cd cli && zig fmt . +``` + +--- + +## Legacy Go — modernize when touching existing code only + +| Legacy | Modern | +| -------------------------- | ----------------------- | +| `interface{}` | `any` | +| `for i := 0; i < n; i++` | `for i := range n` | +| `[]byte(fmt.Sprintf(...))` | `fmt.Appendf(nil, ...)` | +| `sort.Slice` with closure | `slices.Sort(x)` or `slices.SortFunc(x, cmp)` | +| Manual contains loop | `slices.Contains` | + +--- + +## Dependencies + +- Go 1.25+, Zig 0.15+, Python 3.11+ +- Redis (integration tests), Docker/Podman (container tests)