ci(docker): add test workflow and container architecture docs
- Create docker-tests.yml for merge-to-main CI pipeline - Add mock GPU test matrix (NVIDIA, Metal, CPU-only) - Add AGENTS.md with container architecture rules: * Docker for CI/CD testing and deployments * Podman for ML experiment isolation only - Update .gitignore to track AGENTS.md
This commit is contained in:
parent
6af85ddaf6
commit
6646f3a382
3 changed files with 287 additions and 1 deletions
160
.forgejo/workflows/docker-tests.yml
Normal file
160
.forgejo/workflows/docker-tests.yml
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
# Docker Tests workflow: runs for pushes to main and for manual dispatch.
name: Docker Tests

on:
  push:
    branches:
      - main
    # Pushes that only touch these paths do not start the workflow.
    paths-ignore:
      - 'docs/**'
      - 'README.md'
      - 'CHANGELOG.md'
      - '.forgejo/ISSUE_TEMPLATE/**'
      - '**/*.md'
  workflow_dispatch:

# One active run per workflow + ref; a newer run cancels the older one.
concurrency:
  group: '${{ gitea.workflow }}-${{ gitea.ref }}'
  cancel-in-progress: true

permissions:
  contents: read
  security-events: write

# Toolchain versions read by the "Set up Go" and "Set up Zig" steps.
env:
  GO_VERSION: '1.25.0'
  ZIG_VERSION: '0.15.2'
|
||||
|
||||
jobs:
  docker-tests:
    name: Docker Container Tests
    runs-on: self-hosted
    timeout-minutes: 45

    # Redis side-car published on 6379; the health check polls `redis-cli ping`
    # every 10s (5s timeout, 5 retries).
    services:
      redis:
        image: redis:7
        ports:
          - '6379:6379'
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5

    steps:
      # Fetch the repository into the runner workspace.
      - name: Checkout code
        uses: actions/checkout@v4
|
||||
|
||||
# Ensures the Go toolchain pinned by env.GO_VERSION is installed under
# /usr/local/go and on PATH for the remaining steps of the job.
- name: Set up Go
  run: |
    REQUIRED_GO="${{ env.GO_VERSION }}"
    GO_ROOT="/usr/local/go"

    # Entries written to GITHUB_PATH only last for one job, so look in the
    # install location as well; otherwise a toolchain installed by an earlier
    # run on this self-hosted runner is not found and gets reinstalled.
    export PATH="${GO_ROOT}/bin:${PATH}"

    # -F plus the trailing space: literal match of the complete version token
    # in the output of `go version` ("go version go<ver> <os>/<arch>").
    if command -v go > /dev/null 2>&1 && go version | grep -qF "go${REQUIRED_GO} "; then
      echo "Go ${REQUIRED_GO} already installed"
    else
      echo "Installing Go ${REQUIRED_GO}..."
      # -f makes curl fail on HTTP errors instead of saving an error page.
      curl -fsSL --retry 3 "https://go.dev/dl/go${REQUIRED_GO}.linux-amd64.tar.gz" -o /tmp/go.tar.gz
      # Never untar over an existing tree: mixing files from two releases is
      # known to produce a broken Go installation.
      sudo rm -rf "${GO_ROOT}"
      sudo tar -C /usr/local -xzf /tmp/go.tar.gz
      rm -f /tmp/go.tar.gz
    fi

    # Expose the toolchain to later steps whether or not it was just installed.
    echo "${GO_ROOT}/bin" >> "$GITHUB_PATH"
    go version
|
||||
|
||||
# Ensures the Zig toolchain pinned by env.ZIG_VERSION is installed under
# /usr/local/zig-<version> and reachable as /usr/local/bin/zig.
- name: Set up Zig
  run: |
    ZIG_VERSION="${{ env.ZIG_VERSION }}"

    # -x -F: whole-line literal comparison, so a build such as
    # "<version>-dev.N" is not mistaken for the pinned release.
    if command -v zig > /dev/null 2>&1 && zig version | grep -qxF "${ZIG_VERSION}"; then
      echo "Zig ${ZIG_VERSION} already installed"
    else
      echo "Installing Zig ${ZIG_VERSION}..."
      ZIG_DIR="/usr/local/zig-${ZIG_VERSION}"
      curl -fsSL --retry 3 "https://ziglang.org/download/${ZIG_VERSION}/zig-x86_64-linux-${ZIG_VERSION}.tar.xz" -o /tmp/zig.tar.xz
      sudo mkdir -p "${ZIG_DIR}"
      # The archive has a single top-level directory; strip it so the zig
      # binary lands directly in ZIG_DIR.
      sudo tar -C "${ZIG_DIR}" --strip-components=1 -xJf /tmp/zig.tar.xz
      # Do not leave the downloaded archive behind on the self-hosted runner.
      rm -f /tmp/zig.tar.xz
      sudo ln -sf "${ZIG_DIR}/zig" /usr/local/bin/zig
      echo "${ZIG_DIR}" >> "$GITHUB_PATH"
    fi
    zig version
|
||||
|
||||
# Unit tests: every package, built with the `unit` tag, race detector on.
- name: Run unit tests
  env:
    LOG_LEVEL: warn
  run: go test -v -race ./... -tags=unit -timeout 15m

# Integration tests under tests/integration, race detector on.
- name: Run integration tests
  env:
    LOG_LEVEL: warn
  run: go test -v -race ./tests/integration/... -timeout 20m

# End-to-end tests under tests/e2e.
- name: Run E2E tests
  env:
    LOG_LEVEL: warn
  run: go test -v ./tests/e2e/... -timeout 20m

# Chaos tests under tests/chaos.
- name: Run chaos tests
  env:
    LOG_LEVEL: warn
  run: go test -v ./tests/chaos/... -timeout 10m

# Stress tests under tests/stress.
- name: Run stress tests
  env:
    LOG_LEVEL: warn
  run: go test -v ./tests/stress/... -timeout 5m
|
||||
|
||||
# The three steps below run the same TestCapability selection from tests/e2e;
# only the FETCH_ML_MOCK_* environment differs between them.
- name: Run mock GPU tests - NVIDIA 4x A100
  env:
    LOG_LEVEL: warn
    FETCH_ML_MOCK_GPU_TYPE: NVIDIA
    FETCH_ML_MOCK_GPU_COUNT: '4'
    FETCH_ML_MOCK_VRAM_GB: '80'
  run: go test -v ./tests/e2e/... -run TestCapability -timeout 10m

- name: Run mock GPU tests - Metal 8x
  env:
    LOG_LEVEL: warn
    FETCH_ML_MOCK_GPU_TYPE: Apple
    FETCH_ML_MOCK_GPU_COUNT: '8'
    FETCH_ML_MOCK_VRAM_GB: '128'
  run: go test -v ./tests/e2e/... -run TestCapability -timeout 10m

# CPU-only profile: empty GPU type, zero GPUs, a CPU count instead of VRAM.
- name: Run mock GPU tests - CPU only
  env:
    LOG_LEVEL: warn
    FETCH_ML_MOCK_GPU_TYPE: ''
    FETCH_ML_MOCK_GPU_COUNT: '0'
    FETCH_ML_MOCK_CPU_COUNT: '32'
  run: go test -v ./tests/e2e/... -run TestCapability -timeout 10m
|
||||
|
||||
# Build the test image from build/docker/simple.Dockerfile with the
# repository root as build context.
- name: Build Docker image
  run: docker build -t fetchml-test:latest -f build/docker/simple.Dockerfile .

# Run the integration tests inside the freshly built image. The container
# shares the host network and is given REDIS_ADDR=localhost:6379
# (presumably to reach the redis service published on that port).
- name: Run tests in Docker container
  run: |
    docker run --rm \
      --network host \
      -e REDIS_ADDR=localhost:6379 \
      fetchml-test:latest \
      go test -v ./tests/integration/... -timeout 10m
|
||||
|
||||
# Upload any *.out / *.log files from the workspace root, even after a failure.
# NOTE(review): no earlier step in this job writes *.out or *.log files, and
# coverage.out is only created by the step that follows - confirm the
# intended ordering of these steps.
- name: Upload test results
  uses: actions/upload-artifact@v4
  if: always()
  with:
    name: test-results-docker
    retention-days: 7
    path: |
      *.out
      *.log

# Re-run all packages with coverage enabled and render an HTML report.
# NOTE(review): this step has no `if:` condition, so it is skipped once an
# earlier step has failed, while the upload below still runs.
- name: Generate coverage report
  run: |
    go test -coverprofile=coverage.out ./...
    go tool cover -html=coverage.out -o coverage.html

# Publish the raw profile and the HTML report; kept for 30 days.
- name: Upload coverage
  uses: actions/upload-artifact@v4
  if: always()
  with:
    name: coverage-report-docker
    retention-days: 30
    path: |
      coverage.out
      coverage.html
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -291,7 +291,7 @@ ssl/
|
|||
.api-keys
|
||||
|
||||
# AI assistant files
|
||||
AGENTS.md
|
||||
# AGENTS.md
|
||||
.windsurf/*
|
||||
|
||||
# Scheduler/worker config files with tokens (examples are allowed)
|
||||
|
|
|
|||
126
AGENTS.md
Normal file
126
AGENTS.md
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
# AGENTS.md - FetchML
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────┐ ┌─────────┐ ┌──────────┐ ┌─────────┐ ┌──────────┐
|
||||
│ CLI │────▶│ API │────▶│ Scheduler│────▶│ Worker │────▶│ Storage │
|
||||
│ (Zig) │◄────│(Go/HTTP)│◄────│ (Go) │◄────│ (Go) │◄────│ (MinIO) │
|
||||
└─────────┘ └─────────┘ └──────────┘ └─────────┘ └──────────┘
|
||||
│
|
||||
▼
|
||||
┌──────────┐
|
||||
│ Redis │
|
||||
│ (Queue) │
|
||||
└──────────┘
|
||||
```
|
||||
|
||||
**CLI ↔ Server**: HTTP (default) or Unix socket (local). `execution_mode` config:
|
||||
`direct` (bypass scheduler) or `queue` (full flow). Auth via API key in header.
|
||||
|
||||
---
|
||||
|
||||
## Container Architecture
|
||||
|
||||
**Docker** - Used for:
|
||||
- CI/CD testing pipelines (`.forgejo/workflows/docker-tests.yml`)
|
||||
- Application deployments (staging/production)
|
||||
- Build environments
|
||||
|
||||
**Podman** - Used for:
|
||||
- ML experiment isolation only
|
||||
- Running untrusted/third-party ML workloads
|
||||
- Rootless container execution for security
|
||||
|
||||
**Rule**: Never use Podman for CI testing or deployments. Never use Docker for experiment isolation.
|
||||
|
||||
---
|
||||
|
||||
## Critical Invariants
|
||||
|
||||
### Audit Log — never break these
|
||||
|
||||
- **Append-only** — entries are never modified or deleted
|
||||
- **Hash chain** — every entry includes SHA256 of the previous entry
|
||||
- **All mutations** to tasks/groups/tokens must produce an audit entry
|
||||
- Write the audit entry before the storage write — partial failures must be audited
|
||||
|
||||
### Auth
|
||||
|
||||
- `TokenFromContext(ctx)` is the only authorised way to extract auth in handlers
|
||||
- Group visibility enforced at DB query level — never filter in application code
|
||||
- API keys hashed with bcrypt before storage — never log raw keys
|
||||
|
||||
### Storage
|
||||
|
||||
- All DB access through repository types in `internal/db/repository/`
|
||||
- Transactions via `WithTx(ctx, db, func(tx *sql.Tx) error)` — never manage tx manually
|
||||
- Migrations: additive only — new columns must be nullable or have defaults,
|
||||
never drop columns (mark deprecated, remove later)
|
||||
|
||||
### CGO / Native Libs
|
||||
|
||||
Use `-tags native_libs` when building with C++ extensions. This has broken twice —
|
||||
always check build tags when touching GPU detection or native code.
|
||||
|
||||
---
|
||||
|
||||
## Build Commands
|
||||
|
||||
```bash
|
||||
make build # all components
|
||||
make dev # fast, no LTO
|
||||
make prod # production-optimized
|
||||
make prod-with-native # production + C++ libs
|
||||
make cross-platform # Linux/macOS/Windows
|
||||
|
||||
cd cli && make dev # Zig: fast compile + format
|
||||
cd cli && make prod # Zig: release=fast, LTO
|
||||
cd cli && make debug # Zig: no optimizations
|
||||
cd cli && zig build test
|
||||
```
|
||||
|
||||
## Test Commands
|
||||
|
||||
```bash
|
||||
make test # all tests (Docker)
|
||||
make test-unit
|
||||
make test-integration
|
||||
make test-e2e
|
||||
make test-coverage
|
||||
|
||||
go test -v ./path/to/package -run TestName
|
||||
go test -race ./path/to/package/...
|
||||
LOG_LEVEL=debug go test -v ./path/to/package
|
||||
FETCH_ML_E2E_PODMAN=1 go test ./tests/e2e/...
|
||||
```
|
||||
|
||||
## Lint / Security
|
||||
|
||||
```bash
|
||||
make lint
|
||||
make security-scan
|
||||
make configlint
|
||||
make openapi-validate
|
||||
go vet ./...
|
||||
cd cli && zig fmt .
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Legacy Go — modernize when touching existing code only
|
||||
|
||||
| Legacy | Modern |
|
||||
| -------------------------- | ----------------------- |
|
||||
| `interface{}` | `any` |
|
||||
| `for i := 0; i < n; i++` | `for i := range n` |
|
||||
| `[]byte(fmt.Sprintf(...))` | `fmt.Appendf(nil, ...)` |
|
||||
| `sort.Slice` with closure | `slices.SortFunc(x, cmp)` (`slices.Sort(x)` for natural order) |
|
||||
| Manual contains loop | `slices.Contains` |
|
||||
|
||||
---
|
||||
|
||||
## Dependencies
|
||||
|
||||
- Go 1.25+, Zig 0.15+, Python 3.11+
|
||||
- Redis (integration tests), Docker/Podman (container tests)
|
||||
Loading…
Reference in a new issue