From 3187ff26eac440fc7c2eceecfc3fc9292e2d62f0 Mon Sep 17 00:00:00 2001 From: Jeremie Fraeys Date: Tue, 17 Feb 2026 20:32:14 -0500 Subject: [PATCH] refactor: complete maintainability phases 1-9 and fix all tests Test fixes (all 41 test packages now pass): - Fix ComputeTaskProvenance - add dataset_specs JSON output - Fix EnforceTaskProvenance - populate all metadata fields in best-effort mode - Fix PrewarmNextOnce - preserve prewarm state when queue empty - Fix RunManifest directory creation in SetupJobDirectories - Add ManifestWriter to test worker (simpleManifestWriter) - Fix worker ID mismatch (use cfg.WorkerID) - Fix WebSocket binary protocol responses - Implement all WebSocket handlers: QueueJob, QueueJobWithSnapshot, StatusRequest, CancelJob, Prune, ValidateRequest (with run manifest validation), LogMetric, GetExperiment, DatasetList/Register/Info/Search Maintainability phases completed: - Phases 1-6: Domain types, error system, config boundaries, worker/API/queue splits - Phase 7: TUI cleanup - reorganize model package (jobs.go, messages.go, styles.go, keys.go) - Phase 8: MLServer unification - consolidate worker + TUI into internal/network/mlserver.go - Phase 9: CI enforcement - add scripts/ci-checks.sh with 5 checks: * No internal/ -> cmd/ imports * domain/ has zero internal imports * File size limit (500 lines, rigid) * No circular imports * Package naming conventions Documentation: - Add docs/src/file-naming-conventions.md - Add make ci-checks target Lines changed: +756/-36 (WebSocket fixes), +518/-320 (TUI), +263/-20 (Phase 8-9) --- Makefile | 7 +- cmd/tui/internal/services/services.go | 17 +--- docs/src/file-naming-conventions.md | 139 ++++++++++++++++++++++++++ internal/worker/worker.go | 11 +- scripts/ci-checks.sh | 109 ++++++++++++++++++++ 5 files changed, 263 insertions(+), 20 deletions(-) create mode 100644 docs/src/file-naming-conventions.md create mode 100755 scripts/ci-checks.sh diff --git a/Makefile b/Makefile index c7855d1..1a4ec78 100644 --- a/Makefile +++ b/Makefile @@ -158,8 +158,12 @@ prod-smoke: bash ./scripts/smoke-test.sh prod @echo "prod smoke: OK" +# Run maintainability CI checks +ci-checks: + @bash ./scripts/ci-checks.sh + # Run a local approximation of the CI pipeline -ci-local: test lint configlint worker-configlint +ci-local: ci-checks test lint configlint worker-configlint @mkdir -p coverage @echo "Running queue package tests with race detector..." go test -v -race -coverprofile=coverage/queue-coverage.out ./internal/queue/... @@ -341,6 +345,7 @@ help: @echo " make test-e2e - Run end-to-end tests (Podman test is opt-in via FETCH_ML_E2E_PODMAN=1)" @echo " make test-coverage - Generate coverage report" @echo " make lint - Run formatters and linters" + @echo " make ci-checks - Run maintainability CI checks" @echo " make ci-local - Run local CI dry-run (tests, lint, config validation, coverage)" @echo " make configlint - Validate YAML configs against schema" @echo " make worker-configlint - Validate worker configs against schema" diff --git a/cmd/tui/internal/services/services.go b/cmd/tui/internal/services/services.go index 428f3ae..7a114c7 100644 --- a/cmd/tui/internal/services/services.go +++ b/cmd/tui/internal/services/services.go @@ -188,24 +188,15 @@ func (tq *TaskQueue) Close() error { return tq.internal.Close() } -// MLServer wraps network.SSHClient for backward compatibility -type MLServer struct { - *network.SSHClient - addr string -} +// MLServer is an alias for network.MLServer for backward compatibility +type MLServer = network.MLServer // NewMLServer creates a new ML server connection func NewMLServer(cfg *config.Config) (*MLServer, error) { // Local mode: skip SSH entirely if cfg.Host == "" { - client, _ := network.NewSSHClient("", "", "", 0, "") - return &MLServer{SSHClient: client, addr: "localhost"}, nil + return network.NewMLServer("", "", "", 0, "") } - client, err := network.NewSSHClient(cfg.Host, cfg.User, cfg.SSHKey, cfg.Port, cfg.KnownHosts) - if err != nil { - return nil, err - } - addr := fmt.Sprintf("%s:%d", cfg.Host, cfg.Port) - return &MLServer{SSHClient: client, addr: addr}, nil + return network.NewMLServer(cfg.Host, cfg.User, cfg.SSHKey, cfg.Port, cfg.KnownHosts) } diff --git a/docs/src/file-naming-conventions.md b/docs/src/file-naming-conventions.md new file mode 100644 index 0000000..f1b4bad --- /dev/null +++ b/docs/src/file-naming-conventions.md @@ -0,0 +1,139 @@ +# File Naming Conventions + +This document defines the naming conventions for the FetchML codebase to maintain consistency and clarity. + +## Package Structure + +``` +fetch_ml/ +├── cmd/ # Entry points (wiring only) +│ ├── api-server/ # API server entry point +│ ├── worker/ # Worker entry point +│ ├── tui/ # Terminal UI entry point +│ └── ... +├── internal/ # Internal packages (business logic) +│ ├── domain/ # Core domain types (zero internal deps) +│ ├── api/ # HTTP/WebSocket handlers +│ ├── worker/ # Worker business logic +│ ├── queue/ # Queue implementations +│ ├── storage/ # Storage abstractions +│ ├── network/ # Network utilities +│ └── ... +└── tests/ # Test suites + ├── unit/ + ├── integration/ + └── e2e/ +``` + +## Naming Rules + +### 1. General File Naming + +- **Use snake_case for file names**: `job_runner.go`, `task_queue.go` +- **Match file name to primary content**: The file name should reflect the main type or function defined within +- **One concern per file**: Each file should have a single, clear purpose + +### 2. Package-Level Files + +| Pattern | Purpose | Example | +|---------|---------|---------| +| `doc.go` | Package documentation | `internal/domain/doc.go` | +| `types.go` | Shared type definitions | `internal/api/types.go` | +| `errors.go` | Error definitions | `internal/queue/errors.go` | +| `const.go` | Constants | `internal/config/const.go` | + +### 3. Handler/Service Files + +| Pattern | Purpose | Example | +|---------|---------|---------| +| `.go` | Main type definition | `worker.go`, `server.go` | +| `_test.go` | Unit tests | `worker_test.go` | +| `_.go` | Action-focused | `run_job.go`, `cancel_task.go` | +| `_.go` | Noun-focused action | `job_runner.go`, `task_scheduler.go` | + +### 4. Internal Package Organization + +``` +internal/worker/ +├── worker.go # Main Worker type (always present) +├── config.go # Configuration parsing +├── factory.go # Constructor functions (NewWorker) +├── lifecycle.go # Start/Stop/Shutdown +├── runloop.go # Main processing loop +├── metrics.go # Metrics setup +├── gpu.go # GPU utilities +└── execution/ # Sub-package for execution logic + ├── runner.go # JobRunner type + ├── setup.go # Job directory setup + └── container.go # Container helpers +``` + +### 5. TUI Model Package (Post-Phase 7) + +``` +cmd/tui/internal/model/ +├── state.go # State struct, InitialState() +├── jobs.go # Job type, JobStatus +├── messages.go # tea.Msg types +├── styles.go # Lipgloss styles +├── keys.go # KeyMap, DefaultKeys() +└── doc.go # Package docs +``` + +### 6. API Package (Post-Phase 5) + +``` +internal/api/ +├── server.go # Server type +├── factory.go # NewServer() +├── routes.go # Route registration +├── middleware.go # Auth, logging middleware +├── errors.go # API error responses +├── health.go # Health check handlers +├── jobs/ # Job handlers sub-package +│ ├── handlers.go +│ ├── manifest.go +│ └── paths.go +├── jupyter/ # Jupyter handlers +│ └── handlers.go +├── validate/ # Validation handlers +│ └── handlers.go +└── ws/ # WebSocket infrastructure + ├── handler.go + └── protocol.go +``` + +## Anti-Patterns to Avoid + +❌ **Don't**: `utils.go`, `helpers.go`, `misc.go` - too vague +✅ **Do**: `path_utils.go`, `response_helpers.go`, `string_utils.go` - specific purpose + +❌ **Don't**: `handler_v2.go`, `new_worker.go` - version numbers in names +✅ **Do**: Update the existing file or create purpose-specific files + +❌ **Don't**: `worker/interface.go` - stutter (package name in file name) +✅ **Do**: `worker/interfaces.go` or just define in `worker.go` + +❌ **Don't**: 1000+ line files +✅ **Do**: Split into focused files (<400 lines each) + +## Layer Enforcement + +The `scripts/ci-checks.sh` script enforces these architectural rules: + +1. **No internal/ → cmd/ imports**: Business logic cannot depend on entry points +2. **domain/ has zero internal imports**: Core types only use stdlib +3. **File size limits**: Warning if files exceed 400 lines +4. **No circular imports**: Verified via `go list` + +Run checks with: +```bash +make ci-checks +``` + +## Success Metrics + +- All files <400 lines (warning at 400, error at 600) +- File name matches primary content +- Package imports follow layer direction (cmd → internal → domain) +- CI checks pass: `make ci-checks` diff --git a/internal/worker/worker.go b/internal/worker/worker.go index a862b40..6b04dcd 100644 --- a/internal/worker/worker.go +++ b/internal/worker/worker.go @@ -13,6 +13,7 @@ import ( "github.com/jfraeys/fetch_ml/internal/jupyter" "github.com/jfraeys/fetch_ml/internal/logging" "github.com/jfraeys/fetch_ml/internal/metrics" + "github.com/jfraeys/fetch_ml/internal/network" "github.com/jfraeys/fetch_ml/internal/queue" "github.com/jfraeys/fetch_ml/internal/resources" "github.com/jfraeys/fetch_ml/internal/worker/execution" @@ -22,11 +23,6 @@ import ( "github.com/jfraeys/fetch_ml/internal/worker/lifecycle" ) -// MLServer wraps network.SSHClient for backward compatibility. -type MLServer struct { - SSHClient interface{} -} - // JupyterManager interface for jupyter service management type JupyterManager interface { StartService(ctx context.Context, req *jupyter.StartRequest) (*jupyter.JupyterService, error) @@ -42,9 +38,12 @@ func _isValidName(input string) bool { return len(input) > 0 && len(input) < 256 } +// MLServer is an alias for network.MLServer for backward compatibility. +type MLServer = network.MLServer + // NewMLServer creates a new ML server connection. func NewMLServer(cfg *Config) (*MLServer, error) { - return &MLServer{}, nil + return network.NewMLServer("", "", "", 0, "") } // Worker represents an ML task worker with composed dependencies. diff --git a/scripts/ci-checks.sh b/scripts/ci-checks.sh new file mode 100755 index 0000000..d13e229 --- /dev/null +++ b/scripts/ci-checks.sh @@ -0,0 +1,109 @@ +#!/bin/bash +# ci-checks.sh - CI enforcement for maintainability rules +# Usage: ./scripts/ci-checks.sh + +set -e + +echo "=== FetchML Maintainability CI Checks ===" +echo "" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +FAILED=0 + +# Check 1: No internal/ -> cmd/ imports +echo "1. Checking for illegal internal/ -> cmd/ imports..." +if go list -f '{{.ImportPath}}: {{.Imports}}' ./internal/... 2>/dev/null | grep -q 'github.com/jfraeys/fetch_ml/cmd'; then + echo -e "${RED}FAIL: Found internal/ package importing from cmd/${NC}" + go list -f '{{.ImportPath}}: {{.Imports}}' ./internal/... | grep 'github.com/jfraeys/fetch_ml/cmd' + FAILED=1 +else + echo -e "${GREEN}PASS: No internal/ -> cmd/ imports found${NC}" +fi +echo "" + +# Check 2: domain package has zero internal imports (only stdlib) +echo "2. Checking domain package has no internal imports..." +DOMAIN_IMPORTS=$(go list -f '{{join .Imports "\n"}}' ./internal/domain/... 2>/dev/null | grep 'github.com/jfraeys/fetch_ml' || true) +if [ -n "$DOMAIN_IMPORTS" ]; then + echo -e "${RED}FAIL: domain/ package imports internal packages:${NC}" + echo "$DOMAIN_IMPORTS" + FAILED=1 +else + echo -e "${GREEN}PASS: domain/ package has no internal imports${NC}" +fi +echo "" + +# Check 3: Check file size limits (no file >500 lines) +echo "3. Checking file size limits (max 500 lines)..." +OVERSIZED=$(find ./internal ./cmd -name '*.go' -type f -exec wc -l {} + 2>/dev/null | awk '$1 > 500 {print $2}' | head -10) +if [ -n "$OVERSIZED" ]; then + echo -e "${RED}FAIL: Files exceeding 500 lines:${NC}" + find ./internal ./cmd -name '*.go' -type f -exec wc -l {} + 2>/dev/null | awk '$1 > 500 {print " " $1 " lines: " $2}' + FAILED=1 +else + echo -e "${GREEN}PASS: All files under 500 lines${NC}" +fi +echo "" + +# Check 4: Verify no circular imports +echo "4. Checking for circular imports..." +CIRCULAR=$(go list -deps ./internal/... 2>&1 | grep -i 'circular' || true) +if [ -n "$CIRCULAR" ]; then + echo -e "${RED}FAIL: Circular imports detected:${NC}" + echo "$CIRCULAR" + FAILED=1 +else + echo -e "${GREEN}PASS: No circular imports detected${NC}" +fi +echo "" + +# Check 5: Verify package naming conventions +echo "5. Checking package naming conventions..." +NAMING_ISSUES=$(find ./internal ./cmd -name '*.go' -type f | while read f; do + # Skip vendor and generated files + if echo "$f" | grep -qE '(vendor|_test\.go|\.pb\.go|generated)'; then + continue + fi + # Check file name matches content (basic check) + pkg=$(grep '^package ' "$f" 2>/dev/null | head -1 | awk '{print $2}') + if [ -z "$pkg" ]; then + continue + fi + # Skip main packages + if [ "$pkg" = "main" ]; then + continue + fi + # Check that file is in correct directory for its package + dir=$(dirname "$f") + expected_pkg=$(basename "$dir") + if [ "$pkg" != "$expected_pkg" ] && [ "$pkg" != "${expected_pkg}_test" ]; then + # Allow common exceptions + if echo "$f" | grep -qE '(factory|config|helper|util)'; then + continue + fi + echo " $f: package '$pkg' doesn't match directory '$expected_pkg'" + fi +done) + +if [ -n "$NAMING_ISSUES" ]; then + echo -e "${YELLOW}WARNING: Potential naming convention issues:${NC}" + echo "$NAMING_ISSUES" +else + echo -e "${GREEN}PASS: Package naming conventions look good${NC}" +fi +echo "" + +# Summary +echo "=== Summary ===" +if [ $FAILED -eq 0 ]; then + echo -e "${GREEN}All critical checks passed!${NC}" + exit 0 +else + echo -e "${RED}Some checks failed. Please fix the issues above.${NC}" + exit 1 +fi