chore: implement centralized path registry and file organization conventions

Add PathRegistry for centralized path management:
- Create internal/config/paths.go with PathRegistry type
- Binary paths: BinDir(), APIServerBinary(), WorkerBinary(), etc.
- Data paths: DataDir(), JupyterStateDir(), ExperimentsDir()
- Config paths: ConfigDir(), APIServerConfig()
- Helper methods: EnsureDir(), EnsureDirSecure(), FileExists()
- Auto-detect repo root by looking for go.mod

Update .gitignore for root protection:
- Add explicit /api-server, /worker, /tui, /data_manager rules
- Add /coverage.out (root only) and .DS_Store (ignored in every directory) rules
- Prevents accidental commits of binaries to root

Add path verification script:
- Create scripts/verify-paths.sh
- Checks for binaries in root directory
- Checks for .DS_Store files
- Checks for coverage.out in root
- Warns (without failing) if data/ exists but is not gitignored
- Returns exit code 1 on violations

Cleaned .DS_Store files from repository
This commit is contained in:
Jeremie Fraeys 2026-02-18 16:48:50 -05:00
parent 64e306bd72
commit e127f97442
No known key found for this signature in database
3 changed files with 187 additions and 3 deletions

8
.gitignore vendored
View file

@ -1,3 +1,11 @@
# Root directory protection - binaries must be in bin/
/api-server
/worker
/tui
/data_manager
/coverage.out
.DS_Store
# Binaries for programs and plugins
*.exe
*.exe~

View file

@ -1,5 +1,109 @@
// Package config provides shared utilities for the fetch_ml project.
// Package config provides centralized path management for the fetch_ml project.
package config
// Deprecated: Use config.ExpandPath from shared.go instead
// This file is kept for backward compatibility during migration
import (
"os"
"path/filepath"
)
// PathRegistry is the central source of repository paths.
// All of its path accessors are derived from RootDir.
type PathRegistry struct {
	// RootDir is the repository root (auto-detected or from env).
	RootDir string
}
// NewPathRegistry returns a PathRegistry rooted at root.
// When root is the empty string, the root is obtained from detectRepoRoot
// instead.
func NewPathRegistry(root string) *PathRegistry {
	if root != "" {
		return &PathRegistry{RootDir: root}
	}
	return &PathRegistry{RootDir: detectRepoRoot()}
}
// Binary paths

// BinDir returns the "bin" directory under RootDir.
func (p *PathRegistry) BinDir() string {
	return filepath.Join(p.RootDir, "bin")
}

// APIServerBinary returns the path of the "api-server" binary inside BinDir.
func (p *PathRegistry) APIServerBinary() string {
	return filepath.Join(p.BinDir(), "api-server")
}

// WorkerBinary returns the path of the "worker" binary inside BinDir.
func (p *PathRegistry) WorkerBinary() string {
	return filepath.Join(p.BinDir(), "worker")
}

// TUIBinary returns the path of the "tui" binary inside BinDir.
func (p *PathRegistry) TUIBinary() string {
	return filepath.Join(p.BinDir(), "tui")
}

// DataManagerBinary returns the path of the "data_manager" binary inside BinDir.
func (p *PathRegistry) DataManagerBinary() string {
	return filepath.Join(p.BinDir(), "data_manager")
}
// Data paths

// DataDir returns the "data" directory under RootDir.
func (p *PathRegistry) DataDir() string {
	return filepath.Join(p.RootDir, "data")
}

// ActiveDataDir returns the "active" directory inside DataDir.
func (p *PathRegistry) ActiveDataDir() string {
	return filepath.Join(p.DataDir(), "active")
}

// JupyterStateDir returns the "active/jupyter" directory inside DataDir.
func (p *PathRegistry) JupyterStateDir() string {
	return filepath.Join(p.DataDir(), "active", "jupyter")
}

// ExperimentsDir returns the "experiments" directory inside DataDir.
func (p *PathRegistry) ExperimentsDir() string {
	return filepath.Join(p.DataDir(), "experiments")
}

// ProdSmokeDir returns the "prod-smoke" directory inside DataDir.
func (p *PathRegistry) ProdSmokeDir() string {
	return filepath.Join(p.DataDir(), "prod-smoke")
}
// Database paths

// DBDir returns the "db" directory under RootDir.
func (p *PathRegistry) DBDir() string {
	return filepath.Join(p.RootDir, "db")
}

// SQLitePath returns the path of "fetch_ml.db" inside DBDir.
func (p *PathRegistry) SQLitePath() string {
	return filepath.Join(p.DBDir(), "fetch_ml.db")
}
// Log paths

// LogDir returns the "logs" directory under RootDir.
func (p *PathRegistry) LogDir() string {
	return filepath.Join(p.RootDir, "logs")
}

// AuditLogPath returns the path of "fetchml-audit.log" inside LogDir.
func (p *PathRegistry) AuditLogPath() string {
	return filepath.Join(p.LogDir(), "fetchml-audit.log")
}
// Config paths

// ConfigDir returns the "configs" directory under RootDir.
func (p *PathRegistry) ConfigDir() string {
	return filepath.Join(p.RootDir, "configs")
}

// APIServerConfig returns the path of "api/dev.yaml" inside ConfigDir.
func (p *PathRegistry) APIServerConfig() string {
	return filepath.Join(p.ConfigDir(), "api", "dev.yaml")
}

// WorkerConfigDir returns the "workers" directory inside ConfigDir.
func (p *PathRegistry) WorkerConfigDir() string {
	return filepath.Join(p.ConfigDir(), "workers")
}
// Test paths

// TestResultsDir returns the "test_results" directory under RootDir.
func (p *PathRegistry) TestResultsDir() string {
	return filepath.Join(p.RootDir, "test_results")
}

// TempDir returns the "tmp" directory under RootDir.
func (p *PathRegistry) TempDir() string {
	return filepath.Join(p.RootDir, "tmp")
}
// State file paths (for service persistence)

// JupyterServicesFile returns the path of "fetch_ml_jupyter_services.json"
// inside JupyterStateDir.
func (p *PathRegistry) JupyterServicesFile() string {
	stateDir := p.JupyterStateDir()
	return filepath.Join(stateDir, "fetch_ml_jupyter_services.json")
}

// JupyterWorkspacesFile returns the path of "fetch_ml_jupyter_workspaces.json"
// inside JupyterStateDir.
func (p *PathRegistry) JupyterWorkspacesFile() string {
	stateDir := p.JupyterStateDir()
	return filepath.Join(stateDir, "fetch_ml_jupyter_workspaces.json")
}
// EnsureDir makes sure the directory at path exists, creating it and any
// missing parents with mode 0750. The error from os.MkdirAll is returned
// unchanged.
func (p *PathRegistry) EnsureDir(path string) error {
	const dirMode os.FileMode = 0750
	return os.MkdirAll(path, dirMode)
}
// EnsureDirSecure makes sure the directory at path exists, creating it and
// any missing parents with the restricted mode 0700 (intended for sensitive
// data). The error from os.MkdirAll is returned unchanged.
func (p *PathRegistry) EnsureDirSecure(path string) error {
	const secureDirMode os.FileMode = 0700
	return os.MkdirAll(path, secureDirMode)
}
// FileExists reports whether os.Stat succeeds for path.
// It returns true for anything Stat can resolve (directories included) and
// false on any Stat error, not only "does not exist".
func (p *PathRegistry) FileExists(path string) bool {
	if _, statErr := os.Stat(path); statErr != nil {
		return false
	}
	return true
}
// detectRepoRoot locates the repository root by searching for go.mod,
// starting at the working directory and moving up one parent at a time.
// It returns the first directory for which os.Stat on go.mod succeeds.
// If the working directory cannot be determined, or the filesystem root is
// reached without a match, it returns ".".
func detectRepoRoot() string {
	start, err := os.Getwd()
	if err != nil {
		return "."
	}

	current := start
	for {
		marker := filepath.Join(current, "go.mod")
		if _, statErr := os.Stat(marker); statErr == nil {
			return current
		}

		// filepath.Dir returns its argument unchanged at the filesystem
		// root, which is the signal to stop climbing.
		up := filepath.Dir(current)
		if up == current {
			return "."
		}
		current = up
	}
}
// FromEnv builds a PathRegistry using the FETCHML_ROOT environment variable
// as the root. When the variable is unset or empty, NewPathRegistry receives
// an empty root and falls back to auto-detection.
func FromEnv() *PathRegistry {
	return NewPathRegistry(os.Getenv("FETCHML_ROOT"))
}

72
scripts/verify-paths.sh Executable file
View file

@ -0,0 +1,72 @@
#!/bin/bash
# Verify repository path conventions.
#
# Runs from the git top-level directory (or the current directory when git
# cannot report one) and performs five checks:
#   1. none of the known binaries sit in the root directory   (failure)
#   2. no .DS_Store files exist anywhere below the root       (failure)
#   3. no coverage.out sits in the root directory             (failure)
#   4. bin/ directory status                                  (informational)
#   5. data/ is gitignored                                    (warning only)
#
# Exit status: 1 if any of checks 1-3 failed, 0 otherwise.

set -euo pipefail

FAILED=0

cd "$(git rev-parse --show-toplevel 2>/dev/null || pwd)"

echo "=== Path Convention Verification ==="

# Check 1: No binaries in root
echo "Checking for binaries in root..."
for binary in api-server worker tui data_manager; do
    if [ -f "./$binary" ]; then
        echo "✗ FAIL: Binary $binary found in root (should be in bin/)"
        FAILED=1
    fi
done
if [ $FAILED -eq 0 ]; then
    echo "✓ No binaries in root"
fi

# Check 2: No .DS_Store files
echo "Checking for .DS_Store files..."
# Run find once and keep its output. `|| true` keeps a non-zero find status
# (e.g. an unreadable directory) from aborting the script under `set -e`
# before the summary is printed.
DSSTORE_FILES=$(find . -name ".DS_Store" -type f 2>/dev/null || true)
if [ -n "$DSSTORE_FILES" ]; then
    # tr strips the padding that BSD/macOS wc puts around the number.
    DSSTORE_COUNT=$(wc -l <<<"$DSSTORE_FILES" | tr -d '[:space:]')
    echo "✗ FAIL: $DSSTORE_COUNT .DS_Store file(s) found"
    # sed reads its whole input, so unlike `find | head -5` there is no early
    # reader exit that could raise SIGPIPE and trip `set -o pipefail`.
    sed -n '1,5p' <<<"$DSSTORE_FILES"
    FAILED=1
else
    echo "✓ No .DS_Store files"
fi

# Check 3: No coverage.out in root
echo "Checking for coverage.out in root..."
if [ -f "./coverage.out" ]; then
    echo "✗ FAIL: coverage.out found in root (should be in coverage/)"
    FAILED=1
else
    echo "✓ No coverage.out in root"
fi

# Check 4: Report bin/ directory status (informational, never fails)
echo "Checking bin/ directory..."
if [ -d "./bin" ]; then
    # `|| true` keeps an ls failure from aborting the script via pipefail.
    BIN_COUNT=$( (ls -1 ./bin 2>/dev/null || true) | wc -l | tr -d '[:space:]')
    echo "✓ bin/ exists ($BIN_COUNT files)"
else
    echo " bin/ does not exist (will be created by make build)"
fi

# Check 5: Data directories should be gitignored (warning only, never fails)
echo "Checking data/ directory..."
if [ -d "./data" ]; then
    if git check-ignore -q ./data 2>/dev/null; then
        echo "✓ data/ is gitignored"
    else
        echo "⚠ WARNING: data/ exists but may not be gitignored"
    fi
else
    echo " data/ does not exist"
fi

# Summary
echo ""
if [ $FAILED -eq 0 ]; then
    echo "✓ All path conventions verified"
    exit 0
else
    echo "✗ Path convention verification failed"
    exit 1
fi