diff --git a/.gitignore b/.gitignore
index 12d76d2..74a41bb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,11 @@
+# Root directory protection - binaries must be in bin/
+/api-server
+/worker
+/tui
+/data_manager
+/coverage.out
+.DS_Store
+
 # Binaries for programs and plugins
 *.exe
 *.exe~
diff --git a/internal/config/paths.go b/internal/config/paths.go
index d629db4..8c4c324 100644
--- a/internal/config/paths.go
+++ b/internal/config/paths.go
@@ -1,5 +1,109 @@
-// Package config provides shared utilities for the fetch_ml project.
+// Package config provides centralized path management for the fetch_ml project.
 package config
 
-// Deprecated: Use config.ExpandPath from shared.go instead
-// This file is kept for backward compatibility during migration
+import (
+	"os"
+	"path/filepath"
+)
+
+// PathRegistry derives every well-known project path from a single root directory.
+type PathRegistry struct {
+	RootDir string // Repository root (auto-detected or from env)
+}
+
+// NewPathRegistry creates a path registry rooted at root.
+// If root is empty, the root is auto-detected via detectRepoRoot.
+func NewPathRegistry(root string) *PathRegistry {
+	if root == "" {
+		root = detectRepoRoot()
+	}
+	return &PathRegistry{RootDir: root}
+}
+
+// Binary paths
+func (p *PathRegistry) BinDir() string { return filepath.Join(p.RootDir, "bin") }
+func (p *PathRegistry) APIServerBinary() string { return filepath.Join(p.BinDir(), "api-server") }
+func (p *PathRegistry) WorkerBinary() string { return filepath.Join(p.BinDir(), "worker") }
+func (p *PathRegistry) TUIBinary() string { return filepath.Join(p.BinDir(), "tui") }
+func (p *PathRegistry) DataManagerBinary() string { return filepath.Join(p.BinDir(), "data_manager") }
+
+// Data paths
+func (p *PathRegistry) DataDir() string { return filepath.Join(p.RootDir, "data") }
+func (p *PathRegistry) ActiveDataDir() string { return filepath.Join(p.DataDir(), "active") }
+func (p *PathRegistry) JupyterStateDir() string { return filepath.Join(p.ActiveDataDir(), "jupyter") }
+func (p *PathRegistry) ExperimentsDir() string { return filepath.Join(p.DataDir(), "experiments") }
+func (p *PathRegistry) ProdSmokeDir() string { return filepath.Join(p.DataDir(), "prod-smoke") }
+
+// Database paths
+func (p *PathRegistry) DBDir() string { return filepath.Join(p.RootDir, "db") }
+func (p *PathRegistry) SQLitePath() string { return filepath.Join(p.DBDir(), "fetch_ml.db") }
+
+// Log paths
+func (p *PathRegistry) LogDir() string { return filepath.Join(p.RootDir, "logs") }
+func (p *PathRegistry) AuditLogPath() string { return filepath.Join(p.LogDir(), "fetchml-audit.log") }
+
+// Config paths
+func (p *PathRegistry) ConfigDir() string { return filepath.Join(p.RootDir, "configs") }
+func (p *PathRegistry) APIServerConfig() string { return filepath.Join(p.ConfigDir(), "api", "dev.yaml") }
+func (p *PathRegistry) WorkerConfigDir() string { return filepath.Join(p.ConfigDir(), "workers") }
+
+// Test paths
+func (p *PathRegistry) TestResultsDir() string { return filepath.Join(p.RootDir, "test_results") }
+func (p *PathRegistry) TempDir() string { return filepath.Join(p.RootDir, "tmp") }
+
+// State file paths (for service persistence)
+func (p *PathRegistry) JupyterServicesFile() string {
+	return filepath.Join(p.JupyterStateDir(), "fetch_ml_jupyter_services.json")
+}
+
+func (p *PathRegistry) JupyterWorkspacesFile() string {
+	return filepath.Join(p.JupyterStateDir(), "fetch_ml_jupyter_workspaces.json")
+}
+
+// EnsureDir creates path, including any missing parents, with mode 0750 if it doesn't exist.
+func (p *PathRegistry) EnsureDir(path string) error {
+	return os.MkdirAll(path, 0750)
+}
+
+// EnsureDirSecure creates path, including any missing parents, with mode 0700 (for sensitive data).
+func (p *PathRegistry) EnsureDirSecure(path string) error {
+	return os.MkdirAll(path, 0700)
+}
+
+// FileExists reports whether path exists (file or directory); any Stat error yields false.
+func (p *PathRegistry) FileExists(path string) bool {
+	_, err := os.Stat(path)
+	return err == nil
+}
+
+// detectRepoRoot walks up from the working directory to the nearest directory containing go.mod.
+// Returns "." if no go.mod is found or the working directory cannot be determined.
+func detectRepoRoot() string {
+	dir, err := os.Getwd()
+	if err != nil {
+		return "."
+	}
+
+	// Walk up directory tree looking for go.mod
+	for {
+		if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil {
+			return dir
+		}
+
+		parent := filepath.Dir(dir)
+		if parent == dir {
+			// Reached filesystem root: Dir no longer shortens the path
+			break
+		}
+		dir = parent
+	}
+
+	return "."
+}
+
+// FromEnv creates PathRegistry with root from FETCHML_ROOT env var,
+// or auto-detects if env var not set.
+func FromEnv() *PathRegistry {
+	root := os.Getenv("FETCHML_ROOT")
+	return NewPathRegistry(root)
+}
diff --git a/scripts/verify-paths.sh b/scripts/verify-paths.sh
new file mode 100755
index 0000000..cff2e94
--- /dev/null
+++ b/scripts/verify-paths.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+# Verify repository path conventions
+
+set -euo pipefail
+
+FAILED=0
+cd "$(git rev-parse --show-toplevel 2>/dev/null || pwd)"
+
+echo "=== Path Convention Verification ==="
+
+# Check 1: No binaries in root
+echo "Checking for binaries in root..."
+for binary in api-server worker tui data_manager; do
+    if [ -f "./$binary" ]; then
+        echo "✗ FAIL: Binary $binary found in root (should be in bin/)"
+        FAILED=1
+    fi
+done
+if [ "$FAILED" -eq 0 ]; then
+    echo "✓ No binaries in root"
+fi
+
+# Check 2: No .DS_Store files (find errors are tolerated so pipefail cannot abort the run)
+echo "Checking for .DS_Store files..."
+DSSTORE_COUNT=$( (find . -name ".DS_Store" -type f 2>/dev/null || true) | wc -l | tr -d '[:space:]')
+if [ "$DSSTORE_COUNT" -gt 0 ]; then
+    echo "✗ FAIL: $DSSTORE_COUNT .DS_Store file(s) found"
+    find . -name ".DS_Store" -type f 2>/dev/null | head -5 || true
+    FAILED=1
+else
+    echo "✓ No .DS_Store files"
+fi
+
+# Check 3: No coverage.out in root
+echo "Checking for coverage.out in root..."
+if [ -f "./coverage.out" ]; then
+    echo "✗ FAIL: coverage.out found in root (should be in coverage/)"
+    FAILED=1
+else
+    echo "✓ No coverage.out in root"
+fi
+
+# Check 4: Report bin/ directory status (informational; never fails the run)
+echo "Checking bin/ directory..."
+if [ -d "./bin" ]; then
+    BIN_COUNT=$( (ls -1 ./bin 2>/dev/null || true) | wc -l | tr -d '[:space:]')
+    echo "✓ bin/ exists ($BIN_COUNT files)"
+else
+    echo "ℹ bin/ does not exist (will be created by make build)"
+fi
+
+# Check 5: Data directory should be gitignored (warning only; never fails the run)
+echo "Checking data/ directory..."
+if [ -d "./data" ]; then
+    if git check-ignore -q ./data 2>/dev/null; then
+        echo "✓ data/ is gitignored"
+    else
+        echo "⚠ WARNING: data/ exists but may not be gitignored"
+    fi
+else
+    echo "ℹ data/ does not exist"
+fi
+
+# Summary
+echo ""
+if [ "$FAILED" -eq 0 ]; then
+    echo "✓ All path conventions verified"
+    exit 0
+else
+    echo "✗ Path convention verification failed"
+    exit 1
+fi