refactor: Phase 3 - fix config/storage boundaries
Move schema ownership to infrastructure layer: - Redis keys: config/constants.go -> queue/keys.go (TaskQueueKey, TaskPrefix, etc.) - Filesystem paths: config/paths.go -> storage/paths.go (JobPaths) - Create config/shared.go with RedisConfig, SSHConfig - Update all imports: worker/, api/helpers, api/ws_jobs, api/ws_validate - Clean up: remove duplicates from queue/task.go, queue/queue.go, config/paths.go Build status: Compiles successfully
This commit is contained in:
parent
6580917ba8
commit
d1bef0a450
11 changed files with 158 additions and 98 deletions
|
|
@ -7,10 +7,10 @@ import (
|
|||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/jfraeys/fetch_ml/internal/config"
|
||||
"github.com/jfraeys/fetch_ml/internal/experiment"
|
||||
"github.com/jfraeys/fetch_ml/internal/manifest"
|
||||
"github.com/jfraeys/fetch_ml/internal/queue"
|
||||
"github.com/jfraeys/fetch_ml/internal/storage"
|
||||
"github.com/jfraeys/fetch_ml/internal/worker"
|
||||
)
|
||||
|
||||
|
|
@ -114,7 +114,7 @@ func FindRunManifestDir(basePath string, jobName string) (dir string, bucket str
|
|||
if strings.TrimSpace(basePath) == "" || strings.TrimSpace(jobName) == "" {
|
||||
return "", "", false
|
||||
}
|
||||
jobPaths := config.NewJobPaths(basePath)
|
||||
jobPaths := storage.NewJobPaths(basePath)
|
||||
typedRoots := []struct {
|
||||
bucket string
|
||||
root string
|
||||
|
|
|
|||
|
|
@ -15,7 +15,6 @@ import (
|
|||
"github.com/gorilla/websocket"
|
||||
"github.com/jfraeys/fetch_ml/internal/api/helpers"
|
||||
"github.com/jfraeys/fetch_ml/internal/auth"
|
||||
"github.com/jfraeys/fetch_ml/internal/config"
|
||||
"github.com/jfraeys/fetch_ml/internal/container"
|
||||
"github.com/jfraeys/fetch_ml/internal/manifest"
|
||||
"github.com/jfraeys/fetch_ml/internal/queue"
|
||||
|
|
@ -71,7 +70,7 @@ func (h *WSHandler) handleAnnotateRun(conn *websocket.Conn, payload []byte) erro
|
|||
return h.sendErrorPacket(conn, ErrorCodeInvalidConfiguration, "Missing api base_path", "")
|
||||
}
|
||||
|
||||
jobPaths := config.NewJobPaths(base)
|
||||
jobPaths := storage.NewJobPaths(base)
|
||||
typedRoots := []struct{ root string }{
|
||||
{root: jobPaths.RunningPath()},
|
||||
{root: jobPaths.PendingPath()},
|
||||
|
|
@ -148,7 +147,7 @@ func (h *WSHandler) handleSetRunNarrative(conn *websocket.Conn, payload []byte)
|
|||
return h.sendErrorPacket(conn, ErrorCodeInvalidConfiguration, "Missing api base_path", "")
|
||||
}
|
||||
|
||||
jobPaths := config.NewJobPaths(base)
|
||||
jobPaths := storage.NewJobPaths(base)
|
||||
typedRoots := []struct{ root string }{
|
||||
{root: jobPaths.RunningPath()},
|
||||
{root: jobPaths.PendingPath()},
|
||||
|
|
|
|||
|
|
@ -11,10 +11,10 @@ import (
|
|||
|
||||
"github.com/gorilla/websocket"
|
||||
"github.com/jfraeys/fetch_ml/internal/api/helpers"
|
||||
"github.com/jfraeys/fetch_ml/internal/config"
|
||||
"github.com/jfraeys/fetch_ml/internal/container"
|
||||
"github.com/jfraeys/fetch_ml/internal/manifest"
|
||||
"github.com/jfraeys/fetch_ml/internal/queue"
|
||||
"github.com/jfraeys/fetch_ml/internal/storage"
|
||||
"github.com/jfraeys/fetch_ml/internal/worker"
|
||||
)
|
||||
|
||||
|
|
@ -68,7 +68,7 @@ func findRunManifestDir(basePath string, jobName string) (string, string, bool)
|
|||
if strings.TrimSpace(basePath) == "" || strings.TrimSpace(jobName) == "" {
|
||||
return "", "", false
|
||||
}
|
||||
jobPaths := config.NewJobPaths(basePath)
|
||||
jobPaths := storage.NewJobPaths(basePath)
|
||||
typedRoots := []struct {
|
||||
bucket string
|
||||
root string
|
||||
|
|
|
|||
|
|
@ -1,73 +1,5 @@
|
|||
// Package config provides shared utilities for the fetch_ml project.
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ExpandPath expands environment variables and tilde in a path
|
||||
func ExpandPath(path string) string {
|
||||
if path == "" {
|
||||
return ""
|
||||
}
|
||||
expanded := os.ExpandEnv(path)
|
||||
if strings.HasPrefix(expanded, "~") {
|
||||
home, err := os.UserHomeDir()
|
||||
if err == nil {
|
||||
expanded = filepath.Join(home, expanded[1:])
|
||||
}
|
||||
}
|
||||
return expanded
|
||||
}
|
||||
|
||||
// ResolveConfigPath resolves a config file path, checking multiple locations
|
||||
func ResolveConfigPath(path string) (string, error) {
|
||||
candidates := []string{path}
|
||||
if !filepath.IsAbs(path) {
|
||||
candidates = append(candidates, filepath.Join("configs", path))
|
||||
}
|
||||
|
||||
var checked []string
|
||||
for _, candidate := range candidates {
|
||||
resolved := ExpandPath(candidate)
|
||||
checked = append(checked, resolved)
|
||||
if _, err := os.Stat(resolved); err == nil {
|
||||
return resolved, nil
|
||||
}
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("config file not found (looked in %s)", strings.Join(checked, ", "))
|
||||
}
|
||||
|
||||
// JobPaths provides helper methods for job directory paths
|
||||
type JobPaths struct {
|
||||
BasePath string
|
||||
}
|
||||
|
||||
// NewJobPaths creates a new JobPaths instance
|
||||
func NewJobPaths(basePath string) *JobPaths {
|
||||
return &JobPaths{BasePath: basePath}
|
||||
}
|
||||
|
||||
// PendingPath returns the path to pending jobs directory
|
||||
func (j *JobPaths) PendingPath() string {
|
||||
return filepath.Join(j.BasePath, "pending")
|
||||
}
|
||||
|
||||
// RunningPath returns the path to running jobs directory
|
||||
func (j *JobPaths) RunningPath() string {
|
||||
return filepath.Join(j.BasePath, "running")
|
||||
}
|
||||
|
||||
// FinishedPath returns the path to finished jobs directory
|
||||
func (j *JobPaths) FinishedPath() string {
|
||||
return filepath.Join(j.BasePath, "finished")
|
||||
}
|
||||
|
||||
// FailedPath returns the path to failed jobs directory
|
||||
func (j *JobPaths) FailedPath() string {
|
||||
return filepath.Join(j.BasePath, "failed")
|
||||
}
|
||||
// Deprecated: Use config.ExpandPath from shared.go instead
|
||||
// This file is kept for backward compatibility during migration
|
||||
|
|
|
|||
61
internal/config/shared.go
Normal file
61
internal/config/shared.go
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
// Package config provides shared configuration types for fetch_ml.
|
||||
// These types are embedded by per-command config structs.
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// RedisConfig holds Redis connection settings
|
||||
type RedisConfig struct {
|
||||
Addr string `yaml:"addr" json:"addr"`
|
||||
Password string `yaml:"password" json:"password"`
|
||||
DB int `yaml:"db" json:"db"`
|
||||
}
|
||||
|
||||
// SSHConfig holds SSH connection settings
|
||||
type SSHConfig struct {
|
||||
Host string `yaml:"host" json:"host"`
|
||||
Port int `yaml:"port" json:"port"`
|
||||
User string `yaml:"user" json:"user"`
|
||||
KeyPath string `yaml:"key_path" json:"key_path"`
|
||||
KnownHosts string `yaml:"known_hosts" json:"known_hosts"`
|
||||
}
|
||||
|
||||
// ExpandPath expands environment variables and tilde in a path
|
||||
// This remains in config package as it's a config parsing utility
|
||||
func ExpandPath(path string) string {
|
||||
if path == "" {
|
||||
return ""
|
||||
}
|
||||
expanded := os.ExpandEnv(path)
|
||||
if strings.HasPrefix(expanded, "~") {
|
||||
home, err := os.UserHomeDir()
|
||||
if err == nil {
|
||||
expanded = filepath.Join(home, expanded[1:])
|
||||
}
|
||||
}
|
||||
return expanded
|
||||
}
|
||||
|
||||
// ResolveConfigPath resolves a config file path, checking multiple locations
|
||||
func ResolveConfigPath(path string) (string, error) {
|
||||
candidates := []string{path}
|
||||
if !filepath.IsAbs(path) {
|
||||
candidates = append(candidates, filepath.Join("configs", path))
|
||||
}
|
||||
|
||||
var checked []string
|
||||
for _, candidate := range candidates {
|
||||
resolved := ExpandPath(candidate)
|
||||
checked = append(checked, resolved)
|
||||
if _, err := os.Stat(resolved); err == nil {
|
||||
return resolved, nil
|
||||
}
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("config file not found (looked in %s)", strings.Join(checked, ", "))
|
||||
}
|
||||
23
internal/queue/keys.go
Normal file
23
internal/queue/keys.go
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
// Package queue provides task queue functionality
|
||||
package queue
|
||||
|
||||
// Redis key prefixes - schema ownership belongs with the queue package
|
||||
const (
|
||||
TaskQueueKey = "ml:queue"
|
||||
TaskPrefix = "ml:task:"
|
||||
JobMetricsPrefix = "ml:metrics:"
|
||||
TaskStatusPrefix = "ml:status:"
|
||||
DatasetPrefix = "ml:dataset:"
|
||||
WorkerHeartbeat = "ml:workers:heartbeat"
|
||||
WorkerPrewarmKey = "ml:workers:prewarm:"
|
||||
PrewarmGCRequestKey = "ml:prewarm:gc:request"
|
||||
)
|
||||
|
||||
// Task status constants
|
||||
const (
|
||||
TaskStatusQueued = "queued"
|
||||
TaskStatusRunning = "running"
|
||||
TaskStatusCompleted = "completed"
|
||||
TaskStatusFailed = "failed"
|
||||
TaskStatusCancelled = "cancelled"
|
||||
)
|
||||
|
|
@ -16,7 +16,6 @@ const (
|
|||
defaultLeaseDuration = 30 * time.Minute
|
||||
defaultMaxRetries = 3
|
||||
defaultBlockTimeout = 1 * time.Second
|
||||
PrewarmGCRequestKey = "ml:prewarm:gc_request"
|
||||
)
|
||||
|
||||
// TaskQueue manages ML experiment tasks via Redis
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
package queue
|
||||
|
||||
import (
|
||||
"github.com/jfraeys/fetch_ml/internal/config"
|
||||
"github.com/jfraeys/fetch_ml/internal/domain"
|
||||
)
|
||||
|
||||
|
|
@ -16,13 +15,3 @@ type (
|
|||
TensorBoardTrackingConfig = domain.TensorBoardTrackingConfig
|
||||
WandbTrackingConfig = domain.WandbTrackingConfig
|
||||
)
|
||||
|
||||
// Redis key constants
|
||||
var (
|
||||
TaskQueueKey = config.RedisTaskQueueKey
|
||||
TaskPrefix = config.RedisTaskPrefix
|
||||
TaskStatusPrefix = config.RedisTaskStatusPrefix
|
||||
WorkerHeartbeat = config.RedisWorkerHeartbeat
|
||||
WorkerPrewarmKey = config.RedisWorkerPrewarmKey
|
||||
JobMetricsPrefix = config.RedisJobMetricsPrefix
|
||||
)
|
||||
|
|
|
|||
55
internal/storage/paths.go
Normal file
55
internal/storage/paths.go
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
// Package storage provides filesystem storage utilities for the fetch_ml project.
|
||||
// This package owns schema for filesystem paths used by the storage layer.
|
||||
package storage
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ExpandPath expands environment variables and tilde in a path
|
||||
func ExpandPath(path string) string {
|
||||
if path == "" {
|
||||
return ""
|
||||
}
|
||||
expanded := os.ExpandEnv(path)
|
||||
if strings.HasPrefix(expanded, "~") {
|
||||
home, err := os.UserHomeDir()
|
||||
if err == nil {
|
||||
expanded = filepath.Join(home, expanded[1:])
|
||||
}
|
||||
}
|
||||
return expanded
|
||||
}
|
||||
|
||||
// JobPaths provides helper methods for job directory paths
|
||||
// This belongs in the storage layer because it defines filesystem schema
|
||||
type JobPaths struct {
|
||||
BasePath string
|
||||
}
|
||||
|
||||
// NewJobPaths creates a new JobPaths instance
|
||||
func NewJobPaths(basePath string) *JobPaths {
|
||||
return &JobPaths{BasePath: basePath}
|
||||
}
|
||||
|
||||
// PendingPath returns the path to pending jobs directory
|
||||
func (j *JobPaths) PendingPath() string {
|
||||
return filepath.Join(j.BasePath, "pending")
|
||||
}
|
||||
|
||||
// RunningPath returns the path to running jobs directory
|
||||
func (j *JobPaths) RunningPath() string {
|
||||
return filepath.Join(j.BasePath, "running")
|
||||
}
|
||||
|
||||
// FinishedPath returns the path to finished jobs directory
|
||||
func (j *JobPaths) FinishedPath() string {
|
||||
return filepath.Join(j.BasePath, "finished")
|
||||
}
|
||||
|
||||
// FailedPath returns the path to failed jobs directory
|
||||
func (j *JobPaths) FailedPath() string {
|
||||
return filepath.Join(j.BasePath, "failed")
|
||||
}
|
||||
|
|
@ -21,6 +21,7 @@ import (
|
|||
"github.com/jfraeys/fetch_ml/internal/fileutil"
|
||||
"github.com/jfraeys/fetch_ml/internal/manifest"
|
||||
"github.com/jfraeys/fetch_ml/internal/queue"
|
||||
"github.com/jfraeys/fetch_ml/internal/storage"
|
||||
"github.com/jfraeys/fetch_ml/internal/telemetry"
|
||||
"github.com/jfraeys/fetch_ml/internal/tracking"
|
||||
)
|
||||
|
|
@ -263,7 +264,7 @@ func (w *Worker) runJob(ctx context.Context, task *queue.Task, cudaVisibleDevice
|
|||
m.MarkFinished(now, &exitCode, err)
|
||||
m.Metadata["failure_phase"] = "stage_experiment_files"
|
||||
})
|
||||
failedDir := filepath.Join(config.NewJobPaths(w.config.BasePath).FailedPath(), task.JobName)
|
||||
failedDir := filepath.Join(storage.NewJobPaths(w.config.BasePath).FailedPath(), task.JobName)
|
||||
_ = os.MkdirAll(filepath.Dir(failedDir), 0750)
|
||||
_ = os.RemoveAll(failedDir)
|
||||
_ = os.Rename(jobDir, failedDir)
|
||||
|
|
@ -286,7 +287,7 @@ func (w *Worker) runJob(ctx context.Context, task *queue.Task, cudaVisibleDevice
|
|||
m.MarkFinished(now, &exitCode, err)
|
||||
m.Metadata["failure_phase"] = "stage_snapshot"
|
||||
})
|
||||
failedDir := filepath.Join(config.NewJobPaths(w.config.BasePath).FailedPath(), task.JobName)
|
||||
failedDir := filepath.Join(storage.NewJobPaths(w.config.BasePath).FailedPath(), task.JobName)
|
||||
_ = os.MkdirAll(filepath.Dir(failedDir), 0750)
|
||||
_ = os.RemoveAll(failedDir)
|
||||
_ = os.Rename(jobDir, failedDir)
|
||||
|
|
@ -474,7 +475,7 @@ func copyDir(src, dst string) error {
|
|||
func (w *Worker) setupJobDirectories(
|
||||
task *queue.Task,
|
||||
) (jobDir, outputDir, logFile string, err error) {
|
||||
jobPaths := config.NewJobPaths(w.config.BasePath)
|
||||
jobPaths := storage.NewJobPaths(w.config.BasePath)
|
||||
pendingDir := jobPaths.PendingPath()
|
||||
jobDir = filepath.Join(pendingDir, task.JobName)
|
||||
outputDir = filepath.Join(jobPaths.RunningPath(), task.JobName)
|
||||
|
|
@ -611,7 +612,7 @@ func (w *Worker) executeJob(
|
|||
})
|
||||
|
||||
finalizeStart := time.Now()
|
||||
jobPaths := config.NewJobPaths(w.config.BasePath)
|
||||
jobPaths := storage.NewJobPaths(w.config.BasePath)
|
||||
var dest string
|
||||
if err != nil {
|
||||
dest = filepath.Join(jobPaths.FailedPath(), task.JobName)
|
||||
|
|
@ -763,7 +764,7 @@ func (w *Worker) executeContainerJob(
|
|||
containerWorkspace = config.DefaultContainerWorkspace
|
||||
}
|
||||
|
||||
jobPaths := config.NewJobPaths(w.config.BasePath)
|
||||
jobPaths := storage.NewJobPaths(w.config.BasePath)
|
||||
stagingStart := time.Now()
|
||||
|
||||
// Optional: provision tracking tools for this task
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import (
|
|||
"testing"
|
||||
|
||||
"github.com/jfraeys/fetch_ml/internal/config"
|
||||
"github.com/jfraeys/fetch_ml/internal/storage"
|
||||
)
|
||||
|
||||
func TestExpandPath(t *testing.T) {
|
||||
|
|
@ -145,7 +146,7 @@ func TestNewJobPaths(t *testing.T) {
|
|||
t.Parallel() // Enable parallel execution
|
||||
|
||||
basePath := "/test/base"
|
||||
jobPaths := config.NewJobPaths(basePath)
|
||||
jobPaths := storage.NewJobPaths(basePath)
|
||||
|
||||
if jobPaths.BasePath != basePath {
|
||||
t.Errorf("Expected BasePath %s, got %s", basePath, jobPaths.BasePath)
|
||||
|
|
@ -156,7 +157,7 @@ func TestJobPathsMethods(t *testing.T) {
|
|||
t.Parallel() // Enable parallel execution
|
||||
|
||||
basePath := "/test/base"
|
||||
jobPaths := config.NewJobPaths(basePath)
|
||||
jobPaths := storage.NewJobPaths(basePath)
|
||||
|
||||
// Test all path methods
|
||||
tests := []struct {
|
||||
|
|
@ -184,7 +185,7 @@ func TestJobPathsWithComplexBase(t *testing.T) {
|
|||
t.Parallel() // Enable parallel execution
|
||||
|
||||
basePath := "/very/complex/base/path/with/subdirs"
|
||||
jobPaths := config.NewJobPaths(basePath)
|
||||
jobPaths := storage.NewJobPaths(basePath)
|
||||
|
||||
expectedPending := filepath.Join(basePath, "pending")
|
||||
if jobPaths.PendingPath() != expectedPending {
|
||||
|
|
@ -200,7 +201,7 @@ func TestJobPathsWithComplexBase(t *testing.T) {
|
|||
func TestJobPathsEmptyBase(t *testing.T) {
|
||||
t.Parallel() // Enable parallel execution
|
||||
|
||||
jobPaths := config.NewJobPaths("")
|
||||
jobPaths := storage.NewJobPaths("")
|
||||
|
||||
// Should still work with empty base path
|
||||
expectedPending := "pending"
|
||||
|
|
|
|||
Loading…
Reference in a new issue