fetch_ml/internal/worker/execution/snapshot.go
Jeremie Fraeys c46be7f815
refactor: Phase 4 deferred - Extract GPU utilities and execution helpers
Extracted from execution.go to focused packages:

1. internal/worker/gpu.go (60 lines)
   - gpuVisibleDevicesString() - GPU device string formatting
   - filterExistingDevicePaths() - Device path filtering
   - gpuVisibleEnvVarName() - GPU env var selection
   - Reuses GPUType constants from gpu_detector.go

2. internal/worker/execution/setup.go (108 lines)
   - SetupJobDirectories() - Job directory creation
   - CopyDir() - Directory tree copying
   - copyFile() - Single file copy helper

3. internal/worker/execution/snapshot.go (52 lines)
   - StageSnapshot() - Snapshot staging for jobs
   - StageSnapshotFromPath() - Snapshot staging from path

Updated execution.go:
- Removed 64 lines of GPU utilities (now in gpu.go)
- Reduced from 1,082 to ~1,018 lines
- Still contains main execution flow (runJob, executeJob, etc.)

Build status: Compiles successfully
2026-02-17 14:03:11 -05:00

54 lines
1.4 KiB
Go

// Package execution provides job execution utilities for the worker.
package execution
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/jfraeys/fetch_ml/internal/container"
)
// StageSnapshot stages the snapshot named by snapshotID for a job.
//
// A blank snapshotID (after trimming whitespace) is a no-op and returns nil.
// Otherwise the trimmed ID must pass container.ValidateJobName, and taskID and
// jobDir must both be non-blank. The snapshot source is resolved to
// <dataDir>/snapshots/<snapshotID> and staging is delegated to
// StageSnapshotFromPath.
func StageSnapshot(basePath, dataDir, taskID, snapshotID, jobDir string) error {
	name := strings.TrimSpace(snapshotID)
	if len(name) == 0 {
		// No snapshot requested; nothing to stage.
		return nil
	}

	// Checks run in a fixed order so the first failing one decides the error.
	nameErr := container.ValidateJobName(name)
	switch {
	case nameErr != nil:
		return nameErr
	case strings.TrimSpace(taskID) == "":
		return fmt.Errorf("missing task id")
	case strings.TrimSpace(jobDir) == "":
		return fmt.Errorf("missing job dir")
	}

	snapshotDir := filepath.Join(dataDir, "snapshots", name)
	return StageSnapshotFromPath(basePath, taskID, snapshotDir, jobDir)
}
// StageSnapshotFromPath stages a snapshot into <jobDir>/snapshot.
//
// Any existing <jobDir>/snapshot is removed first. If a prewarmed directory
// exists at <basePath>/.prewarm/snapshots/<taskID>, it is moved into place with
// os.Rename; otherwise srcPath is copied into place with CopyDir.
//
// An error is returned when basePath, taskID, or jobDir is blank, when taskID
// would resolve to the prewarm root itself or to a location outside it, when
// the previous destination cannot be removed, or when the move/copy fails.
func StageSnapshotFromPath(basePath, taskID, srcPath, jobDir string) error {
	if strings.TrimSpace(basePath) == "" {
		return fmt.Errorf("missing base path")
	}
	if strings.TrimSpace(taskID) == "" {
		return fmt.Errorf("missing task id")
	}
	if strings.TrimSpace(jobDir) == "" {
		return fmt.Errorf("missing job dir")
	}

	// taskID is used as a path component. Reject values such as "." or ".."
	// that would make the rename below move the prewarm root (or something
	// outside it) into the job directory.
	prewarmRoot := filepath.Join(basePath, ".prewarm", "snapshots")
	prewarmSrc := filepath.Join(prewarmRoot, taskID)
	rel, err := filepath.Rel(prewarmRoot, prewarmSrc)
	if err != nil || rel == "." || rel == ".." ||
		strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return fmt.Errorf("invalid task id %q", taskID)
	}

	// Start from a clean destination. A failed cleanup must not be ignored,
	// otherwise a copy could merge new files into stale snapshot content.
	dst := filepath.Join(jobDir, "snapshot")
	if err := os.RemoveAll(dst); err != nil {
		return fmt.Errorf("clearing snapshot dir %q: %w", dst, err)
	}

	if info, err := os.Stat(prewarmSrc); err == nil && info.IsDir() {
		// Use prewarmed snapshot if available.
		if err := os.Rename(prewarmSrc, dst); err != nil {
			return fmt.Errorf("moving prewarmed snapshot to %q: %w", dst, err)
		}
		return nil
	}

	if err := CopyDir(srcPath, dst); err != nil {
		return fmt.Errorf("copying snapshot %q to %q: %w", srcPath, dst, err)
	}
	return nil
}