From 4c8c9dfe4b0829137963baad913be49603073a3f Mon Sep 17 00:00:00 2001 From: Jeremie Fraeys Date: Tue, 17 Feb 2026 16:45:59 -0500 Subject: [PATCH] refactor: Export SelectDependencyManifest for API helpers - Renamed selectDependencyManifest to SelectDependencyManifest (exported) - Added re-export in worker package for backward compatibility - Updated internal call in container.go to use exported function - API helpers can now access it via worker.SelectDependencyManifest - FileSHA256Hex (API helpers) and Manager.hashFile now delegate to integrity.FileSHA256Hex; the inline ".." path-traversal check in hashFile is removed Build status: Compiles successfully --- internal/api/helpers/hash_helpers.go | 14 +++----------- internal/experiment/manager.go | 20 +++----------------- internal/worker/executor/container.go | 4 ++-- internal/worker/worker.go | 6 ++++++ 4 files changed, 14 insertions(+), 30 deletions(-) diff --git a/internal/api/helpers/hash_helpers.go b/internal/api/helpers/hash_helpers.go index 0fad0c4..a980a73 100644 --- a/internal/api/helpers/hash_helpers.go +++ b/internal/api/helpers/hash_helpers.go @@ -5,7 +5,6 @@ import ( "crypto/sha256" "encoding/hex" "fmt" - "io" "os" "path/filepath" "strings" @@ -14,6 +13,7 @@ import ( "github.com/jfraeys/fetch_ml/internal/fileutil" "github.com/jfraeys/fetch_ml/internal/queue" "github.com/jfraeys/fetch_ml/internal/worker" + "github.com/jfraeys/fetch_ml/internal/worker/integrity" ) // ComputeDatasetID computes a dataset ID from dataset specs or dataset names. @@ -56,17 +56,9 @@ func ComputeParamsHash(args string) string { } // FileSHA256Hex computes the SHA256 hash of a file. +// This delegates to the integrity package for consistent hashing. func FileSHA256Hex(path string) (string, error) { - f, err := os.Open(filepath.Clean(path)) - if err != nil { - return "", err - } - defer func() { _ = f.Close() }() - h := sha256.New() - if _, err := io.Copy(h, f); err != nil { - return "", err - } - return hex.EncodeToString(h.Sum(nil)), nil + return integrity.FileSHA256Hex(path) } // ExpectedProvenanceForCommit computes expected provenance metadata for a commit. 
diff --git a/internal/experiment/manager.go b/internal/experiment/manager.go index 502ae5e..0ebb45f 100644 --- a/internal/experiment/manager.go +++ b/internal/experiment/manager.go @@ -7,7 +7,6 @@ import ( "encoding/hex" "encoding/json" "fmt" - "io" "math" "os" "path/filepath" @@ -16,6 +15,7 @@ import ( "github.com/jfraeys/fetch_ml/internal/container" "github.com/jfraeys/fetch_ml/internal/fileutil" + "github.com/jfraeys/fetch_ml/internal/worker/integrity" ) // Manifest represents a content integrity manifest for experiment files @@ -555,23 +555,9 @@ func (m *Manager) ValidateManifest(commitID string) error { } // hashFile calculates SHA256 hash of a file +// This delegates to the integrity package for consistent hashing. func (m *Manager) hashFile(path string) (string, error) { - // Validate path is within expected directory to prevent path traversal - if strings.Contains(path, "..") { - return "", fmt.Errorf("invalid path contains traversal: %s", path) - } - file, err := os.Open(filepath.Clean(path)) //nolint:gosec // Path cleaned after validation - if err != nil { - return "", err - } - defer func() { _ = file.Close() }() - - hasher := sha256.New() - if _, err := io.Copy(hasher, file); err != nil { - return "", err - } - - return hex.EncodeToString(hasher.Sum(nil)), nil + return integrity.FileSHA256Hex(path) } // calculateOverallSHA calculates deterministic SHA256 of all file hashes diff --git a/internal/worker/executor/container.go b/internal/worker/executor/container.go index aaf4c93..af58c00 100644 --- a/internal/worker/executor/container.go +++ b/internal/worker/executor/container.go @@ -279,7 +279,7 @@ func (e *ContainerExecutor) runPodman( ) error { scriptPath := filepath.Join(podmanCfg.ContainerWorkspace, e.config.TrainScript) - manifestName, err := selectDependencyManifest(filepath.Join(env.OutputDir, "code")) + manifestName, err := SelectDependencyManifest(filepath.Join(env.OutputDir, "code")) if err != nil { return &errtypes.TaskExecutionError{ TaskID: 
task.ID, @@ -415,7 +415,7 @@ func (e *ContainerExecutor) handleSuccess( return nil } -func selectDependencyManifest(filesPath string) (string, error) { +func SelectDependencyManifest(filesPath string) (string, error) { if filesPath == "" { return "", fmt.Errorf("missing files path") } diff --git a/internal/worker/worker.go b/internal/worker/worker.go index 377e632..70f15f3 100644 --- a/internal/worker/worker.go +++ b/internal/worker/worker.go @@ -132,3 +132,9 @@ func (w *Worker) getGPUDetector() GPUDetector { factory := &GPUDetectorFactory{} return factory.CreateDetector(w.config) } + +// SelectDependencyManifest re-exports the executor function for API helpers. +// It detects the dependency manifest file in the given directory. +func SelectDependencyManifest(filesPath string) (string, error) { + return executor.SelectDependencyManifest(filesPath) +}