Update worker system for scheduler integration: - Worker server with scheduler registration - Configuration with scheduler endpoint support - Artifact handling with integrity verification - Container executor with supply chain validation - Local executor enhancements - GPU detection improvements (cross-platform) - Error handling with execution context - Factory pattern for executor instantiation - Hash integrity with native library support
163 lines
4.4 KiB
Go
163 lines
4.4 KiB
Go
package worker
|
|
|
|
import (
|
|
"fmt"
|
|
"io/fs"
|
|
"path/filepath"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/jfraeys/fetch_ml/internal/fileutil"
|
|
"github.com/jfraeys/fetch_ml/internal/manifest"
|
|
)
|
|
|
|
// scanArtifacts discovers and catalogs artifact files in a run directory.
|
|
// When includeAll is false, it excludes code/, snapshot/, *.log files, and symlinks.
|
|
func scanArtifacts(runDir string, includeAll bool, caps *SandboxConfig) (*manifest.Artifacts, error) {
|
|
runDir = strings.TrimSpace(runDir)
|
|
if runDir == "" {
|
|
return nil, fmt.Errorf("run dir is empty")
|
|
}
|
|
|
|
// Validate and canonicalize the runDir before any operations
|
|
validator := fileutil.NewSecurePathValidator(runDir)
|
|
validatedRunDir, err := validator.ValidatePath("")
|
|
if err != nil {
|
|
return nil, fmt.Errorf("invalid run directory: %w", err)
|
|
}
|
|
|
|
var files []manifest.ArtifactFile
|
|
var exclusions []manifest.Exclusion
|
|
var total int64
|
|
var fileCount int
|
|
|
|
now := time.Now().UTC()
|
|
|
|
err = filepath.WalkDir(validatedRunDir, func(path string, d fs.DirEntry, err error) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if path == validatedRunDir {
|
|
return nil
|
|
}
|
|
|
|
// Security: Validate each path is still within runDir
|
|
// This catches any symlink escapes or path traversal attempts during walk
|
|
rel, err := filepath.Rel(validatedRunDir, path)
|
|
if err != nil {
|
|
return fmt.Errorf("path escape detected during artifact scan: %w", err)
|
|
}
|
|
rel = filepath.ToSlash(rel)
|
|
|
|
// Check for path traversal patterns in the relative path
|
|
if strings.Contains(rel, "..") {
|
|
return fmt.Errorf("path traversal attempt detected: %s", rel)
|
|
}
|
|
|
|
// Standard exclusions (always apply)
|
|
// Exclude manifest files - both legacy (run_manifest.json) and nonce-based (run_manifest_<nonce>.json)
|
|
if strings.HasPrefix(rel, "run_manifest") && strings.HasSuffix(rel, ".json") {
|
|
exclusions = append(exclusions, manifest.Exclusion{
|
|
Path: rel,
|
|
Reason: "manifest file excluded",
|
|
})
|
|
return nil
|
|
}
|
|
|
|
// Optional exclusions (skipped when includeAll is true)
|
|
if !includeAll {
|
|
if rel == "code" || strings.HasPrefix(rel, "code/") {
|
|
if d.IsDir() {
|
|
exclusions = append(exclusions, manifest.Exclusion{
|
|
Path: rel,
|
|
Reason: "source directory excluded",
|
|
})
|
|
return fs.SkipDir
|
|
}
|
|
exclusions = append(exclusions, manifest.Exclusion{
|
|
Path: rel,
|
|
Reason: "source directory excluded",
|
|
})
|
|
return nil
|
|
}
|
|
if rel == "snapshot" || strings.HasPrefix(rel, "snapshot/") {
|
|
if d.IsDir() {
|
|
exclusions = append(exclusions, manifest.Exclusion{
|
|
Path: rel,
|
|
Reason: "snapshot directory excluded",
|
|
})
|
|
return fs.SkipDir
|
|
}
|
|
exclusions = append(exclusions, manifest.Exclusion{
|
|
Path: rel,
|
|
Reason: "snapshot directory excluded",
|
|
})
|
|
return nil
|
|
}
|
|
if strings.HasSuffix(rel, ".log") {
|
|
exclusions = append(exclusions, manifest.Exclusion{
|
|
Path: rel,
|
|
Reason: "log files excluded",
|
|
})
|
|
return nil
|
|
}
|
|
if d.Type()&fs.ModeSymlink != 0 {
|
|
// Skip symlinks - they could point outside the directory
|
|
exclusions = append(exclusions, manifest.Exclusion{
|
|
Path: rel,
|
|
Reason: "symlink excluded for security",
|
|
})
|
|
return nil
|
|
}
|
|
}
|
|
|
|
if d.IsDir() {
|
|
return nil
|
|
}
|
|
|
|
info, err := d.Info()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Check artifact caps before adding
|
|
fileCount++
|
|
if caps != nil && caps.MaxArtifactFiles > 0 && fileCount > caps.MaxArtifactFiles {
|
|
return fmt.Errorf("artifact file count cap exceeded: %d files (max %d)", fileCount, caps.MaxArtifactFiles)
|
|
}
|
|
|
|
total += info.Size()
|
|
if caps != nil && caps.MaxArtifactTotalBytes > 0 && total > caps.MaxArtifactTotalBytes {
|
|
return fmt.Errorf("artifact total size cap exceeded: %d bytes (max %d)", total, caps.MaxArtifactTotalBytes)
|
|
}
|
|
|
|
files = append(files, manifest.ArtifactFile{
|
|
Path: rel,
|
|
SizeBytes: info.Size(),
|
|
Modified: info.ModTime().UTC(),
|
|
})
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
sort.Slice(files, func(i, j int) bool {
|
|
return files[i].Path < files[j].Path
|
|
})
|
|
|
|
return &manifest.Artifacts{
|
|
DiscoveryTime: now,
|
|
Files: files,
|
|
TotalSizeBytes: total,
|
|
Exclusions: exclusions,
|
|
}, nil
|
|
}
|
|
|
|
// ScanArtifacts is an exported wrapper for testing/benchmarking.
|
|
// When includeAll is false, excludes code/, snapshot/, *.log files, and symlinks.
|
|
func ScanArtifacts(runDir string, includeAll bool, caps *SandboxConfig) (*manifest.Artifacts, error) {
|
|
return scanArtifacts(runDir, includeAll, caps)
|
|
}
|