package worker

import (
	"fmt"
	"io/fs"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"github.com/jfraeys/fetch_ml/internal/manifest"
)

// manifestFilename is the file name that is never reported as an artifact,
// whether it sits directly in the run directory or in any subdirectory.
const manifestFilename = "run_manifest.json"

// ScanArtifacts is an exported wrapper around scanArtifacts for
// testing/benchmarking.
// When includeAll is false, code/, snapshot/, *.log files, and symlinks are
// left out of the result.
func ScanArtifacts(runDir string, includeAll bool) (*manifest.Artifacts, error) {
	return scanArtifacts(runDir, includeAll)
}

// scanArtifacts walks runDir and collects one manifest.ArtifactFile per
// non-directory entry that survives the exclusion rules.
//
// runDir is trimmed of surrounding whitespace first; an empty result is
// rejected with an error. Entries named manifestFilename are always skipped.
// Unless includeAll is true, the "code" and "snapshot" subtrees, entries whose
// relative path ends in ".log", and symlinks are skipped as well.
//
// The returned Files are sorted by their slash-separated relative path,
// TotalSizeBytes is the sum of the recorded sizes, and DiscoveryTime is the
// UTC time taken just before the walk starts. Any error reported by the walk,
// by filepath.Rel, or by DirEntry.Info aborts the scan and is returned as-is.
func scanArtifacts(runDir string, includeAll bool) (*manifest.Artifacts, error) {
	runDir = strings.TrimSpace(runDir)
	if runDir == "" {
		return nil, fmt.Errorf("run dir is empty")
	}

	var (
		found        []manifest.ArtifactFile
		sizeSum      int64
		discoveredAt = time.Now().UTC()
	)

	visit := func(path string, entry fs.DirEntry, walkErr error) error {
		switch {
		case walkErr != nil:
			return walkErr
		case path == runDir:
			// The root itself is never an artifact.
			return nil
		}

		relPath, relErr := filepath.Rel(runDir, path)
		if relErr != nil {
			return relErr
		}
		relPath = filepath.ToSlash(relPath)

		// Standard exclusions (always apply).
		if relPath == manifestFilename || strings.HasSuffix(relPath, "/"+manifestFilename) {
			return nil
		}

		// Optional exclusions (skipped when includeAll is true).
		if !includeAll {
			switch {
			case relPath == "code", strings.HasPrefix(relPath, "code/"),
				relPath == "snapshot", strings.HasPrefix(relPath, "snapshot/"):
				// Prune the whole subtree when the entry is a directory.
				if entry.IsDir() {
					return fs.SkipDir
				}
				return nil
			case strings.HasSuffix(relPath, ".log"),
				entry.Type()&fs.ModeSymlink != 0:
				return nil
			}
		}

		// Directories are traversed but not recorded.
		if entry.IsDir() {
			return nil
		}

		info, infoErr := entry.Info()
		if infoErr != nil {
			return infoErr
		}

		size := info.Size()
		found = append(found, manifest.ArtifactFile{
			Path:      relPath,
			SizeBytes: size,
			Modified:  info.ModTime().UTC(),
		})
		sizeSum += size
		return nil
	}

	if walkErr := filepath.WalkDir(runDir, visit); walkErr != nil {
		return nil, walkErr
	}

	sort.Slice(found, func(a, b int) bool {
		return found[a].Path < found[b].Path
	})

	return &manifest.Artifacts{
		DiscoveryTime:  discoveredAt,
		Files:          found,
		TotalSizeBytes: sizeSum,
	}, nil
}