Add comprehensive benchmarking suite for C++ optimization targets:
- tests/benchmarks/dataset_hash_bench_test.go - dirOverallSHA256Hex profiling
- tests/benchmarks/queue_bench_test.go - filesystem queue profiling
- tests/benchmarks/artifact_and_snapshot_bench_test.go - scanArtifacts/extractTarGz profiling
- tests/unit/worker/artifacts_test.go - moved from internal/ for clean separation

Add parallel Go implementation as baseline for C++ comparison:
- internal/worker/data_integrity.go: dirOverallSHA256HexParallel() with worker pool
- Benchmarks show 2.1x speedup (3.97ms -> 1.90ms) vs sequential

Exported wrappers for testing:
- ScanArtifacts() - artifact scanning
- ExtractTarGz() - tar.gz extraction
- DirOverallSHA256HexParallel() - parallel hashing

Profiling results (Apple M2 Ultra):
- dirOverallSHA256Hex: 78% syscall overhead (target for mmap C++)
- rebuildIndex: 96% syscall overhead (target for binary index C++)
- scanArtifacts: 87% syscall overhead (target for fast traversal C++)
- extractTarGz: 95% syscall overhead (target for parallel gzip C++)

Related: C++ optimization strategy in memory 5d5f0bb6
68 lines
1.6 KiB
Go
package worker_test
|
|
|
|
import (
|
|
"os"
|
|
"path/filepath"
|
|
"testing"
|
|
|
|
"github.com/jfraeys/fetch_ml/internal/worker"
|
|
)
|
|
|
|
func TestScanArtifacts_SkipsKnownPathsAndLogs(t *testing.T) {
|
|
runDir := t.TempDir()
|
|
|
|
mustWrite := func(rel string, data []byte) {
|
|
p := filepath.Join(runDir, rel)
|
|
if err := os.MkdirAll(filepath.Dir(p), 0750); err != nil {
|
|
t.Fatalf("mkdir: %v", err)
|
|
}
|
|
if err := os.WriteFile(p, data, 0600); err != nil {
|
|
t.Fatalf("write file: %v", err)
|
|
}
|
|
}
|
|
|
|
mustWrite("run_manifest.json", []byte("{}"))
|
|
mustWrite("output.log", []byte("log"))
|
|
mustWrite("code/ignored.txt", []byte("ignore"))
|
|
mustWrite("snapshot/ignored.bin", []byte("ignore"))
|
|
|
|
mustWrite("results/metrics.jsonl", []byte("m"))
|
|
mustWrite("checkpoints/best.pt", []byte("checkpoint"))
|
|
mustWrite("plots/loss.png", []byte("png"))
|
|
|
|
art, err := worker.ScanArtifacts(runDir)
|
|
if err != nil {
|
|
t.Fatalf("scanArtifacts: %v", err)
|
|
}
|
|
if art == nil {
|
|
t.Fatalf("expected artifacts")
|
|
}
|
|
|
|
paths := make([]string, 0, len(art.Files))
|
|
var total int64
|
|
for _, f := range art.Files {
|
|
paths = append(paths, f.Path)
|
|
total += f.SizeBytes
|
|
}
|
|
|
|
want := []string{
|
|
"checkpoints/best.pt",
|
|
"plots/loss.png",
|
|
"results/metrics.jsonl",
|
|
}
|
|
if len(paths) != len(want) {
|
|
t.Fatalf("expected %d files, got %d: %v", len(want), len(paths), paths)
|
|
}
|
|
for i := range want {
|
|
if paths[i] != want[i] {
|
|
t.Fatalf("expected paths[%d]=%q, got %q", i, want[i], paths[i])
|
|
}
|
|
}
|
|
|
|
if art.TotalSizeBytes != total {
|
|
t.Fatalf("expected total_size_bytes=%d, got %d", total, art.TotalSizeBytes)
|
|
}
|
|
if art.DiscoveryTime.IsZero() {
|
|
t.Fatalf("expected discovery_time")
|
|
}
|
|
}
|