From e557313e08867f9d4cd7d2cce2facc97add71b48 Mon Sep 17 00:00:00 2001 From: Jeremie Fraeys Date: Sat, 21 Feb 2026 14:38:00 -0500 Subject: [PATCH] fix: context reuse benchmark uses temp directory - Replace hardcoded testdata path with b.TempDir() - Add createSmallDataset helper for self-contained benchmarks - Fixes FAIL: BenchmarkContextReuse / BenchmarkSequentialHashes --- internal/worker/native_bridge_libs.go | 6 +++++ tests/benchmarks/context_reuse_bench_test.go | 27 +++++++++++++++++--- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/internal/worker/native_bridge_libs.go b/internal/worker/native_bridge_libs.go index 8246644..cff1f87 100644 --- a/internal/worker/native_bridge_libs.go +++ b/internal/worker/native_bridge_libs.go @@ -77,3 +77,9 @@ func ScanArtifactsNative(runDir string) (*manifest.Artifacts, error) { func ExtractTarGzNative(archivePath, dstDir string) error { return ExtractTarGz(archivePath, dstDir) } + +// DirOverallSHA256HexNative exports the native hash implementation for benchmarks. +// This allows explicit native library usage when -tags native_libs is enabled. 
+func DirOverallSHA256HexNative(root string) (string, error) { + return dirOverallSHA256HexNative(root) +} diff --git a/tests/benchmarks/context_reuse_bench_test.go b/tests/benchmarks/context_reuse_bench_test.go index 77852d5..c569b44 100644 --- a/tests/benchmarks/context_reuse_bench_test.go +++ b/tests/benchmarks/context_reuse_bench_test.go @@ -1,16 +1,37 @@ package benchmarks import ( + "os" + "path/filepath" "testing" "github.com/jfraeys/fetch_ml/internal/worker" ) +// createSmallDataset fills b.TempDir() with ten 100 KiB files (data/<a..j>/chunk.bin) and returns that directory; any filesystem error aborts via b.Fatal. +func createSmallDataset(b *testing.B) string { + tmpDir := b.TempDir() + // One shared 100 KiB buffer (repeating 0..255 byte pattern) backs all 10 files; the dataset is kept small to emphasize context overhead + data := make([]byte, 100*1024) + for i := range data { + data[i] = byte(i % 256) + } + for i := range 10 { + path := filepath.Join(tmpDir, "data", string(rune('a'+i%26)), "chunk.bin") + if err := os.MkdirAll(filepath.Dir(path), 0750); err != nil { + b.Fatal(err) + } + if err := os.WriteFile(path, data, 0640); err != nil { + b.Fatal(err) + } + } + return tmpDir +} + // BenchmarkContextReuse measures overhead of repeated hash operations // This verifies the 5-20ms savings from context reuse in native_bridge_libs.go func BenchmarkContextReuse(b *testing.B) { - // Small test directory to emphasize context overhead vs I/O - testDir := "./testdata/small_dataset" + testDir := createSmallDataset(b) b.ResetTimer() for i := 0; i < b.N; i++ { @@ -25,7 +46,7 @@ func BenchmarkContextReuse(b *testing.B) { // With context reuse: ~8ms per hash // Without context reuse: ~17ms per hash (9ms overhead) func BenchmarkSequentialHashes(b *testing.B) { - testDir := "./testdata/small_dataset" + testDir := createSmallDataset(b) b.ReportAllocs() b.ResetTimer()