fix: context reuse benchmark uses temp directory

- Replace hardcoded testdata path with b.TempDir()
- Add createSmallDataset helper for self-contained benchmarks
- Fixes FAIL: BenchmarkContextReuse / BenchmarkSequentialHashes
This commit is contained in:
Jeremie Fraeys 2026-02-21 14:38:00 -05:00
parent 5f8e7c59a5
commit e557313e08
No known key found for this signature in database
2 changed files with 30 additions and 3 deletions

View file

@ -77,3 +77,9 @@ func ScanArtifactsNative(runDir string) (*manifest.Artifacts, error) {
func ExtractTarGzNative(archivePath, dstDir string) error {
	// Thin pass-through: both arguments are forwarded unchanged to
	// ExtractTarGz and its result is handed back to the caller as-is.
	err := ExtractTarGz(archivePath, dstDir)
	return err
}
// DirOverallSHA256HexNative is the exported entry point to the package-private
// dirOverallSHA256HexNative, so that benchmarks can ask for the native hash
// implementation explicitly (relevant when building with -tags native_libs).
// It returns exactly what the wrapped function returns for root.
func DirOverallSHA256HexNative(root string) (string, error) {
	digest, err := dirOverallSHA256HexNative(root)
	return digest, err
}

View file

@ -1,16 +1,37 @@
package benchmarks
import (
"os"
"path/filepath"
"testing"
"github.com/jfraeys/fetch_ml/internal/worker"
)
// createSmallDataset builds a throwaway dataset for benchmarks and returns the
// path of its root directory.
//
// The root is obtained from b.TempDir(), so the benchmark does not depend on
// any checked-in testdata. Beneath it, ten files are written at
// data/<letter>/chunk.bin (letters 'a' through 'j'), each holding 100 KiB of a
// repeating 0x00..0xFF byte pattern. The files are deliberately small to
// emphasize context overhead over I/O.
//
// Any filesystem error aborts the calling benchmark via b.Fatal.
func createSmallDataset(b *testing.B) string {
	// Mark this function as a helper so that a b.Fatal below is reported
	// against the calling benchmark's line instead of a line in here.
	b.Helper()

	const (
		fileCount = 10         // number of chunk files to create
		fileSize  = 100 * 1024 // bytes per chunk file (100 KiB)
	)

	root := b.TempDir()

	// Build the payload once; every file receives the same bytes.
	payload := make([]byte, fileSize)
	for i := range payload {
		payload[i] = byte(i % 256)
	}

	for i := 0; i < fileCount; i++ {
		// Each file lives in its own single-letter sub-directory.
		path := filepath.Join(root, "data", string(rune('a'+i%26)), "chunk.bin")
		if err := os.MkdirAll(filepath.Dir(path), 0750); err != nil {
			b.Fatal(err)
		}
		if err := os.WriteFile(path, payload, 0640); err != nil {
			b.Fatal(err)
		}
	}
	return root
}
// BenchmarkContextReuse measures overhead of repeated hash operations
// This verifies the 5-20ms savings from context reuse in native_bridge_libs.go
func BenchmarkContextReuse(b *testing.B) {
// Small test directory to emphasize context overhead vs I/O
testDir := "./testdata/small_dataset"
testDir := createSmallDataset(b)
b.ResetTimer()
for i := 0; i < b.N; i++ {
@ -25,7 +46,7 @@ func BenchmarkContextReuse(b *testing.B) {
// With context reuse: ~8ms per hash
// Without context reuse: ~17ms per hash (9ms overhead)
func BenchmarkSequentialHashes(b *testing.B) {
testDir := "./testdata/small_dataset"
testDir := createSmallDataset(b)
b.ReportAllocs()
b.ResetTimer()