//go:build !native_libs // +build !native_libs package benchmarks import ( "os" "path/filepath" "testing" "github.com/jfraeys/fetch_ml/internal/worker" ) // BenchmarkDirOverallSHA256Hex profiles the directory hashing hot path. // This function walks directories, sorts files, and computes SHA256 hashes. // It's a Tier 1 candidate for C++ optimization via: // - Memory-mapped file reads // - Parallel hashing // - SIMD SHA256 (Intel SHA extensions or ARMv8 crypto) func BenchmarkDirOverallSHA256Hex(b *testing.B) { // Create a temp directory structure resembling a dataset tmpDir := b.TempDir() // Create nested structure with files of varying sizes sizes := []int{1024, 10240, 102400, 1024 * 1024} // 1KB to 1MB for i, size := range sizes { subdir := filepath.Join(tmpDir, "subdir", string(rune('a'+i))) if err := os.MkdirAll(subdir, 0750); err != nil { b.Fatal(err) } data := make([]byte, size) for j := range data { data[j] = byte(i + j%256) } if err := os.WriteFile(filepath.Join(subdir, "data.bin"), data, 0640); err != nil { b.Fatal(err) } } // Add some small metadata files metaDir := filepath.Join(tmpDir, "meta") if err := os.MkdirAll(metaDir, 0750); err != nil { b.Fatal(err) } for i := range 10 { if err := os.WriteFile( filepath.Join(metaDir, "file"+string(rune('0'+i))+".json"), []byte(`{"key": "value"}`), 0640, ); err != nil { b.Fatal(err) } } b.ResetTimer() b.ReportAllocs() for b.Loop() { _, err := worker.DirOverallSHA256Hex(tmpDir) if err != nil { b.Fatal(err) } } } // BenchmarkDirOverallSHA256HexLarge profiles with larger dataset simulation func BenchmarkDirOverallSHA256HexLarge(b *testing.B) { tmpDir := b.TempDir() // Create 50 files of 100KB each = ~5MB total for i := range 50 { subdir := filepath.Join(tmpDir, "data", string(rune('a'+i%26))) if err := os.MkdirAll(subdir, 0750); err != nil { b.Fatal(err) } data := make([]byte, 100*1024) for j := range data { data[j] = byte(i + j%256) } if err := os.WriteFile( filepath.Join(subdir, "chunk"+string(rune('0'+i/26))+".bin"), data, 0640, ); err != nil { b.Fatal(err) } } b.Run("Sequential", func(b *testing.B) { b.ReportAllocs() for b.Loop() { _, err := worker.DirOverallSHA256Hex(tmpDir) if err != nil { b.Fatal(err) } } }) b.Run("ParallelGo", func(b *testing.B) { b.ReportAllocs() for b.Loop() { _, err := worker.DirOverallSHA256Hex(tmpDir) if err != nil { b.Fatal(err) } } }) b.Run("Native", func(b *testing.B) { // This requires -tags native_libs to actually use native // Otherwise falls back to Go implementation b.ReportAllocs() for b.Loop() { _, err := worker.DirOverallSHA256Hex(tmpDir) if err != nil { b.Fatal(err) } } }) }