package benchmarks import ( "os" "path/filepath" "strings" "testing" "github.com/jfraeys/fetch_ml/internal/worker" "github.com/jfraeys/fetch_ml/internal/worker/integrity" ) // BenchmarkDatasetSizeComparison finds the crossover point where native wins // Run with: go test -tags native_libs -bench=BenchmarkDatasetSize ./tests/benchmarks/ func BenchmarkDatasetSizeComparison(b *testing.B) { sizes := []struct { name string fileSize int numFiles int totalMB int }{ {"100MB", 10 * 1024 * 1024, 10, 100}, // 10 x 10MB = 100MB {"500MB", 50 * 1024 * 1024, 10, 500}, // 10 x 50MB = 500MB {"1GB", 100 * 1024 * 1024, 10, 1000}, // 10 x 100MB = 1GB {"2GB", 100 * 1024 * 1024, 20, 2000}, // 20 x 100MB = 2GB {"5GB", 100 * 1024 * 1024, 50, 5000}, // 50 x 100MB = 5GB } for _, tc := range sizes { tc := tc // capture range variable b.Run(tc.name+"/GoParallel", func(b *testing.B) { tmpDir := b.TempDir() createTestFiles(b, tmpDir, tc.numFiles, tc.fileSize) b.ResetTimer() b.ReportAllocs() for i := 0; i < b.N; i++ { _, err := integrity.DirOverallSHA256HexParallel(tmpDir) if err != nil { b.Fatal(err) } } }) b.Run(tc.name+"/Native", func(b *testing.B) { tmpDir := b.TempDir() createTestFiles(b, tmpDir, tc.numFiles, tc.fileSize) b.ResetTimer() b.ReportAllocs() for i := 0; i < b.N; i++ { // Use DirOverallSHA256HexNative which calls native C++ implementation _, err := worker.DirOverallSHA256HexNative(tmpDir) if err != nil { if strings.Contains(err.Error(), "native hash requires") { b.Skip("Native hash not available: ", err) } b.Fatal(err) } } }) } } func createTestFiles(b *testing.B, dir string, numFiles int, fileSize int) { data := make([]byte, fileSize) for i := range data { data[i] = byte(i % 256) } for i := range numFiles { path := filepath.Join(dir, "data", string(rune('a'+i%26)), "chunk.bin") if err := os.MkdirAll(filepath.Dir(path), 0750); err != nil { b.Fatal(err) } if err := os.WriteFile(path, data, 0640); err != nil { b.Fatal(err) } } }