fetch_ml/tests/benchmarks/dataset_size_comparison_test.go
Jeremie Fraeys be67cb77d3
test(benchmarks): update benchmark tests with job cleanup and improvements
**Payload Performance Test:**
- Add job cleanup after each iteration using DeleteJob()
- Ensure isolated memory measurements between test runs

**All Benchmark Tests:**
- General improvements and maintenance updates
2026-02-23 18:03:54 -05:00

80 lines
2.1 KiB
Go

package benchmarks
import (
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"testing"

	"github.com/jfraeys/fetch_ml/internal/worker"
	"github.com/jfraeys/fetch_ml/internal/worker/integrity"
)
// BenchmarkDatasetSizeComparison hashes generated datasets of increasing size
// with both the Go parallel hasher and the native hasher, so the size at which
// the native path starts to win can be read off the results.
//
// For every dataset size two sub-benchmarks are registered, "<size>/GoParallel"
// and "<size>/Native". Each one generates its own files in a fresh temporary
// directory before the timer is reset, so file creation is not measured.
//
// Run with: go test -tags native_libs -bench=BenchmarkDatasetSize ./tests/benchmarks/
func BenchmarkDatasetSizeComparison(b *testing.B) {
	// datasetCase describes one generated dataset. totalMB is the nominal size
	// shown in the case name; the benchmark bodies below do not read it.
	type datasetCase struct {
		name     string
		fileSize int
		numFiles int
		totalMB  int
	}

	const mib = 1024 * 1024

	cases := []datasetCase{
		{name: "100MB", fileSize: 10 * mib, numFiles: 10, totalMB: 100}, // 10 x 10MB
		{name: "500MB", fileSize: 50 * mib, numFiles: 10, totalMB: 500}, // 10 x 50MB
		{name: "1GB", fileSize: 100 * mib, numFiles: 10, totalMB: 1000}, // 10 x 100MB
		{name: "2GB", fileSize: 100 * mib, numFiles: 20, totalMB: 2000}, // 20 x 100MB
		{name: "5GB", fileSize: 100 * mib, numFiles: 50, totalMB: 5000}, // 50 x 100MB
	}

	for idx := range cases {
		// Copy the element so each closure below owns its own case value.
		c := cases[idx]

		b.Run(c.name+"/GoParallel", func(b *testing.B) {
			root := b.TempDir()
			createTestFiles(b, root, c.numFiles, c.fileSize)

			b.ResetTimer()
			b.ReportAllocs()
			for n := 0; n < b.N; n++ {
				if _, err := integrity.DirOverallSHA256HexParallel(root); err != nil {
					b.Fatal(err)
				}
			}
		})

		b.Run(c.name+"/Native", func(b *testing.B) {
			root := b.TempDir()
			createTestFiles(b, root, c.numFiles, c.fileSize)

			b.ResetTimer()
			b.ReportAllocs()
			for n := 0; n < b.N; n++ {
				// DirOverallSHA256HexNative is the entry point to the native
				// (C++) hashing implementation.
				_, err := worker.DirOverallSHA256HexNative(root)
				switch {
				case err == nil:
					// Hash succeeded; move on to the next iteration.
				case strings.Contains(err.Error(), "native hash requires"):
					// The native library is not available in this build:
					// skip the sub-benchmark instead of failing it.
					b.Skip("Native hash not available: ", err)
				default:
					b.Fatal(err)
				}
			}
		})
	}
}
func createTestFiles(b *testing.B, dir string, numFiles int, fileSize int) {
data := make([]byte, fileSize)
for i := range data {
data[i] = byte(i % 256)
}
for i := range numFiles {
path := filepath.Join(dir, "data", string(rune('a'+i%26)), "chunk.bin")
if err := os.MkdirAll(filepath.Dir(path), 0750); err != nil {
b.Fatal(err)
}
if err := os.WriteFile(path, data, 0640); err != nil {
b.Fatal(err)
}
}
}