fetch_ml/tests/benchmarks/streaming_io_bench_test.go
Jeremie Fraeys be67cb77d3
test(benchmarks): update benchmark tests with job cleanup and improvements
**Payload Performance Test:**
- Add job cleanup after each iteration using DeleteJob()
- Ensure isolated memory measurements between test runs

**All Benchmark Tests:**
- General improvements and maintenance updates
2026-02-23 18:03:54 -05:00

194 lines
4.8 KiB
Go

package benchmarks
import (
	"archive/tar"
	"bytes"
	"compress/gzip"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"testing"

	"github.com/jfraeys/fetch_ml/internal/worker"
)
// BenchmarkExtractTarGzGo measures worker.ExtractTarGz against a single
// archive requested as 100 files of 1 KiB each, reporting allocations.
//
// The archive is built once before the loop; every iteration extracts it
// into one of ten destination directories below the benchmark's temp dir.
func BenchmarkExtractTarGzGo(b *testing.B) {
	root := b.TempDir()
	archive := createStreamingTestArchive(b, root, 100, 1024)

	b.ReportAllocs()
	for n := 0; b.Loop(); n++ {
		// Rotate through the suffixes "0".."9" so the number of extraction
		// directories stays bounded regardless of the iteration count.
		slot := string(rune('0' + n%10))
		target := filepath.Join(root, "extract_go_"+slot)

		err := os.MkdirAll(target, 0750)
		if err != nil {
			b.Fatal(err)
		}
		err = worker.ExtractTarGz(archive, target)
		if err != nil {
			b.Fatal(err)
		}
	}
}
// BenchmarkExtractTarGzNative measures worker.ExtractTarGzNative against a
// single archive requested as 100 files of 1 KiB each, reporting allocations.
//
// NOTE(review): the original comment described the native path as a C++
// parallel decompressor using mmap, a thread pool and O_DIRECT for large
// files; none of that is visible from this file, so confirm against the
// worker package.
//
// The benchmark is skipped when the extractor reports that it is not
// available, which is detected by matching its error message.
func BenchmarkExtractTarGzNative(b *testing.B) {
	root := b.TempDir()
	archive := createStreamingTestArchive(b, root, 100, 1024)

	b.ReportAllocs()
	for n := 0; b.Loop(); n++ {
		// Rotate through the suffixes "0".."9" to bound the directory count.
		slot := string(rune('0' + n%10))
		target := filepath.Join(root, "extract_native_"+slot)

		if mkErr := os.MkdirAll(target, 0750); mkErr != nil {
			b.Fatal(mkErr)
		}

		err := worker.ExtractTarGzNative(archive, target)
		if err == nil {
			continue
		}
		// A missing native extractor is a skip, anything else is a failure.
		if strings.Contains(err.Error(), "native tar.gz extractor requires") {
			b.Skip("Native tar.gz extractor not available: ", err)
		}
		b.Fatal(err)
	}
}
// BenchmarkExtractTarGzSizes runs the Go and native extraction benchmarks
// against archives of three different sizes, one sub-benchmark per size.
//
// All sub-benchmarks share a single temp dir; each builds its own archive
// inside its b.Run body and hands it to benchmarkBoth.
func BenchmarkExtractTarGzSizes(b *testing.B) {
	root := b.TempDir()

	// Sizes in the comments are the requested raw payload, not the
	// compressed archive size.
	sizes := []struct {
		label    string
		numFiles int
		fileSize int
	}{
		{label: "Small", numFiles: 10, fileSize: 1024},    // 10 files x 1 KiB
		{label: "Medium", numFiles: 100, fileSize: 10240}, // 100 files x 10 KiB
		{label: "Large", numFiles: 50, fileSize: 102400},  // 50 files x 100 KiB
	}

	for _, sz := range sizes {
		b.Run(sz.label, func(b *testing.B) {
			archive := createStreamingTestArchive(b, root, sz.numFiles, sz.fileSize)
			benchmarkBoth(b, archive, root)
		})
	}
}
// benchmarkBoth registers two sub-benchmarks, "Go" and "Native", that extract
// archivePath with worker.ExtractTarGz and worker.ExtractTarGzNative
// respectively.
//
// Each iteration extracts into one of ten directories below tmpDir
// ("go_0".."go_9" or "native_0".."native_9"). The "Native" sub-benchmark is
// skipped when the extractor's error message says it is not available.
func benchmarkBoth(b *testing.B, archivePath, tmpDir string) {
	b.Run("Go", func(b *testing.B) {
		b.ReportAllocs()
		for n := 0; b.Loop(); n++ {
			slot := string(rune('0' + n%10))
			target := filepath.Join(tmpDir, "go_"+slot)

			err := os.MkdirAll(target, 0750)
			if err != nil {
				b.Fatal(err)
			}
			err = worker.ExtractTarGz(archivePath, target)
			if err != nil {
				b.Fatal(err)
			}
		}
	})

	b.Run("Native", func(b *testing.B) {
		b.ReportAllocs()
		for n := 0; b.Loop(); n++ {
			slot := string(rune('0' + n%10))
			target := filepath.Join(tmpDir, "native_"+slot)

			if mkErr := os.MkdirAll(target, 0750); mkErr != nil {
				b.Fatal(mkErr)
			}

			err := worker.ExtractTarGzNative(archivePath, target)
			if err == nil {
				continue
			}
			// A missing native extractor is a skip, anything else is a failure.
			if strings.Contains(err.Error(), "native tar.gz extractor requires") {
				b.Skip("Native tar.gz extractor not available: ", err)
			}
			b.Fatal(err)
		}
	})
}
// createStreamingTestArchive writes numFiles files of fileSize bytes each
// under tmpDir, packs them into a tar.gz archive with createTarGzFromDir and
// returns the archive path. Any failure aborts the test via b.Fatal.
//
// Files are spread over five subdirectories (subdir_0..subdir_4) and named
// after their index, so every one of the numFiles files is distinct. Byte j
// of file i is (i+j)%256, which makes the content deterministic.
//
// The data directory and the archive are both named after numFiles and
// fileSize, so calls with different parameters can share one tmpDir without
// overwriting each other's fixtures.
func createStreamingTestArchive(b testing.TB, tmpDir string, numFiles, fileSize int) string {
	b.Helper()

	// Decimal tags instead of a single rune offset from '0': the offset form
	// produced non-digit characters for values above 9 and collided for
	// inputs that shared the same quotient.
	tag := strconv.Itoa(numFiles) + "x" + strconv.Itoa(fileSize)

	dataDir := filepath.Join(tmpDir, "data_"+tag)
	if err := os.MkdirAll(dataDir, 0750); err != nil {
		b.Fatal(err)
	}

	// One buffer is reused for every file; it is fully rewritten each round.
	data := make([]byte, fileSize)
	for i := 0; i < numFiles; i++ {
		subdir := filepath.Join(dataDir, "subdir_"+strconv.Itoa(i%5))
		if err := os.MkdirAll(subdir, 0750); err != nil {
			b.Fatal(err)
		}

		for j := range data {
			data[j] = byte((i + j) % 256)
		}

		// The full index keeps names unique within a subdirectory.
		filename := filepath.Join(subdir, "file_"+strconv.Itoa(i)+".bin")
		if err := os.WriteFile(filename, data, 0640); err != nil {
			b.Fatal(err)
		}
	}

	archivePath := filepath.Join(tmpDir, "test_"+tag+".tar.gz")
	if err := createTarGzFromDir(dataDir, archivePath); err != nil {
		b.Fatal(err)
	}
	return archivePath
}
// createTarGzFromDir packs the tree rooted at srcDir into a gzip-compressed
// tar archive and writes it to dstPath with mode 0640.
//
// Entry names are paths relative to srcDir, always written with forward
// slashes; the root itself is stored as ".". Directories and symlinks are
// stored as header-only entries, symlinks with their link target recorded.
// Only regular files get a body.
//
// The archive is assembled in memory and written to dstPath in one step, so
// dstPath is not created when walking or archiving fails. The first error
// encountered is returned unchanged.
func createTarGzFromDir(srcDir, dstPath string) error {
	var buf bytes.Buffer
	gw := gzip.NewWriter(&buf)
	tw := tar.NewWriter(gw)

	walkErr := filepath.Walk(srcDir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		rel, err := filepath.Rel(srcDir, path)
		if err != nil {
			return err
		}

		// Walk does not follow symlinks, so info describes the link itself;
		// the header needs the link target to be a usable symlink entry.
		var link string
		if info.Mode()&os.ModeSymlink != 0 {
			if link, err = os.Readlink(path); err != nil {
				return err
			}
		}

		hdr, err := tar.FileInfoHeader(info, link)
		if err != nil {
			return err
		}
		// Tar names are slash-separated regardless of the host OS.
		hdr.Name = filepath.ToSlash(rel)
		if err := tw.WriteHeader(hdr); err != nil {
			return err
		}

		// Only regular files carry a size in their header; writing a body
		// for any other entry type would be rejected by the tar writer.
		if !info.Mode().IsRegular() {
			return nil
		}
		data, err := os.ReadFile(path)
		if err != nil {
			return err
		}
		_, err = tw.Write(data)
		return err
	})
	if walkErr != nil {
		return walkErr
	}

	// Close order matters: the tar trailer must reach gzip before gzip is
	// finalized.
	if err := tw.Close(); err != nil {
		return err
	}
	if err := gw.Close(); err != nil {
		return err
	}
	return os.WriteFile(dstPath, buf.Bytes(), 0640)
}