**Payload Performance Test:**
- Add job cleanup after each iteration using DeleteJob()
- Ensure isolated memory measurements between test runs

**All Benchmark Tests:**
- General improvements and maintenance updates
194 lines · 4.8 KiB · Go
package benchmarks
|
|
|
|
import (
	"archive/tar"
	"bytes"
	"compress/gzip"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"testing"

	"github.com/jfraeys/fetch_ml/internal/worker"
)
// BenchmarkExtractTarGzGo profiles Go sequential tar.gz extraction
|
|
func BenchmarkExtractTarGzGo(b *testing.B) {
|
|
tmpDir := b.TempDir()
|
|
archivePath := createStreamingTestArchive(b, tmpDir, 100, 1024) // 100 files, 1KB each
|
|
|
|
b.ReportAllocs()
|
|
|
|
for i := 0; b.Loop(); i++ {
|
|
dstDir := filepath.Join(tmpDir, "extract_go_"+string(rune('0'+i%10)))
|
|
if err := os.MkdirAll(dstDir, 0750); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
if err := worker.ExtractTarGz(archivePath, dstDir); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// BenchmarkExtractTarGzNative profiles C++ parallel decompression
|
|
// Uses: mmap + thread pool + O_DIRECT for large files
|
|
func BenchmarkExtractTarGzNative(b *testing.B) {
|
|
tmpDir := b.TempDir()
|
|
archivePath := createStreamingTestArchive(b, tmpDir, 100, 1024)
|
|
|
|
b.ReportAllocs()
|
|
|
|
for i := 0; b.Loop(); i++ {
|
|
dstDir := filepath.Join(tmpDir, "extract_native_"+string(rune('0'+i%10)))
|
|
if err := os.MkdirAll(dstDir, 0750); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
if err := worker.ExtractTarGzNative(archivePath, dstDir); err != nil {
|
|
if strings.Contains(err.Error(), "native tar.gz extractor requires") {
|
|
b.Skip("Native tar.gz extractor not available: ", err)
|
|
}
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// BenchmarkExtractTarGzSizes tests different archive sizes
|
|
func BenchmarkExtractTarGzSizes(b *testing.B) {
|
|
tmpDir := b.TempDir()
|
|
|
|
// Small: 10 files, 1KB each (~10KB compressed)
|
|
b.Run("Small", func(b *testing.B) {
|
|
archivePath := createStreamingTestArchive(b, tmpDir, 10, 1024)
|
|
benchmarkBoth(b, archivePath, tmpDir)
|
|
})
|
|
|
|
// Medium: 100 files, 10KB each (~1MB compressed)
|
|
b.Run("Medium", func(b *testing.B) {
|
|
archivePath := createStreamingTestArchive(b, tmpDir, 100, 10240)
|
|
benchmarkBoth(b, archivePath, tmpDir)
|
|
})
|
|
|
|
// Large: 50 files, 100KB each (~5MB compressed)
|
|
b.Run("Large", func(b *testing.B) {
|
|
archivePath := createStreamingTestArchive(b, tmpDir, 50, 102400)
|
|
benchmarkBoth(b, archivePath, tmpDir)
|
|
})
|
|
}
|
|
|
|
func benchmarkBoth(b *testing.B, archivePath, tmpDir string) {
|
|
b.Run("Go", func(b *testing.B) {
|
|
b.ReportAllocs()
|
|
for i := 0; b.Loop(); i++ {
|
|
dstDir := filepath.Join(tmpDir, "go_"+string(rune('0'+i%10)))
|
|
if err := os.MkdirAll(dstDir, 0750); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
if err := worker.ExtractTarGz(archivePath, dstDir); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
})
|
|
|
|
b.Run("Native", func(b *testing.B) {
|
|
b.ReportAllocs()
|
|
for i := 0; b.Loop(); i++ {
|
|
dstDir := filepath.Join(tmpDir, "native_"+string(rune('0'+i%10)))
|
|
if err := os.MkdirAll(dstDir, 0750); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
if err := worker.ExtractTarGzNative(archivePath, dstDir); err != nil {
|
|
if strings.Contains(err.Error(), "native tar.gz extractor requires") {
|
|
b.Skip("Native tar.gz extractor not available: ", err)
|
|
}
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
})
|
|
}
|
|
|
|
// createStreamingTestArchive creates a tar.gz archive with test files for streaming benchmarks
|
|
func createStreamingTestArchive(b testing.TB, tmpDir string, numFiles, fileSize int) string {
|
|
b.Helper()
|
|
|
|
// Create data directory with test files
|
|
dataDir := filepath.Join(tmpDir, "data_"+string(rune('0'+numFiles/10)))
|
|
if err := os.MkdirAll(dataDir, 0750); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
|
|
for i := 0; i < numFiles; i++ {
|
|
subdir := filepath.Join(dataDir, "subdir_"+string(rune('0'+i%5)))
|
|
if err := os.MkdirAll(subdir, 0750); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
|
|
filename := filepath.Join(subdir, "file_"+string(rune('0'+i/10))+".bin")
|
|
data := make([]byte, fileSize)
|
|
for j := range data {
|
|
data[j] = byte((i + j) % 256)
|
|
}
|
|
if err := os.WriteFile(filename, data, 0640); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
|
|
// Create tar.gz archive
|
|
archivePath := filepath.Join(tmpDir, "test_"+string(rune('0'+numFiles/10))+".tar.gz")
|
|
if err := createTarGzFromDir(dataDir, archivePath); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
|
|
return archivePath
|
|
}
|
|
|
|
// createTarGzFromDir creates a tar.gz archive from a directory
|
|
func createTarGzFromDir(srcDir, dstPath string) error {
|
|
var buf bytes.Buffer
|
|
gw := gzip.NewWriter(&buf)
|
|
tw := tar.NewWriter(gw)
|
|
|
|
err := filepath.Walk(srcDir, func(path string, info os.FileInfo, err error) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
rel, err := filepath.Rel(srcDir, path)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
hdr, err := tar.FileInfoHeader(info, "")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
hdr.Name = rel
|
|
|
|
if err := tw.WriteHeader(hdr); err != nil {
|
|
return err
|
|
}
|
|
|
|
if !info.IsDir() {
|
|
data, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if _, err := tw.Write(data); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := tw.Close(); err != nil {
|
|
return err
|
|
}
|
|
if err := gw.Close(); err != nil {
|
|
return err
|
|
}
|
|
|
|
return os.WriteFile(dstPath, buf.Bytes(), 0640)
|
|
}
|