package benchmarks

import (
	"archive/tar"
	"bytes"
	"compress/gzip"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"testing"

	"github.com/jfraeys/fetch_ml/internal/worker"
)

// BenchmarkExtractTarGzGo profiles worker.ExtractTarGz (the Go extraction
// path) on an archive of 100 files, 1KB each.
func BenchmarkExtractTarGzGo(b *testing.B) {
	tmpDir := b.TempDir()
	archivePath := createStreamingTestArchive(b, tmpDir, 100, 1024) // 100 files, 1KB each
	benchmarkExtractGo(b, archivePath, tmpDir, "extract_go_")
}

// BenchmarkExtractTarGzNative profiles worker.ExtractTarGzNative on an
// archive of 100 files, 1KB each. The benchmark is skipped when the native
// extractor reports that it is unavailable.
//
// NOTE(review): the original comment described the native path as C++
// parallel decompression using mmap, a thread pool and O_DIRECT for large
// files; that cannot be confirmed from this file.
func BenchmarkExtractTarGzNative(b *testing.B) {
	tmpDir := b.TempDir()
	archivePath := createStreamingTestArchive(b, tmpDir, 100, 1024)
	benchmarkExtractNative(b, archivePath, tmpDir, "extract_native_")
}

// BenchmarkExtractTarGzSizes runs both extractors against archives of
// different shapes. The sizes quoted below are uncompressed payload totals.
func BenchmarkExtractTarGzSizes(b *testing.B) {
	tmpDir := b.TempDir()

	// Small: 10 files, 1KB each (~10KB of data)
	b.Run("Small", func(b *testing.B) {
		archivePath := createStreamingTestArchive(b, tmpDir, 10, 1024)
		benchmarkBoth(b, archivePath, tmpDir)
	})

	// Medium: 100 files, 10KB each (~1MB of data)
	b.Run("Medium", func(b *testing.B) {
		archivePath := createStreamingTestArchive(b, tmpDir, 100, 10240)
		benchmarkBoth(b, archivePath, tmpDir)
	})

	// Large: 50 files, 100KB each (~5MB of data)
	b.Run("Large", func(b *testing.B) {
		archivePath := createStreamingTestArchive(b, tmpDir, 50, 102400)
		benchmarkBoth(b, archivePath, tmpDir)
	})
}

// benchmarkBoth runs the Go and Native extraction benchmarks as
// sub-benchmarks against the same archive, extracting below tmpDir.
func benchmarkBoth(b *testing.B, archivePath, tmpDir string) {
	b.Run("Go", func(b *testing.B) {
		benchmarkExtractGo(b, archivePath, tmpDir, "go_")
	})

	b.Run("Native", func(b *testing.B) {
		benchmarkExtractNative(b, archivePath, tmpDir, "native_")
	})
}

// benchmarkExtractGo is the timed loop shared by every Go-extractor
// benchmark. Each iteration extracts archivePath into
// tmpDir/<prefix><i%10>, so at most ten destination directories are ever
// created and later iterations extract over earlier output.
//
// b.Helper is deliberately not called: this function owns the timed loop and
// failures should be attributed to the lines below.
func benchmarkExtractGo(b *testing.B, archivePath, tmpDir, prefix string) {
	b.ReportAllocs()
	for i := 0; b.Loop(); i++ {
		dstDir := filepath.Join(tmpDir, prefix+strconv.Itoa(i%10))
		if err := os.MkdirAll(dstDir, 0750); err != nil {
			b.Fatal(err)
		}
		if err := worker.ExtractTarGz(archivePath, dstDir); err != nil {
			b.Fatal(err)
		}
	}
}

// benchmarkExtractNative is the timed loop shared by every native-extractor
// benchmark. It mirrors benchmarkExtractGo but calls
// worker.ExtractTarGzNative, and skips the benchmark when the returned error
// contains "native tar.gz extractor requires".
func benchmarkExtractNative(b *testing.B, archivePath, tmpDir, prefix string) {
	b.ReportAllocs()
	for i := 0; b.Loop(); i++ {
		dstDir := filepath.Join(tmpDir, prefix+strconv.Itoa(i%10))
		if err := os.MkdirAll(dstDir, 0750); err != nil {
			b.Fatal(err)
		}
		if err := worker.ExtractTarGzNative(archivePath, dstDir); err != nil {
			// b.Skip stops the goroutine, so b.Fatal below is only
			// reached for errors that are not the "unavailable" case.
			if strings.Contains(err.Error(), "native tar.gz extractor requires") {
				b.Skip("Native tar.gz extractor not available: ", err)
			}
			b.Fatal(err)
		}
	}
}

// createStreamingTestArchive builds a tar.gz archive for the streaming
// benchmarks and returns its path.
//
// It writes numFiles files of fileSize bytes each under
// tmpDir/data_<numFiles>x<fileSize>, spread round-robin over five
// subdirectories, then packs that directory into
// tmpDir/test_<numFiles>x<fileSize>.tar.gz. Every file gets a unique name
// derived from its index, so the archive really contains numFiles files.
// Any failure aborts the test/benchmark via b.Fatal.
func createStreamingTestArchive(b testing.TB, tmpDir string, numFiles, fileSize int) string {
	b.Helper()

	if numFiles < 0 || fileSize < 0 {
		b.Fatalf("invalid archive shape: numFiles=%d fileSize=%d", numFiles, fileSize)
	}

	// Both dimensions go into the name so that different shapes sharing one
	// tmpDir never write into each other's directory or archive.
	tag := strconv.Itoa(numFiles) + "x" + strconv.Itoa(fileSize)

	// Create data directory with test files
	dataDir := filepath.Join(tmpDir, "data_"+tag)
	if err := os.MkdirAll(dataDir, 0750); err != nil {
		b.Fatal(err)
	}

	// One buffer is reused for every file; it is fully rewritten each time.
	data := make([]byte, fileSize)
	for i := range numFiles {
		subdir := filepath.Join(dataDir, "subdir_"+strconv.Itoa(i%5))
		if err := os.MkdirAll(subdir, 0750); err != nil {
			b.Fatal(err)
		}

		// The full index keeps names unique within a subdirectory.
		filename := filepath.Join(subdir, "file_"+strconv.Itoa(i)+".bin")
		for j := range data {
			data[j] = byte((i + j) % 256)
		}
		if err := os.WriteFile(filename, data, 0640); err != nil {
			b.Fatal(err)
		}
	}

	// Create tar.gz archive
	archivePath := filepath.Join(tmpDir, "test_"+tag+".tar.gz")
	if err := createTarGzFromDir(dataDir, archivePath); err != nil {
		b.Fatal(err)
	}

	return archivePath
}

// createTarGzFromDir packs everything under srcDir into a gzip-compressed
// tar archive written to dstPath (mode 0640).
//
// Entries are named relative to srcDir using forward slashes; the root
// directory itself is included as ".". The archive is assembled in memory
// and written in one call, which suits the small fixtures built here.
func createTarGzFromDir(srcDir, dstPath string) error {
	var buf bytes.Buffer
	gw := gzip.NewWriter(&buf)
	tw := tar.NewWriter(gw)

	err := filepath.Walk(srcDir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}

		rel, err := filepath.Rel(srcDir, path)
		if err != nil {
			return err
		}

		hdr, err := tar.FileInfoHeader(info, "")
		if err != nil {
			return err
		}
		// Tar member names are slash-separated regardless of host OS.
		hdr.Name = filepath.ToSlash(rel)

		if err := tw.WriteHeader(hdr); err != nil {
			return err
		}
		if info.IsDir() {
			return nil
		}

		data, err := os.ReadFile(path)
		if err != nil {
			return err
		}
		_, err = tw.Write(data)
		return err
	})
	if err != nil {
		return err
	}

	// Close tar before gzip so the tar trailer is flushed into the gzip
	// stream before the gzip footer is written.
	if err := tw.Close(); err != nil {
		return err
	}
	if err := gw.Close(); err != nil {
		return err
	}

	return os.WriteFile(dstPath, buf.Bytes(), 0640)
}