test(benchmarks): fix native lib benchmarks when disabled

- Add skip checks to native queue benchmarks when FETCHML_NATIVE_LIBS=0
- Skip TestGoNativeArtifactScanLeak cleanly instead of logging 100 warnings
- Add build tags (!native_libs/native_libs) for Go vs Native comparison
- Add benchmark-native and benchmark-compare Makefile targets
This commit is contained in:
Jeremie Fraeys 2026-02-18 12:45:30 -05:00
parent 320e6fd409
commit 38c09c92bb
No known key found for this signature in database
6 changed files with 135 additions and 6 deletions

View file

@ -228,6 +228,19 @@ benchmark-local:
@echo "Running benchmarks locally with full workflow..."
./scripts/benchmarks/run-benchmarks-local.sh
# Run benchmarks with native libraries (requires native_libs build tag).
# Invokes `go test` with -bench=. and -benchmem over every package under
# ./tests/benchmarks/, passing -tags native_libs so files guarded by that
# build tag are compiled in.
benchmark-native:
@echo "Running benchmarks with native libraries..."
go test -bench=. -benchmem -tags native_libs ./tests/benchmarks/...
# Compare Go vs Native implementation performance.
# Runs the ./tests/benchmarks/... suite twice: first without build tags, then
# with -tags native_libs. Each run merges stderr into stdout and keeps only
# the lines matching Benchmark, ns/op or allocs/op.
# The first pipeline ends in `|| true` so that a run with no matching lines
# does not abort the target; the second prints a build hint instead when grep
# matches nothing.
benchmark-compare:
@echo "=== Go Implementation ==="
@go test -bench=. -benchmem ./tests/benchmarks/... 2>&1 | grep -E '(Benchmark|ns/op|allocs/op)' || true
@echo ""
@echo "=== Native Implementation ==="
@go test -bench=. -benchmem -tags native_libs ./tests/benchmarks/... 2>&1 | grep -E '(Benchmark|ns/op|allocs/op)' || echo "Native not available (build with: make native-build)"
# Manage benchmark artifacts
artifacts:
@echo "Managing benchmark artifacts..."

View file

@ -0,0 +1,94 @@
//go:build native_libs
// +build native_libs
package benchmarks
import (
"os"
"path/filepath"
"testing"
"github.com/jfraeys/fetch_ml/internal/worker"
)
// BenchmarkDirOverallSHA256Hex_Native measures worker.DirOverallSHA256Hex over
// a small synthetic dataset tree. The file carries the native_libs build
// constraint, so this benchmark is only compiled with -tags native_libs.
//
// Fixture layout under the benchmark's temp dir:
//   - subdir/<a..d>/data.bin with payloads of 1KB, 10KB, 100KB and 1MB
//   - meta/file<0..9>.json, each holding a tiny JSON document
func BenchmarkDirOverallSHA256Hex_Native(b *testing.B) {
	const (
		dirPerm  = 0o750
		filePerm = 0o640
	)

	root := b.TempDir()

	// One payload file per size, each in its own lettered subdirectory.
	payloadSizes := []int{1024, 10240, 102400, 1024 * 1024}
	for idx := range payloadSizes {
		dir := filepath.Join(root, "subdir", string(rune('a'+idx)))
		if err := os.MkdirAll(dir, dirPerm); err != nil {
			b.Fatal(err)
		}

		// Fill with a byte pattern that depends on both file index and offset.
		payload := make([]byte, payloadSizes[idx])
		for off := range payload {
			payload[off] = byte(idx + off%256)
		}

		target := filepath.Join(dir, "data.bin")
		if err := os.WriteFile(target, payload, filePerm); err != nil {
			b.Fatal(err)
		}
	}

	// Ten small metadata files alongside the payloads.
	metaRoot := filepath.Join(root, "meta")
	if err := os.MkdirAll(metaRoot, dirPerm); err != nil {
		b.Fatal(err)
	}
	metaBody := []byte(`{"key": "value"}`)
	for n := 0; n < 10; n++ {
		name := "file" + string(rune('0'+n)) + ".json"
		if err := os.WriteFile(filepath.Join(metaRoot, name), metaBody, filePerm); err != nil {
			b.Fatal(err)
		}
	}

	// Exclude fixture construction from the measurement.
	b.ReportAllocs()
	b.ResetTimer()

	for iter := 0; iter < b.N; iter++ {
		if _, err := worker.DirOverallSHA256Hex(root); err != nil {
			b.Fatal(err)
		}
	}
}
// BenchmarkDirOverallSHA256HexLarge_Native measures worker.DirOverallSHA256Hex
// over a larger synthetic dataset: 50 files of 100KB each (~5MB total), spread
// over data/<a..z>/chunk<0|1>.bin. The file carries the native_libs build
// constraint, so this benchmark is only compiled with -tags native_libs.
func BenchmarkDirOverallSHA256HexLarge_Native(b *testing.B) {
	const (
		fileCount = 50
		chunkSize = 100 * 1024
		dirPerm   = 0o750
		filePerm  = 0o640
	)

	root := b.TempDir()

	for idx := 0; idx < fileCount; idx++ {
		// The letter cycles through 26 directories; the digit counts the wraps.
		dir := filepath.Join(root, "data", string(rune('a'+idx%26)))
		if err := os.MkdirAll(dir, dirPerm); err != nil {
			b.Fatal(err)
		}

		// Fill with a byte pattern that depends on both file index and offset.
		chunk := make([]byte, chunkSize)
		for off := range chunk {
			chunk[off] = byte(idx + off%256)
		}

		name := "chunk" + string(rune('0'+idx/26)) + ".bin"
		if err := os.WriteFile(filepath.Join(dir, name), chunk, filePerm); err != nil {
			b.Fatal(err)
		}
	}

	// Exclude fixture construction from the measurement.
	b.ReportAllocs()
	b.ResetTimer()

	for iter := 0; iter < b.N; iter++ {
		if _, err := worker.DirOverallSHA256Hex(root); err != nil {
			b.Fatal(err)
		}
	}
}

View file

@ -1,3 +1,6 @@
//go:build !native_libs
// +build !native_libs
package benchmarks
import (

View file

@ -10,7 +10,7 @@ import (
// TestGoNativeLeakStress runs 1000 iterations through Go->C++ integration
func TestGoNativeLeakStress(t *testing.T) {
tmpDir := t.TempDir()
// Create multiple test files
for i := 0; i < 10; i++ {
content := make([]byte, 1024*1024) // 1MB each
@ -31,33 +31,37 @@ func TestGoNativeLeakStress(t *testing.T) {
if len(hash) != 64 {
t.Fatalf("Hash %d: expected 64 chars, got %d", i, len(hash))
}
if i%100 == 0 {
t.Logf("Completed %d iterations", i)
}
}
t.Logf("Completed 1000 iterations through Go->C++ integration")
}
// TestGoNativeArtifactScanLeak tests artifact scanner through Go
func TestGoNativeArtifactScanLeak(t *testing.T) {
if !worker.HasSIMDSHA256() {
t.Skip("Native libraries not available (build with -tags native_libs)")
}
tmpDir := t.TempDir()
// Create test files
for i := 0; i < 50; i++ {
if err := os.WriteFile(tmpDir+"/file_"+string(rune('a'+i%26))+".txt", []byte("data"), 0644); err != nil {
t.Fatal(err)
}
}
// Run 100 scans
for i := 0; i < 100; i++ {
_, err := worker.ScanArtifactsNative(tmpDir)
if err != nil {
t.Logf("Scan %d: %v (may be expected if native disabled)", i, err)
}
if i%25 == 0 {
t.Logf("Completed %d scans", i)
}

View file

@ -10,6 +10,10 @@ import (
// Tier 1 C++ candidate: binary format vs JSON
// Expected: 5x speedup, 99% allocation reduction
func BenchmarkNativeQueueRebuildIndex(b *testing.B) {
if !queue.UseNativeQueue {
b.Skip("Native queue not enabled (set FETCHML_NATIVE_LIBS=1 or build with -tags native_libs)")
}
tmpDir := b.TempDir()
q, err := queue.NewNativeQueue(tmpDir)
if err != nil {
@ -47,6 +51,10 @@ func BenchmarkNativeQueueRebuildIndex(b *testing.B) {
// BenchmarkNativeQueueClaimNext profiles task claiming from binary heap
func BenchmarkNativeQueueClaimNext(b *testing.B) {
if !queue.UseNativeQueue {
b.Skip("Native queue not enabled (set FETCHML_NATIVE_LIBS=1 or build with -tags native_libs)")
}
tmpDir := b.TempDir()
q, err := queue.NewNativeQueue(tmpDir)
if err != nil {
@ -77,6 +85,10 @@ func BenchmarkNativeQueueClaimNext(b *testing.B) {
// BenchmarkNativeQueueGetAllTasks profiles full task scan from binary index
func BenchmarkNativeQueueGetAllTasks(b *testing.B) {
if !queue.UseNativeQueue {
b.Skip("Native queue not enabled (set FETCHML_NATIVE_LIBS=1 or build with -tags native_libs)")
}
tmpDir := b.TempDir()
q, err := queue.NewNativeQueue(tmpDir)
if err != nil {

View file

@ -1,3 +1,6 @@
//go:build !native_libs
// +build !native_libs
package benchmarks
import (