diff --git a/Makefile b/Makefile
index 1a4ec78..3276797 100644
--- a/Makefile
+++ b/Makefile
@@ -228,6 +228,19 @@ benchmark-local:
 	@echo "Running benchmarks locally with full workflow..."
 	./scripts/benchmarks/run-benchmarks-local.sh
 
+# Run benchmarks with native libraries (requires native_libs build tag)
+benchmark-native:
+	@echo "Running benchmarks with native libraries..."
+	go test -bench=. -benchmem -tags native_libs ./tests/benchmarks/...
+
+# Compare Go vs Native implementation performance
+benchmark-compare:
+	@echo "=== Go Implementation ==="
+	@go test -bench=. -benchmem ./tests/benchmarks/... 2>&1 | grep -E '(Benchmark|ns/op|allocs/op)' || true
+	@echo ""
+	@echo "=== Native Implementation ==="
+	@go test -bench=. -benchmem -tags native_libs ./tests/benchmarks/... 2>&1 | grep -E '(Benchmark|ns/op|allocs/op)' || echo "Native not available (build with: make native-build)"
+
 # Manage benchmark artifacts
 artifacts:
 	@echo "Managing benchmark artifacts..."
diff --git a/tests/benchmarks/dataset_hash_bench_native_test.go b/tests/benchmarks/dataset_hash_bench_native_test.go
new file mode 100644
index 0000000..c88a25a
--- /dev/null
+++ b/tests/benchmarks/dataset_hash_bench_native_test.go
@@ -0,0 +1,94 @@
+//go:build native_libs
+// +build native_libs
+
+package benchmarks
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/jfraeys/fetch_ml/internal/worker"
+)
+
+// BenchmarkDirOverallSHA256Hex_Native profiles the directory hashing with native implementation.
+// This benchmark only runs when built with -tags native_libs.
+func BenchmarkDirOverallSHA256Hex_Native(b *testing.B) {
+	// Create a temp directory structure resembling a dataset
+	tmpDir := b.TempDir()
+
+	// Create nested structure with files of varying sizes
+	sizes := []int{1024, 10240, 102400, 1024 * 1024} // 1KB to 1MB
+	for i, size := range sizes {
+		subdir := filepath.Join(tmpDir, "subdir", string(rune('a'+i)))
+		if err := os.MkdirAll(subdir, 0750); err != nil {
+			b.Fatal(err)
+		}
+		data := make([]byte, size)
+		for j := range data {
+			data[j] = byte(i + j%256)
+		}
+		if err := os.WriteFile(filepath.Join(subdir, "data.bin"), data, 0640); err != nil {
+			b.Fatal(err)
+		}
+	}
+
+	// Add some small metadata files
+	metaDir := filepath.Join(tmpDir, "meta")
+	if err := os.MkdirAll(metaDir, 0750); err != nil {
+		b.Fatal(err)
+	}
+	for i := 0; i < 10; i++ {
+		if err := os.WriteFile(
+			filepath.Join(metaDir, "file"+string(rune('0'+i))+".json"),
+			[]byte(`{"key": "value"}`),
+			0640,
+		); err != nil {
+			b.Fatal(err)
+		}
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		_, err := worker.DirOverallSHA256Hex(tmpDir)
+		if err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
+// BenchmarkDirOverallSHA256HexLarge_Native profiles with larger dataset simulation using native
+func BenchmarkDirOverallSHA256HexLarge_Native(b *testing.B) {
+	tmpDir := b.TempDir()
+
+	// Create 50 files of 100KB each = ~5MB total
+	for i := 0; i < 50; i++ {
+		subdir := filepath.Join(tmpDir, "data", string(rune('a'+i%26)))
+		if err := os.MkdirAll(subdir, 0750); err != nil {
+			b.Fatal(err)
+		}
+		data := make([]byte, 100*1024)
+		for j := range data {
+			data[j] = byte(i + j%256)
+		}
+		if err := os.WriteFile(
+			filepath.Join(subdir, "chunk"+string(rune('0'+i/26))+".bin"),
+			data,
+			0640,
+		); err != nil {
+			b.Fatal(err)
+		}
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		_, err := worker.DirOverallSHA256Hex(tmpDir)
+		if err != nil {
+			b.Fatal(err)
+		}
+	}
+}
diff --git a/tests/benchmarks/dataset_hash_bench_test.go b/tests/benchmarks/dataset_hash_bench_test.go
index 0b2e4da..f42dffd 100644
--- a/tests/benchmarks/dataset_hash_bench_test.go
+++ b/tests/benchmarks/dataset_hash_bench_test.go
@@ -1,3 +1,6 @@
+//go:build !native_libs
+// +build !native_libs
+
 package benchmarks
 
 import (
diff --git a/tests/benchmarks/go_native_leak_test.go b/tests/benchmarks/go_native_leak_test.go
index dc41932..81d15b2 100644
--- a/tests/benchmarks/go_native_leak_test.go
+++ b/tests/benchmarks/go_native_leak_test.go
@@ -10,7 +10,7 @@ import (
 // TestGoNativeLeakStress runs 1000 iterations through Go->C++ integration
 func TestGoNativeLeakStress(t *testing.T) {
 	tmpDir := t.TempDir()
-	
+
 	// Create multiple test files
 	for i := 0; i < 10; i++ {
 		content := make([]byte, 1024*1024) // 1MB each
@@ -31,33 +31,37 @@
 		if len(hash) != 64 {
 			t.Fatalf("Hash %d: expected 64 chars, got %d", i, len(hash))
 		}
-	
+
 		if i%100 == 0 {
 			t.Logf("Completed %d iterations", i)
 		}
 	}
-	
+
 	t.Logf("Completed 1000 iterations through Go->C++ integration")
 }
 
 // TestGoNativeArtifactScanLeak tests artifact scanner through Go
 func TestGoNativeArtifactScanLeak(t *testing.T) {
+	if !worker.HasSIMDSHA256() {
+		t.Skip("Native libraries not available (build with -tags native_libs)")
+	}
+
 	tmpDir := t.TempDir()
-	
+
 	// Create test files
 	for i := 0; i < 50; i++ {
 		if err := os.WriteFile(tmpDir+"/file_"+string(rune('a'+i%26))+".txt", []byte("data"), 0644); err != nil {
 			t.Fatal(err)
 		}
 	}
-	
+
 	// Run 100 scans
 	for i := 0; i < 100; i++ {
 		_, err := worker.ScanArtifactsNative(tmpDir)
 		if err != nil {
 			t.Logf("Scan %d: %v (may be expected if native disabled)", i, err)
 		}
-	
+
 		if i%25 == 0 {
 			t.Logf("Completed %d scans", i)
 		}
diff --git a/tests/benchmarks/native_queue_bench_test.go b/tests/benchmarks/native_queue_bench_test.go
index fa0aeae..6b7652d 100644
--- a/tests/benchmarks/native_queue_bench_test.go
+++ b/tests/benchmarks/native_queue_bench_test.go
@@ -10,6 +10,10 @@ import (
 // Tier 1 C++ candidate: binary format vs JSON
 // Expected: 5x speedup, 99% allocation reduction
 func BenchmarkNativeQueueRebuildIndex(b *testing.B) {
+	if !queue.UseNativeQueue {
+		b.Skip("Native queue not enabled (set FETCHML_NATIVE_LIBS=1 or build with -tags native_libs)")
+	}
+
 	tmpDir := b.TempDir()
 	q, err := queue.NewNativeQueue(tmpDir)
 	if err != nil {
@@ -47,6 +51,10 @@
 
 // BenchmarkNativeQueueClaimNext profiles task claiming from binary heap
 func BenchmarkNativeQueueClaimNext(b *testing.B) {
+	if !queue.UseNativeQueue {
+		b.Skip("Native queue not enabled (set FETCHML_NATIVE_LIBS=1 or build with -tags native_libs)")
+	}
+
 	tmpDir := b.TempDir()
 	q, err := queue.NewNativeQueue(tmpDir)
 	if err != nil {
@@ -77,6 +85,10 @@
 
 // BenchmarkNativeQueueGetAllTasks profiles full task scan from binary index
 func BenchmarkNativeQueueGetAllTasks(b *testing.B) {
+	if !queue.UseNativeQueue {
+		b.Skip("Native queue not enabled (set FETCHML_NATIVE_LIBS=1 or build with -tags native_libs)")
+	}
+
 	tmpDir := b.TempDir()
 	q, err := queue.NewNativeQueue(tmpDir)
 	if err != nil {
diff --git a/tests/benchmarks/queue_bench_test.go b/tests/benchmarks/queue_bench_test.go
index 59f9b7d..640371c 100644
--- a/tests/benchmarks/queue_bench_test.go
+++ b/tests/benchmarks/queue_bench_test.go
@@ -1,3 +1,6 @@
+//go:build !native_libs
+// +build !native_libs
+
 package benchmarks
 
 import (