Add comprehensive benchmarking suite for C++ optimization targets: - tests/benchmarks/dataset_hash_bench_test.go - dirOverallSHA256Hex profiling - tests/benchmarks/queue_bench_test.go - filesystem queue profiling - tests/benchmarks/artifact_and_snapshot_bench_test.go - scanArtifacts/extractTarGz profiling - tests/unit/worker/artifacts_test.go - moved from internal/ for clean separation Add parallel Go implementation as baseline for C++ comparison: - internal/worker/data_integrity.go: dirOverallSHA256HexParallel() with worker pool - Benchmarks show 2.1x speedup (3.97ms -> 1.90ms) vs sequential Exported wrappers for testing: - ScanArtifacts() - artifact scanning - ExtractTarGz() - tar.gz extraction - DirOverallSHA256HexParallel() - parallel hashing Profiling results (Apple M2 Ultra): - dirOverallSHA256Hex: 78% syscall overhead (target for mmap C++) - rebuildIndex: 96% syscall overhead (target for binary index C++) - scanArtifacts: 87% syscall overhead (target for fast traversal C++) - extractTarGz: 95% syscall overhead (target for parallel gzip C++) Related: C++ optimization strategy in memory 5d5f0bb6
112 lines
2.5 KiB
Go
112 lines
2.5 KiB
Go
package benchmarks
|
|
|
|
import (
	"strconv"
	"testing"

	"github.com/jfraeys/fetch_ml/internal/queue"
)
|
|
|
|
// BenchmarkFilesystemQueueRebuildIndex profiles the queue index rebuild hot path.
|
|
// Called on every task add/update - walks directory, reads JSON, sorts tasks.
|
|
// Tier 1 C++ candidate for:
|
|
// - Memory-mapped JSON parsing
|
|
// - Binary index format (instead of JSON)
|
|
// - Zero-copy sorting
|
|
func BenchmarkFilesystemQueueRebuildIndex(b *testing.B) {
|
|
tmpDir := b.TempDir()
|
|
q, err := queue.NewFilesystemQueue(tmpDir)
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
defer q.Close()
|
|
|
|
// Seed with tasks
|
|
for i := 0; i < 100; i++ {
|
|
task := &queue.Task{
|
|
ID: "task-" + string(rune('0'+i/10)) + string(rune('0'+i%10)),
|
|
JobName: "job-" + string(rune('0'+i/10)),
|
|
Priority: int64(100 - i),
|
|
}
|
|
if err := q.AddTask(task); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
|
|
b.ResetTimer()
|
|
b.ReportAllocs()
|
|
|
|
// Benchmark just the rebuild (not the full AddTask)
|
|
for i := 0; i < b.N; i++ {
|
|
// Force rebuild by adding one more task
|
|
task := &queue.Task{
|
|
ID: "bench-task-" + string(rune('0'+i%10)),
|
|
JobName: "bench-job",
|
|
Priority: int64(i),
|
|
}
|
|
if err := q.AddTask(task); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// BenchmarkFilesystemQueueClaimNext profiles task claiming (priority selection)
|
|
func BenchmarkFilesystemQueueClaimNext(b *testing.B) {
|
|
tmpDir := b.TempDir()
|
|
q, err := queue.NewFilesystemQueue(tmpDir)
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
defer q.Close()
|
|
|
|
// Seed with tasks
|
|
for i := 0; i < 100; i++ {
|
|
task := &queue.Task{
|
|
ID: "task-" + string(rune('0'+i/10)) + string(rune('0'+i%10)),
|
|
JobName: "job-" + string(rune('0'+i/10)),
|
|
Priority: int64(100 - i),
|
|
}
|
|
if err := q.AddTask(task); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
|
|
b.ResetTimer()
|
|
b.ReportAllocs()
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
// This triggers ReadDir + JSON unmarshal + sort
|
|
_, _ = q.PeekNextTask()
|
|
}
|
|
}
|
|
|
|
// BenchmarkFilesystemQueueGetAllTasks profiles full task scan
|
|
func BenchmarkFilesystemQueueGetAllTasks(b *testing.B) {
|
|
tmpDir := b.TempDir()
|
|
q, err := queue.NewFilesystemQueue(tmpDir)
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
defer q.Close()
|
|
|
|
// Seed with tasks
|
|
for i := 0; i < 100; i++ {
|
|
task := &queue.Task{
|
|
ID: "task-" + string(rune('0'+i/10)) + string(rune('0'+i%10)),
|
|
JobName: "job-" + string(rune('0'+i/10)),
|
|
Priority: int64(100 - i),
|
|
}
|
|
if err := q.AddTask(task); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
|
|
b.ResetTimer()
|
|
b.ReportAllocs()
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
_, err := q.GetAllTasks()
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
}
|