package benchmarks

import (
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"testing"

	"github.com/jfraeys/fetch_ml/internal/worker"
)

// BenchmarkArtifactScanGo profiles worker.ScanArtifacts (described by the
// original author as the Go filepath.WalkDir implementation) over a fixture
// of 100 generated files.
func BenchmarkArtifactScanGo(b *testing.B) {
	tmpDir := b.TempDir()

	// Create test artifact structure
	createTestArtifacts(b, tmpDir, 100)

	runArtifactScanGo(b, tmpDir)
}

// BenchmarkArtifactScanNative profiles worker.ScanArtifactsNative over a
// fixture of 100 generated files. Per the original author's note, the native
// scanner is a C++ platform-optimized traversal (fts on BSD, getdents64 on
// Linux, getattrlistbulk on macOS). The benchmark is skipped when the scanner
// reports that it is unavailable.
func BenchmarkArtifactScanNative(b *testing.B) {
	tmpDir := b.TempDir()

	// Create test artifact structure
	createTestArtifacts(b, tmpDir, 100)

	runArtifactScanNative(b, tmpDir)
}

// BenchmarkArtifactScanLarge runs both scanners as sub-benchmarks ("Go" and
// "Native") against one shared fixture of 1000 generated files.
func BenchmarkArtifactScanLarge(b *testing.B) {
	tmpDir := b.TempDir()

	// Create 1000 test files
	createTestArtifacts(b, tmpDir, 1000)

	b.Run("Go", func(b *testing.B) {
		runArtifactScanGo(b, tmpDir)
	})

	b.Run("Native", func(b *testing.B) {
		runArtifactScanNative(b, tmpDir)
	})
}

// runArtifactScanGo is the measured loop shared by the Go-scanner benchmarks:
// it repeatedly scans dir with worker.ScanArtifacts and fails the benchmark on
// the first error.
func runArtifactScanGo(b *testing.B, dir string) {
	b.Helper()
	b.ReportAllocs()

	for b.Loop() {
		if _, err := worker.ScanArtifacts(dir, false, nil); err != nil {
			b.Fatal(err)
		}
	}
}

// runArtifactScanNative is the measured loop shared by the native-scanner
// benchmarks: it repeatedly scans dir with worker.ScanArtifactsNative. An
// error whose text contains "native artifact scanner requires" skips the
// benchmark instead of failing it; any other error is fatal.
func runArtifactScanNative(b *testing.B, dir string) {
	b.Helper()
	b.ReportAllocs()

	for b.Loop() {
		_, err := worker.ScanArtifactsNative(dir)
		if err == nil {
			continue
		}
		if strings.Contains(err.Error(), "native artifact scanner requires") {
			b.Skip("Native artifact scanner not available: ", err)
		}
		b.Fatal(err)
	}
}

// createTestArtifacts populates root with a small directory tree and count
// generated files, failing b on any filesystem error.
//
// Files are distributed round-robin over five directories and named with
// their index i, so exactly count distinct files are created. Sizes cycle
// from 1 KiB to 10 KiB. A run_manifest.json file and a code/ directory are
// added as well; the original author marks these as entries that should be
// excluded by the scanners.
func createTestArtifacts(b testing.TB, root string, count int) {
	b.Helper()

	// Create nested directories
	dirs := []string{
		"outputs",
		"outputs/models",
		"outputs/checkpoints",
		"logs",
		"data",
	}
	for _, dir := range dirs {
		if err := os.MkdirAll(filepath.Join(root, dir), 0750); err != nil {
			b.Fatal(err)
		}
	}

	// Create test files
	for i := range count {
		// Use the full index as the suffix. A single digit derived from i%10
		// collides within each i%5 bucket and would leave only ten files on
		// disk no matter how large count is.
		suffix := strconv.Itoa(i)

		var path string
		switch i % 5 {
		case 0:
			path = filepath.Join(root, "outputs", "model_"+suffix+".pt")
		case 1:
			path = filepath.Join(root, "outputs", "models", "checkpoint_"+suffix+".ckpt")
		case 2:
			path = filepath.Join(root, "outputs", "checkpoints", "epoch_"+suffix+".pt")
		case 3:
			path = filepath.Join(root, "logs", "train_"+suffix+".log")
		case 4:
			path = filepath.Join(root, "data", "batch_"+suffix+".npy")
		}

		data := make([]byte, 1024*(i%10+1)) // Varying sizes 1KB-10KB
		for j := range data {
			data[j] = byte(i + j%256)
		}

		if err := os.WriteFile(path, data, 0640); err != nil {
			b.Fatal(err)
		}
	}

	// Create files that should be excluded. Errors are checked so that a
	// broken fixture aborts the benchmark instead of skewing its results.
	if err := os.WriteFile(filepath.Join(root, "run_manifest.json"), []byte("{}"), 0640); err != nil {
		b.Fatal(err)
	}
	if err := os.MkdirAll(filepath.Join(root, "code"), 0750); err != nil {
		b.Fatal(err)
	}
	if err := os.WriteFile(filepath.Join(root, "code", "script.py"), []byte("# test"), 0640); err != nil {
		b.Fatal(err)
	}
}