fetch_ml/tests/benchmarks/artifact_scanner_bench_test.go

package benchmarks

import (
	"os"
	"path/filepath"
	"testing"

	"github.com/jfraeys/fetch_ml/internal/worker"
)

// BenchmarkArtifactScanGo profiles Go filepath.WalkDir implementation
func BenchmarkArtifactScanGo(b *testing.B) {
	tmpDir := b.TempDir()

	// Create test artifact structure
	createTestArtifacts(b, tmpDir, 100)

	b.ReportAllocs()

	for b.Loop() {
		_, err := worker.ScanArtifacts(tmpDir, false)
		if err != nil {
			b.Fatal(err)
		}
	}
}

// BenchmarkArtifactScanNative profiles C++ platform-optimized traversal
// Uses: fts on BSD, getdents64 on Linux, getattrlistbulk on macOS
func BenchmarkArtifactScanNative(b *testing.B) {
	tmpDir := b.TempDir()

	// Create test artifact structure
	createTestArtifacts(b, tmpDir, 100)

	b.ReportAllocs()

	for b.Loop() {
		_, err := worker.ScanArtifactsNative(tmpDir)
		if err != nil {
			b.Fatal(err)
		}
	}
}

// BenchmarkArtifactScanLarge tests with many files
func BenchmarkArtifactScanLarge(b *testing.B) {
	tmpDir := b.TempDir()

	// Create 1000 test files
	createTestArtifacts(b, tmpDir, 1000)

	b.Run("Go", func(b *testing.B) {
		b.ReportAllocs()
		for b.Loop() {
			_, err := worker.ScanArtifacts(tmpDir, false)
			if err != nil {
				b.Fatal(err)
			}
		}
	})

	b.Run("Native", func(b *testing.B) {
		b.ReportAllocs()
		for b.Loop() {
			_, err := worker.ScanArtifactsNative(tmpDir)
			if err != nil {
				b.Fatal(err)
			}
		}
	})
}

// createTestArtifacts creates a directory structure with test files
func createTestArtifacts(b testing.TB, root string, count int) {
	b.Helper()

	// Create nested directories
	dirs := []string{
		"outputs",
		"outputs/models",
		"outputs/checkpoints",
		"logs",
		"data",
	}

	for _, dir := range dirs {
		if err := os.MkdirAll(filepath.Join(root, dir), 0750); err != nil {
			b.Fatal(err)
		}
	}

	// Create test files
	for i := range count {
		var path string
		switch i % 5 {
		case 0:
			path = filepath.Join(root, "outputs", "model_"+string(rune('0'+i%10))+".pt")
		case 1:
			path = filepath.Join(root, "outputs", "models", "checkpoint_"+string(rune('0'+i%10))+".ckpt")
		case 2:
			path = filepath.Join(root, "outputs", "checkpoints", "epoch_"+string(rune('0'+i%10))+".pt")
		case 3:
			path = filepath.Join(root, "logs", "train_"+string(rune('0'+i%10))+".log")
		case 4:
			path = filepath.Join(root, "data", "batch_"+string(rune('0'+i%10))+".npy")
		}

		data := make([]byte, 1024*(i%10+1)) // Varying sizes 1KB-10KB
		for j := range data {
			data[j] = byte(i + j%256)
		}

		if err := os.WriteFile(path, data, 0640); err != nil {
			b.Fatal(err)
		}
	}

	// Create files that should be excluded
	os.WriteFile(filepath.Join(root, "run_manifest.json"), []byte("{}"), 0640)
	os.MkdirAll(filepath.Join(root, "code"), 0750)
	os.WriteFile(filepath.Join(root, "code", "script.py"), []byte("# test"), 0640)
}