// fetch_ml/tests/benchmarks/worker_churn_bench_test.go

// Package benchmarks_test provides performance benchmarks for the scheduler and queue.
package benchmarks_test

import (
	"fmt"
	"sync/atomic"
	"testing"
	"time"

	"github.com/jfraeys/fetch_ml/internal/scheduler"
	fixtures "github.com/jfraeys/fetch_ml/tests/fixtures"
)

// BenchmarkWorkerChurn times one complete worker lifecycle per iteration:
// a mock worker is created against the fixture's hub, registered with zero
// GPUs, and then closed. Each iteration uses a distinct worker ID.
func BenchmarkWorkerChurn(b *testing.B) {
	fx := fixtures.NewSchedulerTestFixture(b, fixtures.DefaultHubConfig())
	defer fx.Cleanup()

	b.ReportAllocs()
	// Drop the time and allocations accumulated while building the fixture.
	b.ResetTimer()

	n := 0
	for b.Loop() {
		w := fixtures.NewMockWorker(b, fx.Hub, fmt.Sprintf("churn-worker-%d", n))
		w.Register(scheduler.WorkerCapabilities{GPUCount: 0})
		w.Close()
		n++
	}
}

// BenchmarkWorkerChurnParallel measures concurrent worker churn: every
// goroutine started by b.RunParallel repeatedly creates a mock worker,
// registers it with zero GPUs, and closes it.
//
// Worker IDs have the form "parallel-worker-<goroutine>-<iteration>". The
// goroutine component comes from a shared atomic counter, so IDs never
// repeat across goroutines even though each goroutine counts its own
// iterations from zero.
func BenchmarkWorkerChurnParallel(b *testing.B) {
	fixture := fixtures.NewSchedulerTestFixture(b, fixtures.DefaultHubConfig())
	defer fixture.Cleanup()

	// Hands out one unique index per benchmark goroutine.
	var goroutineSeq atomic.Int64

	b.ReportAllocs()
	// RunParallel does not reset the timer itself; exclude fixture setup,
	// matching the other churn benchmarks in this file.
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		// Taken once per goroutine, so the hot loop below stays free of
		// shared-counter contention.
		gid := goroutineSeq.Add(1)
		i := 0
		for pb.Next() {
			workerID := fmt.Sprintf("parallel-worker-%d-%d", gid, i)
			worker := fixtures.NewMockWorker(b, fixture.Hub, workerID)
			worker.Register(scheduler.WorkerCapabilities{GPUCount: 0})
			worker.Close()
			i++
		}
	})
}

// BenchmarkWorkerChurnWithHeartbeat measures worker churn where each worker
// sends heartbeats before disconnecting. Per iteration a mock worker is
// created and registered, emits three heartbeats with a 10ms pause after
// each, and is then closed. The pauses sit inside the timed loop, so every
// iteration includes at least 30ms of sleeping.
func BenchmarkWorkerChurnWithHeartbeat(b *testing.B) {
	fx := fixtures.NewSchedulerTestFixture(b, fixtures.DefaultHubConfig())
	defer fx.Cleanup()

	b.ReportAllocs()

	n := 0
	for b.Loop() {
		w := fixtures.NewMockWorker(b, fx.Hub, fmt.Sprintf("hb-worker-%d", n))
		w.Register(scheduler.WorkerCapabilities{GPUCount: 0})

		// Three heartbeats reporting 4 total batch slots, none in use.
		for beat := 0; beat < 3; beat++ {
			w.SendHeartbeat(scheduler.SlotStatus{BatchTotal: 4, BatchInUse: 0})
			time.Sleep(10 * time.Millisecond)
		}

		w.Close()
		n++
	}
}

// BenchmarkWorkerChurnLargeBatch measures registering and then disconnecting
// a whole batch of workers per iteration. It runs one sub-benchmark per batch
// size (10, 50, 100, 500), each against its own scheduler fixture.
func BenchmarkWorkerChurnLargeBatch(b *testing.B) {
	for _, size := range []int{10, 50, 100, 500} {
		name := fmt.Sprintf("batch-%d", size)
		b.Run(name, func(b *testing.B) {
			fx := fixtures.NewSchedulerTestFixture(b, fixtures.DefaultHubConfig())
			defer fx.Cleanup()

			b.ReportAllocs()
			b.ResetTimer()

			round := 0
			for b.Loop() {
				batch := make([]*fixtures.MockWorker, size)

				// Phase 1: bring every worker in the batch online.
				for slot := range batch {
					id := fmt.Sprintf("batch-worker-%d-%d", round, slot)
					batch[slot] = fixtures.NewMockWorker(b, fx.Hub, id)
					batch[slot].Register(scheduler.WorkerCapabilities{GPUCount: 0})
				}

				// Phase 2: take them all offline again.
				for slot := range batch {
					batch[slot].Close()
				}

				round++
			}

			// Record how many workers a single iteration churns through.
			b.ReportMetric(float64(size), "workers/op")
		})
	}
}

// BenchmarkMemoryAllocs reports allocations for a minimal worker session.
// Each iteration creates a mock worker, registers it with zero GPUs, sends a
// single heartbeat, and closes the worker.
func BenchmarkMemoryAllocs(b *testing.B) {
	fx := fixtures.NewSchedulerTestFixture(b, fixtures.DefaultHubConfig())
	defer fx.Cleanup()

	b.ReportAllocs()

	n := 0
	for b.Loop() {
		w := fixtures.NewMockWorker(b, fx.Hub, fmt.Sprintf("alloc-worker-%d", n))
		w.Register(scheduler.WorkerCapabilities{GPUCount: 0})
		// One heartbeat reporting 4 total batch slots, none in use.
		w.SendHeartbeat(scheduler.SlotStatus{BatchTotal: 4, BatchInUse: 0})
		w.Close()
		n++
	}
}