// Package tests contains performance/integration tests for the job queue.
// These tests require a local Redis instance on localhost:6379 and skip
// themselves automatically when Redis is unavailable.
package tests

import (
	"context"
	"fmt"
	"path/filepath"
	"runtime"
	"sync"
	"testing"
	"time"

	"github.com/jfraeys/fetch_ml/internal/metrics"
	"github.com/jfraeys/fetch_ml/internal/storage"
	"github.com/redis/go-redis/v9"
)

// performanceSchema is the SQLite schema shared by every performance test.
// It was previously duplicated verbatim in each test body.
const performanceSchema = `
	CREATE TABLE IF NOT EXISTS jobs (
		id TEXT PRIMARY KEY,
		job_name TEXT NOT NULL,
		args TEXT,
		status TEXT NOT NULL DEFAULT 'pending',
		priority INTEGER DEFAULT 0,
		created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
		started_at DATETIME,
		ended_at DATETIME,
		worker_id TEXT,
		error TEXT,
		datasets TEXT,
		metadata TEXT,
		updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
	);
	CREATE TABLE IF NOT EXISTS workers (
		id TEXT PRIMARY KEY,
		hostname TEXT NOT NULL,
		last_heartbeat DATETIME DEFAULT CURRENT_TIMESTAMP,
		status TEXT NOT NULL DEFAULT 'active',
		current_jobs INTEGER DEFAULT 0,
		max_jobs INTEGER DEFAULT 1,
		metadata TEXT
	);
	CREATE TABLE IF NOT EXISTS job_metrics (
		job_id TEXT NOT NULL,
		metric_name TEXT NOT NULL,
		metric_value TEXT NOT NULL,
		timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
		created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
		PRIMARY KEY (job_id, metric_name),
		FOREIGN KEY (job_id) REFERENCES jobs(id) ON DELETE CASCADE
	);
`

// setupPerformanceRedis creates a Redis client for performance testing.
// It uses DB 4 to avoid clashing with other test suites, flushes the DB
// before the test, and registers a cleanup that flushes again and closes
// the client — callers must NOT close the returned client themselves.
// When Redis is unreachable the calling test is skipped and nil returned.
func setupPerformanceRedis(t *testing.T) *redis.Client {
	t.Helper()

	rdb := redis.NewClient(&redis.Options{
		Addr:     "localhost:6379",
		Password: "",
		DB:       4, // Use DB 4 for performance tests to avoid conflicts
	})

	ctx := context.Background()
	if err := rdb.Ping(ctx).Err(); err != nil {
		t.Skipf("Redis not available, skipping performance test: %v", err)
		return nil
	}

	// Clean up the test database before the test runs.
	rdb.FlushDB(ctx)
	t.Cleanup(func() {
		rdb.FlushDB(ctx)
		rdb.Close()
	})

	return rdb
}

// initPerformanceSchema applies the shared schema, failing the test on error.
// The anonymous interface lets the helper accept the DB without naming the
// concrete type returned by storage.NewDBFromPath.
func initPerformanceSchema(t *testing.T, db interface{ Initialize(string) error }) {
	t.Helper()
	if err := db.Initialize(performanceSchema); err != nil {
		t.Fatalf("Failed to initialize database: %v", err)
	}
}

// TestPayloadPerformanceSmall measures sequential creation and processing
// throughput for many jobs carrying small (1KB) payloads.
func TestPayloadPerformanceSmall(t *testing.T) {
	// Not parallel: all performance tests share Redis DB 4.

	tempDir := t.TempDir()
	rdb := setupPerformanceRedis(t)
	if rdb == nil {
		return
	}
	// rdb is closed by the t.Cleanup registered in setupPerformanceRedis.

	db, err := storage.NewDBFromPath(filepath.Join(tempDir, "test.db"))
	if err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}
	defer db.Close()

	initPerformanceSchema(t, db)

	// Test small payload performance.
	numJobs := 100
	payloadSize := 1024 // 1KB payloads
	m := &metrics.Metrics{}
	ctx := context.Background()

	start := time.Now()

	// Create jobs with small payloads.
	for i := 0; i < numJobs; i++ {
		jobID := fmt.Sprintf("small-payload-job-%d", i)

		payload := make([]byte, payloadSize)
		for j := range payload {
			payload[j] = byte(i % 256)
		}

		job := &storage.Job{
			ID:       jobID,
			JobName:  fmt.Sprintf("Small Payload Job %d", i),
			Status:   "pending",
			Priority: 0,
			Args:     string(payload),
		}

		m.RecordTaskStart()
		if err := db.CreateJob(job); err != nil {
			t.Fatalf("Failed to create job %d: %v", i, err)
		}
		m.RecordTaskCompletion()

		// Queue job in Redis.
		if err := rdb.LPush(ctx, "ml:queue", jobID).Err(); err != nil {
			t.Fatalf("Failed to queue job %d: %v", i, err)
		}
		m.RecordDataTransfer(int64(len(payload)), 0)
	}

	creationTime := time.Since(start)
	t.Logf("Created %d jobs with %d byte payloads in %v", numJobs, payloadSize, creationTime)

	// Process jobs: mark completed, record a metric, pop from the queue.
	start = time.Now()
	for i := 0; i < numJobs; i++ {
		jobID := fmt.Sprintf("small-payload-job-%d", i)

		if err := db.UpdateJobStatus(jobID, "completed", "worker-1", ""); err != nil {
			t.Fatalf("Failed to update job %d: %v", i, err)
		}
		if err := db.RecordJobMetric(jobID, "processing_time", "100"); err != nil {
			t.Fatalf("Failed to record metric for job %d: %v", i, err)
		}
		if _, err := rdb.LPop(ctx, "ml:queue").Result(); err != nil {
			t.Fatalf("Failed to pop job %d: %v", i, err)
		}
	}

	processingTime := time.Since(start)
	t.Logf("Processed %d jobs in %v", numJobs, processingTime)

	// Performance metrics.
	totalTime := creationTime + processingTime
	jobsPerSecond := float64(numJobs) / totalTime.Seconds()
	avgTimePerJob := totalTime / time.Duration(numJobs)

	t.Logf("Performance Results:")
	t.Logf("  Total time: %v", totalTime)
	t.Logf("  Jobs per second: %.2f", jobsPerSecond)
	t.Logf("  Average time per job: %v", avgTimePerJob)

	// Verify performance thresholds.
	if jobsPerSecond < 50 { // Should handle at least 50 jobs/second for small payloads
		t.Errorf("Performance below threshold: %.2f jobs/sec (expected >= 50)", jobsPerSecond)
	}
	if avgTimePerJob > 20*time.Millisecond { // Should handle each job in under 20ms
		t.Errorf("Average job time too high: %v (expected <= 20ms)", avgTimePerJob)
	}

	stats := m.GetStats()
	t.Logf("Final metrics: %+v", stats)
}

// TestPayloadPerformanceLarge measures throughput and data rate for a small
// number of jobs carrying large (1MB) payloads.
func TestPayloadPerformanceLarge(t *testing.T) {
	// Not parallel: all performance tests share Redis DB 4.

	tempDir := t.TempDir()
	rdb := setupPerformanceRedis(t)
	if rdb == nil {
		return
	}
	// rdb is closed by the t.Cleanup registered in setupPerformanceRedis.

	db, err := storage.NewDBFromPath(filepath.Join(tempDir, "test.db"))
	if err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}
	defer db.Close()

	initPerformanceSchema(t, db)

	// Test large payload performance.
	numJobs := 10                // Fewer jobs for large payloads
	payloadSize := 1024 * 1024   // 1MB payloads
	m := &metrics.Metrics{}
	ctx := context.Background()

	start := time.Now()

	// Create jobs with large payloads.
	for i := 0; i < numJobs; i++ {
		jobID := fmt.Sprintf("large-payload-job-%d", i)

		payload := make([]byte, payloadSize)
		for j := range payload {
			payload[j] = byte(i % 256)
		}

		job := &storage.Job{
			ID:       jobID,
			JobName:  fmt.Sprintf("Large Payload Job %d", i),
			Status:   "pending",
			Priority: 0,
			Args:     string(payload),
		}

		m.RecordTaskStart()
		if err := db.CreateJob(job); err != nil {
			t.Fatalf("Failed to create job %d: %v", i, err)
		}
		m.RecordTaskCompletion()

		// Queue job in Redis.
		if err := rdb.LPush(ctx, "ml:queue", jobID).Err(); err != nil {
			t.Fatalf("Failed to queue job %d: %v", i, err)
		}
		m.RecordDataTransfer(int64(len(payload)), 0)
	}

	creationTime := time.Since(start)
	t.Logf("Created %d jobs with %d byte payloads in %v", numJobs, payloadSize, creationTime)

	// Process jobs.
	start = time.Now()
	for i := 0; i < numJobs; i++ {
		jobID := fmt.Sprintf("large-payload-job-%d", i)

		if err := db.UpdateJobStatus(jobID, "completed", "worker-1", ""); err != nil {
			t.Fatalf("Failed to update job %d: %v", i, err)
		}
		if err := db.RecordJobMetric(jobID, "processing_time", "1000"); err != nil {
			t.Fatalf("Failed to record metric for job %d: %v", i, err)
		}
		if _, err := rdb.LPop(ctx, "ml:queue").Result(); err != nil {
			t.Fatalf("Failed to pop job %d: %v", i, err)
		}
	}

	processingTime := time.Since(start)
	t.Logf("Processed %d jobs in %v", numJobs, processingTime)

	// Performance metrics.
	totalTime := creationTime + processingTime
	jobsPerSecond := float64(numJobs) / totalTime.Seconds()
	avgTimePerJob := totalTime / time.Duration(numJobs)
	dataThroughput := float64(numJobs*payloadSize) / totalTime.Seconds() / (1024 * 1024) // MB/sec

	t.Logf("Performance Results:")
	t.Logf("  Total time: %v", totalTime)
	t.Logf("  Jobs per second: %.2f", jobsPerSecond)
	t.Logf("  Average time per job: %v", avgTimePerJob)
	t.Logf("  Data throughput: %.2f MB/sec", dataThroughput)

	// Verify performance thresholds (more lenient for large payloads).
	if jobsPerSecond < 1 { // Should handle at least 1 job/second for large payloads
		t.Errorf("Performance below threshold: %.2f jobs/sec (expected >= 1)", jobsPerSecond)
	}
	if avgTimePerJob > 1*time.Second { // Should handle each large job in under 1 second
		t.Errorf("Average job time too high: %v (expected <= 1s)", avgTimePerJob)
	}
	if dataThroughput < 10 { // Should handle at least 10 MB/sec
		t.Errorf("Data throughput too low: %.2f MB/sec (expected >= 10)", dataThroughput)
	}

	stats := m.GetStats()
	t.Logf("Final metrics: %+v", stats)
}

// TestPayloadPerformanceConcurrent measures throughput when several worker
// goroutines create and then process jobs concurrently (10KB payloads).
func TestPayloadPerformanceConcurrent(t *testing.T) {
	// Not parallel: all performance tests share Redis DB 4.

	tempDir := t.TempDir()
	rdb := setupPerformanceRedis(t)
	if rdb == nil {
		return
	}
	// rdb is closed by the t.Cleanup registered in setupPerformanceRedis.

	db, err := storage.NewDBFromPath(filepath.Join(tempDir, "test.db"))
	if err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}
	defer db.Close()

	initPerformanceSchema(t, db)

	// Test concurrent payload performance.
	numWorkers := 5
	jobsPerWorker := 20
	payloadSize := 10 * 1024 // 10KB payloads
	m := &metrics.Metrics{}
	ctx := context.Background()

	start := time.Now()

	// Create jobs concurrently. NOTE: errors are scoped locally inside each
	// goroutine (the previous version assigned to a shared outer `err`,
	// which was a data race under `go test -race`).
	var wg sync.WaitGroup
	for worker := 0; worker < numWorkers; worker++ {
		wg.Add(1)
		go func(w int) {
			defer wg.Done()
			for i := 0; i < jobsPerWorker; i++ {
				jobID := fmt.Sprintf("concurrent-job-w%d-i%d", w, i)

				payload := make([]byte, payloadSize)
				for j := range payload {
					payload[j] = byte((w + i) % 256)
				}

				job := &storage.Job{
					ID:       jobID,
					JobName:  fmt.Sprintf("Concurrent Job W%d I%d", w, i),
					Status:   "pending",
					Priority: 0,
					Args:     string(payload),
				}

				m.RecordTaskStart()
				if err := db.CreateJob(job); err != nil {
					t.Errorf("Worker %d failed to create job %d: %v", w, i, err)
					return
				}
				m.RecordTaskCompletion()

				if err := rdb.LPush(ctx, "ml:queue", jobID).Err(); err != nil {
					t.Errorf("Worker %d failed to queue job %d: %v", w, i, err)
					return
				}
				m.RecordDataTransfer(int64(len(payload)), 0)
			}
		}(worker)
	}
	wg.Wait()

	creationTime := time.Since(start)
	totalJobs := numWorkers * jobsPerWorker
	t.Logf("Created %d jobs concurrently with %d byte payloads in %v", totalJobs, payloadSize, creationTime)

	// Process jobs concurrently.
	start = time.Now()
	for worker := 0; worker < numWorkers; worker++ {
		wg.Add(1)
		go func(w int) {
			defer wg.Done()
			for i := 0; i < jobsPerWorker; i++ {
				jobID := fmt.Sprintf("concurrent-job-w%d-i%d", w, i)

				if err := db.UpdateJobStatus(jobID, "completed", fmt.Sprintf("worker-%d", w), ""); err != nil {
					t.Errorf("Worker %d failed to update job %d: %v", w, i, err)
					return
				}
				if err := db.RecordJobMetric(jobID, "processing_time", "50"); err != nil {
					t.Errorf("Worker %d failed to record metric for job %d: %v", w, i, err)
					return
				}
				if _, err := rdb.LPop(ctx, "ml:queue").Result(); err != nil {
					t.Errorf("Worker %d failed to pop job %d: %v", w, i, err)
					return
				}
			}
		}(worker)
	}
	wg.Wait()

	processingTime := time.Since(start)
	t.Logf("Processed %d jobs concurrently in %v", totalJobs, processingTime)

	// Performance metrics.
	totalTime := creationTime + processingTime
	jobsPerSecond := float64(totalJobs) / totalTime.Seconds()
	avgTimePerJob := totalTime / time.Duration(totalJobs)
	concurrencyFactor := float64(totalJobs) / creationTime.Seconds() / 50 // Relative to baseline

	t.Logf("Concurrent Performance Results:")
	t.Logf("  Total time: %v", totalTime)
	t.Logf("  Jobs per second: %.2f", jobsPerSecond)
	t.Logf("  Average time per job: %v", avgTimePerJob)
	t.Logf("  Concurrency factor: %.2f", concurrencyFactor)

	// Verify concurrent performance benefits.
	if jobsPerSecond < 100 { // Should handle at least 100 jobs/second with concurrency
		t.Errorf("Concurrent performance below threshold: %.2f jobs/sec (expected >= 100)", jobsPerSecond)
	}
	if concurrencyFactor < 2.0 { // Should be at least 2x faster than sequential
		t.Errorf("Concurrency benefit too low: %.2fx (expected >= 2x)", concurrencyFactor)
	}

	stats := m.GetStats()
	t.Logf("Final metrics: %+v", stats)
}

// TestPayloadMemoryUsage records heap growth while creating jobs at several
// payload sizes and flags excessive per-job memory overhead.
func TestPayloadMemoryUsage(t *testing.T) {
	// Not parallel: all performance tests share Redis DB 4.

	tempDir := t.TempDir()
	rdb := setupPerformanceRedis(t)
	if rdb == nil {
		return
	}
	// rdb is closed by the t.Cleanup registered in setupPerformanceRedis.

	db, err := storage.NewDBFromPath(filepath.Join(tempDir, "test.db"))
	if err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}
	defer db.Close()

	initPerformanceSchema(t, db)

	// Test memory usage with different payload sizes.
	payloadSizes := []int{1024, 10 * 1024, 100 * 1024, 1024 * 1024} // 1KB, 10KB, 100KB, 1MB
	numJobs := 10

	for _, payloadSize := range payloadSizes {
		// Force GC to get a clean memory baseline.
		runtime.GC()
		var memBefore runtime.MemStats
		runtime.ReadMemStats(&memBefore)

		// Create jobs with the specific payload size.
		for i := 0; i < numJobs; i++ {
			jobID := fmt.Sprintf("memory-test-%d-%d", payloadSize, i)

			payload := make([]byte, payloadSize)
			for j := range payload {
				payload[j] = byte(i % 256)
			}

			job := &storage.Job{
				ID:       jobID,
				JobName:  fmt.Sprintf("Memory Test %d", i),
				Status:   "pending",
				Priority: 0,
				Args:     string(payload),
			}
			if err := db.CreateJob(job); err != nil {
				t.Fatalf("Failed to create job %d: %v", i, err)
			}
		}

		var memAfter runtime.MemStats
		runtime.ReadMemStats(&memAfter)

		// Guard against uint64 underflow: a GC cycle between the two reads
		// can make Alloc shrink, and unsigned subtraction would wrap around.
		var memoryUsed uint64
		if memAfter.Alloc > memBefore.Alloc {
			memoryUsed = memAfter.Alloc - memBefore.Alloc
		}
		memoryPerJob := memoryUsed / uint64(numJobs)
		payloadOverhead := float64(memoryPerJob) / float64(payloadSize)

		t.Logf("Memory usage for %d byte payloads:", payloadSize)
		t.Logf("  Total memory used: %d bytes (%.2f MB)", memoryUsed, float64(memoryUsed)/1024/1024)
		t.Logf("  Memory per job: %d bytes", memoryPerJob)
		t.Logf("  Payload overhead ratio: %.2f", payloadOverhead)

		// Verify memory usage is reasonable (overhead should be less than 10x payload size).
		if payloadOverhead > 10.0 {
			t.Errorf("Memory overhead too high for %d byte payloads: %.2fx (expected <= 10x)", payloadSize, payloadOverhead)
		}

		// Clean up jobs for next iteration.
		// Note: In a real implementation, we'd need a way to delete jobs.
		// For now, we continue; the test cleans up its temp DB automatically.
	}
}