package tests

import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"runtime"
	"testing"
	"time"

	"github.com/jfraeys/fetch_ml/internal/metrics"
	"github.com/jfraeys/fetch_ml/internal/storage"
	"github.com/jfraeys/fetch_ml/internal/telemetry"
	"github.com/redis/go-redis/v9"
)

// telemetryTestSchema is the minimal jobs/workers/job_metrics schema shared by
// the telemetry tests below.
const telemetryTestSchema = `
CREATE TABLE IF NOT EXISTS jobs (
	id TEXT PRIMARY KEY,
	job_name TEXT NOT NULL,
	args TEXT,
	status TEXT NOT NULL DEFAULT 'pending',
	priority INTEGER DEFAULT 0,
	created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
	started_at DATETIME,
	ended_at DATETIME,
	worker_id TEXT,
	error TEXT,
	datasets TEXT,
	metadata TEXT,
	updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE IF NOT EXISTS workers (
	id TEXT PRIMARY KEY,
	hostname TEXT NOT NULL,
	last_heartbeat DATETIME DEFAULT CURRENT_TIMESTAMP,
	status TEXT NOT NULL DEFAULT 'active',
	current_jobs INTEGER DEFAULT 0,
	max_jobs INTEGER DEFAULT 1,
	metadata TEXT
);

CREATE TABLE IF NOT EXISTS job_metrics (
	job_id TEXT NOT NULL,
	metric_name TEXT NOT NULL,
	metric_value TEXT NOT NULL,
	timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
	created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
	PRIMARY KEY (job_id, metric_name),
	FOREIGN KEY (job_id) REFERENCES jobs(id) ON DELETE CASCADE
);
`

// setupTelemetryRedis creates a Redis client for telemetry testing
func setupTelemetryRedis(t *testing.T) *redis.Client {
	rdb := redis.NewClient(&redis.Options{
		Addr:     "localhost:6379",
		Password: "",
		DB:       3, // Use DB 3 for telemetry tests to avoid conflicts
	})

	ctx := context.Background()
	if err := rdb.Ping(ctx).Err(); err != nil {
		t.Skipf("Redis not available, skipping telemetry test: %v", err)
		return nil
	}

	// Clean up the test database before the test, and flush/close it afterwards.
	rdb.FlushDB(ctx)
	t.Cleanup(func() {
		rdb.FlushDB(ctx)
		rdb.Close()
	})

	return rdb
}
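// NOTE: these tests expect a Redis server on localhost:6379 and are skipped when
// none is reachable. One way to provide one locally (an assumption about your
// environment, not part of this repository's tooling) is:
//
//	docker run --rm -p 6379:6379 redis:7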
func TestTelemetryMetricsCollection(t *testing.T) {
	// t.Parallel() // Disable parallel to avoid conflicts

	// Setup test environment
	tempDir := t.TempDir()
	rdb := setupTelemetryRedis(t)
	if rdb == nil {
		return
	}

	// Setup database
	db, err := storage.NewDBFromPath(filepath.Join(tempDir, "test.db"))
	if err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}
	defer db.Close()

	// Initialize database schema
	if err := db.Initialize(telemetryTestSchema); err != nil {
		t.Fatalf("Failed to initialize database: %v", err)
	}

	// Test 1: Metrics Collection
	m := &metrics.Metrics{}

	// Record some task metrics
	m.RecordTaskStart()
	m.RecordTaskSuccess(100 * time.Millisecond)
	m.RecordTaskCompletion()

	m.RecordTaskStart()
	m.RecordTaskSuccess(200 * time.Millisecond)
	m.RecordTaskCompletion()

	m.RecordTaskStart()
	m.RecordTaskFailure()
	m.RecordTaskCompletion()

	m.SetQueuedTasks(5)
	m.RecordDataTransfer(1024*1024, 50*time.Millisecond) // 1MB

	// Get stats and verify
	stats := m.GetStats()

	// Verify metrics
	if stats["tasks_processed"] != int64(2) {
		t.Errorf("Expected 2 processed tasks, got %v", stats["tasks_processed"])
	}
	if stats["tasks_failed"] != int64(1) {
		t.Errorf("Expected 1 failed task, got %v", stats["tasks_failed"])
	}
	if stats["active_tasks"] != int64(0) {
		t.Errorf("Expected 0 active tasks, got %v", stats["active_tasks"])
	}
	if stats["queued_tasks"] != int64(5) {
		t.Errorf("Expected 5 queued tasks, got %v", stats["queued_tasks"])
	}

	// Verify success rate calculation: (processed - failed) / processed = (2-1)/2 = 0.5
	successRate := stats["success_rate"].(float64)
	expectedRate := float64(2-1) / float64(2)
	if successRate != expectedRate {
		t.Errorf("Expected success rate %.2f, got %.2f", expectedRate, successRate)
	}

	// Verify data transfer: 1MB expressed in GB
	dataTransferred := stats["data_transferred_gb"].(float64)
	expectedGB := float64(1024*1024) / (1024 * 1024 * 1024)
	if dataTransferred != expectedGB {
		t.Errorf("Expected data transferred %.6f GB, got %.6f GB", expectedGB, dataTransferred)
	}

	t.Logf("Metrics collected successfully: %+v", stats)
}
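// The I/O test below relies on the per-process counters in /proc/self/io. A point
// worth keeping in mind when reading its "no I/O detected" branch: read_bytes and
// write_bytes count bytes that actually hit the storage layer, so a small file
// that is written and immediately read back may be served entirely from the page
// cache and show a zero delta.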
func TestTelemetryIOStats(t *testing.T) {
	// t.Parallel() // Disable parallel to avoid conflicts

	// Skip on non-Linux systems (requires the /proc filesystem)
	if runtime.GOOS != "linux" {
		t.Skip("IO stats test requires Linux /proc filesystem")
	}

	// Test IO stats collection
	before, err := telemetry.ReadProcessIO()
	if err != nil {
		t.Fatalf("Failed to read initial IO stats: %v", err)
	}

	// Perform some I/O operations
	testFile := filepath.Join(t.TempDir(), "io_test.txt")
	data := "This is test data for I/O operations\n"

	// Write operation
	if err := os.WriteFile(testFile, []byte(data), 0644); err != nil {
		t.Fatalf("Failed to write test file: %v", err)
	}

	// Read operation
	if _, err := os.ReadFile(testFile); err != nil {
		t.Fatalf("Failed to read test file: %v", err)
	}

	// Get IO stats after operations
	after, err := telemetry.ReadProcessIO()
	if err != nil {
		t.Fatalf("Failed to read final IO stats: %v", err)
	}

	// Calculate delta
	delta := telemetry.DiffIO(before, after)

	// Verify we had some I/O (should usually be non-zero)
	if delta.ReadBytes == 0 && delta.WriteBytes == 0 {
		t.Log("Warning: No I/O detected (this might be okay on some systems)")
	} else {
		t.Logf("I/O stats - Read: %d bytes, Write: %d bytes", delta.ReadBytes, delta.WriteBytes)
	}
}

func TestTelemetrySystemHealth(t *testing.T) {
	// t.Parallel() // Disable parallel to avoid conflicts

	// Setup test environment
	tempDir := t.TempDir()
	rdb := setupTelemetryRedis(t)
	if rdb == nil {
		return
	}

	// Setup database
	db, err := storage.NewDBFromPath(filepath.Join(tempDir, "test.db"))
	if err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}
	defer db.Close()

	// Initialize database schema
	if err := db.Initialize(telemetryTestSchema); err != nil {
		t.Fatalf("Failed to initialize database: %v", err)
	}

	// Test system health checks
	ctx := context.Background()

	// Check Redis health
	redisPong, err := rdb.Ping(ctx).Result()
	if err != nil {
		t.Errorf("Redis health check failed: %v", err)
	} else {
		t.Logf("Redis health check: %s", redisPong)
	}

	// Check database health
	testJob := &storage.Job{
		ID:       "health-check-job",
		JobName:  "Health Check",
		Status:   "pending",
		Priority: 0,
	}
	err = db.CreateJob(testJob)
	if err != nil {
		t.Errorf("Database health check (create) failed: %v", err)
	} else {
		// Test read
		if _, err := db.GetJob("health-check-job"); err != nil {
			t.Errorf("Database health check (read) failed: %v", err)
		} else {
			t.Logf("Database health check: OK")
		}
	}

	// Check system resources
	var memStats runtime.MemStats
	runtime.ReadMemStats(&memStats)

	// Log system health metrics
	t.Logf("System Health Report:")
	t.Logf("  Memory Usage: %d bytes (%.2f MB)", memStats.Alloc, float64(memStats.Alloc)/1024/1024)
	t.Logf("  Goroutines: %d", runtime.NumGoroutine())
	t.Logf("  GC Cycles: %d", memStats.NumGC)
	t.Logf("  Disk Space Available: Check passed (test directory created)")

	// Verify basic system health indicators
	if memStats.Alloc == 0 {
		t.Error("Memory allocation seems abnormal (zero bytes)")
	}
	if runtime.NumGoroutine() == 0 {
		t.Error("No goroutines running (seems abnormal for a running test)")
	}
}
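// The integration test below records two synthetic metrics per job, derived from
// the loop index i: cpu_usage = 20 + 5*i and memory_usage = 100 + 20*i. For the
// job it later inspects (index 2) the expected values are therefore 30.0 and 140.0.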
func TestTelemetryMetricsIntegration(t *testing.T) {
	// t.Parallel() // Disable parallel to avoid conflicts

	// Setup test environment
	tempDir := t.TempDir()
	rdb := setupTelemetryRedis(t)
	if rdb == nil {
		return
	}

	// Setup database
	db, err := storage.NewDBFromPath(filepath.Join(tempDir, "test.db"))
	if err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}
	defer db.Close()

	// Initialize database schema
	if err := db.Initialize(telemetryTestSchema); err != nil {
		t.Fatalf("Failed to initialize database: %v", err)
	}

	// Test integrated metrics collection with job lifecycle
	m := &metrics.Metrics{}

	// Simulate job processing workflow
	for i := 0; i < 5; i++ {
		jobID := fmt.Sprintf("metrics-job-%d", i)

		// Create job in database
		job := &storage.Job{
			ID:       jobID,
			JobName:  fmt.Sprintf("Metrics Test Job %d", i),
			Status:   "pending",
			Priority: 0,
		}
		if err := db.CreateJob(job); err != nil {
			t.Fatalf("Failed to create job %d: %v", i, err)
		}

		// Record metrics for job processing
		m.RecordTaskStart()

		// Simulate work
		time.Sleep(10 * time.Millisecond)

		// Record job metrics in database
		if err := db.RecordJobMetric(jobID, "cpu_usage", fmt.Sprintf("%.1f", float64(20+i*5))); err != nil {
			t.Fatalf("Failed to record CPU metric for job %d: %v", i, err)
		}
		if err := db.RecordJobMetric(jobID, "memory_usage", fmt.Sprintf("%.1f", float64(100+i*20))); err != nil {
			t.Fatalf("Failed to record memory metric for job %d: %v", i, err)
		}

		// Complete job
		m.RecordTaskSuccess(10 * time.Millisecond)
		m.RecordTaskCompletion()

		if err := db.UpdateJobStatus(jobID, "completed", "worker-1", ""); err != nil {
			t.Fatalf("Failed to update job %d status: %v", i, err)
		}

		// Simulate data transfer with increasing data sizes
		dataSize := int64(1024 * (i + 1))
		m.RecordDataTransfer(dataSize, 5*time.Millisecond)
	}

	// Verify metrics collection
	stats := m.GetStats()
	if stats["tasks_processed"] != int64(5) {
		t.Errorf("Expected 5 processed tasks, got %v", stats["tasks_processed"])
	}

	// Verify database metrics for the job at index 2
	metricsForJob, err := db.GetJobMetrics("metrics-job-2")
	if err != nil {
		t.Fatalf("Failed to get metrics for job: %v", err)
	}
	if len(metricsForJob) != 2 {
		t.Errorf("Expected 2 metrics for job, got %d", len(metricsForJob))
	}
	if metricsForJob["cpu_usage"] != "30.0" {
		t.Errorf("Expected CPU usage 30.0, got %s", metricsForJob["cpu_usage"])
	}
	if metricsForJob["memory_usage"] != "140.0" {
		t.Errorf("Expected memory usage 140.0, got %s", metricsForJob["memory_usage"])
	}

	t.Logf("Integrated metrics test completed successfully")
	t.Logf("Final metrics: %+v", stats)
	t.Logf("Job metrics: %+v", metricsForJob)
}
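// To run only this suite (assuming the package lives under ./tests in the module):
//
//	go test ./tests -run 'TestTelemetry' -v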