fetch_ml/tests/integration/telemetry_integration_test.go
Jeremie Fraeys c980167041 test: implement comprehensive test suite with multiple test types
- Add end-to-end tests for complete workflow validation
- Include integration tests for API and database interactions
- Add unit tests for all major components and utilities
- Include performance tests for payload handling
- Add CLI API integration tests
- Include Podman container integration tests
- Add WebSocket and queue execution tests
- Include shell script tests for setup validation

Provides comprehensive test coverage ensuring platform reliability
and functionality across all components and interactions.
2025-12-04 16:55:13 -05:00

452 lines
12 KiB
Go

package tests
import (
"context"
"fmt"
"os"
"path/filepath"
"runtime"
"testing"
"time"
"github.com/jfraeys/fetch_ml/internal/metrics"
"github.com/jfraeys/fetch_ml/internal/storage"
"github.com/jfraeys/fetch_ml/internal/telemetry"
"github.com/redis/go-redis/v9"
)
// setupTelemetryRedis creates a Redis client for telemetry testing
// setupTelemetryRedis connects to the local Redis instance used by the
// telemetry tests. When Redis is unreachable the calling test is skipped
// via t.Skipf. The dedicated test database is flushed before use and again
// during cleanup, and the client is closed automatically when the test ends.
func setupTelemetryRedis(t *testing.T) *redis.Client {
	client := redis.NewClient(&redis.Options{
		Addr:     "localhost:6379",
		Password: "",
		DB:       3, // Use DB 3 for telemetry tests to avoid conflicts
	})

	ctx := context.Background()
	if pingErr := client.Ping(ctx).Err(); pingErr != nil {
		t.Skipf("Redis not available, skipping telemetry test: %v", pingErr)
		// Skipf stops the test goroutine, so this return is unreachable,
		// but the signature still requires a return statement here.
		return nil
	}

	// Start from a clean slate and restore it once the test finishes.
	client.FlushDB(ctx)
	t.Cleanup(func() {
		client.FlushDB(ctx)
		client.Close()
	})

	return client
}
func TestTelemetryMetricsCollection(t *testing.T) {
// t.Parallel() // Disable parallel to avoid conflicts
// Setup test environment
tempDir := t.TempDir()
rdb := setupTelemetryRedis(t)
if rdb == nil {
return
}
defer rdb.Close()
// Setup database
db, err := storage.NewDBFromPath(filepath.Join(tempDir, "test.db"))
if err != nil {
t.Fatalf("Failed to create database: %v", err)
}
defer db.Close()
// Initialize database schema
schema := `
CREATE TABLE IF NOT EXISTS jobs (
id TEXT PRIMARY KEY,
job_name TEXT NOT NULL,
args TEXT,
status TEXT NOT NULL DEFAULT 'pending',
priority INTEGER DEFAULT 0,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
started_at DATETIME,
ended_at DATETIME,
worker_id TEXT,
error TEXT,
datasets TEXT,
metadata TEXT,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS workers (
id TEXT PRIMARY KEY,
hostname TEXT NOT NULL,
last_heartbeat DATETIME DEFAULT CURRENT_TIMESTAMP,
status TEXT NOT NULL DEFAULT 'active',
current_jobs INTEGER DEFAULT 0,
max_jobs INTEGER DEFAULT 1,
metadata TEXT
);
CREATE TABLE IF NOT EXISTS job_metrics (
job_id TEXT NOT NULL,
metric_name TEXT NOT NULL,
metric_value TEXT NOT NULL,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (job_id, metric_name),
FOREIGN KEY (job_id) REFERENCES jobs(id) ON DELETE CASCADE
);
`
err = db.Initialize(schema)
if err != nil {
t.Fatalf("Failed to initialize database: %v", err)
}
// Test 1: Metrics Collection
m := &metrics.Metrics{}
// Record some task metrics
m.RecordTaskStart()
m.RecordTaskSuccess(100 * time.Millisecond)
m.RecordTaskCompletion()
m.RecordTaskStart()
m.RecordTaskSuccess(200 * time.Millisecond)
m.RecordTaskCompletion()
m.RecordTaskStart()
m.RecordTaskFailure()
m.RecordTaskCompletion()
m.SetQueuedTasks(5)
m.RecordDataTransfer(1024*1024, 50*time.Millisecond) // 1MB
// Get stats and verify
stats := m.GetStats()
// Verify metrics
if stats["tasks_processed"] != int64(2) {
t.Errorf("Expected 2 processed tasks, got %v", stats["tasks_processed"])
}
if stats["tasks_failed"] != int64(1) {
t.Errorf("Expected 1 failed task, got %v", stats["tasks_failed"])
}
if stats["active_tasks"] != int64(0) {
t.Errorf("Expected 0 active tasks, got %v", stats["active_tasks"])
}
if stats["queued_tasks"] != int64(5) {
t.Errorf("Expected 5 queued tasks, got %v", stats["queued_tasks"])
}
// Verify success rate calculation
successRate := stats["success_rate"].(float64)
expectedRate := float64(2-1) / float64(2) // (processed - failed) / processed = (2-1)/2 = 0.5
if successRate != expectedRate {
t.Errorf("Expected success rate %.2f, got %.2f", expectedRate, successRate)
}
// Verify data transfer
dataTransferred := stats["data_transferred_gb"].(float64)
expectedGB := float64(1024*1024) / (1024 * 1024 * 1024) // 1MB in GB
if dataTransferred != expectedGB {
t.Errorf("Expected data transferred %.6f GB, got %.6f GB", expectedGB, dataTransferred)
}
t.Logf("Metrics collected successfully: %+v", stats)
}
// TestTelemetryIOStats exercises per-process I/O accounting: it snapshots
// the process I/O counters, performs a small file write and read, snapshots
// again, and checks the delta. The test is Linux-only because the counters
// come from the /proc filesystem.
//
// Fix: removed the unreachable `return` that followed t.Skip — Skip stops
// the test goroutine, so the statement was dead code.
func TestTelemetryIOStats(t *testing.T) {
	// t.Parallel() // Disable parallel to avoid conflicts

	// Skip on non-Linux systems (proc filesystem)
	if runtime.GOOS != "linux" {
		t.Skip("IO stats test requires Linux /proc filesystem")
	}

	// Snapshot I/O counters before doing any work.
	before, err := telemetry.ReadProcessIO()
	if err != nil {
		t.Fatalf("Failed to read initial IO stats: %v", err)
	}

	// Perform some I/O operations
	testFile := filepath.Join(t.TempDir(), "io_test.txt")
	data := "This is test data for I/O operations\n"

	// Write operation
	err = os.WriteFile(testFile, []byte(data), 0644)
	if err != nil {
		t.Fatalf("Failed to write test file: %v", err)
	}

	// Read operation
	_, err = os.ReadFile(testFile)
	if err != nil {
		t.Fatalf("Failed to read test file: %v", err)
	}

	// Get IO stats after operations
	after, err := telemetry.ReadProcessIO()
	if err != nil {
		t.Fatalf("Failed to read final IO stats: %v", err)
	}

	// Calculate delta
	delta := telemetry.DiffIO(before, after)

	// Verify we had some I/O (should be non-zero). Page-cache effects can
	// make both counters zero on some systems, so this is a warning only.
	if delta.ReadBytes == 0 && delta.WriteBytes == 0 {
		t.Log("Warning: No I/O detected (this might be okay on some systems)")
	} else {
		t.Logf("I/O stats - Read: %d bytes, Write: %d bytes", delta.ReadBytes, delta.WriteBytes)
	}
}
// TestTelemetrySystemHealth performs a set of basic health checks against
// the test environment: Redis connectivity (PING), database round-trip
// (create + read a job), and coarse process-level indicators (heap usage,
// goroutine count, GC cycles).
//
// Fix: removed the redundant `defer rdb.Close()` — setupTelemetryRedis
// already registers a t.Cleanup that closes the client, so the old defer
// closed it a second time.
func TestTelemetrySystemHealth(t *testing.T) {
	// t.Parallel() // Disable parallel to avoid conflicts

	// Setup test environment. setupTelemetryRedis skips the test when Redis
	// is unavailable; the nil check is a defensive guard for that path.
	tempDir := t.TempDir()
	rdb := setupTelemetryRedis(t)
	if rdb == nil {
		return
	}

	// Setup database
	db, err := storage.NewDBFromPath(filepath.Join(tempDir, "test.db"))
	if err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}
	defer db.Close()

	// Initialize database schema
	schema := `
CREATE TABLE IF NOT EXISTS jobs (
id TEXT PRIMARY KEY,
job_name TEXT NOT NULL,
args TEXT,
status TEXT NOT NULL DEFAULT 'pending',
priority INTEGER DEFAULT 0,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
started_at DATETIME,
ended_at DATETIME,
worker_id TEXT,
error TEXT,
datasets TEXT,
metadata TEXT,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS workers (
id TEXT PRIMARY KEY,
hostname TEXT NOT NULL,
last_heartbeat DATETIME DEFAULT CURRENT_TIMESTAMP,
status TEXT NOT NULL DEFAULT 'active',
current_jobs INTEGER DEFAULT 0,
max_jobs INTEGER DEFAULT 1,
metadata TEXT
);
CREATE TABLE IF NOT EXISTS job_metrics (
job_id TEXT NOT NULL,
metric_name TEXT NOT NULL,
metric_value TEXT NOT NULL,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (job_id, metric_name),
FOREIGN KEY (job_id) REFERENCES jobs(id) ON DELETE CASCADE
);
`
	err = db.Initialize(schema)
	if err != nil {
		t.Fatalf("Failed to initialize database: %v", err)
	}

	// Test system health checks
	ctx := context.Background()

	// Check Redis health
	redisPong, err := rdb.Ping(ctx).Result()
	if err != nil {
		t.Errorf("Redis health check failed: %v", err)
	} else {
		t.Logf("Redis health check: %s", redisPong)
	}

	// Check database health: write a job, then read it back.
	testJob := &storage.Job{
		ID:       "health-check-job",
		JobName:  "Health Check",
		Status:   "pending",
		Priority: 0,
	}
	err = db.CreateJob(testJob)
	if err != nil {
		t.Errorf("Database health check (create) failed: %v", err)
	} else {
		// Test read
		_, err := db.GetJob("health-check-job")
		if err != nil {
			t.Errorf("Database health check (read) failed: %v", err)
		} else {
			t.Logf("Database health check: OK")
		}
	}

	// Check system resources
	var memStats runtime.MemStats
	runtime.ReadMemStats(&memStats)

	// Log system health metrics
	t.Logf("System Health Report:")
	t.Logf("  Memory Usage: %d bytes (%.2f MB)", memStats.Alloc, float64(memStats.Alloc)/1024/1024)
	t.Logf("  Goroutines: %d", runtime.NumGoroutine())
	t.Logf("  GC Cycles: %d", memStats.NumGC)
	t.Logf("  Disk Space Available: Check passed (test directory created)")

	// Verify basic system health indicators
	if memStats.Alloc == 0 {
		t.Error("Memory allocation seems abnormal (zero bytes)")
	}
	if runtime.NumGoroutine() == 0 {
		t.Error("No goroutines running (seems abnormal for a running test)")
	}
}
// TestTelemetryMetricsIntegration drives a small end-to-end job workflow:
// five jobs are created in the database, per-job cpu/memory metrics are
// recorded, tasks are tracked through the in-memory metrics collector, and
// the final counters and stored metric values are verified.
//
// Fix: removed the redundant `defer rdb.Close()` — setupTelemetryRedis
// already registers a t.Cleanup that closes the client, so the old defer
// closed it a second time.
func TestTelemetryMetricsIntegration(t *testing.T) {
	// t.Parallel() // Disable parallel to avoid conflicts

	// Setup test environment. setupTelemetryRedis skips the test when Redis
	// is unavailable; the nil check is a defensive guard for that path.
	tempDir := t.TempDir()
	rdb := setupTelemetryRedis(t)
	if rdb == nil {
		return
	}

	// Setup database
	db, err := storage.NewDBFromPath(filepath.Join(tempDir, "test.db"))
	if err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}
	defer db.Close()

	// Initialize database schema
	schema := `
CREATE TABLE IF NOT EXISTS jobs (
id TEXT PRIMARY KEY,
job_name TEXT NOT NULL,
args TEXT,
status TEXT NOT NULL DEFAULT 'pending',
priority INTEGER DEFAULT 0,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
started_at DATETIME,
ended_at DATETIME,
worker_id TEXT,
error TEXT,
datasets TEXT,
metadata TEXT,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS workers (
id TEXT PRIMARY KEY,
hostname TEXT NOT NULL,
last_heartbeat DATETIME DEFAULT CURRENT_TIMESTAMP,
status TEXT NOT NULL DEFAULT 'active',
current_jobs INTEGER DEFAULT 0,
max_jobs INTEGER DEFAULT 1,
metadata TEXT
);
CREATE TABLE IF NOT EXISTS job_metrics (
job_id TEXT NOT NULL,
metric_name TEXT NOT NULL,
metric_value TEXT NOT NULL,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (job_id, metric_name),
FOREIGN KEY (job_id) REFERENCES jobs(id) ON DELETE CASCADE
);
`
	err = db.Initialize(schema)
	if err != nil {
		t.Fatalf("Failed to initialize database: %v", err)
	}

	// Test integrated metrics collection with job lifecycle
	m := &metrics.Metrics{}

	// Simulate job processing workflow
	for i := 0; i < 5; i++ {
		jobID := fmt.Sprintf("metrics-job-%d", i)

		// Create job in database
		job := &storage.Job{
			ID:       jobID,
			JobName:  fmt.Sprintf("Metrics Test Job %d", i),
			Status:   "pending",
			Priority: 0,
		}
		err = db.CreateJob(job)
		if err != nil {
			t.Fatalf("Failed to create job %d: %v", i, err)
		}

		// Record metrics for job processing
		m.RecordTaskStart()

		// Simulate work
		time.Sleep(10 * time.Millisecond)

		// Record job metrics in database (cpu: 20,25,...; mem: 100,120,...)
		err = db.RecordJobMetric(jobID, "cpu_usage", fmt.Sprintf("%.1f", float64(20+i*5)))
		if err != nil {
			t.Fatalf("Failed to record CPU metric for job %d: %v", i, err)
		}
		err = db.RecordJobMetric(jobID, "memory_usage", fmt.Sprintf("%.1f", float64(100+i*20)))
		if err != nil {
			t.Fatalf("Failed to record memory metric for job %d: %v", i, err)
		}

		// Complete job
		m.RecordTaskSuccess(10 * time.Millisecond)
		m.RecordTaskCompletion()
		err = db.UpdateJobStatus(jobID, "completed", "worker-1", "")
		if err != nil {
			t.Fatalf("Failed to update job %d status: %v", i, err)
		}

		// Simulate data transfer
		dataSize := int64(1024 * (i + 1)) // Increasing data sizes
		m.RecordDataTransfer(dataSize, 5*time.Millisecond)
	}

	// Verify metrics collection
	stats := m.GetStats()
	if stats["tasks_processed"] != int64(5) {
		t.Errorf("Expected 5 processed tasks, got %v", stats["tasks_processed"])
	}

	// Verify database metrics for job i=2: cpu 20+2*5=30.0, mem 100+2*20=140.0
	metricsForJob, err := db.GetJobMetrics("metrics-job-2")
	if err != nil {
		t.Fatalf("Failed to get metrics for job: %v", err)
	}
	if len(metricsForJob) != 2 {
		t.Errorf("Expected 2 metrics for job, got %d", len(metricsForJob))
	}
	if metricsForJob["cpu_usage"] != "30.0" {
		t.Errorf("Expected CPU usage 30.0, got %s", metricsForJob["cpu_usage"])
	}
	if metricsForJob["memory_usage"] != "140.0" {
		t.Errorf("Expected memory usage 140.0, got %s", metricsForJob["memory_usage"])
	}

	t.Logf("Integrated metrics test completed successfully")
	t.Logf("Final metrics: %+v", stats)
	t.Logf("Job metrics: %+v", metricsForJob)
}