- Fix YAML tags in auth config struct (json -> yaml)
- Update CLI configs to use pre-hashed API keys
- Remove double hashing in WebSocket client
- Fix port mapping (9102 -> 9103) in CLI commands
- Update permission keys to use jobs:read, jobs:create, etc.
- Clean up all debug logging from CLI and server
- All user roles now authenticate correctly:
  * Admin: Can queue jobs and see all jobs
  * Researcher: Can queue jobs and see own jobs
  * Analyst: Can see status (read-only access)

Multi-user authentication is now fully functional.
591 lines
16 KiB
Go
591 lines
16 KiB
Go
package benchmarks
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"path/filepath"
|
|
"runtime"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/jfraeys/fetch_ml/internal/metrics"
|
|
"github.com/jfraeys/fetch_ml/internal/storage"
|
|
fixtures "github.com/jfraeys/fetch_ml/tests/fixtures"
|
|
"github.com/redis/go-redis/v9"
|
|
)
|
|
|
|
// setupPerformanceRedis creates a Redis client for performance testing
|
|
func setupPerformanceRedis(t *testing.T) *redis.Client {
|
|
rdb := redis.NewClient(&redis.Options{
|
|
Addr: "localhost:6379",
|
|
Password: "",
|
|
DB: 4, // Use DB 4 for performance tests to avoid conflicts
|
|
})
|
|
|
|
ctx := context.Background()
|
|
if err := rdb.Ping(ctx).Err(); err != nil {
|
|
t.Skipf("Redis not available, skipping performance test: %v", err)
|
|
return nil
|
|
}
|
|
|
|
// Clean up the test database
|
|
rdb.FlushDB(ctx)
|
|
|
|
t.Cleanup(func() {
|
|
rdb.FlushDB(ctx)
|
|
defer func() { _ = rdb.Close() }()
|
|
})
|
|
|
|
return rdb
|
|
}
|
|
|
|
func TestPayloadPerformanceSmall(t *testing.T) {
|
|
// t.Parallel() // Disable parallel to avoid conflicts
|
|
|
|
// Setup test environment
|
|
tempDir := t.TempDir()
|
|
rdb := setupPerformanceRedis(t)
|
|
if rdb == nil {
|
|
return
|
|
}
|
|
defer func() { _ = rdb.Close() }()
|
|
|
|
// Setup database
|
|
db, err := storage.NewDBFromPath(filepath.Join(tempDir, "test.db"))
|
|
if err != nil {
|
|
t.Fatalf("Failed to create database: %v", err)
|
|
}
|
|
defer func() { _ = db.Close() }()
|
|
|
|
// Initialize database schema
|
|
schema := fixtures.TestSchema
|
|
err = db.Initialize(schema)
|
|
if err != nil {
|
|
t.Fatalf("Failed to initialize database: %v", err)
|
|
}
|
|
|
|
// Test small payload performance
|
|
numJobs := 100
|
|
payloadSize := 1024 // 1KB payloads
|
|
|
|
m := &metrics.Metrics{}
|
|
ctx := context.Background()
|
|
|
|
start := time.Now()
|
|
|
|
// Create jobs with small payloads
|
|
for i := 0; i < numJobs; i++ {
|
|
jobID := fmt.Sprintf("small-payload-job-%d", i)
|
|
|
|
// Create small payload
|
|
payload := make([]byte, payloadSize)
|
|
for j := range payload {
|
|
payload[j] = byte(i % 256)
|
|
}
|
|
|
|
job := &storage.Job{
|
|
ID: jobID,
|
|
JobName: fmt.Sprintf("Small Payload Job %d", i),
|
|
Status: "pending",
|
|
Priority: 0,
|
|
Args: string(payload),
|
|
}
|
|
|
|
m.RecordTaskStart()
|
|
err = db.CreateJob(job)
|
|
if err != nil {
|
|
t.Fatalf("Failed to create job %d: %v", i, err)
|
|
}
|
|
m.RecordTaskCompletion()
|
|
|
|
// Queue job in Redis
|
|
err = rdb.LPush(ctx, "ml:queue", jobID).Err()
|
|
if err != nil {
|
|
t.Fatalf("Failed to queue job %d: %v", i, err)
|
|
}
|
|
|
|
m.RecordDataTransfer(int64(len(payload)), 0)
|
|
}
|
|
|
|
creationTime := time.Since(start)
|
|
t.Logf("Created %d jobs with %d byte payloads in %v", numJobs, payloadSize, creationTime)
|
|
|
|
// Process jobs
|
|
start = time.Now()
|
|
for i := 0; i < numJobs; i++ {
|
|
jobID := fmt.Sprintf("small-payload-job-%d", i)
|
|
|
|
// Update job status
|
|
err = db.UpdateJobStatus(jobID, "completed", "worker-1", "")
|
|
if err != nil {
|
|
t.Fatalf("Failed to update job %d: %v", i, err)
|
|
}
|
|
|
|
// Record metrics
|
|
err = db.RecordJobMetric(jobID, "processing_time", "100")
|
|
if err != nil {
|
|
t.Fatalf("Failed to record metric for job %d: %v", i, err)
|
|
}
|
|
|
|
// Pop from queue
|
|
_, err = rdb.LPop(ctx, "ml:queue").Result()
|
|
if err != nil {
|
|
t.Fatalf("Failed to pop job %d: %v", i, err)
|
|
}
|
|
}
|
|
|
|
processingTime := time.Since(start)
|
|
t.Logf("Processed %d jobs in %v", numJobs, processingTime)
|
|
|
|
// Performance metrics
|
|
totalTime := creationTime + processingTime
|
|
jobsPerSecond := float64(numJobs) / totalTime.Seconds()
|
|
avgTimePerJob := totalTime / time.Duration(numJobs)
|
|
|
|
t.Logf("Performance Results:")
|
|
t.Logf(" Total time: %v", totalTime)
|
|
t.Logf(" Jobs per second: %.2f", jobsPerSecond)
|
|
t.Logf(" Average time per job: %v", avgTimePerJob)
|
|
|
|
// Verify performance thresholds
|
|
if jobsPerSecond < 50 { // Should handle at least 50 jobs/second for small payloads
|
|
t.Errorf("Performance below threshold: %.2f jobs/sec (expected >= 50)", jobsPerSecond)
|
|
}
|
|
|
|
if avgTimePerJob > 20*time.Millisecond { // Should handle each job in under 20ms
|
|
t.Errorf("Average job time too high: %v (expected <= 20ms)", avgTimePerJob)
|
|
}
|
|
|
|
stats := m.GetStats()
|
|
t.Logf("Final metrics: %+v", stats)
|
|
}
|
|
|
|
func TestPayloadPerformanceLarge(t *testing.T) {
|
|
// t.Parallel() // Disable parallel to avoid conflicts
|
|
|
|
// Setup test environment
|
|
tempDir := t.TempDir()
|
|
rdb := setupPerformanceRedis(t)
|
|
if rdb == nil {
|
|
return
|
|
}
|
|
defer func() { _ = rdb.Close() }()
|
|
|
|
// Setup database
|
|
db, err := storage.NewDBFromPath(filepath.Join(tempDir, "test.db"))
|
|
if err != nil {
|
|
t.Fatalf("Failed to create database: %v", err)
|
|
}
|
|
defer func() { _ = db.Close() }()
|
|
|
|
// Initialize database schema
|
|
schema := fixtures.TestSchema
|
|
err = db.Initialize(schema)
|
|
if err != nil {
|
|
t.Fatalf("Failed to initialize database: %v", err)
|
|
}
|
|
|
|
// Test large payload performance
|
|
numJobs := 10 // Fewer jobs for large payloads
|
|
payloadSize := 1024 * 1024 // 1MB payloads
|
|
|
|
m := &metrics.Metrics{}
|
|
ctx := context.Background()
|
|
|
|
start := time.Now()
|
|
|
|
// Create jobs with large payloads
|
|
for i := 0; i < numJobs; i++ {
|
|
jobID := fmt.Sprintf("large-payload-job-%d", i)
|
|
|
|
// Create large payload
|
|
payload := make([]byte, payloadSize)
|
|
for j := range payload {
|
|
payload[j] = byte(i % 256)
|
|
}
|
|
|
|
job := &storage.Job{
|
|
ID: jobID,
|
|
JobName: fmt.Sprintf("Large Payload Job %d", i),
|
|
Status: "pending",
|
|
Priority: 0,
|
|
Args: string(payload),
|
|
}
|
|
|
|
m.RecordTaskStart()
|
|
err = db.CreateJob(job)
|
|
if err != nil {
|
|
t.Fatalf("Failed to create job %d: %v", i, err)
|
|
}
|
|
m.RecordTaskCompletion()
|
|
|
|
// Queue job in Redis
|
|
err = rdb.LPush(ctx, "ml:queue", jobID).Err()
|
|
if err != nil {
|
|
t.Fatalf("Failed to queue job %d: %v", i, err)
|
|
}
|
|
|
|
m.RecordDataTransfer(int64(len(payload)), 0)
|
|
}
|
|
|
|
creationTime := time.Since(start)
|
|
t.Logf("Created %d jobs with %d byte payloads in %v", numJobs, payloadSize, creationTime)
|
|
|
|
// Process jobs
|
|
start = time.Now()
|
|
for i := 0; i < numJobs; i++ {
|
|
jobID := fmt.Sprintf("large-payload-job-%d", i)
|
|
|
|
// Update job status
|
|
err = db.UpdateJobStatus(jobID, "completed", "worker-1", "")
|
|
if err != nil {
|
|
t.Fatalf("Failed to update job %d: %v", i, err)
|
|
}
|
|
|
|
// Record metrics
|
|
err = db.RecordJobMetric(jobID, "processing_time", "1000")
|
|
if err != nil {
|
|
t.Fatalf("Failed to record metric for job %d: %v", i, err)
|
|
}
|
|
|
|
// Pop from queue
|
|
_, err = rdb.LPop(ctx, "ml:queue").Result()
|
|
if err != nil {
|
|
t.Fatalf("Failed to pop job %d: %v", i, err)
|
|
}
|
|
}
|
|
|
|
processingTime := time.Since(start)
|
|
t.Logf("Processed %d jobs in %v", numJobs, processingTime)
|
|
|
|
// Performance metrics
|
|
totalTime := creationTime + processingTime
|
|
jobsPerSecond := float64(numJobs) / totalTime.Seconds()
|
|
avgTimePerJob := totalTime / time.Duration(numJobs)
|
|
dataThroughput := float64(numJobs*payloadSize) / totalTime.Seconds() / (1024 * 1024) // MB/sec
|
|
|
|
t.Logf("Performance Results:")
|
|
t.Logf(" Total time: %v", totalTime)
|
|
t.Logf(" Jobs per second: %.2f", jobsPerSecond)
|
|
t.Logf(" Average time per job: %v", avgTimePerJob)
|
|
t.Logf(" Data throughput: %.2f MB/sec", dataThroughput)
|
|
|
|
// Verify performance thresholds (more lenient for large payloads)
|
|
if jobsPerSecond < 1 { // Should handle at least 1 job/second for large payloads
|
|
t.Errorf("Performance below threshold: %.2f jobs/sec (expected >= 1)", jobsPerSecond)
|
|
}
|
|
|
|
if avgTimePerJob > 1*time.Second { // Should handle each large job in under 1 second
|
|
t.Errorf("Average job time too high: %v (expected <= 1s)", avgTimePerJob)
|
|
}
|
|
|
|
if dataThroughput < 10 { // Should handle at least 10 MB/sec
|
|
t.Errorf("Data throughput too low: %.2f MB/sec (expected >= 10)", dataThroughput)
|
|
}
|
|
|
|
stats := m.GetStats()
|
|
t.Logf("Final metrics: %+v", stats)
|
|
}
|
|
|
|
func TestPayloadPerformanceConcurrent(t *testing.T) {
|
|
// t.Parallel() // Disable parallel to avoid conflicts
|
|
|
|
// Setup test environment
|
|
tempDir := t.TempDir()
|
|
rdb := setupPerformanceRedis(t)
|
|
if rdb == nil {
|
|
return
|
|
}
|
|
defer func() { _ = rdb.Close() }()
|
|
|
|
// Setup database
|
|
db, err := storage.NewDBFromPath(filepath.Join(tempDir, "test.db"))
|
|
if err != nil {
|
|
t.Fatalf("Failed to create database: %v", err)
|
|
}
|
|
defer func() { _ = db.Close() }()
|
|
|
|
// Initialize database schema
|
|
schema := `
|
|
CREATE TABLE IF NOT EXISTS jobs (
|
|
id TEXT PRIMARY KEY,
|
|
job_name TEXT NOT NULL,
|
|
args TEXT,
|
|
status TEXT NOT NULL DEFAULT 'pending',
|
|
priority INTEGER DEFAULT 0,
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
started_at DATETIME,
|
|
ended_at DATETIME,
|
|
worker_id TEXT,
|
|
error TEXT,
|
|
datasets TEXT,
|
|
metadata TEXT,
|
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
CREATE TABLE IF NOT EXISTS workers (
|
|
id TEXT PRIMARY KEY,
|
|
hostname TEXT NOT NULL,
|
|
last_heartbeat DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
status TEXT NOT NULL DEFAULT 'active',
|
|
current_jobs INTEGER DEFAULT 0,
|
|
max_jobs INTEGER DEFAULT 1,
|
|
metadata TEXT
|
|
);
|
|
CREATE TABLE IF NOT EXISTS job_metrics (
|
|
job_id TEXT NOT NULL,
|
|
metric_name TEXT NOT NULL,
|
|
metric_value TEXT NOT NULL,
|
|
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
PRIMARY KEY (job_id, metric_name),
|
|
FOREIGN KEY (job_id) REFERENCES jobs(id) ON DELETE CASCADE
|
|
);
|
|
`
|
|
err = db.Initialize(schema)
|
|
if err != nil {
|
|
t.Fatalf("Failed to initialize database: %v", err)
|
|
}
|
|
|
|
// Test concurrent payload performance
|
|
numWorkers := 5
|
|
jobsPerWorker := 20
|
|
payloadSize := 10 * 1024 // 10KB payloads
|
|
|
|
m := &metrics.Metrics{}
|
|
ctx := context.Background()
|
|
|
|
start := time.Now()
|
|
|
|
// Create jobs concurrently
|
|
done := make(chan bool, numWorkers)
|
|
for worker := 0; worker < numWorkers; worker++ {
|
|
go func(w int) {
|
|
defer func() { done <- true }()
|
|
|
|
for i := 0; i < jobsPerWorker; i++ {
|
|
jobID := fmt.Sprintf("concurrent-job-w%d-i%d", w, i)
|
|
|
|
// Create payload
|
|
payload := make([]byte, payloadSize)
|
|
for j := range payload {
|
|
payload[j] = byte((w + i) % 256)
|
|
}
|
|
|
|
job := &storage.Job{
|
|
ID: jobID,
|
|
JobName: fmt.Sprintf("Concurrent Job W%d I%d", w, i),
|
|
Status: "pending",
|
|
Priority: 0,
|
|
Args: string(payload),
|
|
}
|
|
|
|
m.RecordTaskStart()
|
|
err = db.CreateJob(job)
|
|
if err != nil {
|
|
t.Errorf("Worker %d failed to create job %d: %v", w, i, err)
|
|
return
|
|
}
|
|
m.RecordTaskCompletion()
|
|
|
|
// Queue job in Redis
|
|
err = rdb.LPush(ctx, "ml:queue", jobID).Err()
|
|
if err != nil {
|
|
t.Errorf("Worker %d failed to queue job %d: %v", w, i, err)
|
|
return
|
|
}
|
|
|
|
m.RecordDataTransfer(int64(len(payload)), 0)
|
|
}
|
|
}(worker)
|
|
}
|
|
|
|
// Wait for all workers to complete
|
|
for i := 0; i < numWorkers; i++ {
|
|
<-done
|
|
}
|
|
|
|
creationTime := time.Since(start)
|
|
totalJobs := numWorkers * jobsPerWorker
|
|
t.Logf("Created %d jobs concurrently with %d byte payloads in %v", totalJobs, payloadSize, creationTime)
|
|
|
|
// Process jobs concurrently
|
|
start = time.Now()
|
|
for worker := 0; worker < numWorkers; worker++ {
|
|
go func(w int) {
|
|
defer func() { done <- true }()
|
|
|
|
for i := 0; i < jobsPerWorker; i++ {
|
|
jobID := fmt.Sprintf("concurrent-job-w%d-i%d", w, i)
|
|
|
|
// Update job status
|
|
err = db.UpdateJobStatus(jobID, "completed", fmt.Sprintf("worker-%d", w), "")
|
|
if err != nil {
|
|
t.Errorf("Worker %d failed to update job %d: %v", w, i, err)
|
|
return
|
|
}
|
|
|
|
// Record metrics
|
|
err = db.RecordJobMetric(jobID, "processing_time", "50")
|
|
if err != nil {
|
|
t.Errorf("Worker %d failed to record metric for job %d: %v", w, i, err)
|
|
return
|
|
}
|
|
|
|
// Pop from queue
|
|
_, err = rdb.LPop(ctx, "ml:queue").Result()
|
|
if err != nil {
|
|
t.Errorf("Worker %d failed to pop job %d: %v", w, i, err)
|
|
return
|
|
}
|
|
}
|
|
}(worker)
|
|
}
|
|
|
|
// Wait for all workers to complete
|
|
for i := 0; i < numWorkers; i++ {
|
|
<-done
|
|
}
|
|
|
|
processingTime := time.Since(start)
|
|
t.Logf("Processed %d jobs concurrently in %v", totalJobs, processingTime)
|
|
|
|
// Performance metrics
|
|
totalTime := creationTime + processingTime
|
|
jobsPerSecond := float64(totalJobs) / totalTime.Seconds()
|
|
avgTimePerJob := totalTime / time.Duration(totalJobs)
|
|
concurrencyFactor := float64(totalJobs) / float64(creationTime.Seconds()) / 50 // Relative to baseline
|
|
|
|
t.Logf("Concurrent Performance Results:")
|
|
t.Logf(" Total time: %v", totalTime)
|
|
t.Logf(" Jobs per second: %.2f", jobsPerSecond)
|
|
t.Logf(" Average time per job: %v", avgTimePerJob)
|
|
t.Logf(" Concurrency factor: %.2f", concurrencyFactor)
|
|
|
|
// Verify concurrent performance benefits
|
|
if jobsPerSecond < 100 { // Should handle at least 100 jobs/second with concurrency
|
|
t.Errorf("Concurrent performance below threshold: %.2f jobs/sec (expected >= 100)", jobsPerSecond)
|
|
}
|
|
|
|
if concurrencyFactor < 2.0 { // Should be at least 2x faster than sequential
|
|
t.Errorf("Concurrency benefit too low: %.2fx (expected >= 2x)", concurrencyFactor)
|
|
}
|
|
|
|
stats := m.GetStats()
|
|
t.Logf("Final metrics: %+v", stats)
|
|
}
|
|
|
|
func TestPayloadMemoryUsage(t *testing.T) {
|
|
// t.Parallel() // Disable parallel to avoid conflicts
|
|
|
|
// Setup test environment
|
|
tempDir := t.TempDir()
|
|
rdb := setupPerformanceRedis(t)
|
|
if rdb == nil {
|
|
return
|
|
}
|
|
defer func() { _ = rdb.Close() }()
|
|
|
|
// Setup database
|
|
db, err := storage.NewDBFromPath(filepath.Join(tempDir, "test.db"))
|
|
if err != nil {
|
|
t.Fatalf("Failed to create database: %v", err)
|
|
}
|
|
defer func() { _ = db.Close() }()
|
|
|
|
// Initialize database schema
|
|
schema := `
|
|
CREATE TABLE IF NOT EXISTS jobs (
|
|
id TEXT PRIMARY KEY,
|
|
job_name TEXT NOT NULL,
|
|
args TEXT,
|
|
status TEXT NOT NULL DEFAULT 'pending',
|
|
priority INTEGER DEFAULT 0,
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
started_at DATETIME,
|
|
ended_at DATETIME,
|
|
worker_id TEXT,
|
|
error TEXT,
|
|
datasets TEXT,
|
|
metadata TEXT,
|
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
CREATE TABLE IF NOT EXISTS workers (
|
|
id TEXT PRIMARY KEY,
|
|
hostname TEXT NOT NULL,
|
|
last_heartbeat DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
status TEXT NOT NULL DEFAULT 'active',
|
|
current_jobs INTEGER DEFAULT 0,
|
|
max_jobs INTEGER DEFAULT 1,
|
|
metadata TEXT
|
|
);
|
|
CREATE TABLE IF NOT EXISTS job_metrics (
|
|
job_id TEXT NOT NULL,
|
|
metric_name TEXT NOT NULL,
|
|
metric_value TEXT NOT NULL,
|
|
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
PRIMARY KEY (job_id, metric_name),
|
|
FOREIGN KEY (job_id) REFERENCES jobs(id) ON DELETE CASCADE
|
|
);
|
|
`
|
|
err = db.Initialize(schema)
|
|
if err != nil {
|
|
t.Fatalf("Failed to initialize database: %v", err)
|
|
}
|
|
|
|
// Test memory usage with different payload sizes
|
|
payloadSizes := []int{1024, 10 * 1024, 100 * 1024, 1024 * 1024} // 1KB, 10KB, 100KB, 1MB
|
|
numJobs := 10
|
|
|
|
for _, payloadSize := range payloadSizes {
|
|
// Force GC to get clean memory baseline
|
|
runtime.GC()
|
|
|
|
var memBefore runtime.MemStats
|
|
runtime.ReadMemStats(&memBefore)
|
|
|
|
// Create jobs with specific payload size
|
|
for i := 0; i < numJobs; i++ {
|
|
jobID := fmt.Sprintf("memory-test-%d-%d", payloadSize, i)
|
|
|
|
payload := make([]byte, payloadSize)
|
|
for j := range payload {
|
|
payload[j] = byte(i % 256)
|
|
}
|
|
|
|
job := &storage.Job{
|
|
ID: jobID,
|
|
JobName: fmt.Sprintf("Memory Test %d", i),
|
|
Status: "pending",
|
|
Priority: 0,
|
|
Args: string(payload),
|
|
}
|
|
|
|
err = db.CreateJob(job)
|
|
if err != nil {
|
|
t.Fatalf("Failed to create job %d: %v", i, err)
|
|
}
|
|
}
|
|
|
|
var memAfter runtime.MemStats
|
|
runtime.ReadMemStats(&memAfter)
|
|
|
|
memoryUsed := memAfter.Alloc - memBefore.Alloc
|
|
memoryPerJob := memoryUsed / uint64(numJobs)
|
|
payloadOverhead := float64(memoryPerJob) / float64(payloadSize)
|
|
|
|
t.Logf("Memory usage for %d byte payloads:", payloadSize)
|
|
t.Logf(" Total memory used: %d bytes (%.2f MB)", memoryUsed, float64(memoryUsed)/1024/1024)
|
|
t.Logf(" Memory per job: %d bytes", memoryPerJob)
|
|
t.Logf(" Payload overhead ratio: %.2f", payloadOverhead)
|
|
|
|
// Verify memory usage is reasonable (overhead should be less than 10x payload size)
|
|
if payloadOverhead > 10.0 {
|
|
t.Errorf("Memory overhead too high for %d byte payloads: %.2fx (expected <= 10x)", payloadSize, payloadOverhead)
|
|
}
|
|
|
|
// TODO: Clean up jobs for next iteration
|
|
// Note: In a real implementation, we'd need a way to delete jobs
|
|
// For now, we'll just continue as the test will cleanup automatically
|
|
}
|
|
}
|