- Fix YAML tags in auth config struct (json -> yaml) - Update CLI configs to use pre-hashed API keys - Remove double hashing in WebSocket client - Fix port mapping (9102 -> 9103) in CLI commands - Update permission keys to use jobs:read, jobs:create, etc. - Clean up all debug logging from CLI and server - All user roles now authenticate correctly: * Admin: Can queue jobs and see all jobs * Researcher: Can queue jobs and see own jobs * Analyst: Can see status (read-only access) Multi-user authentication is now fully functional.
598 lines
15 KiB
Go
598 lines
15 KiB
Go
package tests
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/jfraeys/fetch_ml/internal/experiment"
|
|
"github.com/jfraeys/fetch_ml/internal/storage"
|
|
fixtures "github.com/jfraeys/fetch_ml/tests/fixtures"
|
|
"github.com/redis/go-redis/v9"
|
|
)
|
|
|
|
// statusCompleted is the terminal job status string shared by the
// lifecycle tests (database status, Redis status key values).
const statusCompleted = "completed"
|
|
|
|
// setupRedis creates a Redis client for testing
|
|
func setupRedis(t *testing.T) *redis.Client {
|
|
rdb := redis.NewClient(&redis.Options{
|
|
Addr: "localhost:6379",
|
|
Password: "",
|
|
DB: 2, // Use DB 2 for e2e tests to avoid conflicts
|
|
})
|
|
|
|
ctx := context.Background()
|
|
if err := rdb.Ping(ctx).Err(); err != nil {
|
|
t.Skipf("Redis not available, skipping e2e test: %v", err)
|
|
return nil
|
|
}
|
|
|
|
// Clean up the test database
|
|
rdb.FlushDB(ctx)
|
|
|
|
t.Cleanup(func() {
|
|
_ = rdb.FlushDB(ctx)
|
|
_ = rdb.Close()
|
|
})
|
|
|
|
return rdb
|
|
}
|
|
|
|
func TestCompleteJobLifecycle(t *testing.T) {
|
|
// t.Parallel() // Disable parallel to avoid Redis conflicts
|
|
|
|
// Setup test environment
|
|
tempDir := t.TempDir()
|
|
rdb := setupRedis(t)
|
|
if rdb == nil {
|
|
return
|
|
}
|
|
defer func() { _ = rdb.Close() }()
|
|
|
|
// Setup database
|
|
db, err := storage.NewDBFromPath(filepath.Join(tempDir, "test.db"))
|
|
if err != nil {
|
|
t.Fatalf("Failed to create database: %v", err)
|
|
}
|
|
defer func() { _ = db.Close() }()
|
|
|
|
// Initialize database schema
|
|
schema := fixtures.TestSchema
|
|
err = db.Initialize(schema)
|
|
if err != nil {
|
|
t.Fatalf("Failed to initialize database: %v", err)
|
|
}
|
|
|
|
// Setup experiment manager
|
|
expManager := experiment.NewManager(filepath.Join(tempDir, "experiments"))
|
|
|
|
// Test 1: Complete job lifecycle
|
|
jobID := "lifecycle-job-1"
|
|
|
|
// Step 1: Create job
|
|
job := &storage.Job{
|
|
ID: jobID,
|
|
JobName: "Lifecycle Test Job",
|
|
Status: "pending",
|
|
CreatedAt: time.Now(),
|
|
UpdatedAt: time.Now(),
|
|
Args: "",
|
|
Priority: 0,
|
|
}
|
|
|
|
err = db.CreateJob(job)
|
|
if err != nil {
|
|
t.Fatalf("Failed to create job: %v", err)
|
|
}
|
|
|
|
// Step 2: Queue job in Redis
|
|
ctx := context.Background()
|
|
err = rdb.LPush(ctx, "ml:queue", jobID).Err()
|
|
if err != nil {
|
|
t.Fatalf("Failed to queue job: %v", err)
|
|
}
|
|
|
|
// Step 3: Create experiment
|
|
err = expManager.CreateExperiment(jobID)
|
|
if err != nil {
|
|
t.Fatalf("Failed to create experiment: %v", err)
|
|
}
|
|
|
|
// Create experiment metadata
|
|
expDir := filepath.Join(tempDir, "experiments")
|
|
_ = os.MkdirAll(expDir, 0750)
|
|
|
|
expPath := filepath.Join(expDir, jobID+".yaml")
|
|
expData := fmt.Sprintf(`name: %s
|
|
commit_id: abc123
|
|
user: testuser
|
|
created_at: %s
|
|
`, job.JobName, job.CreatedAt.Format(time.RFC3339))
|
|
err = os.WriteFile(expPath, []byte(expData), 0600)
|
|
if err != nil {
|
|
t.Fatalf("Failed to create experiment metadata: %v", err)
|
|
}
|
|
|
|
// Step 4: Update job status to running
|
|
err = db.UpdateJobStatus(job.ID, "running", "worker-1", "")
|
|
if err != nil {
|
|
t.Fatalf("Failed to update job status to running: %v", err)
|
|
}
|
|
|
|
// Update Redis status
|
|
err = rdb.Set(ctx, "ml:status:"+jobID, "running", time.Hour).Err()
|
|
if err != nil {
|
|
t.Fatalf("Failed to set Redis status: %v", err)
|
|
}
|
|
|
|
// Step 5: Record metrics during execution
|
|
err = db.RecordJobMetric(jobID, "cpu_usage", "75.5")
|
|
if err != nil {
|
|
t.Fatalf("Failed to record job metric: %v", err)
|
|
}
|
|
|
|
err = db.RecordJobMetric(jobID, "memory_usage", "1024.0")
|
|
if err != nil {
|
|
t.Fatalf("Failed to record job metric: %v", err)
|
|
}
|
|
|
|
// Step 6: Complete job
|
|
err = db.UpdateJobStatus(jobID, statusCompleted, "worker-1", "")
|
|
if err != nil {
|
|
t.Fatalf("Failed to update job status to completed: %v", err)
|
|
}
|
|
|
|
// Pop job from queue to simulate processing
|
|
_, err = rdb.LPop(ctx, "ml:queue").Result()
|
|
if err != nil {
|
|
t.Fatalf("Failed to pop job from queue: %v", err)
|
|
}
|
|
|
|
err = rdb.Set(ctx, "ml:status:"+jobID, statusCompleted, time.Hour).Err()
|
|
if err != nil {
|
|
t.Fatalf("Failed to update Redis status: %v", err)
|
|
}
|
|
|
|
// Step 7: Verify complete lifecycle
|
|
// Check job in database
|
|
finalJob, err := db.GetJob(jobID)
|
|
if err != nil {
|
|
t.Fatalf("Failed to get final job: %v", err)
|
|
}
|
|
|
|
if finalJob.Status != statusCompleted {
|
|
t.Errorf("Expected job status 'completed', got '%s'", finalJob.Status)
|
|
}
|
|
|
|
// Check Redis status
|
|
redisStatus := rdb.Get(ctx, "ml:status:"+jobID).Val()
|
|
if redisStatus != statusCompleted {
|
|
t.Errorf("Expected Redis status 'completed', got '%s'", redisStatus)
|
|
}
|
|
|
|
// Check experiment exists
|
|
if !expManager.ExperimentExists(jobID) {
|
|
t.Error("Experiment should exist")
|
|
}
|
|
|
|
// Check metrics
|
|
metrics, err := db.GetJobMetrics(jobID)
|
|
if err != nil {
|
|
t.Fatalf("Failed to get job metrics: %v", err)
|
|
}
|
|
|
|
if len(metrics) != 2 {
|
|
t.Errorf("Expected 2 metrics, got %d", len(metrics))
|
|
}
|
|
|
|
// Check queue is empty
|
|
queueLength := rdb.LLen(ctx, "ml:queue").Val()
|
|
if queueLength != 0 {
|
|
t.Errorf("Expected empty queue, got %d", queueLength)
|
|
}
|
|
}
|
|
|
|
func TestMultipleJobsLifecycle(t *testing.T) {
|
|
// t.Parallel() // Disable parallel to avoid Redis conflicts
|
|
|
|
// Setup test environment
|
|
tempDir := t.TempDir()
|
|
rdb := setupRedis(t)
|
|
if rdb == nil {
|
|
return
|
|
}
|
|
defer func() { _ = rdb.Close() }()
|
|
|
|
// Setup database
|
|
db, err := storage.NewDBFromPath(filepath.Join(tempDir, "test.db"))
|
|
if err != nil {
|
|
t.Fatalf("Failed to create database: %v", err)
|
|
}
|
|
defer func() { _ = db.Close() }()
|
|
|
|
// Initialize database schema
|
|
schema := fixtures.TestSchema
|
|
err = db.Initialize(schema)
|
|
if err != nil {
|
|
t.Fatalf("Failed to initialize database: %v", err)
|
|
}
|
|
|
|
// Test 2: Multiple concurrent jobs
|
|
numJobs := 3
|
|
jobIDs := make([]string, numJobs)
|
|
|
|
// Create multiple jobs
|
|
for i := 0; i < numJobs; i++ {
|
|
jobID := fmt.Sprintf("multi-job-%d", i)
|
|
jobIDs[i] = jobID
|
|
|
|
job := &storage.Job{
|
|
ID: jobID,
|
|
JobName: fmt.Sprintf("Multi Job %d", i),
|
|
Status: "pending",
|
|
CreatedAt: time.Now(),
|
|
UpdatedAt: time.Now(),
|
|
Args: "",
|
|
Priority: 0,
|
|
}
|
|
|
|
err = db.CreateJob(job)
|
|
if err != nil {
|
|
t.Fatalf("Failed to create job %d: %v", i, err)
|
|
}
|
|
|
|
// Queue job
|
|
ctx := context.Background()
|
|
err = rdb.LPush(ctx, "ml:queue", jobID).Err()
|
|
if err != nil {
|
|
t.Fatalf("Failed to queue job %d: %v", i, err)
|
|
}
|
|
}
|
|
|
|
// Verify all jobs are queued
|
|
ctx := context.Background()
|
|
queueLength := rdb.LLen(ctx, "ml:queue").Val()
|
|
if int(queueLength) != numJobs {
|
|
t.Errorf("Expected queue length %d, got %d", numJobs, queueLength)
|
|
}
|
|
|
|
// Process jobs
|
|
for i, jobID := range jobIDs {
|
|
// Update to running
|
|
err = db.UpdateJobStatus(jobID, "running", "worker-1", "")
|
|
if err != nil {
|
|
t.Fatalf("Failed to update job %d to running: %v", i, err)
|
|
}
|
|
|
|
err = rdb.Set(ctx, "ml:status:"+jobID, "running", time.Hour).Err()
|
|
if err != nil {
|
|
t.Fatalf("Failed to set Redis status for job %d: %v", i, err)
|
|
}
|
|
|
|
// Record metric
|
|
err = db.RecordJobMetric(jobID, "cpu_usage", fmt.Sprintf("%.1f", float64(50+i*10)))
|
|
if err != nil {
|
|
t.Fatalf("Failed to record metric for job %d: %v", i, err)
|
|
}
|
|
|
|
// Complete job
|
|
err = db.UpdateJobStatus(jobID, statusCompleted, "worker-1", "")
|
|
if err != nil {
|
|
t.Fatalf("Failed to update job %d to completed: %v", i, err)
|
|
}
|
|
|
|
// Pop job from queue to simulate processing
|
|
_, err = rdb.LPop(ctx, "ml:queue").Result()
|
|
if err != nil {
|
|
t.Fatalf("Failed to pop job %d from queue: %v", i, err)
|
|
}
|
|
|
|
err = rdb.Set(ctx, "ml:status:"+jobID, statusCompleted, time.Hour).Err()
|
|
if err != nil {
|
|
t.Fatalf("Failed to update Redis status for job %d: %v", i, err)
|
|
}
|
|
}
|
|
|
|
// Verify all jobs completed
|
|
for i, jobID := range jobIDs {
|
|
job, err := db.GetJob(jobID)
|
|
if err != nil {
|
|
t.Fatalf("Failed to get job %d: %v", i, err)
|
|
}
|
|
|
|
if job.Status != statusCompleted {
|
|
t.Errorf("Job %d status should be completed, got '%s'", i, job.Status)
|
|
}
|
|
|
|
redisStatus := rdb.Get(ctx, "ml:status:"+jobID).Val()
|
|
if redisStatus != statusCompleted {
|
|
t.Errorf("Job %d Redis status should be completed, got '%s'", i, redisStatus)
|
|
}
|
|
}
|
|
|
|
// Verify queue is empty
|
|
queueLength = rdb.LLen(ctx, "ml:queue").Val()
|
|
if queueLength != 0 {
|
|
t.Errorf("Expected empty queue, got %d", queueLength)
|
|
}
|
|
}
|
|
|
|
func TestFailedJobHandling(t *testing.T) {
|
|
// t.Parallel() // Disable parallel to avoid Redis conflicts
|
|
|
|
// Setup test environment
|
|
tempDir := t.TempDir()
|
|
rdb := setupRedis(t)
|
|
if rdb == nil {
|
|
return
|
|
}
|
|
defer func() { _ = rdb.Close() }()
|
|
|
|
// Setup database
|
|
db, err := storage.NewDBFromPath(filepath.Join(tempDir, "test.db"))
|
|
if err != nil {
|
|
t.Fatalf("Failed to create database: %v", err)
|
|
}
|
|
defer func() { _ = db.Close() }()
|
|
|
|
// Initialize database schema
|
|
schema := `
|
|
CREATE TABLE IF NOT EXISTS jobs (
|
|
id TEXT PRIMARY KEY,
|
|
job_name TEXT NOT NULL,
|
|
args TEXT,
|
|
status TEXT NOT NULL DEFAULT 'pending',
|
|
priority INTEGER DEFAULT 0,
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
started_at DATETIME,
|
|
ended_at DATETIME,
|
|
worker_id TEXT,
|
|
error TEXT,
|
|
datasets TEXT,
|
|
metadata TEXT,
|
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
CREATE TABLE IF NOT EXISTS workers (
|
|
id TEXT PRIMARY KEY,
|
|
hostname TEXT NOT NULL,
|
|
last_heartbeat DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
status TEXT NOT NULL DEFAULT 'active',
|
|
current_jobs INTEGER DEFAULT 0,
|
|
max_jobs INTEGER DEFAULT 1,
|
|
metadata TEXT
|
|
);
|
|
CREATE TABLE IF NOT EXISTS job_metrics (
|
|
job_id TEXT NOT NULL,
|
|
metric_name TEXT NOT NULL,
|
|
metric_value TEXT NOT NULL,
|
|
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
PRIMARY KEY (job_id, metric_name),
|
|
FOREIGN KEY (job_id) REFERENCES jobs(id) ON DELETE CASCADE
|
|
);
|
|
`
|
|
err = db.Initialize(schema)
|
|
if err != nil {
|
|
t.Fatalf("Failed to initialize database: %v", err)
|
|
}
|
|
|
|
// Test 3: Failed job handling
|
|
jobID := "failed-job-1"
|
|
|
|
// Create job
|
|
job := &storage.Job{
|
|
ID: jobID,
|
|
JobName: "Failed Test Job",
|
|
Status: "pending",
|
|
CreatedAt: time.Now(),
|
|
UpdatedAt: time.Now(),
|
|
Args: "",
|
|
Priority: 0,
|
|
}
|
|
|
|
err = db.CreateJob(job)
|
|
if err != nil {
|
|
t.Fatalf("Failed to create job: %v", err)
|
|
}
|
|
|
|
// Queue job
|
|
ctx := context.Background()
|
|
err = rdb.LPush(ctx, "ml:queue", jobID).Err()
|
|
if err != nil {
|
|
t.Fatalf("Failed to queue job: %v", err)
|
|
}
|
|
|
|
// Update to running
|
|
err = db.UpdateJobStatus(jobID, "running", "worker-1", "")
|
|
if err != nil {
|
|
t.Fatalf("Failed to update job to running: %v", err)
|
|
}
|
|
|
|
err = rdb.Set(ctx, "ml:status:"+jobID, "running", time.Hour).Err()
|
|
if err != nil {
|
|
t.Fatalf("Failed to set Redis status: %v", err)
|
|
}
|
|
|
|
// Simulate failure
|
|
err = db.UpdateJobStatus(jobID, "failed", "worker-1", "simulated error")
|
|
if err != nil {
|
|
t.Fatalf("Failed to update job to failed: %v", err)
|
|
}
|
|
|
|
// Pop job from queue to simulate processing (even failed jobs are processed)
|
|
_, err = rdb.LPop(ctx, "ml:queue").Result()
|
|
if err != nil {
|
|
t.Fatalf("Failed to pop job from queue: %v", err)
|
|
}
|
|
|
|
err = rdb.Set(ctx, "ml:status:"+jobID, "failed", time.Hour).Err()
|
|
if err != nil {
|
|
t.Fatalf("Failed to update Redis status: %v", err)
|
|
}
|
|
|
|
// Verify failed state
|
|
finalJob, err := db.GetJob(jobID)
|
|
if err != nil {
|
|
t.Fatalf("Failed to get final job: %v", err)
|
|
}
|
|
|
|
if finalJob.Status != "failed" {
|
|
t.Errorf("Expected job status 'failed', got '%s'", finalJob.Status)
|
|
}
|
|
|
|
redisStatus := rdb.Get(ctx, "ml:status:"+jobID).Val()
|
|
if redisStatus != "failed" {
|
|
t.Errorf("Expected Redis status 'failed', got '%s'", redisStatus)
|
|
}
|
|
|
|
// Verify queue is empty (job was processed)
|
|
queueLength := rdb.LLen(ctx, "ml:queue").Val()
|
|
if queueLength != 0 {
|
|
t.Errorf("Expected empty queue, got %d", queueLength)
|
|
}
|
|
}
|
|
|
|
func TestJobCleanup(t *testing.T) {
|
|
// t.Parallel() // Disable parallel to avoid Redis conflicts
|
|
|
|
// Setup test environment
|
|
tempDir := t.TempDir()
|
|
rdb := setupRedis(t)
|
|
if rdb == nil {
|
|
return
|
|
}
|
|
defer func() { _ = rdb.Close() }()
|
|
|
|
// Setup database
|
|
db, err := storage.NewDBFromPath(filepath.Join(tempDir, "test.db"))
|
|
if err != nil {
|
|
t.Fatalf("Failed to create database: %v", err)
|
|
}
|
|
defer func() { _ = db.Close() }()
|
|
|
|
// Initialize database schema
|
|
schema := `
|
|
CREATE TABLE IF NOT EXISTS jobs (
|
|
id TEXT PRIMARY KEY,
|
|
job_name TEXT NOT NULL,
|
|
args TEXT,
|
|
status TEXT NOT NULL DEFAULT 'pending',
|
|
priority INTEGER DEFAULT 0,
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
started_at DATETIME,
|
|
ended_at DATETIME,
|
|
worker_id TEXT,
|
|
error TEXT,
|
|
datasets TEXT,
|
|
metadata TEXT,
|
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
CREATE TABLE IF NOT EXISTS workers (
|
|
id TEXT PRIMARY KEY,
|
|
hostname TEXT NOT NULL,
|
|
last_heartbeat DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
status TEXT NOT NULL DEFAULT 'active',
|
|
current_jobs INTEGER DEFAULT 0,
|
|
max_jobs INTEGER DEFAULT 1,
|
|
metadata TEXT
|
|
);
|
|
CREATE TABLE IF NOT EXISTS job_metrics (
|
|
job_id TEXT NOT NULL,
|
|
metric_name TEXT NOT NULL,
|
|
metric_value TEXT NOT NULL,
|
|
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
PRIMARY KEY (job_id, metric_name),
|
|
FOREIGN KEY (job_id) REFERENCES jobs(id) ON DELETE CASCADE
|
|
);
|
|
`
|
|
err = db.Initialize(schema)
|
|
if err != nil {
|
|
t.Fatalf("Failed to initialize database: %v", err)
|
|
}
|
|
|
|
// Setup experiment manager
|
|
expManager := experiment.NewManager(filepath.Join(tempDir, "experiments"))
|
|
|
|
// Test 4: Job cleanup
|
|
jobID := "cleanup-job-1"
|
|
commitID := "cleanupcommit"
|
|
|
|
// Create job and experiment
|
|
job := &storage.Job{
|
|
ID: jobID,
|
|
JobName: "Cleanup Test Job",
|
|
Status: "pending",
|
|
CreatedAt: time.Now(),
|
|
UpdatedAt: time.Now(),
|
|
Args: "",
|
|
Priority: 0,
|
|
}
|
|
|
|
err = db.CreateJob(job)
|
|
if err != nil {
|
|
t.Fatalf("Failed to create job: %v", err)
|
|
}
|
|
|
|
// Create experiment with proper metadata
|
|
err = expManager.CreateExperiment(commitID)
|
|
if err != nil {
|
|
t.Fatalf("Failed to create experiment: %v", err)
|
|
}
|
|
|
|
// Create proper metadata file
|
|
metadata := &experiment.Metadata{
|
|
CommitID: commitID,
|
|
Timestamp: time.Now().AddDate(0, 0, -2).Unix(), // 2 days ago
|
|
JobName: "Cleanup Test Job",
|
|
User: "testuser",
|
|
}
|
|
|
|
err = expManager.WriteMetadata(metadata)
|
|
if err != nil {
|
|
t.Fatalf("Failed to write metadata: %v", err)
|
|
}
|
|
|
|
// Add some files to experiment
|
|
filesDir := expManager.GetFilesPath(commitID)
|
|
testFile := filepath.Join(filesDir, "test.txt")
|
|
err = os.WriteFile(testFile, []byte("test content"), 0600)
|
|
if err != nil {
|
|
t.Fatalf("Failed to create test file: %v", err)
|
|
}
|
|
|
|
// Verify experiment exists
|
|
if !expManager.ExperimentExists(commitID) {
|
|
t.Error("Experiment should exist")
|
|
}
|
|
|
|
// Complete job
|
|
err = db.UpdateJobStatus(jobID, "completed", "worker-1", "")
|
|
if err != nil {
|
|
t.Fatalf("Failed to update job status: %v", err)
|
|
}
|
|
|
|
// Cleanup old experiments (keep 0 - should prune everything)
|
|
pruned, err := expManager.PruneExperiments(0, 0)
|
|
if err != nil {
|
|
t.Fatalf("Failed to prune experiments: %v", err)
|
|
}
|
|
|
|
if len(pruned) != 1 {
|
|
t.Errorf("Expected 1 pruned experiment, got %d", len(pruned))
|
|
}
|
|
|
|
// Verify experiment is gone
|
|
if expManager.ExperimentExists(commitID) {
|
|
t.Error("Experiment should be pruned")
|
|
}
|
|
|
|
// Verify job still exists in database
|
|
_, err = db.GetJob(jobID)
|
|
if err != nil {
|
|
t.Errorf("Job should still exist in database: %v", err)
|
|
}
|
|
}
|