package tests

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"testing"
	"time"

	tests "github.com/jfraeys/fetch_ml/tests/fixtures"
)

// copyFiles copies the named files from srcDir into dstDir, silently
// skipping any that do not exist (requirements.txt / README.md are optional
// in some example fixtures). Files are written with mode 0755 so train.py
// keeps its executable bit, matching how jobs are originally submitted.
// label is used only in failure messages (e.g. an experiment name or
// "job 3").
func copyFiles(t *testing.T, srcDir, dstDir string, files []string, label string) {
	t.Helper()
	for _, file := range files {
		src := filepath.Join(srcDir, file)
		dst := filepath.Join(dstDir, file)
		if _, err := os.Stat(src); os.IsNotExist(err) {
			continue // optional file; skip
		}
		data, err := os.ReadFile(src)
		if err != nil {
			t.Fatalf("Failed to read %s for %s: %v", file, label, err)
		}
		if err := os.WriteFile(dst, data, 0755); err != nil {
			t.Fatalf("Failed to copy %s for %s: %v", file, label, err)
		}
	}
}

// detectFramework infers the ML framework from the text of a training
// script by scanning for well-known import names. Checks are ordered, so a
// script mentioning both "sklearn" and "torch" reports "scikit-learn" —
// the same precedence the original inline chains used. Returns "unknown"
// when no marker matches.
func detectFramework(script string) string {
	switch {
	case strings.Contains(script, "sklearn"):
		return "scikit-learn"
	case strings.Contains(script, "xgboost"):
		return "xgboost"
	case strings.Contains(script, "torch"):
		return "pytorch"
	case strings.Contains(script, "tensorflow"):
		return "tensorflow"
	case strings.Contains(script, "statsmodels"):
		return "statsmodels"
	default:
		return "unknown"
	}
}

// TestQueueExecution tests that experiments are processed sequentially
// through the queue.
//
// NOTE: the two subtests intentionally share testDir state —
// QueueSubmission populates the pending queue that SequentialProcessing
// then drains. t.Run subtests execute in declaration order (neither calls
// t.Parallel), so this ordering is safe.
func TestQueueExecution(t *testing.T) {
	t.Parallel() // Enable parallel execution with other top-level tests

	testDir := t.TempDir()

	// Use fixtures for examples directory operations
	examplesDir := tests.NewExamplesDir(filepath.Join("..", "fixtures", "examples"))

	// Test 1: Create multiple experiments from actual examples and add them
	// to the queue.
	t.Run("QueueSubmission", func(t *testing.T) {
		// Server queue structure (pending side only; the processing subtest
		// creates running/finished).
		queueDir := filepath.Join(testDir, "server", "home", "mluser", "ml_jobs", "pending")

		// Actual examples with different priorities.
		experiments := []struct {
			name       string
			priority   int
			exampleDir string
		}{
			{"sklearn_classification", 1, "sklearn_project"},
			{"xgboost_classification", 2, "xgboost_project"},
			{"pytorch_nn", 3, "pytorch_project"},
		}

		for _, exp := range experiments {
			// Copy actual example files using fixtures.
			sourceDir := examplesDir.GetPath(exp.exampleDir)
			experimentDir := filepath.Join(testDir, exp.name)
			if err := tests.CopyDir(sourceDir, experimentDir); err != nil {
				t.Fatalf("Failed to copy example %s: %v", exp.exampleDir, err)
			}

			// Add to queue (simulate job submission). The priority is
			// encoded in the directory name so the processor can glob for it.
			timestamp := time.Now().Format("20060102_150405")
			jobName := fmt.Sprintf("%s_%s_priority_%d", exp.name, timestamp, exp.priority)
			jobDir := filepath.Join(queueDir, jobName)
			if err := os.MkdirAll(jobDir, 0755); err != nil {
				t.Fatalf("Failed to create queue directory for %s: %v", exp.name, err)
			}

			// Copy experiment files into the queued job directory.
			copyFiles(t, experimentDir, jobDir,
				[]string{"train.py", "requirements.txt", "README.md"}, exp.name)

			// Create queue metadata file describing the submission.
			queueMetadata := filepath.Join(jobDir, "queue_metadata.json")
			metadata := fmt.Sprintf(`{
  "job_name": "%s",
  "experiment_name": "%s",
  "example_source": "%s",
  "priority": %d,
  "status": "pending",
  "submitted_at": "%s"
}`, jobName, exp.name, exp.exampleDir, exp.priority, time.Now().Format(time.RFC3339))
			if err := os.WriteFile(queueMetadata, []byte(metadata), 0644); err != nil {
				t.Fatalf("Failed to create queue metadata for %s: %v", exp.name, err)
			}
		}

		// Verify all experiments are in the queue. Glob errors are fatal
		// (a bad pattern is a test bug, not a missing job).
		for _, exp := range experiments {
			pattern := filepath.Join(queueDir,
				fmt.Sprintf("%s_*_priority_%d", exp.name, exp.priority))
			queueJobs, err := filepath.Glob(pattern)
			if err != nil {
				t.Fatalf("Glob failed for %s: %v", exp.name, err)
			}
			if len(queueJobs) == 0 {
				t.Errorf("Queue job should exist for %s with priority %d", exp.name, exp.priority)
			}
		}
	})

	// Test 2: Simulate sequential processing (queue behavior).
	t.Run("SequentialProcessing", func(t *testing.T) {
		jobsRoot := filepath.Join(testDir, "server", "home", "mluser", "ml_jobs")
		pendingDir := filepath.Join(jobsRoot, "pending")
		runningDir := filepath.Join(jobsRoot, "running")
		finishedDir := filepath.Join(jobsRoot, "finished")

		// Create the running/finished directories if they don't exist
		// (pending was created by the submission subtest).
		for _, dir := range []string{runningDir, finishedDir} {
			if err := os.MkdirAll(dir, 0755); err != nil {
				t.Fatalf("Failed to create directory %s: %v", dir, err)
			}
		}

		// Process jobs in priority order (1, 2, 3). Priority doubles as the
		// expected execution order recorded in the results file.
		for priority := 1; priority <= 3; priority++ {
			jobs, err := filepath.Glob(filepath.Join(pendingDir,
				fmt.Sprintf("*_priority_%d", priority)))
			if err != nil {
				t.Fatalf("Failed to find jobs with priority %d: %v", priority, err)
			}
			if len(jobs) == 0 {
				t.Fatalf("No job found with priority %d", priority)
			}
			jobDir := jobs[0] // Take first job with this priority
			jobName := filepath.Base(jobDir)

			// Move from pending to running.
			runningJobDir := filepath.Join(runningDir, jobName)
			if err := os.Rename(jobDir, runningJobDir); err != nil {
				t.Fatalf("Failed to move job %s to running: %v", jobName, err)
			}

			// Sequential-execution invariant: exactly one job running.
			runningJobs, err := filepath.Glob(filepath.Join(runningDir, "*"))
			if err != nil {
				t.Fatalf("Glob failed on running dir: %v", err)
			}
			if len(runningJobs) != 1 {
				t.Errorf("Expected exactly 1 running job, found %d", len(runningJobs))
			}

			// Simulate execution by creating results (using actual
			// framework patterns).
			outputDir := filepath.Join(runningJobDir, "results")
			if err := os.MkdirAll(outputDir, 0755); err != nil {
				t.Fatalf("Failed to create output directory for %s: %v", jobName, err)
			}

			// Read the actual train.py to determine the framework.
			trainScript := filepath.Join(runningJobDir, "train.py")
			scriptContent, err := os.ReadFile(trainScript)
			if err != nil {
				t.Fatalf("Failed to read train.py for %s: %v", jobName, err)
			}
			framework := detectFramework(string(scriptContent))

			resultsFile := filepath.Join(outputDir, "results.json")
			results := fmt.Sprintf(`{
  "job_name": "%s",
  "framework": "%s",
  "priority": %d,
  "status": "completed",
  "execution_order": %d,
  "started_at": "%s",
  "completed_at": "%s",
  "source": "actual_example"
}`, jobName, framework, priority, priority,
				time.Now().Add(-time.Duration(priority)*time.Minute).Format(time.RFC3339),
				time.Now().Format(time.RFC3339))
			if err := os.WriteFile(resultsFile, []byte(results), 0644); err != nil {
				t.Fatalf("Failed to create results for %s: %v", jobName, err)
			}

			// Move from running to finished.
			finishedJobDir := filepath.Join(finishedDir, jobName)
			if err := os.Rename(runningJobDir, finishedJobDir); err != nil {
				t.Fatalf("Failed to move job %s to finished: %v", jobName, err)
			}

			// Verify the job is no longer in pending or running.
			if _, err := os.Stat(jobDir); !os.IsNotExist(err) {
				t.Errorf("Job %s should no longer be in pending directory", jobName)
			}
			if _, err := os.Stat(runningJobDir); !os.IsNotExist(err) {
				t.Errorf("Job %s should no longer be in running directory", jobName)
			}
		}

		// Verify all jobs completed.
		finishedJobs, err := filepath.Glob(filepath.Join(finishedDir, "*"))
		if err != nil {
			t.Fatalf("Glob failed on finished dir: %v", err)
		}
		if len(finishedJobs) != 3 {
			t.Errorf("Expected 3 finished jobs, got %d", len(finishedJobs))
		}

		// Verify the queue is empty.
		pendingJobs, err := filepath.Glob(filepath.Join(pendingDir, "*"))
		if err != nil {
			t.Fatalf("Glob failed on pending dir: %v", err)
		}
		if len(pendingJobs) != 0 {
			t.Errorf("Expected 0 pending jobs after processing, found %d", len(pendingJobs))
		}

		// Verify no jobs are running.
		runningJobs, err := filepath.Glob(filepath.Join(runningDir, "*"))
		if err != nil {
			t.Fatalf("Glob failed on running dir: %v", err)
		}
		if len(runningJobs) != 0 {
			t.Errorf("Expected 0 running jobs after processing, found %d", len(runningJobs))
		}
	})
}

// TestQueueCapacity tests queue capacity and resource limits: more jobs are
// queued than can run simultaneously, and the test verifies they are
// processed strictly one at a time.
func TestQueueCapacity(t *testing.T) {
	t.Parallel() // Enable parallel execution

	testDir := t.TempDir()

	t.Run("QueueCapacityLimits", func(t *testing.T) {
		// Use fixtures for examples directory operations.
		examplesDir := tests.NewExamplesDir(filepath.Join("..", "fixtures", "examples"))

		jobsRoot := filepath.Join(testDir, "server", "home", "mluser", "ml_jobs")
		pendingDir := filepath.Join(jobsRoot, "pending")
		runningDir := filepath.Join(jobsRoot, "running")
		finishedDir := filepath.Join(jobsRoot, "finished")

		// Create all three queue directories.
		for _, dir := range []string{pendingDir, runningDir, finishedDir} {
			if err := os.MkdirAll(dir, 0755); err != nil {
				t.Fatalf("Failed to create directory %s: %v", dir, err)
			}
		}

		// Create more jobs than the server can handle simultaneously, using
		// actual examples.
		examples := []string{"standard_ml_project", "sklearn_project",
			"xgboost_project", "pytorch_project", "tensorflow_project"}
		totalJobs := len(examples)

		for i, example := range examples {
			jobName := fmt.Sprintf("capacity_test_job_%d", i)
			jobDir := filepath.Join(pendingDir, jobName)
			if err := os.MkdirAll(jobDir, 0755); err != nil {
				t.Fatalf("Failed to create job directory %s: %v", jobDir, err)
			}

			sourceDir := examplesDir.GetPath(example)
			if _, err := os.Stat(sourceDir); os.IsNotExist(err) {
				// Fall back to a minimal self-contained training script when
				// the example fixture doesn't exist.
				trainScript := filepath.Join(jobDir, "train.py")
				script := fmt.Sprintf(`#!/usr/bin/env python3
import json, time
from pathlib import Path

def main():
    results = {
        "job_id": %d,
        "example": "%s",
        "status": "completed",
        "completion_time": time.strftime("%%Y-%%m-%%d %%H:%%M:%%S")
    }
    output_dir = Path("./results")
    output_dir.mkdir(parents=True, exist_ok=True)
    with open(output_dir / "results.json", "w") as f:
        json.dump(results, f, indent=2)

if __name__ == "__main__":
    main()
`, i, example)
				if err := os.WriteFile(trainScript, []byte(script), 0755); err != nil {
					t.Fatalf("Failed to create train script for job %d: %v", i, err)
				}
			} else {
				// Copy actual example files (README not needed here).
				copyFiles(t, sourceDir, jobDir,
					[]string{"train.py", "requirements.txt"}, fmt.Sprintf("job %d", i))
			}
		}

		// Verify all jobs are in the pending queue.
		pendingJobs, err := filepath.Glob(filepath.Join(pendingDir, "capacity_test_job_*"))
		if err != nil {
			t.Fatalf("Glob failed on pending dir: %v", err)
		}
		if len(pendingJobs) != totalJobs {
			t.Errorf("Expected %d pending jobs, found %d", totalJobs, len(pendingJobs))
		}

		// Process one job at a time (sequential execution).
		for i := 0; i < totalJobs; i++ {
			jobName := fmt.Sprintf("capacity_test_job_%d", i)
			pendingJobDir := filepath.Join(pendingDir, jobName)
			runningJobDir := filepath.Join(runningDir, jobName)
			if err := os.Rename(pendingJobDir, runningJobDir); err != nil {
				t.Fatalf("Failed to move job %d to running: %v", i, err)
			}

			// Verify only one job is running.
			runningJobs, err := filepath.Glob(filepath.Join(runningDir, "*"))
			if err != nil {
				t.Fatalf("Glob failed on running dir: %v", err)
			}
			if len(runningJobs) != 1 {
				t.Errorf("Expected exactly 1 running job, found %d", len(runningJobs))
			}

			// Simulate a short execution window.
			time.Sleep(5 * time.Millisecond)

			// Move to finished.
			finishedJobDir := filepath.Join(finishedDir, jobName)
			if err := os.Rename(runningJobDir, finishedJobDir); err != nil {
				t.Fatalf("Failed to move job %d to finished: %v", i, err)
			}

			// Verify nothing is running between jobs.
			runningJobs, err = filepath.Glob(filepath.Join(runningDir, "*"))
			if err != nil {
				t.Fatalf("Glob failed on running dir: %v", err)
			}
			if len(runningJobs) != 0 {
				t.Errorf("Expected 0 running jobs between jobs, found %d", len(runningJobs))
			}
		}

		// Verify all jobs completed.
		finishedJobs, err := filepath.Glob(filepath.Join(finishedDir, "capacity_test_job_*"))
		if err != nil {
			t.Fatalf("Glob failed on finished dir: %v", err)
		}
		if len(finishedJobs) != totalJobs {
			t.Errorf("Expected %d finished jobs, got %d", totalJobs, len(finishedJobs))
		}

		// Verify the queue is empty.
		pendingJobs, err = filepath.Glob(filepath.Join(pendingDir, "capacity_test_job_*"))
		if err != nil {
			t.Fatalf("Glob failed on pending dir: %v", err)
		}
		if len(pendingJobs) != 0 {
			t.Errorf("Expected 0 pending jobs after processing, found %d", len(pendingJobs))
		}
	})
}

// TestResourceIsolation tests that experiments have isolated resources:
// jobs sharing an identical timestamp must still get distinct output
// directories and result files.
func TestResourceIsolation(t *testing.T) {
	t.Parallel() // Enable parallel execution

	testDir := t.TempDir()

	t.Run("OutputDirectoryIsolation", func(t *testing.T) {
		// Use fixtures for examples directory operations.
		examplesDir := tests.NewExamplesDir(filepath.Join("..", "fixtures", "examples"))

		// Deliberately identical timestamp for every experiment — the
		// experiment index is the only thing keeping job names apart.
		timestamp := "20231201_143022"
		examples := []string{"sklearn_project", "xgboost_project", "pytorch_project"}
		runningDir := filepath.Join(testDir, "server", "home", "mluser", "ml_jobs", "running")

		for i, expName := range examples {
			jobName := fmt.Sprintf("exp%d_%s", i, timestamp)
			outputDir := filepath.Join(runningDir, jobName, "results")
			if err := os.MkdirAll(outputDir, 0755); err != nil {
				t.Fatalf("Failed to create output directory: %v", err)
			}

			// Best-effort framework detection from the actual example;
			// a missing fixture simply records "unknown".
			sourceDir := examplesDir.GetPath(expName)
			framework := "unknown"
			if content, err := os.ReadFile(filepath.Join(sourceDir, "train.py")); err == nil {
				framework = detectFramework(string(content))
			}

			// Create a unique results file based on the actual framework.
			resultsFile := filepath.Join(outputDir, "results.json")
			results := fmt.Sprintf(`{
  "experiment": "exp%d",
  "framework": "%s",
  "job_name": "%s",
  "output_dir": "%s",
  "example_source": "%s",
  "unique_id": "exp%d_%d"
}`, i, framework, jobName, outputDir, expName, i, time.Now().UnixNano())
			if err := os.WriteFile(resultsFile, []byte(results), 0644); err != nil {
				t.Fatalf("Failed to create results for %s: %v", expName, err)
			}
		}

		// Verify each experiment has its own isolated output directory
		// with unique content.
		for i, expName := range examples {
			jobName := fmt.Sprintf("exp%d_%s", i, timestamp)
			resultsFile := filepath.Join(runningDir, jobName, "results", "results.json")
			if _, err := os.Stat(resultsFile); os.IsNotExist(err) {
				t.Errorf("Results file should exist for %s in isolated directory", expName)
			}

			content, err := os.ReadFile(resultsFile)
			if err != nil {
				t.Fatalf("Failed to read results for %s: %v", expName, err)
			}
			if !contains(string(content), fmt.Sprintf("exp%d", i)) {
				t.Errorf("Results file should contain experiment ID exp%d", i)
			}
			if !contains(string(content), expName) {
				t.Errorf("Results file should contain example source %s", expName)
			}
		}
	})
}

// contains reports whether substr is within s. Kept with its original
// signature for existing call sites; the hand-rolled scan has been replaced
// by the equivalent standard-library strings.Contains.
func contains(s, substr string) bool {
	return strings.Contains(s, substr)
}

// findSubstring reports whether substr occurs anywhere in s. Kept for
// compatibility; delegates to strings.Contains, which performs the same
// linear scan the original loop did.
func findSubstring(s, substr string) bool {
	return strings.Contains(s, substr)
}