fetch_ml/tests/integration/zero_install_test.go
Jeremie Fraeys 5f53104fcd
test: modernize test suite for streamlined infrastructure
- Update E2E tests for consolidated docker-compose.test.yml
- Remove references to obsolete logs-debug.yml
- Enhance test fixtures and utilities
- Improve integration test coverage for KMS, queue, scheduler
- Update unit tests for config constants and worker execution
- Modernize cleanup-status.sh with new Makefile targets
2026-03-04 13:24:24 -05:00

238 lines
7.2 KiB
Go

package tests
import (
"os"
"path/filepath"
"testing"
"github.com/jfraeys/fetch_ml/internal/fileutil"
)
// TestZeroInstallWorkflow exercises the minimal zero-install workflow against a
// temporary directory tree: it creates an experiment locally, copies it into a
// simulated server "pending" job directory, lays out a mock TUI binary with a
// config file, and finally confirms that every artifact is present.
//
// The subtests do not call t.Parallel, so they run sequentially in declaration
// order. They share testDir, and the final validation step relies on the files
// produced by the upload and TUI steps.
func TestZeroInstallWorkflow(t *testing.T) {
	t.Parallel() // Enable parallel execution; all state lives under t.TempDir().

	// Setup test environment
	testDir := t.TempDir()

	// Paths shared by several steps are built once here so the step that
	// creates a file and the step that validates it cannot drift apart.
	experimentDir := filepath.Join(testDir, "my_experiment")
	trainScript := filepath.Join(experimentDir, "train.py")
	// Simulated server layout (ml-server.company.com).
	homeDir := filepath.Join(testDir, "server", "home", "mluser")
	pendingDir := filepath.Join(homeDir, "ml_jobs", "pending")
	// Fixed timestamp-style job name (simulating the workflow's naming).
	jobDir := filepath.Join(pendingDir, "my_experiment_20231201_143022")
	fetchMlDir := filepath.Join(homeDir, "fetch_ml")
	buildDir := filepath.Join(fetchMlDir, "build")
	tuiBinary := filepath.Join(buildDir, "tui")

	// Step 1: Create experiment locally (simulating DS workflow)
	if err := os.MkdirAll(experimentDir, 0750); err != nil {
		t.Fatalf("Failed to create experiment directory: %v", err)
	}

	// Create train.py (simplified from README example). The script is only
	// written to disk by this test, never executed, but it is kept valid
	// Python so the fixture stays usable as an example.
	trainCode := `import argparse, json, logging, time
from pathlib import Path
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--output_dir", type=str, required=True)
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)
    logger.info(f"Training for {args.epochs} epochs...")
    for epoch in range(args.epochs):
        loss = 1.0 - (epoch * 0.1)
        accuracy = 0.5 + (epoch * 0.045)
        logger.info(f"Epoch {epoch + 1}: loss={loss:.4f}, acc={accuracy:.4f}")
        time.sleep(0.1)  # Reduced from 0.5
    results = {"accuracy": accuracy, "loss": loss, "epochs": args.epochs}
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    with open(output_dir / "results.json", "w") as f:
        json.dump(results, f)
    logger.info("Training complete!")
if __name__ == "__main__":
    main()
`
	//nolint:gosec // G306: Script needs execute permissions
	if err := os.WriteFile(trainScript, []byte(trainCode), 0750); err != nil {
		t.Fatalf("Failed to create train.py: %v", err)
	}

	// Test 1: Verify experiment structure (Step 1 validation)
	t.Run("Step1_CreateExperiment", func(t *testing.T) {
		// A single Stat covers both "exists" and "is executable"; any Stat
		// error (not only "not exist") is reported.
		info, err := os.Stat(trainScript)
		if err != nil {
			t.Fatalf("train.py should exist after experiment creation: %v", err)
		}
		if info.Mode().Perm()&0111 == 0 {
			t.Error("train.py should be executable")
		}
	})

	// Step 2: Simulate upload process (rsync simulation)
	t.Run("Step2_UploadExperiment", func(t *testing.T) {
		// Create server directory structure down to the job directory.
		if err := os.MkdirAll(jobDir, 0750); err != nil {
			t.Fatalf("Failed to create server directories: %v", err)
		}

		// Simulate rsync upload (copy experiment files)
		files := []string{"train.py"}
		for _, file := range files {
			src := filepath.Join(experimentDir, file)
			dst := filepath.Join(jobDir, file)

			data, err := fileutil.SecureFileRead(src)
			if err != nil {
				t.Fatalf("Failed to read %s: %v", file, err)
			}
			//nolint:gosec // G306: Script needs execute permissions
			if err := os.WriteFile(dst, data, 0750); err != nil {
				t.Fatalf("Failed to copy %s: %v", file, err)
			}
		}

		// Verify upload succeeded
		for _, file := range files {
			dst := filepath.Join(jobDir, file)
			if _, err := os.Stat(dst); err != nil {
				t.Errorf("Uploaded file %s should exist in pending directory: %v", file, err)
			}
		}

		// Verify job directory structure
		if _, err := os.Stat(pendingDir); err != nil {
			t.Errorf("Pending directory should exist: %v", err)
		}
		if _, err := os.Stat(jobDir); err != nil {
			t.Errorf("Job directory should exist: %v", err)
		}
	})

	// Step 3: Simulate TUI access (minimal - just verify TUI would launch)
	t.Run("Step3_TUIAccess", func(t *testing.T) {
		// Create fetch_ml directory structure (simulating server setup)
		configsDir := filepath.Join(fetchMlDir, "configs")
		if err := os.MkdirAll(buildDir, 0750); err != nil {
			t.Fatalf("Failed to create fetch_ml directories: %v", err)
		}
		if err := os.MkdirAll(configsDir, 0750); err != nil {
			t.Fatalf("Failed to create configs directory: %v", err)
		}

		// Create mock TUI binary
		tuiContent := "#!/bin/bash\necho 'Mock TUI would launch here'"
		//nolint:gosec // G306: Script needs execute permissions
		if err := os.WriteFile(tuiBinary, []byte(tuiContent), 0750); err != nil {
			t.Fatalf("Failed to create mock TUI binary: %v", err)
		}

		// Create config file
		configFile := filepath.Join(configsDir, "config.yaml")
		configContent := `server:
  host: "localhost"
  port: 8080
redis:
  addr: "localhost:6379"
  db: 0
data_dir: "/home/mluser/datasets"
output_dir: "/home/mluser/ml_jobs"
`
		if err := os.WriteFile(configFile, []byte(configContent), 0600); err != nil {
			t.Fatalf("Failed to create config file: %v", err)
		}

		// Verify TUI setup
		if _, err := os.Stat(tuiBinary); err != nil {
			t.Errorf("TUI binary should exist: %v", err)
		}
		if _, err := os.Stat(configFile); err != nil {
			t.Errorf("Config file should exist: %v", err)
		}
	})

	// Test: Verify complete workflow files exist
	t.Run("CompleteWorkflowValidation", func(t *testing.T) {
		// Verify experiment files exist
		if _, err := os.Stat(trainScript); err != nil {
			t.Errorf("Experiment train.py should exist: %v", err)
		}

		// Verify uploaded files exist
		uploadedEntrypoint := filepath.Join(jobDir, "train.py")
		if _, err := os.Stat(uploadedEntrypoint); err != nil {
			t.Errorf("Uploaded train.py should exist in pending directory: %v", err)
		}

		// Verify TUI setup exists
		if _, err := os.Stat(tuiBinary); err != nil {
			t.Errorf("TUI binary should exist for workflow completion: %v", err)
		}
	})
}
// TestMinimalWorkflowSecurity covers the security-related fixture of the
// minimal workflow: it writes a mock SSH rc script into a temporary directory
// and checks that the file is present and carries at least one execute bit.
func TestMinimalWorkflowSecurity(t *testing.T) {
	t.Parallel() // Enable parallel execution

	const (
		// anyExecBit masks the owner/group/other execute permission bits.
		anyExecBit = 0111
		// rcScript is the mock SSH rc body written to disk.
		rcScript = `#!/bin/bash
# Mock SSH rc - TUI only
if [ -n "$SSH_CONNECTION" ] && [ -z "$SSH_ORIGINAL_COMMAND" ]; then
echo "TUI would launch here"
else
echo "Command execution blocked for security"
exit 1
fi
`
	)

	// Create mock SSH environment
	rcPath := filepath.Join(t.TempDir(), "sshrc")
	//nolint:gosec // G306: Script needs execute permissions
	writeErr := os.WriteFile(rcPath, []byte(rcScript), 0750)
	if writeErr != nil {
		t.Fatalf("Failed to create SSH rc: %v", writeErr)
	}

	t.Run("TUIOnlyAccess", func(st *testing.T) {
		// Presence check: only a "not exist" error is reported here.
		_, probeErr := os.Stat(rcPath)
		if os.IsNotExist(probeErr) {
			st.Error("SSH rc should exist")
		}

		// Any Stat failure aborts before the mode is inspected.
		fi, statErr := os.Stat(rcPath)
		if statErr != nil {
			st.Fatalf("Failed to stat SSH rc: %v", statErr)
		}

		if perm := fi.Mode().Perm(); perm&anyExecBit == 0 {
			st.Error("SSH rc should be executable")
		}
	})
}