Add comprehensive research context tracking to jobs: - Narrative fields: hypothesis, context, intent, expected_outcome - Experiment groups and tags for organization - Run comparison (compare command) for diff analysis - Run search (find command) with criteria filtering - Run export (export command) for data portability - Outcome setting (outcome command) for experiment validation Update queue and requeue commands to support narrative fields. Add narrative validation to manifest validator. Add WebSocket handlers for compare, find, export, and outcome operations. Includes E2E tests for phase 2 features.
175 lines
4.5 KiB
Go
175 lines
4.5 KiB
Go
package tests
|
|
|
|
import (
	"encoding/json"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"testing"
)
|
|
|
|
// runCLI executes the binary at cliPath with the given arguments, using a
// fresh per-test temporary directory as the working directory. It returns
// the combined stdout and stderr as a string, together with whatever error
// the process run reported.
func runCLI(t *testing.T, cliPath string, args ...string) (string, error) {
	t.Helper()

	proc := exec.Command(cliPath, args...)
	// Run from an isolated directory so the CLI does not touch the repo tree.
	proc.Dir = t.TempDir()

	combined, runErr := proc.CombinedOutput()
	return string(combined), runErr
}
|
|
|
|
// contains reports whether substr occurs anywhere within s. An empty substr
// is always considered present, even when s is empty. It delegates to the
// standard library rather than scanning byte offsets by hand.
func contains(s, substr string) bool {
	return strings.Contains(s, substr)
}
|
|
|
|
// TestCompareRunsE2E tests the ml compare command end-to-end
|
|
func TestCompareRunsE2E(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
cliPath := e2eCLIPath(t)
|
|
if _, err := os.Stat(cliPath); os.IsNotExist(err) {
|
|
t.Skip("CLI not built - run 'make build' first")
|
|
}
|
|
|
|
t.Run("CompareUsage", func(t *testing.T) {
|
|
output, _ := runCLI(t, cliPath, "compare", "--help")
|
|
if !contains(output, "Usage") {
|
|
t.Error("expected compare --help to show usage")
|
|
}
|
|
})
|
|
|
|
t.Run("CompareDummyRuns", func(t *testing.T) {
|
|
output, _ := runCLI(t, cliPath, "compare", "run_abc", "run_def", "--json")
|
|
t.Logf("Compare output: %s", output)
|
|
|
|
var result map[string]any
|
|
if err := json.Unmarshal([]byte(output), &result); err == nil {
|
|
if _, hasA := result["run_a"]; hasA {
|
|
t.Log("Compare returned structured response")
|
|
}
|
|
}
|
|
})
|
|
}
|
|
|
|
// TestFindRunsE2E tests the ml find command end-to-end
|
|
func TestFindRunsE2E(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
cliPath := e2eCLIPath(t)
|
|
if _, err := os.Stat(cliPath); os.IsNotExist(err) {
|
|
t.Skip("CLI not built - run 'make build' first")
|
|
}
|
|
|
|
t.Run("FindUsage", func(t *testing.T) {
|
|
output, _ := runCLI(t, cliPath, "find", "--help")
|
|
if !contains(output, "Usage") {
|
|
t.Error("expected find --help to show usage")
|
|
}
|
|
})
|
|
|
|
t.Run("FindByOutcome", func(t *testing.T) {
|
|
output, _ := runCLI(t, cliPath, "find", "--outcome", "validates", "--json")
|
|
t.Logf("Find output: %s", output)
|
|
|
|
var result map[string]any
|
|
if err := json.Unmarshal([]byte(output), &result); err == nil {
|
|
t.Log("Find returned JSON response")
|
|
}
|
|
})
|
|
}
|
|
|
|
// TestExportRunE2E tests the ml export command end-to-end
|
|
func TestExportRunE2E(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
cliPath := e2eCLIPath(t)
|
|
if _, err := os.Stat(cliPath); os.IsNotExist(err) {
|
|
t.Skip("CLI not built - run 'make build' first")
|
|
}
|
|
|
|
t.Run("ExportUsage", func(t *testing.T) {
|
|
output, _ := runCLI(t, cliPath, "export", "--help")
|
|
if !contains(output, "Usage") {
|
|
t.Error("expected export --help to show usage")
|
|
}
|
|
})
|
|
}
|
|
|
|
// TestRequeueWithChangesE2E tests the ml requeue command with changes
|
|
func TestRequeueWithChangesE2E(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
cliPath := e2eCLIPath(t)
|
|
if _, err := os.Stat(cliPath); os.IsNotExist(err) {
|
|
t.Skip("CLI not built - run 'make build' first")
|
|
}
|
|
|
|
t.Run("RequeueUsage", func(t *testing.T) {
|
|
output, _ := runCLI(t, cliPath, "requeue", "--help")
|
|
if !contains(output, "Usage") {
|
|
t.Error("expected requeue --help to show usage")
|
|
}
|
|
})
|
|
|
|
t.Run("RequeueWithOverrides", func(t *testing.T) {
|
|
output, _ := runCLI(t, cliPath, "requeue", "abc123", "--lr=0.002", "--json")
|
|
t.Logf("Requeue output: %s", output)
|
|
})
|
|
}
|
|
|
|
// TestOutcomeSetE2E tests the ml outcome set command
|
|
func TestOutcomeSetE2E(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
cliPath := e2eCLIPath(t)
|
|
if _, err := os.Stat(cliPath); os.IsNotExist(err) {
|
|
t.Skip("CLI not built - run 'make build' first")
|
|
}
|
|
|
|
t.Run("OutcomeSetUsage", func(t *testing.T) {
|
|
output, _ := runCLI(t, cliPath, "outcome", "set", "--help")
|
|
if !contains(output, "Usage") {
|
|
t.Error("expected outcome set --help to show usage")
|
|
}
|
|
})
|
|
}
|
|
|
|
// TestDatasetVerifyE2E tests the ml dataset verify command
|
|
func TestDatasetVerifyE2E(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
cliPath := e2eCLIPath(t)
|
|
if _, err := os.Stat(cliPath); os.IsNotExist(err) {
|
|
t.Skip("CLI not built - run 'make build' first")
|
|
}
|
|
|
|
t.Run("DatasetVerifyUsage", func(t *testing.T) {
|
|
output, _ := runCLI(t, cliPath, "dataset", "verify", "--help")
|
|
if !contains(output, "Usage") {
|
|
t.Error("expected dataset verify --help to show usage")
|
|
}
|
|
})
|
|
|
|
t.Run("DatasetVerifyTempDir", func(t *testing.T) {
|
|
datasetDir := t.TempDir()
|
|
for i := 0; i < 5; i++ {
|
|
f := filepath.Join(datasetDir, "file.txt")
|
|
os.WriteFile(f, []byte("test data"), 0644)
|
|
}
|
|
|
|
output, _ := runCLI(t, cliPath, "dataset", "verify", datasetDir, "--json")
|
|
t.Logf("Dataset verify output: %s", output)
|
|
|
|
var result map[string]any
|
|
if err := json.Unmarshal([]byte(output), &result); err == nil {
|
|
if result["ok"] == true {
|
|
t.Log("Dataset verify returned ok")
|
|
}
|
|
}
|
|
})
|
|
}
|