fetch_ml/tests/e2e/phase2_features_test.go
Jeremie Fraeys 260e18499e
feat: Research features - narrative fields and outcome tracking
Add comprehensive research context tracking to jobs:
- Narrative fields: hypothesis, context, intent, expected_outcome
- Experiment groups and tags for organization
- Run comparison (compare command) for diff analysis
- Run search (find command) with criteria filtering
- Run export (export command) for data portability
- Outcome setting (outcome command) for experiment validation

Update queue and requeue commands to support narrative fields.
Add narrative validation to manifest validator.
Add WebSocket handlers for compare, find, export, and outcome operations.

Includes E2E tests for phase 2 features.
2026-02-18 21:27:05 -05:00

175 lines
4.5 KiB
Go

package tests
import (
	"encoding/json"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"testing"
)
// runCLI executes the binary at cliPath with the supplied arguments and
// returns everything it wrote to stdout and stderr (interleaved) as a string,
// together with the error reported for the command.
//
// The command's working directory is a fresh per-test temporary directory
// obtained from t.TempDir.
func runCLI(t *testing.T, cliPath string, args ...string) (string, error) {
	t.Helper()

	proc := exec.Command(cliPath, args...)
	proc.Dir = t.TempDir()

	raw, runErr := proc.CombinedOutput()
	return string(raw), runErr
}
// contains reports whether substr occurs anywhere within s.
//
// It is a thin wrapper around strings.Contains, kept so existing call sites
// in the tests do not need to change. An empty substr is contained in every
// string, including the empty string.
func contains(s, substr string) bool {
	return strings.Contains(s, substr)
}
// TestCompareRunsE2E exercises the ml compare command end-to-end.
//
// The test is skipped when no file exists at the path returned by
// e2eCLIPath. Both subtests discard the command's error and look only at the
// combined output.
func TestCompareRunsE2E(t *testing.T) {
	t.Parallel()

	bin := e2eCLIPath(t)
	_, statErr := os.Stat(bin)
	if os.IsNotExist(statErr) {
		t.Skip("CLI not built - run 'make build' first")
	}

	t.Run("CompareUsage", func(t *testing.T) {
		// The command error is discarded; only the help text is checked.
		out, _ := runCLI(t, bin, "compare", "--help")
		if contains(out, "Usage") {
			return
		}
		t.Error("expected compare --help to show usage")
	})

	t.Run("CompareDummyRuns", func(t *testing.T) {
		// This subtest only logs: it never fails, whatever the CLI prints.
		out, _ := runCLI(t, bin, "compare", "run_abc", "run_def", "--json")
		t.Logf("Compare output: %s", out)

		var parsed map[string]any
		if json.Unmarshal([]byte(out), &parsed) != nil {
			return
		}
		if _, ok := parsed["run_a"]; ok {
			t.Log("Compare returned structured response")
		}
	})
}
// TestFindRunsE2E exercises the ml find command end-to-end.
//
// The test is skipped when no file exists at the path returned by
// e2eCLIPath. Both subtests discard the command's error and look only at the
// combined output.
func TestFindRunsE2E(t *testing.T) {
	t.Parallel()

	bin := e2eCLIPath(t)
	_, statErr := os.Stat(bin)
	if os.IsNotExist(statErr) {
		t.Skip("CLI not built - run 'make build' first")
	}

	t.Run("FindUsage", func(t *testing.T) {
		// The command error is discarded; only the help text is checked.
		out, _ := runCLI(t, bin, "find", "--help")
		if contains(out, "Usage") {
			return
		}
		t.Error("expected find --help to show usage")
	})

	t.Run("FindByOutcome", func(t *testing.T) {
		// This subtest only logs: it never fails, whatever the CLI prints.
		out, _ := runCLI(t, bin, "find", "--outcome", "validates", "--json")
		t.Logf("Find output: %s", out)

		var parsed map[string]any
		if json.Unmarshal([]byte(out), &parsed) == nil {
			t.Log("Find returned JSON response")
		}
	})
}
// TestExportRunE2E exercises the ml export command end-to-end.
//
// The test is skipped when no file exists at the path returned by
// e2eCLIPath. Only the help output is checked.
func TestExportRunE2E(t *testing.T) {
	t.Parallel()

	bin := e2eCLIPath(t)
	_, statErr := os.Stat(bin)
	if os.IsNotExist(statErr) {
		t.Skip("CLI not built - run 'make build' first")
	}

	t.Run("ExportUsage", func(t *testing.T) {
		// The command error is discarded; only the help text is checked.
		out, _ := runCLI(t, bin, "export", "--help")
		if contains(out, "Usage") {
			return
		}
		t.Error("expected export --help to show usage")
	})
}
// TestRequeueWithChangesE2E exercises the ml requeue command, including an
// invocation that passes an override flag.
//
// The test is skipped when no file exists at the path returned by
// e2eCLIPath. Both subtests discard the command's error.
func TestRequeueWithChangesE2E(t *testing.T) {
	t.Parallel()

	bin := e2eCLIPath(t)
	_, statErr := os.Stat(bin)
	if os.IsNotExist(statErr) {
		t.Skip("CLI not built - run 'make build' first")
	}

	t.Run("RequeueUsage", func(t *testing.T) {
		// The command error is discarded; only the help text is checked.
		out, _ := runCLI(t, bin, "requeue", "--help")
		if contains(out, "Usage") {
			return
		}
		t.Error("expected requeue --help to show usage")
	})

	t.Run("RequeueWithOverrides", func(t *testing.T) {
		// This subtest only logs the output; it makes no assertions.
		out, _ := runCLI(t, bin, "requeue", "abc123", "--lr=0.002", "--json")
		t.Logf("Requeue output: %s", out)
	})
}
// TestOutcomeSetE2E exercises the ml outcome set command end-to-end.
//
// The test is skipped when no file exists at the path returned by
// e2eCLIPath. Only the help output is checked.
func TestOutcomeSetE2E(t *testing.T) {
	t.Parallel()

	bin := e2eCLIPath(t)
	_, statErr := os.Stat(bin)
	if os.IsNotExist(statErr) {
		t.Skip("CLI not built - run 'make build' first")
	}

	t.Run("OutcomeSetUsage", func(t *testing.T) {
		// The command error is discarded; only the help text is checked.
		out, _ := runCLI(t, bin, "outcome", "set", "--help")
		if contains(out, "Usage") {
			return
		}
		t.Error("expected outcome set --help to show usage")
	})
}
// TestDatasetVerifyE2E exercises the ml dataset verify command end-to-end.
//
// The test is skipped when no file exists at the path returned by
// e2eCLIPath. The usage subtest checks the help text; the temp-dir subtest
// builds a small on-disk dataset, runs verify against it and logs the result
// without asserting on the CLI's verdict.
func TestDatasetVerifyE2E(t *testing.T) {
	t.Parallel()

	cliPath := e2eCLIPath(t)
	if _, err := os.Stat(cliPath); os.IsNotExist(err) {
		t.Skip("CLI not built - run 'make build' first")
	}

	t.Run("DatasetVerifyUsage", func(t *testing.T) {
		// The command error is discarded; only the help text is checked.
		output, _ := runCLI(t, cliPath, "dataset", "verify", "--help")
		if !contains(output, "Usage") {
			t.Error("expected dataset verify --help to show usage")
		}
	})

	t.Run("DatasetVerifyTempDir", func(t *testing.T) {
		datasetDir := t.TempDir()

		// Five distinct files. The earlier loop reused a single name on every
		// iteration, so the dataset only ever contained one file.
		names := []string{
			"file0.txt",
			"file1.txt",
			"file2.txt",
			"file3.txt",
			"file4.txt",
		}
		for _, name := range names {
			path := filepath.Join(datasetDir, name)
			// A fixture that cannot be written makes the rest of the subtest
			// meaningless, so stop immediately instead of ignoring the error.
			if err := os.WriteFile(path, []byte("test data"), 0644); err != nil {
				t.Fatalf("writing dataset fixture %q: %v", path, err)
			}
		}

		// The command error is discarded; the output is logged and inspected
		// on a best-effort basis only.
		output, _ := runCLI(t, cliPath, "dataset", "verify", datasetDir, "--json")
		t.Logf("Dataset verify output: %s", output)

		var result map[string]any
		if err := json.Unmarshal([]byte(output), &result); err == nil {
			if result["ok"] == true {
				t.Log("Dataset verify returned ok")
			}
		}
	})
}