fetch_ml/cmd/errors/main.go
Jeremie Fraeys 7194826871
feat: implement research-grade maintainability phases 1,3,4,7
Phase 1: Event Sourcing
- Add TaskEvent types (queued, started, completed, failed, etc.)
- Create EventStore with Redis Streams (append-only)
- Support event querying by task ID and time range

Phase 3: Diagnosable Failures
- Enhance TaskExecutionError with Context map, Timestamp, Recoverable flag
- Update container.go to populate error context (image, GPU, duration)
- Add WithContext helper for building error context
- Create cmd/errors CLI for querying task errors

Phase 4: Testable Security
- Add security fields to PodmanConfig (Privileged, Network, ReadOnlyMounts)
- Create ValidateSecurityPolicy() with ErrSecurityViolation
- Add security contract tests (privileged rejection, host network rejection)
- Tests serve as executable security documentation

Phase 7: Reproducible Builds
- Add BuildHash and BuildTime ldflags to Makefile
- Create verify-build target for reproducibility testing
- Add -version and -verify flags to api-server

All tests pass:
- go test ./internal/errtypes/...
- go test ./internal/container/... -run Security
- go test ./internal/queue/...
- go build ./cmd/api-server/...
2026-02-18 15:27:50 -05:00

82 lines
2.3 KiB
Go

// Package main implements the ml errors command, which looks up and prints the stored error record for a task.
package main
import (
	"encoding/json"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"time"

	"github.com/jfraeys/fetch_ml/internal/errtypes"
)
// main looks up the stored error record for a single task and prints it.
//
// Usage: errors <task_id> [--json]
//
// The record is read from <base>/errors/<task_id>.json, where <base> is the
// FETCH_ML_BASE_PATH environment variable or, when that is unset, the
// ml_jobs directory under the user's home directory. The process exits with
// status 1 on a usage error, when the record is missing or unreadable, or
// when it cannot be parsed or formatted.
func main() {
	// The first non-flag argument is the task ID. --json is accepted on
	// either side of it so that `errors --json <id>` does not mistake the
	// flag for a task ID. Additional positional arguments are ignored.
	var taskID string
	jsonOutput := false
	for _, arg := range os.Args[1:] {
		switch {
		case arg == "--json":
			jsonOutput = true
		case taskID == "":
			taskID = arg
		}
	}
	if taskID == "" {
		fmt.Fprintln(os.Stderr, "Usage: errors <task_id> [--json]")
		fmt.Fprintln(os.Stderr, " task_id: The task ID to query errors for")
		fmt.Fprintln(os.Stderr, " --json: Output as JSON instead of formatted text")
		os.Exit(1)
	}

	// Determine base path from environment or default.
	basePath := os.Getenv("FETCH_ML_BASE_PATH")
	if basePath == "" {
		home, err := os.UserHomeDir()
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error: failed to get home directory: %v\n", err)
			os.Exit(1)
		}
		basePath = filepath.Join(home, "ml_jobs")
	}

	errorPath := filepath.Join(basePath, "errors", taskID+".json")
	data, err := os.ReadFile(errorPath)
	if err != nil {
		if errors.Is(err, os.ErrNotExist) {
			// No record on disk for this task ID.
			fmt.Fprintf(os.Stderr, "Error: no error record found for task %s\n", taskID)
			fmt.Fprintf(os.Stderr, "Expected: %s\n", errorPath)
		} else {
			// Permission problems, I/O errors, etc. must not be reported as
			// "not found": the record may well exist.
			fmt.Fprintf(os.Stderr, "Error: failed to read error record %s: %v\n", errorPath, err)
		}
		os.Exit(1)
	}

	var execErr errtypes.TaskExecutionError
	if err := json.Unmarshal(data, &execErr); err != nil {
		fmt.Fprintf(os.Stderr, "Error: failed to parse error record: %v\n", err)
		os.Exit(1)
	}

	if jsonOutput {
		// Re-marshal rather than echo the file so the output is always
		// pretty-printed.
		output, err := json.MarshalIndent(execErr, "", " ")
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error: failed to format error: %v\n", err)
			os.Exit(1)
		}
		fmt.Println(string(output))
		return
	}

	// Formatted text report.
	fmt.Printf("Error Report for Task: %s\n", execErr.TaskID)
	fmt.Printf("Job Name: %s\n", execErr.JobName)
	fmt.Printf("Phase: %s\n", execErr.Phase)
	fmt.Printf("Time: %s\n", execErr.Timestamp.Format(time.RFC3339))
	fmt.Printf("Recoverable: %v\n", execErr.Recoverable)
	fmt.Println()
	if execErr.Message != "" {
		fmt.Printf("Message: %s\n", execErr.Message)
	}
	if execErr.Err != nil {
		fmt.Printf("Underlying Error: %v\n", execErr.Err)
	}
	if len(execErr.Context) > 0 {
		fmt.Println()
		fmt.Println("Context:")
		// Map iteration order is randomized; sort the keys so the report is
		// stable across runs and can be diffed.
		keys := make([]string, 0, len(execErr.Context))
		for key := range execErr.Context {
			keys = append(keys, key)
		}
		sort.Strings(keys)
		for _, key := range keys {
			// %v renders string values exactly as %s does and stays readable
			// for any non-string value.
			fmt.Printf(" %s: %v\n", key, execErr.Context[key])
		}
	}
}