package manifest import ( "errors" "fmt" "strings" "github.com/jfraeys/fetch_ml/internal/privacy" ) // ErrIncompleteManifest is returned when a required manifest field is missing. var ErrIncompleteManifest = errors.New("incomplete manifest") // Validator validates that a RunManifest is complete before execution. type Validator struct { requiredFields []string } // NewValidator creates a new manifest validator with default required fields. func NewValidator() *Validator { return &Validator{ requiredFields: []string{ "commit_id", "deps_manifest_sha256", }, } } // NewValidatorWithFields creates a validator with custom required fields. func NewValidatorWithFields(fields []string) *Validator { return &Validator{ requiredFields: fields, } } // ValidationError contains details about a validation failure. type ValidationError struct { Field string `json:"field"` Message string `json:"message"` } // Error returns the error string. func (e ValidationError) Error() string { return fmt.Sprintf("validation error for field '%s': %s", e.Field, e.Message) } // Validate checks that the manifest has all required fields. // Returns an error listing all missing fields. func (v *Validator) Validate(m *RunManifest) error { if m == nil { return fmt.Errorf("manifest is nil: %w", ErrIncompleteManifest) } var validationErrors []ValidationError for _, field := range v.requiredFields { if err := v.validateField(m, field); err != nil { validationErrors = append(validationErrors, *err) } } if len(validationErrors) > 0 { // Build comprehensive error message msg := "manifest validation failed:\n" for _, err := range validationErrors { msg += fmt.Sprintf(" - %s\n", err.Error()) } return fmt.Errorf("%s: %w", msg, ErrIncompleteManifest) } return nil } // ValidateStrict fails if ANY optional fields commonly used for provenance are missing. // This is for high-assurance environments. func (v *Validator) ValidateStrict(m *RunManifest) error { if err := v.Validate(m); err != nil { return err } // Additional strict checks var strictErrors []ValidationError if m.WorkerVersion == "" { strictErrors = append(strictErrors, ValidationError{ Field: "worker_version", Message: "required for strict provenance", }) } if m.PodmanImage == "" { strictErrors = append(strictErrors, ValidationError{ Field: "podman_image", Message: "required for strict provenance", }) } if len(strictErrors) > 0 { msg := "strict manifest validation failed:\n" for _, err := range strictErrors { msg += fmt.Sprintf(" - %s\n", err.Error()) } return fmt.Errorf("%s: %w", msg, ErrIncompleteManifest) } return nil } // validateField checks a single required field. func (v *Validator) validateField(m *RunManifest, field string) *ValidationError { switch field { case "commit_id": if m.CommitID == "" { return &ValidationError{ Field: field, Message: "commit_id is required for code provenance", } } case "deps_manifest_sha256": if m.DepsManifestSHA == "" { return &ValidationError{ Field: field, Message: "deps_manifest_sha256 is required for dependency provenance", } } case "run_id": if m.RunID == "" { return &ValidationError{ Field: field, Message: "run_id is required", } } case "task_id": if m.TaskID == "" { return &ValidationError{ Field: field, Message: "task_id is required", } } case "job_name": if m.JobName == "" { return &ValidationError{ Field: field, Message: "job_name is required", } } case "snapshot_sha256": if m.SnapshotID != "" && m.SnapshotSHA256 == "" { return &ValidationError{ Field: field, Message: "snapshot_sha256 is required when snapshot_id is provided", } } } return nil } // IsValidationError checks if an error is a manifest validation error. func IsValidationError(err error) bool { return errors.Is(err, ErrIncompleteManifest) } // NarrativeValidation contains validation results. type NarrativeValidation struct { Warnings []string `json:"warnings,omitempty"` Errors []string `json:"errors,omitempty"` PIIFindings []privacy.PIIFinding `json:"pii_findings,omitempty"` } // OutcomeValidation contains validation results. type OutcomeValidation struct { Warnings []string `json:"warnings,omitempty"` Errors []string `json:"errors,omitempty"` } // Valid outcome statuses. var ValidOutcomeStatuses = []string{ "validated", "invalidated", "inconclusive", "partial", "", } // isValidOutcomeStatus checks if status is valid. func isValidOutcomeStatus(status string) bool { for _, s := range ValidOutcomeStatuses { if s == status { return true } } return false } // ValidateNarrative validates a Narrative struct. func ValidateNarrative(n *Narrative) NarrativeValidation { result := NarrativeValidation{ Warnings: make([]string, 0), Errors: make([]string, 0), } if n == nil { return result } // Validate hypothesis length if len(n.Hypothesis) > 5000 { result.Errors = append(result.Errors, "hypothesis exceeds 5000 characters") } else if len(n.Hypothesis) > 1000 { result.Warnings = append(result.Warnings, "hypothesis is very long (>1000 chars)") } // Validate context length if len(n.Context) > 10000 { result.Errors = append(result.Errors, "context exceeds 10000 characters") } // Validate tags count if len(n.Tags) > 50 { result.Errors = append(result.Errors, "too many tags (max 50)") } else if len(n.Tags) > 20 { result.Warnings = append(result.Warnings, "many tags (>20)") } // Validate tag lengths for i, tag := range n.Tags { if len(tag) > 50 { result.Errors = append(result.Errors, fmt.Sprintf("tag %d exceeds 50 characters", i)) } if strings.ContainsAny(tag, ",;|/\\") { result.Warnings = append(result.Warnings, fmt.Sprintf("tag %d contains special characters", i)) } } // Check for PII in text fields fields := map[string]string{ "hypothesis": n.Hypothesis, "context": n.Context, "intent": n.Intent, } for fieldName, text := range fields { if findings := privacy.DetectPII(text); len(findings) > 0 { result.PIIFindings = append(result.PIIFindings, findings...) result.Warnings = append(result.Warnings, fmt.Sprintf("potential PII detected in %s field", fieldName)) } } return result } // ValidateOutcome validates an Outcome struct. func ValidateOutcome(o *Outcome) OutcomeValidation { result := OutcomeValidation{ Warnings: make([]string, 0), Errors: make([]string, 0), } if o == nil { return result } // Validate status if !isValidOutcomeStatus(o.Status) { result.Errors = append(result.Errors, fmt.Sprintf("invalid status: %s (must be validated, invalidated, inconclusive, partial, or empty)", o.Status)) } // Validate summary length if len(o.Summary) > 1000 { result.Errors = append(result.Errors, "summary exceeds 1000 characters") } else if len(o.Summary) > 200 { result.Warnings = append(result.Warnings, "summary is long (>200 chars)") } // Validate key learnings count if len(o.KeyLearnings) > 5 { result.Errors = append(result.Errors, "too many key learnings (max 5)") } // Validate key learning lengths for i, learning := range o.KeyLearnings { if len(learning) > 500 { result.Errors = append(result.Errors, fmt.Sprintf("key learning %d exceeds 500 characters", i)) } } // Validate follow-up runs references if len(o.FollowUpRuns) > 10 { result.Warnings = append(result.Warnings, "many follow-up runs (>10)") } // Check for PII in text fields if findings := privacy.DetectPII(o.Summary); len(findings) > 0 { result.Warnings = append(result.Warnings, "potential PII detected in summary") } for i, learning := range o.KeyLearnings { if findings := privacy.DetectPII(learning); len(findings) > 0 { result.Warnings = append(result.Warnings, fmt.Sprintf("potential PII detected in key learning %d", i)) } } return result }