fetch_ml/internal/manifest/validator.go
Jeremie Fraeys 8271277dc3
feat: implement research-grade maintainability phases 2, 5, 8, 10
Phase 2: Deterministic Manifests
- Add manifest.Validator with required field checking
- Support Validate() and ValidateStrict() modes
- Integrate validation into worker executor before execution
- Block execution if manifest missing commit_id or deps_manifest_sha256

Phase 5: Pinned Dependencies
- Add hermetic.dockerfile template with pinned system deps
- Frozen package versions: libblas3, libcudnn8, etc.
- Support for deps_manifest.json and requirements.txt with hashes
- Image tagging strategy: deps-<first-8-of-sha256>

Phase 8: Tests as Specifications
- Add queue_spec_test.go with executable scheduler specs
- Document priority ordering (higher first)
- Document FIFO tiebreaker for same priority
- Test cases for negative/zero priorities

Phase 10: Local Dev Parity
- Create root-level docker-compose.dev.yml
- Simplified from deployments/ for quick local dev
- Redis + API server + Worker with hot reload volumes
- Debug ports: 9101 (API), 6379 (Redis)
2026-02-18 15:34:28 -05:00

159 lines
3.8 KiB
Go

package manifest
import (
"errors"
"fmt"
)
// ErrIncompleteManifest is the sentinel error wrapped by every validation
// failure produced in this file: a nil manifest, a missing required field,
// or a missing strict-mode provenance field. Match it with errors.Is (or
// IsValidationError) instead of comparing error strings.
var ErrIncompleteManifest = errors.New("incomplete manifest")
// Validator validates that a RunManifest is complete before execution.
//
// Construct one with NewValidator or NewValidatorWithFields. A zero-value
// Validator has no required fields, so Validate accepts any non-nil manifest.
type Validator struct {
// requiredFields lists the manifest field names (for example "commit_id")
// that Validate checks, in the order they are checked and reported.
requiredFields []string
}
// NewValidator returns a Validator configured with the default required
// fields: "commit_id" and "deps_manifest_sha256". Every call builds a fresh
// field list, so validators returned by separate calls share no state.
func NewValidator() *Validator {
	defaults := []string{"commit_id", "deps_manifest_sha256"}
	return &Validator{requiredFields: defaults}
}
// NewValidatorWithFields creates a validator that requires the given manifest
// field names instead of the defaults.
//
// The names understood by validateField are "commit_id",
// "deps_manifest_sha256", "run_id", "task_id", "job_name" and
// "snapshot_sha256". Fields are checked in the order given.
//
// The slice is copied, so the caller may reuse or modify fields afterwards
// without changing what the returned validator enforces. A nil or empty
// slice yields a validator with no required fields.
func NewValidatorWithFields(fields []string) *Validator {
	return &Validator{
		// Detach from the caller's backing array; storing the slice directly
		// would let later caller-side writes silently rewrite the policy.
		requiredFields: append([]string(nil), fields...),
	}
}
// ValidationError describes one manifest field that failed validation.
type ValidationError struct {
	// Field is the manifest field name that failed, e.g. "commit_id".
	Field string `json:"field"`
	// Message explains why the field was rejected.
	Message string `json:"message"`
}

// Error implements the error interface, rendering the failure as
// "validation error for field '<Field>': <Message>".
func (e ValidationError) Error() string {
	const layout = "validation error for field '%s': %s"
	return fmt.Sprintf(layout, e.Field, e.Message)
}
// Validate reports whether m carries every field this validator requires.
//
// A nil manifest is rejected immediately. Otherwise each configured field is
// checked in order and every failure is collected, so the returned error
// lists all failing fields rather than only the first one. Any non-nil
// result wraps ErrIncompleteManifest; nil means every required field passed.
func (v *Validator) Validate(m *RunManifest) error {
	if m == nil {
		return fmt.Errorf("manifest is nil: %w", ErrIncompleteManifest)
	}

	// Accumulate one " - <failure>\n" bullet per failing field, preserving
	// the order in which the fields were configured.
	var bullets string
	for i := range v.requiredFields {
		failure := v.validateField(m, v.requiredFields[i])
		if failure == nil {
			continue
		}
		bullets += fmt.Sprintf(" - %s\n", failure.Error())
	}

	// No bullets means no field failed.
	if bullets == "" {
		return nil
	}
	return fmt.Errorf("%s: %w", "manifest validation failed:\n"+bullets, ErrIncompleteManifest)
}
// ValidateStrict runs Validate and then additionally requires the provenance
// fields worker_version and podman_image to be non-empty. It is intended for
// high-assurance environments.
//
// If Validate fails, its error is returned unchanged and the strict checks
// are skipped. Otherwise every missing strict field is listed in a single
// error that wraps ErrIncompleteManifest. Returns nil when all checks pass.
func (v *Validator) ValidateStrict(m *RunManifest) error {
	if baseErr := v.Validate(m); baseErr != nil {
		return baseErr
	}

	// Strict-only fields, in the order they are reported.
	checks := []struct {
		field   string
		missing bool
	}{
		{field: "worker_version", missing: m.WorkerVersion == ""},
		{field: "podman_image", missing: m.PodmanImage == ""},
	}

	var bullets string
	for _, c := range checks {
		if !c.missing {
			continue
		}
		failure := ValidationError{
			Field:   c.field,
			Message: "required for strict provenance",
		}
		bullets += fmt.Sprintf(" - %s\n", failure.Error())
	}

	if bullets == "" {
		return nil
	}
	return fmt.Errorf("%s: %w", "strict manifest validation failed:\n"+bullets, ErrIncompleteManifest)
}
// validateField checks a single required field of m, identified by its
// manifest name, and returns a *ValidationError describing the problem or
// nil when the field is acceptable.
//
// Supported names:
//   - "commit_id", "deps_manifest_sha256", "run_id", "task_id", "job_name":
//     the corresponding manifest value must be non-empty.
//   - "snapshot_sha256": required only when the manifest has a snapshot_id.
//
// Any other name is reported as a failure. A validator exists to block
// execution when provenance cannot be established, so a misspelled or
// unsupported field name must fail closed rather than be silently skipped.
//
// m must be non-nil; Validate guards against nil before calling this.
func (v *Validator) validateField(m *RunManifest, field string) *ValidationError {
	switch field {
	case "commit_id":
		if m.CommitID == "" {
			return &ValidationError{
				Field:   field,
				Message: "commit_id is required for code provenance",
			}
		}
	case "deps_manifest_sha256":
		if m.DepsManifestSHA == "" {
			return &ValidationError{
				Field:   field,
				Message: "deps_manifest_sha256 is required for dependency provenance",
			}
		}
	case "run_id":
		if m.RunID == "" {
			return &ValidationError{
				Field:   field,
				Message: "run_id is required",
			}
		}
	case "task_id":
		if m.TaskID == "" {
			return &ValidationError{
				Field:   field,
				Message: "task_id is required",
			}
		}
	case "job_name":
		if m.JobName == "" {
			return &ValidationError{
				Field:   field,
				Message: "job_name is required",
			}
		}
	case "snapshot_sha256":
		// The hash is only mandatory when a snapshot is actually referenced.
		if m.SnapshotID != "" && m.SnapshotSHA256 == "" {
			return &ValidationError{
				Field:   field,
				Message: "snapshot_sha256 is required when snapshot_id is provided",
			}
		}
	default:
		// Previously an unrecognized name fell through and counted as valid,
		// so a typo in the configured field list disabled the check entirely.
		return &ValidationError{
			Field:   field,
			Message: "unknown required field; cannot be validated",
		}
	}
	return nil
}
// IsValidationError reports whether err is, or wraps anywhere in its chain,
// ErrIncompleteManifest. This holds for the errors returned by Validate and
// ValidateStrict, including the nil-manifest error.
func IsValidationError(err error) bool {
	sentinel := ErrIncompleteManifest
	return errors.Is(err, sentinel)
}