fetch_ml/internal/api/helpers/validation_helpers.go

// Package helpers provides validation utilities for WebSocket handlers.
package helpers

import (
	"encoding/hex"
	"os"
	"path/filepath"
	"strings"

	"github.com/jfraeys/fetch_ml/internal/experiment"
	"github.com/jfraeys/fetch_ml/internal/manifest"
	"github.com/jfraeys/fetch_ml/internal/queue"
	"github.com/jfraeys/fetch_ml/internal/storage"
	"github.com/jfraeys/fetch_ml/internal/worker"
)

// ValidateCommitIDFormat reports whether commitID is well formed: exactly
// 40 bytes long and made up solely of hexadecimal digits. On failure errMsg
// names the rule that was violated; on success it is empty.
func ValidateCommitIDFormat(commitID string) (ok bool, errMsg string) {
	const commitIDLen = 40

	if len(commitID) != commitIDLen {
		return false, "invalid commit_id length"
	}

	// Decode into a fixed-size scratch buffer; only the error is of interest.
	var scratch [commitIDLen / 2]byte
	_, decodeErr := hex.Decode(scratch[:], []byte(commitID))
	if decodeErr != nil {
		return false, "invalid commit_id hex"
	}
	return true, ""
}

// ValidateExperimentManifest asks expMgr to validate the manifest recorded
// for commitID. It returns ok=true with empty details on success; otherwise
// ok=false and details holds the text of the error expMgr reported.
func ValidateExperimentManifest(expMgr *experiment.Manager, commitID string) (ok bool, details string) {
	validationErr := expMgr.ValidateManifest(commitID)
	if validationErr == nil {
		return true, ""
	}
	return false, validationErr.Error()
}

// ValidateDepsManifest checks that a dependency manifest can be selected
// from the files directory of commitID and that its SHA-256 can be computed.
//
// On success it returns the selected manifest name, a passing check whose
// Actual field is "<name>:<sha>", and nil errMsgs. If no manifest can be
// selected, depName is empty and errMsgs is ["deps manifest missing"]. If
// hashing fails, depName is still returned and errMsgs is
// ["deps manifest hash failed"]. In both failure cases the check's Details
// field carries the underlying error text.
func ValidateDepsManifest(
	expMgr *experiment.Manager,
	commitID string,
) (depName string, check ValidateCheck, errMsgs []string) {
	filesDir := expMgr.GetFilesPath(commitID)

	name, selectErr := worker.SelectDependencyManifest(filesDir)
	if selectErr != nil {
		failed := ValidateCheck{OK: false, Details: selectErr.Error()}
		return "", failed, []string{"deps manifest missing"}
	}

	manifestFile := filepath.Join(filesDir, name)
	digest, hashErr := FileSHA256Hex(manifestFile)
	if hashErr != nil {
		failed := ValidateCheck{OK: false, Details: hashErr.Error()}
		return name, failed, []string{"deps manifest hash failed"}
	}

	return name, ValidateCheck{OK: true, Actual: name + ":" + digest}, nil
}

// ValidateCheck represents the result of a single validation check.
//
// OK reports whether the check passed. Expected and Actual carry the value
// the check wanted and the value it found, and Details carries free-form
// diagnostic text such as an error message. Expected, Actual, and Details
// are omitted from the JSON encoding when they are empty.
type ValidateCheck struct {
	OK       bool   `json:"ok"`
	Expected string `json:"expected,omitempty"`
	Actual   string `json:"actual,omitempty"`
	Details  string `json:"details,omitempty"`
}

// ValidateReport represents a validation report: an overall OK flag, the
// individual check results in Checks, and any collected Errors and Warnings.
//
// CommitID, TaskID, Errors, and Warnings are omitted from the JSON encoding
// when empty; OK, Checks, and TS are always emitted.
//
// NOTE(review): Checks is presumably keyed by check name and TS is presumably
// a timestamp string; neither is populated in this file, so confirm against
// the callers.
type ValidateReport struct {
	OK       bool                     `json:"ok"`
	CommitID string                   `json:"commit_id,omitempty"`
	TaskID   string                   `json:"task_id,omitempty"`
	Checks   map[string]ValidateCheck `json:"checks"`
	Errors   []string                 `json:"errors,omitempty"`
	Warnings []string                 `json:"warnings,omitempty"`
	TS       string                   `json:"ts"`
}

// NewValidateReport returns a report that starts out passing (OK is true)
// and has an empty, non-nil Checks map ready to receive results. All other
// fields are left at their zero values.
func NewValidateReport() ValidateReport {
	report := ValidateReport{OK: true}
	report.Checks = make(map[string]ValidateCheck)
	return report
}

// ShouldRequireRunManifest reports whether a run manifest should be required
// for task. It is true only when the task's status, compared
// case-insensitively and ignoring surrounding whitespace, is "running",
// "completed", or "failed". A nil task never requires a manifest.
func ShouldRequireRunManifest(task *queue.Task) bool {
	if task == nil {
		return false
	}
	status := strings.ToLower(strings.TrimSpace(task.Status))
	return status == "running" || status == "completed" || status == "failed"
}

// ExpectedRunManifestBucketForStatus maps a task status to the bucket its
// run manifest is expected to be in. The status is matched
// case-insensitively with surrounding whitespace ignored:
//
//	queued, pending     -> "pending"
//	running             -> "running"
//	completed, finished -> "finished"
//	failed              -> "failed"
//
// The boolean result is false, and the bucket empty, for any other status.
func ExpectedRunManifestBucketForStatus(status string) (string, bool) {
	var bucket string
	switch strings.ToLower(strings.TrimSpace(status)) {
	case "pending", "queued":
		bucket = "pending"
	case "running":
		bucket = "running"
	case "finished", "completed":
		bucket = "finished"
	case "failed":
		bucket = "failed"
	}
	// Every recognized status maps to a non-empty bucket name.
	return bucket, bucket != ""
}

// FindRunManifestDir locates the run manifest directory for jobName under
// basePath. It probes the running, pending, finished, and failed job roots
// in that order and returns the first <root>/<jobName> that is an existing
// directory and whose manifest path (as given by manifest.ManifestPath) can
// be stat'ed, together with the name of the bucket it was found in.
//
// found is false, and dir and bucket are empty, when either argument is
// blank or no bucket yields such a directory.
func FindRunManifestDir(basePath string, jobName string) (dir string, bucket string, found bool) {
	if strings.TrimSpace(basePath) == "" {
		return "", "", false
	}
	if strings.TrimSpace(jobName) == "" {
		return "", "", false
	}

	paths := storage.NewJobPaths(basePath)
	// Probe order matters: the first bucket that has a manifest wins.
	candidates := []struct{ bucket, root string }{
		{bucket: "running", root: paths.RunningPath()},
		{bucket: "pending", root: paths.PendingPath()},
		{bucket: "finished", root: paths.FinishedPath()},
		{bucket: "failed", root: paths.FailedPath()},
	}

	for _, candidate := range candidates {
		jobDir := filepath.Join(candidate.root, jobName)

		info, statErr := os.Stat(jobDir)
		if statErr != nil || !info.IsDir() {
			continue
		}
		if _, manifestErr := os.Stat(manifest.ManifestPath(jobDir)); manifestErr != nil {
			continue
		}
		return jobDir, candidate.bucket, true
	}
	return "", "", false
}

// ValidateRunManifestLifecycle checks that the lifecycle fields of rm
// (StartedAt, EndedAt, ExitCode) are consistent with status, which is
// matched case-insensitively with surrounding whitespace ignored:
//
//   - running: StartedAt must be set; EndedAt and ExitCode must not be.
//   - completed, failed: StartedAt, EndedAt, and ExitCode must all be set,
//     and EndedAt must not precede StartedAt.
//   - queued, pending: EndedAt and ExitCode must not be set.
//
// Any other status passes without inspecting rm. On failure, details
// describes the first rule that was violated.
func ValidateRunManifestLifecycle(rm *manifest.RunManifest, status string) (ok bool, details string) {
	switch strings.ToLower(strings.TrimSpace(status)) {
	case "running":
		switch {
		case rm.StartedAt.IsZero():
			return false, "missing started_at for running task"
		case !rm.EndedAt.IsZero():
			return false, "ended_at must be empty for running task"
		case rm.ExitCode != nil:
			return false, "exit_code must be empty for running task"
		}

	case "completed", "failed":
		switch {
		case rm.StartedAt.IsZero():
			return false, "missing started_at for completed/failed task"
		case rm.EndedAt.IsZero():
			return false, "missing ended_at for completed/failed task"
		case rm.ExitCode == nil:
			return false, "missing exit_code for completed/failed task"
		case rm.EndedAt.Before(rm.StartedAt):
			// Both timestamps are known to be set once the cases above pass.
			return false, "ended_at is before started_at"
		}

	case "queued", "pending":
		// A queued/pending task may not have started yet, so StartedAt is
		// not checked; it must not, however, look finished.
		finished := !rm.EndedAt.IsZero() || rm.ExitCode != nil
		if finished {
			return false, "queued/pending task should not have ended_at/exit_code"
		}
	}
	return true, ""
}

// ValidateTaskIDMatch compares the task ID recorded in rm against
// expectedTaskID. The returned check always carries Expected. A blank
// (empty or whitespace-only) manifest task ID fails with Actual left empty;
// otherwise Actual is the manifest's task ID and OK reports whether it
// equals expectedTaskID exactly.
func ValidateTaskIDMatch(rm *manifest.RunManifest, expectedTaskID string) ValidateCheck {
	recorded := rm.TaskID
	result := ValidateCheck{Expected: expectedTaskID}
	if strings.TrimSpace(recorded) == "" {
		// Nothing recorded: fail without reporting an actual value.
		return result
	}
	result.Actual = recorded
	result.OK = recorded == expectedTaskID
	return result
}

// ValidateCommitIDMatch compares the commit ID recorded in the run manifest
// (rmCommitID) with expectedCommitID after trimming whitespace from both.
//
// With no expected commit ID the check passes and carries no values. With
// one, Expected and Actual hold the trimmed values, and the check fails only
// when the manifest has a commit ID that differs from the expected one; a
// missing manifest commit ID is not treated as a mismatch.
func ValidateCommitIDMatch(rmCommitID, expectedCommitID string) ValidateCheck {
	expected := strings.TrimSpace(expectedCommitID)
	actual := strings.TrimSpace(rmCommitID)

	if expected == "" {
		return ValidateCheck{OK: true}
	}
	mismatch := actual != "" && actual != expected
	return ValidateCheck{OK: !mismatch, Expected: expected, Actual: actual}
}

// ValidateDepsProvenance compares an expected dependency manifest
// (wantName, wantSHA) with the one actually found (gotName, gotSHA).
//
// If any of the four values is empty there is nothing to compare and the
// check passes with no Expected/Actual. Otherwise Expected and Actual are
// rendered as "<name>:<sha>" and OK reports whether both the names and the
// hashes are equal.
func ValidateDepsProvenance(wantName, wantSHA, gotName, gotSHA string) ValidateCheck {
	comparable := wantName != "" && wantSHA != "" && gotName != "" && gotSHA != ""
	if !comparable {
		return ValidateCheck{OK: true}
	}

	result := ValidateCheck{
		Expected: wantName + ":" + wantSHA,
		Actual:   gotName + ":" + gotSHA,
	}
	// Compare field by field rather than the joined strings.
	result.OK = wantName == gotName && wantSHA == gotSHA
	return result
}

// ValidateSnapshotID compares the expected snapshot ID (wantID) with the
// one recorded in the run manifest (gotID). Expected and Actual are always
// populated with the inputs. The check fails only when both IDs are
// non-empty and differ; if either is empty the check passes.
func ValidateSnapshotID(wantID, gotID string) ValidateCheck {
	bothPresent := wantID != "" && gotID != ""
	return ValidateCheck{
		OK:       !bothPresent || wantID == gotID,
		Expected: wantID,
		Actual:   gotID,
	}
}

// ValidateSnapshotSHA compares the expected snapshot SHA (wantSHA) with the
// one recorded in the run manifest (gotSHA). Expected and Actual are always
// populated with the inputs. The check fails only when both hashes are
// non-empty and differ; if either is empty the check passes.
func ValidateSnapshotSHA(wantSHA, gotSHA string) ValidateCheck {
	result := ValidateCheck{OK: true, Expected: wantSHA, Actual: gotSHA}
	if wantSHA != "" && gotSHA != "" && wantSHA != gotSHA {
		result.OK = false
	}
	return result
}

// ContainerStat is a package-level variable holding the stat function,
// which defaults to os.Stat. Tests may reassign it to stub out filesystem
// access.
var ContainerStat = os.Stat