fetch_ml/internal/api/helpers/validation_helpers.go
Jeremie Fraeys b05470b30a
refactor: improve API structure and WebSocket protocol
- Extract WebSocket protocol handling to dedicated module
- Add helper functions for DB operations, validation, and responses
- Improve WebSocket frame handling and opcodes
- Refactor dataset, job, and Jupyter handlers
- Add duplicate detection processing
2026-02-16 20:38:12 -05:00

237 lines
7.8 KiB
Go

// Package helpers provides validation utilities for WebSocket handlers.
package helpers
import (
"encoding/hex"
"os"
"path/filepath"
"strings"
"github.com/jfraeys/fetch_ml/internal/config"
"github.com/jfraeys/fetch_ml/internal/experiment"
"github.com/jfraeys/fetch_ml/internal/manifest"
"github.com/jfraeys/fetch_ml/internal/queue"
"github.com/jfraeys/fetch_ml/internal/worker"
)
// ValidateCommitIDFormat checks that commitID is exactly 40 characters long
// and decodes as hexadecimal (upper- or lower-case digits are accepted).
//
// It returns ok=true with an empty errMsg when both checks pass. Otherwise it
// returns ok=false and errMsg names the first check that failed; the length
// check runs before the hex check.
func ValidateCommitIDFormat(commitID string) (ok bool, errMsg string) {
	const commitIDLen = 40 // required number of hex characters

	if n := len(commitID); n != commitIDLen {
		return false, "invalid commit_id length"
	}

	_, decodeErr := hex.DecodeString(commitID)
	if decodeErr == nil {
		return true, ""
	}
	return false, "invalid commit_id hex"
}
// ValidateExperimentManifest asks expMgr to validate the experiment manifest
// stored for commitID.
//
// It returns ok=true with empty details when expMgr.ValidateManifest reports
// no error; otherwise ok=false and details carries that error's text.
// expMgr must be non-nil.
func ValidateExperimentManifest(expMgr *experiment.Manager, commitID string) (ok bool, details string) {
	manifestErr := expMgr.ValidateManifest(commitID)
	if manifestErr == nil {
		return true, ""
	}
	return false, manifestErr.Error()
}
// ValidateDepsManifest selects the dependency manifest among the files stored
// for commitID and hashes it via FileSHA256Hex.
//
// Results:
//   - depName: the selected manifest name, or "" when selection failed.
//   - check: on success OK is true and Actual is "<depName>:<hash>"; on
//     failure OK is false and Details holds the underlying error text.
//   - errMsgs: nil on success; otherwise a single summary message
//     ("deps manifest missing" or "deps manifest hash failed").
func ValidateDepsManifest(
	expMgr *experiment.Manager,
	commitID string,
) (depName string, check ValidateCheck, errMsgs []string) {
	root := expMgr.GetFilesPath(commitID)

	name, selectErr := worker.SelectDependencyManifest(root)
	if selectErr != nil {
		failed := ValidateCheck{Details: selectErr.Error()}
		return "", failed, []string{"deps manifest missing"}
	}

	digest, hashErr := FileSHA256Hex(filepath.Join(root, name))
	if hashErr != nil {
		// The manifest was found, so its name is still reported to the caller.
		failed := ValidateCheck{Details: hashErr.Error()}
		return name, failed, []string{"deps manifest hash failed"}
	}

	return name, ValidateCheck{OK: true, Actual: name + ":" + digest}, nil
}
// ValidateCheck is the outcome of a single validation check. Every field
// except OK is left out of the JSON encoding when it is empty.
type ValidateCheck struct {
	// OK reports whether the check passed.
	OK bool `json:"ok"`
	// Expected is the value the check was looking for, when one applies.
	Expected string `json:"expected,omitempty"`
	// Actual is the value that was observed, when one applies.
	Actual string `json:"actual,omitempty"`
	// Details is free-form explanatory text, such as an error message.
	Details string `json:"details,omitempty"`
}
// ValidateReport aggregates the results of a validation run for JSON output.
type ValidateReport struct {
	// OK is the report-level pass/fail flag.
	OK bool `json:"ok"`
	// CommitID is omitted from the JSON encoding when empty.
	CommitID string `json:"commit_id,omitempty"`
	// TaskID is omitted from the JSON encoding when empty.
	TaskID string `json:"task_id,omitempty"`
	// Checks maps a check name to its result; it is always encoded.
	Checks map[string]ValidateCheck `json:"checks"`
	// Errors lists failure messages; omitted from JSON when empty.
	Errors []string `json:"errors,omitempty"`
	// Warnings lists non-fatal messages; omitted from JSON when empty.
	Warnings []string `json:"warnings,omitempty"`
	// TS is always encoded. Presumably a timestamp string filled in by the
	// caller — TODO confirm the expected format.
	TS string `json:"ts"`
}
// NewValidateReport returns a report that starts out passing (OK is true)
// with an empty, non-nil Checks map so callers can add entries immediately.
// Every other field, including TS, is left at its zero value.
func NewValidateReport() ValidateReport {
	report := ValidateReport{OK: true}
	report.Checks = make(map[string]ValidateCheck)
	return report
}
// ShouldRequireRunManifest reports whether a run manifest is required for
// task, based on its status. The status is compared after trimming
// surrounding whitespace and lower-casing; "running", "completed" and
// "failed" require a manifest. A nil task or any other status does not.
//
// NOTE(review): "finished" is accepted as a status by
// ExpectedRunManifestBucketForStatus but is not listed here — confirm that
// is intentional.
func ShouldRequireRunManifest(task *queue.Task) bool {
	if task == nil {
		return false
	}
	switch strings.ToLower(strings.TrimSpace(task.Status)) {
	case "running", "completed", "failed":
		return true
	}
	return false
}
// ExpectedRunManifestBucketForStatus maps a task status to the name of the
// bucket its run manifest is expected to be in. The status is trimmed and
// lower-cased before matching:
//
//	queued, pending     -> "pending"
//	running             -> "running"
//	completed, finished -> "finished"
//	failed              -> "failed"
//
// The second result is false, and the bucket empty, for any other status.
func ExpectedRunManifestBucketForStatus(status string) (string, bool) {
	bucket := ""
	switch strings.ToLower(strings.TrimSpace(status)) {
	case "queued", "pending":
		bucket = "pending"
	case "running":
		bucket = "running"
	case "completed", "finished":
		bucket = "finished"
	case "failed":
		bucket = "failed"
	}
	// Every recognised status yields a non-empty bucket name.
	return bucket, bucket != ""
}
// FindRunManifestDir looks for jobName's run directory under the job roots
// derived from basePath. The roots are tried in the order running, pending,
// finished, failed; the first one where <root>/<jobName> is a directory and
// its manifest path can be stat'ed wins.
//
// It returns the directory, the bucket name it was found in, and found=true.
// When basePath or jobName is empty or whitespace-only, or nothing matches,
// it returns "", "", false. Stat errors are treated as "not here".
//
// NOTE(review): jobName is joined into the path as-is; confirm callers
// reject names containing path separators or "..".
func FindRunManifestDir(basePath string, jobName string) (dir string, bucket string, found bool) {
	if strings.TrimSpace(basePath) == "" || strings.TrimSpace(jobName) == "" {
		return "", "", false
	}

	type bucketRoot struct{ name, path string }

	paths := config.NewJobPaths(basePath)
	candidates := []bucketRoot{
		{"running", paths.RunningPath()},
		{"pending", paths.PendingPath()},
		{"finished", paths.FinishedPath()},
		{"failed", paths.FailedPath()},
	}

	for _, c := range candidates {
		jobDir := filepath.Join(c.path, jobName)

		info, statErr := os.Stat(jobDir)
		if statErr != nil || !info.IsDir() {
			continue
		}
		// A job directory only counts when its manifest file is present.
		if _, manifestErr := os.Stat(manifest.ManifestPath(jobDir)); manifestErr != nil {
			continue
		}
		return jobDir, c.name, true
	}
	return "", "", false
}
// ValidateRunManifestLifecycle checks that the lifecycle fields of rm
// (StartedAt, EndedAt, ExitCode) are consistent with the task status.
// The status is trimmed and lower-cased before matching:
//
//   - running: StartedAt must be set; EndedAt and ExitCode must be unset.
//   - completed, failed: StartedAt, EndedAt and ExitCode must all be set, and
//     EndedAt must not be before StartedAt.
//   - queued, pending: EndedAt and ExitCode must be unset (StartedAt is not
//     checked).
//
// Any other status (including "finished") passes without inspecting rm.
// It returns ok=true with empty details on success, otherwise ok=false and a
// message naming the first rule that was violated.
func ValidateRunManifestLifecycle(rm *manifest.RunManifest, status string) (ok bool, details string) {
	switch strings.ToLower(strings.TrimSpace(status)) {
	case "running":
		switch {
		case rm.StartedAt.IsZero():
			return false, "missing started_at for running task"
		case !rm.EndedAt.IsZero():
			return false, "ended_at must be empty for running task"
		case rm.ExitCode != nil:
			return false, "exit_code must be empty for running task"
		}

	case "completed", "failed":
		switch {
		case rm.StartedAt.IsZero():
			return false, "missing started_at for completed/failed task"
		case rm.EndedAt.IsZero():
			return false, "missing ended_at for completed/failed task"
		case rm.ExitCode == nil:
			return false, "missing exit_code for completed/failed task"
		case rm.EndedAt.Before(rm.StartedAt):
			// Both timestamps are known to be set once this case is reached.
			return false, "ended_at is before started_at"
		}

	case "queued", "pending":
		// queued/pending tasks may not have started yet, but must not have finished.
		notFinished := rm.EndedAt.IsZero() && rm.ExitCode == nil
		if !notFinished {
			return false, "queued/pending task should not have ended_at/exit_code"
		}
	}
	return true, ""
}
// ValidateTaskIDMatch compares the task ID recorded in rm with
// expectedTaskID. Expected is always set to expectedTaskID in the result.
//
//   - rm.TaskID empty or whitespace-only: OK is false and Actual is left empty.
//   - rm.TaskID differs from expectedTaskID: OK is false, Actual is rm.TaskID.
//   - otherwise: OK is true, Actual is rm.TaskID.
//
// The comparison is exact; neither value is trimmed before comparing.
func ValidateTaskIDMatch(rm *manifest.RunManifest, expectedTaskID string) ValidateCheck {
	result := ValidateCheck{Expected: expectedTaskID}

	recorded := rm.TaskID
	if strings.TrimSpace(recorded) == "" {
		return result
	}

	result.Actual = recorded
	result.OK = recorded == expectedTaskID
	return result
}
// ValidateCommitIDMatch compares the commit ID taken from a run manifest
// (rmCommitID) with expectedCommitID after trimming whitespace from both.
//
//   - No expected commit ID: the check passes with Expected and Actual empty.
//   - Expected set, manifest value empty: the check passes (nothing to
//     contradict), reporting Expected and an empty Actual.
//   - Both set: the check passes only when they are equal; Expected and
//     Actual carry the trimmed values.
func ValidateCommitIDMatch(rmCommitID, expectedCommitID string) ValidateCheck {
	want := strings.TrimSpace(expectedCommitID)
	if want == "" {
		return ValidateCheck{OK: true}
	}

	got := strings.TrimSpace(rmCommitID)
	return ValidateCheck{
		OK:       got == "" || got == want,
		Expected: want,
		Actual:   got,
	}
}
// ValidateDepsProvenance compares an expected dependency manifest
// (wantName, wantSHA) with the one actually recorded (gotName, gotSHA).
//
// If any of the four values is empty there is nothing to compare and the
// check passes with Expected and Actual left empty. Otherwise Expected and
// Actual are reported as "<name>:<sha>" and OK is true only when both the
// name and the SHA match exactly.
func ValidateDepsProvenance(wantName, wantSHA, gotName, gotSHA string) ValidateCheck {
	for _, field := range [...]string{wantName, wantSHA, gotName, gotSHA} {
		if field == "" {
			return ValidateCheck{OK: true}
		}
	}

	return ValidateCheck{
		OK:       wantName == gotName && wantSHA == gotSHA,
		Expected: wantName + ":" + wantSHA,
		Actual:   gotName + ":" + gotSHA,
	}
}
// ValidateSnapshotID compares the expected snapshot ID (wantID) with the
// recorded one (gotID). Expected and Actual always echo the inputs. The
// check fails only when both IDs are non-empty and differ; if either is
// empty it passes.
func ValidateSnapshotID(wantID, gotID string) ValidateCheck {
	bothSet := wantID != "" && gotID != ""
	return ValidateCheck{
		OK:       !bothSet || wantID == gotID,
		Expected: wantID,
		Actual:   gotID,
	}
}
// ValidateSnapshotSHA compares the expected snapshot SHA (wantSHA) with the
// recorded one (gotSHA). Expected and Actual always echo the inputs. The
// check passes when either value is empty or when both are equal; it fails
// only on a mismatch between two non-empty values.
func ValidateSnapshotSHA(wantSHA, gotSHA string) ValidateCheck {
	result := ValidateCheck{OK: true, Expected: wantSHA, Actual: gotSHA}
	if wantSHA != "" && gotSHA != "" && wantSHA != gotSHA {
		result.OK = false
	}
	return result
}
// ContainerStat is a package-level function variable used for stat
// operations. It defaults to os.Stat and is a variable rather than a plain
// function so tests can substitute a mock.
var ContainerStat = os.Stat