// Package domain provides core domain types for fetch_ml. // These types have zero internal dependencies and are used across all packages. package domain import ( "time" ) // Task represents an ML experiment task type Task struct { ID string `json:"id"` JobName string `json:"job_name"` Args string `json:"args"` Status string `json:"status"` // queued, running, completed, failed Priority int64 `json:"priority"` CreatedAt time.Time `json:"created_at"` StartedAt *time.Time `json:"started_at,omitempty"` EndedAt *time.Time `json:"ended_at,omitempty"` WorkerID string `json:"worker_id,omitempty"` Error string `json:"error,omitempty"` Output string `json:"output,omitempty"` // SnapshotID references the experiment snapshot (code + deps) for this task. // Currently stores an opaque identifier. Future: verify checksum/digest before execution // to ensure reproducibility and detect tampering. SnapshotID string `json:"snapshot_id,omitempty"` // DatasetSpecs is the preferred structured dataset input and should be authoritative. DatasetSpecs []DatasetSpec `json:"dataset_specs,omitempty"` // Datasets is kept for backward compatibility (legacy callers). Datasets []string `json:"datasets,omitempty"` Metadata map[string]string `json:"metadata,omitempty"` // Resource requests (optional, 0 means unspecified) CPU int `json:"cpu,omitempty"` MemoryGB int `json:"memory_gb,omitempty"` GPU int `json:"gpu,omitempty"` GPUMemory string `json:"gpu_memory,omitempty"` // User ownership and permissions UserID string `json:"user_id"` // User who owns this task Username string `json:"username"` // Username for display CreatedBy string `json:"created_by"` // User who submitted the task // Lease management for task resilience LeaseExpiry *time.Time `json:"lease_expiry,omitempty"` // When task lease expires LeasedBy string `json:"leased_by,omitempty"` // Worker ID holding lease // Retry management RetryCount int `json:"retry_count"` // Number of retry attempts made MaxRetries int `json:"max_retries"` // Maximum retry limit (default 3) LastError string `json:"last_error,omitempty"` // Last error encountered NextRetry *time.Time `json:"next_retry,omitempty"` // When to retry next (exponential backoff) // Attempt tracking - complete history of all execution attempts Attempts []Attempt `json:"attempts,omitempty"` // Optional tracking configuration for this task Tracking *TrackingConfig `json:"tracking,omitempty"` } // Attempt represents a single execution attempt of a task type Attempt struct { Attempt int `json:"attempt"` // Attempt number (1-indexed) StartedAt time.Time `json:"started_at"` // When attempt started EndedAt *time.Time `json:"ended_at,omitempty"` // When attempt ended (if completed) WorkerID string `json:"worker_id,omitempty"` // Which worker ran this attempt Status string `json:"status"` // running, completed, failed FailureClass FailureClass `json:"failure_class,omitempty"` // Failure classification (if failed) ExitCode int `json:"exit_code,omitempty"` // Process exit code Signal string `json:"signal,omitempty"` // Termination signal (if any) Error string `json:"error,omitempty"` // Error message (if failed) LogTail string `json:"log_tail,omitempty"` // Last N lines of log output }