// Package queue defines the task and experiment-tracking data structures
// shared between producers and workers, plus the Redis key names used to
// store and coordinate them.
package queue

import (
	"time"

	"github.com/jfraeys/fetch_ml/internal/config"
)

// DatasetSpec describes a dataset input with optional provenance fields.
type DatasetSpec struct {
	Name     string `json:"name"`
	Version  string `json:"version,omitempty"`
	Checksum string `json:"checksum,omitempty"`
	URI      string `json:"uri,omitempty"`
}

// Task represents an ML experiment task.
type Task struct {
	ID        string     `json:"id"`
	JobName   string     `json:"job_name"`
	Args      string     `json:"args"`
	Status    string     `json:"status"` // queued, running, completed, failed
	Priority  int64      `json:"priority"`
	CreatedAt time.Time  `json:"created_at"`
	StartedAt *time.Time `json:"started_at,omitempty"`
	EndedAt   *time.Time `json:"ended_at,omitempty"`
	WorkerID  string     `json:"worker_id,omitempty"`
	Error     string     `json:"error,omitempty"`
	Output    string     `json:"output,omitempty"`

	// TODO(phase1): SnapshotID is an opaque identifier only.
	// TODO(phase2): Resolve SnapshotID and verify its checksum/digest before execution.
	SnapshotID string `json:"snapshot_id,omitempty"`

	// DatasetSpecs is the preferred structured dataset input and should be authoritative.
	DatasetSpecs []DatasetSpec `json:"dataset_specs,omitempty"`
	// Datasets is kept for backward compatibility (legacy callers).
	Datasets []string          `json:"datasets,omitempty"`
	Metadata map[string]string `json:"metadata,omitempty"`

	// Resource requests (optional, 0 means unspecified).
	CPU       int    `json:"cpu,omitempty"`
	MemoryGB  int    `json:"memory_gb,omitempty"`
	GPU       int    `json:"gpu,omitempty"`
	GPUMemory string `json:"gpu_memory,omitempty"`

	// User ownership and permissions.
	UserID    string `json:"user_id"`    // User who owns this task
	Username  string `json:"username"`   // Username for display
	CreatedBy string `json:"created_by"` // User who submitted the task

	// Lease management for task resilience.
	LeaseExpiry *time.Time `json:"lease_expiry,omitempty"` // When task lease expires
	LeasedBy    string     `json:"leased_by,omitempty"`    // Worker ID holding lease

	// Retry management.
	RetryCount int        `json:"retry_count"`          // Number of retry attempts made
	MaxRetries int        `json:"max_retries"`          // Maximum retry limit (default 3)
	LastError  string     `json:"last_error,omitempty"` // Last error encountered
	NextRetry  *time.Time `json:"next_retry,omitempty"` // When to retry next (exponential backoff)

	// Tracking is optional experiment-tracking configuration for this task.
	Tracking *TrackingConfig `json:"tracking,omitempty"`
}

// TrackingConfig specifies experiment tracking tools to enable for a task.
type TrackingConfig struct {
	MLflow      *MLflowTrackingConfig      `json:"mlflow,omitempty"`
	TensorBoard *TensorBoardTrackingConfig `json:"tensorboard,omitempty"`
	Wandb       *WandbTrackingConfig       `json:"wandb,omitempty"`
}

// MLflowTrackingConfig controls MLflow integration.
type MLflowTrackingConfig struct {
	Enabled     bool   `json:"enabled"`
	Mode        string `json:"mode,omitempty"`         // "sidecar" | "remote" | "disabled"
	TrackingURI string `json:"tracking_uri,omitempty"` // Explicit tracking URI for remote mode
}

// TensorBoardTrackingConfig controls TensorBoard integration.
type TensorBoardTrackingConfig struct {
	Enabled bool   `json:"enabled"`
	Mode    string `json:"mode,omitempty"` // "sidecar" | "disabled"
}

// WandbTrackingConfig controls Weights & Biases integration.
type WandbTrackingConfig struct { Enabled bool `json:"enabled"` Mode string `json:"mode,omitempty"` // "remote" | "disabled" APIKey string `json:"api_key,omitempty"` Project string `json:"project,omitempty"` Entity string `json:"entity,omitempty"` } // Redis key constants var ( TaskQueueKey = config.RedisTaskQueueKey TaskPrefix = config.RedisTaskPrefix TaskStatusPrefix = config.RedisTaskStatusPrefix WorkerHeartbeat = config.RedisWorkerHeartbeat WorkerPrewarmKey = config.RedisWorkerPrewarmKey JobMetricsPrefix = config.RedisJobMetricsPrefix )