package worker import ( "log/slog" "strings" "time" "github.com/jfraeys/fetch_ml/internal/logging" "github.com/jfraeys/fetch_ml/internal/manifest" "github.com/jfraeys/fetch_ml/internal/metrics" "github.com/jfraeys/fetch_ml/internal/queue" "github.com/jfraeys/fetch_ml/internal/worker/executor" "github.com/jfraeys/fetch_ml/internal/worker/lifecycle" ) // simpleManifestWriter is a basic ManifestWriter implementation for testing type simpleManifestWriter struct{} func (w *simpleManifestWriter) Upsert(dir string, task *queue.Task, mutate func(*manifest.RunManifest)) { // Try to load existing manifest, or create new one m, err := manifest.LoadFromDir(dir) if err != nil { m = w.BuildInitial(task, "") } mutate(m) _ = m.WriteToDir(dir) } func (w *simpleManifestWriter) BuildInitial(task *queue.Task, podmanImage string) *manifest.RunManifest { m := manifest.NewRunManifest( "run-"+task.ID, task.ID, task.JobName, time.Now().UTC(), ) m.CommitID = task.Metadata["commit_id"] m.DepsManifestName = task.Metadata["deps_manifest_name"] return m } // NewTestWorker creates a minimal Worker for testing purposes. // It initializes only the fields needed for unit tests. func NewTestWorker(cfg *Config) *Worker { if cfg == nil { cfg = &Config{} } logger := logging.NewLogger(slog.LevelInfo, false) metricsObj := &metrics.Metrics{} // Create executors and runner for testing writer := &simpleManifestWriter{} localExecutor := executor.NewLocalExecutor(logger, writer) containerExecutor := executor.NewContainerExecutor( logger, nil, executor.ContainerConfig{ PodmanImage: cfg.PodmanImage, BasePath: cfg.BasePath, }, ) jobRunner := executor.NewJobRunner( localExecutor, containerExecutor, writer, logger, ) return &Worker{ id: cfg.WorkerID, config: cfg, logger: logger, metrics: metricsObj, health: lifecycle.NewHealthMonitor(), runner: jobRunner, } } // NewTestWorkerWithQueue creates a test Worker with a queue client. func NewTestWorkerWithQueue(cfg *Config, queueClient queue.Backend) *Worker { w := NewTestWorker(cfg) w.queueClient = queueClient return w } // NewTestWorkerWithJupyter creates a test Worker with Jupyter manager. func NewTestWorkerWithJupyter(cfg *Config, jupyterMgr JupyterManager) *Worker { w := NewTestWorker(cfg) w.jupyter = jupyterMgr return w } // NewTestWorkerWithRunner creates a test Worker with JobRunner initialized. // Note: This creates a minimal runner for testing purposes. func NewTestWorkerWithRunner(cfg *Config) *Worker { w := NewTestWorker(cfg) // Runner will be set by tests that need it return w } // NewTestWorkerWithRunLoop creates a test Worker with RunLoop initialized. // Note: RunLoop requires proper queue client setup. func NewTestWorkerWithRunLoop(cfg *Config, queueClient queue.Backend) *Worker { w := NewTestWorker(cfg) // RunLoop will be set by tests that need it return w } // ResolveDatasets resolves dataset paths for a task. // This version matches the test expectations for backwards compatibility. // Priority: DatasetSpecs > Datasets > Args parsing func ResolveDatasets(task *queue.Task) []string { if task == nil { return nil } // Priority 1: DatasetSpecs if len(task.DatasetSpecs) > 0 { var paths []string for _, spec := range task.DatasetSpecs { paths = append(paths, spec.Name) } return paths } // Priority 2: Datasets if len(task.Datasets) > 0 { return task.Datasets } // Priority 3: Parse from Args if task.Args != "" { // Simple parsing: --datasets a,b,c or --datasets a b c args := task.Args if idx := strings.Index(args, "--datasets"); idx != -1 { after := args[idx+len("--datasets "):] after = strings.TrimSpace(after) // Split by comma or space if strings.Contains(after, ",") { return strings.Split(after, ",") } parts := strings.Fields(after) if len(parts) > 0 { return parts } } } return nil }