fetch_ml/internal/worker/testutil.go
Jeremie Fraeys a1ce267b86
feat: Implement all worker stub methods with real functionality
- VerifySnapshot: SHA256 verification using integrity package
- EnforceTaskProvenance: Strict and best-effort provenance validation
- RunJupyterTask: Full Jupyter service lifecycle (start/stop/remove/restore/list_packages)
- RunJob: Job execution using executor.JobRunner
- PrewarmNextOnce: Prewarming with queue integration

All methods now use new architecture components instead of placeholders
2026-02-17 17:37:56 -05:00

87 lines
2.1 KiB
Go

package worker
import (
"log/slog"
"strings"
"github.com/jfraeys/fetch_ml/internal/logging"
"github.com/jfraeys/fetch_ml/internal/metrics"
"github.com/jfraeys/fetch_ml/internal/queue"
"github.com/jfraeys/fetch_ml/internal/worker/lifecycle"
)
// NewTestWorker builds a bare-bones Worker intended for unit tests.
// Only the id, config, logger, metrics and health fields are populated;
// every other Worker field is left at its zero value.
// A nil cfg is replaced with an empty Config.
func NewTestWorker(cfg *Config) *Worker {
	// Guarantee the worker never carries a nil config.
	if cfg == nil {
		cfg = new(Config)
	}

	w := &Worker{
		id:     "test-worker",
		config: cfg,
	}
	w.logger = logging.NewLogger(slog.LevelInfo, false)
	w.metrics = new(metrics.Metrics)
	w.health = lifecycle.NewHealthMonitor()
	return w
}
// NewTestWorkerWithQueue returns the same Worker as NewTestWorker(cfg).
// The queueClient argument is accepted so callers can keep passing a
// queue backend, but it is currently discarded and not attached to the
// returned Worker.
func NewTestWorkerWithQueue(cfg *Config, queueClient queue.Backend) *Worker {
	// Explicitly mark the parameter as intentionally unused.
	_ = queueClient
	return NewTestWorker(cfg)
}
// NewTestWorkerWithJupyter returns a test Worker built by NewTestWorker
// whose jupyter field is set to jupyterMgr.
func NewTestWorkerWithJupyter(cfg *Config, jupyterMgr JupyterManager) *Worker {
	worker := NewTestWorker(cfg)
	worker.jupyter = jupyterMgr
	return worker
}
// ResolveDatasets returns the dataset names referenced by a task.
//
// Sources are consulted in priority order and the first non-empty one wins:
//  1. task.DatasetSpecs: the Name of every spec, in order.
//  2. task.Datasets: returned as-is (the slice is not copied).
//  3. task.Args: the text following the first "--datasets" occurrence,
//     split on commas when any are present, otherwise on whitespace.
//     Entries are whitespace-trimmed and empty entries are dropped.
//
// It returns nil for a nil task or when no source yields a dataset.
func ResolveDatasets(task *queue.Task) []string {
	if task == nil {
		return nil
	}

	// Priority 1: DatasetSpecs
	if len(task.DatasetSpecs) > 0 {
		paths := make([]string, 0, len(task.DatasetSpecs))
		for _, spec := range task.DatasetSpecs {
			paths = append(paths, spec.Name)
		}
		return paths
	}

	// Priority 2: Datasets
	if len(task.Datasets) > 0 {
		return task.Datasets
	}

	// Priority 3: Parse from Args ("--datasets a,b,c" or "--datasets a b c").
	_, after, found := strings.Cut(task.Args, "--datasets")
	if !found {
		return nil
	}
	// The byte right after the flag is treated as its separator (typically a
	// space or "="). Skip it only when it exists: Args may end with a bare
	// "--datasets", and slicing past the end of the string would panic.
	if after != "" {
		after = after[1:]
	}
	after = strings.TrimSpace(after)

	if !strings.Contains(after, ",") {
		// Whitespace-separated form. strings.Fields yields an empty slice for
		// blank input; report that as nil ("no datasets").
		if parts := strings.Fields(after); len(parts) > 0 {
			return parts
		}
		return nil
	}

	// Comma-separated form. Trim each entry so "a, b" resolves to "a" and "b",
	// and skip empty entries produced by stray or trailing commas.
	var names []string
	for _, name := range strings.Split(after, ",") {
		if name = strings.TrimSpace(name); name != "" {
			names = append(names, name)
		}
	}
	return names
}