fetch_ml/internal/worker/errors/execution.go
Jeremie Fraeys 3fb6902fa1
feat(worker): integrate scheduler endpoints and security hardening
Update worker system for scheduler integration:
- Worker server with scheduler registration
- Configuration with scheduler endpoint support
- Artifact handling with integrity verification
- Container executor with supply chain validation
- Local executor enhancements
- GPU detection improvements (cross-platform)
- Error handling with execution context
- Factory pattern for executor instantiation
- Hash integrity with native library support
2026-02-26 12:06:16 -05:00

106 lines
2.7 KiB
Go

// Package errors provides structured error types for the worker.
package errors
import (
	stderrors "errors"
	"fmt"
	"sort"
	"strings"
	"time"
)
// ExecutionError provides structured error context for task execution failures.
// It captures the task ID, execution phase, specific operation, root cause,
// and additional key/value context to make debugging easier.
//
// All methods use value receivers, so an ExecutionError can be passed around
// and returned by value. Note that copies of a value share the same Context
// map (maps are reference types).
type ExecutionError struct {
	// Timestamp is when the error was recorded; NewExecutionError sets it
	// to time.Now().
	Timestamp time.Time
	// Cause is the underlying error; it is what Unwrap returns.
	Cause error
	// Context holds free-form key/value details. It is not part of the
	// Error() message; use ContextString to render it.
	Context map[string]string
	// TaskID identifies the task that failed.
	TaskID string
	// Phase is the execution phase in which the failure occurred
	// (for example one of the Phase* constants).
	Phase string
	// Operation is the specific operation that failed
	// (for example one of the Op* constants).
	Operation string
}

// Error implements the error interface.
//
// The message has the form "[phase/operation] task=<id>: <cause>". A nil
// Cause is rendered by fmt as "<nil>".
func (e ExecutionError) Error() string {
	return fmt.Sprintf("[%s/%s] task=%s: %v", e.Phase, e.Operation, e.TaskID, e.Cause)
}

// Unwrap returns the underlying cause so that errors.Is and errors.As can
// inspect the error chain. It returns nil when no cause was recorded.
func (e ExecutionError) Unwrap() error {
	return e.Cause
}

// WithContext records key=value in the error context and returns the
// resulting ExecutionError, allowing calls to be chained.
//
// If the receiver has no Context map, a new one is allocated on the returned
// copy only and the receiver is left untouched. If the receiver already has a
// map, the entry is written into that same map, so it is also visible through
// the receiver and any other copies sharing it.
func (e ExecutionError) WithContext(key, value string) ExecutionError {
	if e.Context == nil {
		e.Context = make(map[string]string)
	}
	e.Context[key] = value
	return e
}

// ContextString renders the context as "k1=v1, k2=v2, ...".
//
// Entries are emitted in ascending key order so the result is stable across
// calls (Go map iteration order is deliberately randomized). It returns the
// empty string when there is no context.
func (e ExecutionError) ContextString() string {
	if len(e.Context) == 0 {
		return ""
	}

	keys := make([]string, 0, len(e.Context))
	for k := range e.Context {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	var b strings.Builder
	for i, k := range keys {
		if i > 0 {
			b.WriteString(", ")
		}
		b.WriteString(k)
		b.WriteByte('=')
		b.WriteString(e.Context[k])
	}
	return b.String()
}
// NewExecutionError builds an ExecutionError for the given task, phase and
// operation, wrapping cause as the underlying error.
//
// The returned value has an empty, non-nil Context map and a Timestamp taken
// from time.Now() at the moment of the call.
func NewExecutionError(taskID, phase, operation string, cause error) ExecutionError {
	var execErr ExecutionError

	execErr.Timestamp = time.Now()
	execErr.Context = make(map[string]string)

	execErr.TaskID = taskID
	execErr.Phase = phase
	execErr.Operation = operation
	execErr.Cause = cause

	return execErr
}
// IsExecutionError reports whether err is, or wraps, an ExecutionError.
//
// The error chain is walked with errors.As, so an ExecutionError that has
// been wrapped (for example with fmt.Errorf("...: %w", err)) is still
// detected. A nil err yields false.
func IsExecutionError(err error) bool {
	var target ExecutionError
	return stderrors.As(err, &target)
}
// Operation names used to label where in the worker lifecycle an error
// occurred. They are grouped below by lifecycle stage.

// Preparation operations.
const (
	OpCreateWorkspace  = "create_workspace"
	OpFetchDataset     = "fetch_dataset"
	OpMountVolume      = "mount_volume"
	OpSetupEnvironment = "setup_environment"
	OpStageSnapshot    = "stage_snapshot"
)

// Execution operations.
const (
	OpStartContainer   = "start_container"
	OpExecuteCommand   = "execute_command"
	OpMonitorExecution = "monitor_execution"
)

// Collection operations.
const (
	OpCollectResults   = "collect_results"
	OpUploadArtifacts  = "upload_artifacts"
	OpCleanupWorkspace = "cleanup_workspace"
)

// Validation operations.
const (
	OpValidateManifest = "validate_manifest"
	OpCheckResources   = "check_resources"
	OpVerifyProvenance = "verify_provenance"
)
// Phase names for use in ExecutionError.Phase.
//
// These are intended to mirror the TaskState values.
// NOTE(review): TaskState is not defined in this file; confirm the strings
// stay in sync with it.
const (
	PhaseQueued     = "queued"
	PhasePreparing  = "preparing"
	PhaseRunning    = "running"
	PhaseCollecting = "collecting"
	PhaseCompleted  = "completed"
	PhaseFailed     = "failed"
)