From 6b2c377680b173f59ac413cee65c8587e539aa3b Mon Sep 17 00:00:00 2001
From: Jeremie Fraeys <jfaeys@gmail.com>
Date: Thu, 26 Feb 2026 12:06:35 -0500
Subject: [PATCH] refactor(jupyter): enhance security and scheduler integration

Update Jupyter integration for security and scheduler support:
- Enhanced security configuration with audit logging
- Health monitoring with scheduler event integration
- Package manager with network policy enforcement
- Service manager with lifecycle hooks
- Network manager with tenant isolation
- Workspace metadata with tenant tags
- Config with resource limits
- Podman container integration improvements
- Experiment manager with tracking integration
- Manifest runner with security checks
---
 internal/container/podman.go           | 144 +++++++-------------
 internal/experiment/manager.go         |   5 +-
 internal/jupyter/config.go             |  37 ++---
 internal/jupyter/health_monitor.go     |  14 +-
 internal/jupyter/network_manager.go    |   4 +-
 internal/jupyter/package_manager.go    |  37 +++--
 internal/jupyter/security_enhanced.go  |  89 ++++++------
 internal/jupyter/service_manager.go    |  67 +++++----
 internal/jupyter/workspace_manager.go  |  12 +-
 internal/jupyter/workspace_metadata.go |  25 ++--
 internal/manifest/run_manifest.go      | 181 +++++++++++++++++--------
 internal/manifest/schema_version.go    |   2 +-
 12 files changed, 319 insertions(+), 298 deletions(-)

diff --git a/internal/container/podman.go b/internal/container/podman.go
index ae1f71f..1128762 100644
--- a/internal/container/podman.go
+++ b/internal/container/podman.go
@@ -306,20 +306,20 @@ func (pm *PodmanManager) ExecContainer(ctx context.Context, containerID string,
 
 // PodmanConfig holds configuration for Podman container execution
 type PodmanConfig struct {
-	Image              string
-	Workspace          string
-	Results            string
-	ContainerWorkspace string
-	ContainerResults   string
-	AppleGPU           bool
-	GPUDevices         []string
 	Env                map[string]string
 	Volumes            map[string]string
 	Memory             string
+	ContainerWorkspace string
+	ContainerResults   string
+	Results            string
+	Workspace          string
+	Image              string
 	CPUs               string
-	Privileged         bool   // Security: must be false
-	Network            string // Security: must not be "host"
-	ReadOnlyMounts     bool   // Security: true for dataset mounts
+	Network            string
+	GPUDevices         []string
+	AppleGPU           bool
+	Privileged         bool
+	ReadOnlyMounts     bool
 }
 
 // PodmanResourceOverrides converts per-task resource requests into Podman-compatible
@@ -338,15 +338,22 @@ func PodmanResourceOverrides(cpu int, memoryGB int) (cpus string, memory string)
 
 // PodmanSecurityConfig holds security configuration for Podman containers
 type PodmanSecurityConfig struct {
-	NoNewPrivileges bool
-	DropAllCaps     bool
+	SeccompProfile  string
+	NetworkMode     string
 	AllowedCaps     []string
-	UserNS          bool
 	RunAsUID        int
 	RunAsGID        int
-	SeccompProfile  string
+	NoNewPrivileges bool
+	DropAllCaps     bool
+	UserNS          bool
 	ReadOnlyRoot    bool
-	NetworkMode     string
+	// Process Isolation
+	MaxProcesses int
+	MaxOpenFiles int
+	DisableSwap  bool
+	OOMScoreAdj  int
+	TaskUID      int
+	TaskGID      int
 }
 
 // BuildSecurityArgs builds security-related podman arguments from PodmanSecurityConfig
@@ -395,6 +402,27 @@ func BuildSecurityArgs(sandbox PodmanSecurityConfig) []string {
 	}
 	args = append(args, "--network", networkMode)
 
+	// Process Isolation
+	// Fork bomb protection - limit number of processes
+	if sandbox.MaxProcesses > 0 {
+		args = append(args, "--pids-limit", strconv.Itoa(sandbox.MaxProcesses))
+	}
+
+	// File descriptor limits
+	if sandbox.MaxOpenFiles > 0 {
+		args = append(args, "--ulimit", fmt.Sprintf("nofile=%d:%d", sandbox.MaxOpenFiles, sandbox.MaxOpenFiles))
+	}
+
+	// OOM killer score adjustment (lower = less likely to be killed)
+	if sandbox.OOMScoreAdj != 0 {
+		args = append(args, "--oom-score-adj", strconv.Itoa(sandbox.OOMScoreAdj))
+	}
+
+	// Disable swap (memory-swap equals memory means no swap)
+	if sandbox.DisableSwap {
+		args = append(args, "--memory-swap=0")
+	}
+
 	return args
 }
 
@@ -488,84 +516,6 @@ func BuildPodmanCommand(
 	return exec.CommandContext(ctx, "podman", args...)
 }
 
-// BuildPodmanCommandLegacy builds a Podman command using legacy security settings
-// Deprecated: Use BuildPodmanCommand with SandboxConfig instead
-func BuildPodmanCommandLegacy(
-	ctx context.Context,
-	cfg PodmanConfig,
-	scriptPath, depsPath string,
-	extraArgs []string,
-) *exec.Cmd {
-	args := []string{
-		"run", "--rm",
-		"--security-opt", "no-new-privileges",
-		"--cap-drop", "ALL",
-	}
-
-	// Add network mode if specified
-	if cfg.Network != "" {
-		args = append(args, "--network", cfg.Network)
-	}
-
-	// Add read-only root filesystem
-	if cfg.ReadOnlyMounts {
-		args = append(args, "--read-only")
-	}
-
-	if cfg.Memory != "" {
-		args = append(args, "--memory", cfg.Memory)
-	} else {
-		args = append(args, "--memory", config.DefaultPodmanMemory)
-	}
-
-	if cfg.CPUs != "" {
-		args = append(args, "--cpus", cfg.CPUs)
-	} else {
-		args = append(args, "--cpus", config.DefaultPodmanCPUs)
-	}
-
-	args = append(args, "--userns", "keep-id")
-
-	// Mount workspace
-	workspaceMount := fmt.Sprintf("%s:%s:rw", cfg.Workspace, cfg.ContainerWorkspace)
-	args = append(args, "-v", workspaceMount)
-
-	// Mount results
-	resultsMount := fmt.Sprintf("%s:%s:rw", cfg.Results, cfg.ContainerResults)
-	args = append(args, "-v", resultsMount)
-
-	// Mount additional volumes
-	for hostPath, containerPath := range cfg.Volumes {
-		mount := fmt.Sprintf("%s:%s", hostPath, containerPath)
-		args = append(args, "-v", mount)
-	}
-
-	// Use injected GPU device paths for Apple GPU or custom configurations
-	for _, device := range cfg.GPUDevices {
-		args = append(args, "--device", device)
-	}
-
-	// Add environment variables
-	for key, value := range cfg.Env {
-		args = append(args, "-e", fmt.Sprintf("%s=%s", key, value))
-	}
-
-	// Image and command
-	args = append(args, cfg.Image,
-		"--workspace", cfg.ContainerWorkspace,
-		"--deps", depsPath,
-		"--script", scriptPath,
-	)
-
-	// Add extra arguments via --args flag
-	if len(extraArgs) > 0 {
-		args = append(args, "--args")
-		args = append(args, extraArgs...)
-	}
-
-	return exec.CommandContext(ctx, "podman", args...)
-}
-
 // ValidateSecurityPolicy validates that the container configuration meets security requirements.
 // Returns an error if the configuration violates security policies.
 func ValidateSecurityPolicy(cfg PodmanConfig) error {
@@ -588,10 +538,10 @@ func ValidateSecurityPolicy(cfg PodmanConfig) error {
 
 // PodmanSecret represents a secret to be mounted in a container
 type PodmanSecret struct {
-	Name   string // Secret name in Podman
-	Data   []byte // Secret data (will be base64 encoded)
-	Target string // Mount path inside container
-	EnvVar string // Environment variable name (optional, if set mounts as env var instead of file)
+	Name   string
+	Target string
+	EnvVar string
+	Data   []byte
 }
 
 // CreateSecret creates a Podman secret from the given data
diff --git a/internal/experiment/manager.go b/internal/experiment/manager.go
index b910861..44c460f 100644
--- a/internal/experiment/manager.go
+++ b/internal/experiment/manager.go
@@ -104,7 +104,7 @@ func (m *Manager) CreateExperiment(commitID string) error {
 	return nil
 }
 
-// WriteMetadata writes experiment metadata to meta.bin
+// WriteMetadata writes experiment metadata to meta.bin with crash safety (fsync)
 func (m *Manager) WriteMetadata(meta *Metadata) error {
 	path := m.GetMetadataPath(meta.CommitID)
 
@@ -134,7 +134,8 @@ func (m *Manager) WriteMetadata(meta *Metadata) error {
 	buf = append(buf, byte(len(meta.User)))
 	buf = append(buf, []byte(meta.User)...)
 
-	return os.WriteFile(path, buf, 0o600)
+	// SECURITY: Write with fsync for crash safety
+	return fileutil.WriteFileSafe(path, buf, 0o600)
 }
 
 // ReadMetadata reads experiment metadata from meta.bin
diff --git a/internal/jupyter/config.go b/internal/jupyter/config.go
index 080cea0..f40fa46 100644
--- a/internal/jupyter/config.go
+++ b/internal/jupyter/config.go
@@ -9,6 +9,7 @@ import (
 	"strings"
 	"time"
 
+	"github.com/jfraeys/fetch_ml/internal/fileutil"
 	"github.com/jfraeys/fetch_ml/internal/logging"
 )
 
@@ -58,34 +59,34 @@ type ConfigManager struct {
 type JupyterConfig struct {
 	Version          string                 `json:"version"`
 	Environment      string                 `json:"environment"`
-	Service          ServiceConfig          `json:"service"`
+	Logging          LoggingConfig          `json:"logging"`
+	Resources        ResourceConfig         `json:"resources"`
+	Security         SecurityConfig         `json:"security"`
 	Workspace        WorkspaceConfig        `json:"workspace"`
 	Network          NetworkConfig          `json:"network"`
-	Security         SecurityConfig         `json:"security"`
-	Resources        ResourceConfig         `json:"resources"`
-	Health           HealthConfig           `json:"health"`
-	Logging          LoggingConfig          `json:"logging"`
 	DefaultSettings  DefaultSettingsConfig  `json:"default_settings"`
+	Service          ServiceConfig          `json:"service"`
 	AdvancedSettings AdvancedSettingsConfig `json:"advanced_settings"`
+	Health           HealthConfig           `json:"health"`
 }
 
 // WorkspaceConfig defines workspace configuration
 type WorkspaceConfig struct {
-	DefaultPath      string            `json:"default_path"`
-	AutoCreate       bool              `json:"auto_create"`
 	MountOptions     map[string]string `json:"mount_options"`
+	DefaultPath      string            `json:"default_path"`
+	MaxWorkspaceSize string            `json:"max_workspace_size"`
 	AllowedPaths     []string          `json:"allowed_paths"`
 	DeniedPaths      []string          `json:"denied_paths"`
-	MaxWorkspaceSize string            `json:"max_workspace_size"`
+	AutoCreate       bool              `json:"auto_create"`
 }
 
 // HealthConfig defines health monitoring configuration
 type HealthConfig struct {
-	Enabled        bool          `json:"enabled"`
 	CheckInterval  time.Duration `json:"check_interval"`
 	Timeout        time.Duration `json:"timeout"`
 	RetryAttempts  int           `json:"retry_attempts"`
 	MaxServiceAge  time.Duration `json:"max_service_age"`
+	Enabled        bool          `json:"enabled"`
 	AutoCleanup    bool          `json:"auto_cleanup"`
 	MetricsEnabled bool          `json:"metrics_enabled"`
 }
@@ -96,31 +97,31 @@ type LoggingConfig struct {
 	Format     string `json:"format"`
 	Output     string `json:"output"`
 	MaxSize    string `json:"max_size"`
-	MaxBackups int    `json:"max_backups"`
 	MaxAge     string `json:"max_age"`
+	MaxBackups int    `json:"max_backups"`
 }
 
 // DefaultSettingsConfig defines default settings for new services
 type DefaultSettingsConfig struct {
-	Image          string            `json:"default_image"`
-	Port           int               `json:"default_port"`
-	Workspace      string            `json:"default_workspace"`
 	Environment    map[string]string `json:"environment"`
+	Image          string            `json:"default_image"`
+	Workspace      string            `json:"default_workspace"`
+	ShutdownPolicy string            `json:"shutdown_policy"`
+	Port           int               `json:"default_port"`
+	StopTimeout    time.Duration     `json:"stop_timeout"`
 	AutoStart      bool              `json:"auto_start"`
 	AutoStop       bool              `json:"auto_stop"`
-	StopTimeout    time.Duration     `json:"stop_timeout"`
-	ShutdownPolicy string            `json:"shutdown_policy"`
 }
 
 // AdvancedSettingsConfig defines advanced configuration options
 type AdvancedSettingsConfig struct {
+	ExperimentalFeatures  []string      `json:"experimental_features"`
 	MaxConcurrentServices int           `json:"max_concurrent_services"`
 	ServiceTimeout        time.Duration `json:"service_timeout"`
 	StartupTimeout        time.Duration `json:"startup_timeout"`
 	GracefulShutdown      bool          `json:"graceful_shutdown"`
 	ForceCleanup          bool          `json:"force_cleanup"`
 	DebugMode             bool          `json:"debug_mode"`
-	ExperimentalFeatures  []string      `json:"experimental_features"`
 }
 
 // NewConfigManager creates a new configuration manager
@@ -190,8 +191,8 @@ func (cm *ConfigManager) SaveConfig() error {
 		return fmt.Errorf("failed to marshal config: %w", err)
 	}
 
-	// Write configuration file
-	if err := os.WriteFile(cm.configPath, data, 0600); err != nil {
+	// Write configuration file with crash safety (fsync)
+	if err := fileutil.WriteFileSafe(cm.configPath, data, 0600); err != nil {
 		return fmt.Errorf("failed to write config file: %w", err)
 	}
 
diff --git a/internal/jupyter/health_monitor.go b/internal/jupyter/health_monitor.go
index 291c424..3154728 100644
--- a/internal/jupyter/health_monitor.go
+++ b/internal/jupyter/health_monitor.go
@@ -20,33 +20,33 @@ const (
 type HealthMonitor struct {
 	logger        *logging.Logger
 	services      map[string]*JupyterService
-	servicesMutex sync.RWMutex
-	interval      time.Duration
 	client        *http.Client
+	interval      time.Duration
+	servicesMutex sync.RWMutex
 }
 
 // HealthStatus represents the health status of a service
 type HealthStatus struct {
+	LastCheck    time.Time              `json:"last_check"`
+	Metrics      map[string]interface{} `json:"metrics"`
 	ServiceID    string                 `json:"service_id"`
 	ServiceName  string                 `json:"service_name"`
 	Status       string                 `json:"status"`
-	LastCheck    time.Time              `json:"last_check"`
-	ResponseTime time.Duration          `json:"response_time"`
 	URL          string                 `json:"url"`
 	ContainerID  string                 `json:"container_id"`
 	Errors       []string               `json:"errors"`
-	Metrics      map[string]interface{} `json:"metrics"`
+	ResponseTime time.Duration          `json:"response_time"`
 }
 
 // HealthReport contains a comprehensive health report
 type HealthReport struct {
 	Timestamp     time.Time                `json:"timestamp"`
+	Services      map[string]*HealthStatus `json:"services"`
+	Summary       string                   `json:"summary"`
 	TotalServices int                      `json:"total_services"`
 	Healthy       int                      `json:"healthy"`
 	Unhealthy     int                      `json:"unhealthy"`
 	Unknown       int                      `json:"unknown"`
-	Services      map[string]*HealthStatus `json:"services"`
-	Summary       string                   `json:"summary"`
 }
 
 // NewHealthMonitor creates a new health monitor
diff --git a/internal/jupyter/network_manager.go b/internal/jupyter/network_manager.go
index f0ade2a..57296b3 100644
--- a/internal/jupyter/network_manager.go
+++ b/internal/jupyter/network_manager.go
@@ -18,9 +18,9 @@ type NetworkManager struct {
 
 // PortAllocator manages port allocation for services
 type PortAllocator struct {
+	usedPorts map[int]bool
 	startPort int
 	endPort   int
-	usedPorts map[int]bool
 }
 
 // NewNetworkManager creates a new network manager
@@ -313,10 +313,10 @@ func (nm *NetworkManager) GetNetworkStatus() *NetworkStatus {
 
 // NetworkStatus contains network status information
 type NetworkStatus struct {
+	PortRange      string `json:"port_range"`
 	TotalPorts     int    `json:"total_ports"`
 	AvailablePorts int    `json:"available_ports"`
 	UsedPorts      int    `json:"used_ports"`
-	PortRange      string `json:"port_range"`
 	Services       int    `json:"services"`
 }
 
diff --git a/internal/jupyter/package_manager.go b/internal/jupyter/package_manager.go
index 50bc41a..5ea1efa 100644
--- a/internal/jupyter/package_manager.go
+++ b/internal/jupyter/package_manager.go
@@ -8,6 +8,7 @@ import (
 	"strings"
 	"time"
 
+	"github.com/jfraeys/fetch_ml/internal/fileutil"
 	"github.com/jfraeys/fetch_ml/internal/logging"
 )
 
@@ -18,22 +19,22 @@ const (
 // PackageManager manages package installations in Jupyter workspaces
 type PackageManager struct {
 	logger           *logging.Logger
-	trustedChannels  []string
 	allowedPackages  map[string]bool
-	blockedPackages  []string
 	workspacePath    string
 	packageCachePath string
+	trustedChannels  []string
+	blockedPackages  []string
 }
 
 // PackageConfig defines package management configuration
 type PackageConfig struct {
-	TrustedChannels []string        `json:"trusted_channels"`
 	AllowedPackages map[string]bool `json:"allowed_packages"`
+	TrustedChannels []string        `json:"trusted_channels"`
 	BlockedPackages []string        `json:"blocked_packages"`
-	RequireApproval bool            `json:"require_approval"`
-	AutoApproveSafe bool            `json:"auto_approve_safe"`
 	MaxPackages     int             `json:"max_packages"`
 	InstallTimeout  time.Duration   `json:"install_timeout"`
+	RequireApproval bool            `json:"require_approval"`
+	AutoApproveSafe bool            `json:"auto_approve_safe"`
 	AllowCondaForge bool            `json:"allow_conda_forge"`
 	AllowPyPI       bool            `json:"allow_pypi"`
 	AllowLocal      bool            `json:"allow_local"`
@@ -41,28 +42,28 @@ type PackageConfig struct {
 
 // PackageRequest represents a package installation request
 type PackageRequest struct {
+	Timestamp       time.Time `json:"timestamp"`
+	ApprovalTime    time.Time `json:"approval_time,omitempty"`
 	PackageName     string    `json:"package_name"`
 	Version         string    `json:"version,omitempty"`
 	Channel         string    `json:"channel,omitempty"`
 	RequestedBy     string    `json:"requested_by"`
 	WorkspacePath   string    `json:"workspace_path"`
-	Timestamp       time.Time `json:"timestamp"`
-	Status          string    `json:"status"` // pending, approved, rejected, installed, failed
+	Status          string    `json:"status"`
 	RejectionReason string    `json:"rejection_reason,omitempty"`
 	ApprovalUser    string    `json:"approval_user,omitempty"`
-	ApprovalTime    time.Time `json:"approval_time,omitempty"`
 }
 
 // PackageInfo contains information about an installed package
 type PackageInfo struct {
+	InstalledAt  time.Time         `json:"installed_at"`
+	Metadata     map[string]string `json:"metadata"`
 	Name         string            `json:"name"`
 	Version      string            `json:"version"`
 	Channel      string            `json:"channel"`
-	InstalledAt  time.Time         `json:"installed_at"`
 	InstalledBy  string            `json:"installed_by"`
 	Size         string            `json:"size"`
 	Dependencies []string          `json:"dependencies"`
-	Metadata     map[string]string `json:"metadata"`
 }
 
 // NewPackageManager creates a new package manager
@@ -353,14 +354,18 @@ func (pm *PackageManager) GetPackageRequest(requestID string) (*PackageRequest,
 	return pm.loadPackageRequest(requestID)
 }
 
-// savePackageRequest saves a package request to cache
+// savePackageRequest saves a package request to cache with crash safety (fsync)
 func (pm *PackageManager) savePackageRequest(req *PackageRequest) error {
 	requestFile := filepath.Join(pm.packageCachePath, fmt.Sprintf("request_%s.json", req.PackageName))
 	data, err := json.MarshalIndent(req, "", "  ")
 	if err != nil {
 		return err
 	}
-	return os.WriteFile(requestFile, data, 0600)
+	// SECURITY: Write with fsync for crash safety
+	if err := fileutil.WriteFileSafe(requestFile, data, 0600); err != nil {
+		return fmt.Errorf("failed to write package request: %w", err)
+	}
+	return nil
 }
 
 // loadPackageRequest loads a package request from cache
@@ -406,14 +411,18 @@ func (pm *PackageManager) loadAllPackageRequests() ([]*PackageRequest, error) {
 	return requests, nil
 }
 
-// savePackageInfo saves package information to cache
+// savePackageInfo saves package information to cache with crash safety (fsync)
 func (pm *PackageManager) savePackageInfo(info *PackageInfo) error {
 	infoFile := filepath.Join(pm.packageCachePath, fmt.Sprintf("installed_%s.json", info.Name))
 	data, err := json.MarshalIndent(info, "", "  ")
 	if err != nil {
 		return err
 	}
-	return os.WriteFile(infoFile, data, 0600)
+	// SECURITY: Write with fsync for crash safety
+	if err := fileutil.WriteFileSafe(infoFile, data, 0600); err != nil {
+		return fmt.Errorf("failed to write package info: %w", err)
+	}
+	return nil
 }
 
 // loadAllPackageInfo loads all installed package information
diff --git a/internal/jupyter/security_enhanced.go b/internal/jupyter/security_enhanced.go
index 0717eae..7a35935 100644
--- a/internal/jupyter/security_enhanced.go
+++ b/internal/jupyter/security_enhanced.go
@@ -23,57 +23,44 @@ type SecurityManager struct {
 
 // EnhancedSecurityConfig provides comprehensive security settings
 type EnhancedSecurityConfig struct {
-	// Network Security
-	AllowNetwork   bool     `json:"allow_network"`
-	AllowedHosts   []string `json:"allowed_hosts"`
-	BlockedHosts   []string `json:"blocked_hosts"`
-	EnableFirewall bool     `json:"enable_firewall"`
-
-	// Package Security
-	TrustedChannels []string        `json:"trusted_channels"`
-	BlockedPackages []string        `json:"blocked_packages"`
-	AllowedPackages map[string]bool `json:"allowed_packages"`
-	RequireApproval bool            `json:"require_approval"`
-	AutoApproveSafe bool            `json:"auto_approve_safe"`
-	MaxPackages     int             `json:"max_packages"`
-	InstallTimeout  time.Duration   `json:"install_timeout"`
-	AllowCondaForge bool            `json:"allow_conda_forge"`
-	AllowPyPI       bool            `json:"allow_pypi"`
-	AllowLocal      bool            `json:"allow_local"`
-
-	// Container Security
-	ReadOnlyRoot     bool     `json:"read_only_root"`
-	DropCapabilities []string `json:"drop_capabilities"`
-	RunAsNonRoot     bool     `json:"run_as_non_root"`
-	EnableSeccomp    bool     `json:"enable_seccomp"`
-	NoNewPrivileges  bool     `json:"no_new_privileges"`
-
-	// Authentication Security
-	EnableTokenAuth   bool          `json:"enable_token_auth"`
-	TokenLength       int           `json:"token_length"`
-	TokenExpiry       time.Duration `json:"token_expiry"`
-	RequireHTTPS      bool          `json:"require_https"`
-	SessionTimeout    time.Duration `json:"session_timeout"`
-	MaxFailedAttempts int           `json:"max_failed_attempts"`
-	LockoutDuration   time.Duration `json:"lockout_duration"`
-
-	// File System Security
-	AllowedPaths     []string `json:"allowed_paths"`
-	DeniedPaths      []string `json:"denied_paths"`
-	MaxWorkspaceSize string   `json:"max_workspace_size"`
-	AllowExecFrom    []string `json:"allow_exec_from"`
-	BlockExecFrom    []string `json:"block_exec_from"`
-
-	// Resource Security
-	MaxMemoryLimit string `json:"max_memory_limit"`
-	MaxCPULimit    string `json:"max_cpu_limit"`
-	MaxDiskUsage   string `json:"max_disk_usage"`
-	MaxProcesses   int    `json:"max_processes"`
-
-	// Logging & Monitoring
-	SecurityLogLevel string `json:"security_log_level"`
-	AuditEnabled     bool   `json:"audit_enabled"`
-	RealTimeAlerts   bool   `json:"real_time_alerts"`
+	AllowedPackages   map[string]bool `json:"allowed_packages"`
+	SecurityLogLevel  string          `json:"security_log_level"`
+	MaxDiskUsage      string          `json:"max_disk_usage"`
+	MaxWorkspaceSize  string          `json:"max_workspace_size"`
+	MaxMemoryLimit    string          `json:"max_memory_limit"`
+	MaxCPULimit       string          `json:"max_cpu_limit"`
+	BlockedPackages   []string        `json:"blocked_packages"`
+	AllowedHosts      []string        `json:"allowed_hosts"`
+	BlockedHosts      []string        `json:"blocked_hosts"`
+	TrustedChannels   []string        `json:"trusted_channels"`
+	BlockExecFrom     []string        `json:"block_exec_from"`
+	AllowExecFrom     []string        `json:"allow_exec_from"`
+	DropCapabilities  []string        `json:"drop_capabilities"`
+	DeniedPaths       []string        `json:"denied_paths"`
+	AllowedPaths      []string        `json:"allowed_paths"`
+	MaxPackages       int             `json:"max_packages"`
+	SessionTimeout    time.Duration   `json:"session_timeout"`
+	MaxProcesses      int             `json:"max_processes"`
+	InstallTimeout    time.Duration   `json:"install_timeout"`
+	LockoutDuration   time.Duration   `json:"lockout_duration"`
+	TokenLength       int             `json:"token_length"`
+	TokenExpiry       time.Duration   `json:"token_expiry"`
+	MaxFailedAttempts int             `json:"max_failed_attempts"`
+	ReadOnlyRoot      bool            `json:"read_only_root"`
+	NoNewPrivileges   bool            `json:"no_new_privileges"`
+	EnableTokenAuth   bool            `json:"enable_token_auth"`
+	RunAsNonRoot      bool            `json:"run_as_non_root"`
+	AllowLocal        bool            `json:"allow_local"`
+	AllowPyPI         bool            `json:"allow_pypi"`
+	AllowCondaForge   bool            `json:"allow_conda_forge"`
+	RequireHTTPS      bool            `json:"require_https"`
+	AllowNetwork      bool            `json:"allow_network"`
+	AutoApproveSafe   bool            `json:"auto_approve_safe"`
+	RequireApproval   bool            `json:"require_approval"`
+	EnableSeccomp     bool            `json:"enable_seccomp"`
+	EnableFirewall    bool            `json:"enable_firewall"`
+	AuditEnabled      bool            `json:"audit_enabled"`
+	RealTimeAlerts    bool            `json:"real_time_alerts"`
 }
 
 // SecurityEvent represents a security-related event
diff --git a/internal/jupyter/service_manager.go b/internal/jupyter/service_manager.go
index 5f1d321..aca1c07 100644
--- a/internal/jupyter/service_manager.go
+++ b/internal/jupyter/service_manager.go
@@ -12,6 +12,7 @@ import (
 
 	"github.com/jfraeys/fetch_ml/internal/config"
 	"github.com/jfraeys/fetch_ml/internal/container"
+	"github.com/jfraeys/fetch_ml/internal/fileutil"
 	"github.com/jfraeys/fetch_ml/internal/logging"
 )
 
@@ -73,12 +74,12 @@ func trashBaseDir() string {
 }
 
 type trashInfo struct {
-	OriginalName string    `json:"original_name"`
 	DeletedAt    time.Time `json:"deleted_at"`
-	DeletedBy    string    `json:"deleted_by"`
-	SizeBytes    int64     `json:"size_bytes"`
 	PurgeAfter   time.Time `json:"purge_after"`
+	OriginalName string    `json:"original_name"`
+	DeletedBy    string    `json:"deleted_by"`
 	Reason       string    `json:"reason"`
+	SizeBytes    int64     `json:"size_bytes"`
 }
 
 func (sm *ServiceManager) moveWorkspaceToTrash(workspacePath string, originalName string) (string, *trashInfo, error) {
@@ -119,7 +120,11 @@ func (sm *ServiceManager) moveWorkspaceToTrash(workspacePath string, originalNam
 	}
 	b, err := json.MarshalIndent(info, "", "  ")
 	if err == nil {
-		_ = os.WriteFile(filepath.Join(dest, ".trashinfo"), b, 0o600)
+		// SECURITY: Write with fsync for crash safety
+		trashinfoPath := filepath.Join(dest, ".trashinfo")
+		if err := fileutil.WriteFileSafe(trashinfoPath, b, 0o600); err != nil {
+			sm.logger.Warn("failed to write trashinfo", "error", err)
+		}
 	}
 
 	return dest, info, nil
@@ -243,25 +248,25 @@ func startupBlockedPackages(installBlocked []string) []string {
 // ServiceConfig holds configuration for Jupyter services
 type ServiceConfig struct {
 	DefaultImage     string         `json:"default_image"`
-	DefaultPort      int            `json:"default_port"`
 	DefaultWorkspace string         `json:"default_workspace"`
-	MaxServices      int            `json:"max_services"`
 	DefaultResources ResourceConfig `json:"default_resources"`
 	SecuritySettings SecurityConfig `json:"security_settings"`
 	NetworkConfig    NetworkConfig  `json:"network_config"`
+	DefaultPort      int            `json:"default_port"`
+	MaxServices      int            `json:"max_services"`
 }
 
 // NetworkConfig defines network settings for Jupyter containers
 type NetworkConfig struct {
+	BindAddress    string `json:"bind_address"`
+	Token          string `json:"token"`
+	Password       string `json:"password"`
+	NetworkName    string `json:"network_name"`
 	HostPort       int    `json:"host_port"`
 	ContainerPort  int    `json:"container_port"`
-	BindAddress    string `json:"bind_address"`
 	EnableToken    bool   `json:"enable_token"`
-	Token          string `json:"token"`
 	EnablePassword bool   `json:"enable_password"`
-	Password       string `json:"password"`
 	AllowRemote    bool   `json:"allow_remote"`
-	NetworkName    string `json:"network_name"`
 }
 
 // ResourceConfig defines resource limits for Jupyter containers
@@ -273,16 +278,16 @@ type ResourceConfig struct {
 
 // SecurityConfig holds security settings for Jupyter services
 type SecurityConfig struct {
-	AllowNetwork     bool            `json:"allow_network"`
-	AllowedHosts     []string        `json:"allowed_hosts"`
+	AllowedPackages  map[string]bool `json:"allowed_packages"`
+	DropCapabilities []string        `json:"drop_capabilities"`
 	BlockedHosts     []string        `json:"blocked_hosts"`
-	EnableFirewall   bool            `json:"enable_firewall"`
 	TrustedChannels  []string        `json:"trusted_channels"`
 	BlockedPackages  []string        `json:"blocked_packages"`
-	AllowedPackages  map[string]bool `json:"allowed_packages"`
+	AllowedHosts     []string        `json:"allowed_hosts"`
+	EnableFirewall   bool            `json:"enable_firewall"`
 	RequireApproval  bool            `json:"require_approval"`
 	ReadOnlyRoot     bool            `json:"read_only_root"`
-	DropCapabilities []string        `json:"drop_capabilities"`
+	AllowNetwork     bool            `json:"allow_network"`
 	RunAsNonRoot     bool            `json:"run_as_non_root"`
 	EnableSeccomp    bool            `json:"enable_seccomp"`
 	NoNewPrivileges  bool            `json:"no_new_privileges"`
@@ -290,19 +295,19 @@ type SecurityConfig struct {
 
 // JupyterService represents a running Jupyter instance
 type JupyterService struct {
-	ID          string            `json:"id"`
-	Name        string            `json:"name"`
-	Status      string            `json:"status"`
-	ContainerID string            `json:"container_id"`
-	Port        int               `json:"port"`
-	Workspace   string            `json:"workspace"`
-	Image       string            `json:"image"`
-	URL         string            `json:"url"`
 	CreatedAt   time.Time         `json:"created_at"`
 	LastAccess  time.Time         `json:"last_access"`
-	Config      ServiceConfig     `json:"config"`
-	Environment map[string]string `json:"environment"`
 	Metadata    map[string]string `json:"metadata"`
+	Environment map[string]string `json:"environment"`
+	Image       string            `json:"image"`
+	ContainerID string            `json:"container_id"`
+	URL         string            `json:"url"`
+	Workspace   string            `json:"workspace"`
+	ID          string            `json:"id"`
+	Status      string            `json:"status"`
+	Name        string            `json:"name"`
+	Config      ServiceConfig     `json:"config"`
+	Port        int               `json:"port"`
 }
 
 type InstalledPackage struct {
@@ -313,15 +318,15 @@ type InstalledPackage struct {
 
 // StartRequest defines parameters for starting a Jupyter service
 type StartRequest struct {
+	Environment map[string]string `json:"environment"`
+	Metadata    map[string]string `json:"metadata"`
 	Name        string            `json:"name"`
 	Workspace   string            `json:"workspace"`
 	Image       string            `json:"image"`
-	Port        int               `json:"port"`
 	Resources   ResourceConfig    `json:"resources"`
 	Security    SecurityConfig    `json:"security"`
 	Network     NetworkConfig     `json:"network"`
-	Environment map[string]string `json:"environment"`
-	Metadata    map[string]string `json:"metadata"`
+	Port        int               `json:"port"`
 }
 
 // NewServiceManager creates a new Jupyter service manager
@@ -1182,7 +1187,11 @@ func (sm *ServiceManager) saveServices() error {
 		return err
 	}
 
-	return os.WriteFile(servicesFile, data, 0600)
+	// SECURITY: Write with fsync for crash safety
+	if err := fileutil.WriteFileSafe(servicesFile, data, 0600); err != nil {
+		return fmt.Errorf("failed to write services file: %w", err)
+	}
+	return nil
 }
 
 // LinkWorkspaceWithExperiment links a workspace with an experiment
diff --git a/internal/jupyter/workspace_manager.go b/internal/jupyter/workspace_manager.go
index c1eb310..c4fdfef 100644
--- a/internal/jupyter/workspace_manager.go
+++ b/internal/jupyter/workspace_manager.go
@@ -17,28 +17,28 @@ const (
 // WorkspaceManager handles workspace mounting and volume management for Jupyter services
 type WorkspaceManager struct {
 	logger   *logging.Logger
-	basePath string
 	mounts   map[string]*WorkspaceMount
+	basePath string
 }
 
 // WorkspaceMount represents a workspace mount configuration
 type WorkspaceMount struct {
+	Options       map[string]string `json:"options"`
 	ID            string            `json:"id"`
 	HostPath      string            `json:"host_path"`
 	ContainerPath string            `json:"container_path"`
-	MountType     string            `json:"mount_type"` // "bind", "volume", "tmpfs"
+	MountType     string            `json:"mount_type"`
+	Services      []string          `json:"services"`
 	ReadOnly      bool              `json:"read_only"`
-	Options       map[string]string `json:"options"`
-	Services      []string          `json:"services"` // Service IDs using this mount
 }
 
 // MountRequest defines parameters for creating a workspace mount
 type MountRequest struct {
+	Options       map[string]string `json:"options"`
 	HostPath      string            `json:"host_path"`
 	ContainerPath string            `json:"container_path"`
 	MountType     string            `json:"mount_type"`
 	ReadOnly      bool              `json:"read_only"`
-	Options       map[string]string `json:"options"`
 }
 
 // NewWorkspaceManager creates a new workspace manager
@@ -394,6 +394,7 @@ func (wm *WorkspaceManager) GetWorkspaceInfo(workspacePath string) (*WorkspaceIn
 
 // WorkspaceInfo contains information about a workspace
 type WorkspaceInfo struct {
+	Modified      time.Time `json:"modified"`
 	Path          string    `json:"path"`
 	FileCount     int64     `json:"file_count"`
 	PythonFiles   int64     `json:"python_files"`
@@ -402,7 +403,6 @@ type WorkspaceInfo struct {
 	ConfigFiles   int64     `json:"config_files"`
 	Size          int64     `json:"size"`
 	TotalSize     int64     `json:"total_size"`
-	Modified      time.Time `json:"modified"`
 }
 
 // Cleanup removes unused mounts
diff --git a/internal/jupyter/workspace_metadata.go b/internal/jupyter/workspace_metadata.go
index 09a817f..5309354 100644
--- a/internal/jupyter/workspace_metadata.go
+++ b/internal/jupyter/workspace_metadata.go
@@ -9,29 +9,30 @@ import (
 	"time"
 
 	"github.com/jfraeys/fetch_ml/internal/config"
+	"github.com/jfraeys/fetch_ml/internal/fileutil"
 	"github.com/jfraeys/fetch_ml/internal/logging"
 )
 
 // WorkspaceMetadata tracks the relationship between Jupyter workspaces and experiments
 type WorkspaceMetadata struct {
+	LinkedAt       time.Time         `json:"linked_at"`
+	LastSync       time.Time         `json:"last_sync"`
+	AdditionalData map[string]string `json:"additional_data"`
 	WorkspacePath  string            `json:"workspace_path"`
 	ExperimentID   string            `json:"experiment_id"`
 	ServiceID      string            `json:"service_id,omitempty"`
-	LinkedAt       time.Time         `json:"linked_at"`
-	LastSync       time.Time         `json:"last_sync"`
-	SyncDirection  string            `json:"sync_direction"` // "pull", "push", "bidirectional"
-	AutoSync       bool              `json:"auto_sync"`
-	SyncInterval   time.Duration     `json:"sync_interval"`
+	SyncDirection  string            `json:"sync_direction"`
 	Tags           []string          `json:"tags"`
-	AdditionalData map[string]string `json:"additional_data"`
+	SyncInterval   time.Duration     `json:"sync_interval"`
+	AutoSync       bool              `json:"auto_sync"`
 }
 
 // WorkspaceMetadataManager manages workspace metadata
 type WorkspaceMetadataManager struct {
 	logger   *logging.Logger
-	metadata map[string]*WorkspaceMetadata // key: workspace path
-	mutex    sync.RWMutex
+	metadata map[string]*WorkspaceMetadata
 	dataFile string
+	mutex    sync.RWMutex
 }
 
 // NewWorkspaceMetadataManager creates a new workspace metadata manager
@@ -319,7 +320,7 @@ func (wmm *WorkspaceMetadataManager) loadMetadata() error {
 	return nil
 }
 
-// saveMetadata saves metadata to disk using PathRegistry
+// saveMetadata saves metadata to disk using PathRegistry with crash safety (fsync)
 func (wmm *WorkspaceMetadataManager) saveMetadata() error {
 	// Use PathRegistry for consistent directory creation
 	paths := config.FromEnv()
@@ -332,7 +333,8 @@ func (wmm *WorkspaceMetadataManager) saveMetadata() error {
 		return fmt.Errorf("failed to marshal metadata: %w", err)
 	}
 
-	if err := os.WriteFile(wmm.dataFile, data, 0644); err != nil {
+	// SECURITY: Write with fsync for crash safety
+	if err := fileutil.WriteFileSafe(wmm.dataFile, data, 0644); err != nil {
 		return fmt.Errorf("failed to write metadata file: %w", err)
 	}
 
@@ -364,7 +366,8 @@ func (wmm *WorkspaceMetadataManager) createWorkspaceMetadataFile(
 		return fmt.Errorf("failed to marshal workspace metadata: %w", err)
 	}
 
-	if err := os.WriteFile(workspaceMetaFile, data, 0600); err != nil {
+	// SECURITY: Write with fsync for crash safety
+	if err := fileutil.WriteFileSafe(workspaceMetaFile, data, 0600); err != nil {
 		return fmt.Errorf("failed to write workspace metadata file: %w", err)
 	}
 
diff --git a/internal/manifest/run_manifest.go b/internal/manifest/run_manifest.go
index 379c643..0b8583f 100644
--- a/internal/manifest/run_manifest.go
+++ b/internal/manifest/run_manifest.go
@@ -108,16 +108,16 @@ type Outcome struct {
 }
 
 type ArtifactFile struct {
+	Modified  time.Time `json:"modified"`
 	Path      string    `json:"path"`
 	SizeBytes int64     `json:"size_bytes"`
-	Modified  time.Time `json:"modified"`
 }
 
 type Artifacts struct {
 	DiscoveryTime  time.Time      `json:"discovery_time"`
 	Files          []ArtifactFile `json:"files,omitempty"`
+	Exclusions     []Exclusion    `json:"exclusions,omitempty"`
 	TotalSizeBytes int64          `json:"total_size_bytes,omitempty"`
-	Exclusions     []Exclusion    `json:"exclusions,omitempty"` // R.5: Scan exclusions recorded
 }
 
 // Exclusion records why a path was excluded from artifact scanning
@@ -129,69 +129,58 @@ type Exclusion struct {
 // ExecutionEnvironment captures the runtime environment for reproducibility.
 // This enables reconstruction and comparison of runs.
 type ExecutionEnvironment struct {
-	ConfigHash         string            `json:"config_hash"`                    // R.2: Resolved config hash
-	GPUCount           int               `json:"gpu_count"`                      // GPU count detected
-	GPUDetectionMethod string            `json:"gpu_detection_method,omitempty"` // R.3: "nvml", "env_override", etc.
-	GPUVendor          string            `json:"gpu_vendor,omitempty"`           // Configured GPU vendor
-	MaxWorkers         int               `json:"max_workers"`                    // Active resource limits
-	PodmanCPUs         string            `json:"podman_cpus,omitempty"`          // CPU limit
-	SandboxNetworkMode string            `json:"sandbox_network_mode"`           // Sandbox settings
-	SandboxSeccomp     string            `json:"sandbox_seccomp,omitempty"`      // Seccomp profile
-	SandboxNoNewPrivs  bool              `json:"sandbox_no_new_privs"`           // Security flags
-	ComplianceMode     string            `json:"compliance_mode,omitempty"`      // HIPAA mode
-	ManifestNonce      string            `json:"manifest_nonce,omitempty"`       // Unique manifest identifier
-	Metadata           map[string]string `json:"metadata,omitempty"`             // Additional env info
+	Metadata           map[string]string `json:"metadata,omitempty"`
+	ConfigHash         string            `json:"config_hash"`
+	GPUDetectionMethod string            `json:"gpu_detection_method,omitempty"`
+	GPUVendor          string            `json:"gpu_vendor,omitempty"`
+	PodmanCPUs         string            `json:"podman_cpus,omitempty"`
+	SandboxNetworkMode string            `json:"sandbox_network_mode"`
+	SandboxSeccomp     string            `json:"sandbox_seccomp,omitempty"`
+	ComplianceMode     string            `json:"compliance_mode,omitempty"`
+	ManifestNonce      string            `json:"manifest_nonce,omitempty"`
+	GPUCount           int               `json:"gpu_count"`
+	MaxWorkers         int               `json:"max_workers"`
+	SandboxNoNewPrivs  bool              `json:"sandbox_no_new_privs"`
 }
 
 // RunManifest is a best-effort, self-contained provenance record for a run.
 // It is written to <run_dir>/run_manifest.json.
 type RunManifest struct {
-	RunID     string    `json:"run_id"`
-	TaskID    string    `json:"task_id"`
-	JobName   string    `json:"job_name"`
-	CreatedAt time.Time `json:"created_at"`
-	StartedAt time.Time `json:"started_at,omitempty"`
-	EndedAt   time.Time `json:"ended_at,omitempty"`
-
-	Annotations []Annotation `json:"annotations,omitempty"`
-	Narrative   *Narrative   `json:"narrative,omitempty"`
-	Outcome     *Outcome     `json:"outcome,omitempty"`
-	Artifacts   *Artifacts   `json:"artifacts,omitempty"`
-
-	CommitID              string `json:"commit_id,omitempty"`
-	ExperimentManifestSHA string `json:"experiment_manifest_sha,omitempty"`
-	DepsManifestName      string `json:"deps_manifest_name,omitempty"`
-	DepsManifestSHA       string `json:"deps_manifest_sha,omitempty"`
-	TrainScriptPath       string `json:"train_script_path,omitempty"`
-
-	WorkerVersion string `json:"worker_version,omitempty"`
-	PodmanImage   string `json:"podman_image,omitempty"`
-	ImageDigest   string `json:"image_digest,omitempty"`
-
-	SnapshotID     string `json:"snapshot_id,omitempty"`
-	SnapshotSHA256 string `json:"snapshot_sha256,omitempty"`
-
-	Command  string `json:"command,omitempty"`
-	Args     string `json:"args,omitempty"`
-	ExitCode *int   `json:"exit_code,omitempty"`
-	Error    string `json:"error,omitempty"`
-
-	StagingDurationMS   int64 `json:"staging_duration_ms,omitempty"`
-	ExecutionDurationMS int64 `json:"execution_duration_ms,omitempty"`
-	FinalizeDurationMS  int64 `json:"finalize_duration_ms,omitempty"`
-	TotalDurationMS     int64 `json:"total_duration_ms,omitempty"`
-
-	GPUDevices []string          `json:"gpu_devices,omitempty"`
-	WorkerHost string            `json:"worker_host,omitempty"`
-	Metadata   map[string]string `json:"metadata,omitempty"`
-
-	// Environment captures execution environment for reproducibility (R.1)
-	Environment *ExecutionEnvironment `json:"environment,omitempty"`
-
-	// Signature fields for tamper detection
-	Signature   string `json:"signature,omitempty"`
-	SignerKeyID string `json:"signer_key_id,omitempty"`
-	SigAlg      string `json:"sig_alg,omitempty"`
+	CreatedAt             time.Time             `json:"created_at"`
+	StartedAt             time.Time             `json:"started_at,omitempty"`
+	EndedAt               time.Time             `json:"ended_at,omitempty"`
+	Artifacts             *Artifacts            `json:"artifacts,omitempty"`
+	Environment           *ExecutionEnvironment `json:"environment,omitempty"`
+	Metadata              map[string]string     `json:"metadata,omitempty"`
+	ExitCode              *int                  `json:"exit_code,omitempty"`
+	Narrative             *Narrative            `json:"narrative,omitempty"`
+	Outcome               *Outcome              `json:"outcome,omitempty"`
+	PodmanImage           string                `json:"podman_image,omitempty"`
+	SnapshotID            string                `json:"snapshot_id,omitempty"`
+	ExperimentManifestSHA string                `json:"experiment_manifest_sha,omitempty"`
+	DepsManifestName      string                `json:"deps_manifest_name,omitempty"`
+	DepsManifestSHA       string                `json:"deps_manifest_sha,omitempty"`
+	TrainScriptPath       string                `json:"train_script_path,omitempty"`
+	WorkerVersion         string                `json:"worker_version,omitempty"`
+	RunID                 string                `json:"run_id"`
+	ImageDigest           string                `json:"image_digest,omitempty"`
+	WorkerHost            string                `json:"worker_host,omitempty"`
+	SnapshotSHA256        string                `json:"snapshot_sha256,omitempty"`
+	Command               string                `json:"command,omitempty"`
+	Args                  string                `json:"args,omitempty"`
+	CommitID              string                `json:"commit_id,omitempty"`
+	Error                 string                `json:"error,omitempty"`
+	SigAlg                string                `json:"sig_alg,omitempty"`
+	SignerKeyID           string                `json:"signer_key_id,omitempty"`
+	Signature             string                `json:"signature,omitempty"`
+	TaskID                string                `json:"task_id"`
+	JobName               string                `json:"job_name"`
+	Annotations           []Annotation          `json:"annotations,omitempty"`
+	GPUDevices            []string              `json:"gpu_devices,omitempty"`
+	TotalDurationMS       int64                 `json:"total_duration_ms,omitempty"`
+	FinalizeDurationMS    int64                 `json:"finalize_duration_ms,omitempty"`
+	ExecutionDurationMS   int64                 `json:"execution_duration_ms,omitempty"`
+	StagingDurationMS     int64                 `json:"staging_duration_ms,omitempty"`
 }
 
 func NewRunManifest(runID, taskID, jobName string, createdAt time.Time) *RunManifest {
@@ -402,3 +391,75 @@ func (m *RunManifest) ValidateStrict() error {
 	v := NewValidator()
 	return v.ValidateStrict(m)
 }
+
+// ValidateProvenance checks if the manifest has complete provenance information.
+// When ProvenanceBestEffort is false (default), this must pass before writing.
+// Returns an error describing what's missing if validation fails.
+func (m *RunManifest) ValidateProvenance() error {
+	if m == nil {
+		return fmt.Errorf("manifest is nil")
+	}
+
+	var missing []string
+
+	// Check Environment is present
+	if m.Environment == nil {
+		missing = append(missing, "environment metadata")
+	} else {
+		// Check ConfigHash - critical for reproducibility
+		if m.Environment.ConfigHash == "" {
+			missing = append(missing, "config_hash")
+		}
+
+		// Check GPU detection method
+		if m.Environment.GPUDetectionMethod == "" {
+			missing = append(missing, "gpu_detection_method")
+		}
+
+		// Check Sandbox configuration
+		if m.Environment.SandboxNetworkMode == "" {
+			missing = append(missing, "sandbox_network_mode")
+		}
+	}
+
+	// Check Artifacts are present (though they may be empty for new runs)
+	if m.Artifacts == nil {
+		missing = append(missing, "artifacts metadata")
+	}
+
+	// Check basic run identification
+	if m.RunID == "" {
+		missing = append(missing, "run_id")
+	}
+	if m.TaskID == "" {
+		missing = append(missing, "task_id")
+	}
+
+	if len(missing) > 0 {
+		return fmt.Errorf("incomplete provenance record: missing %v", missing)
+	}
+
+	return nil
+}
+
+// CanWrite checks if the manifest can be written given the provenance requirements.
+// When ProvenanceBestEffort is false (default), requires complete environment capture.
+// When true, allows partial manifests with warnings.
+func (m *RunManifest) CanWrite(provenanceBestEffort bool) error {
+	// Always validate basic structure
+	if err := m.Validate(); err != nil {
+		return fmt.Errorf("manifest validation failed: %w", err)
+	}
+
+	// If best-effort is enabled, allow partial manifests
+	if provenanceBestEffort {
+		return nil
+	}
+
+	// Otherwise, require complete provenance (fail-closed default)
+	if err := m.ValidateProvenance(); err != nil {
+		return fmt.Errorf("provenance validation failed (set provenance_best_effort: true to allow partial manifests): %w", err)
+	}
+
+	return nil
+}
diff --git a/internal/manifest/schema_version.go b/internal/manifest/schema_version.go
index ca743e4..c77023c 100644
--- a/internal/manifest/schema_version.go
+++ b/internal/manifest/schema_version.go
@@ -8,8 +8,8 @@ const SchemaVersion = "1.0.0"
 type SchemaVersionInfo struct {
 	Version     string
 	Date        string
-	Breaking    bool
 	Description string
+	Breaking    bool
 }
 
 // SchemaChangeHistory documents all schema versions