fetch_ml/internal/worker/process/isolation_unix.go
Jeremie Fraeys 0b5e99f720
refactor(scheduler,worker): improve service management and GPU detection
Scheduler enhancements:
- auth.go: Group membership validation in authentication
- hub.go: Task distribution with group affinity
- port_allocator.go: Dynamic port allocation with conflict resolution
- scheduler_conn.go: Connection pooling and retry logic
- service_manager.go: Lifecycle management for scheduler services
- service_templates.go: Template-based service configuration
- state.go: Persistent state management with recovery

Worker improvements:
- config.go: Extended configuration for task visibility rules
- execution/setup.go: Sandboxed execution environment setup
- executor/container.go: Container runtime integration
- executor/runner.go: Task runner with visibility enforcement
- gpu_detector.go: Robust GPU detection (NVIDIA, AMD, Apple Silicon, CPU fallback)
- integrity/validate.go: Data integrity validation
- lifecycle/runloop.go: Improved runloop with graceful shutdown
- lifecycle/service_manager.go: Service lifecycle coordination
- process/isolation.go + isolation_unix.go: Process isolation with namespaces/cgroups
- tenant/manager.go: Multi-tenant resource isolation
- tenant/middleware.go: Tenant context propagation
- worker.go: Core worker with group-scoped task execution
2026-03-08 13:03:15 -04:00

130 lines
3.7 KiB
Go

//go:build !windows
// +build !windows
package process
import (
	"fmt"
	"runtime"
	"syscall"
)
// applyResourceLimits applies the rlimit-based restrictions described by cfg
// to the current process.
//
// A limit is only applied when its configured value is strictly positive;
// zero or negative values leave the corresponding rlimit untouched. Any
// failure to apply a configured limit is returned, wrapped with the name of
// the limit that could not be set.
func applyResourceLimits(cfg IsolationConfig) error {
	// Cap open file descriptors (RLIMIT_NOFILE) to protect against FD
	// exhaustion. The > 0 guard already rules out negative values, so the
	// uint64 conversion below cannot wrap.
	if cfg.MaxOpenFiles > 0 {
		if err := setResourceLimit(syscall.RLIMIT_NOFILE, uint64(cfg.MaxOpenFiles)); err != nil {
			return fmt.Errorf("failed to set max open files limit: %w", err)
		}
	}

	// Cap the number of processes. setProcessLimit reports success when the
	// kernel answers ENOTSUP or EINVAL, so an error reaching this point is a
	// genuine failure and is propagated rather than ignored.
	if cfg.MaxProcesses > 0 {
		if err := setProcessLimit(cfg.MaxProcesses); err != nil {
			return fmt.Errorf("failed to set max processes limit: %w", err)
		}
	}

	return nil
}
// setResourceLimit pins both the soft (Cur) and hard (Max) value of the given
// rlimit resource to limit for the current process and returns whatever error
// syscall.Setrlimit reports.
func setResourceLimit(resource int, limit uint64) error {
	pinned := syscall.Rlimit{Cur: limit, Max: limit}
	return syscall.Setrlimit(resource, &pinned)
}
// rlimitNprocResource returns the resource number of RLIMIT_NPROC for the
// running platform. The constant is not exported by package syscall on every
// platform and its value is ABI-specific, so it is derived from GOOS/GOARCH.
// ok is false when the number is not known for this platform.
func rlimitNprocResource() (resource int, ok bool) {
	switch runtime.GOOS {
	case "linux", "android":
		switch runtime.GOARCH {
		case "mips", "mipsle", "mips64", "mips64le":
			return 8, true
		case "sparc64":
			return 7, true
		default:
			// Generic Linux numbering: 6 is RLIMIT_NPROC, 7 is RLIMIT_NOFILE.
			return 6, true
		}
	case "darwin", "ios", "freebsd", "netbsd", "openbsd", "dragonfly":
		return 7, true
	default:
		return 0, false
	}
}

// setProcessLimit caps the number of processes (RLIMIT_NPROC) by setting both
// the soft and the hard limit to maxProcs.
//
// It returns an error when maxProcs is negative or when the kernel rejects
// the request. When the RLIMIT_NPROC number is unknown for the platform, or
// the kernel answers ENOTSUP or EINVAL, the call is a silent no-op and nil is
// returned.
func setProcessLimit(maxProcs int) error {
	// Validate before the uint64 conversion so a negative value cannot wrap
	// into a huge limit.
	if maxProcs < 0 {
		return fmt.Errorf("max processes cannot be negative: %d", maxProcs)
	}

	resource, ok := rlimitNprocResource()
	if !ok {
		return nil
	}

	rl := syscall.Rlimit{Cur: uint64(maxProcs), Max: uint64(maxProcs)}
	err := syscall.Setrlimit(resource, &rl)
	// ENOTSUP/EINVAL are treated as "not supported here" and ignored.
	if err != nil && err != syscall.ENOTSUP && err != syscall.EINVAL {
		return err
	}
	return nil
}
// disableSwap asks the kernel to keep every page of this process resident by
// calling mlockall for both currently mapped and future mappings, and returns
// the syscall's error unchanged.
//
// The call is best-effort: it needs the CAP_IPC_LOCK capability, and callers
// are presumably expected to log a failure and carry on (verify against the
// call site).
//
// NOTE(review): the flag values are hard-coded; they match the common Linux
// and BSD/Darwin definitions, but some Linux architectures (e.g. ppc64) use
// different values. Confirm before relying on this there.
func disableSwap() error {
	const (
		lockCurrentPages = 0x1 // MCL_CURRENT: lock all pages mapped right now
		lockFuturePages  = 0x2 // MCL_FUTURE: lock pages mapped from now on
	)
	flags := lockCurrentPages | lockFuturePages
	return syscall.Mlockall(flags)
}
// GetCurrentLimits returns a diagnostic snapshot of the process's rlimits.
//
// The map always contains "NOFILE_soft" and "NOFILE_hard"; further entries
// are added by getPlatformLimits for whatever limits can be read on the
// running platform. An error is returned only when the NOFILE limit itself
// cannot be read, in which case the map is nil.
func GetCurrentLimits() (map[string]uint64, error) {
	var fdLimit syscall.Rlimit
	if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &fdLimit); err != nil {
		return nil, fmt.Errorf("failed to get NOFILE limit: %w", err)
	}

	snapshot := map[string]uint64{
		"NOFILE_soft": fdLimit.Cur,
		"NOFILE_hard": fdLimit.Max,
	}

	// Best-effort extras; limits that cannot be read are simply omitted.
	getPlatformLimits(snapshot)
	return snapshot, nil
}
// getPlatformLimits adds the limits that can be read on the running platform
// to limits, using "<NAME>_soft" and "<NAME>_hard" keys for AS, DATA, NPROC
// and RSS. A limit whose Getrlimit call fails, or whose resource number is
// not known for this platform, is omitted; the function never fails.
func getPlatformLimits(limits map[string]uint64) {
	// record stores the soft/hard pair for one resource, skipping it on error.
	record := func(name string, resource int) {
		var rl syscall.Rlimit
		if err := syscall.Getrlimit(resource, &rl); err != nil {
			return
		}
		limits[name+"_soft"] = rl.Cur
		limits[name+"_hard"] = rl.Max
	}

	record("AS", syscall.RLIMIT_AS)     // virtual memory (address space)
	record("DATA", syscall.RLIMIT_DATA) // data segment

	// RLIMIT_NPROC and RLIMIT_RSS are not exported by package syscall on
	// every platform and their numbers are ABI-specific, so resolve them from
	// GOOS/GOARCH. In particular, 7 is RLIMIT_NOFILE on generic Linux, not
	// RLIMIT_NPROC.
	rss, nproc := -1, -1
	switch runtime.GOOS {
	case "linux", "android":
		switch runtime.GOARCH {
		case "mips", "mipsle", "mips64", "mips64le":
			rss, nproc = 7, 8
		case "sparc64":
			rss, nproc = 5, 7
		default:
			rss, nproc = 5, 6
		}
	case "darwin", "ios", "freebsd", "netbsd", "openbsd", "dragonfly":
		rss, nproc = 5, 7
	}

	if nproc >= 0 {
		record("NPROC", nproc)
	}
	if rss >= 0 {
		record("RSS", rss)
	}
}