fetch_ml/internal/container/podman.go
Jeremie Fraeys 92aab06d76
feat(security): implement comprehensive security hardening phases 1-5,7
Implements defense-in-depth security for HIPAA and multi-tenant requirements:

**Phase 1 - File Ingestion Security:**
- SecurePathValidator with symlink resolution and path boundary enforcement
  in internal/fileutil/secure.go
- Magic bytes validation for ML artifacts (safetensors, GGUF, HDF5, numpy)
  in internal/fileutil/filetype.go
- Dangerous extension blocking (.pt, .pkl, .exe, .sh, .zip)
- Upload limits (10GB size, 100MB/s rate, 10 uploads/min)

**Phase 2 - Sandbox Hardening:**
- ApplySecurityDefaults() with secure-by-default principle
  - network_mode: none, read_only_root: true, no_new_privileges: true
  - drop_all_caps: true, user_ns: true, run_as_uid/gid: 1000
- PodmanSecurityConfig and BuildSecurityArgs() in internal/container/podman.go
- BuildPodmanCommand now accepts full security configuration
- Container executor passes SandboxConfig to Podman command builder
- configs/seccomp/default-hardened.json blocks dangerous syscalls
  (ptrace, mount, reboot, kexec_load, open_by_handle_at)

**Phase 3 - Secrets Management:**
- expandSecrets() for environment variable expansion using ${VAR} syntax
- validateNoPlaintextSecrets() with entropy-based detection
- Pattern matching for AWS, GitHub, GitLab, OpenAI, Stripe tokens
- Shannon entropy calculation (>4 bits/char triggers detection)
- Secrets expanded during LoadConfig() before validation

**Phase 5 - HIPAA Audit Logging:**
- Tamper-evident chain hashing with SHA-256 in internal/audit/audit.go
- Event struct extended with PrevHash, EventHash, SequenceNum
- File access event types: EventFileRead, EventFileWrite, EventFileDelete
- LogFileAccess() helper for HIPAA compliance
- VerifyChain() function for tamper detection

**Supporting Changes:**
- Add DeleteJob() and DeleteJobsByPrefix() to storage package
- Integrate SecurePathValidator in artifact scanning
2026-02-23 18:00:33 -05:00

707 lines
20 KiB
Go

// Package container provides Podman container management utilities.
package container
import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"github.com/jfraeys/fetch_ml/internal/config"
"github.com/jfraeys/fetch_ml/internal/logging"
)
// PodmanManager manages Podman containers by invoking the `podman` CLI
// through os/exec. It carries no state other than its logger.
type PodmanManager struct {
// logger is used by the start/stop/remove and secret methods to record
// Info-level events after a successful operation.
logger *logging.Logger
}
// NewPodmanManager returns a PodmanManager that reports through logger.
//
// The returned error is always nil; it is part of the signature so callers
// already handle a failure path. The logger is stored as given and is not
// checked for nil here.
func NewPodmanManager(logger *logging.Logger) (*PodmanManager, error) {
	manager := new(PodmanManager)
	manager.logger = logger
	return manager, nil
}
// ContainerConfig holds configuration for starting a container with
// StartContainer. BuildRunArgs translates most fields into `podman run`
// flags; Secrets are created and attached separately by StartContainer.
type ContainerConfig struct {
// Name is passed as --name when non-empty.
Name string `json:"name"`
// Image is the image reference placed after all flags.
Image string `json:"image"`
// Command is appended after Image as the container's arguments.
Command []string `json:"command"`
// Env entries become `-e KEY=VALUE` flags. Map iteration order is
// unspecified, so the relative order of these flags is too.
Env map[string]string `json:"env"`
Secrets []PodmanSecret `json:"secrets"` // Sensitive data as Podman secrets
// Volumes maps host path -> container path; each entry becomes `-v host:container`.
Volumes map[string]string `json:"volumes"`
// Ports maps host port -> container port; each entry becomes `-p host:container`.
Ports map[int]int `json:"ports"`
// SecurityOpts entries are each passed via --security-opt.
SecurityOpts []string `json:"security_opts"`
// Resources holds optional memory/CPU limits and device passthrough settings.
Resources ResourceConfig `json:"resources"`
// Network is not read by BuildRunArgs.
// NOTE(review): confirm where AllowNetwork is meant to be enforced.
Network NetworkConfig `json:"network"`
}
// ResourceConfig defines resource limits and device passthrough for
// containers, as consumed by BuildRunArgs.
type ResourceConfig struct {
// MemoryLimit is passed verbatim as --memory when non-empty.
MemoryLimit string `json:"memory_limit"`
// CPULimit is passed verbatim as --cpus when non-empty.
CPULimit string `json:"cpu_limit"`
// GPUDevices lists device paths, each passed via --device.
GPUDevices []string `json:"gpu_devices"`
// AppleGPU, when true, adds --device /dev/metal and --device /dev/mps.
AppleGPU bool `json:"apple_gpu"`
}
// NetworkConfig defines network settings for containers.
type NetworkConfig struct {
// AllowNetwork expresses whether the container may use the network.
// NOTE(review): BuildRunArgs does not read this flag — confirm where it is enforced.
AllowNetwork bool `json:"allow_network"`
}
// podmanCgroupsMode reports the cgroups mode requested through the
// FETCHML_PODMAN_CGROUPS environment variable, with surrounding whitespace
// removed. It returns "" when the variable is unset or blank.
func podmanCgroupsMode() string {
	raw := os.Getenv("FETCHML_PODMAN_CGROUPS")
	return strings.TrimSpace(raw)
}
// BuildRunArgs assembles the argument list for a detached `podman run`
// invocation described by config.
//
// Flags are emitted in a fixed group order: cgroups override, name, security
// options, resource limits and devices, volumes, ports, environment, then the
// image followed by its command. Within the volume, port and environment
// groups the order follows Go map iteration and is therefore unspecified.
//
// config.Secrets and config.Network are not consulted here; StartContainer
// adds the secret flags itself. config must be non-nil.
func BuildRunArgs(config *ContainerConfig) []string {
	args := []string{"run", "-d"}
	push := func(parts ...string) { args = append(args, parts...) }

	if podmanCgroupsMode() == "disabled" {
		push("--cgroups=disabled")
	}
	if name := config.Name; name != "" {
		push("--name", name)
	}
	for i := range config.SecurityOpts {
		push("--security-opt", config.SecurityOpts[i])
	}

	// Resource limits and device passthrough.
	limits := config.Resources
	if limits.MemoryLimit != "" {
		push("--memory", limits.MemoryLimit)
	}
	if limits.CPULimit != "" {
		push("--cpus", limits.CPULimit)
	}
	if limits.AppleGPU {
		push("--device", "/dev/metal", "--device", "/dev/mps")
	}
	for _, dev := range limits.GPUDevices {
		push("--device", dev)
	}

	// Map-backed groups: host -> container for volumes and ports.
	for host, inside := range config.Volumes {
		push("-v", host+":"+inside)
	}
	for host, inside := range config.Ports {
		push("-p", fmt.Sprintf("%d:%d", host, inside))
	}
	for name, val := range config.Env {
		push("-e", name+"="+val)
	}

	// Everything after the image is handed to the container as its command.
	push(config.Image)
	push(config.Command...)
	return args
}
// ParseContainerID extracts the container ID from the output of `podman run -d`.
//
// The ID is taken to be the last non-blank line of output, trimmed of
// surrounding whitespace, so any earlier lines (such as warnings) are ignored.
// An error is returned when output contains no non-whitespace text.
func ParseContainerID(output string) (string, error) {
	trimmed := strings.TrimSpace(output)
	if len(trimmed) == 0 {
		return "", fmt.Errorf("no container ID returned")
	}
	// trimmed ends with a non-space character, so the text after its final
	// newline is always the last non-blank line.
	tail := trimmed
	if nl := strings.LastIndexByte(trimmed, '\n'); nl >= 0 {
		tail = trimmed[nl+1:]
	}
	return strings.TrimSpace(tail), nil
}
// StartContainer creates the Podman secrets listed in config, launches the
// container in detached mode and returns its container ID.
//
// Secrets are created first and attached with --secret flags inserted directly
// after "run -d". If creating a secret, starting the container or parsing the
// container ID fails, every secret created by this call is removed again on a
// best-effort basis and the error is returned. On success the secrets are left
// in place for the running container.
func (pm *PodmanManager) StartContainer(
	ctx context.Context,
	config *ContainerConfig,
) (string, error) {
	// removeSecrets deletes the given secrets, ignoring individual failures.
	// It deliberately uses a fresh context: a cancelled or expired ctx is a
	// common reason for ending up here, and exec.CommandContext refuses to run
	// with a context that is already done, which would leave the secrets behind.
	removeSecrets := func(created []PodmanSecret) {
		for _, s := range created {
			_ = pm.DeleteSecret(context.Background(), s.Name)
		}
	}

	// Create Podman secrets for sensitive data.
	for i, secret := range config.Secrets {
		if err := pm.CreateSecret(ctx, secret.Name, secret.Data); err != nil {
			// Only the secrets before index i were created by this call.
			// Slicing by index (rather than stopping at the first matching
			// name) stays correct when two entries share a name.
			removeSecrets(config.Secrets[:i])
			return "", fmt.Errorf("failed to create secret %s: %w", secret.Name, err)
		}
	}

	args := BuildRunArgs(config)

	// Insert the secret flags after "run -d" and before every other argument.
	if secretArgs := BuildSecretArgs(config.Secrets); len(secretArgs) > 0 {
		withSecrets := make([]string, 0, len(args)+len(secretArgs))
		withSecrets = append(withSecrets, args[0], args[1])
		withSecrets = append(withSecrets, secretArgs...)
		withSecrets = append(withSecrets, args[2:]...)
		args = withSecrets
	}

	cmd := exec.CommandContext(ctx, "podman", args...)
	output, err := cmd.CombinedOutput()
	if err != nil {
		removeSecrets(config.Secrets)
		return "", fmt.Errorf("failed to start container: %w, output: %s", err, string(output))
	}

	containerID, err := ParseContainerID(string(output))
	if err != nil {
		removeSecrets(config.Secrets)
		return "", err
	}

	pm.logger.Info("container started",
		"container_id", containerID,
		"name", config.Name,
		"secrets", len(config.Secrets))
	return containerID, nil
}
// StopContainer stops the container identified by containerID with `podman stop`.
//
// containerID is validated the same way as in the status and exec methods
// (non-empty, no shell metacharacters). It must additionally not start with
// '-': the value is passed as a positional argument, so something like "--all"
// would otherwise be interpreted by podman as a flag.
//
// Returns an error for an invalid ID or when the podman command fails; the
// command's combined output is included in the latter.
func (pm *PodmanManager) StopContainer(ctx context.Context, containerID string) error {
	if containerID == "" || strings.HasPrefix(containerID, "-") ||
		strings.ContainsAny(containerID, "&;|<>$`\"'") {
		return fmt.Errorf("invalid container ID: %s", containerID)
	}
	cmd := exec.CommandContext(ctx, "podman", "stop", containerID) //nolint:gosec
	output, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("failed to stop container: %w, output: %s", err, string(output))
	}
	pm.logger.Info("container stopped", "container_id", containerID)
	return nil
}
// GetContainerStateStatus reports the lifecycle state of a container as
// printed by `podman inspect --format {{.State.Status}}`.
// Typical values: running, exited, created, paused.
//
// An empty containerID, or one containing shell metacharacters, is rejected
// before podman is invoked. If podman succeeds but prints nothing, the state
// is reported as "unknown".
func (pm *PodmanManager) GetContainerStateStatus(
	ctx context.Context,
	containerID string,
) (string, error) {
	const forbidden = "&;|<>$`\"'"
	if len(containerID) == 0 || strings.ContainsAny(containerID, forbidden) {
		return "", fmt.Errorf("invalid container ID: %s", containerID)
	}

	inspect := exec.CommandContext(ctx, "podman", "inspect", "--format", "{{.State.Status}}", containerID) //nolint:gosec
	raw, err := inspect.CombinedOutput()
	if err != nil {
		return "", fmt.Errorf("failed to inspect container: %w, output: %s", err, string(raw))
	}

	if state := strings.TrimSpace(string(raw)); state != "" {
		return state, nil
	}
	return "unknown", nil
}
// RemoveContainer removes the container identified by containerID with `podman rm`.
//
// containerID is validated the same way as in the status and exec methods
// (non-empty, no shell metacharacters). It must additionally not start with
// '-': the value is passed as a positional argument, so something like "--all"
// would otherwise be interpreted by podman as a flag.
//
// Returns an error for an invalid ID or when the podman command fails; the
// command's combined output is included in the latter.
func (pm *PodmanManager) RemoveContainer(ctx context.Context, containerID string) error {
	if containerID == "" || strings.HasPrefix(containerID, "-") ||
		strings.ContainsAny(containerID, "&;|<>$`\"'") {
		return fmt.Errorf("invalid container ID: %s", containerID)
	}
	cmd := exec.CommandContext(ctx, "podman", "rm", containerID) //nolint:gosec
	output, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("failed to remove container: %w, output: %s", err, string(output))
	}
	pm.logger.Info("container removed", "container_id", containerID)
	return nil
}
// GetContainerStatus returns the status column that `podman ps` prints for
// the container with the given ID.
//
// Running containers are queried first; when that yields nothing the query is
// repeated with -a so stopped containers are included. If neither query prints
// a status the result is "unknown". An empty containerID, or one containing
// shell metacharacters, is rejected before podman is invoked.
func (pm *PodmanManager) GetContainerStatus(
	ctx context.Context,
	containerID string,
) (string, error) {
	const forbidden = "&;|<>$`\"'"
	if len(containerID) == 0 || strings.ContainsAny(containerID, forbidden) {
		return "", fmt.Errorf("invalid container ID: %s", containerID)
	}

	idFilter := "id=" + containerID
	// First pass: running containers only. Second pass: all containers.
	queries := [][]string{
		{"ps", "--filter", idFilter, "--format", "{{.Status}}"},
		{"ps", "-a", "--filter", idFilter, "--format", "{{.Status}}"},
	}
	for _, query := range queries {
		raw, err := exec.CommandContext(ctx, "podman", query...).CombinedOutput() //nolint:gosec
		if err != nil {
			return "", fmt.Errorf("failed to get container status: %w, output: %s", err, string(raw))
		}
		if status := strings.TrimSpace(string(raw)); status != "" {
			return status, nil
		}
	}
	return "unknown", nil
}
// ExecContainer runs command inside the container via `podman exec` and
// returns the combined stdout/stderr of that invocation.
//
// The container ID must be non-empty, and neither it nor any element of
// command may contain shell metacharacters; offending input is rejected before
// podman is invoked. On failure the returned string is empty and the error
// carries the command's output.
func (pm *PodmanManager) ExecContainer(ctx context.Context, containerID string, command []string) (string, error) {
	const forbidden = "&;|<>$`\"'"
	if len(containerID) == 0 || strings.ContainsAny(containerID, forbidden) {
		return "", fmt.Errorf("invalid container ID: %s", containerID)
	}
	for i := range command {
		if strings.ContainsAny(command[i], forbidden) {
			return "", fmt.Errorf("invalid command argument: %s", command[i])
		}
	}

	// podman exec <container> <command...>
	execArgs := make([]string, 0, len(command)+2)
	execArgs = append(execArgs, "exec", containerID)
	execArgs = append(execArgs, command...)

	raw, err := exec.CommandContext(ctx, "podman", execArgs...).CombinedOutput() //nolint:gosec
	if err != nil {
		return "", fmt.Errorf("failed to execute command in container: %w, output: %s", err, string(raw))
	}
	return string(raw), nil
}
// PodmanConfig holds configuration for Podman container execution, as
// consumed by BuildPodmanCommand, BuildPodmanCommandLegacy and
// ValidateSecurityPolicy.
type PodmanConfig struct {
// Image is the image reference; the --workspace/--deps/--script arguments
// are appended after it.
Image string
// Workspace is the host path mounted read-write at ContainerWorkspace.
Workspace string
// Results is the host path mounted read-write at ContainerResults.
Results string
// ContainerWorkspace is the in-container mount point for Workspace; it is
// also passed to the container as the --workspace value.
ContainerWorkspace string
// ContainerResults is the in-container mount point for Results.
ContainerResults string
// AppleGPU is not read by the command builders in this file; they only
// iterate GPUDevices. NOTE(review): presumably resolved into GPUDevices by
// the caller — confirm.
AppleGPU bool
// GPUDevices lists device paths, each passed via --device.
GPUDevices []string
// Env entries become `-e KEY=VALUE` flags.
Env map[string]string
// Volumes maps host path -> container path; each entry becomes
// `-v host:container` with no access-mode suffix.
Volumes map[string]string
// Memory is the --memory value; config.DefaultPodmanMemory is used when empty.
Memory string
// CPUs is the --cpus value; config.DefaultPodmanCPUs is used when empty.
CPUs string
Privileged bool // Security: must be false
Network string // Security: must not be "host"
ReadOnlyMounts bool // Security: true for dataset mounts
}
// PodmanResourceOverrides turns a task's resource request into values for
// Podman's `--cpus` and `--memory` flags.
//
// Both inputs are optional. A cpu count greater than zero is rendered as a
// plain decimal number; a memoryGB greater than zero is rendered with a "g"
// suffix (for example "8g"). Values <= 0 leave the corresponding result empty.
func PodmanResourceOverrides(cpu int, memoryGB int) (cpus string, memory string) {
	if memoryGB > 0 {
		memory = fmt.Sprintf("%dg", memoryGB)
	}
	if cpu > 0 {
		cpus = strconv.Itoa(cpu)
	}
	return cpus, memory
}
// PodmanSecurityConfig holds security configuration for Podman containers.
// BuildSecurityArgs converts it into `podman run` flags as described per field.
type PodmanSecurityConfig struct {
// NoNewPrivileges adds `--security-opt no-new-privileges:true`.
NoNewPrivileges bool
// DropAllCaps adds `--cap-drop=all`.
DropAllCaps bool
// AllowedCaps lists capabilities re-added with `--cap-add=`. Empty entries
// are skipped, and the list is only honoured when DropAllCaps is true.
AllowedCaps []string
// UserNS enables `--userns keep-id` together with `--user UID:GID`. Both
// flags are emitted only when RunAsUID and RunAsGID are greater than zero.
UserNS bool
// RunAsUID is the UID used in --user (see UserNS).
RunAsUID int
// RunAsGID is the GID used in --user (see UserNS).
RunAsGID int
// SeccompProfile is resolved with GetSeccompProfilePath and passed as
// `--security-opt seccomp=<path>`. The values "" and "unconfined", or a
// profile that cannot be resolved, add no flag.
SeccompProfile string
// ReadOnlyRoot adds `--read-only`.
ReadOnlyRoot bool
// NetworkMode is passed as --network; an empty value is treated as "none".
NetworkMode string
}
// BuildSecurityArgs translates sandbox into the security-related flags of a
// `podman run` command line.
//
// Flags appear in this order: no-new-privileges, capability dropping (with any
// re-added capabilities), user namespace and user, seccomp profile, read-only
// root filesystem, and finally --network, which is always present and falls
// back to "none" when no mode is configured.
//
// Settings that do not apply are skipped without an error: AllowedCaps when
// DropAllCaps is false, the user mapping unless UserNS is set and both IDs are
// positive, and a seccomp profile that is empty, "unconfined" or that
// GetSeccompProfilePath cannot resolve.
func BuildSecurityArgs(sandbox PodmanSecurityConfig) []string {
	var args []string

	if sandbox.NoNewPrivileges {
		args = append(args, "--security-opt", "no-new-privileges:true")
	}

	if sandbox.DropAllCaps {
		args = append(args, "--cap-drop=all")
		for i := range sandbox.AllowedCaps {
			if name := sandbox.AllowedCaps[i]; name != "" {
				args = append(args, "--cap-add="+name)
			}
		}
	}

	wantUserMapping := sandbox.UserNS && sandbox.RunAsUID > 0 && sandbox.RunAsGID > 0
	if wantUserMapping {
		args = append(args,
			"--userns", "keep-id",
			"--user", fmt.Sprintf("%d:%d", sandbox.RunAsUID, sandbox.RunAsGID),
		)
	}

	switch sandbox.SeccompProfile {
	case "", "unconfined":
		// No custom profile requested.
	default:
		if resolved := GetSeccompProfilePath(sandbox.SeccompProfile); resolved != "" {
			args = append(args, "--security-opt", "seccomp="+resolved)
		}
	}

	if sandbox.ReadOnlyRoot {
		args = append(args, "--read-only")
	}

	// Secure default: no network unless a mode is set explicitly.
	mode := "none"
	if sandbox.NetworkMode != "" {
		mode = sandbox.NetworkMode
	}
	return append(args, "--network", mode)
}
// GetSeccompProfilePath resolves a seccomp profile name to a file path.
//
// It looks for "<profileName>.json" in, in order: configs/seccomp (relative to
// the process working directory), /etc/fetchml/seccomp and
// /usr/share/fetchml/seccomp, returning the first candidate that os.Stat can
// access. When none matches and profileName is itself an absolute path, that
// path is returned as-is without checking that it exists. Otherwise the result
// is "".
func GetSeccompProfilePath(profileName string) string {
	fileName := profileName + ".json"
	profileDirs := [...]string{
		filepath.Join("configs", "seccomp"),
		filepath.Join("/etc", "fetchml", "seccomp"),
		filepath.Join("/usr", "share", "fetchml", "seccomp"),
	}
	for _, dir := range profileDirs {
		candidate := filepath.Join(dir, fileName)
		if _, statErr := os.Stat(candidate); statErr == nil {
			return candidate
		}
	}

	// Not a known profile name; accept it only if it is already a full path.
	if !filepath.IsAbs(profileName) {
		return ""
	}
	return profileName
}
// BuildPodmanCommand prepares (but does not start) a `podman run --rm` command
// that executes an ML experiment under the given sandbox settings.
//
// Argument order: security flags from BuildSecurityArgs(sandbox), memory and
// CPU limits (falling back to config.DefaultPodmanMemory and
// config.DefaultPodmanCPUs when unset), the read-write workspace and results
// mounts, additional volumes, GPU devices, environment variables, and then the
// image followed by --workspace, --deps and --script. When extraArgs is
// non-empty it is appended after a literal --args flag.
//
// Volumes and environment variables come from maps, so their relative order is
// unspecified. The returned command is bound to ctx.
func BuildPodmanCommand(
	ctx context.Context,
	cfg PodmanConfig,
	sandbox PodmanSecurityConfig,
	scriptPath, depsPath string,
	extraArgs []string,
) *exec.Cmd {
	// Resolve resource limits up front, applying package defaults.
	memory, cpus := cfg.Memory, cfg.CPUs
	if memory == "" {
		memory = config.DefaultPodmanMemory
	}
	if cpus == "" {
		cpus = config.DefaultPodmanCPUs
	}

	args := append([]string{"run", "--rm"}, BuildSecurityArgs(sandbox)...)
	args = append(args, "--memory", memory, "--cpus", cpus)

	// Workspace and results are always mounted read-write.
	args = append(args,
		"-v", cfg.Workspace+":"+cfg.ContainerWorkspace+":rw",
		"-v", cfg.Results+":"+cfg.ContainerResults+":rw",
	)
	for host, inside := range cfg.Volumes {
		args = append(args, "-v", host+":"+inside)
	}

	// GPU device paths are supplied by the caller.
	for i := range cfg.GPUDevices {
		args = append(args, "--device", cfg.GPUDevices[i])
	}
	for name, value := range cfg.Env {
		args = append(args, "-e", name+"="+value)
	}

	// Everything after the image is passed to the container.
	args = append(args, cfg.Image,
		"--workspace", cfg.ContainerWorkspace,
		"--deps", depsPath,
		"--script", scriptPath,
	)
	if len(extraArgs) != 0 {
		args = append(args, "--args")
		args = append(args, extraArgs...)
	}
	return exec.CommandContext(ctx, "podman", args...)
}
// BuildPodmanCommandLegacy prepares (but does not start) a `podman run --rm`
// command using the fixed legacy hardening: no-new-privileges, all
// capabilities dropped and `--userns keep-id`.
//
// cfg.Network is passed as --network only when non-empty, and --read-only is
// added when cfg.ReadOnlyMounts is true. Memory and CPU limits fall back to
// config.DefaultPodmanMemory and config.DefaultPodmanCPUs. Mounts, devices,
// environment variables, image arguments and extraArgs are handled exactly as
// in BuildPodmanCommand.
//
// Deprecated: Use BuildPodmanCommand with a PodmanSecurityConfig instead.
func BuildPodmanCommandLegacy(
	ctx context.Context,
	cfg PodmanConfig,
	scriptPath, depsPath string,
	extraArgs []string,
) *exec.Cmd {
	// Resolve resource limits up front, applying package defaults.
	memory, cpus := cfg.Memory, cfg.CPUs
	if memory == "" {
		memory = config.DefaultPodmanMemory
	}
	if cpus == "" {
		cpus = config.DefaultPodmanCPUs
	}

	args := []string{
		"run", "--rm",
		"--security-opt", "no-new-privileges",
		"--cap-drop", "ALL",
	}
	if mode := cfg.Network; mode != "" {
		args = append(args, "--network", mode)
	}
	if cfg.ReadOnlyMounts {
		// Makes the container's root filesystem read-only.
		args = append(args, "--read-only")
	}
	args = append(args,
		"--memory", memory,
		"--cpus", cpus,
		"--userns", "keep-id",
	)

	// Workspace and results are always mounted read-write.
	args = append(args,
		"-v", cfg.Workspace+":"+cfg.ContainerWorkspace+":rw",
		"-v", cfg.Results+":"+cfg.ContainerResults+":rw",
	)
	for host, inside := range cfg.Volumes {
		args = append(args, "-v", host+":"+inside)
	}

	// GPU device paths are supplied by the caller.
	for i := range cfg.GPUDevices {
		args = append(args, "--device", cfg.GPUDevices[i])
	}
	for name, value := range cfg.Env {
		args = append(args, "-e", name+"="+value)
	}

	// Everything after the image is passed to the container.
	args = append(args, cfg.Image,
		"--workspace", cfg.ContainerWorkspace,
		"--deps", depsPath,
		"--script", scriptPath,
	)
	if len(extraArgs) != 0 {
		args = append(args, "--args")
		args = append(args, extraArgs...)
	}
	return exec.CommandContext(ctx, "podman", args...)
}
// ValidateSecurityPolicy checks cfg against the hard security requirements
// for containers.
//
// It returns an error wrapping ErrSecurityViolation when cfg requests a
// privileged container or the "host" network mode, and nil otherwise.
func ValidateSecurityPolicy(cfg PodmanConfig) error {
	switch {
	case cfg.Privileged:
		return fmt.Errorf("privileged containers are not allowed: %w", ErrSecurityViolation)
	case cfg.Network == "host":
		return fmt.Errorf("host network mode is not allowed: %w", ErrSecurityViolation)
	}
	// cfg.ReadOnlyMounts being false is deliberately not rejected: it is a
	// warning-level concern noted for audit purposes, not a policy violation.
	return nil
}
// PodmanSecret represents a secret to be mounted in a container.
// StartContainer creates one Podman secret per entry and BuildSecretArgs
// produces the matching --secret flag.
type PodmanSecret struct {
Name string // Secret name in Podman
Data []byte // Secret data; CreateSecret pipes it to `podman secret create` on stdin
Target string // Mount path inside container; defaults to /run/secrets/<Name> when empty
EnvVar string // Environment variable name (optional, if set mounts as env var instead of file)
}
// CreateSecret registers a Podman secret called name whose content is data.
//
// The data is streamed to `podman secret create <name> -` on standard input,
// so it does not appear on the command line. On failure the returned error
// includes podman's combined output; on success the secret's name (never its
// content) is logged.
func (pm *PodmanManager) CreateSecret(ctx context.Context, name string, data []byte) error {
	create := exec.CommandContext(ctx, "podman", "secret", "create", name, "-")
	create.Stdin = strings.NewReader(string(data))

	if raw, err := create.CombinedOutput(); err != nil {
		return fmt.Errorf("failed to create secret %s: %w, output: %s", name, err, string(raw))
	}

	pm.logger.Info("secret created", "name", name)
	return nil
}
// DeleteSecret removes the Podman secret called name using `podman secret rm`.
//
// On failure the returned error includes podman's combined output; on success
// the removal is logged.
func (pm *PodmanManager) DeleteSecret(ctx context.Context, name string) error {
	raw, err := exec.CommandContext(ctx, "podman", "secret", "rm", name).CombinedOutput()
	if err != nil {
		return fmt.Errorf("failed to delete secret %s: %w, output: %s", name, err, string(raw))
	}

	pm.logger.Info("secret deleted", "name", name)
	return nil
}
// BuildSecretArgs produces the `--secret` flags that attach secrets to a
// `podman run` invocation, two arguments per secret and in input order.
//
// A secret with EnvVar set is exposed as that environment variable
// (type=env). Otherwise it is mounted as a file (type=mount) at Target, or at
// /run/secrets/<Name> when Target is empty. The result is an empty, non-nil
// slice when there are no secrets.
func BuildSecretArgs(secrets []PodmanSecret) []string {
	args := []string{}
	for i := range secrets {
		s := &secrets[i]

		var spec string
		switch {
		case s.EnvVar != "":
			spec = s.Name + ",type=env,target=" + s.EnvVar
		case s.Target != "":
			spec = s.Name + ",type=mount,target=" + s.Target
		default:
			spec = s.Name + ",type=mount,target=/run/secrets/" + s.Name
		}
		args = append(args, "--secret", spec)
	}
	return args
}
// SanitizeContainerEnv splits env into secrets and plain variables.
//
// A variable is treated as sensitive when its name contains any entry of
// sensitiveKeys, compared case-insensitively. Each sensitive variable with a
// non-empty value is returned as a PodmanSecret that re-exposes the value
// under the same variable name; every other variable, including sensitive
// ones whose value is empty, is copied to the returned map. env itself is not
// modified.
//
// Secret names have the form fetchml_<lower-cased key>_<pid>.
// NOTE(review): that name is identical for every call made by one process
// with the same key — confirm callers never hold two such secrets at once.
func SanitizeContainerEnv(env map[string]string, sensitiveKeys []string) ([]PodmanSecret, map[string]string) {
	var (
		secrets  = []PodmanSecret{}
		cleanEnv = make(map[string]string)
	)

	// looksSensitive reports whether name contains one of the sensitive markers.
	looksSensitive := func(name string) bool {
		lowered := strings.ToLower(name)
		for _, marker := range sensitiveKeys {
			if strings.Contains(lowered, strings.ToLower(marker)) {
				return true
			}
		}
		return false
	}

	for key, value := range env {
		if value == "" || !looksSensitive(key) {
			cleanEnv[key] = value
			continue
		}
		// Sensitive: hand the value over as a secret instead of a plain -e
		// flag. It is exposed as an env var to maintain compatibility.
		secrets = append(secrets, PodmanSecret{
			Name:   fmt.Sprintf("fetchml_%s_%d", strings.ToLower(key), os.Getpid()),
			Data:   []byte(value),
			EnvVar: key,
		})
	}
	return secrets, cleanEnv
}
// ErrSecurityViolation is returned when a security policy is violated.
// ValidateSecurityPolicy wraps it with %w, so callers can detect it with
// errors.Is regardless of the specific message.
var ErrSecurityViolation = fmt.Errorf("security policy violation")
// SanitizePath normalizes path with filepath.Clean and rejects it if the
// result still walks upward out of its starting directory.
//
// After cleaning, a ".." can only survive as a whole path component (for
// example "../etc" or "a/../../b"), so the check compares components rather
// than searching for the substring "..". A plain substring search would also
// reject harmless names such as "model..v2.bin". Components are split on both
// '/' and '\\' so backslash-separated input is still treated conservatively on
// every platform.
//
// It returns the cleaned path, or an error naming the original input when a
// ".." component remains.
func SanitizePath(path string) (string, error) {
	cleaned := filepath.Clean(path)

	isSeparator := func(r rune) bool { return r == '/' || r == '\\' }
	for _, component := range strings.FieldsFunc(cleaned, isSeparator) {
		if component == ".." {
			return "", fmt.Errorf("path traversal detected: %s", path)
		}
	}
	return cleaned, nil
}
// ValidateJobName reports whether jobName is safe to use as a job identifier.
//
// It returns an error when the name is empty, contains any of the characters
// / \ < > : " | ? * or contains the sequence "..". Otherwise it returns nil.
func ValidateJobName(jobName string) error {
	switch {
	case len(jobName) == 0:
		return fmt.Errorf("job name cannot be empty")
	case strings.ContainsAny(jobName, "/\\<>:\"|?*"):
		// Separators and characters that are unsafe in file names.
		return fmt.Errorf("job name contains invalid characters: %s", jobName)
	case strings.Contains(jobName, ".."):
		return fmt.Errorf("job name contains path traversal: %s", jobName)
	default:
		return nil
	}
}