fetch_ml/internal/container/podman.go

// Package container provides Podman container management utilities.
package container

import (
	"context"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"sort"
	"strconv"
	"strings"

	"github.com/jfraeys/fetch_ml/internal/config"
	"github.com/jfraeys/fetch_ml/internal/logging"
)

// PodmanManager manages Podman containers and secrets by invoking the
// `podman` command-line tool.
type PodmanManager struct {
	// logger receives an Info entry after each successful container or
	// secret operation performed by the manager's methods.
	logger *logging.Logger
}

// NewPodmanManager returns a PodmanManager that logs through logger.
// The returned error is always nil.
func NewPodmanManager(logger *logging.Logger) (*PodmanManager, error) {
	manager := new(PodmanManager)
	manager.logger = logger
	return manager, nil
}

// ContainerConfig holds configuration for starting a container.
// BuildRunArgs translates it into `podman run` arguments; StartContainer
// additionally creates and mounts the entries in Secrets.
type ContainerConfig struct {
	// Name is passed as --name when non-empty.
	Name         string            `json:"name"`
	// Image is the image reference placed after all flags.
	Image        string            `json:"image"`
	// Command is appended after the image.
	Command      []string          `json:"command"`
	// Env entries become -e KEY=VALUE flags.
	Env          map[string]string `json:"env"`
	Secrets      []PodmanSecret    `json:"secrets"` // Sensitive data as Podman secrets
	// Volumes maps host path -> container path; each entry becomes -v host:container.
	Volumes      map[string]string `json:"volumes"`
	// Ports maps host port -> container port; each entry becomes -p host:container.
	Ports        map[int]int       `json:"ports"`
	// SecurityOpts entries are each passed as --security-opt.
	SecurityOpts []string          `json:"security_opts"`
	// Resources carries memory, CPU and device settings.
	Resources    ResourceConfig    `json:"resources"`
	// Network is not read by BuildRunArgs in this file.
	// NOTE(review): presumably enforced by a caller — confirm.
	Network      NetworkConfig     `json:"network"`
}

// ResourceConfig defines resource limits for containers.
type ResourceConfig struct {
	// MemoryLimit is passed verbatim as --memory when non-empty.
	MemoryLimit string   `json:"memory_limit"`
	// CPULimit is passed verbatim as --cpus when non-empty.
	CPULimit    string   `json:"cpu_limit"`
	// GPUDevices entries are each passed as --device.
	GPUDevices  []string `json:"gpu_devices"`
	// AppleGPU makes BuildRunArgs add --device /dev/metal and --device /dev/mps.
	AppleGPU    bool     `json:"apple_gpu"`
}

// NetworkConfig defines network settings for containers.
type NetworkConfig struct {
	// AllowNetwork is not consumed anywhere in this file.
	// NOTE(review): presumably toggles container network access — confirm against callers.
	AllowNetwork bool `json:"allow_network"`
}

// podmanCgroupsMode reports the value of the FETCHML_PODMAN_CGROUPS
// environment variable with surrounding whitespace removed. It returns ""
// when the variable is unset or blank.
func podmanCgroupsMode() string {
	raw, _ := os.LookupEnv("FETCHML_PODMAN_CGROUPS")
	mode := strings.TrimSpace(raw)
	return mode
}

// BuildRunArgs assembles the argument list for a detached `podman run`
// invocation described by config.
//
// The result always begins with "run", "-d" (StartContainer relies on this to
// splice in --secret flags) and ends with the image followed by the command.
// In between come, in order: --cgroups=disabled when FETCHML_PODMAN_CGROUPS is
// "disabled", then --name, --security-opt, --memory, --cpus, --device, -v, -p
// and -e flags. Map-backed settings (Volumes, Ports, Env) are emitted in
// sorted key order so the same config always yields the same argument list.
//
// config.Secrets and config.Network are not consulted here. config must be
// non-nil.
func BuildRunArgs(config *ContainerConfig) []string {
	args := []string{"run", "-d"}

	if podmanCgroupsMode() == "disabled" {
		args = append(args, "--cgroups=disabled")
	}

	// Container name.
	if config.Name != "" {
		args = append(args, "--name", config.Name)
	}

	// Security options.
	for _, opt := range config.SecurityOpts {
		args = append(args, "--security-opt", opt)
	}

	// Resource limits and devices.
	if config.Resources.MemoryLimit != "" {
		args = append(args, "--memory", config.Resources.MemoryLimit)
	}
	if config.Resources.CPULimit != "" {
		args = append(args, "--cpus", config.Resources.CPULimit)
	}
	if config.Resources.AppleGPU {
		args = append(args, "--device", "/dev/metal")
		args = append(args, "--device", "/dev/mps")
	}
	for _, device := range config.Resources.GPUDevices {
		args = append(args, "--device", device)
	}

	// Volumes, ordered by host path. Go randomizes map iteration, so the
	// keys are sorted to keep the generated command reproducible.
	hostPaths := make([]string, 0, len(config.Volumes))
	for hostPath := range config.Volumes {
		hostPaths = append(hostPaths, hostPath)
	}
	sort.Strings(hostPaths)
	for _, hostPath := range hostPaths {
		mount := fmt.Sprintf("%s:%s", hostPath, config.Volumes[hostPath])
		args = append(args, "-v", mount)
	}

	// Port mappings, ordered by host port.
	hostPorts := make([]int, 0, len(config.Ports))
	for hostPort := range config.Ports {
		hostPorts = append(hostPorts, hostPort)
	}
	sort.Ints(hostPorts)
	for _, hostPort := range hostPorts {
		portMapping := fmt.Sprintf("%d:%d", hostPort, config.Ports[hostPort])
		args = append(args, "-p", portMapping)
	}

	// Environment variables, ordered by name.
	envKeys := make([]string, 0, len(config.Env))
	for key := range config.Env {
		envKeys = append(envKeys, key)
	}
	sort.Strings(envKeys)
	for _, key := range envKeys {
		args = append(args, "-e", fmt.Sprintf("%s=%s", key, config.Env[key]))
	}

	// Image and command come last.
	args = append(args, config.Image)
	args = append(args, config.Command...)
	return args
}

// ParseContainerID extracts a container ID from the output of a podman
// command. It returns the last non-blank line of output with surrounding
// whitespace removed, or an error when output contains no non-blank text.
func ParseContainerID(output string) (string, error) {
	trimmed := strings.TrimSpace(output)
	if trimmed == "" {
		return "", fmt.Errorf("no container ID returned")
	}

	// trimmed ends in a non-space character, so its final line is never blank.
	lastLine := trimmed
	if idx := strings.LastIndex(trimmed, "\n"); idx >= 0 {
		lastLine = trimmed[idx+1:]
	}
	return strings.TrimSpace(lastLine), nil
}

// StartContainer creates the Podman secrets listed in config.Secrets, runs
// the container in detached mode with those secrets attached, and returns the
// container ID reported by podman.
//
// If any step fails, every secret created by this call is deleted again on a
// best-effort basis and an error is returned. On success the secrets are left
// in place.
func (pm *PodmanManager) StartContainer(
	ctx context.Context,
	config *ContainerConfig,
) (string, error) {
	// removeSecrets deletes the first n entries of config.Secrets.
	// Deletion errors are ignored on purpose: the caller needs to see the
	// failure that triggered the cleanup, not a secondary one.
	// NOTE(review): cleanup reuses ctx, so it presumably cannot run once ctx
	// is already cancelled — confirm whether a detached context is wanted.
	removeSecrets := func(n int) {
		for _, s := range config.Secrets[:n] {
			_ = pm.DeleteSecret(ctx, s.Name)
		}
	}

	// Create Podman secrets for sensitive data.
	for i, secret := range config.Secrets {
		if err := pm.CreateSecret(ctx, secret.Name, secret.Data); err != nil {
			// Roll back by position, not by name: comparing names would stop
			// early when two entries share a name and leak the secrets
			// created before the failing one.
			removeSecrets(i)
			return "", fmt.Errorf("failed to create secret %s: %w", secret.Name, err)
		}
	}

	// Build run args, then splice the --secret flags in directly after the
	// leading "run", "-d" that BuildRunArgs always emits.
	args := BuildRunArgs(config)
	if secretArgs := BuildSecretArgs(config.Secrets); len(secretArgs) > 0 {
		withSecrets := make([]string, 0, len(args)+len(secretArgs))
		withSecrets = append(withSecrets, args[:2]...)
		withSecrets = append(withSecrets, secretArgs...)
		withSecrets = append(withSecrets, args[2:]...)
		args = withSecrets
	}

	// Execute command.
	cmd := exec.CommandContext(ctx, "podman", args...)
	output, err := cmd.CombinedOutput()
	if err != nil {
		removeSecrets(len(config.Secrets))
		return "", fmt.Errorf("failed to start container: %w, output: %s", err, string(output))
	}

	containerID, err := ParseContainerID(string(output))
	if err != nil {
		removeSecrets(len(config.Secrets))
		return "", err
	}

	pm.logger.Info("container started",
		"container_id", containerID,
		"name", config.Name,
		"secrets", len(config.Secrets))
	return containerID, nil
}

// StopContainer stops the container identified by containerID using
// `podman stop`.
//
// containerID is validated the same way as in the other container methods of
// this type, and IDs beginning with "-" are additionally rejected so that a
// value such as "--all" cannot be interpreted by podman as an option.
// It returns an error when the ID is invalid or when podman fails; in the
// latter case the error includes podman's combined output.
func (pm *PodmanManager) StopContainer(ctx context.Context, containerID string) error {
	// Validate containerID to prevent option and metacharacter injection.
	if containerID == "" || strings.HasPrefix(containerID, "-") ||
		strings.ContainsAny(containerID, "&;|<>$`\"'") {
		return fmt.Errorf("invalid container ID: %s", containerID)
	}

	cmd := exec.CommandContext(ctx, "podman", "stop", containerID) //nolint:gosec
	output, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("failed to stop container: %w, output: %s", err, string(output))
	}

	pm.logger.Info("container stopped", "container_id", containerID)
	return nil
}

// GetContainerStateStatus returns the container's lifecycle state from `podman inspect`.
// Typical values: running, exited, created, paused.
//
// It returns "unknown" when podman succeeds but prints nothing, and an error
// when containerID is invalid or podman fails (the error then includes
// podman's combined output).
func (pm *PodmanManager) GetContainerStateStatus(
	ctx context.Context,
	containerID string,
) (string, error) {
	// Validate containerID to prevent injection. The command runs without a
	// shell, so the leading "-" check matters most: it keeps the ID from being
	// parsed by podman as an option instead of a container reference.
	if containerID == "" || strings.HasPrefix(containerID, "-") ||
		strings.ContainsAny(containerID, "&;|<>$`\"'") {
		return "", fmt.Errorf("invalid container ID: %s", containerID)
	}

	cmd := exec.CommandContext(ctx, "podman", "inspect", "--format", "{{.State.Status}}", containerID) //nolint:gosec
	output, err := cmd.CombinedOutput()
	if err != nil {
		return "", fmt.Errorf("failed to inspect container: %w, output: %s", err, string(output))
	}
	status := strings.TrimSpace(string(output))
	if status == "" {
		return "unknown", nil
	}
	return status, nil
}

// RemoveContainer removes the container identified by containerID using
// `podman rm`.
//
// containerID is validated the same way as in the other container methods of
// this type, and IDs beginning with "-" are additionally rejected so that a
// value such as "--all" cannot be interpreted by podman as an option.
// It returns an error when the ID is invalid or when podman fails; in the
// latter case the error includes podman's combined output.
func (pm *PodmanManager) RemoveContainer(ctx context.Context, containerID string) error {
	// Validate containerID to prevent option and metacharacter injection.
	if containerID == "" || strings.HasPrefix(containerID, "-") ||
		strings.ContainsAny(containerID, "&;|<>$`\"'") {
		return fmt.Errorf("invalid container ID: %s", containerID)
	}

	cmd := exec.CommandContext(ctx, "podman", "rm", containerID) //nolint:gosec
	output, err := cmd.CombinedOutput()
	if err != nil {
		return fmt.Errorf("failed to remove container: %w, output: %s", err, string(output))
	}

	pm.logger.Info("container removed", "container_id", containerID)
	return nil
}

// GetContainerStatus returns the human-readable status column that
// `podman ps` prints for containerID.
//
// Running containers are queried first; if that yields nothing the query is
// repeated with -a so stopped containers are included. When both queries
// print nothing the result is "unknown". An error is returned for an invalid
// ID or when podman itself fails.
func (pm *PodmanManager) GetContainerStatus(
	ctx context.Context,
	containerID string,
) (string, error) {
	// Validate containerID to prevent injection
	invalid := containerID == "" || strings.ContainsAny(containerID, "&;|<>$`\"'")
	if invalid {
		return "", fmt.Errorf("invalid container ID: %s", containerID)
	}

	// psStatus runs `podman ps` filtered to containerID and returns the
	// trimmed status text; includeStopped adds the -a flag.
	psStatus := func(includeStopped bool) (string, error) {
		psArgs := []string{"ps"}
		if includeStopped {
			psArgs = append(psArgs, "-a")
		}
		psArgs = append(psArgs, "--filter", "id="+containerID, "--format", "{{.Status}}")

		out, err := exec.CommandContext(ctx, "podman", psArgs...).CombinedOutput() //nolint:gosec
		if err != nil {
			return "", fmt.Errorf("failed to get container status: %w, output: %s", err, string(out))
		}
		return strings.TrimSpace(string(out)), nil
	}

	status, err := psStatus(false)
	if err != nil {
		return "", err
	}
	if status != "" {
		return status, nil
	}

	// Container might be stopped, check all containers
	status, err = psStatus(true)
	if err != nil {
		return "", err
	}
	if status == "" {
		return "unknown", nil
	}
	return status, nil
}

// ExecContainer executes a command inside a running container and returns the output.
//
// containerID must be non-empty, must not start with "-" (podman would parse
// it as an exec option rather than a container reference) and must not
// contain shell metacharacters. Every element of command is rejected if it
// contains shell metacharacters. The returned string is podman's combined
// stdout and stderr; on failure that output is included in the error instead.
func (pm *PodmanManager) ExecContainer(ctx context.Context, containerID string, command []string) (string, error) {
	// Validate containerID to prevent injection
	if containerID == "" || strings.HasPrefix(containerID, "-") ||
		strings.ContainsAny(containerID, "&;|<>$`\"'") {
		return "", fmt.Errorf("invalid container ID: %s", containerID)
	}

	// Validate command to prevent injection
	for _, arg := range command {
		if strings.ContainsAny(arg, "&;|<>$`\"'") {
			return "", fmt.Errorf("invalid command argument: %s", arg)
		}
	}

	// Build podman exec command
	args := make([]string, 0, 2+len(command))
	args = append(args, "exec", containerID)
	args = append(args, command...)

	cmd := exec.CommandContext(ctx, "podman", args...) //nolint:gosec
	output, err := cmd.CombinedOutput()
	if err != nil {
		return "", fmt.Errorf("failed to execute command in container: %w, output: %s", err, string(output))
	}

	return string(output), nil
}

// PodmanConfig holds configuration for Podman container execution.
// It is consumed by BuildPodmanCommand, BuildPodmanCommandLegacy and
// ValidateSecurityPolicy.
type PodmanConfig struct {
	// Image is the container image to run.
	Image              string
	// Workspace is the host directory mounted read-write at ContainerWorkspace.
	Workspace          string
	// Results is the host directory mounted read-write at ContainerResults.
	Results            string
	// ContainerWorkspace is the in-container mount point for Workspace; it is
	// also passed to the image entrypoint as --workspace.
	ContainerWorkspace string
	// ContainerResults is the in-container mount point for Results.
	ContainerResults   string
	// AppleGPU is not read by the command builders in this file.
	// NOTE(review): presumably callers translate it into GPUDevices — confirm.
	AppleGPU           bool
	// GPUDevices entries are each passed as --device.
	GPUDevices         []string
	// Env entries become -e KEY=VALUE flags.
	Env                map[string]string
	// Volumes maps host path -> container path for additional -v mounts.
	Volumes            map[string]string
	// Memory is the --memory value; config.DefaultPodmanMemory is used when empty.
	Memory             string
	// CPUs is the --cpus value; config.DefaultPodmanCPUs is used when empty.
	CPUs               string
	Privileged         bool   // Security: must be false
	Network            string // Security: must not be "host"
	ReadOnlyMounts     bool   // Security: true for dataset mounts
}

// PodmanResourceOverrides turns per-task resource requests into values
// suitable for Podman's `--cpus` and `--memory` flags.
//
// Both inputs are optional: a cpu or memoryGB value of zero or less yields an
// empty string for the corresponding result. A positive memoryGB is rendered
// with a "g" suffix (for example 4 becomes "4g").
func PodmanResourceOverrides(cpu int, memoryGB int) (cpus string, memory string) {
	if memoryGB > 0 {
		memory = fmt.Sprintf("%dg", memoryGB)
	}
	if cpu <= 0 {
		return "", memory
	}
	return strconv.Itoa(cpu), memory
}

// PodmanSecurityConfig holds security configuration for Podman containers.
// BuildSecurityArgs translates it into `podman run` flags.
type PodmanSecurityConfig struct {
	// NoNewPrivileges adds --security-opt no-new-privileges:true.
	NoNewPrivileges bool
	// DropAllCaps adds --cap-drop=all.
	DropAllCaps     bool
	// AllowedCaps are re-added with --cap-add; only honored when DropAllCaps is set.
	AllowedCaps     []string
	// UserNS enables --userns keep-id together with --user RunAsUID:RunAsGID,
	// but only when both RunAsUID and RunAsGID are greater than zero.
	UserNS          bool
	RunAsUID        int
	RunAsGID        int
	// SeccompProfile is a profile name (or absolute path) resolved through
	// GetSeccompProfilePath; "" and "unconfined" add no seccomp option.
	SeccompProfile  string
	// ReadOnlyRoot adds --read-only.
	ReadOnlyRoot    bool
	// NetworkMode is the --network value; "none" is used when empty.
	NetworkMode     string
}

// BuildSecurityArgs converts sandbox into the security-related flags for
// `podman run`.
//
// Flags are emitted in a fixed order: no-new-privileges, capability handling,
// user namespace / user, seccomp profile, read-only root, and finally
// --network, which is always present and falls back to "none" when
// sandbox.NetworkMode is empty.
func BuildSecurityArgs(sandbox PodmanSecurityConfig) []string {
	var flags []string

	if sandbox.NoNewPrivileges {
		flags = append(flags, "--security-opt", "no-new-privileges:true")
	}

	// Capabilities are only re-added on top of a full drop.
	if sandbox.DropAllCaps {
		flags = append(flags, "--cap-drop=all")
		for _, capability := range sandbox.AllowedCaps {
			if capability == "" {
				continue
			}
			flags = append(flags, "--cap-add="+capability)
		}
	}

	// keep-id plus an explicit --user is applied only when both IDs are positive.
	wantUserNS := sandbox.UserNS && sandbox.RunAsUID > 0 && sandbox.RunAsGID > 0
	if wantUserNS {
		flags = append(flags,
			"--userns", "keep-id",
			"--user", fmt.Sprintf("%d:%d", sandbox.RunAsUID, sandbox.RunAsGID),
		)
	}

	// A named profile that cannot be resolved to a path adds no seccomp flag.
	if name := sandbox.SeccompProfile; name != "" && name != "unconfined" {
		if resolved := GetSeccompProfilePath(name); resolved != "" {
			flags = append(flags, "--security-opt", fmt.Sprintf("seccomp=%s", resolved))
		}
	}

	if sandbox.ReadOnlyRoot {
		flags = append(flags, "--read-only")
	}

	// Network mode (default: none)
	mode := sandbox.NetworkMode
	if mode == "" {
		mode = "none"
	}
	return append(flags, "--network", mode)
}

// GetSeccompProfilePath resolves a seccomp profile name to a file path.
//
// It looks for <profileName>.json under configs/seccomp (relative to the
// working directory), /etc/fetchml/seccomp and /usr/share/fetchml/seccomp, in
// that order, and returns the first candidate that os.Stat can access. If
// none exists and profileName is itself an absolute path, profileName is
// returned unchanged without checking that it exists. Otherwise the result is
// the empty string.
func GetSeccompProfilePath(profileName string) string {
	fileName := profileName + ".json"
	searchDirs := []string{
		filepath.Join("configs", "seccomp"),
		filepath.Join("/etc", "fetchml", "seccomp"),
		filepath.Join("/usr", "share", "fetchml", "seccomp"),
	}

	for _, dir := range searchDirs {
		candidate := filepath.Join(dir, fileName)
		_, statErr := os.Stat(candidate)
		if statErr == nil {
			return candidate
		}
	}

	// Fall back to treating the name as a ready-made absolute path.
	if !filepath.IsAbs(profileName) {
		return ""
	}
	return profileName
}

// BuildPodmanCommand builds a `podman run --rm` command for executing ML
// experiments with security options.
//
// The argument list is composed of, in order: the flags produced by
// BuildSecurityArgs(sandbox); --memory and --cpus (falling back to
// config.DefaultPodmanMemory / config.DefaultPodmanCPUs when the cfg field is
// empty); read-write mounts for the workspace and results directories; any
// additional cfg.Volumes; --device for each cfg.GPUDevices entry; -e for each
// cfg.Env entry; then cfg.Image followed by the entrypoint arguments
// --workspace, --deps and --script. When extraArgs is non-empty it is
// appended after an --args marker.
//
// Additional volumes and environment variables are emitted in sorted key
// order so the same inputs always produce the same command line. The command
// is bound to ctx and is returned without being started.
func BuildPodmanCommand(
	ctx context.Context,
	cfg PodmanConfig,
	sandbox PodmanSecurityConfig,
	scriptPath, depsPath string,
	extraArgs []string,
) *exec.Cmd {
	args := []string{"run", "--rm"}

	// Add security options from sandbox config
	args = append(args, BuildSecurityArgs(sandbox)...)

	// Resource limits, with package defaults when unset.
	memory := cfg.Memory
	if memory == "" {
		memory = config.DefaultPodmanMemory
	}
	args = append(args, "--memory", memory)

	cpus := cfg.CPUs
	if cpus == "" {
		cpus = config.DefaultPodmanCPUs
	}
	args = append(args, "--cpus", cpus)

	// Mount workspace
	workspaceMount := fmt.Sprintf("%s:%s:rw", cfg.Workspace, cfg.ContainerWorkspace)
	args = append(args, "-v", workspaceMount)

	// Mount results
	resultsMount := fmt.Sprintf("%s:%s:rw", cfg.Results, cfg.ContainerResults)
	args = append(args, "-v", resultsMount)

	// Mount additional volumes, ordered by host path. Go randomizes map
	// iteration, so the keys are sorted to keep the command reproducible.
	hostPaths := make([]string, 0, len(cfg.Volumes))
	for hostPath := range cfg.Volumes {
		hostPaths = append(hostPaths, hostPath)
	}
	sort.Strings(hostPaths)
	for _, hostPath := range hostPaths {
		mount := fmt.Sprintf("%s:%s", hostPath, cfg.Volumes[hostPath])
		args = append(args, "-v", mount)
	}

	// Use injected GPU device paths for Apple GPU or custom configurations
	for _, device := range cfg.GPUDevices {
		args = append(args, "--device", device)
	}

	// Add environment variables, ordered by name.
	envKeys := make([]string, 0, len(cfg.Env))
	for key := range cfg.Env {
		envKeys = append(envKeys, key)
	}
	sort.Strings(envKeys)
	for _, key := range envKeys {
		args = append(args, "-e", fmt.Sprintf("%s=%s", key, cfg.Env[key]))
	}

	// Image and command
	args = append(args, cfg.Image,
		"--workspace", cfg.ContainerWorkspace,
		"--deps", depsPath,
		"--script", scriptPath,
	)

	// Add extra arguments via --args flag
	if len(extraArgs) > 0 {
		args = append(args, "--args")
		args = append(args, extraArgs...)
	}

	return exec.CommandContext(ctx, "podman", args...)
}

// BuildPodmanCommandLegacy builds a `podman run --rm` command using the
// legacy, hard-coded security settings: --security-opt no-new-privileges,
// --cap-drop ALL and --userns keep-id.
//
// cfg.Network, when non-empty, is passed as --network, and cfg.ReadOnlyMounts
// adds --read-only. Resource limits, mounts, devices, environment variables,
// image and entrypoint arguments are handled as in BuildPodmanCommand.
// Additional volumes and environment variables are emitted in sorted key
// order so the same inputs always produce the same command line. The command
// is bound to ctx and is returned without being started.
//
// Deprecated: Use BuildPodmanCommand with a PodmanSecurityConfig instead.
func BuildPodmanCommandLegacy(
	ctx context.Context,
	cfg PodmanConfig,
	scriptPath, depsPath string,
	extraArgs []string,
) *exec.Cmd {
	args := []string{
		"run", "--rm",
		"--security-opt", "no-new-privileges",
		"--cap-drop", "ALL",
	}

	// Add network mode if specified
	if cfg.Network != "" {
		args = append(args, "--network", cfg.Network)
	}

	// Add read-only root filesystem
	if cfg.ReadOnlyMounts {
		args = append(args, "--read-only")
	}

	// Resource limits, with package defaults when unset.
	memory := cfg.Memory
	if memory == "" {
		memory = config.DefaultPodmanMemory
	}
	args = append(args, "--memory", memory)

	cpus := cfg.CPUs
	if cpus == "" {
		cpus = config.DefaultPodmanCPUs
	}
	args = append(args, "--cpus", cpus)

	args = append(args, "--userns", "keep-id")

	// Mount workspace
	workspaceMount := fmt.Sprintf("%s:%s:rw", cfg.Workspace, cfg.ContainerWorkspace)
	args = append(args, "-v", workspaceMount)

	// Mount results
	resultsMount := fmt.Sprintf("%s:%s:rw", cfg.Results, cfg.ContainerResults)
	args = append(args, "-v", resultsMount)

	// Mount additional volumes, ordered by host path. Go randomizes map
	// iteration, so the keys are sorted to keep the command reproducible.
	hostPaths := make([]string, 0, len(cfg.Volumes))
	for hostPath := range cfg.Volumes {
		hostPaths = append(hostPaths, hostPath)
	}
	sort.Strings(hostPaths)
	for _, hostPath := range hostPaths {
		mount := fmt.Sprintf("%s:%s", hostPath, cfg.Volumes[hostPath])
		args = append(args, "-v", mount)
	}

	// Use injected GPU device paths for Apple GPU or custom configurations
	for _, device := range cfg.GPUDevices {
		args = append(args, "--device", device)
	}

	// Add environment variables, ordered by name.
	envKeys := make([]string, 0, len(cfg.Env))
	for key := range cfg.Env {
		envKeys = append(envKeys, key)
	}
	sort.Strings(envKeys)
	for _, key := range envKeys {
		args = append(args, "-e", fmt.Sprintf("%s=%s", key, cfg.Env[key]))
	}

	// Image and command
	args = append(args, cfg.Image,
		"--workspace", cfg.ContainerWorkspace,
		"--deps", depsPath,
		"--script", scriptPath,
	)

	// Add extra arguments via --args flag
	if len(extraArgs) > 0 {
		args = append(args, "--args")
		args = append(args, extraArgs...)
	}

	return exec.CommandContext(ctx, "podman", args...)
}

// ValidateSecurityPolicy validates that the container configuration meets security requirements.
//
// It returns an error wrapping ErrSecurityViolation when cfg requests a
// privileged container or the "host" network mode, and nil otherwise.
func ValidateSecurityPolicy(cfg PodmanConfig) error {
	if cfg.Privileged {
		return fmt.Errorf("privileged containers are not allowed: %w", ErrSecurityViolation)
	}

	if cfg.Network == "host" {
		return fmt.Errorf("host network mode is not allowed: %w", ErrSecurityViolation)
	}

	// cfg.ReadOnlyMounts is deliberately not enforced here: a false value is
	// treated as a warning-level, audit-only concern rather than a hard error.
	return nil
}

// PodmanSecret represents a secret to be mounted in a container.
// BuildSecretArgs exposes it as an environment variable when EnvVar is set,
// and as a file otherwise.
type PodmanSecret struct {
	Name   string // Secret name in Podman
	Data   []byte // Secret payload; CreateSecret pipes it unmodified to `podman secret create` on stdin
	Target string // Mount path inside container; BuildSecretArgs uses /run/secrets/<Name> when empty
	EnvVar string // Environment variable name (optional, if set mounts as env var instead of file)
}

// CreateSecret registers a Podman secret called name whose content is data.
//
// The payload is streamed to `podman secret create <name> -` on stdin rather
// than being placed on the command line. On failure the returned error
// includes podman's combined output.
func (pm *PodmanManager) CreateSecret(ctx context.Context, name string, data []byte) error {
	// The trailing "-" tells podman to read the secret from stdin.
	create := exec.CommandContext(ctx, "podman", "secret", "create", name, "-")
	create.Stdin = strings.NewReader(string(data))

	if out, err := create.CombinedOutput(); err != nil {
		return fmt.Errorf("failed to create secret %s: %w, output: %s", name, err, string(out))
	}

	pm.logger.Info("secret created", "name", name)
	return nil
}

// DeleteSecret deletes the Podman secret called name via `podman secret rm`.
// On failure the returned error includes podman's combined output.
func (pm *PodmanManager) DeleteSecret(ctx context.Context, name string) error {
	remove := exec.CommandContext(ctx, "podman", "secret", "rm", name)

	if out, err := remove.CombinedOutput(); err != nil {
		return fmt.Errorf("failed to delete secret %s: %w, output: %s", name, err, string(out))
	}

	pm.logger.Info("secret deleted", "name", name)
	return nil
}

// BuildSecretArgs returns the `--secret` flags that attach secrets to a
// `podman run` invocation, one flag pair per secret and in input order.
//
// A secret with EnvVar set is exposed as that environment variable. Any other
// secret is mounted as a file at Target, or at /run/secrets/<Name> when
// Target is empty. The result is an empty, non-nil slice when secrets is empty.
func BuildSecretArgs(secrets []PodmanSecret) []string {
	flags := make([]string, 0, 2*len(secrets))
	for _, s := range secrets {
		var spec string
		switch {
		case s.EnvVar != "":
			// Mount as environment variable
			spec = fmt.Sprintf("%s,type=env,target=%s", s.Name, s.EnvVar)
		case s.Target != "":
			// Mount as file at the requested path
			spec = fmt.Sprintf("%s,type=mount,target=%s", s.Name, s.Target)
		default:
			// Mount as file at the default location
			defaultTarget := fmt.Sprintf("/run/secrets/%s", s.Name)
			spec = fmt.Sprintf("%s,type=mount,target=%s", s.Name, defaultTarget)
		}
		flags = append(flags, "--secret", spec)
	}
	return flags
}

// SanitizeContainerEnv splits env into secrets and a cleaned environment map.
//
// A variable counts as sensitive when its name contains any entry of
// sensitiveKeys, compared case-insensitively. Sensitive variables with a
// non-empty value are turned into PodmanSecret entries (named
// fetchml_<lowercased key>_<pid> and exposed under the original variable
// name); everything else, including sensitive variables with an empty value,
// is copied into the returned map. env itself is not modified.
func SanitizeContainerEnv(env map[string]string, sensitiveKeys []string) ([]PodmanSecret, map[string]string) {
	// looksSensitive reports whether name contains one of the sensitive fragments.
	looksSensitive := func(name string) bool {
		lowered := strings.ToLower(name)
		for _, fragment := range sensitiveKeys {
			if strings.Contains(lowered, strings.ToLower(fragment)) {
				return true
			}
		}
		return false
	}

	secrets := []PodmanSecret{}
	cleanEnv := make(map[string]string)

	for name, val := range env {
		if val == "" || !looksSensitive(name) {
			cleanEnv[name] = val
			continue
		}

		// Kept out of cleanEnv: the value is delivered as a secret instead.
		secrets = append(secrets, PodmanSecret{
			Name:   fmt.Sprintf("fetchml_%s_%d", strings.ToLower(name), os.Getpid()),
			Data:   []byte(val),
			EnvVar: name, // Mount as env var to maintain compatibility
		})
	}

	return secrets, cleanEnv
}

// ErrSecurityViolation is returned when a security policy is violated.
// ValidateSecurityPolicy wraps it with %w, so callers can detect it with
// errors.Is.
var ErrSecurityViolation = fmt.Errorf("security policy violation")

// SanitizePath normalizes path with filepath.Clean and rejects it when the
// cleaned result still contains a ".." element, i.e. when the path would
// climb above its starting directory.
//
// Only whole path elements are inspected, so names that merely contain two
// consecutive dots (for example "v1..2") are accepted. It returns the cleaned
// path on success and an error mentioning the original path otherwise.
func SanitizePath(path string) (string, error) {
	// Clean resolves inner "." and ".." elements; any ".." that survives is a
	// genuine attempt to escape upwards.
	cleaned := filepath.Clean(path)

	for _, element := range strings.Split(cleaned, string(filepath.Separator)) {
		if element == ".." {
			return "", fmt.Errorf("path traversal detected: %s", path)
		}
	}

	return cleaned, nil
}

// ValidateJobName checks that jobName is safe to use as a name.
//
// It returns an error when jobName is empty, contains any of the characters
// / \ < > : " | ? *, or contains the sequence "..". It returns nil otherwise.
func ValidateJobName(jobName string) error {
	switch {
	case jobName == "":
		return fmt.Errorf("job name cannot be empty")
	case strings.ContainsAny(jobName, "/\\<>:\"|?*"):
		// Path separators and characters that are unsafe in file names.
		return fmt.Errorf("job name contains invalid characters: %s", jobName)
	case strings.Contains(jobName, ".."):
		return fmt.Errorf("job name contains path traversal: %s", jobName)
	default:
		return nil
	}
}