- Move ci-test.sh and setup.sh to scripts/ - Trim docs/src/zig-cli.md to current structure - Replace hardcoded secrets with placeholders in configs - Update .gitignore to block .env*, secrets/, keys, build artifacts - Slim README.md to reflect current CLI/TUI split - Add cleanup trap to ci-test.sh - Ensure no secrets are committed
267 lines
7.5 KiB
Go
267 lines
7.5 KiB
Go
// Package container provides Podman container management utilities.
|
|
package container
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"github.com/jfraeys/fetch_ml/internal/config"
|
|
"github.com/jfraeys/fetch_ml/internal/logging"
|
|
)
|
|
|
|
// PodmanManager manages Podman containers
|
|
type PodmanManager struct {
|
|
logger *logging.Logger
|
|
}
|
|
|
|
// NewPodmanManager creates a new Podman manager
|
|
func NewPodmanManager(logger *logging.Logger) (*PodmanManager, error) {
|
|
return &PodmanManager{
|
|
logger: logger,
|
|
}, nil
|
|
}
|
|
|
|
// ContainerConfig holds configuration for starting a container
|
|
type ContainerConfig struct {
|
|
Name string `json:"name"`
|
|
Image string `json:"image"`
|
|
Command []string `json:"command"`
|
|
Env map[string]string `json:"env"`
|
|
Volumes map[string]string `json:"volumes"`
|
|
Ports map[int]int `json:"ports"`
|
|
SecurityOpts []string `json:"security_opts"`
|
|
Resources ResourceConfig `json:"resources"`
|
|
Network NetworkConfig `json:"network"`
|
|
}
|
|
|
|
// ResourceConfig carries the resource limits applied to a container.
type ResourceConfig struct {
	// MemoryLimit is passed to podman as --memory when non-empty.
	MemoryLimit string `json:"memory_limit"`
	// CPULimit is passed to podman as --cpus when non-empty.
	CPULimit string `json:"cpu_limit"`
	// GPUAccess, when true, exposes /dev/dri to the container via --device.
	GPUAccess bool `json:"gpu_access"`
}
|
|
|
|
// NetworkConfig carries the network settings for a container.
type NetworkConfig struct {
	// AllowNetwork states whether the container may use the network.
	// NOTE(review): no code in this file reads this flag; confirm where it
	// is enforced.
	AllowNetwork bool `json:"allow_network"`
}
|
|
|
|
// StartContainer starts a new container
|
|
func (pm *PodmanManager) StartContainer(ctx context.Context, config *ContainerConfig) (string, error) {
|
|
args := []string{"run", "-d"}
|
|
|
|
// Add name
|
|
if config.Name != "" {
|
|
args = append(args, "--name", config.Name)
|
|
}
|
|
|
|
// Add security options
|
|
for _, opt := range config.SecurityOpts {
|
|
args = append(args, "--security-opt", opt)
|
|
}
|
|
|
|
// Add resource limits
|
|
if config.Resources.MemoryLimit != "" {
|
|
args = append(args, "--memory", config.Resources.MemoryLimit)
|
|
}
|
|
if config.Resources.CPULimit != "" {
|
|
args = append(args, "--cpus", config.Resources.CPULimit)
|
|
}
|
|
if config.Resources.GPUAccess {
|
|
args = append(args, "--device", "/dev/dri")
|
|
}
|
|
|
|
// Add volumes
|
|
for hostPath, containerPath := range config.Volumes {
|
|
mount := fmt.Sprintf("%s:%s", hostPath, containerPath)
|
|
args = append(args, "-v", mount)
|
|
}
|
|
|
|
// Add ports
|
|
for hostPort, containerPort := range config.Ports {
|
|
portMapping := fmt.Sprintf("%d:%d", hostPort, containerPort)
|
|
args = append(args, "-p", portMapping)
|
|
}
|
|
|
|
// Add environment variables
|
|
for key, value := range config.Env {
|
|
args = append(args, "-e", fmt.Sprintf("%s=%s", key, value))
|
|
}
|
|
|
|
// Add image and command
|
|
args = append(args, config.Image)
|
|
args = append(args, config.Command...)
|
|
|
|
// Execute command
|
|
cmd := exec.CommandContext(ctx, "podman", args...)
|
|
output, err := cmd.CombinedOutput()
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to start container: %w, output: %s", err, string(output))
|
|
}
|
|
|
|
// Return container ID (first line of output)
|
|
containerID := strings.TrimSpace(string(output))
|
|
if containerID == "" {
|
|
return "", fmt.Errorf("no container ID returned")
|
|
}
|
|
|
|
pm.logger.Info("container started", "container_id", containerID, "name", config.Name)
|
|
return containerID, nil
|
|
}
|
|
|
|
// StopContainer stops a container
|
|
func (pm *PodmanManager) StopContainer(ctx context.Context, containerID string) error {
|
|
cmd := exec.CommandContext(ctx, "podman", "stop", containerID)
|
|
output, err := cmd.CombinedOutput()
|
|
if err != nil {
|
|
return fmt.Errorf("failed to stop container: %w, output: %s", err, string(output))
|
|
}
|
|
|
|
pm.logger.Info("container stopped", "container_id", containerID)
|
|
return nil
|
|
}
|
|
|
|
// RemoveContainer removes a container
|
|
func (pm *PodmanManager) RemoveContainer(ctx context.Context, containerID string) error {
|
|
cmd := exec.CommandContext(ctx, "podman", "rm", containerID)
|
|
output, err := cmd.CombinedOutput()
|
|
if err != nil {
|
|
return fmt.Errorf("failed to remove container: %w, output: %s", err, string(output))
|
|
}
|
|
|
|
pm.logger.Info("container removed", "container_id", containerID)
|
|
return nil
|
|
}
|
|
|
|
// GetContainerStatus gets the status of a container
|
|
func (pm *PodmanManager) GetContainerStatus(ctx context.Context, containerID string) (string, error) {
|
|
// Validate containerID to prevent injection
|
|
if containerID == "" || strings.ContainsAny(containerID, "&;|<>$`\"'") {
|
|
return "", fmt.Errorf("invalid container ID: %s", containerID)
|
|
}
|
|
|
|
cmd := exec.CommandContext(ctx, "podman", "ps", "--filter", "id="+containerID,
|
|
"--format", "{{.Status}}") //nolint:gosec
|
|
output, err := cmd.CombinedOutput()
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to get container status: %w, output: %s", err, string(output))
|
|
}
|
|
|
|
status := strings.TrimSpace(string(output))
|
|
if status == "" {
|
|
// Container might be stopped, check all containers
|
|
cmd = exec.CommandContext(ctx, "podman", "ps", "-a", "--filter", "id="+containerID, "--format", "{{.Status}}") //nolint:gosec
|
|
output, err = cmd.CombinedOutput()
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to get container status: %w, output: %s", err, string(output))
|
|
}
|
|
status = strings.TrimSpace(string(output))
|
|
if status == "" {
|
|
return "unknown", nil
|
|
}
|
|
}
|
|
|
|
return status, nil
|
|
}
|
|
|
|
// PodmanConfig is the input to BuildPodmanCommand: it names the image, the
// host/container directory pairs to mount, and the resource settings.
type PodmanConfig struct {
	// Image is the container image to run.
	Image string
	// Workspace is the host directory mounted read-write at ContainerWorkspace.
	Workspace string
	// Results is the host directory mounted read-write at ContainerResults.
	Results string
	// ContainerWorkspace is the in-container mount point for Workspace; it is
	// also passed to the image as --workspace.
	ContainerWorkspace string
	// ContainerResults is the in-container mount point for Results.
	ContainerResults string
	// GPUAccess, when true, exposes /dev/dri to the container.
	GPUAccess bool
	// Memory is the --memory value; empty selects the package default.
	Memory string
	// CPUs is the --cpus value; empty selects the package default.
	CPUs string
}
|
|
|
|
// BuildPodmanCommand builds a Podman command for executing ML experiments
|
|
func BuildPodmanCommand(
|
|
ctx context.Context,
|
|
cfg PodmanConfig,
|
|
scriptPath, requirementsPath string,
|
|
extraArgs []string,
|
|
) *exec.Cmd {
|
|
args := []string{
|
|
"run", "--rm",
|
|
"--security-opt", "no-new-privileges",
|
|
"--cap-drop", "ALL",
|
|
}
|
|
|
|
if cfg.Memory != "" {
|
|
args = append(args, "--memory", cfg.Memory)
|
|
} else {
|
|
args = append(args, "--memory", config.DefaultPodmanMemory)
|
|
}
|
|
|
|
if cfg.CPUs != "" {
|
|
args = append(args, "--cpus", cfg.CPUs)
|
|
} else {
|
|
args = append(args, "--cpus", config.DefaultPodmanCPUs)
|
|
}
|
|
|
|
args = append(args, "--userns", "keep-id")
|
|
|
|
// Mount workspace
|
|
workspaceMount := fmt.Sprintf("%s:%s:rw", cfg.Workspace, cfg.ContainerWorkspace)
|
|
args = append(args, "-v", workspaceMount)
|
|
|
|
// Mount results
|
|
resultsMount := fmt.Sprintf("%s:%s:rw", cfg.Results, cfg.ContainerResults)
|
|
args = append(args, "-v", resultsMount)
|
|
|
|
if cfg.GPUAccess {
|
|
args = append(args, "--device", "/dev/dri")
|
|
}
|
|
|
|
// Image and command
|
|
args = append(args, cfg.Image,
|
|
"--workspace", cfg.ContainerWorkspace,
|
|
"--requirements", requirementsPath,
|
|
"--script", scriptPath,
|
|
)
|
|
|
|
// Add extra arguments via --args flag
|
|
if len(extraArgs) > 0 {
|
|
args = append(args, "--args")
|
|
args = append(args, extraArgs...)
|
|
}
|
|
|
|
return exec.CommandContext(ctx, "podman", args...)
|
|
}
|
|
|
|
// SanitizePath normalizes path with filepath.Clean and rejects it when the
// result still contains a ".." component, i.e. when the path would climb
// above its starting directory.
//
// Only whole components are compared against "..", so names that merely
// contain two consecutive dots (for example "model..v2.bin") are accepted.
//
// It returns the cleaned path, or an error whose message includes the
// original path when traversal is detected.
func SanitizePath(path string) (string, error) {
	cleaned := filepath.Clean(path)

	// Drop any volume name (empty on non-Windows platforms) and normalize
	// separators so the remainder can be split into components.
	rest := filepath.ToSlash(cleaned[len(filepath.VolumeName(cleaned)):])

	for _, component := range strings.Split(rest, "/") {
		if component == ".." {
			return "", fmt.Errorf("path traversal detected: %s", path)
		}
	}

	return cleaned, nil
}
|
|
|
|
// ValidateJobName checks that jobName is acceptable as a job identifier.
//
// A name is rejected, in this order, when it is empty, when it contains any
// of the characters / \ < > : " | ? *, or when it contains "..". A nil
// return means the name passed every check.
func ValidateJobName(jobName string) error {
	switch {
	case jobName == "":
		return fmt.Errorf("job name cannot be empty")
	case strings.ContainsAny(jobName, "/\\<>:\"|?*"):
		// Path separators and other reserved characters.
		return fmt.Errorf("job name contains invalid characters: %s", jobName)
	case strings.Contains(jobName, ".."):
		return fmt.Errorf("job name contains path traversal: %s", jobName)
	}
	return nil
}
|