package jupyter

import (
	"context"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"github.com/jfraeys/fetch_ml/internal/config"
	"github.com/jfraeys/fetch_ml/internal/container"
	"github.com/jfraeys/fetch_ml/internal/logging"
)

// stripTokenFromURL removes the token query parameter from a URL for safe logging.
//
// The URL is truncated at the first "?token=" (or, failing that, the first
// "&token="), so any query parameters following the token are dropped too.
func stripTokenFromURL(url string) string {
	idx := strings.Index(url, "?token=")
	if idx == -1 {
		idx = strings.Index(url, "&token=")
	}
	if idx != -1 {
		return url[:idx]
	}
	return url
}

const (
	serviceStatusRunning = "running"

	// trashTimestampLayout is the time layout appended (after an underscore)
	// to a workspace name to form its directory name inside the trash.
	trashTimestampLayout = "20060102_150405"
)

// workspaceBaseDir returns the directory that relative workspace paths are
// resolved against: FETCHML_JUPYTER_WORKSPACE_BASE when set, otherwise the
// active data directory reported by the path registry.
func workspaceBaseDir() string {
	// First check environment variable for backward compatibility
	if v := strings.TrimSpace(os.Getenv("FETCHML_JUPYTER_WORKSPACE_BASE")); v != "" {
		return v
	}

	// Use PathRegistry for consistent path management
	paths := config.FromEnv()
	return paths.ActiveDataDir()
}

// resolveWorkspacePath normalizes a workspace path.
//
// Empty input and paths that clean to ".." or start with "../" are rejected.
// Relative paths are joined onto workspaceBaseDir(); absolute paths are
// returned cleaned but otherwise unchanged.
func resolveWorkspacePath(workspace string) (string, error) {
	ws := strings.TrimSpace(workspace)
	if ws == "" {
		return "", fmt.Errorf("workspace is required")
	}

	clean := filepath.Clean(ws)

	// Reject obvious traversal attempts.
	if clean == ".." || strings.HasPrefix(clean, ".."+string(filepath.Separator)) {
		return "", fmt.Errorf("invalid workspace path: %s", workspace)
	}

	// For container deployments, relative paths refer to the workspace base directory.
	if !filepath.IsAbs(clean) {
		clean = strings.TrimPrefix(clean, "."+string(filepath.Separator))
		clean = filepath.Join(workspaceBaseDir(), clean)
	}
	return clean, nil
}

// trashBaseDir returns the directory trashed workspaces are moved into:
// FETCHML_JUPYTER_TRASH_DIR when set, otherwise "trash" under the Jupyter
// state directory reported by the path registry.
func trashBaseDir() string {
	// First check environment variable for backward compatibility
	if v := strings.TrimSpace(os.Getenv("FETCHML_JUPYTER_TRASH_DIR")); v != "" {
		return v
	}

	// Use PathRegistry for consistent path management
	paths := config.FromEnv()
	return filepath.Join(paths.JupyterStateDir(), "trash")
}

// trashInfo is the record written as ".trashinfo" inside a trashed workspace.
type trashInfo struct {
	OriginalName string    `json:"original_name"`
	DeletedAt    time.Time `json:"deleted_at"`
	DeletedBy    string    `json:"deleted_by"`
	SizeBytes    int64     `json:"size_bytes"`
	PurgeAfter   time.Time `json:"purge_after"`
	Reason       string    `json:"reason"`
}

// moveWorkspaceToTrash renames the workspace directory to
// "<trash>/<name>_<UTC timestamp>" and writes a best-effort ".trashinfo"
// file into it. It returns the destination path and the recorded info.
func (sm *ServiceManager) moveWorkspaceToTrash(workspacePath string, originalName string) (string, *trashInfo, error) {
	ws := strings.TrimSpace(workspacePath)
	if ws == "" {
		return "", nil, fmt.Errorf("workspace is required")
	}
	name := strings.TrimSpace(originalName)
	if name == "" {
		return "", nil, fmt.Errorf("original name is required")
	}

	// NOTE(review): a path rejected by resolveWorkspacePath is still used
	// verbatim below; this fallback is kept from the original behavior.
	if wsResolved, err := resolveWorkspacePath(ws); err == nil {
		ws = wsResolved
	}

	trashDir := trashBaseDir()
	if err := os.MkdirAll(trashDir, 0o750); err != nil {
		return "", nil, fmt.Errorf("failed to create trash directory: %w", err)
	}

	// The name becomes a single directory entry under the trash directory, so
	// path separators must not survive (otherwise the join could nest under,
	// or climb out of, the trash directory).
	dirName := strings.ReplaceAll(name, "/", "_")
	dirName = strings.ReplaceAll(dirName, string(filepath.Separator), "_")

	// One timestamp keeps the directory name, DeletedAt and PurgeAfter consistent.
	now := time.Now().UTC()
	dest := filepath.Join(trashDir, fmt.Sprintf("%s_%s", dirName, now.Format(trashTimestampLayout)))

	// Size is informational only; on error it is recorded as zero.
	sizeBytes, _ := dirSizeBytes(ws)
	info := &trashInfo{
		OriginalName: name,
		DeletedAt:    now,
		DeletedBy:    "system",
		SizeBytes:    sizeBytes,
		PurgeAfter:   now.Add(30 * 24 * time.Hour),
		Reason:       "user_request",
	}

	if err := os.Rename(ws, dest); err != nil {
		return "", nil, fmt.Errorf("failed to move workspace to trash: %w", err)
	}

	// Best-effort: the move already succeeded, so a failure to write the
	// sidecar file must not fail the operation.
	if b, err := json.MarshalIndent(info, "", "  "); err == nil {
		_ = os.WriteFile(filepath.Join(dest, ".trashinfo"), b, 0o600)
	}
	return dest, info, nil
}

// RestoreWorkspace moves the most recently trashed copy of the named
// workspace back under workspaceBaseDir() and returns the restored path.
//
// Only trash entries named exactly "<name>_<timestamp>" are considered, so a
// workspace "foo" never picks up entries belonging to "foo_bar". It fails if
// no such entry exists or if the destination already exists.
func (sm *ServiceManager) RestoreWorkspace(ctx context.Context, name string) (string, error) {
	_ = ctx
	wsName := strings.TrimSpace(name)
	if wsName == "" {
		return "", fmt.Errorf("workspace name is required")
	}

	base := trashBaseDir()
	entries, err := os.ReadDir(base)
	if err != nil {
		if os.IsNotExist(err) {
			return "", fmt.Errorf("no trash directory found")
		}
		return "", fmt.Errorf("failed to read trash directory: %w", err)
	}

	prefix := wsName + "_"
	var best string
	var bestTs string
	for _, e := range entries {
		if !e.IsDir() {
			continue
		}
		n := e.Name()
		if !strings.HasPrefix(n, prefix) {
			continue
		}
		ts := strings.TrimPrefix(n, prefix)
		// The remainder must be exactly a trash timestamp; otherwise the entry
		// belongs to a different workspace whose name merely shares the prefix.
		if _, err := time.Parse(trashTimestampLayout, ts); err != nil {
			continue
		}
		// The layout is fixed-width and most-significant-first, so string
		// order equals chronological order.
		if best == "" || ts > bestTs {
			best = n
			bestTs = ts
		}
	}
	if best == "" {
		return "", fmt.Errorf("no trashed workspace found for %q", wsName)
	}

	src := filepath.Join(base, best)
	wsBase := workspaceBaseDir()
	dest := filepath.Join(wsBase, wsName)
	if _, err := os.Stat(dest); err == nil {
		return "", fmt.Errorf("workspace %q already exists", wsName)
	}
	if err := os.MkdirAll(wsBase, 0o750); err != nil {
		return "", fmt.Errorf("failed to create workspace base directory: %w", err)
	}
	if err := os.Rename(src, dest); err != nil {
		return "", fmt.Errorf("failed to restore workspace: %w", err)
	}

	// Best-effort cleanup of the sidecar file written when trashing.
	_ = os.Remove(filepath.Join(dest, ".trashinfo"))
	return dest, nil
}

// dirSizeBytes returns the summed size of all non-directory entries under
// path. Any walk or stat error aborts the walk and yields (0, err).
func dirSizeBytes(path string) (int64, error) {
	var total int64
	err := filepath.WalkDir(path, func(_ string, d os.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if d.IsDir() {
			return nil
		}
		info, err := d.Info()
		if err != nil {
			return err
		}
		total += info.Size()
		return nil
	})
	if err != nil {
		return 0, err
	}
	return total, nil
}

// ServiceManager manages standalone Jupyter services
type ServiceManager struct {
	logger               *logging.Logger
	podman               *container.PodmanManager
	config               *ServiceConfig
	services             map[string]*JupyterService
	workspaceMetadataMgr *WorkspaceMetadataManager
	securityMgr          *SecurityManager
	startupBlockedPkgs   []string
}

// splitPackageList splits a comma-separated list, trimming whitespace around
// each entry and dropping empty entries. Blank input yields nil.
func splitPackageList(value string) []string {
	value = strings.TrimSpace(value)
	if value == "" {
		return nil
	}
	parts := strings.Split(value, ",")
	out := make([]string, 0, len(parts))
	for _, p := range parts {
		p = strings.TrimSpace(p)
		if p == "" {
			continue
		}
		out = append(out, p)
	}
	return out
}

// startupBlockedPackages returns the package names checked at container
// startup. When FETCHML_JUPYTER_STARTUP_BLOCKED_PACKAGES is unset, a copy of
// installBlocked is used. When it is set to blank, "off", "none" or
// "disabled" (case-insensitive) the check is disabled via an empty list;
// otherwise the variable is parsed as a comma-separated list.
func startupBlockedPackages(installBlocked []string) []string {
	val, ok := os.LookupEnv("FETCHML_JUPYTER_STARTUP_BLOCKED_PACKAGES")
	if !ok {
		return append([]string{}, installBlocked...)
	}
	val = strings.TrimSpace(val)
	if val == "" || strings.EqualFold(val, "off") || strings.EqualFold(val, "none") || strings.EqualFold(val, "disabled") {
		return []string{}
	}
	return splitPackageList(val)
}

// ServiceConfig holds configuration for Jupyter services
type ServiceConfig struct {
	DefaultImage     string         `json:"default_image"`
	DefaultPort      int            `json:"default_port"`
	DefaultWorkspace string         `json:"default_workspace"`
	MaxServices      int            `json:"max_services"`
	DefaultResources ResourceConfig `json:"default_resources"`
	SecuritySettings SecurityConfig `json:"security_settings"`
	NetworkConfig    NetworkConfig  `json:"network_config"`
}

// NetworkConfig defines network settings for Jupyter containers
type NetworkConfig struct {
	HostPort       int    `json:"host_port"`
	ContainerPort  int    `json:"container_port"`
	BindAddress    string `json:"bind_address"`
	EnableToken    bool   `json:"enable_token"`
	Token          string `json:"token"`
	EnablePassword bool   `json:"enable_password"`
	Password       string `json:"password"`
	AllowRemote    bool   `json:"allow_remote"`
	NetworkName    string `json:"network_name"`
}

// ResourceConfig defines resource limits for Jupyter containers
type ResourceConfig struct {
	MemoryLimit string   `json:"memory_limit"`
	CPULimit    string   `json:"cpu_limit"`
	GPUDevices  []string `json:"gpu_devices"`
}

// SecurityConfig holds security settings for Jupyter services
type SecurityConfig struct {
	AllowNetwork     bool            `json:"allow_network"`
	AllowedHosts     []string        `json:"allowed_hosts"`
	BlockedHosts     []string        `json:"blocked_hosts"`
	EnableFirewall   bool            `json:"enable_firewall"`
	TrustedChannels  []string        `json:"trusted_channels"`
	BlockedPackages  []string        `json:"blocked_packages"`
	AllowedPackages  map[string]bool `json:"allowed_packages"`
	RequireApproval  bool            `json:"require_approval"`
	ReadOnlyRoot     bool            `json:"read_only_root"`
	DropCapabilities []string        `json:"drop_capabilities"`
	RunAsNonRoot     bool            `json:"run_as_non_root"`
	EnableSeccomp    bool            `json:"enable_seccomp"`
	NoNewPrivileges  bool            `json:"no_new_privileges"`
}

// JupyterService represents a running Jupyter instance
type JupyterService struct {
	ID          string            `json:"id"`
	Name        string            `json:"name"`
	Status      string            `json:"status"`
	ContainerID string            `json:"container_id"`
	Port        int               `json:"port"`
	Workspace   string            `json:"workspace"`
	Image       string            `json:"image"`
	URL         string            `json:"url"`
	CreatedAt   time.Time         `json:"created_at"`
	LastAccess  time.Time         `json:"last_access"`
	Config      ServiceConfig     `json:"config"`
	Environment map[string]string `json:"environment"`
	Metadata    map[string]string `json:"metadata"`
}

// InstalledPackage describes one package reported by a package manager
// inside a service container; Source is "pip" or "conda".
type InstalledPackage struct {
	Name    string `json:"name"`
	Version string `json:"version"`
	Source  string `json:"source"`
}

// StartRequest defines parameters for starting a Jupyter service
type StartRequest struct {
	Name        string            `json:"name"`
	Workspace   string            `json:"workspace"`
	Image       string            `json:"image"`
	Port        int               `json:"port"`
	Resources   ResourceConfig    `json:"resources"`
	Security    SecurityConfig    `json:"security"`
	Network     NetworkConfig     `json:"network"`
	Environment map[string]string `json:"environment"`
	Metadata    map[string]string `json:"metadata"`
}

// NewServiceManager creates a new Jupyter service manager.
//
// FETCHML_JUPYTER_BLOCKED_PACKAGES and FETCHML_JUPYTER_ALLOWED_PACKAGES, when
// non-empty, replace the corresponding default security lists; both are
// comma-separated and blank entries are ignored. Failure to load previously
// persisted services is logged, not returned.
func NewServiceManager(logger *logging.Logger, svcConfig *ServiceConfig) (*ServiceManager, error) {
	podman, err := container.NewPodmanManager(logger)
	if err != nil {
		return nil, fmt.Errorf("failed to create podman manager: %w", err)
	}

	// Initialize workspace metadata manager using PathRegistry
	paths := config.FromEnv()
	dataFile := paths.JupyterWorkspacesFile()
	if err := paths.EnsureDir(paths.JupyterStateDir()); err != nil {
		return nil, fmt.Errorf("failed to create jupyter state directory: %w", err)
	}
	workspaceMetadataMgr := NewWorkspaceMetadataManager(logger, dataFile)

	// Initialize security manager with enhanced config
	securityConfig := GetDefaultSecurityConfig()

	// Override blocked packages from environment variable if provided.
	// splitPackageList trims entries and drops blanks (e.g. a trailing comma).
	if blockedPkgs := os.Getenv("FETCHML_JUPYTER_BLOCKED_PACKAGES"); blockedPkgs != "" {
		securityConfig.BlockedPackages = splitPackageList(blockedPkgs)
	}

	// Override allowed packages from environment variable if provided
	if allowedPkgs := os.Getenv("FETCHML_JUPYTER_ALLOWED_PACKAGES"); allowedPkgs != "" {
		securityConfig.AllowedPackages = make(map[string]bool)
		for _, pkg := range splitPackageList(allowedPkgs) {
			securityConfig.AllowedPackages[pkg] = true
		}
	}

	securityMgr := NewSecurityManager(logger, securityConfig)
	startupBlockedPkgs := startupBlockedPackages(securityConfig.BlockedPackages)

	sm := &ServiceManager{
		logger:               logger,
		podman:               podman,
		config:               svcConfig,
		services:             make(map[string]*JupyterService),
		workspaceMetadataMgr: workspaceMetadataMgr,
		securityMgr:          securityMgr,
		startupBlockedPkgs:   startupBlockedPkgs,
	}

	// Load existing services
	if err := sm.loadServices(); err != nil {
		logger.Warn("failed to load existing services", "error", err)
	}

	return sm, nil
}

// StartService starts a new Jupyter service.
//
// The request is validated (and defaulted in place), the container is
// started, checked against the startup package blacklist and awaited; on
// either post-start failure the container is stopped. The new service is
// registered in memory and persisted on a best-effort basis.
func (sm *ServiceManager) StartService(
	ctx context.Context,
	req *StartRequest,
) (*JupyterService, error) {
	// Validate request
	if err := sm.validateStartRequest(req); err != nil {
		return nil, err
	}

	// Check service limit
	if len(sm.services) >= sm.config.MaxServices {
		return nil, fmt.Errorf("maximum number of services (%d) reached", sm.config.MaxServices)
	}

	// Generate service ID
	serviceID := sm.generateServiceID(req.Name)

	// Prepare container configuration
	containerConfig := sm.prepareContainerConfig(serviceID, req)

	// Start container
	containerID, err := sm.podman.StartContainer(ctx, containerConfig)
	if err != nil {
		return nil, fmt.Errorf("failed to start container: %w", err)
	}

	// Check for blacklisted packages in the container
	if err := sm.checkPackageBlacklist(ctx, containerID); err != nil {
		// Cleanup on blacklist violation
		_ = sm.podman.StopContainer(ctx, containerID)
		return nil, err
	}

	// Wait for Jupyter to be ready
	url, err := sm.waitForJupyterReady(ctx, containerID, req.Network)
	if err != nil {
		// Cleanup on failure
		_ = sm.podman.StopContainer(ctx, containerID)
		return nil, fmt.Errorf("jupyter failed to start: %w", err)
	}

	// Create service record
	now := time.Now()
	service := &JupyterService{
		ID:          serviceID,
		Name:        req.Name,
		Status:      serviceStatusRunning,
		ContainerID: containerID,
		Port:        req.Network.HostPort,
		Workspace:   req.Workspace,
		Image:       req.Image,
		URL:         url,
		CreatedAt:   now,
		LastAccess:  now,
		Config:      *sm.config,
		Environment: req.Environment,
		Metadata:    req.Metadata,
	}

	// Store service
	sm.services[serviceID] = service

	// Check if workspace is linked with an experiment
	if workspaceMeta, err := sm.workspaceMetadataMgr.GetWorkspaceMetadata(req.Workspace); err == nil && workspaceMeta != nil {
		// req.Metadata may be nil; writing to a nil map would panic.
		if service.Metadata == nil {
			service.Metadata = make(map[string]string)
		}
		service.Metadata["experiment_id"] = workspaceMeta.ExperimentID
		service.Metadata["linked_at"] = fmt.Sprintf("%d", workspaceMeta.LinkedAt.Unix())
		sm.logger.Info("service started with linked experiment",
			"service_id", serviceID,
			"experiment_id", workspaceMeta.ExperimentID)
	}

	// Save services to disk
	if err := sm.saveServices(); err != nil {
		sm.logger.Warn("failed to save services", "error", err)
	}

	sm.logger.Info("jupyter service started",
		"service_id", serviceID,
		"name", req.Name,
		"url", stripTokenFromURL(url),
		"workspace", req.Workspace)

	return service, nil
}

// checkPackageBlacklist validates that no blacklisted packages are installed in the container.
//
// It is a no-op when the startup blacklist is empty. If the installed
// packages cannot be listed, a warning is logged and startup is allowed.
func (sm *ServiceManager) checkPackageBlacklist(ctx context.Context, containerID string) error {
	if len(sm.startupBlockedPkgs) == 0 {
		return nil
	}

	// Get list of installed packages from the container
	// Try both pip and conda package managers
	packages, err := sm.getInstalledPackages(ctx, containerID)
	if err != nil {
		sm.logger.Warn("failed to get installed packages for blacklist check", "error", err)
		// Don't fail startup if we can't check packages, but log it
		return nil
	}

	// Check each installed package against the startup blacklist
	var blockedPackages []string
	for _, pkg := range packages {
		for _, blocked := range sm.startupBlockedPkgs {
			if strings.EqualFold(blocked, pkg) {
				blockedPackages = append(blockedPackages, pkg)
				break
			}
		}
	}

	// If any blocked packages are found, fail startup
	if len(blockedPackages) > 0 {
		return fmt.Errorf("container startup failed: blacklisted packages detected: %v. "+
			"These packages are blocked by security policy. "+
			"Remove them from the image or use FETCHML_JUPYTER_STARTUP_BLOCKED_PACKAGES to configure the startup blacklist",
			blockedPackages)
	}

	return nil
}

// getInstalledPackages retrieves the list of installed packages from the container.
//
// Both "pip list --format=freeze" and "conda list --export" are tried and
// their results merged without duplicates. An error is returned only when
// both commands fail, so callers can tell "no packages" from "could not check".
func (sm *ServiceManager) getInstalledPackages(ctx context.Context, containerID string) ([]string, error) {
	var packages []string

	// Try pip list first
	pipOutput, pipErr := sm.podman.ExecContainer(ctx, containerID, []string{"pip", "list", "--format=freeze"})
	if pipErr == nil && pipOutput != "" {
		packages = append(packages, sm.parsePipList(pipOutput)...)
	}

	// Try conda list as well
	condaOutput, condaErr := sm.podman.ExecContainer(ctx, containerID, []string{"conda", "list", "--export"})
	if condaErr == nil && condaOutput != "" {
		packages = append(packages, sm.parseCondaList(condaOutput)...)
	}

	if pipErr != nil && condaErr != nil {
		return nil, fmt.Errorf("failed to list installed packages: pip: %v; conda: %v", pipErr, condaErr)
	}

	// Remove duplicates
	uniquePackages := make(map[string]bool, len(packages))
	var result []string
	for _, pkg := range packages {
		if !uniquePackages[pkg] {
			uniquePackages[pkg] = true
			result = append(result, pkg)
		}
	}

	return result, nil
}

// parsePipList parses pip list --format=freeze output.
//
// Blank lines and lines starting with "#" are skipped. The package name is
// the text before "==" and, for direct references of the form "name @ url",
// the text before " @ ".
func (sm *ServiceManager) parsePipList(output string) []string {
	var packages []string
	for _, line := range strings.Split(output, "\n") {
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		// Format: package==version
		pkgName := strings.Split(line, "==")[0]
		// Format: package @ location (direct reference)
		if idx := strings.Index(pkgName, " @ "); idx != -1 {
			pkgName = pkgName[:idx]
		}
		pkgName = strings.TrimSpace(pkgName)
		if pkgName != "" {
			packages = append(packages, pkgName)
		}
	}
	return packages
}

// serviceByName returns the first registered service whose name equals name
// ignoring case and surrounding whitespace, or nil if there is none.
func (sm *ServiceManager) serviceByName(name string) *JupyterService {
	name = strings.TrimSpace(name)
	if name == "" {
		return nil
	}
	for _, svc := range sm.services {
		if svc == nil {
			continue
		}
		if strings.EqualFold(strings.TrimSpace(svc.Name), name) {
			return svc
		}
	}
	return nil
}

// listInstalledPackages queries pip and conda inside the container for their
// JSON package listings and returns the merged result, de-duplicated by
// lower-cased (name, source). A failing or unparsable listing is skipped;
// the returned error is always nil.
func (sm *ServiceManager) listInstalledPackages(ctx context.Context, containerID string) ([]InstalledPackage, error) {
	// Shape shared by the pip and conda JSON listings.
	type listedPackage struct {
		Name    string `json:"name"`
		Version string `json:"version"`
	}

	var pkgs []InstalledPackage
	collect := func(raw, source string) {
		var parsed []listedPackage
		if json.Unmarshal([]byte(raw), &parsed) != nil {
			return
		}
		for _, p := range parsed {
			name := strings.TrimSpace(p.Name)
			if name == "" {
				continue
			}
			pkgs = append(pkgs, InstalledPackage{Name: name, Version: strings.TrimSpace(p.Version), Source: source})
		}
	}

	// pip
	if pipJSON, err := sm.podman.ExecContainer(ctx, containerID, []string{"pip", "list", "--format=json"}); err == nil {
		collect(pipJSON, "pip")
	}

	// conda
	if condaJSON, err := sm.podman.ExecContainer(ctx, containerID, []string{"conda", "list", "--json"}); err == nil {
		collect(condaJSON, "conda")
	}

	seen := make(map[string]bool, len(pkgs))
	out := make([]InstalledPackage, 0, len(pkgs))
	for _, p := range pkgs {
		key := strings.ToLower(strings.TrimSpace(p.Name)) + ":" + strings.ToLower(strings.TrimSpace(p.Source))
		if seen[key] {
			continue
		}
		seen[key] = true
		out = append(out, p)
	}
	return out, nil
}

// ListInstalledPackages returns the pip and conda packages installed in the
// container of the service with the given name. It fails when the manager is
// nil, the service is unknown, or the service has no container ID.
func (sm *ServiceManager) ListInstalledPackages(ctx context.Context, serviceName string) ([]InstalledPackage, error) {
	if sm == nil {
		return nil, fmt.Errorf("service manager is nil")
	}
	svc := sm.serviceByName(serviceName)
	if svc == nil {
		return nil, fmt.Errorf("service %s not found", strings.TrimSpace(serviceName))
	}
	if strings.TrimSpace(svc.ContainerID) == "" {
		return nil, fmt.Errorf("service container not available")
	}
	return sm.listInstalledPackages(ctx, svc.ContainerID)
}

// parseCondaList parses conda list --export output.
//
// Blank lines and lines starting with "#" are skipped; the package name is
// the text before the first "=".
func (sm *ServiceManager) parseCondaList(output string) []string {
	var packages []string
	for _, line := range strings.Split(output, "\n") {
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		// Format: package=version=build
		pkgName := strings.TrimSpace(strings.Split(line, "=")[0])
		if pkgName != "" {
			packages = append(packages, pkgName)
		}
	}
	return packages
}

// ParsePipList parses pip list --format=freeze output.
func ParsePipList(output string) []string {
	sm := &ServiceManager{}
	return sm.parsePipList(output)
}

// ParseCondaList parses conda list --export output.
func ParseCondaList(output string) []string {
	sm := &ServiceManager{}
	return sm.parseCondaList(output)
}

// PrepareContainerConfig builds the Podman container config for a start request.
func PrepareContainerConfig(serviceID string, req *StartRequest) *container.ContainerConfig {
	sm := &ServiceManager{}
	return sm.prepareContainerConfig(serviceID, req)
}

// MoveWorkspaceToTrash moves a workspace directory to the configured trash directory.
func MoveWorkspaceToTrash(workspacePath string, originalName string) (string, error) { sm := &ServiceManager{} trashPath, _, err := sm.moveWorkspaceToTrash(workspacePath, originalName) return trashPath, err } // RestoreWorkspace restores the most recently trashed workspace for the given name. func RestoreWorkspace(ctx context.Context, name string) (string, error) { sm := &ServiceManager{} return sm.RestoreWorkspace(ctx, name) } // StopService stops a Jupyter service func (sm *ServiceManager) StopService(ctx context.Context, serviceID string) error { service, exists := sm.services[serviceID] if !exists { return fmt.Errorf("service %s not found", serviceID) } // Stop container if err := sm.podman.StopContainer(ctx, service.ContainerID); err != nil { sm.logger.Warn("failed to stop container", "service_id", serviceID, "error", err) } // Remove container if err := sm.podman.RemoveContainer(ctx, service.ContainerID); err != nil { sm.logger.Warn("failed to remove container", "service_id", serviceID, "error", err) } // Update service status service.Status = "stopped" service.LastAccess = time.Now() // Remove from active services delete(sm.services, serviceID) // Save services to disk if err := sm.saveServices(); err != nil { sm.logger.Warn("failed to save services", "error", err) } sm.logger.Info("jupyter service stopped", "service_id", serviceID, "name", service.Name) return nil } // RemoveService removes a Jupyter service. If purge is false, it soft-deletes the workspace by moving it to trash. 
func (sm *ServiceManager) RemoveService(ctx context.Context, serviceID string, purge bool) error { service, exists := sm.services[serviceID] if !exists { return fmt.Errorf("service %s not found", serviceID) } // Stop container first if err := sm.podman.StopContainer(ctx, service.ContainerID); err != nil { sm.logger.Warn("failed to stop container before removal", "service_id", serviceID, "error", err) } // Remove container if err := sm.podman.RemoveContainer(ctx, service.ContainerID); err != nil { sm.logger.Warn("failed to remove container", "service_id", serviceID, "error", err) return fmt.Errorf("failed to remove container: %w", err) } // Best-effort: unlink workspace metadata. if sm.workspaceMetadataMgr != nil && strings.TrimSpace(service.Workspace) != "" { _ = sm.workspaceMetadataMgr.UnlinkWorkspace(service.Workspace) } // Workspace deletion policy. ws := strings.TrimSpace(service.Workspace) if ws != "" { wsPath, err := resolveWorkspacePath(ws) if err == nil { ws = wsPath } if purge { if err := os.RemoveAll(ws); err != nil && !os.IsNotExist(err) { return fmt.Errorf("failed to delete workspace: %w", err) } } else { dest, info, err := sm.moveWorkspaceToTrash(ws, strings.TrimSpace(service.Name)) if err != nil { return err } // Persist trash path for observability. 
if service.Metadata == nil { service.Metadata = make(map[string]string) } service.Metadata["trash_path"] = dest service.Metadata["purge_after"] = strconv.FormatInt(info.PurgeAfter.Unix(), 10) } } // Remove from active services delete(sm.services, serviceID) // Save services to disk if err := sm.saveServices(); err != nil { sm.logger.Warn("failed to save services", "error", err) } sm.logger.Info("jupyter service removed", "service_id", serviceID, "name", service.Name, "purge", purge) return nil } // GetService retrieves a service by ID func (sm *ServiceManager) GetService(serviceID string) (*JupyterService, error) { service, exists := sm.services[serviceID] if !exists { return nil, fmt.Errorf("service %s not found", serviceID) } // Update last access time service.LastAccess = time.Now() return service, nil } // ListServices returns all services func (sm *ServiceManager) ListServices() []*JupyterService { services := make([]*JupyterService, 0, len(sm.services)) for _, service := range sm.services { services = append(services, service) } return services } // GetServiceStatus returns the current status of a service func (sm *ServiceManager) GetServiceStatus(ctx context.Context, serviceID string) (string, error) { service, exists := sm.services[serviceID] if !exists { return "", fmt.Errorf("service %s not found", serviceID) } // Check container status status, err := sm.podman.GetContainerStatus(ctx, service.ContainerID) if err != nil { sm.logger.Warn("failed to get container status", "service_id", serviceID, "error", err) return "unknown", err } // Update service status if different if service.Status != status { service.Status = status service.LastAccess = time.Now() _ = sm.saveServices() } return status, nil } // validateStartRequest validates a start request func (sm *ServiceManager) validateStartRequest(req *StartRequest) error { if req.Name == "" { return fmt.Errorf("service name is required") } if req.Workspace == "" { req.Workspace = sm.config.DefaultWorkspace } 
// Resolve/normalize workspace path before comparing. wsPath, err := resolveWorkspacePath(req.Workspace) if err != nil { return err } req.Workspace = wsPath // Enforce reproducibility: do not allow creating a service/workspace with the same name // as any existing service (regardless of status). for _, svc := range sm.services { if svc == nil { continue } if strings.EqualFold(strings.TrimSpace(svc.Name), strings.TrimSpace(req.Name)) { return fmt.Errorf("a jupyter service/workspace named %q already exists", req.Name) } if svc.Workspace != "" && svc.Workspace == req.Workspace { return fmt.Errorf("workspace path %q is already in use by service %q", req.Workspace, svc.Name) } } // Ensure workspace directory exists. if err := os.MkdirAll(req.Workspace, 0o750); err != nil { return fmt.Errorf("failed to create workspace directory: %w", err) } if req.Image == "" { req.Image = sm.config.DefaultImage } if req.Network.HostPort == 0 { req.Network.HostPort = sm.config.DefaultPort } if req.Network.ContainerPort == 0 { req.Network.ContainerPort = 8888 } // Check for port conflicts for _, service := range sm.services { if service.Port == req.Network.HostPort && service.Status == serviceStatusRunning { return fmt.Errorf("port %d is already in use by service %s", req.Network.HostPort, service.Name) } } return nil } // generateServiceID generates a unique service ID func (sm *ServiceManager) generateServiceID(name string) string { timestamp := time.Now().Unix() sanitizedName := strings.ToLower(strings.ReplaceAll(name, " ", "-")) return fmt.Sprintf("jupyter-%s-%d", sanitizedName, timestamp) } // prepareContainerConfig prepares container configuration with secret support func (sm *ServiceManager) prepareContainerConfig( serviceID string, req *StartRequest, ) *container.ContainerConfig { imageLower := strings.ToLower(strings.TrimSpace(req.Image)) isPublicJupyter := strings.Contains(imageLower, "quay.io/jupyter/") || strings.Contains(imageLower, "jupyter/") // Prepare volume mounts 
volumes := map[string]string{} workspaceMount := "/workspace" if isPublicJupyter { workspaceMount = "/home/jovyan/work" } volumes[req.Workspace] = workspaceMount // Prepare environment variables (including sensitive ones) rawEnv := map[string]string{ "JUPYTER_ENABLE_LAB": "yes", } if req.Network.EnableToken && req.Network.Token != "" { rawEnv["JUPYTER_TOKEN"] = req.Network.Token } else { rawEnv["JUPYTER_TOKEN"] = "" // No token for development } if req.Network.EnablePassword && req.Network.Password != "" { rawEnv["JUPYTER_PASSWORD"] = req.Network.Password } // Add custom environment variables for k, v := range req.Environment { rawEnv[k] = v } // Sanitize environment - extract sensitive values as secrets sensitiveKeys := []string{"JUPYTER_TOKEN", "JUPYTER_PASSWORD", "SECRET", "PASSWORD", "API_KEY", "TOKEN"} secrets, cleanEnv := container.SanitizeContainerEnv(rawEnv, sensitiveKeys) // Prepare port mappings ports := map[int]int{ req.Network.HostPort: req.Network.ContainerPort, } // Prepare container command (uses cleanEnv variables) var cmd []string if isPublicJupyter { condaEnv := strings.TrimSpace(os.Getenv("FETCHML_JUPYTER_CONDA_ENV")) if condaEnv == "" { condaEnv = "base" } kernelName := strings.TrimSpace(os.Getenv("FETCHML_JUPYTER_KERNEL_NAME")) if kernelName == "" { kernelName = condaEnv } displayName := fmt.Sprintf("Python (%s)", kernelName) jupyterTokenArg := "" if !req.Network.EnableToken { jupyterTokenArg = " --NotebookApp.token= --ServerApp.token=" } script := fmt.Sprintf( "set -euo pipefail; "+ "if conda run -n %s python -c 'import ipykernel' >/dev/null 2>&1; then "+ "conda run -n %s python -m ipykernel install --user --name %s --display-name %q; "+ "fi; "+ "exec start-notebook.sh --ip=0.0.0.0 --port=%d --no-browser --notebook-dir=%s%s", condaEnv, condaEnv, kernelName, displayName, req.Network.ContainerPort, workspaceMount, jupyterTokenArg, ) cmd = []string{"bash", "-lc", script} } else { condaEnv := 
strings.TrimSpace(os.Getenv("FETCHML_JUPYTER_CONDA_ENV")) if condaEnv == "" { condaEnv = "ml_env" } kernelName := strings.TrimSpace(os.Getenv("FETCHML_JUPYTER_KERNEL_NAME")) if kernelName == "" { kernelName = condaEnv } displayName := fmt.Sprintf("Python (%s)", kernelName) jupyterTokenArg := "" if !req.Network.EnableToken { jupyterTokenArg = " --NotebookApp.token=" } script := fmt.Sprintf( "set -euo pipefail; "+ "if conda run -n %s python -c 'import ipykernel' >/dev/null 2>&1; then "+ "conda run -n %s python -m ipykernel install --user --name %s --display-name %q; "+ "fi; "+ "exec conda run -n %s jupyter notebook --no-browser --ip=0.0.0.0 --port=%d --NotebookApp.allow-root=True --NotebookApp.ip=0.0.0.0%s", condaEnv, condaEnv, kernelName, displayName, condaEnv, req.Network.ContainerPort, jupyterTokenArg, ) cmd = []string{"bash", "-lc", script} } // Prepare security options securityOpts := []string{} if req.Security.ReadOnlyRoot { securityOpts = append(securityOpts, "--read-only") } for _, cap := range req.Security.DropCapabilities { securityOpts = append(securityOpts, fmt.Sprintf("--cap-drop=%s", cap)) } return &container.ContainerConfig{ Name: serviceID, Image: req.Image, Command: cmd, Env: cleanEnv, Secrets: secrets, Volumes: volumes, Ports: ports, SecurityOpts: securityOpts, Resources: container.ResourceConfig{ MemoryLimit: req.Resources.MemoryLimit, CPULimit: req.Resources.CPULimit, GPUDevices: req.Resources.GPUDevices, }, Network: container.NetworkConfig{ AllowNetwork: req.Security.AllowNetwork, }, } } // waitForJupyterReady waits for Jupyter to be ready and returns the URL func (sm *ServiceManager) waitForJupyterReady( ctx context.Context, containerID string, networkConfig NetworkConfig, ) (string, error) { // Wait for container to be running maxWait := 60 * time.Second interval := 2 * time.Second deadline := time.Now().Add(maxWait) for time.Now().Before(deadline) { status, err := sm.podman.GetContainerStateStatus(ctx, containerID) if err != nil { return "", 
fmt.Errorf("failed to check container status: %w", err) } if status == serviceStatusRunning { break } if status == "exited" || status == "dead" { return "", fmt.Errorf("container failed to start (status: %s)", status) } select { case <-ctx.Done(): return "", ctx.Err() case <-time.After(interval): } } // Wait a bit more for Jupyter to initialize time.Sleep(5 * time.Second) // Construct URL url := fmt.Sprintf("http://localhost:%d", networkConfig.HostPort) if networkConfig.EnableToken && networkConfig.Token != "" { url += fmt.Sprintf("?token=%s", networkConfig.Token) } return url, nil } // loadServices loads existing services from disk using PathRegistry func (sm *ServiceManager) loadServices() error { paths := config.FromEnv() servicesFile := paths.JupyterServicesFile() if err := paths.EnsureDir(paths.JupyterStateDir()); err != nil { return fmt.Errorf("failed to create jupyter state directory: %w", err) } data, err := os.ReadFile(servicesFile) if err != nil { if os.IsNotExist(err) { return nil // No existing services } return err } var services map[string]*JupyterService if err := json.Unmarshal(data, &services); err != nil { return err } // Reset in-memory map before re-hydrating from disk. sm.services = make(map[string]*JupyterService) // Normalize service status + de-dupe by service name. // Prefer: running service > newest CreatedAt. byName := make(map[string]*JupyterService) for id, service := range services { if service == nil { continue } // Ensure stable ID even if old payloads didn't persist it. 
if strings.TrimSpace(service.ID) == "" { service.ID = id } ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) status, err := sm.podman.GetContainerStateStatus(ctx, service.ContainerID) cancel() if err != nil { sm.logger.Warn("failed to check service status", "service_id", id, "error", err) service.Status = "stopped" } else if status == serviceStatusRunning { service.Status = serviceStatusRunning } else { service.Status = "stopped" } nameKey := strings.ToLower(strings.TrimSpace(service.Name)) if nameKey == "" { nameKey = service.ID } if existing, ok := byName[nameKey]; ok { // prefer running if existing.Status != serviceStatusRunning && service.Status == serviceStatusRunning { byName[nameKey] = service continue } if existing.Status == serviceStatusRunning && service.Status != serviceStatusRunning { continue } // otherwise prefer newest if service.CreatedAt.After(existing.CreatedAt) { byName[nameKey] = service } continue } byName[nameKey] = service } for _, service := range byName { if service == nil { continue } if strings.TrimSpace(service.ID) == "" { continue } sm.services[service.ID] = service } // Best-effort: persist the cleaned registry to avoid accumulating duplicates. 
_ = sm.saveServices() return nil } // saveServices saves services to disk using PathRegistry func (sm *ServiceManager) saveServices() error { paths := config.FromEnv() servicesFile := paths.JupyterServicesFile() if err := paths.EnsureDir(paths.JupyterStateDir()); err != nil { return fmt.Errorf("failed to create jupyter state directory: %w", err) } data, err := json.MarshalIndent(sm.services, "", " ") if err != nil { return err } return os.WriteFile(servicesFile, data, 0600) } // LinkWorkspaceWithExperiment links a workspace with an experiment func (sm *ServiceManager) LinkWorkspaceWithExperiment( workspacePath, experimentID, serviceID string, ) error { return sm.workspaceMetadataMgr.LinkWorkspace(workspacePath, experimentID, serviceID) } // GetWorkspaceMetadata retrieves metadata for a workspace func (sm *ServiceManager) GetWorkspaceMetadata(workspacePath string) (*WorkspaceMetadata, error) { return sm.workspaceMetadataMgr.GetWorkspaceMetadata(workspacePath) } // SyncWorkspaceWithExperiment synchronizes a workspace with an experiment func (sm *ServiceManager) SyncWorkspaceWithExperiment( _ context.Context, workspacePath, experimentID, direction string, ) error { // Update sync time in metadata if err := sm.workspaceMetadataMgr.UpdateSyncTime(workspacePath, direction); err != nil { sm.logger.Warn("failed to update sync time", "error", err) } // In a real implementation, this would perform actual synchronization: // - For "pull": Download experiment data/metrics to workspace // - For "push": Upload workspace notebooks/results to experiment sm.logger.Info("workspace sync completed", "workspace", workspacePath, "experiment_id", experimentID, "direction", direction) return nil } // ListLinkedWorkspaces returns all linked workspaces func (sm *ServiceManager) ListLinkedWorkspaces() []*WorkspaceMetadata { return sm.workspaceMetadataMgr.ListLinkedWorkspaces() } // GetWorkspacesForExperiment returns all workspaces linked to an experiment func (sm *ServiceManager) 
GetWorkspacesForExperiment(experimentID string) []*WorkspaceMetadata { return sm.workspaceMetadataMgr.GetWorkspacesForExperiment(experimentID) } // UnlinkWorkspace removes the link between workspace and experiment func (sm *ServiceManager) UnlinkWorkspace(workspacePath string) error { return sm.workspaceMetadataMgr.UnlinkWorkspace(workspacePath) } // ClearAllMetadata clears all workspace metadata (used for test isolation) func (sm *ServiceManager) ClearAllMetadata() error { return sm.workspaceMetadataMgr.ClearAllMetadata() } // SetAutoSync enables or disables auto-sync for a workspace func (sm *ServiceManager) SetAutoSync( workspacePath string, enabled bool, interval time.Duration, ) error { return sm.workspaceMetadataMgr.SetAutoSync(workspacePath, enabled, interval) } // AddTag adds a tag to workspace metadata func (sm *ServiceManager) AddTag(workspacePath, tag string) error { return sm.workspaceMetadataMgr.AddTag(workspacePath, tag) } // Close cleans up the service manager func (sm *ServiceManager) Close(ctx context.Context) error { // Stop all running services for _, service := range sm.services { if service.Status == serviceStatusRunning { if err := sm.StopService(ctx, service.ID); err != nil { sm.logger.Warn("failed to stop service during cleanup", "service_id", service.ID, "error", err) } } } return nil }