Update Jupyter integration for security and scheduler support:
- Enhanced security configuration with audit logging
- Health monitoring with scheduler event integration
- Package manager with network policy enforcement
- Service manager with lifecycle hooks
- Network manager with tenant isolation
- Workspace metadata with tenant tags
- Config with resource limits
- Podman container integration improvements
- Experiment manager with tracking integration
- Manifest runner with security checks
493 lines
15 KiB
Go
493 lines
15 KiB
Go
// Package jupyter provides Jupyter notebook service management and configuration.
|
|
package jupyter
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/jfraeys/fetch_ml/internal/fileutil"
|
|
"github.com/jfraeys/fetch_ml/internal/logging"
|
|
)
|
|
|
|
// defaultBlockedPackages is the package-level default list of blocked package
// names. It is empty unless changed elsewhere in the package.
var defaultBlockedPackages = []string{}

// DefaultBlockedPackages returns a freshly allocated copy of the default
// blocked-package list, so callers may modify the result without touching the
// package-level default. The returned slice is never nil.
func DefaultBlockedPackages() []string {
	blocked := make([]string, len(defaultBlockedPackages))
	copy(blocked, defaultBlockedPackages)
	return blocked
}
|
|
|
|
func DefaultEnhancedSecurityConfigFromEnv() *EnhancedSecurityConfig {
|
|
securityConfig := GetDefaultSecurityConfig()
|
|
|
|
if blockedPkgs := os.Getenv("FETCHML_JUPYTER_BLOCKED_PACKAGES"); blockedPkgs != "" {
|
|
securityConfig.BlockedPackages = strings.Split(strings.TrimSpace(blockedPkgs), ",")
|
|
for i, pkg := range securityConfig.BlockedPackages {
|
|
securityConfig.BlockedPackages[i] = strings.TrimSpace(pkg)
|
|
}
|
|
}
|
|
|
|
if allowedPkgs := os.Getenv("FETCHML_JUPYTER_ALLOWED_PACKAGES"); allowedPkgs != "" {
|
|
securityConfig.AllowedPackages = make(map[string]bool)
|
|
allowed := strings.Split(strings.TrimSpace(allowedPkgs), ",")
|
|
for _, pkg := range allowed {
|
|
securityConfig.AllowedPackages[strings.TrimSpace(pkg)] = true
|
|
}
|
|
}
|
|
|
|
return securityConfig
|
|
}
|
|
|
|
// envDefaultImage returns the container image named by the
// FETCHML_JUPYTER_DEFAULT_IMAGE environment variable with surrounding
// whitespace removed. When the variable is unset or blank it falls back to
// "localhost/ml-tools-runner:latest".
func envDefaultImage() string {
	const fallback = "localhost/ml-tools-runner:latest"

	image := strings.TrimSpace(os.Getenv("FETCHML_JUPYTER_DEFAULT_IMAGE"))
	if image == "" {
		return fallback
	}
	return image
}
|
|
|
|
// ConfigManager manages Jupyter service configuration
|
|
type ConfigManager struct {
|
|
logger *logging.Logger
|
|
configPath string
|
|
config *JupyterConfig
|
|
environment string
|
|
}
|
|
|
|
// JupyterConfig holds the complete Jupyter configuration
|
|
type JupyterConfig struct {
|
|
Version string `json:"version"`
|
|
Environment string `json:"environment"`
|
|
Logging LoggingConfig `json:"logging"`
|
|
Resources ResourceConfig `json:"resources"`
|
|
Security SecurityConfig `json:"security"`
|
|
Workspace WorkspaceConfig `json:"workspace"`
|
|
Network NetworkConfig `json:"network"`
|
|
DefaultSettings DefaultSettingsConfig `json:"default_settings"`
|
|
Service ServiceConfig `json:"service"`
|
|
AdvancedSettings AdvancedSettingsConfig `json:"advanced_settings"`
|
|
Health HealthConfig `json:"health"`
|
|
}
|
|
|
|
// WorkspaceConfig describes where workspaces live and which paths may be used
// as a workspace.
type WorkspaceConfig struct {
	// MountOptions maps mount option names to their values.
	MountOptions map[string]string `json:"mount_options"`
	// DefaultPath is the workspace path used by default; validation rejects an empty value.
	DefaultPath string `json:"default_path"`
	// MaxWorkspaceSize is the workspace size limit expressed as a string (the default is "10G").
	MaxWorkspaceSize string `json:"max_workspace_size"`
	// AllowedPaths, when non-empty, restricts workspaces to these path prefixes.
	AllowedPaths []string `json:"allowed_paths"`
	// DeniedPaths lists path prefixes that are never accepted as a workspace.
	DeniedPaths []string `json:"denied_paths"`
	// AutoCreate is the auto-create flag for workspaces.
	AutoCreate bool `json:"auto_create"`
}
|
|
|
|
// HealthConfig holds the health-monitoring settings. Durations are encoded in
// JSON as integer nanoseconds (the default encoding of time.Duration).
type HealthConfig struct {
	// CheckInterval is the time between health checks; must be positive when Enabled.
	CheckInterval time.Duration `json:"check_interval"`
	// Timeout is the health check timeout; must be positive when Enabled.
	Timeout time.Duration `json:"timeout"`
	// RetryAttempts is the configured number of retry attempts.
	RetryAttempts int `json:"retry_attempts"`
	// MaxServiceAge is the configured maximum service age.
	MaxServiceAge time.Duration `json:"max_service_age"`
	// Enabled switches health monitoring on or off.
	Enabled bool `json:"enabled"`
	// AutoCleanup is the automatic cleanup flag.
	AutoCleanup bool `json:"auto_cleanup"`
	// MetricsEnabled is the metrics flag.
	MetricsEnabled bool `json:"metrics_enabled"`
}
|
|
|
|
// LoggingConfig holds the logging settings. Sizes and ages are kept as
// strings (the defaults are "100M" and "7d").
type LoggingConfig struct {
	// Level is the log level name (default "info").
	Level string `json:"level"`
	// Format is the log format name (default "json").
	Format string `json:"format"`
	// Output is the log destination (default "stdout").
	Output string `json:"output"`
	// MaxSize is the maximum log size, as a string.
	MaxSize string `json:"max_size"`
	// MaxAge is the maximum log age, as a string.
	MaxAge string `json:"max_age"`
	// MaxBackups is the configured number of log backups.
	MaxBackups int `json:"max_backups"`
}
|
|
|
|
// DefaultSettingsConfig holds the defaults applied to newly created services.
type DefaultSettingsConfig struct {
	// Environment maps environment variable names to values.
	Environment map[string]string `json:"environment"`
	// Image is the default container image.
	Image string `json:"default_image"`
	// Workspace is the default workspace path.
	Workspace string `json:"default_workspace"`
	// ShutdownPolicy names the shutdown policy (default "graceful").
	ShutdownPolicy string `json:"shutdown_policy"`
	// Port is the default port.
	Port int `json:"default_port"`
	// StopTimeout is the stop timeout, encoded in JSON as integer nanoseconds.
	StopTimeout time.Duration `json:"stop_timeout"`
	// AutoStart is the auto-start flag.
	AutoStart bool `json:"auto_start"`
	// AutoStop is the auto-stop flag.
	AutoStop bool `json:"auto_stop"`
}
|
|
|
|
// AdvancedSettingsConfig holds the advanced configuration options. Durations
// are encoded in JSON as integer nanoseconds.
type AdvancedSettingsConfig struct {
	// ExperimentalFeatures lists the names of experimental features.
	ExperimentalFeatures []string `json:"experimental_features"`
	// MaxConcurrentServices is the configured limit on concurrent services.
	MaxConcurrentServices int `json:"max_concurrent_services"`
	// ServiceTimeout is the configured service timeout.
	ServiceTimeout time.Duration `json:"service_timeout"`
	// StartupTimeout is the configured startup timeout.
	StartupTimeout time.Duration `json:"startup_timeout"`
	// GracefulShutdown is the graceful-shutdown flag.
	GracefulShutdown bool `json:"graceful_shutdown"`
	// ForceCleanup is the force-cleanup flag.
	ForceCleanup bool `json:"force_cleanup"`
	// DebugMode is the debug flag; environment overrides set it on load.
	DebugMode bool `json:"debug_mode"`
}
|
|
|
|
// NewConfigManager creates a new configuration manager
|
|
func NewConfigManager(
|
|
logger *logging.Logger,
|
|
configPath string,
|
|
environment string,
|
|
) (*ConfigManager, error) {
|
|
cm := &ConfigManager{
|
|
logger: logger,
|
|
configPath: configPath,
|
|
environment: environment,
|
|
}
|
|
|
|
// Load configuration
|
|
if err := cm.LoadConfig(); err != nil {
|
|
return nil, fmt.Errorf("failed to load configuration: %w", err)
|
|
}
|
|
|
|
return cm, nil
|
|
}
|
|
|
|
// LoadConfig loads configuration from file
|
|
func (cm *ConfigManager) LoadConfig() error {
|
|
// Check if config file exists
|
|
if _, err := os.Stat(cm.configPath); os.IsNotExist(err) {
|
|
cm.logger.Info("configuration file not found, creating default", "path", cm.configPath)
|
|
cm.config = cm.getDefaultConfig()
|
|
return cm.SaveConfig()
|
|
}
|
|
|
|
// Read configuration file
|
|
data, err := os.ReadFile(cm.configPath)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to read config file: %w", err)
|
|
}
|
|
|
|
// Parse configuration
|
|
var config JupyterConfig
|
|
if err := json.Unmarshal(data, &config); err != nil {
|
|
return fmt.Errorf("failed to parse config file: %w", err)
|
|
}
|
|
|
|
// Apply environment-specific overrides
|
|
cm.applyEnvironmentOverrides(&config)
|
|
|
|
// Validate configuration
|
|
if err := cm.validateConfig(&config); err != nil {
|
|
return fmt.Errorf("invalid configuration: %w", err)
|
|
}
|
|
|
|
cm.config = &config
|
|
cm.logger.Info("configuration loaded successfully", "environment", cm.environment)
|
|
return nil
|
|
}
|
|
|
|
// SaveConfig saves configuration to file
|
|
func (cm *ConfigManager) SaveConfig() error {
|
|
// Ensure directory exists
|
|
if err := os.MkdirAll(filepath.Dir(cm.configPath), 0750); err != nil {
|
|
return fmt.Errorf("failed to create config directory: %w", err)
|
|
}
|
|
|
|
// Marshal configuration
|
|
data, err := json.MarshalIndent(cm.config, "", " ")
|
|
if err != nil {
|
|
return fmt.Errorf("failed to marshal config: %w", err)
|
|
}
|
|
|
|
// Write configuration file with crash safety (fsync)
|
|
if err := fileutil.WriteFileSafe(cm.configPath, data, 0600); err != nil {
|
|
return fmt.Errorf("failed to write config file: %w", err)
|
|
}
|
|
|
|
cm.logger.Info("configuration saved successfully", "path", cm.configPath)
|
|
return nil
|
|
}
|
|
|
|
// GetConfig returns the current configuration
|
|
func (cm *ConfigManager) GetConfig() *JupyterConfig {
|
|
return cm.config
|
|
}
|
|
|
|
// UpdateConfig updates the configuration
|
|
func (cm *ConfigManager) UpdateConfig(config *JupyterConfig) error {
|
|
// Validate new configuration
|
|
if err := cm.validateConfig(config); err != nil {
|
|
return fmt.Errorf("invalid configuration: %w", err)
|
|
}
|
|
|
|
cm.config = config
|
|
return cm.SaveConfig()
|
|
}
|
|
|
|
// GetServiceConfig returns the service configuration
|
|
func (cm *ConfigManager) GetServiceConfig() *ServiceConfig {
|
|
return &cm.config.Service
|
|
}
|
|
|
|
// GetNetworkConfig returns the network configuration
|
|
func (cm *ConfigManager) GetNetworkConfig() *NetworkConfig {
|
|
return &cm.config.Network
|
|
}
|
|
|
|
// GetWorkspaceConfig returns the workspace configuration
|
|
func (cm *ConfigManager) GetWorkspaceConfig() *WorkspaceConfig {
|
|
return &cm.config.Workspace
|
|
}
|
|
|
|
// GetSecurityConfig returns the security configuration
|
|
func (cm *ConfigManager) GetSecurityConfig() *SecurityConfig {
|
|
return &cm.config.Security
|
|
}
|
|
|
|
// GetResourcesConfig returns the resources configuration
|
|
func (cm *ConfigManager) GetResourcesConfig() *ResourceConfig {
|
|
return &cm.config.Resources
|
|
}
|
|
|
|
// GetHealthConfig returns the health configuration
|
|
func (cm *ConfigManager) GetHealthConfig() *HealthConfig {
|
|
return &cm.config.Health
|
|
}
|
|
|
|
// getDefaultConfig returns the default configuration
|
|
func (cm *ConfigManager) getDefaultConfig() *JupyterConfig {
|
|
return &JupyterConfig{
|
|
Version: "1.0.0",
|
|
Environment: cm.environment,
|
|
Service: ServiceConfig{
|
|
DefaultImage: envDefaultImage(),
|
|
DefaultPort: 8888,
|
|
DefaultWorkspace: "./workspace",
|
|
MaxServices: 5,
|
|
DefaultResources: ResourceConfig{
|
|
MemoryLimit: "8G",
|
|
CPULimit: "2",
|
|
GPUDevices: nil,
|
|
},
|
|
SecuritySettings: SecurityConfig{
|
|
AllowNetwork: false,
|
|
BlockedPackages: defaultBlockedPackages,
|
|
ReadOnlyRoot: false,
|
|
DropCapabilities: []string{"ALL"},
|
|
},
|
|
NetworkConfig: NetworkConfig{
|
|
HostPort: 8888,
|
|
ContainerPort: 8888,
|
|
BindAddress: "127.0.0.1",
|
|
EnableToken: false,
|
|
Token: "",
|
|
EnablePassword: false,
|
|
Password: "",
|
|
AllowRemote: false,
|
|
NetworkName: "jupyter-network",
|
|
},
|
|
},
|
|
Workspace: WorkspaceConfig{
|
|
DefaultPath: "./workspace",
|
|
AutoCreate: true,
|
|
MountOptions: map[string]string{"Z": ""},
|
|
AllowedPaths: []string{},
|
|
DeniedPaths: []string{"/etc", "/usr/bin", "/bin"},
|
|
MaxWorkspaceSize: "10G",
|
|
},
|
|
Network: NetworkConfig{
|
|
HostPort: 8888,
|
|
ContainerPort: 8888,
|
|
BindAddress: "127.0.0.1",
|
|
EnableToken: false,
|
|
Token: "",
|
|
EnablePassword: false,
|
|
Password: "",
|
|
AllowRemote: false,
|
|
NetworkName: "jupyter-network",
|
|
},
|
|
Security: SecurityConfig{
|
|
AllowNetwork: false,
|
|
BlockedPackages: defaultBlockedPackages,
|
|
ReadOnlyRoot: false,
|
|
DropCapabilities: []string{"ALL"},
|
|
},
|
|
Resources: ResourceConfig{
|
|
MemoryLimit: "8G",
|
|
CPULimit: "2",
|
|
GPUDevices: nil,
|
|
},
|
|
Health: HealthConfig{
|
|
Enabled: true,
|
|
CheckInterval: 30 * time.Second,
|
|
Timeout: 10 * time.Second,
|
|
RetryAttempts: 3,
|
|
MaxServiceAge: 24 * time.Hour,
|
|
AutoCleanup: true,
|
|
MetricsEnabled: true,
|
|
},
|
|
Logging: LoggingConfig{
|
|
Level: "info",
|
|
Format: "json",
|
|
Output: "stdout",
|
|
MaxSize: "100M",
|
|
MaxBackups: 3,
|
|
MaxAge: "7d",
|
|
},
|
|
DefaultSettings: DefaultSettingsConfig{
|
|
Image: envDefaultImage(),
|
|
Port: 8888,
|
|
Workspace: "./workspace",
|
|
Environment: map[string]string{"JUPYTER_ENABLE_LAB": "yes"},
|
|
AutoStart: false,
|
|
AutoStop: false,
|
|
StopTimeout: 30 * time.Second,
|
|
ShutdownPolicy: "graceful",
|
|
},
|
|
AdvancedSettings: AdvancedSettingsConfig{
|
|
MaxConcurrentServices: 10,
|
|
ServiceTimeout: 5 * time.Minute,
|
|
StartupTimeout: 2 * time.Minute,
|
|
GracefulShutdown: true,
|
|
ForceCleanup: false,
|
|
DebugMode: false,
|
|
ExperimentalFeatures: []string{},
|
|
},
|
|
}
|
|
}
|
|
|
|
// GetDefaultServiceConfig returns the default Jupyter service configuration.
|
|
func GetDefaultServiceConfig() *ServiceConfig {
|
|
cm := &ConfigManager{environment: ""}
|
|
cfg := cm.getDefaultConfig()
|
|
return &cfg.Service
|
|
}
|
|
|
|
// applyEnvironmentOverrides applies environment-specific configuration overrides
|
|
func (cm *ConfigManager) applyEnvironmentOverrides(config *JupyterConfig) {
|
|
switch cm.environment {
|
|
case "development":
|
|
config.Service.MaxServices = 10
|
|
config.Security.AllowNetwork = true
|
|
config.Health.CheckInterval = 10 * time.Second
|
|
config.AdvancedSettings.DebugMode = true
|
|
case "production":
|
|
config.Service.MaxServices = 3
|
|
config.Security.AllowNetwork = false
|
|
config.Health.CheckInterval = 60 * time.Second
|
|
config.AdvancedSettings.DebugMode = false
|
|
config.Logging.Level = "warn"
|
|
case "testing":
|
|
config.Service.MaxServices = 1
|
|
config.Health.Enabled = false
|
|
config.AdvancedSettings.DebugMode = true
|
|
}
|
|
}
|
|
|
|
// validateConfig validates the configuration
|
|
func (cm *ConfigManager) validateConfig(config *JupyterConfig) error {
|
|
// Validate service configuration
|
|
if config.Service.DefaultPort <= 0 || config.Service.DefaultPort > 65535 {
|
|
return fmt.Errorf("invalid default port: %d", config.Service.DefaultPort)
|
|
}
|
|
if config.Service.MaxServices <= 0 {
|
|
return fmt.Errorf("max services must be positive")
|
|
}
|
|
if config.Service.DefaultImage == "" {
|
|
return fmt.Errorf("default image cannot be empty")
|
|
}
|
|
|
|
// Validate network configuration
|
|
if config.Network.HostPort <= 0 || config.Network.HostPort > 65535 {
|
|
return fmt.Errorf("invalid host port: %d", config.Network.HostPort)
|
|
}
|
|
if config.Network.ContainerPort <= 0 || config.Network.ContainerPort > 65535 {
|
|
return fmt.Errorf("invalid container port: %d", config.Network.ContainerPort)
|
|
}
|
|
|
|
// Validate workspace configuration
|
|
if config.Workspace.DefaultPath == "" {
|
|
return fmt.Errorf("default workspace path cannot be empty")
|
|
}
|
|
|
|
// Validate resources configuration
|
|
if config.Resources.MemoryLimit == "" {
|
|
return fmt.Errorf("memory limit cannot be empty")
|
|
}
|
|
if config.Resources.CPULimit == "" {
|
|
return fmt.Errorf("CPU limit cannot be empty")
|
|
}
|
|
|
|
// Validate health configuration
|
|
if config.Health.Enabled {
|
|
if config.Health.CheckInterval <= 0 {
|
|
return fmt.Errorf("health check interval must be positive")
|
|
}
|
|
if config.Health.Timeout <= 0 {
|
|
return fmt.Errorf("health check timeout must be positive")
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// SetEnvironment updates the environment and reloads configuration
|
|
func (cm *ConfigManager) SetEnvironment(environment string) error {
|
|
cm.environment = environment
|
|
return cm.LoadConfig()
|
|
}
|
|
|
|
// GetEnvironment returns the current environment
|
|
func (cm *ConfigManager) GetEnvironment() string {
|
|
return cm.environment
|
|
}
|
|
|
|
// ExportConfig exports the configuration to JSON
|
|
func (cm *ConfigManager) ExportConfig() ([]byte, error) {
|
|
return json.MarshalIndent(cm.config, "", " ")
|
|
}
|
|
|
|
// ImportConfig imports configuration from JSON
|
|
func (cm *ConfigManager) ImportConfig(data []byte) error {
|
|
var config JupyterConfig
|
|
if err := json.Unmarshal(data, &config); err != nil {
|
|
return fmt.Errorf("failed to parse configuration: %w", err)
|
|
}
|
|
|
|
return cm.UpdateConfig(&config)
|
|
}
|
|
|
|
// ResetToDefaults resets configuration to defaults
|
|
func (cm *ConfigManager) ResetToDefaults() error {
|
|
cm.config = cm.getDefaultConfig()
|
|
return cm.SaveConfig()
|
|
}
|
|
|
|
// ValidateWorkspacePath checks if a workspace path is allowed
|
|
func (cm *ConfigManager) ValidateWorkspacePath(path string) error {
|
|
// Check denied paths
|
|
for _, denied := range cm.config.Workspace.DeniedPaths {
|
|
if strings.HasPrefix(filepath.Clean(path), filepath.Clean(denied)) {
|
|
return fmt.Errorf("workspace path %s is in denied path %s", path, denied)
|
|
}
|
|
}
|
|
|
|
// Check allowed paths (if specified)
|
|
if len(cm.config.Workspace.AllowedPaths) > 0 {
|
|
allowed := false
|
|
for _, allowedPath := range cm.config.Workspace.AllowedPaths {
|
|
if strings.HasPrefix(filepath.Clean(path), filepath.Clean(allowedPath)) {
|
|
allowed = true
|
|
break
|
|
}
|
|
}
|
|
if !allowed {
|
|
return fmt.Errorf("workspace path %s is not in allowed paths", path)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// GetEffectiveConfig returns the effective configuration after all overrides
|
|
func (cm *ConfigManager) GetEffectiveConfig() *JupyterConfig {
|
|
// Create a copy of the config
|
|
config := *cm.config
|
|
|
|
// Apply any runtime overrides
|
|
// This could include environment variables, command line flags, etc.
|
|
|
|
return &config
|
|
}
|