fetch_ml/internal/jupyter/config.go
Jeremie Fraeys 6b2c377680
refactor(jupyter): enhance security and scheduler integration
Update Jupyter integration for security and scheduler support:
- Enhanced security configuration with audit logging
- Health monitoring with scheduler event integration
- Package manager with network policy enforcement
- Service manager with lifecycle hooks
- Network manager with tenant isolation
- Workspace metadata with tenant tags
- Config with resource limits
- Podman container integration improvements
- Experiment manager with tracking integration
- Manifest runner with security checks
2026-02-26 12:06:35 -05:00

493 lines
15 KiB
Go

// Package jupyter provides Jupyter notebook service management and configuration.
package jupyter
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"time"
"github.com/jfraeys/fetch_ml/internal/fileutil"
"github.com/jfraeys/fetch_ml/internal/logging"
)
// defaultBlockedPackages is the package-level baseline list of blocked
// packages. It is currently empty (but non-nil).
var defaultBlockedPackages = []string{}

// DefaultBlockedPackages returns a freshly allocated copy of the baseline
// blocked-package list, so the caller may modify the result without touching
// the shared package-level slice. The result is never nil.
func DefaultBlockedPackages() []string {
	copied := make([]string, len(defaultBlockedPackages))
	copy(copied, defaultBlockedPackages)
	return copied
}
// DefaultEnhancedSecurityConfigFromEnv returns the default enhanced security
// configuration with its package lists optionally overridden from the
// environment.
//
// Two comma-separated environment variables are consulted:
//   - FETCHML_JUPYTER_BLOCKED_PACKAGES replaces BlockedPackages.
//   - FETCHML_JUPYTER_ALLOWED_PACKAGES replaces AllowedPackages.
//
// A variable that is unset or empty leaves the corresponding default untouched.
// Each entry is trimmed of surrounding whitespace, and blank entries (as
// produced by a trailing or doubled comma, or a whitespace-only value) are
// dropped so that an empty package name never ends up in either list.
func DefaultEnhancedSecurityConfigFromEnv() *EnhancedSecurityConfig {
	securityConfig := GetDefaultSecurityConfig()

	if blockedPkgs := os.Getenv("FETCHML_JUPYTER_BLOCKED_PACKAGES"); blockedPkgs != "" {
		entries := strings.Split(blockedPkgs, ",")
		blocked := make([]string, 0, len(entries))
		for _, entry := range entries {
			// Skip blanks: "" is not a package name and must not be listed.
			if pkg := strings.TrimSpace(entry); pkg != "" {
				blocked = append(blocked, pkg)
			}
		}
		securityConfig.BlockedPackages = blocked
	}

	if allowedPkgs := os.Getenv("FETCHML_JUPYTER_ALLOWED_PACKAGES"); allowedPkgs != "" {
		securityConfig.AllowedPackages = make(map[string]bool)
		for _, entry := range strings.Split(allowedPkgs, ",") {
			// Skip blanks for the same reason as in the blocked list.
			if pkg := strings.TrimSpace(entry); pkg != "" {
				securityConfig.AllowedPackages[pkg] = true
			}
		}
	}

	return securityConfig
}
// envDefaultImage returns the container image named by the
// FETCHML_JUPYTER_DEFAULT_IMAGE environment variable, trimmed of surrounding
// whitespace. When the variable is unset, empty, or only whitespace, the
// built-in image reference is returned instead.
func envDefaultImage() string {
	const fallback = "localhost/ml-tools-runner:latest"

	image := strings.TrimSpace(os.Getenv("FETCHML_JUPYTER_DEFAULT_IMAGE"))
	if image == "" {
		return fallback
	}
	return image
}
// ConfigManager manages Jupyter service configuration.
// It reads the configuration from a JSON file at configPath, keeps the parsed
// result in memory, and writes it back to the same path on save.
type ConfigManager struct {
// logger receives informational messages when configuration is loaded or saved.
logger *logging.Logger
// configPath is the location of the JSON configuration file on disk.
configPath string
// config is the configuration currently held in memory; it is assigned by
// LoadConfig, UpdateConfig and ResetToDefaults.
config *JupyterConfig
// environment is the environment name used to select overrides when a
// configuration file is loaded ("development", "production" and "testing"
// are the names that trigger overrides).
environment string
}
// JupyterConfig holds the complete Jupyter configuration.
// It is the root value that LoadConfig parses from the JSON configuration file
// and that SaveConfig writes back; the struct tags give the JSON key of each
// section.
type JupyterConfig struct {
// Version is the configuration version string ("1.0.0" in the generated defaults).
Version string `json:"version"`
// Environment is the environment name recorded in the document; the generated
// defaults copy it from the manager's environment.
Environment string `json:"environment"`
// Logging holds the logging settings.
Logging LoggingConfig `json:"logging"`
// Resources holds the resource limits; validation requires its memory and CPU
// limits to be non-empty.
Resources ResourceConfig `json:"resources"`
// Security holds the security settings.
Security SecurityConfig `json:"security"`
// Workspace holds the workspace settings, including the path lists consulted
// by ValidateWorkspacePath.
Workspace WorkspaceConfig `json:"workspace"`
// Network holds the network settings; validation requires its host and
// container ports to lie in 1-65535.
Network NetworkConfig `json:"network"`
// DefaultSettings holds the defaults for new services.
DefaultSettings DefaultSettingsConfig `json:"default_settings"`
// Service holds the service-level settings; validation checks its default
// port, service limit and default image.
Service ServiceConfig `json:"service"`
// AdvancedSettings holds the advanced configuration options.
AdvancedSettings AdvancedSettingsConfig `json:"advanced_settings"`
// Health holds the health-monitoring settings.
Health HealthConfig `json:"health"`
}
// WorkspaceConfig defines workspace configuration.
type WorkspaceConfig struct {
// MountOptions maps mount option names to values; the generated defaults hold
// the single entry "Z" with an empty value.
// NOTE(review): presumably forwarded to the container volume mount — confirm
// against the code that consumes it.
MountOptions map[string]string `json:"mount_options"`
// DefaultPath is the default workspace location ("./workspace" in the
// generated defaults); validation rejects an empty value.
DefaultPath string `json:"default_path"`
// MaxWorkspaceSize is a size limit kept as a string ("10G" in the generated
// defaults); it is not parsed anywhere in this file.
MaxWorkspaceSize string `json:"max_workspace_size"`
// AllowedPaths, when non-empty, restricts ValidateWorkspacePath to paths that
// match one of these entries; an empty list disables the restriction.
AllowedPaths []string `json:"allowed_paths"`
// DeniedPaths lists locations that ValidateWorkspacePath rejects; the
// generated defaults are "/etc", "/usr/bin" and "/bin".
DeniedPaths []string `json:"denied_paths"`
// AutoCreate is true in the generated defaults.
// NOTE(review): presumably controls creating a missing workspace directory —
// confirm against the consumer.
AutoCreate bool `json:"auto_create"`
}
// HealthConfig defines health monitoring configuration.
// The time.Duration fields are written by encoding/json as integer nanosecond
// counts, since no custom marshalling is defined here.
type HealthConfig struct {
// CheckInterval is the time between health checks (30s in the generated
// defaults); it must be positive when Enabled is true.
CheckInterval time.Duration `json:"check_interval"`
// Timeout is the per-check timeout (10s in the generated defaults); it must be
// positive when Enabled is true.
Timeout time.Duration `json:"timeout"`
// RetryAttempts is the retry count (3 in the generated defaults).
RetryAttempts int `json:"retry_attempts"`
// MaxServiceAge is the maximum service age (24h in the generated defaults).
MaxServiceAge time.Duration `json:"max_service_age"`
// Enabled turns health monitoring on; the interval and timeout are validated
// only when it is true, and the "testing" environment override sets it to false.
Enabled bool `json:"enabled"`
// AutoCleanup is true in the generated defaults.
// NOTE(review): presumably removes services older than MaxServiceAge — confirm.
AutoCleanup bool `json:"auto_cleanup"`
// MetricsEnabled is true in the generated defaults.
MetricsEnabled bool `json:"metrics_enabled"`
}
// LoggingConfig defines logging configuration.
// None of these values are interpreted in this file; they are stored and
// serialized as given.
type LoggingConfig struct {
// Level is the log level name ("info" in the generated defaults; the
// "production" environment override sets it to "warn").
Level string `json:"level"`
// Format is the log format name ("json" in the generated defaults).
Format string `json:"format"`
// Output is the log destination ("stdout" in the generated defaults).
Output string `json:"output"`
// MaxSize is a size limit kept as a string ("100M" in the generated defaults).
MaxSize string `json:"max_size"`
// MaxAge is an age limit kept as a string ("7d" in the generated defaults).
MaxAge string `json:"max_age"`
// MaxBackups is the number of backups to keep (3 in the generated defaults).
MaxBackups int `json:"max_backups"`
}
// DefaultSettingsConfig defines default settings for new services.
// Note that several JSON keys carry a "default_" prefix that the Go field
// names do not.
type DefaultSettingsConfig struct {
// Environment holds environment variables as name/value pairs; the generated
// defaults set JUPYTER_ENABLE_LAB to "yes".
Environment map[string]string `json:"environment"`
// Image is the default container image; the generated defaults take it from
// envDefaultImage.
Image string `json:"default_image"`
// Workspace is the default workspace path ("./workspace" in the generated defaults).
Workspace string `json:"default_workspace"`
// ShutdownPolicy names the shutdown policy ("graceful" in the generated defaults).
ShutdownPolicy string `json:"shutdown_policy"`
// Port is the default port (8888 in the generated defaults).
Port int `json:"default_port"`
// StopTimeout is the stop timeout (30s in the generated defaults); encoding/json
// writes it as an integer nanosecond count.
StopTimeout time.Duration `json:"stop_timeout"`
// AutoStart is false in the generated defaults.
AutoStart bool `json:"auto_start"`
// AutoStop is false in the generated defaults.
AutoStop bool `json:"auto_stop"`
}
// AdvancedSettingsConfig defines advanced configuration options.
// The time.Duration fields are written by encoding/json as integer nanosecond
// counts.
type AdvancedSettingsConfig struct {
// ExperimentalFeatures lists experimental feature names (empty in the
// generated defaults).
ExperimentalFeatures []string `json:"experimental_features"`
// MaxConcurrentServices is 10 in the generated defaults.
MaxConcurrentServices int `json:"max_concurrent_services"`
// ServiceTimeout is 5 minutes in the generated defaults.
ServiceTimeout time.Duration `json:"service_timeout"`
// StartupTimeout is 2 minutes in the generated defaults.
StartupTimeout time.Duration `json:"startup_timeout"`
// GracefulShutdown is true in the generated defaults.
GracefulShutdown bool `json:"graceful_shutdown"`
// ForceCleanup is false in the generated defaults.
ForceCleanup bool `json:"force_cleanup"`
// DebugMode is false in the generated defaults; the "development" and
// "testing" environment overrides set it to true and "production" sets it to false.
DebugMode bool `json:"debug_mode"`
}
// NewConfigManager builds a ConfigManager for the given logger, configuration
// file path and environment name, and immediately loads the configuration via
// LoadConfig. If loading fails, no manager is returned and the error is
// wrapped with context.
func NewConfigManager(
	logger *logging.Logger,
	configPath string,
	environment string,
) (*ConfigManager, error) {
	manager := new(ConfigManager)
	manager.logger = logger
	manager.configPath = configPath
	manager.environment = environment

	// The manager is only handed out once a configuration is in place.
	err := manager.LoadConfig()
	if err != nil {
		return nil, fmt.Errorf("failed to load configuration: %w", err)
	}
	return manager, nil
}
// LoadConfig reads the configuration file at cm.configPath into memory.
//
// When the file does not exist, the built-in defaults are installed and written
// to disk instead; on that path neither the environment overrides nor
// validation are applied. Otherwise the file is parsed as JSON, the
// environment-specific overrides are applied, and the result is validated
// before it replaces the in-memory configuration. Any failure is returned
// wrapped with context and leaves the previous in-memory configuration in
// place.
func (cm *ConfigManager) LoadConfig() error {
	_, statErr := os.Stat(cm.configPath)
	if os.IsNotExist(statErr) {
		// First run: materialize the defaults and persist them.
		cm.logger.Info("configuration file not found, creating default", "path", cm.configPath)
		cm.config = cm.getDefaultConfig()
		return cm.SaveConfig()
	}

	raw, readErr := os.ReadFile(cm.configPath)
	if readErr != nil {
		return fmt.Errorf("failed to read config file: %w", readErr)
	}

	loaded := new(JupyterConfig)
	if parseErr := json.Unmarshal(raw, loaded); parseErr != nil {
		return fmt.Errorf("failed to parse config file: %w", parseErr)
	}

	// Overrides run before validation so the validated values are the ones
	// that will actually be used.
	cm.applyEnvironmentOverrides(loaded)
	if validationErr := cm.validateConfig(loaded); validationErr != nil {
		return fmt.Errorf("invalid configuration: %w", validationErr)
	}

	cm.config = loaded
	cm.logger.Info("configuration loaded successfully", "environment", cm.environment)
	return nil
}
// SaveConfig writes the in-memory configuration to cm.configPath as indented
// JSON. The parent directory is created (mode 0750) if needed and the file is
// written with mode 0600 through fileutil.WriteFileSafe (described upstream as
// a crash-safe, fsync-ing write). Each failing step is returned wrapped with
// context.
func (cm *ConfigManager) SaveConfig() error {
	dir := filepath.Dir(cm.configPath)
	if mkdirErr := os.MkdirAll(dir, 0750); mkdirErr != nil {
		return fmt.Errorf("failed to create config directory: %w", mkdirErr)
	}

	payload, marshalErr := json.MarshalIndent(cm.config, "", " ")
	if marshalErr != nil {
		return fmt.Errorf("failed to marshal config: %w", marshalErr)
	}

	writeErr := fileutil.WriteFileSafe(cm.configPath, payload, 0600)
	if writeErr != nil {
		return fmt.Errorf("failed to write config file: %w", writeErr)
	}

	cm.logger.Info("configuration saved successfully", "path", cm.configPath)
	return nil
}
// GetConfig returns the manager's in-memory configuration. The returned pointer
// is the manager's own value, not a copy, so changes made through it are
// visible to the manager.
func (cm *ConfigManager) GetConfig() *JupyterConfig {
	current := cm.config
	return current
}
// UpdateConfig validates config, installs it as the in-memory configuration
// and persists it with SaveConfig.
//
// A nil config is rejected with an error rather than being dereferenced during
// validation. When validation fails, the current configuration is left
// unchanged. When saving fails, the error from SaveConfig is returned and the
// new configuration remains installed in memory.
func (cm *ConfigManager) UpdateConfig(config *JupyterConfig) error {
	// Guard first: validateConfig reads fields of config unconditionally.
	if config == nil {
		return fmt.Errorf("invalid configuration: configuration is nil")
	}

	if err := cm.validateConfig(config); err != nil {
		return fmt.Errorf("invalid configuration: %w", err)
	}

	cm.config = config
	return cm.SaveConfig()
}
// GetServiceConfig returns a pointer to the Service section of the in-memory
// configuration. The pointer refers to the manager's own data, so writes
// through it change the live configuration. A configuration must be loaded.
func (cm *ConfigManager) GetServiceConfig() *ServiceConfig {
	current := cm.config
	return &current.Service
}
// GetNetworkConfig returns a pointer to the Network section of the in-memory
// configuration. The pointer refers to the manager's own data, so writes
// through it change the live configuration. A configuration must be loaded.
func (cm *ConfigManager) GetNetworkConfig() *NetworkConfig {
	current := cm.config
	return &current.Network
}
// GetWorkspaceConfig returns a pointer to the Workspace section of the
// in-memory configuration. The pointer refers to the manager's own data, so
// writes through it change the live configuration. A configuration must be
// loaded.
func (cm *ConfigManager) GetWorkspaceConfig() *WorkspaceConfig {
	current := cm.config
	return &current.Workspace
}
// GetSecurityConfig returns a pointer to the Security section of the in-memory
// configuration. The pointer refers to the manager's own data, so writes
// through it change the live configuration. A configuration must be loaded.
func (cm *ConfigManager) GetSecurityConfig() *SecurityConfig {
	current := cm.config
	return &current.Security
}
// GetResourcesConfig returns a pointer to the Resources section of the
// in-memory configuration. The pointer refers to the manager's own data, so
// writes through it change the live configuration. A configuration must be
// loaded.
func (cm *ConfigManager) GetResourcesConfig() *ResourceConfig {
	current := cm.config
	return &current.Resources
}
// GetHealthConfig returns a pointer to the Health section of the in-memory
// configuration. The pointer refers to the manager's own data, so writes
// through it change the live configuration. A configuration must be loaded.
func (cm *ConfigManager) GetHealthConfig() *HealthConfig {
	current := cm.config
	return &current.Health
}
// getDefaultConfig builds the built-in default configuration for the manager's
// environment.
//
// Every call returns a fresh value. The resource, security and network defaults
// appear twice in the document — once at the top level and once nested in the
// Service section — with identical values. Each security section receives its
// own copy of the blocked-package list (via DefaultBlockedPackages) and its own
// capability slice, so modifying one returned configuration can never alter the
// package-level defaults or another section.
func (cm *ConfigManager) getDefaultConfig() *JupyterConfig {
	const (
		defaultPort      = 8888
		defaultWorkspace = "./workspace"
	)
	image := envDefaultImage()

	// Plain values without shared backing storage; copying them is safe.
	resources := ResourceConfig{
		MemoryLimit: "8G",
		CPULimit:    "2",
		GPUDevices:  nil,
	}
	network := NetworkConfig{
		HostPort:       defaultPort,
		ContainerPort:  defaultPort,
		BindAddress:    "127.0.0.1",
		EnableToken:    false,
		Token:          "",
		EnablePassword: false,
		Password:       "",
		AllowRemote:    false,
		NetworkName:    "jupyter-network",
	}
	// Built through a function because it carries slices: each section must
	// own distinct slices rather than alias a shared one.
	newSecurity := func() SecurityConfig {
		return SecurityConfig{
			AllowNetwork:     false,
			BlockedPackages:  DefaultBlockedPackages(),
			ReadOnlyRoot:     false,
			DropCapabilities: []string{"ALL"},
		}
	}

	return &JupyterConfig{
		Version:     "1.0.0",
		Environment: cm.environment,
		Service: ServiceConfig{
			DefaultImage:     image,
			DefaultPort:      defaultPort,
			DefaultWorkspace: defaultWorkspace,
			MaxServices:      5,
			DefaultResources: resources,
			SecuritySettings: newSecurity(),
			NetworkConfig:    network,
		},
		Workspace: WorkspaceConfig{
			DefaultPath:      defaultWorkspace,
			AutoCreate:       true,
			MountOptions:     map[string]string{"Z": ""},
			AllowedPaths:     []string{},
			DeniedPaths:      []string{"/etc", "/usr/bin", "/bin"},
			MaxWorkspaceSize: "10G",
		},
		Network:   network,
		Security:  newSecurity(),
		Resources: resources,
		Health: HealthConfig{
			Enabled:        true,
			CheckInterval:  30 * time.Second,
			Timeout:        10 * time.Second,
			RetryAttempts:  3,
			MaxServiceAge:  24 * time.Hour,
			AutoCleanup:    true,
			MetricsEnabled: true,
		},
		Logging: LoggingConfig{
			Level:      "info",
			Format:     "json",
			Output:     "stdout",
			MaxSize:    "100M",
			MaxBackups: 3,
			MaxAge:     "7d",
		},
		DefaultSettings: DefaultSettingsConfig{
			Image:          image,
			Port:           defaultPort,
			Workspace:      defaultWorkspace,
			Environment:    map[string]string{"JUPYTER_ENABLE_LAB": "yes"},
			AutoStart:      false,
			AutoStop:       false,
			StopTimeout:    30 * time.Second,
			ShutdownPolicy: "graceful",
		},
		AdvancedSettings: AdvancedSettingsConfig{
			MaxConcurrentServices: 10,
			ServiceTimeout:        5 * time.Minute,
			StartupTimeout:        2 * time.Minute,
			GracefulShutdown:      true,
			ForceCleanup:          false,
			DebugMode:             false,
			ExperimentalFeatures:  []string{},
		},
	}
}
// GetDefaultServiceConfig returns the Service section of the built-in default
// configuration, generated with an empty environment name. Each call yields a
// newly built value.
func GetDefaultServiceConfig() *ServiceConfig {
	var manager ConfigManager // zero value: empty environment, nothing else set
	defaults := manager.getDefaultConfig()
	return &defaults.Service
}
// applyEnvironmentOverrides rewrites selected fields of config in place
// according to the manager's environment name:
//
//   - "development": up to 10 services, network allowed, 10s health checks,
//     debug mode on.
//   - "production": up to 3 services, network disallowed, 60s health checks,
//     debug mode off, log level "warn".
//   - "testing": a single service, health monitoring disabled, debug mode on.
//
// Any other environment name leaves config untouched.
func (cm *ConfigManager) applyEnvironmentOverrides(config *JupyterConfig) {
	env := cm.environment

	if env == "development" {
		config.Service.MaxServices = 10
		config.Security.AllowNetwork = true
		config.Health.CheckInterval = 10 * time.Second
		config.AdvancedSettings.DebugMode = true
		return
	}

	if env == "production" {
		config.Service.MaxServices = 3
		config.Security.AllowNetwork = false
		config.Health.CheckInterval = 60 * time.Second
		config.AdvancedSettings.DebugMode = false
		config.Logging.Level = "warn"
		return
	}

	if env == "testing" {
		config.Service.MaxServices = 1
		config.Health.Enabled = false
		config.AdvancedSettings.DebugMode = true
	}
}
// validateConfig checks config for values the service cannot run with and
// returns an error describing the first problem found, or nil when every check
// passes. The checks run in a fixed order: service (port range, positive
// service limit, non-empty image), network (host and container port ranges),
// workspace (non-empty default path), resources (non-empty memory and CPU
// limits) and, only when health monitoring is enabled, positive check interval
// and timeout.
func (cm *ConfigManager) validateConfig(config *JupyterConfig) error {
	service, network, health := &config.Service, &config.Network, &config.Health

	// Cases are evaluated top to bottom; the first failing check wins.
	switch {
	case service.DefaultPort <= 0 || service.DefaultPort > 65535:
		return fmt.Errorf("invalid default port: %d", service.DefaultPort)
	case service.MaxServices <= 0:
		return fmt.Errorf("max services must be positive")
	case service.DefaultImage == "":
		return fmt.Errorf("default image cannot be empty")

	case network.HostPort <= 0 || network.HostPort > 65535:
		return fmt.Errorf("invalid host port: %d", network.HostPort)
	case network.ContainerPort <= 0 || network.ContainerPort > 65535:
		return fmt.Errorf("invalid container port: %d", network.ContainerPort)

	case config.Workspace.DefaultPath == "":
		return fmt.Errorf("default workspace path cannot be empty")

	case config.Resources.MemoryLimit == "":
		return fmt.Errorf("memory limit cannot be empty")
	case config.Resources.CPULimit == "":
		return fmt.Errorf("CPU limit cannot be empty")

	case health.Enabled && health.CheckInterval <= 0:
		return fmt.Errorf("health check interval must be positive")
	case health.Enabled && health.Timeout <= 0:
		return fmt.Errorf("health check timeout must be positive")
	}
	return nil
}
// SetEnvironment switches the manager to the given environment and reloads the
// configuration from disk so that environment's overrides take effect.
//
// The switch is all-or-nothing for in-memory state: if reloading fails, both
// the previous environment name and the previous configuration are restored,
// so the manager never reports one environment while holding a configuration
// prepared for another. Files already written during a failed reload are not
// reverted. The error from LoadConfig is returned unchanged.
func (cm *ConfigManager) SetEnvironment(environment string) error {
	previousEnv, previousConfig := cm.environment, cm.config

	cm.environment = environment
	if err := cm.LoadConfig(); err != nil {
		// LoadConfig may have replaced cm.config before failing (the
		// create-defaults path), so restore both fields together.
		cm.environment, cm.config = previousEnv, previousConfig
		return err
	}
	return nil
}
// GetEnvironment reports the environment name the manager is currently
// configured for.
func (cm *ConfigManager) GetEnvironment() string {
	env := cm.environment
	return env
}
// ExportConfig serializes the in-memory configuration as indented JSON and
// returns the bytes together with any marshalling error.
func (cm *ConfigManager) ExportConfig() ([]byte, error) {
	encoded, err := json.MarshalIndent(cm.config, "", " ")
	return encoded, err
}
// ImportConfig parses data as a JSON configuration document and applies it
// through UpdateConfig, which validates and persists it. A parse failure is
// returned wrapped with context; otherwise the result of UpdateConfig is
// returned.
func (cm *ConfigManager) ImportConfig(data []byte) error {
	imported := &JupyterConfig{}
	if err := json.Unmarshal(data, imported); err != nil {
		return fmt.Errorf("failed to parse configuration: %w", err)
	}
	return cm.UpdateConfig(imported)
}
// ResetToDefaults replaces the in-memory configuration with the built-in
// defaults for the current environment and writes them to disk, returning any
// error from SaveConfig.
func (cm *ConfigManager) ResetToDefaults() error {
	defaults := cm.getDefaultConfig()
	cm.config = defaults
	return cm.SaveConfig()
}
// ValidateWorkspacePath reports whether path may be used as a workspace.
//
// The path is rejected if it equals, or lies beneath, any entry of
// Workspace.DeniedPaths. If Workspace.AllowedPaths is non-empty, the path must
// additionally equal, or lie beneath, at least one of its entries. It returns
// nil when the path is acceptable and a descriptive error otherwise.
//
// Containment is decided on whole path components after lexical cleaning, so
// "/etcetera" is not treated as being inside "/etc", and "/etc/../home" is
// treated as "/home". Blank list entries are ignored. The check is purely
// lexical: symbolic links are not resolved and relative paths are not made
// absolute, so a relative path never matches an absolute entry.
func (cm *ConfigManager) ValidateWorkspacePath(path string) error {
	parentPrefix := ".." + string(filepath.Separator)

	// contains reports whether path is base itself or a descendant of base.
	contains := func(base string) bool {
		if strings.TrimSpace(base) == "" {
			return false // a blank entry names no directory
		}
		// Rel cleans both arguments; a result that climbs out of base via
		// ".." (or an error, e.g. absolute vs. relative) means "outside".
		rel, err := filepath.Rel(base, path)
		if err != nil {
			return false
		}
		return rel != ".." && !strings.HasPrefix(rel, parentPrefix)
	}

	// Denied entries take precedence over the allow list.
	for _, denied := range cm.config.Workspace.DeniedPaths {
		if contains(denied) {
			return fmt.Errorf("workspace path %s is in denied path %s", path, denied)
		}
	}

	// An empty allow list means "no restriction".
	allowedPaths := cm.config.Workspace.AllowedPaths
	if len(allowedPaths) == 0 {
		return nil
	}
	for _, allowedPath := range allowedPaths {
		if contains(allowedPath) {
			return nil
		}
	}
	return fmt.Errorf("workspace path %s is not in allowed paths", path)
}
// GetEffectiveConfig returns a copy of the in-memory configuration. No runtime
// overrides are applied at present. The copy is shallow: top-level and nested
// struct fields are duplicated, but maps and slices inside the configuration
// still share their contents with the manager's own value.
func (cm *ConfigManager) GetEffectiveConfig() *JupyterConfig {
	effective := new(JupyterConfig)
	*effective = *cm.config
	// Placeholder for runtime overrides (environment variables, flags, ...).
	return effective
}