// This file defines DataConfig, the configuration for the data manager,
// together with its YAML loader (LoadDataConfig) and its validation (Validate).

package main

import (
	"fmt"

	"github.com/jfraeys/fetch_ml/internal/auth"
	"github.com/jfraeys/fetch_ml/internal/config"
	"github.com/jfraeys/fetch_ml/internal/fileutil"
	"github.com/jfraeys/fetch_ml/internal/storage"
	"gopkg.in/yaml.v3"
)

// DataConfig holds the configuration for the data manager.
//
// Fields are decoded from YAML using the key named in each `yaml` tag.
// LoadDataConfig replaces zero values of MLPort, NASPort, RedisAddr,
// MLDataDir, NASDataDir, MaxAgeHours, MaxSizeGB and CleanupInterval with
// defaults taken from the config package. Validate checks the ports and the
// Redis address when they are set, and requires the three cleanup settings to
// be at least 1.
type DataConfig struct {
	// ML Server (where training runs)
	MLHost    string `yaml:"ml_host"`
	MLUser    string `yaml:"ml_user"`
	MLSSHKey  string `yaml:"ml_ssh_key"`
	MLPort    int    `yaml:"ml_port"`
	MLDataDir string `yaml:"ml_data_dir"` // e.g., /data/active

	// NAS (where datasets are stored)
	NASHost    string `yaml:"nas_host"`
	NASUser    string `yaml:"nas_user"`
	NASSSHKey  string `yaml:"nas_ssh_key"`
	NASPort    int    `yaml:"nas_port"`
	NASDataDir string `yaml:"nas_data_dir"` // e.g., /mnt/datasets

	// Redis
	RedisAddr     string `yaml:"redis_addr"`
	RedisPassword string `yaml:"redis_password"`
	RedisDB       int    `yaml:"redis_db"`

	// Authentication
	Auth auth.Config `yaml:"auth"`

	// Cleanup settings
	MaxAgeHours     int `yaml:"max_age_hours"`        // Delete data older than X hours
	MaxSizeGB       int `yaml:"max_size_gb"`          // Keep total size under X GB
	CleanupInterval int `yaml:"cleanup_interval_min"` // Run cleanup every X minutes

	// Podman integration
	PodmanImage        string   `yaml:"podman_image"`
	ContainerWorkspace string   `yaml:"container_workspace"`
	ContainerResults   string   `yaml:"container_results"`
	GPUDevices         []string `yaml:"gpu_devices"`
}

// LoadDataConfig loads data manager configuration from a YAML file.
func LoadDataConfig(path string) (*DataConfig, error) { data, err := fileutil.SecureFileRead(path) if err != nil { return nil, err } var cfg DataConfig if err := yaml.Unmarshal(data, &cfg); err != nil { return nil, err } // Defaults if cfg.MLPort == 0 { cfg.MLPort = config.DefaultSSHPort } if cfg.NASPort == 0 { cfg.NASPort = config.DefaultSSHPort } if cfg.RedisAddr == "" { cfg.RedisAddr = config.DefaultRedisAddr } // Set default MLDataDir - use ./data/active for local/dev, /data/active for production if cfg.MLDataDir == "" { if cfg.MLHost == "" { // Local mode - use local data directory cfg.MLDataDir = config.DefaultLocalDataDir } else { // Production mode - use /data/active cfg.MLDataDir = config.DefaultDataDir } } if cfg.NASDataDir == "" { cfg.NASDataDir = config.DefaultNASDataDir } // Expand paths cfg.MLDataDir = storage.ExpandPath(cfg.MLDataDir) cfg.NASDataDir = storage.ExpandPath(cfg.NASDataDir) if cfg.MaxAgeHours == 0 { cfg.MaxAgeHours = config.DefaultMaxAgeHours } if cfg.MaxSizeGB == 0 { cfg.MaxSizeGB = config.DefaultMaxSizeGB } if cfg.CleanupInterval == 0 { cfg.CleanupInterval = config.DefaultCleanupInterval } return &cfg, nil } // Validate implements utils.Validator interface. func (c *DataConfig) Validate() error { if c.MLPort != 0 { if err := config.ValidatePort(c.MLPort); err != nil { return fmt.Errorf("invalid ML SSH port: %w", err) } } if c.NASPort != 0 { if err := config.ValidatePort(c.NASPort); err != nil { return fmt.Errorf("invalid NAS SSH port: %w", err) } } if c.RedisAddr != "" { if err := config.ValidateRedisAddr(c.RedisAddr); err != nil { return fmt.Errorf("invalid Redis configuration: %w", err) } } if c.MaxAgeHours < 1 { return fmt.Errorf("max_age_hours must be at least 1, got %d", c.MaxAgeHours) } if c.MaxSizeGB < 1 { return fmt.Errorf("max_size_gb must be at least 1, got %d", c.MaxSizeGB) } if c.CleanupInterval < 1 { return fmt.Errorf("cleanup_interval must be at least 1, got %d", c.CleanupInterval) } return nil }