fetch_ml/cmd/data_manager/data_manager_config.go

134 lines
3.6 KiB
Go

// This file defines the data manager's configuration: the DataConfig type, its YAML loader, and its validation.
package main
import (
"fmt"
"github.com/jfraeys/fetch_ml/internal/auth"
"github.com/jfraeys/fetch_ml/internal/config"
"github.com/jfraeys/fetch_ml/internal/fileutil"
"gopkg.in/yaml.v3"
)
// DataConfig holds the configuration for the data manager.
//
// Fields are decoded from YAML using the keys given in the struct tags.
// LoadDataConfig fills in defaults for several fields when they are left at
// their zero value (SSH ports, Redis address, data directories and the
// cleanup settings); Validate checks the resulting values.
type DataConfig struct {
	// ML Server (where training runs)
	MLHost string `yaml:"ml_host"` // when empty, LoadDataConfig picks the local default for MLDataDir
	MLUser string `yaml:"ml_user"`
	MLSSHKey string `yaml:"ml_ssh_key"`
	MLPort int `yaml:"ml_port"` // 0 means "use the default SSH port" in LoadDataConfig
	MLDataDir string `yaml:"ml_data_dir"` // e.g., /data/active
	// NAS (where datasets are stored)
	NASHost string `yaml:"nas_host"`
	NASUser string `yaml:"nas_user"`
	NASSSHKey string `yaml:"nas_ssh_key"`
	NASPort int `yaml:"nas_port"` // 0 means "use the default SSH port" in LoadDataConfig
	NASDataDir string `yaml:"nas_data_dir"` // e.g., /mnt/datasets
	// Redis
	RedisAddr string `yaml:"redis_addr"` // empty means "use the default Redis address" in LoadDataConfig
	RedisPassword string `yaml:"redis_password"`
	RedisDB int `yaml:"redis_db"`
	// Authentication
	Auth auth.Config `yaml:"auth"`
	// Cleanup settings. A value of 0 is replaced by a default in
	// LoadDataConfig; Validate rejects anything below 1.
	MaxAgeHours int `yaml:"max_age_hours"` // Delete data older than X hours
	MaxSizeGB int `yaml:"max_size_gb"` // Keep total size under X GB
	CleanupInterval int `yaml:"cleanup_interval_min"` // Run cleanup every X minutes
	// Podman integration
	PodmanImage string `yaml:"podman_image"`
	ContainerWorkspace string `yaml:"container_workspace"`
	ContainerResults string `yaml:"container_results"`
	GPUDevices []string `yaml:"gpu_devices"`
}
// LoadDataConfig reads the YAML file at path and returns the data manager
// configuration it describes, with defaults applied.
//
// The file is read through fileutil.SecureFileRead and decoded with
// yaml.Unmarshal. A failure in either step is returned wrapped with the step
// and the path for context; the underlying error remains reachable through
// errors.Is and errors.As (note that os.IsNotExist does not look through
// wrapped errors, so use errors.Is(err, fs.ErrNotExist) instead).
//
// Defaults are applied only to fields left at their zero value:
//   - MLPort and NASPort fall back to config.DefaultSSHPort.
//   - RedisAddr falls back to config.DefaultRedisAddr.
//   - MLDataDir falls back to config.DefaultLocalDataDir when MLHost is
//     empty (local/dev mode) and to config.DefaultDataDir otherwise.
//   - NASDataDir falls back to config.DefaultNASDataDir.
//   - MaxAgeHours, MaxSizeGB and CleanupInterval fall back to
//     config.DefaultMaxAgeHours, config.DefaultMaxSizeGB and
//     config.DefaultCleanupInterval.
//
// MLDataDir and NASDataDir are then passed through config.ExpandPath.
// The returned configuration is not validated here; call Validate on it.
func LoadDataConfig(path string) (*DataConfig, error) {
	data, err := fileutil.SecureFileRead(path)
	if err != nil {
		return nil, fmt.Errorf("reading data config %q: %w", path, err)
	}

	var cfg DataConfig
	if err := yaml.Unmarshal(data, &cfg); err != nil {
		return nil, fmt.Errorf("parsing data config %q: %w", path, err)
	}

	// Connection defaults.
	if cfg.MLPort == 0 {
		cfg.MLPort = config.DefaultSSHPort
	}
	if cfg.NASPort == 0 {
		cfg.NASPort = config.DefaultSSHPort
	}
	if cfg.RedisAddr == "" {
		cfg.RedisAddr = config.DefaultRedisAddr
	}

	// Data directory defaults. With no ML host configured the data manager
	// is treated as running locally, so the local data directory is used.
	if cfg.MLDataDir == "" {
		if cfg.MLHost == "" {
			cfg.MLDataDir = config.DefaultLocalDataDir
		} else {
			cfg.MLDataDir = config.DefaultDataDir
		}
	}
	if cfg.NASDataDir == "" {
		cfg.NASDataDir = config.DefaultNASDataDir
	}

	// Expand paths after defaulting so the defaults are expanded as well.
	cfg.MLDataDir = config.ExpandPath(cfg.MLDataDir)
	cfg.NASDataDir = config.ExpandPath(cfg.NASDataDir)

	// Cleanup defaults. Only an exact 0 is replaced; negative values are
	// left in place for Validate to reject.
	if cfg.MaxAgeHours == 0 {
		cfg.MaxAgeHours = config.DefaultMaxAgeHours
	}
	if cfg.MaxSizeGB == 0 {
		cfg.MaxSizeGB = config.DefaultMaxSizeGB
	}
	if cfg.CleanupInterval == 0 {
		cfg.CleanupInterval = config.DefaultCleanupInterval
	}

	return &cfg, nil
}
// Validate checks the configuration values and returns the first problem
// found, or nil when every check passes.
//
// Checks performed, in order:
//   - MLPort and NASPort are checked with config.ValidatePort, but only when
//     non-zero (a zero port is treated as "not set").
//   - RedisAddr is checked with config.ValidateRedisAddr, but only when
//     non-empty.
//   - MaxAgeHours, MaxSizeGB and CleanupInterval must each be at least 1.
//
// Errors from the config helpers are wrapped with %w, so the underlying
// error stays reachable through errors.Is and errors.As. Messages for the
// cleanup settings name the YAML key the user has to change.
//
// The original comment states that this method implements the
// utils.Validator interface; that interface is not visible in this file.
func (c *DataConfig) Validate() error {
	if c.MLPort != 0 {
		if err := config.ValidatePort(c.MLPort); err != nil {
			return fmt.Errorf("invalid ML SSH port: %w", err)
		}
	}
	if c.NASPort != 0 {
		if err := config.ValidatePort(c.NASPort); err != nil {
			return fmt.Errorf("invalid NAS SSH port: %w", err)
		}
	}
	if c.RedisAddr != "" {
		if err := config.ValidateRedisAddr(c.RedisAddr); err != nil {
			return fmt.Errorf("invalid Redis configuration: %w", err)
		}
	}

	if c.MaxAgeHours < 1 {
		return fmt.Errorf("max_age_hours must be at least 1, got %d", c.MaxAgeHours)
	}
	if c.MaxSizeGB < 1 {
		return fmt.Errorf("max_size_gb must be at least 1, got %d", c.MaxSizeGB)
	}
	if c.CleanupInterval < 1 {
		// Use the actual YAML key (cleanup_interval_min) so the message
		// points at the setting the user has to edit.
		return fmt.Errorf("cleanup_interval_min must be at least 1, got %d", c.CleanupInterval)
	}
	return nil
}