Move the ExpandPath function and the path-related utilities from internal/config to internal/storage, where they belong.

Files updated:
- internal/worker/config.go: use storage.ExpandPath
- internal/network/ssh.go: use storage.ExpandPath
- cmd/data_manager/data_manager_config.go: use storage.ExpandPath
- internal/api/server_config.go: use storage.ExpandPath

internal/storage/paths.go already contained the canonical implementation.

Result: path utilities now live in the storage layer, and the config package focuses on configuration structs.
135 lines
3.7 KiB
Go
135 lines
3.7 KiB
Go
// DataConfig holds the configuration for the data manager
|
|
package main
|
|
|
|
import (
|
|
"fmt"
|
|
|
|
"github.com/jfraeys/fetch_ml/internal/auth"
|
|
"github.com/jfraeys/fetch_ml/internal/config"
|
|
"github.com/jfraeys/fetch_ml/internal/fileutil"
|
|
"github.com/jfraeys/fetch_ml/internal/storage"
|
|
"gopkg.in/yaml.v3"
|
|
)
|
|
|
|
// DataConfig holds the configuration for the data manager
|
|
type DataConfig struct {
|
|
// ML Server (where training runs)
|
|
MLHost string `yaml:"ml_host"`
|
|
MLUser string `yaml:"ml_user"`
|
|
MLSSHKey string `yaml:"ml_ssh_key"`
|
|
MLPort int `yaml:"ml_port"`
|
|
MLDataDir string `yaml:"ml_data_dir"` // e.g., /data/active
|
|
|
|
// NAS (where datasets are stored)
|
|
NASHost string `yaml:"nas_host"`
|
|
NASUser string `yaml:"nas_user"`
|
|
NASSSHKey string `yaml:"nas_ssh_key"`
|
|
NASPort int `yaml:"nas_port"`
|
|
NASDataDir string `yaml:"nas_data_dir"` // e.g., /mnt/datasets
|
|
|
|
// Redis
|
|
RedisAddr string `yaml:"redis_addr"`
|
|
RedisPassword string `yaml:"redis_password"`
|
|
RedisDB int `yaml:"redis_db"`
|
|
|
|
// Authentication
|
|
Auth auth.Config `yaml:"auth"`
|
|
|
|
// Cleanup settings
|
|
MaxAgeHours int `yaml:"max_age_hours"` // Delete data older than X hours
|
|
MaxSizeGB int `yaml:"max_size_gb"` // Keep total size under X GB
|
|
CleanupInterval int `yaml:"cleanup_interval_min"` // Run cleanup every X minutes
|
|
|
|
// Podman integration
|
|
PodmanImage string `yaml:"podman_image"`
|
|
ContainerWorkspace string `yaml:"container_workspace"`
|
|
ContainerResults string `yaml:"container_results"`
|
|
GPUDevices []string `yaml:"gpu_devices"`
|
|
}
|
|
|
|
// LoadDataConfig loads data manager configuration from a YAML file.
|
|
func LoadDataConfig(path string) (*DataConfig, error) {
|
|
data, err := fileutil.SecureFileRead(path)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var cfg DataConfig
|
|
if err := yaml.Unmarshal(data, &cfg); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Defaults
|
|
if cfg.MLPort == 0 {
|
|
cfg.MLPort = config.DefaultSSHPort
|
|
}
|
|
if cfg.NASPort == 0 {
|
|
cfg.NASPort = config.DefaultSSHPort
|
|
}
|
|
if cfg.RedisAddr == "" {
|
|
cfg.RedisAddr = config.DefaultRedisAddr
|
|
}
|
|
// Set default MLDataDir - use ./data/active for local/dev, /data/active for production
|
|
if cfg.MLDataDir == "" {
|
|
if cfg.MLHost == "" {
|
|
// Local mode - use local data directory
|
|
cfg.MLDataDir = config.DefaultLocalDataDir
|
|
} else {
|
|
// Production mode - use /data/active
|
|
cfg.MLDataDir = config.DefaultDataDir
|
|
}
|
|
}
|
|
if cfg.NASDataDir == "" {
|
|
cfg.NASDataDir = config.DefaultNASDataDir
|
|
}
|
|
|
|
// Expand paths
|
|
cfg.MLDataDir = storage.ExpandPath(cfg.MLDataDir)
|
|
cfg.NASDataDir = storage.ExpandPath(cfg.NASDataDir)
|
|
if cfg.MaxAgeHours == 0 {
|
|
cfg.MaxAgeHours = config.DefaultMaxAgeHours
|
|
}
|
|
if cfg.MaxSizeGB == 0 {
|
|
cfg.MaxSizeGB = config.DefaultMaxSizeGB
|
|
}
|
|
if cfg.CleanupInterval == 0 {
|
|
cfg.CleanupInterval = config.DefaultCleanupInterval
|
|
}
|
|
|
|
return &cfg, nil
|
|
}
|
|
|
|
// Validate implements utils.Validator interface.
|
|
func (c *DataConfig) Validate() error {
|
|
if c.MLPort != 0 {
|
|
if err := config.ValidatePort(c.MLPort); err != nil {
|
|
return fmt.Errorf("invalid ML SSH port: %w", err)
|
|
}
|
|
}
|
|
|
|
if c.NASPort != 0 {
|
|
if err := config.ValidatePort(c.NASPort); err != nil {
|
|
return fmt.Errorf("invalid NAS SSH port: %w", err)
|
|
}
|
|
}
|
|
|
|
if c.RedisAddr != "" {
|
|
if err := config.ValidateRedisAddr(c.RedisAddr); err != nil {
|
|
return fmt.Errorf("invalid Redis configuration: %w", err)
|
|
}
|
|
}
|
|
|
|
if c.MaxAgeHours < 1 {
|
|
return fmt.Errorf("max_age_hours must be at least 1, got %d", c.MaxAgeHours)
|
|
}
|
|
|
|
if c.MaxSizeGB < 1 {
|
|
return fmt.Errorf("max_size_gb must be at least 1, got %d", c.MaxSizeGB)
|
|
}
|
|
|
|
if c.CleanupInterval < 1 {
|
|
return fmt.Errorf("cleanup_interval must be at least 1, got %d", c.CleanupInterval)
|
|
}
|
|
|
|
return nil
|
|
}
|