---
# JSON Schema (draft-07) describing the Fetch ML worker configuration.
# The root object is closed: keys not listed under `properties` are rejected
# because `additionalProperties` is false.
$schema: 'http://json-schema.org/draft-07/schema#'
title: 'Fetch ML Worker Configuration'
type: object
additionalProperties: false

# Keys that must be present in every worker configuration.
required:
  - base_path
  - worker_id
  - redis_addr
  - podman_image
  - container_workspace
  - container_results
  - train_script

properties:
  host:
    description: SSH host for remote worker
    type: string

  user:
    description: SSH user for remote worker
    type: string

  ssh_key:
    description: Path to SSH private key
    type: string

  # Restricted to the valid TCP port range.
  port:
    description: SSH port
    type: integer
    minimum: 1
    maximum: 65535

  base_path:
    description: Base path for worker operations
    type: string

  train_script:
    description: Path to training script
    type: string

  redis_addr:
    description: Redis server address
    type: string

  redis_password:
    description: Redis password
    type: string

  redis_db:
    description: Redis database number
    type: integer
    minimum: 0
    default: 0

  known_hosts:
    description: Path to SSH known hosts file
    type: string

  # Required, and must be a non-empty string.
  worker_id:
    description: Unique worker identifier
    type: string
    minLength: 1

  max_workers:
    description: Maximum number of concurrent workers
    type: integer
    minimum: 1

  poll_interval_seconds:
    description: Polling interval in seconds
    type: integer
    minimum: 1

  local_mode:
    description: Run in local mode without SSH
    type: boolean
    default: false

  # Closed object: only the keys listed below are accepted.
  # Note that `max_workers` here is a separate key from the root-level
  # `max_workers` above; the schema does not relate the two.
  resources:
    description: Resource configuration
    type: object
    additionalProperties: false
    properties:
      max_workers:
        type: integer
        minimum: 1
      desired_rps_per_worker:
        type: integer
        minimum: 1
      requests_per_sec:
        type: integer
        minimum: 1
      podman_cpus:
        type: string
      podman_memory:
        type: string
      request_burst:
        type: integer
        minimum: 1

  # Open object: any keys are accepted; their shape is not validated here.
  auth:
    description: Authentication configuration
    type: object
    additionalProperties: true

  # Closed object: only `enabled` and `listen_addr` are accepted.
  metrics:
    description: Metrics configuration
    type: object
    additionalProperties: false
    properties:
      enabled:
        type: boolean
        default: false
      listen_addr:
        type: string
        default: ':9100'

  # NOTE(review): placed at root level (sibling of `metrics`), inferred from
  # the `metrics_` name prefix and the root-level keys that follow it.
  # Confirm against the config loader that it is not a child of `metrics`.
  metrics_flush_interval:
    description: Duration string (e.g., "500ms")
    type: string
    default: '500ms'

  data_manager_path:
    description: Path to data manager
    type: string
    default: './data_manager'

  auto_fetch_data:
    description: Automatically fetch data
    type: boolean
    default: false

  data_dir:
    description: Data directory
    type: string

  dataset_cache_ttl:
    description: Dataset cache TTL duration
    type: string
    default: '30m'

  # Required, and must be a non-empty string.
  podman_image:
    description: Podman image to use
    type: string
    minLength: 1

  container_workspace:
    description: Container workspace path
    type: string

  container_results:
    description: Container results path
    type: string

  gpu_access:
    description: Enable GPU access
    type: boolean
    default: false

  # The duration-valued settings below are plain strings; the schema does
  # not constrain their format.
  task_lease_duration:
    description: Task lease duration
    type: string
    default: '30m'

  heartbeat_interval:
    description: Heartbeat interval
    type: string
    default: '1m'

  # Zero is allowed by `minimum: 0`.
  max_retries:
    description: Maximum retry attempts
    type: integer
    minimum: 0
    default: 3

  graceful_timeout:
    description: Graceful shutdown timeout
    type: string
    default: '5m'