fetch_ml/configs/schema/worker_config_schema.yaml
Jeremie Fraeys cd5640ebd2 Slim and secure: move scripts, clean configs, remove secrets
- Move ci-test.sh and setup.sh to scripts/
- Trim docs/src/zig-cli.md to current structure
- Replace hardcoded secrets with placeholders in configs
- Update .gitignore to block .env*, secrets/, keys, build artifacts
- Slim README.md to reflect current CLI/TUI split
- Add cleanup trap to ci-test.sh
- Ensure no secrets are committed
2025-12-07 13:57:51 -05:00

149 lines
3.3 KiB
YAML

---
# JSON Schema (draft-07) for the Fetch ML worker configuration file.
#
# The root object rejects unknown keys (additionalProperties: false), so every
# supported option must be declared under `properties` below.
#
# NOTE(review): the nesting of this document was reconstructed from a copy in
# which indentation had been lost. In particular, confirm that the
# sub-properties placed under `resources` and `metrics` belong there.
$schema: "http://json-schema.org/draft-07/schema#"
title: "Fetch ML Worker Configuration"
type: object
additionalProperties: false

# Keys that must be present in every worker configuration.
required:
  - base_path
  - worker_id
  - redis_addr
  - podman_image
  - container_workspace
  - container_results
  - train_script

properties:
  # SSH connection settings for a remote worker.
  host:
    type: string
    description: SSH host for remote worker
  user:
    type: string
    description: SSH user for remote worker
  ssh_key:
    type: string
    description: Path to SSH private key
  port:
    type: integer
    # Bounds match the valid TCP port range.
    minimum: 1
    maximum: 65535
    description: SSH port

  # NOTE(review): of the required string keys, only worker_id and podman_image
  # declare minLength: 1; the others accept an empty string. Consider
  # tightening them if empty values are never valid.
  base_path:
    type: string
    description: Base path for worker operations
  train_script:
    type: string
    description: Path to training script

  # Redis connection settings.
  redis_addr:
    type: string
    description: Redis server address
  redis_password:
    type: string
    description: Redis password
  redis_db:
    type: integer
    minimum: 0
    default: 0
    description: Redis database number

  known_hosts:
    type: string
    description: Path to SSH known hosts file

  # Worker identity and scheduling.
  worker_id:
    type: string
    minLength: 1
    description: Unique worker identifier
  max_workers:
    type: integer
    minimum: 1
    description: Maximum number of concurrent workers
  poll_interval_seconds:
    type: integer
    minimum: 1
    description: Polling interval in seconds
  local_mode:
    type: boolean
    default: false
    description: Run in local mode without SSH

  # Nested object; unknown keys inside it are rejected.
  resources:
    type: object
    description: Resource configuration
    additionalProperties: false
    properties:
      max_workers:
        type: integer
        minimum: 1
      desired_rps_per_worker:
        type: integer
        minimum: 1
      requests_per_sec:
        type: integer
        minimum: 1
      podman_cpus:
        type: string
      podman_memory:
        type: string
      request_burst:
        type: integer
        minimum: 1

  # Free-form object: any keys are accepted here (additionalProperties: true).
  auth:
    type: object
    description: Authentication configuration
    additionalProperties: true

  # Nested object; unknown keys inside it are rejected.
  metrics:
    type: object
    description: Metrics configuration
    additionalProperties: false
    properties:
      enabled:
        type: boolean
        default: false
      listen_addr:
        type: string
        default: ":9100"
  metrics_flush_interval:
    type: string
    description: Duration string (e.g., "500ms")
    default: "500ms"

  # Dataset handling.
  data_manager_path:
    type: string
    description: Path to data manager
    default: "./data_manager"
  auto_fetch_data:
    type: boolean
    default: false
    description: Automatically fetch data
  data_dir:
    type: string
    description: Data directory
  dataset_cache_ttl:
    type: string
    description: Dataset cache TTL duration
    default: "30m"

  # Container runtime settings.
  podman_image:
    type: string
    minLength: 1
    description: Podman image to use
  container_workspace:
    type: string
    description: Container workspace path
  container_results:
    type: string
    description: Container results path
  gpu_access:
    type: boolean
    default: false
    description: Enable GPU access

  # Task lifecycle settings. The time-valued options are typed as plain
  # strings; this schema does not constrain their format.
  task_lease_duration:
    type: string
    description: Task lease duration
    default: "30m"
  heartbeat_interval:
    type: string
    description: Heartbeat interval
    default: "1m"
  max_retries:
    type: integer
    minimum: 0
    default: 3
    description: Maximum retry attempts
  graceful_timeout:
    type: string
    description: Graceful shutdown timeout
    default: "5m"