---
# Worker identity and job layout
worker_id: "docker-worker"
base_path: "/tmp/fetchml-jobs"
entrypoint: "train.py"
redis_url: "redis://redis:6379/0"
local_mode: true
max_workers: 1
poll_interval_seconds: 2

# Dataset fetching
auto_fetch_data: false
data_manager_path: "./data_manager"
dataset_cache_ttl: "30m"
data_dir: "/data/active"

# Snapshot object store (S3-compatible endpoint)
snapshot_store:
  enabled: true
  endpoint: "blizzard.jfraeys.com"
  secure: true
  bucket: "fetchml-snapshots"
  prefix: "snapshots"
  timeout: "5m"
  max_retries: 3

# Container runtime
podman_image: "python:3.9-slim"
container_workspace: "/workspace"
container_results: "/results"

# GPU passthrough
gpu_vendor: "nvidia"
gpu_visible_devices: [0]
gpu_devices: ["/dev/nvidia0"]

# Per-worker resource limits
resources:
  max_workers: 1
  desired_rps_per_worker: 2
  podman_cpus: "2"
  podman_memory: "4Gi"

# Prometheus-style metrics endpoint
metrics:
  enabled: true
  listen_addr: ":9100"

# NOTE(review): `metrics_` prefix suggests a top-level key rather than a child
# of `metrics:` — confirm against the consuming struct/schema.
metrics_flush_interval: "500ms"

# Task lifecycle
task_lease_duration: "30m"
heartbeat_interval: "1m"
max_retries: 3
graceful_timeout: "5m"

# Plugin Configuration
plugins:
  # Jupyter Notebook/Lab Service
  jupyter:
    enabled: true
    image: "quay.io/jupyter/base-notebook:latest"
    default_port: 8888

    # Security settings
    security:
      trusted_channels:
        - "conda-forge"
        - "defaults"
        - "pytorch"
      blocked_packages:
        - "requests"
        - "urllib3"
        - "httpx"
      require_password: true

    # Resource limits (enforced by scheduler quota system)
    max_gpu_per_instance: 1
    max_memory_per_instance: "8Gi"

  # vLLM Inference Service
  vllm:
    enabled: true
    image: "vllm/vllm-openai:latest"
    default_port: 8000

    # Model cache location
    model_cache: "/models"

    # Supported quantization methods: awq, gptq, fp8, squeezellm
    default_quantization: ""  # empty = no quantization

    # Resource limits
    max_gpu_per_instance: 4
    max_model_len: 4096

    # Environment variables passed to container
    env:
      - "HF_HOME=/models"
      - "VLLM_WORKER_MULTIPROC_METHOD=spawn"