diff --git a/configs/api/homelab-secure.yaml b/configs/api/homelab-secure.yaml index 2e66039..269ed05 100644 --- a/configs/api/homelab-secure.yaml +++ b/configs/api/homelab-secure.yaml @@ -62,7 +62,26 @@ database: logging: level: "info" file: "/logs/fetch_ml.log" - audit_log: "" + # Audit logging (HIPAA-compliant with tamper-evident chain hashing) + audit: + enabled: true + file: "/var/log/fetch_ml/audit.log" # Separate file for audit events + chain_hashing: true # Enable tamper-evident logging + retention_days: 2555 # 7 years for HIPAA compliance + log_ip_address: true # Include source IP in audit events + log_user_agent: true # Include user agent in audit events + # Sensitive events to always log + events: + - "authentication_success" + - "authentication_failure" + - "file_access" + - "file_write" + - "file_delete" + - "job_queued" + - "job_started" + - "job_completed" + - "experiment_created" + - "experiment_deleted" resources: max_workers: 1 diff --git a/configs/scheduler/scheduler.yaml.example b/configs/scheduler/scheduler.yaml.example index 704df59..b7732db 100644 --- a/configs/scheduler/scheduler.yaml.example +++ b/configs/scheduler/scheduler.yaml.example @@ -30,3 +30,30 @@ scheduler: token: "wkr_PLACEHOLDER_GENERATE_WITH_OPENSSL_RAND_HEX_32" - id: "worker-02" token: "wkr_PLACEHOLDER_GENERATE_WITH_OPENSSL_RAND_HEX_32" + + # Plugin GPU Quota Configuration + # Controls GPU allocation for plugin-based services (Jupyter, vLLM, etc.) + plugin_quota: + enabled: false # Enable quota enforcement (default: false) + total_gpus: 16 # Global GPU limit across all plugins (0 = unlimited) + per_user_gpus: 4 # Default per-user GPU limit (0 = unlimited) + per_user_services: 2 # Default per-user service count limit (0 = unlimited) + + # Plugin-specific limits (optional) + per_plugin_limits: + vllm: + max_gpus: 8 # Max GPUs for vLLM across all users + max_services: 4 # Max vLLM service instances + jupyter: + max_gpus: 4 # Max GPUs for Jupyter across all users + max_services: 10 # Max Jupyter service instances + + # Per-user overrides (optional) + user_overrides: + admin: + max_gpus: 8 # Admin gets more GPUs + max_services: 5 # Admin can run more services + allowed_plugins: ["jupyter", "vllm"] # Restrict which plugins user can use + researcher1: + max_gpus: 2 # Limited GPU access + max_services: 1 # Single service limit diff --git a/configs/worker/dev-local.yaml b/configs/worker/dev-local.yaml index 6b5c223..187e53d 100644 --- a/configs/worker/dev-local.yaml +++ b/configs/worker/dev-local.yaml @@ -48,6 +48,39 @@ queue: native: data_dir: "data/dev/queue" +# Plugin Configuration (for local development) +plugins: + # Jupyter Notebook/Lab Service + jupyter: + enabled: true + image: "quay.io/jupyter/base-notebook:latest" + default_port: 8888 + mode: "lab" + # Security settings + security: + trusted_channels: + - "conda-forge" + - "defaults" + blocked_packages: [] # Less restrictive for local dev + require_password: false # No password for local dev + # Resource limits + max_gpu_per_instance: 1 + max_memory_per_instance: "4Gi" + + # vLLM Inference Service + vllm: + enabled: true + image: "vllm/vllm-openai:latest" + default_port: 8000 + # Model cache location + model_cache: "data/dev/models" + # Supported quantization methods: awq, gptq, fp8, squeezellm + default_quantization: "" # No quantization for dev (better quality) + # Resource limits + max_gpu_per_instance: 1 + max_model_len: 2048 + tensor_parallel_size: 1 + task_lease_duration: "30m" heartbeat_interval: "1m" max_retries: 3 diff --git a/configs/worker/docker-dev.yaml b/configs/worker/docker-dev.yaml index e688ccd..b0cd77e 100644 --- a/configs/worker/docker-dev.yaml +++ b/configs/worker/docker-dev.yaml @@ -50,7 +50,40 @@ resources: metrics: enabled: true listen_addr: ":9100" -metrics_flush_interval: "500ms" + metrics_flush_interval: "500ms" + +# Plugin Configuration +plugins: + # Jupyter Notebook/Lab Service + jupyter: + enabled: true + image: "quay.io/jupyter/base-notebook:latest" + default_port: 8888 + mode: "lab" + # Security settings + security: + trusted_channels: + - "conda-forge" + - "defaults" + blocked_packages: [] # Dev environment - less restrictive + require_password: false # No password for dev + # Resource limits + max_gpu_per_instance: 1 + max_memory_per_instance: "4Gi" + + # vLLM Inference Service + vllm: + enabled: true + image: "vllm/vllm-openai:latest" + default_port: 8000 + # Model cache location + model_cache: "/models" + # Supported quantization methods: awq, gptq, fp8, squeezellm + default_quantization: "" # No quantization for dev + # Resource limits + max_gpu_per_instance: 1 + max_model_len: 2048 + tensor_parallel_size: 1 task_lease_duration: "30m" heartbeat_interval: "1m" diff --git a/configs/worker/docker-prod.yaml b/configs/worker/docker-prod.yaml index be7f372..371b218 100644 --- a/configs/worker/docker-prod.yaml +++ b/configs/worker/docker-prod.yaml @@ -48,3 +48,42 @@ task_lease_duration: "30m" heartbeat_interval: "1m" max_retries: 3 graceful_timeout: "5m" + +# Plugin Configuration +plugins: + # Jupyter Notebook/Lab Service + jupyter: + enabled: true + image: "quay.io/jupyter/base-notebook:latest" + default_port: 8888 + # Security settings + security: + trusted_channels: + - "conda-forge" + - "defaults" + - "pytorch" + blocked_packages: + - "requests" + - "urllib3" + - "httpx" + require_password: true + # Resource limits (enforced by scheduler quota system) + max_gpu_per_instance: 1 + max_memory_per_instance: "8Gi" + + # vLLM Inference Service + vllm: + enabled: true + image: "vllm/vllm-openai:latest" + default_port: 8000 + # Model cache location + model_cache: "/models" + # Supported quantization methods: awq, gptq, fp8, squeezellm + default_quantization: "" # empty = no quantization + # Resource limits + max_gpu_per_instance: 4 + max_model_len: 4096 + # Environment variables passed to container + env: + - "HF_HOME=/models" + - "VLLM_WORKER_MULTIPROC_METHOD=spawn" diff --git a/configs/worker/homelab-sandbox.yaml b/configs/worker/homelab-sandbox.yaml index 8f071d4..11d5d09 100644 --- a/configs/worker/homelab-sandbox.yaml +++ b/configs/worker/homelab-sandbox.yaml @@ -48,6 +48,46 @@ queue: backend: "redis" redis_url: "redis://localhost:6379/0" +# Plugin Configuration +plugins: + # Jupyter Notebook/Lab Service + jupyter: + enabled: true + image: "quay.io/jupyter/base-notebook:latest" + default_port: 8888 + mode: "lab" # "lab" or "notebook" + # Security settings + security: + trusted_channels: + - "conda-forge" + - "defaults" + - "pytorch" + - "nvidia" + blocked_packages: + - "requests" + - "urllib3" + - "httpx" + - "socket" + - "subprocess" + require_password: true + # Resource limits + max_gpu_per_instance: 1 + max_memory_per_instance: "16Gi" + + # vLLM Inference Service + vllm: + enabled: true + image: "vllm/vllm-openai:latest" + default_port: 8000 + # Model cache location (should be on fast storage) + model_cache: "/var/lib/fetchml/models" + # Supported quantization methods: awq, gptq, fp8, squeezellm + default_quantization: "" + # Resource limits + max_gpu_per_instance: 2 + max_model_len: 4096 + tensor_parallel_size: 1 + # Snapshot store (optional) snapshot_store: enabled: false diff --git a/configs/worker/homelab-secure.yaml b/configs/worker/homelab-secure.yaml index e03445c..009cd36 100644 --- a/configs/worker/homelab-secure.yaml +++ b/configs/worker/homelab-secure.yaml @@ -45,3 +45,42 @@ task_lease_duration: "30m" heartbeat_interval: "1m" max_retries: 3 graceful_timeout: "5m" + +# Plugin Configuration +plugins: + # Jupyter Notebook/Lab Service + jupyter: + enabled: true + image: "quay.io/jupyter/base-notebook:latest" + default_port: 8888 + mode: "lab" + # Security settings (strict for secure config) + security: + trusted_channels: + - "conda-forge" + - "defaults" + blocked_packages: + - "requests" + - "urllib3" + - "httpx" + - "socket" + - "subprocess" + - "os.system" + require_password: true + # Resource limits + max_gpu_per_instance: 1 + max_memory_per_instance: "8Gi" + + # vLLM Inference Service + vllm: + enabled: true + image: "vllm/vllm-openai:latest" + default_port: 8000 + # Model cache location + model_cache: "/models" + # Supported quantization methods: awq, gptq, fp8, squeezellm + default_quantization: "" + # Resource limits + max_gpu_per_instance: 1 + max_model_len: 4096 + tensor_parallel_size: 1 diff --git a/configs/worker/worker-prod.toml b/configs/worker/worker-prod.toml index bc5f208..a6c97d5 100644 --- a/configs/worker/worker-prod.toml +++ b/configs/worker/worker-prod.toml @@ -45,3 +45,34 @@ podman_memory = "16g" [metrics] enabled = true listen_addr = ":9100" + +# Plugin Configuration +[plugins] + +[plugins.jupyter] +enabled = true +image = "quay.io/jupyter/base-notebook:latest" +default_port = 8888 +mode = "lab" +max_gpu_per_instance = 1 +max_memory_per_instance = "8Gi" + +[plugins.jupyter.security] +require_password = true +trusted_channels = ["conda-forge", "defaults", "pytorch"] +blocked_packages = ["requests", "urllib3", "httpx"] + +[plugins.vllm] +enabled = true +image = "vllm/vllm-openai:latest" +default_port = 8000 +model_cache = "/models" +default_quantization = "" # Options: awq, gptq, fp8, squeezellm +max_gpu_per_instance = 2 +max_model_len = 4096 +tensor_parallel_size = 1 + +# Environment variables for vLLM +[plugins.vllm.env] +HF_HOME = "/models" +VLLM_WORKER_MULTIPROC_METHOD = "spawn" diff --git a/deployments/README.md b/deployments/README.md index 26a5ba0..d0366d0 100644 --- a/deployments/README.md +++ b/deployments/README.md @@ -110,6 +110,36 @@ TLS_KEY_PATH=/app/ssl/key.pem | Prometheus | 9090 | - | - | | Grafana | 3000 | - | - | | Loki | 3100 | - | - | +| JupyterLab | 8888* | 8888* | - | +| vLLM | 8000* | 8000* | - | + +*Plugin service ports are dynamically allocated from the 8000-9000 range by the scheduler. + +## Plugin Services + +The deployment configurations include support for interactive ML services: + +### Jupyter Notebook/Lab +- **Image**: `quay.io/jupyter/base-notebook:latest` +- **Security**: Trusted channels (conda-forge, defaults), blocked packages (http clients) +- **Resources**: Configurable GPU/memory limits +- **Access**: Via scheduler-assigned port (8000-9000 range) + +### vLLM Inference +- **Image**: `vllm/vllm-openai:latest` +- **Features**: OpenAI-compatible API, quantization support (AWQ, GPTQ, FP8) +- **Model Cache**: Configurable path for model storage +- **Resources**: Multi-GPU tensor parallelism support + +## Scheduler GPU Quotas + +The scheduler supports GPU quota management for plugin services: +- **Global Limit**: Total GPUs across all plugins +- **Per-User Limits**: GPU and service count per user +- **Per-Plugin Limits**: vLLM and Jupyter-specific limits +- **User Overrides**: Special permissions for admins/researchers + +See `configs/scheduler/scheduler.yaml.example` for quota configuration. ## Monitoring @@ -122,3 +152,4 @@ TLS_KEY_PATH=/app/ssl/key.pem - If you need HTTPS externally, terminate TLS at a reverse proxy. - API keys should be managed via environment variables - Database credentials should use secrets management in production +- **HIPAA deployments**: Plugins are disabled by default for compliance diff --git a/deployments/configs/worker/docker-dev.yaml b/deployments/configs/worker/docker-dev.yaml index fbad8e1..ffa3669 100644 --- a/deployments/configs/worker/docker-dev.yaml +++ b/deployments/configs/worker/docker-dev.yaml @@ -29,3 +29,30 @@ max_artifact_total_bytes: 1073741824 # 1GB # Provenance (disabled in dev for speed) provenance_best_effort: false + +# Plugin Configuration (development mode) +plugins: + # Jupyter Notebook/Lab Service + jupyter: + enabled: true + image: "quay.io/jupyter/base-notebook:latest" + default_port: 8888 + mode: "lab" + security: + trusted_channels: + - "conda-forge" + - "defaults" + blocked_packages: [] # No restrictions in dev + require_password: false # No password for dev + max_gpu_per_instance: 1 + max_memory_per_instance: "4Gi" + + # vLLM Inference Service + vllm: + enabled: true + image: "vllm/vllm-openai:latest" + default_port: 8000 + model_cache: "/tmp/models" # Temp location for dev + default_quantization: "" # No quantization for dev + max_gpu_per_instance: 1 + max_model_len: 2048 diff --git a/deployments/configs/worker/docker-hipaa.yaml b/deployments/configs/worker/docker-hipaa.yaml index 3fbd6b4..83d5d61 100644 --- a/deployments/configs/worker/docker-hipaa.yaml +++ b/deployments/configs/worker/docker-hipaa.yaml @@ -51,3 +51,12 @@ ssh_key: ${SSH_KEY_PATH} # Config hash computation enabled (required for audit) # This is automatically computed by Validate() + +# Plugin Configuration (DISABLED for HIPAA compliance) +# Jupyter and vLLM services are disabled in HIPAA mode to ensure +# no unauthorized network access or data processing +plugins: + jupyter: + enabled: false # Disabled: HIPAA requires strict network isolation + vllm: + enabled: false # Disabled: External model downloads violate PHI controls diff --git a/deployments/configs/worker/docker-standard.yaml b/deployments/configs/worker/docker-standard.yaml index c121476..675e293 100644 --- a/deployments/configs/worker/docker-standard.yaml +++ b/deployments/configs/worker/docker-standard.yaml @@ -33,3 +33,32 @@ max_artifact_total_bytes: 536870912 # 512MB # Provenance (enabled) provenance_best_effort: true + +# Plugin Configuration +plugins: + # Jupyter Notebook/Lab Service + jupyter: + enabled: true + image: "quay.io/jupyter/base-notebook:latest" + default_port: 8888 + mode: "lab" + security: + trusted_channels: + - "conda-forge" + - "defaults" + blocked_packages: + - "requests" + - "urllib3" + require_password: true + max_gpu_per_instance: 1 + max_memory_per_instance: "8Gi" + + # vLLM Inference Service + vllm: + enabled: true + image: "vllm/vllm-openai:latest" + default_port: 8000 + model_cache: "/models" + default_quantization: "" + max_gpu_per_instance: 1 + max_model_len: 4096