config: add Plugin GPU Quota, plugins, and audit logging to configs
- Add Plugin GPU Quota config section to scheduler.yaml.example
- Add audit logging config to homelab-secure.yaml (HIPAA-compliant)
- Add Jupyter and vLLM plugin configs to all worker configs:
  - Security settings (passwords, trusted channels, blocked packages)
  - Resource limits (GPU, memory, CPU)
  - Model cache paths and quantization options for vLLM
- Disable plugins in HIPAA deployment mode for compliance
- Update deployments README with plugin services and GPU quotas
This commit is contained in:
parent
90ea18555c
commit
b3a0c78903
12 changed files with 359 additions and 2 deletions
|
|
@ -62,7 +62,26 @@ database:
|
|||
logging:
|
||||
level: "info"
|
||||
file: "/logs/fetch_ml.log"
|
||||
audit_log: ""
|
||||
# Audit logging (HIPAA-compliant with tamper-evident chain hashing)
|
||||
audit:
|
||||
enabled: true
|
||||
file: "/var/log/fetch_ml/audit.log" # Separate file for audit events
|
||||
chain_hashing: true # Enable tamper-evident logging
|
||||
retention_days: 2555 # 7 years for HIPAA compliance
|
||||
log_ip_address: true # Include source IP in audit events
|
||||
log_user_agent: true # Include user agent in audit events
|
||||
# Sensitive events to always log
|
||||
events:
|
||||
- "authentication_success"
|
||||
- "authentication_failure"
|
||||
- "file_access"
|
||||
- "file_write"
|
||||
- "file_delete"
|
||||
- "job_queued"
|
||||
- "job_started"
|
||||
- "job_completed"
|
||||
- "experiment_created"
|
||||
- "experiment_deleted"
|
||||
|
||||
resources:
|
||||
max_workers: 1
|
||||
|
|
|
|||
|
|
@ -30,3 +30,30 @@ scheduler:
|
|||
token: "wkr_PLACEHOLDER_GENERATE_WITH_OPENSSL_RAND_HEX_32"
|
||||
- id: "worker-02"
|
||||
token: "wkr_PLACEHOLDER_GENERATE_WITH_OPENSSL_RAND_HEX_32"
|
||||
|
||||
# Plugin GPU Quota Configuration
|
||||
# Controls GPU allocation for plugin-based services (Jupyter, vLLM, etc.)
|
||||
plugin_quota:
|
||||
enabled: false # Enable quota enforcement (default: false)
|
||||
total_gpus: 16 # Global GPU limit across all plugins (0 = unlimited)
|
||||
per_user_gpus: 4 # Default per-user GPU limit (0 = unlimited)
|
||||
per_user_services: 2 # Default per-user service count limit (0 = unlimited)
|
||||
|
||||
# Plugin-specific limits (optional)
|
||||
per_plugin_limits:
|
||||
vllm:
|
||||
max_gpus: 8 # Max GPUs for vLLM across all users
|
||||
max_services: 4 # Max vLLM service instances
|
||||
jupyter:
|
||||
max_gpus: 4 # Max GPUs for Jupyter across all users
|
||||
max_services: 10 # Max Jupyter service instances
|
||||
|
||||
# Per-user overrides (optional)
|
||||
user_overrides:
|
||||
admin:
|
||||
max_gpus: 8 # Admin gets more GPUs
|
||||
max_services: 5 # Admin can run more services
|
||||
allowed_plugins: ["jupyter", "vllm"] # Restrict which plugins user can use
|
||||
researcher1:
|
||||
max_gpus: 2 # Limited GPU access
|
||||
max_services: 1 # Single service limit
|
||||
|
|
|
|||
|
|
@ -48,6 +48,39 @@ queue:
|
|||
native:
|
||||
data_dir: "data/dev/queue"
|
||||
|
||||
# Plugin Configuration (for local development)
|
||||
plugins:
|
||||
# Jupyter Notebook/Lab Service
|
||||
jupyter:
|
||||
enabled: true
|
||||
image: "quay.io/jupyter/base-notebook:latest"
|
||||
default_port: 8888
|
||||
mode: "lab"
|
||||
# Security settings
|
||||
security:
|
||||
trusted_channels:
|
||||
- "conda-forge"
|
||||
- "defaults"
|
||||
blocked_packages: [] # Less restrictive for local dev
|
||||
require_password: false # No password for local dev
|
||||
# Resource limits
|
||||
max_gpu_per_instance: 1
|
||||
max_memory_per_instance: "4Gi"
|
||||
|
||||
# vLLM Inference Service
|
||||
vllm:
|
||||
enabled: true
|
||||
image: "vllm/vllm-openai:latest"
|
||||
default_port: 8000
|
||||
# Model cache location
|
||||
model_cache: "data/dev/models"
|
||||
# Supported quantization methods: awq, gptq, fp8, squeezellm
|
||||
default_quantization: "" # No quantization for dev (better quality)
|
||||
# Resource limits
|
||||
max_gpu_per_instance: 1
|
||||
max_model_len: 2048
|
||||
tensor_parallel_size: 1
|
||||
|
||||
task_lease_duration: "30m"
|
||||
heartbeat_interval: "1m"
|
||||
max_retries: 3
|
||||
|
|
|
|||
|
|
@ -50,7 +50,40 @@ resources:
|
|||
metrics:
|
||||
enabled: true
|
||||
listen_addr: ":9100"
|
||||
metrics_flush_interval: "500ms"
|
||||
# NOTE(review): duplicate 'metrics_flush_interval' key (already set two lines above) — duplicate keys are invalid YAML and most parsers silently keep only the last value; remove this line
|
||||
|
||||
# Plugin Configuration
|
||||
plugins:
|
||||
# Jupyter Notebook/Lab Service
|
||||
jupyter:
|
||||
enabled: true
|
||||
image: "quay.io/jupyter/base-notebook:latest"
|
||||
default_port: 8888
|
||||
mode: "lab"
|
||||
# Security settings
|
||||
security:
|
||||
trusted_channels:
|
||||
- "conda-forge"
|
||||
- "defaults"
|
||||
blocked_packages: [] # Dev environment - less restrictive
|
||||
require_password: false # No password for dev
|
||||
# Resource limits
|
||||
max_gpu_per_instance: 1
|
||||
max_memory_per_instance: "4Gi"
|
||||
|
||||
# vLLM Inference Service
|
||||
vllm:
|
||||
enabled: true
|
||||
image: "vllm/vllm-openai:latest"
|
||||
default_port: 8000
|
||||
# Model cache location
|
||||
model_cache: "/models"
|
||||
# Supported quantization methods: awq, gptq, fp8, squeezellm
|
||||
default_quantization: "" # No quantization for dev
|
||||
# Resource limits
|
||||
max_gpu_per_instance: 1
|
||||
max_model_len: 2048
|
||||
tensor_parallel_size: 1
|
||||
|
||||
task_lease_duration: "30m"
|
||||
heartbeat_interval: "1m"
|
||||
|
|
|
|||
|
|
@ -48,3 +48,42 @@ task_lease_duration: "30m"
|
|||
heartbeat_interval: "1m"
|
||||
max_retries: 3
|
||||
graceful_timeout: "5m"
|
||||
|
||||
# Plugin Configuration
|
||||
plugins:
|
||||
# Jupyter Notebook/Lab Service
|
||||
jupyter:
|
||||
enabled: true
|
||||
image: "quay.io/jupyter/base-notebook:latest"
|
||||
default_port: 8888
|
||||
# Security settings
|
||||
security:
|
||||
trusted_channels:
|
||||
- "conda-forge"
|
||||
- "defaults"
|
||||
- "pytorch"
|
||||
blocked_packages:
|
||||
- "requests"
|
||||
- "urllib3"
|
||||
- "httpx"
|
||||
require_password: true
|
||||
# Resource limits (enforced by scheduler quota system)
|
||||
max_gpu_per_instance: 1
|
||||
max_memory_per_instance: "8Gi"
|
||||
|
||||
# vLLM Inference Service
|
||||
vllm:
|
||||
enabled: true
|
||||
image: "vllm/vllm-openai:latest"
|
||||
default_port: 8000
|
||||
# Model cache location
|
||||
model_cache: "/models"
|
||||
# Supported quantization methods: awq, gptq, fp8, squeezellm
|
||||
default_quantization: "" # empty = no quantization
|
||||
# Resource limits
|
||||
max_gpu_per_instance: 4
|
||||
max_model_len: 4096
|
||||
# Environment variables passed to container
|
||||
env:
|
||||
- "HF_HOME=/models"
|
||||
- "VLLM_WORKER_MULTIPROC_METHOD=spawn"
|
||||
|
|
|
|||
|
|
@ -48,6 +48,46 @@ queue:
|
|||
backend: "redis"
|
||||
redis_url: "redis://localhost:6379/0"
|
||||
|
||||
# Plugin Configuration
|
||||
plugins:
|
||||
# Jupyter Notebook/Lab Service
|
||||
jupyter:
|
||||
enabled: true
|
||||
image: "quay.io/jupyter/base-notebook:latest"
|
||||
default_port: 8888
|
||||
mode: "lab" # "lab" or "notebook"
|
||||
# Security settings
|
||||
security:
|
||||
trusted_channels:
|
||||
- "conda-forge"
|
||||
- "defaults"
|
||||
- "pytorch"
|
||||
- "nvidia"
|
||||
blocked_packages:
|
||||
- "requests"
|
||||
- "urllib3"
|
||||
- "httpx"
|
||||
- "socket"
|
||||
- "subprocess"
|
||||
require_password: true
|
||||
# Resource limits
|
||||
max_gpu_per_instance: 1
|
||||
max_memory_per_instance: "16Gi"
|
||||
|
||||
# vLLM Inference Service
|
||||
vllm:
|
||||
enabled: true
|
||||
image: "vllm/vllm-openai:latest"
|
||||
default_port: 8000
|
||||
# Model cache location (should be on fast storage)
|
||||
model_cache: "/var/lib/fetchml/models"
|
||||
# Supported quantization methods: awq, gptq, fp8, squeezellm
|
||||
default_quantization: ""
|
||||
# Resource limits
|
||||
max_gpu_per_instance: 2
|
||||
max_model_len: 4096
|
||||
tensor_parallel_size: 1
|
||||
|
||||
# Snapshot store (optional)
|
||||
snapshot_store:
|
||||
enabled: false
|
||||
|
|
|
|||
|
|
@ -45,3 +45,42 @@ task_lease_duration: "30m"
|
|||
heartbeat_interval: "1m"
|
||||
max_retries: 3
|
||||
graceful_timeout: "5m"
|
||||
|
||||
# Plugin Configuration
|
||||
plugins:
|
||||
# Jupyter Notebook/Lab Service
|
||||
jupyter:
|
||||
enabled: true
|
||||
image: "quay.io/jupyter/base-notebook:latest"
|
||||
default_port: 8888
|
||||
mode: "lab"
|
||||
# Security settings (strict for secure config)
|
||||
security:
|
||||
trusted_channels:
|
||||
- "conda-forge"
|
||||
- "defaults"
|
||||
blocked_packages:
|
||||
- "requests"
|
||||
- "urllib3"
|
||||
- "httpx"
|
||||
- "socket"
|
||||
- "subprocess"
|
||||
- "os.system"
|
||||
require_password: true
|
||||
# Resource limits
|
||||
max_gpu_per_instance: 1
|
||||
max_memory_per_instance: "8Gi"
|
||||
|
||||
# vLLM Inference Service
|
||||
vllm:
|
||||
enabled: true
|
||||
image: "vllm/vllm-openai:latest"
|
||||
default_port: 8000
|
||||
# Model cache location
|
||||
model_cache: "/models"
|
||||
# Supported quantization methods: awq, gptq, fp8, squeezellm
|
||||
default_quantization: ""
|
||||
# Resource limits
|
||||
max_gpu_per_instance: 1
|
||||
max_model_len: 4096
|
||||
tensor_parallel_size: 1
|
||||
|
|
|
|||
|
|
@ -45,3 +45,34 @@ podman_memory = "16g"
|
|||
[metrics]
|
||||
enabled = true
|
||||
listen_addr = ":9100"
|
||||
|
||||
# Plugin Configuration
|
||||
[plugins]
|
||||
|
||||
[plugins.jupyter]
|
||||
enabled = true
|
||||
image = "quay.io/jupyter/base-notebook:latest"
|
||||
default_port = 8888
|
||||
mode = "lab"
|
||||
max_gpu_per_instance = 1
|
||||
max_memory_per_instance = "8Gi"
|
||||
|
||||
[plugins.jupyter.security]
|
||||
require_password = true
|
||||
trusted_channels = ["conda-forge", "defaults", "pytorch"]
|
||||
blocked_packages = ["requests", "urllib3", "httpx"]
|
||||
|
||||
[plugins.vllm]
|
||||
enabled = true
|
||||
image = "vllm/vllm-openai:latest"
|
||||
default_port = 8000
|
||||
model_cache = "/models"
|
||||
default_quantization = "" # Options: awq, gptq, fp8, squeezellm
|
||||
max_gpu_per_instance = 2
|
||||
max_model_len = 4096
|
||||
tensor_parallel_size = 1
|
||||
|
||||
# Environment variables for vLLM
|
||||
[plugins.vllm.env]
|
||||
HF_HOME = "/models"
|
||||
VLLM_WORKER_MULTIPROC_METHOD = "spawn"
|
||||
|
|
|
|||
|
|
@ -110,6 +110,36 @@ TLS_KEY_PATH=/app/ssl/key.pem
|
|||
| Prometheus | 9090 | - | - |
|
||||
| Grafana | 3000 | - | - |
|
||||
| Loki | 3100 | - | - |
|
||||
| JupyterLab | 8888* | 8888* | - |
|
||||
| vLLM | 8000* | 8000* | - |
|
||||
|
||||
*Plugin service ports are dynamically allocated from the 8000-9000 range by the scheduler.
|
||||
|
||||
## Plugin Services
|
||||
|
||||
The deployment configurations include support for interactive ML services:
|
||||
|
||||
### Jupyter Notebook/Lab
|
||||
- **Image**: `quay.io/jupyter/base-notebook:latest`
|
||||
- **Security**: Trusted channels (conda-forge, defaults), blocked packages (http clients)
|
||||
- **Resources**: Configurable GPU/memory limits
|
||||
- **Access**: Via scheduler-assigned port (8000-9000 range)
|
||||
|
||||
### vLLM Inference
|
||||
- **Image**: `vllm/vllm-openai:latest`
|
||||
- **Features**: OpenAI-compatible API, quantization support (AWQ, GPTQ, FP8)
|
||||
- **Model Cache**: Configurable path for model storage
|
||||
- **Resources**: Multi-GPU tensor parallelism support
|
||||
|
||||
## Scheduler GPU Quotas
|
||||
|
||||
The scheduler supports GPU quota management for plugin services:
|
||||
- **Global Limit**: Total GPUs across all plugins
|
||||
- **Per-User Limits**: GPU and service count per user
|
||||
- **Per-Plugin Limits**: vLLM and Jupyter-specific limits
|
||||
- **User Overrides**: Special permissions for admins/researchers
|
||||
|
||||
See `configs/scheduler/scheduler.yaml.example` for quota configuration.
|
||||
|
||||
## Monitoring
|
||||
|
||||
|
|
@ -122,3 +152,4 @@ TLS_KEY_PATH=/app/ssl/key.pem
|
|||
- If you need HTTPS externally, terminate TLS at a reverse proxy.
|
||||
- API keys should be managed via environment variables
|
||||
- Database credentials should use secrets management in production
|
||||
- **HIPAA deployments**: Plugins are disabled by default for compliance
|
||||
|
|
|
|||
|
|
@ -29,3 +29,30 @@ max_artifact_total_bytes: 1073741824 # 1GB
|
|||
|
||||
# Provenance (disabled in dev for speed)
|
||||
provenance_best_effort: false
|
||||
|
||||
# Plugin Configuration (development mode)
|
||||
plugins:
|
||||
# Jupyter Notebook/Lab Service
|
||||
jupyter:
|
||||
enabled: true
|
||||
image: "quay.io/jupyter/base-notebook:latest"
|
||||
default_port: 8888
|
||||
mode: "lab"
|
||||
security:
|
||||
trusted_channels:
|
||||
- "conda-forge"
|
||||
- "defaults"
|
||||
blocked_packages: [] # No restrictions in dev
|
||||
require_password: false # No password for dev
|
||||
max_gpu_per_instance: 1
|
||||
max_memory_per_instance: "4Gi"
|
||||
|
||||
# vLLM Inference Service
|
||||
vllm:
|
||||
enabled: true
|
||||
image: "vllm/vllm-openai:latest"
|
||||
default_port: 8000
|
||||
model_cache: "/tmp/models" # Temp location for dev
|
||||
default_quantization: "" # No quantization for dev
|
||||
max_gpu_per_instance: 1
|
||||
max_model_len: 2048
|
||||
|
|
|
|||
|
|
@ -51,3 +51,12 @@ ssh_key: ${SSH_KEY_PATH}
|
|||
|
||||
# Config hash computation enabled (required for audit)
|
||||
# This is automatically computed by Validate()
|
||||
|
||||
# Plugin Configuration (DISABLED for HIPAA compliance)
|
||||
# Jupyter and vLLM services are disabled in HIPAA mode to ensure
|
||||
# no unauthorized network access or data processing
|
||||
plugins:
|
||||
jupyter:
|
||||
enabled: false # Disabled: HIPAA requires strict network isolation
|
||||
vllm:
|
||||
enabled: false # Disabled: External model downloads violate PHI controls
|
||||
|
|
|
|||
|
|
@ -33,3 +33,32 @@ max_artifact_total_bytes: 536870912 # 512MB
|
|||
|
||||
# Provenance (enabled)
|
||||
provenance_best_effort: true
|
||||
|
||||
# Plugin Configuration
|
||||
plugins:
|
||||
# Jupyter Notebook/Lab Service
|
||||
jupyter:
|
||||
enabled: true
|
||||
image: "quay.io/jupyter/base-notebook:latest"
|
||||
default_port: 8888
|
||||
mode: "lab"
|
||||
security:
|
||||
trusted_channels:
|
||||
- "conda-forge"
|
||||
- "defaults"
|
||||
blocked_packages:
|
||||
- "requests"
|
||||
- "urllib3"
|
||||
require_password: true
|
||||
max_gpu_per_instance: 1
|
||||
max_memory_per_instance: "8Gi"
|
||||
|
||||
# vLLM Inference Service
|
||||
vllm:
|
||||
enabled: true
|
||||
image: "vllm/vllm-openai:latest"
|
||||
default_port: 8000
|
||||
model_cache: "/models"
|
||||
default_quantization: ""
|
||||
max_gpu_per_instance: 1
|
||||
max_model_len: 4096
|
||||
|
|
|
|||
Loading…
Reference in a new issue