config: add plugin GPU quotas, plugin services, and audit logging to configs

- Add plugin GPU quota config section to scheduler.yaml.example
- Add audit logging config to homelab-secure.yaml (HIPAA-compliant)
- Add Jupyter and vLLM plugin configs to all worker configs:
  - Security settings (passwords, trusted channels, blocked packages)
  - Resource limits (GPU, memory, CPU)
  - Model cache paths and quantization options for vLLM
- Disable plugins in HIPAA deployment mode for compliance
- Update deployments README with plugin services and GPU quotas
Jeremie Fraeys 2026-02-26 14:34:42 -05:00
parent 90ea18555c
commit b3a0c78903
12 changed files with 359 additions and 2 deletions


@@ -62,7 +62,26 @@ database:
logging:
  level: "info"
  file: "/logs/fetch_ml.log"
  audit_log: ""

# Audit logging (HIPAA-compliant with tamper-evident chain hashing)
audit:
  enabled: true
  file: "/var/log/fetch_ml/audit.log"  # Separate file for audit events
  chain_hashing: true                  # Enable tamper-evident logging
  retention_days: 2555                 # 7 years for HIPAA compliance
  log_ip_address: true                 # Include source IP in audit events
  log_user_agent: true                 # Include user agent in audit events
  # Sensitive events to always log
  events:
    - "authentication_success"
    - "authentication_failure"
    - "file_access"
    - "file_write"
    - "file_delete"
    - "job_queued"
    - "job_started"
    - "job_completed"
    - "experiment_created"
    - "experiment_deleted"

resources:
  max_workers: 1
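
A note on the chain_hashing option above: tamper-evident logging works by embedding the hash of the previous record in each new record, so editing or deleting any entry invalidates every hash after it. A hypothetical entry, purely to illustrate the idea (this is not the project's actual log schema):

# Illustrative only: hypothetical chain-hashed audit record
- event: "file_access"
  timestamp: "2026-02-26T14:34:42-05:00"
  ip_address: "192.0.2.10"             # captured because log_ip_address: true
  prev_hash: "9f2c..."                 # hash of the preceding record
  hash: "<sha256 of prev_hash + this record>"  # breaks if history is altered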


@@ -30,3 +30,30 @@ scheduler:
      token: "wkr_PLACEHOLDER_GENERATE_WITH_OPENSSL_RAND_HEX_32"
    - id: "worker-02"
      token: "wkr_PLACEHOLDER_GENERATE_WITH_OPENSSL_RAND_HEX_32"

# Plugin GPU Quota Configuration
# Controls GPU allocation for plugin-based services (Jupyter, vLLM, etc.)
plugin_quota:
  enabled: false         # Enable quota enforcement (default: false)
  total_gpus: 16         # Global GPU limit across all plugins (0 = unlimited)
  per_user_gpus: 4       # Default per-user GPU limit (0 = unlimited)
  per_user_services: 2   # Default per-user service count limit (0 = unlimited)

  # Plugin-specific limits (optional)
  per_plugin_limits:
    vllm:
      max_gpus: 8        # Max GPUs for vLLM across all users
      max_services: 4    # Max vLLM service instances
    jupyter:
      max_gpus: 4        # Max GPUs for Jupyter across all users
      max_services: 10   # Max Jupyter service instances

  # Per-user overrides (optional)
  user_overrides:
    admin:
      max_gpus: 8        # Admin gets more GPUs
      max_services: 5    # Admin can run more services
      allowed_plugins: ["jupyter", "vllm"]  # Restrict which plugins the user can use
    researcher1:
      max_gpus: 2        # Limited GPU access
      max_services: 1    # Single service limit
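
To make the precedence of these limits concrete, here is a hedged sketch for a 4-GPU box. It assumes, from the shape of the config rather than documented behavior, that user_overrides beat the per_user_* defaults and that total_gpus caps the sum of all allocations:

# Hypothetical minimal quota setup for a single 4-GPU machine
plugin_quota:
  enabled: true
  total_gpus: 4          # hard ceiling: the physical GPU count
  per_user_gpus: 1       # default: one GPU per user
  per_user_services: 1
  user_overrides:
    admin:
      max_gpus: 2        # assumed to override per_user_gpus for this user
      max_services: 2
# Under these assumptions, ordinary users get 1 GPU / 1 service,
# admin gets 2 / 2, and combined usage never exceeds 4 GPUs.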


@@ -48,6 +48,39 @@ queue:
  native:
    data_dir: "data/dev/queue"

# Plugin Configuration (for local development)
plugins:
  # Jupyter Notebook/Lab Service
  jupyter:
    enabled: true
    image: "quay.io/jupyter/base-notebook:latest"
    default_port: 8888
    mode: "lab"
    # Security settings
    security:
      trusted_channels:
        - "conda-forge"
        - "defaults"
      blocked_packages: []      # Less restrictive for local dev
      require_password: false   # No password for local dev
    # Resource limits
    max_gpu_per_instance: 1
    max_memory_per_instance: "4Gi"

  # vLLM Inference Service
  vllm:
    enabled: true
    image: "vllm/vllm-openai:latest"
    default_port: 8000
    # Model cache location
    model_cache: "data/dev/models"
    # Supported quantization methods: awq, gptq, fp8, squeezellm
    default_quantization: ""    # No quantization for dev (better quality)
    # Resource limits
    max_gpu_per_instance: 1
    max_model_len: 2048
    tensor_parallel_size: 1

task_lease_duration: "30m"
heartbeat_interval: "1m"
max_retries: 3
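
A hedged aside on the duration values here and throughout these configs: "30m", "1m", and "500ms" look like Go time.ParseDuration strings (an assumption based on their form, not on the source). If that holds, the valid units are ns, us, ms, s, m, and h, and they compose:

# Hypothetical alternative values, assuming Go-style duration parsing
task_lease_duration: "1h30m"   # ninety minutes
heartbeat_interval: "90s"      # same as "1m30s"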


@@ -50,7 +50,40 @@ resources:
metrics:
  enabled: true
  listen_addr: ":9100"
  metrics_flush_interval: "500ms"

# Plugin Configuration
plugins:
  # Jupyter Notebook/Lab Service
  jupyter:
    enabled: true
    image: "quay.io/jupyter/base-notebook:latest"
    default_port: 8888
    mode: "lab"
    # Security settings
    security:
      trusted_channels:
        - "conda-forge"
        - "defaults"
      blocked_packages: []      # Dev environment - less restrictive
      require_password: false   # No password for dev
    # Resource limits
    max_gpu_per_instance: 1
    max_memory_per_instance: "4Gi"

  # vLLM Inference Service
  vllm:
    enabled: true
    image: "vllm/vllm-openai:latest"
    default_port: 8000
    # Model cache location
    model_cache: "/models"
    # Supported quantization methods: awq, gptq, fp8, squeezellm
    default_quantization: ""    # No quantization for dev
    # Resource limits
    max_gpu_per_instance: 1
    max_model_len: 2048
    tensor_parallel_size: 1

task_lease_duration: "30m"
heartbeat_interval: "1m"


@@ -48,3 +48,42 @@ task_lease_duration: "30m"
heartbeat_interval: "1m"
max_retries: 3
graceful_timeout: "5m"

# Plugin Configuration
plugins:
  # Jupyter Notebook/Lab Service
  jupyter:
    enabled: true
    image: "quay.io/jupyter/base-notebook:latest"
    default_port: 8888
    # Security settings
    security:
      trusted_channels:
        - "conda-forge"
        - "defaults"
        - "pytorch"
      blocked_packages:
        - "requests"
        - "urllib3"
        - "httpx"
      require_password: true
    # Resource limits (enforced by scheduler quota system)
    max_gpu_per_instance: 1
    max_memory_per_instance: "8Gi"

  # vLLM Inference Service
  vllm:
    enabled: true
    image: "vllm/vllm-openai:latest"
    default_port: 8000
    # Model cache location
    model_cache: "/models"
    # Supported quantization methods: awq, gptq, fp8, squeezellm
    default_quantization: ""  # empty = no quantization
    # Resource limits
    max_gpu_per_instance: 4
    max_model_len: 4096
    # Environment variables passed to the container
    env:
      - "HF_HOME=/models"
      - "VLLM_WORKER_MULTIPROC_METHOD=spawn"


@@ -48,6 +48,46 @@ queue:
  backend: "redis"
  redis_url: "redis://localhost:6379/0"

# Plugin Configuration
plugins:
  # Jupyter Notebook/Lab Service
  jupyter:
    enabled: true
    image: "quay.io/jupyter/base-notebook:latest"
    default_port: 8888
    mode: "lab"  # "lab" or "notebook"
    # Security settings
    security:
      trusted_channels:
        - "conda-forge"
        - "defaults"
        - "pytorch"
        - "nvidia"
      blocked_packages:
        - "requests"
        - "urllib3"
        - "httpx"
        - "socket"
        - "subprocess"
      require_password: true
    # Resource limits
    max_gpu_per_instance: 1
    max_memory_per_instance: "16Gi"

  # vLLM Inference Service
  vllm:
    enabled: true
    image: "vllm/vllm-openai:latest"
    default_port: 8000
    # Model cache location (should be on fast storage)
    model_cache: "/var/lib/fetchml/models"
    # Supported quantization methods: awq, gptq, fp8, squeezellm
    default_quantization: ""
    # Resource limits
    max_gpu_per_instance: 2
    max_model_len: 4096
    tensor_parallel_size: 1

# Snapshot store (optional)
snapshot_store:
  enabled: false


@@ -45,3 +45,42 @@ task_lease_duration: "30m"
heartbeat_interval: "1m"
max_retries: 3
graceful_timeout: "5m"

# Plugin Configuration
plugins:
  # Jupyter Notebook/Lab Service
  jupyter:
    enabled: true
    image: "quay.io/jupyter/base-notebook:latest"
    default_port: 8888
    mode: "lab"
    # Security settings (strict for secure config)
    security:
      trusted_channels:
        - "conda-forge"
        - "defaults"
      blocked_packages:
        - "requests"
        - "urllib3"
        - "httpx"
        - "socket"
        - "subprocess"
        - "os.system"
      require_password: true
    # Resource limits
    max_gpu_per_instance: 1
    max_memory_per_instance: "8Gi"

  # vLLM Inference Service
  vllm:
    enabled: true
    image: "vllm/vllm-openai:latest"
    default_port: 8000
    # Model cache location
    model_cache: "/models"
    # Supported quantization methods: awq, gptq, fp8, squeezellm
    default_quantization: ""
    # Resource limits
    max_gpu_per_instance: 1
    max_model_len: 4096
    tensor_parallel_size: 1
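
For models too large for a single card, the last two settings above work as a pair. A hedged sketch (vLLM requires the shard count to be at most the number of GPUs actually available to the instance):

# Hypothetical two-GPU variant of the vLLM block above
vllm:
  enabled: true
  max_gpu_per_instance: 2
  tensor_parallel_size: 2  # shard model weights across both GPUs;
                           # must not exceed the GPUs granted to the instance
  max_model_len: 4096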


@@ -45,3 +45,34 @@ podman_memory = "16g"
[metrics]
enabled = true
listen_addr = ":9100"
# Plugin Configuration
[plugins]
[plugins.jupyter]
enabled = true
image = "quay.io/jupyter/base-notebook:latest"
default_port = 8888
mode = "lab"
max_gpu_per_instance = 1
max_memory_per_instance = "8Gi"
[plugins.jupyter.security]
require_password = true
trusted_channels = ["conda-forge", "defaults", "pytorch"]
blocked_packages = ["requests", "urllib3", "httpx"]
[plugins.vllm]
enabled = true
image = "vllm/vllm-openai:latest"
default_port = 8000
model_cache = "/models"
default_quantization = "" # Options: awq, gptq, fp8, squeezellm
max_gpu_per_instance = 2
max_model_len = 4096
tensor_parallel_size = 1
# Environment variables for vLLM
[plugins.vllm.env]
HF_HOME = "/models"
VLLM_WORKER_MULTIPROC_METHOD = "spawn"


@@ -110,6 +110,36 @@ TLS_KEY_PATH=/app/ssl/key.pem
| Prometheus | 9090 | - | - |
| Grafana | 3000 | - | - |
| Loki | 3100 | - | - |
| JupyterLab | 8888* | 8888* | - |
| vLLM | 8000* | 8000* | - |

*Plugin service ports are dynamically allocated from the 8000-9000 range by the scheduler.

## Plugin Services

The deployment configurations include support for interactive ML services:

### Jupyter Notebook/Lab

- **Image**: `quay.io/jupyter/base-notebook:latest`
- **Security**: Trusted channels (conda-forge, defaults), blocked packages (HTTP clients)
- **Resources**: Configurable GPU/memory limits
- **Access**: Via scheduler-assigned port (8000-9000 range)

### vLLM Inference

- **Image**: `vllm/vllm-openai:latest`
- **Features**: OpenAI-compatible API, quantization support (AWQ, GPTQ, FP8)
- **Model Cache**: Configurable path for model storage
- **Resources**: Multi-GPU tensor parallelism support

## Scheduler GPU Quotas

The scheduler supports GPU quota management for plugin services:

- **Global Limit**: Total GPUs across all plugins
- **Per-User Limits**: GPU and service count per user
- **Per-Plugin Limits**: vLLM- and Jupyter-specific limits
- **User Overrides**: Special permissions for admins/researchers

See `configs/scheduler/scheduler.yaml.example` for quota configuration.

## Monitoring

@@ -122,3 +152,4 @@ TLS_KEY_PATH=/app/ssl/key.pem
- If you need HTTPS externally, terminate TLS at a reverse proxy.
- API keys should be managed via environment variables.
- Database credentials should use secrets management in production.
- **HIPAA deployments**: Plugins are disabled by default for compliance.


@@ -29,3 +29,30 @@ max_artifact_total_bytes: 1073741824  # 1GB
# Provenance (disabled in dev for speed)
provenance_best_effort: false

# Plugin Configuration (development mode)
plugins:
  # Jupyter Notebook/Lab Service
  jupyter:
    enabled: true
    image: "quay.io/jupyter/base-notebook:latest"
    default_port: 8888
    mode: "lab"
    security:
      trusted_channels:
        - "conda-forge"
        - "defaults"
      blocked_packages: []      # No restrictions in dev
      require_password: false   # No password for dev
    max_gpu_per_instance: 1
    max_memory_per_instance: "4Gi"

  # vLLM Inference Service
  vllm:
    enabled: true
    image: "vllm/vllm-openai:latest"
    default_port: 8000
    model_cache: "/tmp/models"  # Temp location for dev
    default_quantization: ""    # No quantization for dev
    max_gpu_per_instance: 1
    max_model_len: 2048


@@ -51,3 +51,12 @@ ssh_key: ${SSH_KEY_PATH}
# Config hash computation enabled (required for audit)
# This is automatically computed by Validate()

# Plugin Configuration (DISABLED for HIPAA compliance)
# Jupyter and vLLM services are disabled in HIPAA mode to ensure
# no unauthorized network access or data processing
plugins:
  jupyter:
    enabled: false  # Disabled: HIPAA requires strict network isolation
  vllm:
    enabled: false  # Disabled: external model downloads violate PHI controls


@@ -33,3 +33,32 @@ max_artifact_total_bytes: 536870912  # 512MB
# Provenance (enabled)
provenance_best_effort: true

# Plugin Configuration
plugins:
  # Jupyter Notebook/Lab Service
  jupyter:
    enabled: true
    image: "quay.io/jupyter/base-notebook:latest"
    default_port: 8888
    mode: "lab"
    security:
      trusted_channels:
        - "conda-forge"
        - "defaults"
      blocked_packages:
        - "requests"
        - "urllib3"
      require_password: true
    max_gpu_per_instance: 1
    max_memory_per_instance: "8Gi"

  # vLLM Inference Service
  vllm:
    enabled: true
    image: "vllm/vllm-openai:latest"
    default_port: 8000
    model_cache: "/models"
    default_quantization: ""
    max_gpu_per_instance: 1
    max_model_len: 4096