Enable ML tools integration for data scientists

- Add MLflow, WandB, Streamlit, Dash, Panel, Bokeh to environment.yml
- Update security policy to allow network access for ML tools
- Modify secure_runner.py to check tool permissions
- Add test script and usage guide
- Enable localhost network access for dashboard tools
This commit is contained in:
Jeremie Fraeys 2025-12-06 15:49:21 -05:00
parent 69dc9e6af4
commit 3178cdf575
5 changed files with 169 additions and 5 deletions

68
podman/ML_TOOLS_GUIDE.md Normal file
View file

@ -0,0 +1,68 @@
# ML Tools Integration Guide
Data scientists can now use their preferred ML tools securely!
## Available Tools
- **MLflow** - Experiment tracking and model registry
- **Weights & Biases** - Experiment tracking and visualization
- **Streamlit** - Interactive web apps
- **Dash** - Plotly-based web dashboards
- **Panel** - Data apps and dashboards
- **Bokeh** - Interactive visualizations
## Quick Start
### 1. Test Tools Available
```bash
cd podman
python test_ml_tools.py
```
### 2. Use MLflow
```python
import mlflow
# Start tracking
with mlflow.start_run():
    mlflow.log_param("epochs", 10)
    mlflow.log_metric("accuracy", 0.95)
```
### 3. Launch Streamlit App
```bash
# In your secure container
streamlit run my_app.py --server.port 8501
```
### 4. Use Dash Dashboard
```python
import dash
from dash import dcc, html
app = dash.Dash(__name__)
app.run_server(debug=True, host='0.0.0.0', port=8050)
```
## Security Features
- Network access limited to localhost only
- Tools pre-approved in security policy
- Container isolation maintained
- No external internet access
## Custom Requirements
Add your own tools via `requirements.txt`:
```
mlflow==2.7.0
wandb==0.16.0
streamlit==1.28.0
```
## Access URLs
- Streamlit: http://localhost:8501
- Dash: http://localhost:8050
- MLflow UI: http://localhost:5000

View file

@ -35,3 +35,14 @@ dependencies:
- statsmodels>=0.13.0
- plotly>=5.0.0
- dash>=2.0.0
# ML Experiment Tracking
- mlflow>=2.0.0
- wandb>=0.13.0
# Dashboard & Visualization
- streamlit>=1.20.0
- panel>=1.0.0
- bokeh>=3.0.0
# Data Science Tools
- dvc>=3.0.0
- optuna>=3.0.0
- hyperopt>=0.2.0

View file

@ -60,21 +60,32 @@ class SecurityPolicy:
def check_package_safety(self, package_name: str) -> bool:
    """Report whether ``package_name`` may be used under the loaded policy.

    A package named in the policy's ``allowed_network_tools`` list is
    always accepted, even if it also appears in ``blocked_packages``.
    Any other package is accepted unless ``blocked_packages`` names it.
    """
    policy = self.policy
    # The tool allow-list is consulted first so it overrides the block list.
    explicitly_allowed = package_name in policy.get("allowed_network_tools", [])
    return explicitly_allowed or package_name not in policy.get("blocked_packages", [])
def check_network_access(self, domain: str | None) -> bool:
def check_network_access(self, domain: str | None = None) -> bool:
"""Check if network access is allowed"""
if not self.policy.get("allow_network", False):
return False
# Check if domain is in whitelist
if domain:
allowed_domains = self.policy.get("allowed_domains", [])
return domain in allowed_domains
whitelist = self.policy.get("network_whitelist", [])
return any(allowed in domain for allowed in whitelist)
return True
def check_tool_allowed(self, tool_name: str) -> bool:
    """Return True when ``tool_name`` is listed under the policy's
    ``allowed_network_tools`` key; a missing key counts as an empty list.
    """
    return tool_name in self.policy.get("allowed_network_tools", [])
class CondaRunner:
"""Secure experiment runner with Conda + Mamba"""

View file

@ -1,5 +1,18 @@
{
"allow_network": false,
"allow_network": true,
"allowed_network_tools": [
"mlflow",
"wandb",
"streamlit",
"dash",
"panel",
"bokeh"
],
"network_whitelist": [
"localhost",
"127.0.0.1",
"0.0.0.0"
],
"blocked_packages": [
"requests",
"urllib3",

61
podman/test_ml_tools.py Normal file
View file

@ -0,0 +1,61 @@
#!/usr/bin/env python3
"""
Test script to verify ML tools integration works
"""
import subprocess
import sys
import os
def test_tool_import(tool_name):
"""Test if a tool can be imported"""
try:
if tool_name == "mlflow":
import mlflow
print(f"{tool_name}: {mlflow.__version__}")
elif tool_name == "wandb":
import wandb
print(f"{tool_name}: {wandb.__version__}")
elif tool_name == "streamlit":
import streamlit
print(f"{tool_name}: {streamlit.__version__}")
elif tool_name == "dash":
import dash
print(f"{tool_name}: {dash.__version__}")
elif tool_name == "panel":
import panel
print(f"{tool_name}: {panel.__version__}")
elif tool_name == "bokeh":
import bokeh
print(f"{tool_name}: {bokeh.__version__}")
else:
print(f"{tool_name}: Unknown tool")
return True
except ImportError as e:
print(f"{tool_name}: {e}")
return False
def main():
    """Probe every supported ML tool and print a summary.

    Returns:
        0 when every tool imported successfully, 1 otherwise; the value is
        meant to be used as the process exit status.
    """
    divider = "=" * 40
    print("🧪 Testing ML Tools Integration")
    print(divider)

    tool_names = ("mlflow", "wandb", "streamlit", "dash", "panel", "bokeh")
    outcomes = [test_tool_import(name) for name in tool_names]

    print("\n" + divider)
    available = sum(outcomes)
    expected = len(outcomes)
    print(f"📊 Results: {available}/{expected} tools available")

    if available != expected:
        print("⚠️ Some tools are missing. Check environment.yml")
        return 1

    print("🎉 All ML tools are ready to use!")
    return 0
# Script entry point: run the checks and use main()'s result as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())