Enable ML tools integration for data scientists
- Add MLflow, WandB, Streamlit, Dash, Panel, Bokeh to environment.yml - Update security policy to allow network access for ML tools - Modify secure_runner.py to check tool permissions - Add test script and usage guide - Enable localhost network access for dashboard tools
This commit is contained in:
parent
69dc9e6af4
commit
3178cdf575
5 changed files with 169 additions and 5 deletions
68
podman/ML_TOOLS_GUIDE.md
Normal file
68
podman/ML_TOOLS_GUIDE.md
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
# ML Tools Integration Guide
|
||||
|
||||
Data scientists can now use their preferred ML tools securely!
|
||||
|
||||
## Available Tools
|
||||
|
||||
- **MLflow** - Experiment tracking and model registry
|
||||
- **Weights & Biases** - Experiment tracking and visualization
|
||||
- **Streamlit** - Interactive web apps
|
||||
- **Dash** - Plotly-based web dashboards
|
||||
- **Panel** - Data apps and dashboards
|
||||
- **Bokeh** - Interactive visualizations
|
||||
|
||||
## Quick Start
|
||||
|
||||
### 1. Test Tools Available
|
||||
```bash
|
||||
cd podman
|
||||
python test_ml_tools.py
|
||||
```
|
||||
|
||||
### 2. Use MLflow
|
||||
```python
|
||||
import mlflow
|
||||
|
||||
# Start tracking
|
||||
with mlflow.start_run():
|
||||
mlflow.log_param("epochs", 10)
|
||||
mlflow.log_metric("accuracy", 0.95)
|
||||
```
|
||||
|
||||
### 3. Launch Streamlit App
|
||||
```bash
|
||||
# In your secure container
|
||||
streamlit run my_app.py --server.port 8501
|
||||
```
|
||||
|
||||
### 4. Use Dash Dashboard
|
||||
```python
|
||||
import dash
|
||||
from dash import dcc
|
||||
from dash import html
|
||||
|
||||
app = dash.Dash(__name__)
|
||||
app.run_server(debug=True, host='0.0.0.0', port=8050)
|
||||
```
|
||||
|
||||
## Security Features
|
||||
|
||||
- Network access limited to localhost only
|
||||
- Tools pre-approved in security policy
|
||||
- Container isolation maintained
|
||||
- No external internet access
|
||||
|
||||
## Custom Requirements
|
||||
|
||||
Add your own tools via `requirements.txt`:
|
||||
```
|
||||
mlflow==2.7.0
|
||||
wandb==0.16.0
|
||||
streamlit==1.28.0
|
||||
```
|
||||
|
||||
## Access URLs
|
||||
|
||||
- Streamlit: http://localhost:8501
|
||||
- Dash: http://localhost:8050
|
||||
- MLflow UI: http://localhost:5000
|
||||
|
|
@ -35,3 +35,14 @@ dependencies:
|
|||
- statsmodels>=0.13.0
|
||||
- plotly>=5.0.0
|
||||
- dash>=2.0.0
|
||||
# ML Experiment Tracking
|
||||
- mlflow>=2.0.0
|
||||
- wandb>=0.13.0
|
||||
# Dashboard & Visualization
|
||||
- streamlit>=1.20.0
|
||||
- panel>=1.0.0
|
||||
- bokeh>=3.0.0
|
||||
# Data Science Tools
|
||||
- dvc>=3.0.0
|
||||
- optuna>=3.0.0
|
||||
- hyperopt>=0.2.0
|
||||
|
|
|
|||
|
|
@ -60,21 +60,32 @@ class SecurityPolicy:
|
|||
|
||||
def check_package_safety(self, package_name: str) -> bool:
    """Check whether a package is permitted by the security policy.

    Names are compared in normalized form (lower-cased, with runs of
    ``-``, ``_`` and ``.`` collapsed to ``-``) so that spelling variants
    such as ``Requests`` or ``URLLIB3`` cannot slip past
    ``blocked_packages``.

    Args:
        package_name: Name of the package to check.

    Returns:
        True if the package is listed in ``allowed_network_tools`` or is
        not listed in ``blocked_packages``; False if it is blocked.
    """
    import re  # local import: the file's import block is outside this block

    def normalize(name) -> str:
        return re.sub(r"[-_.]+", "-", str(name)).strip().lower()

    wanted = normalize(package_name)

    # Always allow ML tools even if they might be in the blocked list.
    # ``or ()`` treats a missing key and an explicit JSON null the same way.
    allowed = {normalize(n) for n in self.policy.get("allowed_network_tools") or ()}
    if wanted in allowed:
        return True

    blocked = {normalize(n) for n in self.policy.get("blocked_packages") or ()}
    return wanted not in blocked
|
||||
|
||||
def check_network_access(self, domain: str | None) -> bool:
|
||||
def check_network_access(self, domain: str | None = None) -> bool:
|
||||
"""Check if network access is allowed"""
|
||||
if not self.policy.get("allow_network", False):
|
||||
return False
|
||||
|
||||
# Check if domain is in whitelist
|
||||
if domain:
|
||||
allowed_domains = self.policy.get("allowed_domains", [])
|
||||
return domain in allowed_domains
|
||||
|
||||
whitelist = self.policy.get("network_whitelist", [])
|
||||
return any(allowed in domain for allowed in whitelist)
|
||||
|
||||
return True
|
||||
|
||||
def check_tool_allowed(self, tool_name: str) -> bool:
    """Check whether a specific tool is allowed network access.

    Args:
        tool_name: Tool name to look up in the policy.

    Returns:
        True if ``tool_name`` is listed under the policy's
        ``allowed_network_tools`` key; False otherwise, including when
        the key is missing or set to null.
    """
    # ``or ()`` keeps a null policy value from raising TypeError on ``in``.
    allowed_tools = self.policy.get("allowed_network_tools") or ()
    return tool_name in allowed_tools
|
||||
|
||||
|
||||
class CondaRunner:
|
||||
"""Secure experiment runner with Conda + Mamba"""
|
||||
|
|
|
|||
|
|
@ -1,5 +1,18 @@
|
|||
{
|
||||
"allow_network": false,
|
||||
"allow_network": true,
|
||||
"allowed_network_tools": [
|
||||
"mlflow",
|
||||
"wandb",
|
||||
"streamlit",
|
||||
"dash",
|
||||
"panel",
|
||||
"bokeh"
|
||||
],
|
||||
"network_whitelist": [
|
||||
"localhost",
|
||||
"127.0.0.1",
|
||||
"0.0.0.0"
|
||||
],
|
||||
"blocked_packages": [
|
||||
"requests",
|
||||
"urllib3",
|
||||
|
|
|
|||
61
podman/test_ml_tools.py
Normal file
61
podman/test_ml_tools.py
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script to verify ML tools integration works
|
||||
"""
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
|
||||
def test_tool_import(tool_name):
|
||||
"""Test if a tool can be imported"""
|
||||
try:
|
||||
if tool_name == "mlflow":
|
||||
import mlflow
|
||||
print(f"✅ {tool_name}: {mlflow.__version__}")
|
||||
elif tool_name == "wandb":
|
||||
import wandb
|
||||
print(f"✅ {tool_name}: {wandb.__version__}")
|
||||
elif tool_name == "streamlit":
|
||||
import streamlit
|
||||
print(f"✅ {tool_name}: {streamlit.__version__}")
|
||||
elif tool_name == "dash":
|
||||
import dash
|
||||
print(f"✅ {tool_name}: {dash.__version__}")
|
||||
elif tool_name == "panel":
|
||||
import panel
|
||||
print(f"✅ {tool_name}: {panel.__version__}")
|
||||
elif tool_name == "bokeh":
|
||||
import bokeh
|
||||
print(f"✅ {tool_name}: {bokeh.__version__}")
|
||||
else:
|
||||
print(f"❓ {tool_name}: Unknown tool")
|
||||
return True
|
||||
except ImportError as e:
|
||||
print(f"❌ {tool_name}: {e}")
|
||||
return False
|
||||
|
||||
def main():
    """Run the import check for each expected ML tool and print a summary.

    Returns:
        0 if every tool was reported available, otherwise 1. The value is
        used as the process exit code.
    """
    divider = "=" * 40
    print("🧪 Testing ML Tools Integration")
    print(divider)

    expected = ("mlflow", "wandb", "streamlit", "dash", "panel", "bokeh")
    # Checks run in order, so the per-tool status lines keep their sequence.
    outcomes = [test_tool_import(name) for name in expected]

    print("\n" + divider)
    available = sum(outcomes)
    total = len(outcomes)
    print(f"📊 Results: {available}/{total} tools available")

    if available != total:
        print("⚠️ Some tools are missing. Check environment.yml")
        return 1

    print("🎉 All ML tools are ready to use!")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
Loading…
Reference in a new issue