diff --git a/podman/ML_TOOLS_GUIDE.md b/podman/ML_TOOLS_GUIDE.md new file mode 100644 index 0000000..d856346 --- /dev/null +++ b/podman/ML_TOOLS_GUIDE.md @@ -0,0 +1,68 @@ +# ML Tools Integration Guide + +Data scientists can now use their preferred ML tools securely! + +## Available Tools + +- **MLflow** - Experiment tracking and model registry +- **Weights & Biases** - Experiment tracking and visualization +- **Streamlit** - Interactive web apps +- **Dash** - Plotly-based web dashboards +- **Panel** - Data apps and dashboards +- **Bokeh** - Interactive visualizations + +## Quick Start + +### 1. Test Tools Available +```bash +cd podman +python test_ml_tools.py +``` + +### 2. Use MLflow +```python +import mlflow + +# Start tracking +with mlflow.start_run(): + mlflow.log_param("epochs", 10) + mlflow.log_metric("accuracy", 0.95) +``` + +### 3. Launch Streamlit App +```bash +# In your secure container +streamlit run my_app.py --server.port 8501 +``` + +### 4. Use Dash Dashboard +```python +import dash +from dash import dcc +from dash import html + +app = dash.Dash(__name__) +app.run_server(debug=True, host='0.0.0.0', port=8050) +``` + +## Security Features + +- Network access limited to localhost only +- Tools pre-approved in security policy +- Container isolation maintained +- No external internet access + +## Custom Requirements + +Add your own tools via `requirements.txt`: +``` +mlflow==2.7.0 +wandb==0.16.0 +streamlit==1.28.0 +``` + +## Access URLs + +- Streamlit: http://localhost:8501 +- Dash: http://localhost:8050 +- MLflow UI: http://localhost:5000 diff --git a/podman/environment.yml b/podman/environment.yml index 4fbf1e5..f920f13 100644 --- a/podman/environment.yml +++ b/podman/environment.yml @@ -35,3 +35,14 @@ dependencies: - statsmodels>=0.13.0 - plotly>=5.0.0 - dash>=2.0.0 + # ML Experiment Tracking + - mlflow>=2.0.0 + - wandb>=0.13.0 + # Dashboard & Visualization + - streamlit>=1.20.0 + - panel>=1.0.0 + - bokeh>=3.0.0 + # Data
Science Tools + - dvc>=3.0.0 + - optuna>=3.0.0 + - hyperopt>=0.2.0 diff --git a/podman/secure_runner.py b/podman/secure_runner.py index 37e8b68..d78cc93 100644 --- a/podman/secure_runner.py +++ b/podman/secure_runner.py @@ -60,21 +60,32 @@ class SecurityPolicy: def check_package_safety(self, package_name: str) -> bool: """Check if a package is allowed""" + # Always allow ML tools even if they might be in blocked list + allowed_tools = self.policy.get("allowed_network_tools", []) + if package_name in allowed_tools: + return True + if package_name in self.policy.get("blocked_packages", []): return False return True - def check_network_access(self, domain: str | None) -> bool: + def check_network_access(self, domain: str | None = None) -> bool: """Check if network access is allowed""" if not self.policy.get("allow_network", False): return False + # Exact host match (any ":port" suffix ignored); substring matching would admit hosts such as "localhost.evil.com" if domain: - allowed_domains = self.policy.get("allowed_domains", []) - return domain in allowed_domains - + whitelist = self.policy.get("network_whitelist", []) + return domain.partition(":")[0] in whitelist + return True + def check_tool_allowed(self, tool_name: str) -> bool: + """Check if a specific tool is allowed network access""" + allowed_tools = self.policy.get("allowed_network_tools", []) + return tool_name in allowed_tools + class CondaRunner: """Secure experiment runner with Conda + Mamba""" diff --git a/podman/security_policy.json b/podman/security_policy.json index 47db848..da9c3de 100644 --- a/podman/security_policy.json +++ b/podman/security_policy.json @@ -1,5 +1,18 @@ { - "allow_network": false, + "allow_network": true, + "allowed_network_tools": [ + "mlflow", + "wandb", + "streamlit", + "dash", + "panel", + "bokeh" + ], + "network_whitelist": [ + "localhost", + "127.0.0.1", + "0.0.0.0" + ], "blocked_packages": [ "requests", "urllib3", diff --git a/podman/test_ml_tools.py b/podman/test_ml_tools.py new file mode 100644 index 0000000..bdb80cb --- /dev/null +++
b/podman/test_ml_tools.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +""" +Test script to verify ML tools integration works +""" +import subprocess +import sys +import os + +def test_tool_import(tool_name): + """Test if a tool can be imported""" + try: + if tool_name == "mlflow": + import mlflow + print(f"✅ {tool_name}: {mlflow.__version__}") + elif tool_name == "wandb": + import wandb + print(f"✅ {tool_name}: {wandb.__version__}") + elif tool_name == "streamlit": + import streamlit + print(f"✅ {tool_name}: {streamlit.__version__}") + elif tool_name == "dash": + import dash + print(f"✅ {tool_name}: {dash.__version__}") + elif tool_name == "panel": + import panel + print(f"✅ {tool_name}: {panel.__version__}") + elif tool_name == "bokeh": + import bokeh + print(f"✅ {tool_name}: {bokeh.__version__}") + else: + print(f"❓ {tool_name}: Unknown tool") + return False + return True + except ImportError as e: + print(f"❌ {tool_name}: {e}") + return False + +def main(): + print("🧪 Testing ML Tools Integration") + print("=" * 40) + + tools = ["mlflow", "wandb", "streamlit", "dash", "panel", "bokeh"] + + results = [] + for tool in tools: + results.append(test_tool_import(tool)) + + print("\n" + "=" * 40) + success_count = sum(results) + total_count = len(results) + + print(f"📊 Results: {success_count}/{total_count} tools available") + + if success_count == total_count: + print("🎉 All ML tools are ready to use!") + return 0 + else: + print("⚠️ Some tools are missing. Check environment.yml") + return 1 + +if __name__ == "__main__": + sys.exit(main())