chore: cleanup and miscellaneous updates
- .gitignore: Add reports/ and .api-keys
- examples/jupyter_experiment_integration.py: Update for new API
- podman/scripts/: CLI integration, secure runner, ML tool testing
- tools/: Performance regression detector, profiler utilities
This commit is contained in:
parent
c74e91dd69
commit
7eee31d721
7 changed files with 130 additions and 94 deletions
6
.gitignore
vendored
6
.gitignore
vendored
|
|
@ -280,14 +280,16 @@ db/*.db-shm
|
|||
db/*.db-wal
|
||||
db/*.db
|
||||
|
||||
# Security files
|
||||
.api-keys
|
||||
# Security scan reports
|
||||
reports/
|
||||
.env.secure
|
||||
.env.dev
|
||||
ssl/
|
||||
*.pem
|
||||
*.key
|
||||
|
||||
.api-keys
|
||||
|
||||
# AI assistant files
|
||||
AGENTS.md
|
||||
.windsurf/*
|
||||
|
|
|
|||
|
|
@ -5,13 +5,14 @@ This script shows how to use the FetchML CLI to manage Jupyter workspaces
|
|||
linked with experiments.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
def run_command(cmd, capture_output=True):
|
||||
|
||||
def run_command(cmd: str, capture_output: bool = True) -> subprocess.CompletedProcess:
|
||||
"""Run a shell command and return the result."""
|
||||
print(f"Running: {cmd}")
|
||||
result = subprocess.run(cmd, shell=True, capture_output=capture_output, text=True)
|
||||
|
|
@ -21,18 +22,21 @@ def run_command(cmd, capture_output=True):
|
|||
print(f"Error: {result.stderr}")
|
||||
return result
|
||||
|
||||
def create_sample_workspace(workspace_path):
|
||||
|
||||
def create_sample_workspace(workspace_path: str) -> None:
|
||||
"""Create a sample Jupyter workspace with notebooks and scripts."""
|
||||
workspace = Path(workspace_path)
|
||||
workspace.mkdir(exist_ok=True)
|
||||
|
||||
|
||||
# Create a simple notebook
|
||||
notebook_content = {
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": ["# Experiment Integration Demo\n\nThis notebook demonstrates the integration between Jupyter workspaces and FetchML experiments."]
|
||||
"source": [
|
||||
"# Experiment Integration Demo\n\nThis notebook demonstrates the integration between Jupyter workspaces and FetchML experiments."
|
||||
],
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
|
|
@ -69,29 +73,26 @@ def create_sample_workspace(workspace_path):
|
|||
" mlflow.log_metric('accuracy', accuracy)\n",
|
||||
" \n",
|
||||
" print(f'Accuracy: {accuracy:.4f}')\n",
|
||||
" print(f'Run ID: {run.info.run_id}')"
|
||||
]
|
||||
}
|
||||
" print(f'Run ID: {run.info.run_id}')",
|
||||
],
|
||||
},
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
"name": "python3",
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"version": "3.8.0"
|
||||
}
|
||||
"language_info": {"name": "python", "version": "3.8.0"},
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
"nbformat_minor": 4,
|
||||
}
|
||||
|
||||
|
||||
notebook_path = workspace / "experiment_demo.ipynb"
|
||||
with open(notebook_path, 'w') as f:
|
||||
with open(notebook_path, "w") as f:
|
||||
json.dump(notebook_content, f, indent=2)
|
||||
|
||||
|
||||
# Create a Python script for queue execution
|
||||
script_content = '''#!/usr/bin/env python3
|
||||
"""
|
||||
|
|
@ -113,130 +114,140 @@ def main():
|
|||
parser.add_argument('--experiment-id', help='Experiment ID to log to')
|
||||
parser.add_argument('--run-name', default='random_forest_experiment', help='Name for the run')
|
||||
args = parser.parse_args()
|
||||
|
||||
|
||||
print(f"Starting experiment: {args.run_name}")
|
||||
if args.experiment_id:
|
||||
print(f"Linked to experiment: {args.experiment_id}")
|
||||
|
||||
|
||||
# Generate sample data
|
||||
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
||||
|
||||
|
||||
# Train model with MLflow tracking
|
||||
with mlflow.start_run(run_name=args.run_name) as run:
|
||||
# Log parameters
|
||||
mlflow.log_param('model_type', 'RandomForest')
|
||||
mlflow.log_param('n_estimators', 100)
|
||||
mlflow.log_param('data_samples', len(X))
|
||||
|
||||
|
||||
# Train model
|
||||
model = RandomForestClassifier(n_estimators=100, random_state=42)
|
||||
model.fit(X_train, y_train)
|
||||
|
||||
|
||||
# Make predictions
|
||||
y_pred = model.predict(X_test)
|
||||
accuracy = accuracy_score(y_test, y_pred)
|
||||
|
||||
|
||||
# Log metrics
|
||||
mlflow.log_metric('accuracy', accuracy)
|
||||
mlflow.log_metric('train_samples', len(X_train))
|
||||
mlflow.log_metric('test_samples', len(X_test))
|
||||
|
||||
|
||||
print(f'Accuracy: {accuracy:.4f}')
|
||||
print(f'Run ID: {run.info.run_id}')
|
||||
|
||||
|
||||
# Log model
|
||||
mlflow.sklearn.log_model(model, "model")
|
||||
|
||||
|
||||
print("Experiment completed successfully!")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
'''
|
||||
|
||||
|
||||
script_path = workspace / "run_experiment.py"
|
||||
with open(script_path, 'w') as f:
|
||||
with open(script_path, "w") as f:
|
||||
f.write(script_content)
|
||||
|
||||
|
||||
# Make script executable
|
||||
os.chmod(script_path, 0o755)
|
||||
|
||||
|
||||
# Create requirements.txt
|
||||
requirements = """mlflow>=1.20.0
|
||||
scikit-learn>=1.0.0
|
||||
numpy>=1.20.0
|
||||
pandas>=1.3.0"""
|
||||
|
||||
|
||||
req_path = workspace / "requirements.txt"
|
||||
with open(req_path, 'w') as f:
|
||||
with open(req_path, "w") as f:
|
||||
f.write(requirements)
|
||||
|
||||
|
||||
print(f"Created sample workspace at: {workspace_path}")
|
||||
print(f" - Notebook: {notebook_path}")
|
||||
print(f" - Script: {script_path}")
|
||||
print(f" - Requirements: {req_path}")
|
||||
|
||||
def main():
|
||||
|
||||
def main() -> None:
|
||||
"""Main demonstration function."""
|
||||
print("=== FetchML Jupyter-Experiment Integration Demo ===\n")
|
||||
|
||||
|
||||
# Create sample workspace
|
||||
workspace_path = "./demo_workspace"
|
||||
create_sample_workspace(workspace_path)
|
||||
|
||||
|
||||
print("\n1. Starting Jupyter service...")
|
||||
# Start Jupyter service
|
||||
result = run_command(f"ml jupyter start --workspace {workspace_path} --name demo")
|
||||
if result.returncode != 0:
|
||||
print("Failed to start Jupyter service")
|
||||
return
|
||||
|
||||
|
||||
print("\n2. Creating experiment...")
|
||||
# Create a new experiment
|
||||
experiment_id = f"jupyter_demo_{int(time.time())}"
|
||||
print(f"Experiment ID: {experiment_id}")
|
||||
|
||||
|
||||
print("\n3. Linking workspace with experiment...")
|
||||
# Link workspace with experiment
|
||||
link_result = run_command(f"ml jupyter experiment link --workspace {workspace_path} --experiment {experiment_id}")
|
||||
link_result = run_command(
|
||||
f"ml jupyter experiment link --workspace {workspace_path} --experiment {experiment_id}"
|
||||
)
|
||||
if link_result.returncode != 0:
|
||||
print("Failed to link workspace with experiment")
|
||||
return
|
||||
|
||||
|
||||
print("\n4. Checking experiment status...")
|
||||
# Check experiment status
|
||||
status_result = run_command(f"ml jupyter experiment status {workspace_path}")
|
||||
|
||||
|
||||
print("\n5. Queuing experiment from workspace...")
|
||||
# Queue experiment from workspace
|
||||
queue_result = run_command(f"ml jupyter experiment queue --workspace {workspace_path} --script run_experiment.py --name jupyter_demo_run")
|
||||
queue_result = run_command(
|
||||
f"ml jupyter experiment queue --workspace {workspace_path} --script run_experiment.py --name jupyter_demo_run"
|
||||
)
|
||||
if queue_result.returncode != 0:
|
||||
print("Failed to queue experiment")
|
||||
return
|
||||
|
||||
|
||||
print("\n6. Syncing workspace with experiment...")
|
||||
# Sync workspace with experiment
|
||||
sync_result = run_command(f"ml jupyter experiment sync --workspace {workspace_path} --direction push")
|
||||
sync_result = run_command(
|
||||
f"ml jupyter experiment sync --workspace {workspace_path} --direction push"
|
||||
)
|
||||
if sync_result.returncode != 0:
|
||||
print("Failed to sync workspace")
|
||||
return
|
||||
|
||||
|
||||
print("\n7. Listing Jupyter services...")
|
||||
# List running services
|
||||
list_result = run_command("ml jupyter list")
|
||||
|
||||
|
||||
print("\n8. Stopping Jupyter service...")
|
||||
# Stop Jupyter service (commented out for demo)
|
||||
# stop_result = run_command("ml jupyter stop demo")
|
||||
|
||||
|
||||
print("\n=== Demo Complete ===")
|
||||
print(f"Workspace: {workspace_path}")
|
||||
print(f"Experiment ID: {experiment_id}")
|
||||
print("\nNext steps:")
|
||||
print("1. Open the Jupyter notebook in your browser to experiment interactively")
|
||||
print("2. Use 'ml experiment show' to view experiment results")
|
||||
print("3. Use 'ml jupyter experiment sync --direction pull' to pull experiment data")
|
||||
print(
|
||||
"3. Use 'ml jupyter experiment sync --direction pull' to pull experiment data"
|
||||
)
|
||||
print("4. Use 'ml jupyter stop demo' to stop the Jupyter service when done")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
# CLI-Jupyter Integration Helper
|
||||
import subprocess
|
||||
import os
|
||||
def sync_project(project_path):
|
||||
|
||||
|
||||
def sync_project(project_path: str) -> bool:
|
||||
"""Sync project using CLI"""
|
||||
cmd = ["../cli/zig-out/bin/ml", "sync", project_path, "--queue"]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||
|
|
|
|||
|
|
@ -7,18 +7,16 @@ Optimized for data scientists with maximum speed
|
|||
import argparse
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class SecurityPolicy:
|
||||
"""Manages security policies for experiment execution"""
|
||||
|
||||
def __init__(
|
||||
self, policy_file: str = "/etc/ml_runner/security_policy.json"
|
||||
):
|
||||
def __init__(self, policy_file: str = "/etc/ml_runner/security_policy.json"):
|
||||
self.policy_file = policy_file
|
||||
self.policy = self._load_policy()
|
||||
|
||||
|
|
@ -64,7 +62,7 @@ class SecurityPolicy:
|
|||
allowed_tools = self.policy.get("allowed_network_tools", [])
|
||||
if package_name in allowed_tools:
|
||||
return True
|
||||
|
||||
|
||||
if package_name in self.policy.get("blocked_packages", []):
|
||||
return False
|
||||
return True
|
||||
|
|
@ -78,7 +76,7 @@ class SecurityPolicy:
|
|||
if domain:
|
||||
whitelist = self.policy.get("network_whitelist", [])
|
||||
return any(allowed in domain for allowed in whitelist)
|
||||
|
||||
|
||||
return True
|
||||
|
||||
def check_tool_allowed(self, tool_name: str) -> bool:
|
||||
|
|
@ -127,7 +125,9 @@ class CondaRunner:
|
|||
str(deps_file),
|
||||
"-y",
|
||||
]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=900)
|
||||
result = subprocess.run(
|
||||
cmd, capture_output=True, text=True, timeout=900
|
||||
)
|
||||
if result.returncode != 0:
|
||||
print(f"[ERROR] Failed to apply environment file: {result.stderr}")
|
||||
return False
|
||||
|
|
@ -139,7 +139,9 @@ class CondaRunner:
|
|||
print("[ERROR] poetry.lock provided but pyproject.toml is missing")
|
||||
return False
|
||||
|
||||
print(f"[SETUP] Installing dependencies from Poetry lockfile: {deps_file}")
|
||||
print(
|
||||
f"[SETUP] Installing dependencies from Poetry lockfile: {deps_file}"
|
||||
)
|
||||
env = os.environ.copy()
|
||||
env.update(
|
||||
{
|
||||
|
|
@ -156,7 +158,9 @@ class CondaRunner:
|
|||
env=env,
|
||||
)
|
||||
if check.returncode != 0:
|
||||
print("[ERROR] Poetry is not available in the container environment")
|
||||
print(
|
||||
"[ERROR] Poetry is not available in the container environment"
|
||||
)
|
||||
print(check.stderr)
|
||||
return False
|
||||
|
||||
|
|
@ -198,9 +202,13 @@ class CondaRunner:
|
|||
str(self.workspace_dir),
|
||||
"--no-cache-dir",
|
||||
]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=900)
|
||||
result = subprocess.run(
|
||||
cmd, capture_output=True, text=True, timeout=900
|
||||
)
|
||||
if result.returncode != 0:
|
||||
print(f"[ERROR] Failed to install project from pyproject.toml: {result.stderr}")
|
||||
print(
|
||||
f"[ERROR] Failed to install project from pyproject.toml: {result.stderr}"
|
||||
)
|
||||
return False
|
||||
return True
|
||||
|
||||
|
|
@ -240,18 +248,14 @@ class CondaRunner:
|
|||
"-c",
|
||||
f"import {package_name.replace('-', '_')}",
|
||||
]
|
||||
result = subprocess.run(
|
||||
check_cmd, capture_output=True, text=True
|
||||
)
|
||||
result = subprocess.run(check_cmd, capture_output=True, text=True)
|
||||
|
||||
if result.returncode == 0:
|
||||
print(f"[OK] {package_name} already installed in conda env")
|
||||
continue
|
||||
|
||||
# Try conda-forge first (faster and more reliable)
|
||||
print(
|
||||
f"[INSTALL] Installing {req} with {self.package_manager}..."
|
||||
)
|
||||
print(f"[INSTALL] Installing {req} with {self.package_manager}...")
|
||||
install_cmd = [
|
||||
self.package_manager,
|
||||
"install",
|
||||
|
|
@ -295,7 +299,9 @@ class CondaRunner:
|
|||
return True
|
||||
|
||||
print(f"[ERROR] Unsupported dependency manifest: {deps_file}")
|
||||
print("Supported: environment.yml, environment.yaml, poetry.lock (requires pyproject.toml), pyproject.toml, requirements.txt")
|
||||
print(
|
||||
"Supported: environment.yml, environment.yaml, poetry.lock (requires pyproject.toml), pyproject.toml, requirements.txt"
|
||||
)
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -317,12 +323,12 @@ class CondaRunner:
|
|||
env.update(
|
||||
{
|
||||
"CONDA_DEFAULT_ENV": self.conda_env,
|
||||
"CUDA_VISIBLE_DEVICES": os.environ.get("CUDA_VISIBLE_DEVICES", ""), # Allow GPU access
|
||||
"CUDA_VISIBLE_DEVICES": os.environ.get(
|
||||
"CUDA_VISIBLE_DEVICES", ""
|
||||
), # Allow GPU access
|
||||
"SECURE_MODE": "1",
|
||||
"NETWORK_ACCESS": (
|
||||
"1"
|
||||
if self.security_policy.check_network_access(None)
|
||||
else "0"
|
||||
"1" if self.security_policy.check_network_access(None) else "0"
|
||||
),
|
||||
"CONDA_MODE": "1",
|
||||
}
|
||||
|
|
@ -348,9 +354,7 @@ class CondaRunner:
|
|||
|
||||
# Run with timeout and resource limits
|
||||
start_time = time.time()
|
||||
max_time = self.security_policy.policy.get(
|
||||
"max_execution_time", 3600
|
||||
)
|
||||
max_time = self.security_policy.policy.get("max_execution_time", 3600)
|
||||
|
||||
print(f"[RUN] Starting experiment: {train_script.name}")
|
||||
print(f"[TIME] Time limit: {max_time}s")
|
||||
|
|
@ -433,12 +437,13 @@ class CondaRunner:
|
|||
return False
|
||||
|
||||
|
||||
def main():
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser(description="Secure ML Experiment Runner")
|
||||
parser.add_argument("--workspace", default="/workspace", help="Workspace directory")
|
||||
parser.add_argument(
|
||||
"--workspace", default="/workspace", help="Workspace directory"
|
||||
"--deps",
|
||||
help="Dependency manifest path (environment.yml | poetry.lock | pyproject.toml | requirements.txt)",
|
||||
)
|
||||
parser.add_argument("--deps", help="Dependency manifest path (environment.yml | poetry.lock | pyproject.toml | requirements.txt)")
|
||||
parser.add_argument("--requirements", help="Deprecated alias for --deps")
|
||||
parser.add_argument("--script", help="Training script path")
|
||||
parser.add_argument(
|
||||
|
|
@ -450,9 +455,7 @@ def main():
|
|||
default=[],
|
||||
help="Additional script arguments",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--check-gpu", action="store_true", help="Check GPU access"
|
||||
)
|
||||
parser.add_argument("--check-gpu", action="store_true", help="Check GPU access")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
|
|
|
|||
|
|
@ -2,30 +2,36 @@
|
|||
"""
|
||||
Test script to verify ML tools integration works
|
||||
"""
|
||||
import subprocess
|
||||
import sys
|
||||
import os
|
||||
|
||||
def test_tool_import(tool_name):
|
||||
import sys
|
||||
|
||||
|
||||
def test_tool_import(tool_name: str) -> bool:
|
||||
"""Test if a tool can be imported"""
|
||||
try:
|
||||
if tool_name == "mlflow":
|
||||
import mlflow
|
||||
|
||||
print(f"✅ {tool_name}: {mlflow.__version__}")
|
||||
elif tool_name == "wandb":
|
||||
import wandb
|
||||
|
||||
print(f"✅ {tool_name}: {wandb.__version__}")
|
||||
elif tool_name == "streamlit":
|
||||
import streamlit
|
||||
|
||||
print(f"✅ {tool_name}: {streamlit.__version__}")
|
||||
elif tool_name == "dash":
|
||||
import dash
|
||||
|
||||
print(f"✅ {tool_name}: {dash.__version__}")
|
||||
elif tool_name == "panel":
|
||||
import panel
|
||||
|
||||
print(f"✅ {tool_name}: {panel.__version__}")
|
||||
elif tool_name == "bokeh":
|
||||
import bokeh
|
||||
|
||||
print(f"✅ {tool_name}: {bokeh.__version__}")
|
||||
else:
|
||||
print(f"❓ {tool_name}: Unknown tool")
|
||||
|
|
@ -34,22 +40,23 @@ def test_tool_import(tool_name):
|
|||
print(f"❌ {tool_name}: {e}")
|
||||
return False
|
||||
|
||||
def main():
|
||||
|
||||
def main() -> int:
|
||||
print("🧪 Testing ML Tools Integration")
|
||||
print("=" * 40)
|
||||
|
||||
|
||||
tools = ["mlflow", "wandb", "streamlit", "dash", "panel", "bokeh"]
|
||||
|
||||
|
||||
results = []
|
||||
for tool in tools:
|
||||
results.append(test_tool_import(tool))
|
||||
|
||||
|
||||
print("\n" + "=" * 40)
|
||||
success_count = sum(results)
|
||||
total_count = len(results)
|
||||
|
||||
|
||||
print(f"📊 Results: {success_count}/{total_count} tools available")
|
||||
|
||||
|
||||
if success_count == total_count:
|
||||
print("🎉 All ML tools are ready to use!")
|
||||
return 0
|
||||
|
|
@ -57,5 +64,6 @@ def main():
|
|||
print("⚠️ Some tools are missing. Check environment.yml")
|
||||
return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ type PerformanceRegressionDetector struct {
|
|||
|
||||
// ParseGoBenchFile reads a file containing `go test -bench` output and returns parsed benchmark results.
|
||||
func ParseGoBenchFile(path string) ([]BenchmarkResult, error) {
|
||||
// #nosec G304 -- path is for benchmark output files, internally controlled
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open benchmark file: %w", err)
|
||||
|
|
|
|||
|
|
@ -210,8 +210,18 @@ func (p *Profiler) AnalyzeProfiles() (*ProfileAnalysis, error) {
|
|||
// Get GC statistics
|
||||
var gcStats debug.GCStats
|
||||
debug.ReadGCStats(&gcStats)
|
||||
// Validate before conversion to prevent overflow
|
||||
numGC := uint32(0)
|
||||
if gcStats.NumGC > 0 {
|
||||
maxUint32 := int64(^uint32(0))
|
||||
if gcStats.NumGC > maxUint32 {
|
||||
numGC = ^uint32(0) // Max uint32 if value is too large
|
||||
} else {
|
||||
numGC = uint32(gcStats.NumGC)
|
||||
}
|
||||
}
|
||||
analysis.GCStats = GCStats{
|
||||
NumGC: uint32(gcStats.NumGC),
|
||||
NumGC: numGC,
|
||||
GCCPUFraction: 0.0, // Not available in this Go version
|
||||
PauseTotal: gcStats.PauseTotal,
|
||||
Pause: gcStats.Pause[0:], // Copy slice to avoid reference issues
|
||||
|
|
|
|||
Loading…
Reference in a new issue