chore: cleanup and miscellaneous updates

- .gitignore: Add reports/ and .api-keys
- examples/jupyter_experiment_integration.py: Update for new API
- podman/scripts/: CLI integration, secure runner, ML tool testing
- tools/: Performance regression detector, profiler utilities
This commit is contained in:
Jeremie Fraeys 2026-03-08 13:04:01 -04:00
parent c74e91dd69
commit 7eee31d721
No known key found for this signature in database
7 changed files with 130 additions and 94 deletions

6
.gitignore vendored
View file

@ -280,14 +280,16 @@ db/*.db-shm
db/*.db-wal
db/*.db
# Security files
.api-keys
# Security scan reports
reports/
.env.secure
.env.dev
ssl/
*.pem
*.key
.api-keys
# AI assistant files
AGENTS.md
.windsurf/*

View file

@ -5,13 +5,14 @@ This script shows how to use the FetchML CLI to manage Jupyter workspaces
linked with experiments.
"""
import json
import os
import subprocess
import json
import time
from pathlib import Path
def run_command(cmd, capture_output=True):
def run_command(cmd: str, capture_output: bool = True) -> subprocess.CompletedProcess:
"""Run a shell command and return the result."""
print(f"Running: {cmd}")
result = subprocess.run(cmd, shell=True, capture_output=capture_output, text=True)
@ -21,18 +22,21 @@ def run_command(cmd, capture_output=True):
print(f"Error: {result.stderr}")
return result
def create_sample_workspace(workspace_path):
def create_sample_workspace(workspace_path: str) -> None:
"""Create a sample Jupyter workspace with notebooks and scripts."""
workspace = Path(workspace_path)
workspace.mkdir(exist_ok=True)
# Create a simple notebook
notebook_content = {
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": ["# Experiment Integration Demo\n\nThis notebook demonstrates the integration between Jupyter workspaces and FetchML experiments."]
"source": [
"# Experiment Integration Demo\n\nThis notebook demonstrates the integration between Jupyter workspaces and FetchML experiments."
],
},
{
"cell_type": "code",
@ -69,29 +73,26 @@ def create_sample_workspace(workspace_path):
" mlflow.log_metric('accuracy', accuracy)\n",
" \n",
" print(f'Accuracy: {accuracy:.4f}')\n",
" print(f'Run ID: {run.info.run_id}')"
]
}
" print(f'Run ID: {run.info.run_id}')",
],
},
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
"name": "python3",
},
"language_info": {
"name": "python",
"version": "3.8.0"
}
"language_info": {"name": "python", "version": "3.8.0"},
},
"nbformat": 4,
"nbformat_minor": 4
"nbformat_minor": 4,
}
notebook_path = workspace / "experiment_demo.ipynb"
with open(notebook_path, 'w') as f:
with open(notebook_path, "w") as f:
json.dump(notebook_content, f, indent=2)
# Create a Python script for queue execution
script_content = '''#!/usr/bin/env python3
"""
@ -113,130 +114,140 @@ def main():
parser.add_argument('--experiment-id', help='Experiment ID to log to')
parser.add_argument('--run-name', default='random_forest_experiment', help='Name for the run')
args = parser.parse_args()
print(f"Starting experiment: {args.run_name}")
if args.experiment_id:
print(f"Linked to experiment: {args.experiment_id}")
# Generate sample data
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Train model with MLflow tracking
with mlflow.start_run(run_name=args.run_name) as run:
# Log parameters
mlflow.log_param('model_type', 'RandomForest')
mlflow.log_param('n_estimators', 100)
mlflow.log_param('data_samples', len(X))
# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
# Make predictions
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Log metrics
mlflow.log_metric('accuracy', accuracy)
mlflow.log_metric('train_samples', len(X_train))
mlflow.log_metric('test_samples', len(X_test))
print(f'Accuracy: {accuracy:.4f}')
print(f'Run ID: {run.info.run_id}')
# Log model
mlflow.sklearn.log_model(model, "model")
print("Experiment completed successfully!")
if __name__ == "__main__":
main()
'''
script_path = workspace / "run_experiment.py"
with open(script_path, 'w') as f:
with open(script_path, "w") as f:
f.write(script_content)
# Make script executable
os.chmod(script_path, 0o755)
# Create requirements.txt
requirements = """mlflow>=1.20.0
scikit-learn>=1.0.0
numpy>=1.20.0
pandas>=1.3.0"""
req_path = workspace / "requirements.txt"
with open(req_path, 'w') as f:
with open(req_path, "w") as f:
f.write(requirements)
print(f"Created sample workspace at: {workspace_path}")
print(f" - Notebook: {notebook_path}")
print(f" - Script: {script_path}")
print(f" - Requirements: {req_path}")
def main():
def main() -> None:
"""Main demonstration function."""
print("=== FetchML Jupyter-Experiment Integration Demo ===\n")
# Create sample workspace
workspace_path = "./demo_workspace"
create_sample_workspace(workspace_path)
print("\n1. Starting Jupyter service...")
# Start Jupyter service
result = run_command(f"ml jupyter start --workspace {workspace_path} --name demo")
if result.returncode != 0:
print("Failed to start Jupyter service")
return
print("\n2. Creating experiment...")
# Create a new experiment
experiment_id = f"jupyter_demo_{int(time.time())}"
print(f"Experiment ID: {experiment_id}")
print("\n3. Linking workspace with experiment...")
# Link workspace with experiment
link_result = run_command(f"ml jupyter experiment link --workspace {workspace_path} --experiment {experiment_id}")
link_result = run_command(
f"ml jupyter experiment link --workspace {workspace_path} --experiment {experiment_id}"
)
if link_result.returncode != 0:
print("Failed to link workspace with experiment")
return
print("\n4. Checking experiment status...")
# Check experiment status
status_result = run_command(f"ml jupyter experiment status {workspace_path}")
print("\n5. Queuing experiment from workspace...")
# Queue experiment from workspace
queue_result = run_command(f"ml jupyter experiment queue --workspace {workspace_path} --script run_experiment.py --name jupyter_demo_run")
queue_result = run_command(
f"ml jupyter experiment queue --workspace {workspace_path} --script run_experiment.py --name jupyter_demo_run"
)
if queue_result.returncode != 0:
print("Failed to queue experiment")
return
print("\n6. Syncing workspace with experiment...")
# Sync workspace with experiment
sync_result = run_command(f"ml jupyter experiment sync --workspace {workspace_path} --direction push")
sync_result = run_command(
f"ml jupyter experiment sync --workspace {workspace_path} --direction push"
)
if sync_result.returncode != 0:
print("Failed to sync workspace")
return
print("\n7. Listing Jupyter services...")
# List running services
list_result = run_command("ml jupyter list")
print("\n8. Stopping Jupyter service...")
# Stop Jupyter service (commented out for demo)
# stop_result = run_command("ml jupyter stop demo")
print("\n=== Demo Complete ===")
print(f"Workspace: {workspace_path}")
print(f"Experiment ID: {experiment_id}")
print("\nNext steps:")
print("1. Open the Jupyter notebook in your browser to experiment interactively")
print("2. Use 'ml experiment show' to view experiment results")
print("3. Use 'ml jupyter experiment sync --direction pull' to pull experiment data")
print(
"3. Use 'ml jupyter experiment sync --direction pull' to pull experiment data"
)
print("4. Use 'ml jupyter stop demo' to stop the Jupyter service when done")
if __name__ == "__main__":
main()

View file

@ -1,7 +1,8 @@
# CLI-Jupyter Integration Helper
import subprocess
import os
def sync_project(project_path):
def sync_project(project_path: str) -> bool:
"""Sync project using CLI"""
cmd = ["../cli/zig-out/bin/ml", "sync", project_path, "--queue"]
result = subprocess.run(cmd, capture_output=True, text=True)

View file

@ -7,18 +7,16 @@ Optimized for data scientists with maximum speed
import argparse
import json
import os
from pathlib import Path
import subprocess
import sys
import time
from pathlib import Path
class SecurityPolicy:
"""Manages security policies for experiment execution"""
def __init__(
self, policy_file: str = "/etc/ml_runner/security_policy.json"
):
def __init__(self, policy_file: str = "/etc/ml_runner/security_policy.json"):
self.policy_file = policy_file
self.policy = self._load_policy()
@ -64,7 +62,7 @@ class SecurityPolicy:
allowed_tools = self.policy.get("allowed_network_tools", [])
if package_name in allowed_tools:
return True
if package_name in self.policy.get("blocked_packages", []):
return False
return True
@ -78,7 +76,7 @@ class SecurityPolicy:
if domain:
whitelist = self.policy.get("network_whitelist", [])
return any(allowed in domain for allowed in whitelist)
return True
def check_tool_allowed(self, tool_name: str) -> bool:
@ -127,7 +125,9 @@ class CondaRunner:
str(deps_file),
"-y",
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=900)
result = subprocess.run(
cmd, capture_output=True, text=True, timeout=900
)
if result.returncode != 0:
print(f"[ERROR] Failed to apply environment file: {result.stderr}")
return False
@ -139,7 +139,9 @@ class CondaRunner:
print("[ERROR] poetry.lock provided but pyproject.toml is missing")
return False
print(f"[SETUP] Installing dependencies from Poetry lockfile: {deps_file}")
print(
f"[SETUP] Installing dependencies from Poetry lockfile: {deps_file}"
)
env = os.environ.copy()
env.update(
{
@ -156,7 +158,9 @@ class CondaRunner:
env=env,
)
if check.returncode != 0:
print("[ERROR] Poetry is not available in the container environment")
print(
"[ERROR] Poetry is not available in the container environment"
)
print(check.stderr)
return False
@ -198,9 +202,13 @@ class CondaRunner:
str(self.workspace_dir),
"--no-cache-dir",
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=900)
result = subprocess.run(
cmd, capture_output=True, text=True, timeout=900
)
if result.returncode != 0:
print(f"[ERROR] Failed to install project from pyproject.toml: {result.stderr}")
print(
f"[ERROR] Failed to install project from pyproject.toml: {result.stderr}"
)
return False
return True
@ -240,18 +248,14 @@ class CondaRunner:
"-c",
f"import {package_name.replace('-', '_')}",
]
result = subprocess.run(
check_cmd, capture_output=True, text=True
)
result = subprocess.run(check_cmd, capture_output=True, text=True)
if result.returncode == 0:
print(f"[OK] {package_name} already installed in conda env")
continue
# Try conda-forge first (faster and more reliable)
print(
f"[INSTALL] Installing {req} with {self.package_manager}..."
)
print(f"[INSTALL] Installing {req} with {self.package_manager}...")
install_cmd = [
self.package_manager,
"install",
@ -295,7 +299,9 @@ class CondaRunner:
return True
print(f"[ERROR] Unsupported dependency manifest: {deps_file}")
print("Supported: environment.yml, environment.yaml, poetry.lock (requires pyproject.toml), pyproject.toml, requirements.txt")
print(
"Supported: environment.yml, environment.yaml, poetry.lock (requires pyproject.toml), pyproject.toml, requirements.txt"
)
return False
except Exception as e:
@ -317,12 +323,12 @@ class CondaRunner:
env.update(
{
"CONDA_DEFAULT_ENV": self.conda_env,
"CUDA_VISIBLE_DEVICES": os.environ.get("CUDA_VISIBLE_DEVICES", ""), # Allow GPU access
"CUDA_VISIBLE_DEVICES": os.environ.get(
"CUDA_VISIBLE_DEVICES", ""
), # Allow GPU access
"SECURE_MODE": "1",
"NETWORK_ACCESS": (
"1"
if self.security_policy.check_network_access(None)
else "0"
"1" if self.security_policy.check_network_access(None) else "0"
),
"CONDA_MODE": "1",
}
@ -348,9 +354,7 @@ class CondaRunner:
# Run with timeout and resource limits
start_time = time.time()
max_time = self.security_policy.policy.get(
"max_execution_time", 3600
)
max_time = self.security_policy.policy.get("max_execution_time", 3600)
print(f"[RUN] Starting experiment: {train_script.name}")
print(f"[TIME] Time limit: {max_time}s")
@ -433,12 +437,13 @@ class CondaRunner:
return False
def main():
def main() -> int:
parser = argparse.ArgumentParser(description="Secure ML Experiment Runner")
parser.add_argument("--workspace", default="/workspace", help="Workspace directory")
parser.add_argument(
"--workspace", default="/workspace", help="Workspace directory"
"--deps",
help="Dependency manifest path (environment.yml | poetry.lock | pyproject.toml | requirements.txt)",
)
parser.add_argument("--deps", help="Dependency manifest path (environment.yml | poetry.lock | pyproject.toml | requirements.txt)")
parser.add_argument("--requirements", help="Deprecated alias for --deps")
parser.add_argument("--script", help="Training script path")
parser.add_argument(
@ -450,9 +455,7 @@ def main():
default=[],
help="Additional script arguments",
)
parser.add_argument(
"--check-gpu", action="store_true", help="Check GPU access"
)
parser.add_argument("--check-gpu", action="store_true", help="Check GPU access")
args = parser.parse_args()

View file

@ -2,30 +2,36 @@
"""
Test script to verify ML tools integration works
"""
import subprocess
import sys
import os
def test_tool_import(tool_name):
import sys
def test_tool_import(tool_name: str) -> bool:
"""Test if a tool can be imported"""
try:
if tool_name == "mlflow":
import mlflow
print(f"{tool_name}: {mlflow.__version__}")
elif tool_name == "wandb":
import wandb
print(f"{tool_name}: {wandb.__version__}")
elif tool_name == "streamlit":
import streamlit
print(f"{tool_name}: {streamlit.__version__}")
elif tool_name == "dash":
import dash
print(f"{tool_name}: {dash.__version__}")
elif tool_name == "panel":
import panel
print(f"{tool_name}: {panel.__version__}")
elif tool_name == "bokeh":
import bokeh
print(f"{tool_name}: {bokeh.__version__}")
else:
print(f"{tool_name}: Unknown tool")
@ -34,22 +40,23 @@ def test_tool_import(tool_name):
print(f"{tool_name}: {e}")
return False
def main():
def main() -> int:
print("🧪 Testing ML Tools Integration")
print("=" * 40)
tools = ["mlflow", "wandb", "streamlit", "dash", "panel", "bokeh"]
results = []
for tool in tools:
results.append(test_tool_import(tool))
print("\n" + "=" * 40)
success_count = sum(results)
total_count = len(results)
print(f"📊 Results: {success_count}/{total_count} tools available")
if success_count == total_count:
print("🎉 All ML tools are ready to use!")
return 0
@ -57,5 +64,6 @@ def main():
print("⚠️ Some tools are missing. Check environment.yml")
return 1
if __name__ == "__main__":
sys.exit(main())

View file

@ -21,6 +21,7 @@ type PerformanceRegressionDetector struct {
// ParseGoBenchFile reads a file containing `go test -bench` output and returns parsed benchmark results.
func ParseGoBenchFile(path string) ([]BenchmarkResult, error) {
// #nosec G304 -- path is for benchmark output files, internally controlled
f, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("failed to open benchmark file: %w", err)

View file

@ -210,8 +210,18 @@ func (p *Profiler) AnalyzeProfiles() (*ProfileAnalysis, error) {
// Get GC statistics
var gcStats debug.GCStats
debug.ReadGCStats(&gcStats)
// Validate before conversion to prevent overflow
numGC := uint32(0)
if gcStats.NumGC > 0 {
maxUint32 := int64(^uint32(0))
if gcStats.NumGC > maxUint32 {
numGC = ^uint32(0) // Max uint32 if value is too large
} else {
numGC = uint32(gcStats.NumGC)
}
}
analysis.GCStats = GCStats{
NumGC: uint32(gcStats.NumGC),
NumGC: numGC,
GCCPUFraction: 0.0, // Not available in this Go version
PauseTotal: gcStats.PauseTotal,
Pause: gcStats.Pause[0:], // Copy slice to avoid reference issues