chore: cleanup and miscellaneous updates

- .gitignore: Add reports/ and .api-keys
- examples/jupyter_experiment_integration.py: Update for new API
- podman/scripts/: CLI integration, secure runner, ML tool testing
- tools/: Performance regression detector, profiler utilities
This commit is contained in:
Jeremie Fraeys 2026-03-08 13:04:01 -04:00
parent c74e91dd69
commit 7eee31d721
No known key found for this signature in database
7 changed files with 130 additions and 94 deletions

6
.gitignore vendored
View file

@ -280,14 +280,16 @@ db/*.db-shm
db/*.db-wal
db/*.db
# Security files
.api-keys
# Security scan reports
reports/
.env.secure
.env.dev
ssl/
*.pem
*.key
.api-keys
# AI assistant files
AGENTS.md
.windsurf/*

View file

@ -5,13 +5,14 @@ This script shows how to use the FetchML CLI to manage Jupyter workspaces
linked with experiments.
"""
import json
import os
import subprocess
import json
import time
from pathlib import Path
def run_command(cmd, capture_output=True):
def run_command(cmd: str, capture_output: bool = True) -> subprocess.CompletedProcess:
"""Run a shell command and return the result."""
print(f"Running: {cmd}")
result = subprocess.run(cmd, shell=True, capture_output=capture_output, text=True)
@ -21,18 +22,21 @@ def run_command(cmd, capture_output=True):
print(f"Error: {result.stderr}")
return result
def create_sample_workspace(workspace_path):
def create_sample_workspace(workspace_path: str) -> None:
"""Create a sample Jupyter workspace with notebooks and scripts."""
workspace = Path(workspace_path)
workspace.mkdir(exist_ok=True)
# Create a simple notebook
notebook_content = {
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": ["# Experiment Integration Demo\n\nThis notebook demonstrates the integration between Jupyter workspaces and FetchML experiments."]
"source": [
"# Experiment Integration Demo\n\nThis notebook demonstrates the integration between Jupyter workspaces and FetchML experiments."
],
},
{
"cell_type": "code",
@ -69,29 +73,26 @@ def create_sample_workspace(workspace_path):
" mlflow.log_metric('accuracy', accuracy)\n",
" \n",
" print(f'Accuracy: {accuracy:.4f}')\n",
" print(f'Run ID: {run.info.run_id}')"
]
}
" print(f'Run ID: {run.info.run_id}')",
],
},
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
"name": "python3",
},
"language_info": {
"name": "python",
"version": "3.8.0"
}
"language_info": {"name": "python", "version": "3.8.0"},
},
"nbformat": 4,
"nbformat_minor": 4
"nbformat_minor": 4,
}
notebook_path = workspace / "experiment_demo.ipynb"
with open(notebook_path, 'w') as f:
with open(notebook_path, "w") as f:
json.dump(notebook_content, f, indent=2)
# Create a Python script for queue execution
script_content = '''#!/usr/bin/env python3
"""
@ -113,130 +114,140 @@ def main():
parser.add_argument('--experiment-id', help='Experiment ID to log to')
parser.add_argument('--run-name', default='random_forest_experiment', help='Name for the run')
args = parser.parse_args()
print(f"Starting experiment: {args.run_name}")
if args.experiment_id:
print(f"Linked to experiment: {args.experiment_id}")
# Generate sample data
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Train model with MLflow tracking
with mlflow.start_run(run_name=args.run_name) as run:
# Log parameters
mlflow.log_param('model_type', 'RandomForest')
mlflow.log_param('n_estimators', 100)
mlflow.log_param('data_samples', len(X))
# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
# Make predictions
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Log metrics
mlflow.log_metric('accuracy', accuracy)
mlflow.log_metric('train_samples', len(X_train))
mlflow.log_metric('test_samples', len(X_test))
print(f'Accuracy: {accuracy:.4f}')
print(f'Run ID: {run.info.run_id}')
# Log model
mlflow.sklearn.log_model(model, "model")
print("Experiment completed successfully!")
if __name__ == "__main__":
main()
'''
script_path = workspace / "run_experiment.py"
with open(script_path, 'w') as f:
with open(script_path, "w") as f:
f.write(script_content)
# Make script executable
os.chmod(script_path, 0o755)
# Create requirements.txt
requirements = """mlflow>=1.20.0
scikit-learn>=1.0.0
numpy>=1.20.0
pandas>=1.3.0"""
req_path = workspace / "requirements.txt"
with open(req_path, 'w') as f:
with open(req_path, "w") as f:
f.write(requirements)
print(f"Created sample workspace at: {workspace_path}")
print(f" - Notebook: {notebook_path}")
print(f" - Script: {script_path}")
print(f" - Requirements: {req_path}")
def main():
def main() -> None:
"""Main demonstration function."""
print("=== FetchML Jupyter-Experiment Integration Demo ===\n")
# Create sample workspace
workspace_path = "./demo_workspace"
create_sample_workspace(workspace_path)
print("\n1. Starting Jupyter service...")
# Start Jupyter service
result = run_command(f"ml jupyter start --workspace {workspace_path} --name demo")
if result.returncode != 0:
print("Failed to start Jupyter service")
return
print("\n2. Creating experiment...")
# Create a new experiment
experiment_id = f"jupyter_demo_{int(time.time())}"
print(f"Experiment ID: {experiment_id}")
print("\n3. Linking workspace with experiment...")
# Link workspace with experiment
link_result = run_command(f"ml jupyter experiment link --workspace {workspace_path} --experiment {experiment_id}")
link_result = run_command(
f"ml jupyter experiment link --workspace {workspace_path} --experiment {experiment_id}"
)
if link_result.returncode != 0:
print("Failed to link workspace with experiment")
return
print("\n4. Checking experiment status...")
# Check experiment status
status_result = run_command(f"ml jupyter experiment status {workspace_path}")
print("\n5. Queuing experiment from workspace...")
# Queue experiment from workspace
queue_result = run_command(f"ml jupyter experiment queue --workspace {workspace_path} --script run_experiment.py --name jupyter_demo_run")
queue_result = run_command(
f"ml jupyter experiment queue --workspace {workspace_path} --script run_experiment.py --name jupyter_demo_run"
)
if queue_result.returncode != 0:
print("Failed to queue experiment")
return
print("\n6. Syncing workspace with experiment...")
# Sync workspace with experiment
sync_result = run_command(f"ml jupyter experiment sync --workspace {workspace_path} --direction push")
sync_result = run_command(
f"ml jupyter experiment sync --workspace {workspace_path} --direction push"
)
if sync_result.returncode != 0:
print("Failed to sync workspace")
return
print("\n7. Listing Jupyter services...")
# List running services
list_result = run_command("ml jupyter list")
print("\n8. Stopping Jupyter service...")
# Stop Jupyter service (commented out for demo)
# stop_result = run_command("ml jupyter stop demo")
print("\n=== Demo Complete ===")
print(f"Workspace: {workspace_path}")
print(f"Experiment ID: {experiment_id}")
print("\nNext steps:")
print("1. Open the Jupyter notebook in your browser to experiment interactively")
print("2. Use 'ml experiment show' to view experiment results")
print("3. Use 'ml jupyter experiment sync --direction pull' to pull experiment data")
print(
"3. Use 'ml jupyter experiment sync --direction pull' to pull experiment data"
)
print("4. Use 'ml jupyter stop demo' to stop the Jupyter service when done")
if __name__ == "__main__":
main()

View file

@ -1,7 +1,8 @@
# CLI-Jupyter Integration Helper
import subprocess
import os
def sync_project(project_path):
def sync_project(project_path: str) -> bool:
"""Sync project using CLI"""
cmd = ["../cli/zig-out/bin/ml", "sync", project_path, "--queue"]
result = subprocess.run(cmd, capture_output=True, text=True)

View file

@ -7,18 +7,16 @@ Optimized for data scientists with maximum speed
import argparse
import json
import os
from pathlib import Path
import subprocess
import sys
import time
from pathlib import Path
class SecurityPolicy:
"""Manages security policies for experiment execution"""
def __init__(
self, policy_file: str = "/etc/ml_runner/security_policy.json"
):
def __init__(self, policy_file: str = "/etc/ml_runner/security_policy.json"):
self.policy_file = policy_file
self.policy = self._load_policy()
@ -64,7 +62,7 @@ class SecurityPolicy:
allowed_tools = self.policy.get("allowed_network_tools", [])
if package_name in allowed_tools:
return True
if package_name in self.policy.get("blocked_packages", []):
return False
return True
@ -78,7 +76,7 @@ class SecurityPolicy:
if domain:
whitelist = self.policy.get("network_whitelist", [])
return any(allowed in domain for allowed in whitelist)
return True
def check_tool_allowed(self, tool_name: str) -> bool:
@ -127,7 +125,9 @@ class CondaRunner:
str(deps_file),
"-y",
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=900)
result = subprocess.run(
cmd, capture_output=True, text=True, timeout=900
)
if result.returncode != 0:
print(f"[ERROR] Failed to apply environment file: {result.stderr}")
return False
@ -139,7 +139,9 @@ class CondaRunner:
print("[ERROR] poetry.lock provided but pyproject.toml is missing")
return False
print(f"[SETUP] Installing dependencies from Poetry lockfile: {deps_file}")
print(
f"[SETUP] Installing dependencies from Poetry lockfile: {deps_file}"
)
env = os.environ.copy()
env.update(
{
@ -156,7 +158,9 @@ class CondaRunner:
env=env,
)
if check.returncode != 0:
print("[ERROR] Poetry is not available in the container environment")
print(
"[ERROR] Poetry is not available in the container environment"
)
print(check.stderr)
return False
@ -198,9 +202,13 @@ class CondaRunner:
str(self.workspace_dir),
"--no-cache-dir",
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=900)
result = subprocess.run(
cmd, capture_output=True, text=True, timeout=900
)
if result.returncode != 0:
print(f"[ERROR] Failed to install project from pyproject.toml: {result.stderr}")
print(
f"[ERROR] Failed to install project from pyproject.toml: {result.stderr}"
)
return False
return True
@ -240,18 +248,14 @@ class CondaRunner:
"-c",
f"import {package_name.replace('-', '_')}",
]
result = subprocess.run(
check_cmd, capture_output=True, text=True
)
result = subprocess.run(check_cmd, capture_output=True, text=True)
if result.returncode == 0:
print(f"[OK] {package_name} already installed in conda env")
continue
# Try conda-forge first (faster and more reliable)
print(
f"[INSTALL] Installing {req} with {self.package_manager}..."
)
print(f"[INSTALL] Installing {req} with {self.package_manager}...")
install_cmd = [
self.package_manager,
"install",
@ -295,7 +299,9 @@ class CondaRunner:
return True
print(f"[ERROR] Unsupported dependency manifest: {deps_file}")
print("Supported: environment.yml, environment.yaml, poetry.lock (requires pyproject.toml), pyproject.toml, requirements.txt")
print(
"Supported: environment.yml, environment.yaml, poetry.lock (requires pyproject.toml), pyproject.toml, requirements.txt"
)
return False
except Exception as e:
@ -317,12 +323,12 @@ class CondaRunner:
env.update(
{
"CONDA_DEFAULT_ENV": self.conda_env,
"CUDA_VISIBLE_DEVICES": os.environ.get("CUDA_VISIBLE_DEVICES", ""), # Allow GPU access
"CUDA_VISIBLE_DEVICES": os.environ.get(
"CUDA_VISIBLE_DEVICES", ""
), # Allow GPU access
"SECURE_MODE": "1",
"NETWORK_ACCESS": (
"1"
if self.security_policy.check_network_access(None)
else "0"
"1" if self.security_policy.check_network_access(None) else "0"
),
"CONDA_MODE": "1",
}
@ -348,9 +354,7 @@ class CondaRunner:
# Run with timeout and resource limits
start_time = time.time()
max_time = self.security_policy.policy.get(
"max_execution_time", 3600
)
max_time = self.security_policy.policy.get("max_execution_time", 3600)
print(f"[RUN] Starting experiment: {train_script.name}")
print(f"[TIME] Time limit: {max_time}s")
@ -433,12 +437,13 @@ class CondaRunner:
return False
def main():
def main() -> int:
parser = argparse.ArgumentParser(description="Secure ML Experiment Runner")
parser.add_argument("--workspace", default="/workspace", help="Workspace directory")
parser.add_argument(
"--workspace", default="/workspace", help="Workspace directory"
"--deps",
help="Dependency manifest path (environment.yml | poetry.lock | pyproject.toml | requirements.txt)",
)
parser.add_argument("--deps", help="Dependency manifest path (environment.yml | poetry.lock | pyproject.toml | requirements.txt)")
parser.add_argument("--requirements", help="Deprecated alias for --deps")
parser.add_argument("--script", help="Training script path")
parser.add_argument(
@ -450,9 +455,7 @@ def main():
default=[],
help="Additional script arguments",
)
parser.add_argument(
"--check-gpu", action="store_true", help="Check GPU access"
)
parser.add_argument("--check-gpu", action="store_true", help="Check GPU access")
args = parser.parse_args()

View file

@ -2,30 +2,36 @@
"""
Test script to verify ML tools integration works
"""
import subprocess
import sys
import os
def test_tool_import(tool_name):
import sys
def test_tool_import(tool_name: str) -> bool:
"""Test if a tool can be imported"""
try:
if tool_name == "mlflow":
import mlflow
print(f"{tool_name}: {mlflow.__version__}")
elif tool_name == "wandb":
import wandb
print(f"{tool_name}: {wandb.__version__}")
elif tool_name == "streamlit":
import streamlit
print(f"{tool_name}: {streamlit.__version__}")
elif tool_name == "dash":
import dash
print(f"{tool_name}: {dash.__version__}")
elif tool_name == "panel":
import panel
print(f"{tool_name}: {panel.__version__}")
elif tool_name == "bokeh":
import bokeh
print(f"{tool_name}: {bokeh.__version__}")
else:
print(f"{tool_name}: Unknown tool")
@ -34,22 +40,23 @@ def test_tool_import(tool_name):
print(f"{tool_name}: {e}")
return False
def main():
def main() -> int:
print("🧪 Testing ML Tools Integration")
print("=" * 40)
tools = ["mlflow", "wandb", "streamlit", "dash", "panel", "bokeh"]
results = []
for tool in tools:
results.append(test_tool_import(tool))
print("\n" + "=" * 40)
success_count = sum(results)
total_count = len(results)
print(f"📊 Results: {success_count}/{total_count} tools available")
if success_count == total_count:
print("🎉 All ML tools are ready to use!")
return 0
@ -57,5 +64,6 @@ def main():
print("⚠️ Some tools are missing. Check environment.yml")
return 1
if __name__ == "__main__":
sys.exit(main())

View file

@ -21,6 +21,7 @@ type PerformanceRegressionDetector struct {
// ParseGoBenchFile reads a file containing `go test -bench` output and returns parsed benchmark results.
func ParseGoBenchFile(path string) ([]BenchmarkResult, error) {
// #nosec G304 -- path is for benchmark output files, internally controlled
f, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("failed to open benchmark file: %w", err)

View file

@ -210,8 +210,18 @@ func (p *Profiler) AnalyzeProfiles() (*ProfileAnalysis, error) {
// Get GC statistics
var gcStats debug.GCStats
debug.ReadGCStats(&gcStats)
// Validate before conversion to prevent overflow
numGC := uint32(0)
if gcStats.NumGC > 0 {
maxUint32 := int64(^uint32(0))
if gcStats.NumGC > maxUint32 {
numGC = ^uint32(0) // Max uint32 if value is too large
} else {
numGC = uint32(gcStats.NumGC)
}
}
analysis.GCStats = GCStats{
NumGC: uint32(gcStats.NumGC),
NumGC: numGC,
GCCPUFraction: 0.0, // Not available in this Go version
PauseTotal: gcStats.PauseTotal,
Pause: gcStats.Pause[0:], // Copy slice to avoid reference issues