fetch_ml/examples/jupyter_experiment_integration.py
Jeremie Fraeys cd5640ebd2 Slim and secure: move scripts, clean configs, remove secrets
- Move ci-test.sh and setup.sh to scripts/
- Trim docs/src/zig-cli.md to current structure
- Replace hardcoded secrets with placeholders in configs
- Update .gitignore to block .env*, secrets/, keys, build artifacts
- Slim README.md to reflect current CLI/TUI split
- Add cleanup trap to ci-test.sh
- Ensure no secrets are committed
2025-12-07 13:57:51 -05:00

242 lines
8.8 KiB
Python

#!/usr/bin/env python3
"""
Example script demonstrating Jupyter workspace and experiment integration.
This script shows how to use the FetchML CLI to manage Jupyter workspaces
linked with experiments.
"""
import os
import subprocess
import json
import time
from pathlib import Path
def run_command(cmd, capture_output=True):
    """Execute *cmd* through the shell, echoing the command and its output.

    Args:
        cmd: Shell command line to run (trusted, script-internal strings only).
        capture_output: When True, capture and echo stdout/stderr.

    Returns:
        The subprocess.CompletedProcess, so callers can inspect
        ``returncode``, ``stdout`` and ``stderr``.
    """
    print(f"Running: {cmd}")
    completed = subprocess.run(cmd, shell=True, capture_output=capture_output, text=True)
    if capture_output:
        print(f"Output: {completed.stdout}")
        # Only surface stderr when the command actually wrote something there.
        if completed.stderr:
            print(f"Error: {completed.stderr}")
    return completed
def create_sample_workspace(workspace_path):
    """Create a sample Jupyter workspace with a demo notebook and scripts.

    Populates *workspace_path* with:
      - ``experiment_demo.ipynb`` — an nbformat-4 notebook that trains a
        RandomForest with MLflow tracking,
      - ``run_experiment.py`` — an executable script suitable for the
        FetchML job queue,
      - ``requirements.txt`` — the Python dependencies for both.

    Args:
        workspace_path: Directory to create. Intermediate directories are
            created as needed; an existing directory is reused.
    """
    workspace = Path(workspace_path)
    # parents=True so a nested path like ./demos/run1/ws works even when
    # the parent does not exist yet (plain exist_ok=True would raise).
    workspace.mkdir(parents=True, exist_ok=True)

    # Minimal nbformat-4 notebook: one markdown cell plus one code cell.
    notebook_content = {
        "cells": [
            {
                "cell_type": "markdown",
                "metadata": {},
                "source": ["# Experiment Integration Demo\n\nThis notebook demonstrates the integration between Jupyter workspaces and FetchML experiments."]
            },
            {
                "cell_type": "code",
                "execution_count": None,
                "metadata": {},
                "outputs": [],
                "source": [
                    "import mlflow\n",
                    "import numpy as np\n",
                    "from sklearn.ensemble import RandomForestClassifier\n",
                    "from sklearn.datasets import make_classification\n",
                    "from sklearn.model_selection import train_test_split\n",
                    "from sklearn.metrics import accuracy_score\n",
                    "\n",
                    "# Generate sample data\n",
                    "X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)\n",
                    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
                    "\n",
                    "# Train model with MLflow tracking\n",
                    "with mlflow.start_run() as run:\n",
                    "    # Log parameters\n",
                    "    mlflow.log_param('model_type', 'RandomForest')\n",
                    "    mlflow.log_param('n_estimators', 100)\n",
                    "    \n",
                    "    # Train model\n",
                    "    model = RandomForestClassifier(n_estimators=100, random_state=42)\n",
                    "    model.fit(X_train, y_train)\n",
                    "    \n",
                    "    # Make predictions\n",
                    "    y_pred = model.predict(X_test)\n",
                    "    accuracy = accuracy_score(y_test, y_pred)\n",
                    "    \n",
                    "    # Log metrics\n",
                    "    mlflow.log_metric('accuracy', accuracy)\n",
                    "    \n",
                    "    print(f'Accuracy: {accuracy:.4f}')\n",
                    "    print(f'Run ID: {run.info.run_id}')"
                ]
            }
        ],
        "metadata": {
            "kernelspec": {
                "display_name": "Python 3",
                "language": "python",
                "name": "python3"
            },
            "language_info": {
                "name": "python",
                "version": "3.8.0"
            }
        },
        "nbformat": 4,
        "nbformat_minor": 4
    }

    notebook_path = workspace / "experiment_demo.ipynb"
    with open(notebook_path, 'w') as f:
        json.dump(notebook_content, f, indent=2)

    # Standalone script for queue execution (written as data, not run here).
    script_content = '''#!/usr/bin/env python3
"""
Production script for the experiment demo.
This script can be queued using the FetchML job queue.
"""
import mlflow
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import argparse
import sys


def main():
    parser = argparse.ArgumentParser(description='Run experiment demo')
    parser.add_argument('--experiment-id', help='Experiment ID to log to')
    parser.add_argument('--run-name', default='random_forest_experiment', help='Name for the run')
    args = parser.parse_args()

    print(f"Starting experiment: {args.run_name}")
    if args.experiment_id:
        print(f"Linked to experiment: {args.experiment_id}")

    # Generate sample data
    X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train model with MLflow tracking
    with mlflow.start_run(run_name=args.run_name) as run:
        # Log parameters
        mlflow.log_param('model_type', 'RandomForest')
        mlflow.log_param('n_estimators', 100)
        mlflow.log_param('data_samples', len(X))

        # Train model
        model = RandomForestClassifier(n_estimators=100, random_state=42)
        model.fit(X_train, y_train)

        # Make predictions
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        # Log metrics
        mlflow.log_metric('accuracy', accuracy)
        mlflow.log_metric('train_samples', len(X_train))
        mlflow.log_metric('test_samples', len(X_test))

        print(f'Accuracy: {accuracy:.4f}')
        print(f'Run ID: {run.info.run_id}')

        # Log model
        mlflow.sklearn.log_model(model, "model")

    print("Experiment completed successfully!")


if __name__ == "__main__":
    main()
'''

    script_path = workspace / "run_experiment.py"
    with open(script_path, 'w') as f:
        f.write(script_content)
    # Make script executable so the job queue can invoke it directly.
    os.chmod(script_path, 0o755)

    # Pin minimum versions matching what the notebook/script use.
    requirements = """mlflow>=1.20.0
scikit-learn>=1.0.0
numpy>=1.20.0
pandas>=1.3.0"""
    req_path = workspace / "requirements.txt"
    with open(req_path, 'w') as f:
        f.write(requirements)

    print(f"Created sample workspace at: {workspace_path}")
    print(f"  - Notebook: {notebook_path}")
    print(f"  - Script: {script_path}")
    print(f"  - Requirements: {req_path}")
def main():
    """Walk through the Jupyter/experiment integration workflow end to end.

    Builds a sample workspace, then drives the FetchML CLI (``ml jupyter``)
    through service start, experiment linking, status check, job queueing,
    syncing, and service listing. Any failing CLI step aborts the demo early
    with a message. The Jupyter service is intentionally left running.
    """
    print("=== FetchML Jupyter-Experiment Integration Demo ===\n")

    workspace_path = "./demo_workspace"
    create_sample_workspace(workspace_path)

    print("\n1. Starting Jupyter service...")
    if run_command(f"ml jupyter start --workspace {workspace_path} --name demo").returncode != 0:
        print("Failed to start Jupyter service")
        return

    print("\n2. Creating experiment...")
    # Timestamp keeps experiment IDs unique across repeated demo runs.
    experiment_id = f"jupyter_demo_{int(time.time())}"
    print(f"Experiment ID: {experiment_id}")

    print("\n3. Linking workspace with experiment...")
    if run_command(
        f"ml jupyter experiment link --workspace {workspace_path} --experiment {experiment_id}"
    ).returncode != 0:
        print("Failed to link workspace with experiment")
        return

    print("\n4. Checking experiment status...")
    run_command(f"ml jupyter experiment status {workspace_path}")

    print("\n5. Queuing experiment from workspace...")
    if run_command(
        f"ml jupyter experiment queue --workspace {workspace_path} --script run_experiment.py --name jupyter_demo_run"
    ).returncode != 0:
        print("Failed to queue experiment")
        return

    print("\n6. Syncing workspace with experiment...")
    if run_command(
        f"ml jupyter experiment sync --workspace {workspace_path} --direction push"
    ).returncode != 0:
        print("Failed to sync workspace")
        return

    print("\n7. Listing Jupyter services...")
    run_command("ml jupyter list")

    print("\n8. Stopping Jupyter service...")
    # Deliberately not stopped so the user can explore interactively:
    # run_command("ml jupyter stop demo")

    print("\n=== Demo Complete ===")
    print(f"Workspace: {workspace_path}")
    print(f"Experiment ID: {experiment_id}")
    print("\nNext steps:")
    print("1. Open the Jupyter notebook in your browser to experiment interactively")
    print("2. Use 'ml experiment show' to view experiment results")
    print("3. Use 'ml jupyter experiment sync --direction pull' to pull experiment data")
    print("4. Use 'ml jupyter stop demo' to stop the Jupyter service when done")


if __name__ == "__main__":
    main()