#!/usr/bin/env python3
"""
Example script demonstrating Jupyter workspace and experiment integration.

This script shows how to use the FetchML CLI to manage Jupyter workspaces
linked with experiments.
"""

import json
import os
import subprocess
import time
from pathlib import Path


def run_command(cmd, capture_output=True):
    """Run a shell command and return the result.

    Args:
        cmd: Command line executed through the shell.
        capture_output: When True, capture stdout/stderr and echo them.

    Returns:
        The ``subprocess.CompletedProcess`` for the executed command.
    """
    print(f"Running: {cmd}")
    # NOTE(review): shell=True is tolerable here because every command string
    # is assembled from internal constants, never untrusted user input.
    result = subprocess.run(cmd, shell=True, capture_output=capture_output, text=True)
    if capture_output:
        print(f"Output: {result.stdout}")
        if result.stderr:
            print(f"Error: {result.stderr}")
    return result


def create_sample_workspace(workspace_path):
    """Create a sample Jupyter workspace with notebooks and scripts.

    Writes three files into ``workspace_path`` (created if missing):
    a demo notebook, a queueable training script, and a requirements file.

    Args:
        workspace_path: Directory (str or Path) to populate.
    """
    workspace = Path(workspace_path)
    # parents=True so nested demo paths work out of the box.
    workspace.mkdir(parents=True, exist_ok=True)

    # Create a simple notebook
    notebook_content = {
        "cells": [
            {
                "cell_type": "markdown",
                "metadata": {},
                "source": [
                    "# Experiment Integration Demo\n\nThis notebook demonstrates the integration between Jupyter workspaces and FetchML experiments."
                ],
            },
            {
                "cell_type": "code",
                "execution_count": None,
                "metadata": {},
                "outputs": [],
                "source": [
                    "import mlflow\n",
                    "import numpy as np\n",
                    "from sklearn.ensemble import RandomForestClassifier\n",
                    "from sklearn.datasets import make_classification\n",
                    "from sklearn.model_selection import train_test_split\n",
                    "from sklearn.metrics import accuracy_score\n",
                    "\n",
                    "# Generate sample data\n",
                    "X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)\n",
                    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
                    "\n",
                    "# Train model with MLflow tracking\n",
                    "with mlflow.start_run() as run:\n",
                    "    # Log parameters\n",
                    "    mlflow.log_param('model_type', 'RandomForest')\n",
                    "    mlflow.log_param('n_estimators', 100)\n",
                    "    \n",
                    "    # Train model\n",
                    "    model = RandomForestClassifier(n_estimators=100, random_state=42)\n",
                    "    model.fit(X_train, y_train)\n",
                    "    \n",
                    "    # Make predictions\n",
                    "    y_pred = model.predict(X_test)\n",
                    "    accuracy = accuracy_score(y_test, y_pred)\n",
                    "    \n",
                    "    # Log metrics\n",
                    "    mlflow.log_metric('accuracy', accuracy)\n",
                    "    \n",
                    "    print(f'Accuracy: {accuracy:.4f}')\n",
                    "    print(f'Run ID: {run.info.run_id}')",
                ],
            },
        ],
        "metadata": {
            "kernelspec": {
                "display_name": "Python 3",
                "language": "python",
                "name": "python3",
            },
            "language_info": {"name": "python", "version": "3.8.0"},
        },
        "nbformat": 4,
        "nbformat_minor": 4,
    }

    notebook_path = workspace / "experiment_demo.ipynb"
    notebook_path.write_text(json.dumps(notebook_content, indent=2))

    # Create a Python script for queue execution
    script_content = '''#!/usr/bin/env python3
"""
Production script for the experiment demo.
This script can be queued using the FetchML job queue.
"""

import mlflow
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import argparse
import sys


def main():
    parser = argparse.ArgumentParser(description='Run experiment demo')
    parser.add_argument('--experiment-id', help='Experiment ID to log to')
    parser.add_argument('--run-name', default='random_forest_experiment', help='Name for the run')
    args = parser.parse_args()

    print(f"Starting experiment: {args.run_name}")
    if args.experiment_id:
        print(f"Linked to experiment: {args.experiment_id}")

    # Generate sample data
    X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train model with MLflow tracking
    with mlflow.start_run(run_name=args.run_name) as run:
        # Log parameters
        mlflow.log_param('model_type', 'RandomForest')
        mlflow.log_param('n_estimators', 100)
        mlflow.log_param('data_samples', len(X))

        # Train model
        model = RandomForestClassifier(n_estimators=100, random_state=42)
        model.fit(X_train, y_train)

        # Make predictions
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        # Log metrics
        mlflow.log_metric('accuracy', accuracy)
        mlflow.log_metric('train_samples', len(X_train))
        mlflow.log_metric('test_samples', len(X_test))

        print(f'Accuracy: {accuracy:.4f}')
        print(f'Run ID: {run.info.run_id}')

        # Log model
        mlflow.sklearn.log_model(model, "model")

    print("Experiment completed successfully!")


if __name__ == "__main__":
    main()
'''

    script_path = workspace / "run_experiment.py"
    script_path.write_text(script_content)
    # Make script executable so the job queue can invoke it directly.
    os.chmod(script_path, 0o755)

    # Create requirements.txt
    requirements = """mlflow>=1.20.0
scikit-learn>=1.0.0
numpy>=1.20.0
pandas>=1.3.0"""
    req_path = workspace / "requirements.txt"
    req_path.write_text(requirements)

    print(f"Created sample workspace at: {workspace_path}")
    print(f"  - Notebook: {notebook_path}")
    print(f"  - Script: {script_path}")
    print(f"  - Requirements: {req_path}")


def main():
    """Main demonstration function.

    Walks through the full workflow: create a workspace, start Jupyter,
    link/queue/sync an experiment via the ``ml`` CLI, then print next steps.
    Bails out early (with a message) if any CLI step fails.
    """
    print("=== FetchML Jupyter-Experiment Integration Demo ===\n")

    # Create sample workspace
    workspace_path = "./demo_workspace"
    create_sample_workspace(workspace_path)

    print("\n1. Starting Jupyter service...")
    result = run_command(f"ml jupyter start --workspace {workspace_path} --name demo")
    if result.returncode != 0:
        print("Failed to start Jupyter service")
        return

    print("\n2. Creating experiment...")
    # Timestamp suffix keeps repeated demo runs from colliding.
    experiment_id = f"jupyter_demo_{int(time.time())}"
    print(f"Experiment ID: {experiment_id}")

    print("\n3. Linking workspace with experiment...")
    link_result = run_command(
        f"ml jupyter experiment link --workspace {workspace_path} --experiment {experiment_id}"
    )
    if link_result.returncode != 0:
        print("Failed to link workspace with experiment")
        return

    print("\n4. Checking experiment status...")
    # Status is informational only; its exit code is not checked.
    run_command(f"ml jupyter experiment status {workspace_path}")

    print("\n5. Queuing experiment from workspace...")
    queue_result = run_command(
        f"ml jupyter experiment queue --workspace {workspace_path} --script run_experiment.py --name jupyter_demo_run"
    )
    if queue_result.returncode != 0:
        print("Failed to queue experiment")
        return

    print("\n6. Syncing workspace with experiment...")
    sync_result = run_command(
        f"ml jupyter experiment sync --workspace {workspace_path} --direction push"
    )
    if sync_result.returncode != 0:
        print("Failed to sync workspace")
        return

    print("\n7. Listing Jupyter services...")
    run_command("ml jupyter list")

    print("\n8. Stopping Jupyter service...")
    # Stop Jupyter service (commented out for demo)
    # stop_result = run_command("ml jupyter stop demo")

    print("\n=== Demo Complete ===")
    print(f"Workspace: {workspace_path}")
    print(f"Experiment ID: {experiment_id}")
    print("\nNext steps:")
    print("1. Open the Jupyter notebook in your browser to experiment interactively")
    print("2. Use 'ml experiment show' to view experiment results")
    print("3. Use 'ml jupyter experiment sync --direction pull' to pull experiment data")
    print("4. Use 'ml jupyter stop demo' to stop the Jupyter service when done")


if __name__ == "__main__":
    main()