fetch_ml/examples/jupyter_experiment_integration.py
Jeremie Fraeys cd5640ebd2 Slim and secure: move scripts, clean configs, remove secrets
- Move ci-test.sh and setup.sh to scripts/
- Trim docs/src/zig-cli.md to current structure
- Replace hardcoded secrets with placeholders in configs
- Update .gitignore to block .env*, secrets/, keys, build artifacts
- Slim README.md to reflect current CLI/TUI split
- Add cleanup trap to ci-test.sh
- Ensure no secrets are committed
2025-12-07 13:57:51 -05:00

242 lines
8.8 KiB
Python

#!/usr/bin/env python3
"""
Example script demonstrating Jupyter workspace and experiment integration.
This script shows how to use the FetchML CLI to manage Jupyter workspaces
linked with experiments.
"""
import os
import subprocess
import json
import time
from pathlib import Path
def run_command(cmd, capture_output=True):
    """Execute *cmd* through the shell, echoing the command and its output.

    Args:
        cmd: Shell command line to run (trusted, script-internal strings only).
        capture_output: When True, capture and echo stdout/stderr.

    Returns:
        The subprocess.CompletedProcess, so callers can inspect
        ``returncode``, ``stdout`` and ``stderr``.
    """
    print(f"Running: {cmd}")
    completed = subprocess.run(cmd, shell=True, capture_output=capture_output, text=True)
    if capture_output:
        print(f"Output: {completed.stdout}")
        # Only surface stderr when the command actually wrote something there.
        if completed.stderr:
            print(f"Error: {completed.stderr}")
    return completed
def create_sample_workspace(workspace_path):
    """Create a sample Jupyter workspace with a demo notebook and scripts.

    Populates *workspace_path* with:
      - ``experiment_demo.ipynb`` — an nbformat-4 notebook that trains a
        RandomForest with MLflow tracking,
      - ``run_experiment.py`` — an executable script suitable for the
        FetchML job queue,
      - ``requirements.txt`` — the Python dependencies for both.

    Args:
        workspace_path: Directory to create. Intermediate directories are
            created as needed; an existing directory is reused.
    """
    workspace = Path(workspace_path)
    # parents=True so a nested path like ./demos/run1/ws works even when
    # the parent does not exist yet (plain exist_ok=True would raise).
    workspace.mkdir(parents=True, exist_ok=True)

    # Minimal nbformat-4 notebook: one markdown cell plus one code cell.
    notebook_content = {
        "cells": [
            {
                "cell_type": "markdown",
                "metadata": {},
                "source": ["# Experiment Integration Demo\n\nThis notebook demonstrates the integration between Jupyter workspaces and FetchML experiments."]
            },
            {
                "cell_type": "code",
                "execution_count": None,
                "metadata": {},
                "outputs": [],
                "source": [
                    "import mlflow\n",
                    "import numpy as np\n",
                    "from sklearn.ensemble import RandomForestClassifier\n",
                    "from sklearn.datasets import make_classification\n",
                    "from sklearn.model_selection import train_test_split\n",
                    "from sklearn.metrics import accuracy_score\n",
                    "\n",
                    "# Generate sample data\n",
                    "X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)\n",
                    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
                    "\n",
                    "# Train model with MLflow tracking\n",
                    "with mlflow.start_run() as run:\n",
                    "    # Log parameters\n",
                    "    mlflow.log_param('model_type', 'RandomForest')\n",
                    "    mlflow.log_param('n_estimators', 100)\n",
                    "    \n",
                    "    # Train model\n",
                    "    model = RandomForestClassifier(n_estimators=100, random_state=42)\n",
                    "    model.fit(X_train, y_train)\n",
                    "    \n",
                    "    # Make predictions\n",
                    "    y_pred = model.predict(X_test)\n",
                    "    accuracy = accuracy_score(y_test, y_pred)\n",
                    "    \n",
                    "    # Log metrics\n",
                    "    mlflow.log_metric('accuracy', accuracy)\n",
                    "    \n",
                    "    print(f'Accuracy: {accuracy:.4f}')\n",
                    "    print(f'Run ID: {run.info.run_id}')"
                ]
            }
        ],
        "metadata": {
            "kernelspec": {
                "display_name": "Python 3",
                "language": "python",
                "name": "python3"
            },
            "language_info": {
                "name": "python",
                "version": "3.8.0"
            }
        },
        "nbformat": 4,
        "nbformat_minor": 4
    }

    notebook_path = workspace / "experiment_demo.ipynb"
    with open(notebook_path, 'w') as f:
        json.dump(notebook_content, f, indent=2)

    # Standalone script for queue execution (written as data, not run here).
    script_content = '''#!/usr/bin/env python3
"""
Production script for the experiment demo.
This script can be queued using the FetchML job queue.
"""
import mlflow
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import argparse
import sys


def main():
    parser = argparse.ArgumentParser(description='Run experiment demo')
    parser.add_argument('--experiment-id', help='Experiment ID to log to')
    parser.add_argument('--run-name', default='random_forest_experiment', help='Name for the run')
    args = parser.parse_args()

    print(f"Starting experiment: {args.run_name}")
    if args.experiment_id:
        print(f"Linked to experiment: {args.experiment_id}")

    # Generate sample data
    X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train model with MLflow tracking
    with mlflow.start_run(run_name=args.run_name) as run:
        # Log parameters
        mlflow.log_param('model_type', 'RandomForest')
        mlflow.log_param('n_estimators', 100)
        mlflow.log_param('data_samples', len(X))

        # Train model
        model = RandomForestClassifier(n_estimators=100, random_state=42)
        model.fit(X_train, y_train)

        # Make predictions
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        # Log metrics
        mlflow.log_metric('accuracy', accuracy)
        mlflow.log_metric('train_samples', len(X_train))
        mlflow.log_metric('test_samples', len(X_test))

        print(f'Accuracy: {accuracy:.4f}')
        print(f'Run ID: {run.info.run_id}')

        # Log model
        mlflow.sklearn.log_model(model, "model")

    print("Experiment completed successfully!")


if __name__ == "__main__":
    main()
'''

    script_path = workspace / "run_experiment.py"
    with open(script_path, 'w') as f:
        f.write(script_content)
    # Make script executable so the job queue can invoke it directly.
    os.chmod(script_path, 0o755)

    # Pin minimum versions matching what the notebook/script use.
    requirements = """mlflow>=1.20.0
scikit-learn>=1.0.0
numpy>=1.20.0
pandas>=1.3.0"""
    req_path = workspace / "requirements.txt"
    with open(req_path, 'w') as f:
        f.write(requirements)

    print(f"Created sample workspace at: {workspace_path}")
    print(f"  - Notebook: {notebook_path}")
    print(f"  - Script: {script_path}")
    print(f"  - Requirements: {req_path}")
def main():
    """Walk through the Jupyter/experiment integration workflow end to end.

    Builds a sample workspace, then drives the FetchML CLI (``ml jupyter``)
    through service start, experiment linking, status check, job queueing,
    syncing, and service listing. Any failing CLI step aborts the demo early
    with a message. The Jupyter service is intentionally left running.
    """
    print("=== FetchML Jupyter-Experiment Integration Demo ===\n")

    workspace_path = "./demo_workspace"
    create_sample_workspace(workspace_path)

    print("\n1. Starting Jupyter service...")
    if run_command(f"ml jupyter start --workspace {workspace_path} --name demo").returncode != 0:
        print("Failed to start Jupyter service")
        return

    print("\n2. Creating experiment...")
    # Timestamp keeps experiment IDs unique across repeated demo runs.
    experiment_id = f"jupyter_demo_{int(time.time())}"
    print(f"Experiment ID: {experiment_id}")

    print("\n3. Linking workspace with experiment...")
    if run_command(
        f"ml jupyter experiment link --workspace {workspace_path} --experiment {experiment_id}"
    ).returncode != 0:
        print("Failed to link workspace with experiment")
        return

    print("\n4. Checking experiment status...")
    run_command(f"ml jupyter experiment status {workspace_path}")

    print("\n5. Queuing experiment from workspace...")
    if run_command(
        f"ml jupyter experiment queue --workspace {workspace_path} --script run_experiment.py --name jupyter_demo_run"
    ).returncode != 0:
        print("Failed to queue experiment")
        return

    print("\n6. Syncing workspace with experiment...")
    if run_command(
        f"ml jupyter experiment sync --workspace {workspace_path} --direction push"
    ).returncode != 0:
        print("Failed to sync workspace")
        return

    print("\n7. Listing Jupyter services...")
    run_command("ml jupyter list")

    print("\n8. Stopping Jupyter service...")
    # Deliberately not stopped so the user can explore interactively:
    # run_command("ml jupyter stop demo")

    print("\n=== Demo Complete ===")
    print(f"Workspace: {workspace_path}")
    print(f"Experiment ID: {experiment_id}")
    print("\nNext steps:")
    print("1. Open the Jupyter notebook in your browser to experiment interactively")
    print("2. Use 'ml experiment show' to view experiment results")
    print("3. Use 'ml jupyter experiment sync --direction pull' to pull experiment data")
    print("4. Use 'ml jupyter stop demo' to stop the Jupyter service when done")


if __name__ == "__main__":
    main()