- Move ci-test.sh and setup.sh to scripts/ - Trim docs/src/zig-cli.md to current structure - Replace hardcoded secrets with placeholders in configs - Update .gitignore to block .env*, secrets/, keys, build artifacts - Slim README.md to reflect current CLI/TUI split - Add cleanup trap to ci-test.sh - Ensure no secrets are committed
242 lines
8.8 KiB
Python
242 lines
8.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Example script demonstrating Jupyter workspace and experiment integration.
|
|
This script shows how to use the FetchML CLI to manage Jupyter workspaces
|
|
linked with experiments.
|
|
"""
|
|
|
|
import os
|
|
import subprocess
|
|
import json
|
|
import time
|
|
from pathlib import Path
|
|
|
|
def run_command(cmd, capture_output=True):
    """Execute *cmd* in a shell and return the completed process.

    Echoes the command before running it; when output is captured,
    also prints stdout (and stderr, if non-empty) afterwards.

    NOTE: the command string is passed through the shell (shell=True) —
    only use with trusted, internally-constructed command strings.
    """
    print(f"Running: {cmd}")
    completed = subprocess.run(
        cmd, shell=True, capture_output=capture_output, text=True
    )
    if capture_output:
        print(f"Output: {completed.stdout}")
        if completed.stderr:
            print(f"Error: {completed.stderr}")
    return completed
|
|
|
|
def create_sample_workspace(workspace_path):
    """Create a sample Jupyter workspace with a notebook, a script, and requirements.

    Writes three files under *workspace_path* (created if missing, including
    any intermediate directories):
      - experiment_demo.ipynb  — nbformat-4 notebook demonstrating MLflow tracking
      - run_experiment.py      — executable script suitable for queue execution
      - requirements.txt       — pinned-minimum dependencies for both

    All files are written as UTF-8 so the output is platform-independent.
    """
    workspace = Path(workspace_path)
    # parents=True so nested paths like ./demo/ws work, not just a single level.
    workspace.mkdir(parents=True, exist_ok=True)

    # Create a simple notebook (nbformat 4 JSON structure).
    notebook_content = {
        "cells": [
            {
                "cell_type": "markdown",
                "metadata": {},
                "source": ["# Experiment Integration Demo\n\nThis notebook demonstrates the integration between Jupyter workspaces and FetchML experiments."]
            },
            {
                "cell_type": "code",
                "execution_count": None,
                "metadata": {},
                "outputs": [],
                "source": [
                    "import mlflow\n",
                    "import numpy as np\n",
                    "from sklearn.ensemble import RandomForestClassifier\n",
                    "from sklearn.datasets import make_classification\n",
                    "from sklearn.model_selection import train_test_split\n",
                    "from sklearn.metrics import accuracy_score\n",
                    "\n",
                    "# Generate sample data\n",
                    "X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)\n",
                    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
                    "\n",
                    "# Train model with MLflow tracking\n",
                    "with mlflow.start_run() as run:\n",
                    "    # Log parameters\n",
                    "    mlflow.log_param('model_type', 'RandomForest')\n",
                    "    mlflow.log_param('n_estimators', 100)\n",
                    "    \n",
                    "    # Train model\n",
                    "    model = RandomForestClassifier(n_estimators=100, random_state=42)\n",
                    "    model.fit(X_train, y_train)\n",
                    "    \n",
                    "    # Make predictions\n",
                    "    y_pred = model.predict(X_test)\n",
                    "    accuracy = accuracy_score(y_test, y_pred)\n",
                    "    \n",
                    "    # Log metrics\n",
                    "    mlflow.log_metric('accuracy', accuracy)\n",
                    "    \n",
                    "    print(f'Accuracy: {accuracy:.4f}')\n",
                    "    print(f'Run ID: {run.info.run_id}')"
                ]
            }
        ],
        "metadata": {
            "kernelspec": {
                "display_name": "Python 3",
                "language": "python",
                "name": "python3"
            },
            "language_info": {
                "name": "python",
                "version": "3.8.0"
            }
        },
        "nbformat": 4,
        "nbformat_minor": 4
    }

    notebook_path = workspace / "experiment_demo.ipynb"
    # Explicit UTF-8 rather than the platform default encoding.
    notebook_path.write_text(json.dumps(notebook_content, indent=2), encoding="utf-8")

    # Create a Python script for queue execution.
    script_content = '''#!/usr/bin/env python3
"""
Production script for the experiment demo.

This script can be queued using the FetchML job queue.
"""

import mlflow
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import argparse
import sys


def main():
    parser = argparse.ArgumentParser(description='Run experiment demo')
    parser.add_argument('--experiment-id', help='Experiment ID to log to')
    parser.add_argument('--run-name', default='random_forest_experiment', help='Name for the run')
    args = parser.parse_args()

    print(f"Starting experiment: {args.run_name}")
    if args.experiment_id:
        print(f"Linked to experiment: {args.experiment_id}")

    # Generate sample data
    X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train model with MLflow tracking
    with mlflow.start_run(run_name=args.run_name) as run:
        # Log parameters
        mlflow.log_param('model_type', 'RandomForest')
        mlflow.log_param('n_estimators', 100)
        mlflow.log_param('data_samples', len(X))

        # Train model
        model = RandomForestClassifier(n_estimators=100, random_state=42)
        model.fit(X_train, y_train)

        # Make predictions
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        # Log metrics
        mlflow.log_metric('accuracy', accuracy)
        mlflow.log_metric('train_samples', len(X_train))
        mlflow.log_metric('test_samples', len(X_test))

        print(f'Accuracy: {accuracy:.4f}')
        print(f'Run ID: {run.info.run_id}')

        # Log model
        mlflow.sklearn.log_model(model, "model")

    print("Experiment completed successfully!")


if __name__ == "__main__":
    main()
'''

    script_path = workspace / "run_experiment.py"
    script_path.write_text(script_content, encoding="utf-8")

    # Make script executable (owner rwx, group/other rx).
    os.chmod(script_path, 0o755)

    # Create requirements.txt
    requirements = """mlflow>=1.20.0
scikit-learn>=1.0.0
numpy>=1.20.0
pandas>=1.3.0"""

    req_path = workspace / "requirements.txt"
    req_path.write_text(requirements, encoding="utf-8")

    print(f"Created sample workspace at: {workspace_path}")
    print(f"  - Notebook: {notebook_path}")
    print(f"  - Script: {script_path}")
    print(f"  - Requirements: {req_path}")
|
|
|
|
def main():
    """Run the end-to-end Jupyter/experiment integration demo.

    Creates a sample workspace on disk, then drives the FetchML
    ``ml jupyter`` CLI through its lifecycle: start service, create an
    experiment ID, link, status-check, queue a run, sync, and list.
    Bails out early (returns None) if any critical step fails.

    Side effects only: writes ./demo_workspace and shells out to ``ml``.
    Requires the ``ml`` CLI to be on PATH; failures are reported via
    printed messages rather than exceptions.
    """
    print("=== FetchML Jupyter-Experiment Integration Demo ===\n")

    # Create sample workspace
    workspace_path = "./demo_workspace"
    create_sample_workspace(workspace_path)

    print("\n1. Starting Jupyter service...")
    # Start Jupyter service
    result = run_command(f"ml jupyter start --workspace {workspace_path} --name demo")
    if result.returncode != 0:
        print("Failed to start Jupyter service")
        return

    print("\n2. Creating experiment...")
    # Timestamp-based ID keeps repeated demo runs from colliding.
    experiment_id = f"jupyter_demo_{int(time.time())}"
    print(f"Experiment ID: {experiment_id}")

    print("\n3. Linking workspace with experiment...")
    # Link workspace with experiment
    link_result = run_command(f"ml jupyter experiment link --workspace {workspace_path} --experiment {experiment_id}")
    if link_result.returncode != 0:
        print("Failed to link workspace with experiment")
        return

    print("\n4. Checking experiment status...")
    # Status is informational only; its exit code is not checked.
    run_command(f"ml jupyter experiment status {workspace_path}")

    print("\n5. Queuing experiment from workspace...")
    # Queue experiment from workspace
    queue_result = run_command(f"ml jupyter experiment queue --workspace {workspace_path} --script run_experiment.py --name jupyter_demo_run")
    if queue_result.returncode != 0:
        print("Failed to queue experiment")
        return

    print("\n6. Syncing workspace with experiment...")
    # Push local workspace state up to the experiment store.
    sync_result = run_command(f"ml jupyter experiment sync --workspace {workspace_path} --direction push")
    if sync_result.returncode != 0:
        print("Failed to sync workspace")
        return

    print("\n7. Listing Jupyter services...")
    # Informational listing; exit code deliberately ignored.
    run_command("ml jupyter list")

    print("\n8. Stopping Jupyter service...")
    # Stop Jupyter service (commented out so the demo stays interactive)
    # stop_result = run_command("ml jupyter stop demo")

    print("\n=== Demo Complete ===")
    print(f"Workspace: {workspace_path}")
    print(f"Experiment ID: {experiment_id}")
    print("\nNext steps:")
    print("1. Open the Jupyter notebook in your browser to experiment interactively")
    print("2. Use 'ml experiment show' to view experiment results")
    print("3. Use 'ml jupyter experiment sync --direction pull' to pull experiment data")
    print("4. Use 'ml jupyter stop demo' to stop the Jupyter service when done")
|
|
|
|
# Entry point: run the demo only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|