#!/usr/bin/env python3
"""Train a random forest on synthetic data and save its metrics as JSON.

The script generates a synthetic binary-classification dataset, fits a
``RandomForestClassifier``, measures accuracy on a held-out split, and
writes a small summary to ``<output_dir>/results.json``.
"""
import argparse
import json
import logging
from pathlib import Path
import time

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

logger = logging.getLogger(__name__)

# Settings of the synthetic experiment. One seed is shared by data
# generation, the train/test split and the model so runs are repeatable.
RANDOM_STATE = 42
N_SAMPLES = 1000
N_FEATURES = 20
N_CLASSES = 2
TEST_SIZE = 0.2
RESULTS_FILENAME = "results.json"


def _positive_int(value):
    """Convert a command-line string to an int and require it to be >= 1.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not an integer or is
            smaller than 1; argparse turns this into a usage error.
    """
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid int value: {value!r}"
        ) from None
    if number < 1:
        raise argparse.ArgumentTypeError(
            f"must be a positive integer, got {number}"
        )
    return number


def _parse_args(argv=None):
    """Parse the command line (``sys.argv[1:]`` when ``argv`` is None)."""
    parser = argparse.ArgumentParser()
    # A forest needs at least one tree, so reject bad values up front
    # instead of failing later inside the estimator.
    parser.add_argument("--n_estimators", type=_positive_int, default=100)
    parser.add_argument("--output_dir", type=str, required=True)
    return parser.parse_args(argv)


def _train_and_evaluate(n_estimators):
    """Fit the forest on synthetic data and return the results summary."""
    # Generate synthetic data
    X, y = make_classification(
        n_samples=N_SAMPLES,
        n_features=N_FEATURES,
        n_classes=N_CLASSES,
        random_state=RANDOM_STATE,
    )
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
    )

    # Train model
    model = RandomForestClassifier(
        n_estimators=n_estimators, random_state=RANDOM_STATE
    )
    model.fit(X_train, y_train)

    # Evaluate; coerce to a builtin float so JSON serialisation does not
    # depend on the scalar type the metric happens to return.
    accuracy = float(accuracy_score(y_test, model.predict(X_test)))

    return {
        "model_type": "RandomForest",
        "n_estimators": n_estimators,
        "accuracy": accuracy,
        "n_samples": len(X),
        "n_features": X.shape[1],
    }


def _save_results(results, output_dir):
    """Write ``results`` as indented JSON into ``output_dir``.

    The directory (and any missing parents) is created if necessary.
    Returns the path of the written file.
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    target = output_dir / RESULTS_FILENAME
    # Explicit encoding keeps the file identical across platforms.
    with open(target, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    return target


def main(argv=None):
    """Run the experiment end to end.

    Args:
        argv: Optional list of command-line arguments. When None (the
            default) the arguments are taken from ``sys.argv[1:]``, so
            calling ``main()`` behaves as before.
    """
    args = _parse_args(argv)

    logging.basicConfig(level=logging.INFO)

    logger.info(
        "Training Random Forest with %s estimators...", args.n_estimators
    )
    results = _train_and_evaluate(args.n_estimators)
    logger.info("Training completed. Accuracy: %.4f", results["accuracy"])

    # Save results
    _save_results(results, args.output_dir)
    logger.info("Results saved successfully!")


if __name__ == "__main__":
    main()