fetch_ml/tests/fixtures/examples/statsmodels_project/train.py
Jeremie Fraeys c980167041 test: implement comprehensive test suite with multiple test types
- Add end-to-end tests for complete workflow validation
- Include integration tests for API and database interactions
- Add unit tests for all major components and utilities
- Include performance tests for payload handling
- Add CLI API integration tests
- Include Podman container integration tests
- Add WebSocket and queue execution tests
- Include shell script tests for setup validation

Provides comprehensive test coverage ensuring platform reliability
and functionality across all components and interactions.
2025-12-04 16:55:13 -05:00

75 lines
2 KiB
Python
Executable file

#!/usr/bin/env python3
import argparse
import json
import logging
from pathlib import Path
import time
import numpy as np
import pandas as pd
import statsmodels.api as sm
def main(argv=None) -> None:
    """Fit an OLS regression on synthetic data and write the results to disk.

    Draws 1000 samples of 5 standard-normal features with a fixed seed,
    builds the target from known coefficients plus small Gaussian noise, fits
    a statsmodels OLS model with an intercept term, and writes two files into
    the output directory: ``results.json`` (fit statistics and per-term
    estimates) and ``model_summary.txt`` (the textual model summary).

    Args:
        argv: Command-line arguments to parse, without the program name.
            ``None`` (the default) lets argparse read ``sys.argv[1:]``, which
            is what happens when the script is run directly. Passing a list
            such as ``["--output_dir", "out"]`` allows programmatic use.

    Raises:
        SystemExit: Raised by argparse when ``--output_dir`` is missing or
            the arguments cannot be parsed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--output_dir", type=str, required=True)
    args = parser.parse_args(argv)

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)
    logger.info("Training statsmodels linear regression...")

    # Generate synthetic data. The global seed is set first so that the
    # feature matrix and the noise vector are reproducible between runs.
    np.random.seed(42)
    n_samples = 1000
    n_features = 5
    X = np.random.randn(n_samples, n_features)

    # True coefficients used to build the target; one entry per feature.
    true_coef = np.array([1.5, -2.0, 0.5, 3.0, -1.0])
    noise = np.random.randn(n_samples) * 0.1
    y = X @ true_coef + noise

    # Wrap in pandas objects so the fitted parameters carry column names.
    feature_names = [f"feature_{i}" for i in range(n_features)]
    X_df = pd.DataFrame(X, columns=feature_names)
    y_series = pd.Series(y, name="target")

    # Add constant for intercept
    X_with_const = sm.add_constant(X_df)

    # Fit model
    model = sm.OLS(y_series, X_with_const).fit()
    # Lazy %-formatting renders the same text as the previous f-string.
    logger.info("Model fitted successfully. R-squared: %.4f", model.rsquared)

    # Collect results; scalar statistics are cast to float before being
    # handed to json.dump.
    results = {
        "model_type": "LinearRegression",
        "n_samples": n_samples,
        "n_features": n_features,
        "r_squared": float(model.rsquared),
        "adj_r_squared": float(model.rsquared_adj),
        "f_statistic": float(model.fvalue),
        "f_pvalue": float(model.f_pvalue),
        "coefficients": model.params.to_dict(),
        "standard_errors": model.bse.to_dict(),
        "p_values": model.pvalues.to_dict(),
    }

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # An explicit encoding keeps the written files independent of the
    # locale of the machine running the script.
    with open(output_dir / "results.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)

    # Save model summary
    with open(output_dir / "model_summary.txt", "w", encoding="utf-8") as f:
        f.write(str(model.summary()))

    logger.info("Results and model summary saved successfully!")
# Run the training routine only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()