fetch_ml/podman/workspace/standard_ml_project/train.py
Jeremie Fraeys 4aecd469a1 feat: implement comprehensive monitoring and container orchestration
- Add Prometheus, Grafana, and Loki monitoring stack
- Include pre-configured dashboards for ML metrics and logs
- Add Podman container support with security policies
- Implement ML runtime environments for multiple frameworks
- Add containerized ML project templates (PyTorch, TensorFlow, etc.)
- Include secure runner with isolation and resource limits
- Add comprehensive log aggregation and alerting
2025-12-04 16:54:49 -05:00

122 lines
3.4 KiB
Python
Executable file

#!/usr/bin/env python3
import argparse
import json
import logging
from pathlib import Path
import time
import numpy as np
import torch
import torch.nn as nn
class SimpleNet(nn.Module):
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    Attribute names (fc1, fc2, relu) are kept stable because they become
    the keys of the saved ``state_dict``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return raw logits for input batch ``x``."""
        return self.fc2(self.relu(self.fc1(x)))
def _parse_args():
    """Parse command-line options for the training run."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--epochs", type=int, default=5)
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--learning_rate", type=float, default=0.001)
    parser.add_argument("--output_dir", type=str, required=True)
    return parser.parse_args()


def _make_dataloader(batch_size):
    """Build a shuffled DataLoader over 1000 synthetic samples.

    Returns:
        (dataloader, X): the loader and the raw feature tensor (kept so the
        caller can record n_samples / input_features in the results file).
    """
    torch.manual_seed(42)  # deterministic synthetic data (and weight init downstream)
    X = torch.randn(1000, 20)
    y = torch.randint(0, 2, (1000,))
    dataset = torch.utils.data.TensorDataset(X, y)
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=True
    )
    return dataloader, X


def _batch_correct(outputs, batch_y):
    """Count correct predictions in one batch (argmax over class logits)."""
    # outputs.argmax(dim=1) replaces the deprecated `outputs.data` access idiom.
    predicted = outputs.argmax(dim=1)
    return (predicted == batch_y).sum().item()


def _train(model, dataloader, criterion, optimizer, epochs, logger):
    """Run the optimization loop, logging per-epoch loss and accuracy."""
    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        correct = 0
        total = 0
        for batch_X, batch_y in dataloader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            total += batch_y.size(0)
            correct += _batch_correct(outputs, batch_y)
        accuracy = correct / total
        avg_loss = total_loss / len(dataloader)
        logger.info(
            f"Epoch {epoch + 1}/{epochs}: Loss={avg_loss:.4f}, Acc={accuracy:.4f}"
        )
        time.sleep(0.1)  # Small delay for logging


def _evaluate(model, dataloader):
    """Return accuracy over `dataloader`.

    NOTE(review): the caller passes the *training* dataloader, so this is
    train-set accuracy, not a held-out evaluation — preserved as-is.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_X, batch_y in dataloader:
            outputs = model(batch_X)
            total += batch_y.size(0)
            correct += _batch_correct(outputs, batch_y)
    return correct / total


def _save_artifacts(args, model, final_accuracy, X, logger):
    """Write results.json and the model state_dict into args.output_dir."""
    results = {
        "model_type": "PyTorch",
        "epochs": args.epochs,
        "batch_size": args.batch_size,
        "learning_rate": args.learning_rate,
        "final_accuracy": final_accuracy,
        "n_samples": len(X),
        "input_features": X.shape[1],
    }
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    with open(output_dir / "results.json", "w") as f:
        json.dump(results, f, indent=2)
    torch.save(model.state_dict(), output_dir / "pytorch_model.pth")
    logger.info("Results and model saved successfully!")


def main():
    """Train a small classifier on synthetic data, then save metrics and weights.

    CLI arguments:
        --epochs         number of training epochs (default 5)
        --batch_size     mini-batch size (default 32)
        --learning_rate  Adam learning rate (default 0.001)
        --output_dir     destination for results.json / pytorch_model.pth (required)
    """
    args = _parse_args()
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)
    logger.info(f"Training model for {args.epochs} epochs...")

    dataloader, X = _make_dataloader(args.batch_size)

    model = SimpleNet(20, 64, 2)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    _train(model, dataloader, criterion, optimizer, args.epochs, logger)

    final_accuracy = _evaluate(model, dataloader)
    logger.info(f"Training completed. Final accuracy: {final_accuracy:.4f}")

    _save_artifacts(args, model, final_accuracy, X, logger)


if __name__ == "__main__":
    main()