fetch_ml/tools/performance_regression_detector.go
Jeremie Fraeys dddc2913e1
chore(tools): update scripts, native libs, and documentation
Update tooling and documentation:
- Smoke test script with scheduler health checks
- Release cleanup script
- Native test scripts with Redis integration
- TUI SSH test script
- Performance regression detector with scheduler metrics
- Profiler with distributed tracing
- Native CMake with test targets
- Dataset hash tests
- Storage symlink resistance tests
- Configuration reference documentation updates
2026-02-26 12:08:58 -05:00

291 lines
7.9 KiB
Go

// Package tools provides performance regression detection utilities.
package tools
import (
	"bufio"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"os"
	"strconv"
	"strings"
	"text/tabwriter"
	"time"
)
// PerformanceRegressionDetector detects performance regressions in benchmark
// results by comparing them against a JSON baseline file.
type PerformanceRegressionDetector struct {
	// BaselineFile is the path of the JSON file holding baseline results
	// (read by LoadBaseline, written by SaveBaseline).
	BaselineFile string
	// Threshold is the percent change beyond which a result counts as a
	// regression (positive change) or improvement (negative change).
	Threshold float64
}
// ParseGoBenchFile reads a file containing `go test -bench` output and returns parsed benchmark results.
func ParseGoBenchFile(path string) ([]BenchmarkResult, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return nil, fmt.Errorf("failed to open benchmark file: %w", openErr)
	}
	// Best-effort close; the file is only read, so the close error is ignored.
	defer func() {
		_ = file.Close()
	}()
	return ParseGoBenchOutput(file)
}
// ParseGoBenchOutput parses `go test -bench` output.
//
// It extracts these metrics when present:
// - ns/op
// - B/op
// - allocs/op
//
// Each metric becomes a separate BenchmarkResult where Name is suffixed with the unit.
func ParseGoBenchOutput(r io.Reader) ([]BenchmarkResult, error) {
	parsed := make([]BenchmarkResult, 0)
	stamp := time.Now() // shared timestamp for every result from this parse
	sc := bufio.NewScanner(r)
	for sc.Scan() {
		text := strings.TrimSpace(sc.Text())
		if !strings.HasPrefix(text, "Benchmark") {
			continue
		}
		cols := strings.Fields(text)
		if len(cols) < 4 {
			// Need at least: name, iterations, value, unit.
			continue
		}
		name := cols[0]
		// Slide a one-step window over (value, unit) pairs; cols[1] is the
		// iteration count and is intentionally skipped.
		for idx := 2; idx+1 < len(cols); idx++ {
			unit := cols[idx+1]
			switch unit {
			case "ns/op", "B/op", "allocs/op":
				// recognized metric unit; fall through to parse the value
			default:
				continue
			}
			value, parseErr := strconv.ParseFloat(cols[idx], 64)
			if parseErr != nil {
				continue
			}
			parsed = append(parsed, BenchmarkResult{
				Name:      name + "/" + unit,
				Value:     value,
				Unit:      unit,
				Timestamp: stamp,
			})
		}
	}
	if scanErr := sc.Err(); scanErr != nil {
		return nil, fmt.Errorf("failed reading benchmark output: %w", scanErr)
	}
	return parsed, nil
}
// BenchmarkResult represents a single benchmark result, either parsed from
// `go test -bench` output or loaded from a JSON baseline file.
type BenchmarkResult struct {
	// Timestamp records when the result was parsed (see ParseGoBenchOutput).
	Timestamp time.Time `json:"timestamp"`
	// Name identifies the metric; parsed results use "<benchmark>/<unit>".
	Name string `json:"name"`
	// Unit is the metric unit: "ns/op", "B/op", or "allocs/op".
	Unit string `json:"unit"`
	// Value is the measured metric value.
	Value float64 `json:"value"`
}
// RegressionReport contains regression analysis results produced by
// AnalyzeParsedResults.
type RegressionReport struct {
	// Summary is a one-line human-readable description of the findings.
	Summary string `json:"summary"`
	// Regressions lists benchmarks that degraded beyond the threshold.
	Regressions []Regression `json:"regressions"`
	// Improvements lists benchmarks that improved beyond the threshold.
	Improvements []Improvement `json:"improvements"`
}
// Regression represents a performance regression.
type Regression struct {
	// Benchmark is the benchmark name (as produced by the parser).
	Benchmark string `json:"benchmark"`
	// Severity is "minor", "major", or "critical", based on how many
	// multiples of the detector threshold the change exceeds.
	Severity string `json:"severity"`
	CurrentValue  float64 `json:"current_value"`
	BaselineValue float64 `json:"baseline_value"`
	// PercentChange is (current-baseline)/baseline*100; positive here.
	PercentChange float64 `json:"percent_change"`
}
// Improvement represents a performance improvement.
type Improvement struct {
	// Benchmark is the benchmark name (as produced by the parser).
	Benchmark     string  `json:"benchmark"`
	CurrentValue  float64 `json:"current_value"`
	BaselineValue float64 `json:"baseline_value"`
	// PercentChange is (current-baseline)/baseline*100; negative here,
	// since improvements are decreases (PrintReport negates it for display).
	PercentChange float64 `json:"percent_change"`
}
// NewPerformanceRegressionDetector creates a new detector instance that
// compares results against the baseline stored in baselineFile, flagging
// changes larger than threshold percent.
func NewPerformanceRegressionDetector(
	baselineFile string,
	threshold float64,
) *PerformanceRegressionDetector {
	detector := &PerformanceRegressionDetector{
		BaselineFile: baselineFile,
		Threshold:    threshold,
	}
	return detector
}
// LoadBaseline loads baseline benchmark results from prd.BaselineFile.
//
// It returns a distinct "baseline file not found" error when the file does
// not exist, and wraps read or JSON-decode failures otherwise.
func (prd *PerformanceRegressionDetector) LoadBaseline() ([]BenchmarkResult, error) {
	data, err := os.ReadFile(prd.BaselineFile)
	if err != nil {
		// Detect not-found from the read error itself rather than a separate
		// os.Stat pre-check, which was redundant and a TOCTOU race window.
		if errors.Is(err, os.ErrNotExist) {
			return nil, fmt.Errorf("baseline file not found: %s", prd.BaselineFile)
		}
		return nil, fmt.Errorf("failed to read baseline file: %w", err)
	}
	var results []BenchmarkResult
	if err := json.Unmarshal(data, &results); err != nil {
		return nil, fmt.Errorf("failed to parse baseline file: %w", err)
	}
	return results, nil
}
// AnalyzeResults analyzes current results against the baseline loaded from
// prd.BaselineFile.
func (prd *PerformanceRegressionDetector) AnalyzeResults(
	current []BenchmarkResult,
) (*RegressionReport, error) {
	baseline, loadErr := prd.LoadBaseline()
	if loadErr != nil {
		return nil, fmt.Errorf("failed to load baseline: %w", loadErr)
	}
	return prd.AnalyzeParsedResults(baseline, current)
}
// AnalyzeParsedResults analyzes current results against a provided baseline.
//
// A result whose value rose by more than prd.Threshold percent is recorded as
// a regression; one that fell by more than the threshold is an improvement.
// Current results with no baseline entry, or a zero baseline value, are skipped.
func (prd *PerformanceRegressionDetector) AnalyzeParsedResults(
	baseline []BenchmarkResult,
	current []BenchmarkResult,
) (*RegressionReport, error) {
	// Index the baseline by benchmark name for O(1) lookups.
	byName := make(map[string]BenchmarkResult, len(baseline))
	for _, entry := range baseline {
		byName[entry.Name] = entry
	}
	report := &RegressionReport{
		Regressions:  []Regression{},
		Improvements: []Improvement{},
	}
	for _, cur := range current {
		base, ok := byName[cur.Name]
		if !ok {
			// New benchmark without a baseline — nothing to compare against.
			continue
		}
		if base.Value == 0 {
			// Avoid division by zero on a degenerate baseline entry.
			continue
		}
		delta := ((cur.Value - base.Value) / base.Value) * 100
		switch {
		case delta > prd.Threshold:
			// Higher value means worse performance — a regression. Severity
			// scales with how many threshold multiples the change exceeds.
			severity := "minor"
			switch {
			case delta > prd.Threshold*3:
				severity = "critical"
			case delta > prd.Threshold*2:
				severity = "major"
			}
			report.Regressions = append(report.Regressions, Regression{
				Benchmark:     cur.Name,
				CurrentValue:  cur.Value,
				BaselineValue: base.Value,
				PercentChange: delta,
				Severity:      severity,
			})
		case delta < -prd.Threshold:
			// Lower value means better performance — an improvement.
			report.Improvements = append(report.Improvements, Improvement{
				Benchmark:     cur.Name,
				CurrentValue:  cur.Value,
				BaselineValue: base.Value,
				PercentChange: delta,
			})
		}
	}
	// Build the one-line summary.
	if len(report.Regressions) == 0 && len(report.Improvements) == 0 {
		report.Summary = "No significant performance changes detected"
	} else {
		report.Summary = fmt.Sprintf("Detected %d regression(s) and %d improvement(s)",
			len(report.Regressions), len(report.Improvements))
	}
	return report, nil
}
// SaveBaseline saves current results as the new baseline, written to
// prd.BaselineFile as indented JSON with owner-only (0600) permissions.
func (prd *PerformanceRegressionDetector) SaveBaseline(results []BenchmarkResult) error {
	payload, marshalErr := json.MarshalIndent(results, "", " ")
	if marshalErr != nil {
		return fmt.Errorf("failed to marshal results: %w", marshalErr)
	}
	if writeErr := os.WriteFile(prd.BaselineFile, payload, 0600); writeErr != nil {
		return fmt.Errorf("failed to write baseline file: %w", writeErr)
	}
	return nil
}
// PrintReport prints a formatted regression report to stdout, with aligned
// tables for regressions and improvements when either is non-empty.
func (prd *PerformanceRegressionDetector) PrintReport(report *RegressionReport) {
	fmt.Print("Performance Regression Analysis Report\n")
	fmt.Print("=====================================\n\n")
	fmt.Printf("Summary:\t%s\n", report.Summary)
	fmt.Printf("Threshold:\t%.1f%%\n\n", prd.Threshold)
	if count := len(report.Regressions); count > 0 {
		fmt.Printf("Regressions (%d)\n", count)
		tw := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
		_, _ = fmt.Fprintln(tw, "Severity\tBenchmark\tBaseline\tCurrent\tChange")
		_, _ = fmt.Fprintln(tw, "--------\t---------\t--------\t-------\t------")
		for _, reg := range report.Regressions {
			_, _ = fmt.Fprintf(
				tw,
				"%s\t%s\t%.2f\t%.2f\t%.1f%% worse\n",
				reg.Severity,
				reg.Benchmark,
				reg.BaselineValue,
				reg.CurrentValue,
				reg.PercentChange,
			)
		}
		_ = tw.Flush()
		fmt.Println()
	}
	if count := len(report.Improvements); count > 0 {
		fmt.Printf("Improvements (%d)\n", count)
		tw := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
		_, _ = fmt.Fprintln(tw, "Benchmark\tBaseline\tCurrent\tChange")
		_, _ = fmt.Fprintln(tw, "---------\t--------\t-------\t------")
		for _, imp := range report.Improvements {
			// PercentChange is negative for improvements; negate for display.
			_, _ = fmt.Fprintf(
				tw,
				"%s\t%.2f\t%.2f\t%.1f%% better\n",
				imp.Benchmark,
				imp.BaselineValue,
				imp.CurrentValue,
				-imp.PercentChange,
			)
		}
		_ = tw.Flush()
		fmt.Println()
	}
}