Update tooling and documentation: - Smoke test script with scheduler health checks - Release cleanup script - Native test scripts with Redis integration - TUI SSH test script - Performance regression detector with scheduler metrics - Profiler with distributed tracing - Native CMake with test targets - Dataset hash tests - Storage symlink resistance tests - Configuration reference documentation updates
422 lines
11 KiB
Go
422 lines
11 KiB
Go
package tools
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"runtime"
|
|
"runtime/debug"
|
|
"runtime/pprof"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// Profiler provides performance profiling capabilities
|
|
type Profiler struct {
|
|
startTime time.Time
|
|
cpuProfile string
|
|
memProfile string
|
|
traceProfile string
|
|
blockProfile string
|
|
mutexProfile string
|
|
enabled bool
|
|
}
|
|
|
|
// ProfileConfig defines profiling configuration
|
|
type ProfileConfig struct {
|
|
CPUProfile string
|
|
MemProfile string
|
|
TraceProfile string
|
|
BlockProfile string
|
|
MutexProfile string
|
|
EnableCPU bool
|
|
EnableMem bool
|
|
EnableTrace bool
|
|
EnableBlock bool
|
|
EnableMutex bool
|
|
SampleRate int
|
|
BlockSize int
|
|
}
|
|
|
|
// NewProfiler creates a new profiler instance
|
|
func NewProfiler(config ProfileConfig) *Profiler {
|
|
return &Profiler{
|
|
cpuProfile: config.CPUProfile,
|
|
memProfile: config.MemProfile,
|
|
traceProfile: config.TraceProfile,
|
|
blockProfile: config.BlockProfile,
|
|
mutexProfile: config.MutexProfile,
|
|
enabled: true,
|
|
}
|
|
}
|
|
|
|
// Start begins profiling with the specified configuration
|
|
func (p *Profiler) Start() error {
|
|
if !p.enabled {
|
|
return nil
|
|
}
|
|
|
|
p.startTime = time.Now()
|
|
|
|
// Configure runtime profiling
|
|
runtime.SetCPUProfileRate(100) // 100Hz sampling rate
|
|
|
|
// Start CPU profiling if enabled
|
|
if p.cpuProfile != "" {
|
|
f, err := os.Create(p.cpuProfile)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create CPU profile: %w", err)
|
|
}
|
|
if err := pprof.StartCPUProfile(f); err != nil {
|
|
_ = f.Close()
|
|
return fmt.Errorf("failed to start CPU profiling: %w", err)
|
|
}
|
|
}
|
|
|
|
// Start block profiling if enabled
|
|
if p.blockProfile != "" {
|
|
runtime.SetBlockProfileRate(1)
|
|
}
|
|
|
|
// Start mutex profiling if enabled
|
|
if p.mutexProfile != "" {
|
|
runtime.SetMutexProfileFraction(1)
|
|
}
|
|
|
|
fmt.Printf("Profiler started at %s\n", p.startTime.Format(time.RFC3339))
|
|
return nil
|
|
}
|
|
|
|
// Stop stops all active profiling and saves results
|
|
func (p *Profiler) Stop() error {
|
|
if !p.enabled {
|
|
return nil
|
|
}
|
|
|
|
// Stop CPU profiling
|
|
if p.cpuProfile != "" {
|
|
pprof.StopCPUProfile()
|
|
fmt.Printf("CPU profile saved to: %s\n", p.cpuProfile)
|
|
}
|
|
|
|
// Save memory profile
|
|
if p.memProfile != "" {
|
|
f, err := os.Create(p.memProfile)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create memory profile: %w", err)
|
|
}
|
|
defer func() { _ = f.Close() }()
|
|
|
|
runtime.GC() // Force GC before taking memory snapshot
|
|
if err := pprof.WriteHeapProfile(f); err != nil {
|
|
return fmt.Errorf("failed to write memory profile: %w", err)
|
|
}
|
|
fmt.Printf("Memory profile saved to: %s\n", p.memProfile)
|
|
}
|
|
|
|
// Save block profile
|
|
if p.blockProfile != "" {
|
|
f, err := os.Create(p.blockProfile)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create block profile: %w", err)
|
|
}
|
|
defer func() { _ = f.Close() }()
|
|
|
|
if err := pprof.Lookup("block").WriteTo(f, 0); err != nil {
|
|
return fmt.Errorf("failed to write block profile: %w", err)
|
|
}
|
|
fmt.Printf("Block profile saved to: %s\n", p.blockProfile)
|
|
}
|
|
|
|
// Save mutex profile
|
|
if p.mutexProfile != "" {
|
|
f, err := os.Create(p.mutexProfile)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create mutex profile: %w", err)
|
|
}
|
|
defer func() { _ = f.Close() }()
|
|
|
|
if err := pprof.Lookup("mutex").WriteTo(f, 0); err != nil {
|
|
return fmt.Errorf("failed to write mutex profile: %w", err)
|
|
}
|
|
fmt.Printf("Mutex profile saved to: %s\n", p.mutexProfile)
|
|
}
|
|
|
|
duration := time.Since(p.startTime)
|
|
fmt.Printf("Profiler stopped after %v\n", duration)
|
|
return nil
|
|
}
|
|
|
|
// ProfileAnalysis contains analysis results from profiling data
|
|
type ProfileAnalysis struct {
|
|
GCStats GCStats `json:"gc_stats"`
|
|
TopFunctions []FunctionInfo `json:"top_functions"`
|
|
Recommendations []string `json:"recommendations"`
|
|
MemoryUsage MemoryInfo `json:"memory_usage"`
|
|
GoroutineCount int `json:"goroutine_count"`
|
|
HeapSize uint64 `json:"heap_size"`
|
|
}
|
|
|
|
// FunctionInfo represents profiling information for a function
|
|
type FunctionInfo struct {
|
|
Name string `json:"name"`
|
|
Time float64 `json:"time_seconds"`
|
|
Percentage float64 `json:"percentage"`
|
|
Calls int64 `json:"calls"`
|
|
}
|
|
|
|
// MemoryInfo contains memory usage information
|
|
type MemoryInfo struct {
|
|
Alloc uint64 `json:"alloc_bytes"`
|
|
TotalAlloc uint64 `json:"total_alloc_bytes"`
|
|
Sys uint64 `json:"sys_bytes"`
|
|
Lookups uint64 `json:"lookups"`
|
|
Mallocs uint64 `json:"mallocs"`
|
|
Frees uint64 `json:"frees"`
|
|
}
|
|
|
|
// GCStats contains garbage collection statistics
|
|
type GCStats struct {
|
|
Pause []time.Duration `json:"pauses_ns"`
|
|
GCCPUFraction float64 `json:"gc_cpu_fraction"`
|
|
PauseTotal time.Duration `json:"pause_total_ns"`
|
|
NumGC uint32 `json:"num_gc"`
|
|
}
|
|
|
|
// AnalyzeProfiles analyzes generated profile files and returns insights
|
|
func (p *Profiler) AnalyzeProfiles() (*ProfileAnalysis, error) {
|
|
analysis := &ProfileAnalysis{
|
|
Recommendations: []string{},
|
|
}
|
|
|
|
// Get current runtime statistics
|
|
var m runtime.MemStats
|
|
runtime.ReadMemStats(&m)
|
|
|
|
analysis.MemoryUsage = MemoryInfo{
|
|
Alloc: m.Alloc,
|
|
TotalAlloc: m.TotalAlloc,
|
|
Sys: m.Sys,
|
|
Lookups: m.Lookups,
|
|
Mallocs: m.Mallocs,
|
|
Frees: m.Frees,
|
|
}
|
|
|
|
analysis.GoroutineCount = runtime.NumGoroutine()
|
|
analysis.HeapSize = m.HeapAlloc
|
|
|
|
// Get GC statistics
|
|
var gcStats debug.GCStats
|
|
debug.ReadGCStats(&gcStats)
|
|
analysis.GCStats = GCStats{
|
|
NumGC: uint32(gcStats.NumGC),
|
|
GCCPUFraction: 0.0, // Not available in this Go version
|
|
PauseTotal: gcStats.PauseTotal,
|
|
Pause: gcStats.Pause[0:], // Copy slice to avoid reference issues
|
|
}
|
|
|
|
// Analyze CPU profile if available
|
|
if p.cpuProfile != "" {
|
|
cpuAnalysis, err := p.analyzeCPUProfile()
|
|
if err == nil {
|
|
analysis.TopFunctions = cpuAnalysis
|
|
}
|
|
}
|
|
|
|
// Generate recommendations based on analysis
|
|
analysis.Recommendations = p.generateRecommendations(analysis)
|
|
|
|
return analysis, nil
|
|
}
|
|
|
|
// analyzeCPUProfile processes CPU profile data
|
|
func (p *Profiler) analyzeCPUProfile() ([]FunctionInfo, error) {
|
|
if p.cpuProfile == "" {
|
|
return nil, fmt.Errorf("no CPU profile available")
|
|
}
|
|
|
|
// Use go tool pprof to analyze the profile
|
|
//nolint:gosec // G204: Subprocess launched with potential tainted input - this is a developer tool
|
|
cmd := exec.CommandContext(
|
|
context.Background(),
|
|
"go", "tool", "pprof", "-text", p.cpuProfile,
|
|
)
|
|
output, err := cmd.Output()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to analyze CPU profile: %w", err)
|
|
}
|
|
|
|
lines := strings.Split(string(output), "\n")
|
|
var functions []FunctionInfo
|
|
|
|
for _, line := range lines {
|
|
if strings.HasPrefix(line, "#") || line == "" {
|
|
continue
|
|
}
|
|
|
|
fields := strings.Fields(line)
|
|
if len(fields) < 4 {
|
|
continue
|
|
}
|
|
|
|
// Parse function information from pprof output
|
|
// Format: flat flat% sum% cum cum%
|
|
flatTime := parseTime(fields[0])
|
|
flatPercent := parsePercent(fields[1])
|
|
funcName := fields[3]
|
|
|
|
if flatTime > 0 {
|
|
functions = append(functions, FunctionInfo{
|
|
Name: funcName,
|
|
Time: flatTime,
|
|
Percentage: flatPercent,
|
|
})
|
|
}
|
|
}
|
|
|
|
// Sort by time (descending)
|
|
sort.Slice(functions, func(i, j int) bool {
|
|
return functions[i].Time > functions[j].Time
|
|
})
|
|
|
|
// Return top 10 functions
|
|
if len(functions) > 10 {
|
|
functions = functions[:10]
|
|
}
|
|
|
|
return functions, nil
|
|
}
|
|
|
|
// parseTime converts time string from pprof output to seconds
|
|
func parseTime(timeStr string) float64 {
|
|
// pprof outputs time in various formats (s, ms, etc.)
|
|
timeStr = strings.TrimSuffix(timeStr, "s")
|
|
return parseFloat(timeStr)
|
|
}
|
|
|
|
// parsePercent converts percentage string to float
|
|
func parsePercent(percentStr string) float64 {
|
|
percentStr = strings.TrimSuffix(percentStr, "%")
|
|
return parseFloat(percentStr)
|
|
}
|
|
|
|
// parseFloat safely converts string to float
|
|
func parseFloat(s string) float64 {
|
|
var f float64
|
|
_, _ = fmt.Sscanf(s, "%f", &f)
|
|
return f
|
|
}
|
|
|
|
// generateRecommendations provides performance optimization suggestions
|
|
func (p *Profiler) generateRecommendations(analysis *ProfileAnalysis) []string {
|
|
var recommendations []string
|
|
|
|
// Memory usage recommendations
|
|
if analysis.MemoryUsage.Alloc > 100*1024*1024 { // > 100MB
|
|
recommendations = append(
|
|
recommendations,
|
|
"High memory usage detected. Consider optimizing memory allocations.",
|
|
)
|
|
}
|
|
|
|
if analysis.GoroutineCount > 1000 {
|
|
recommendations = append(
|
|
recommendations,
|
|
"High goroutine count detected. Check for goroutine leaks.",
|
|
)
|
|
}
|
|
|
|
// GC recommendations
|
|
if analysis.GCStats.GCCPUFraction > 0.1 {
|
|
recommendations = append(recommendations, "High GC CPU usage. Consider reducing allocation rate.")
|
|
}
|
|
|
|
if len(analysis.GCStats.Pause) > 0 {
|
|
avgPause := analysis.GCStats.PauseTotal / time.Duration(len(analysis.GCStats.Pause))
|
|
if avgPause > 10*time.Millisecond {
|
|
recommendations = append(
|
|
recommendations,
|
|
"Long GC pauses detected. Consider tuning GC parameters.",
|
|
)
|
|
}
|
|
}
|
|
|
|
// Function-level recommendations
|
|
for _, fn := range analysis.TopFunctions {
|
|
if fn.Percentage > 20.0 {
|
|
recommendations = append(recommendations,
|
|
fmt.Sprintf("Function %s uses %.1f%% CPU time. Consider optimization.", fn.Name, fn.Percentage),
|
|
)
|
|
}
|
|
}
|
|
|
|
if len(recommendations) == 0 {
|
|
recommendations = append(recommendations, "Performance looks good. No major issues detected.")
|
|
}
|
|
|
|
return recommendations
|
|
}
|
|
|
|
// PrintAnalysis prints a formatted analysis report
|
|
func (p *Profiler) PrintAnalysis(analysis *ProfileAnalysis) {
|
|
fmt.Printf("\nPerformance Profile Analysis Report\n")
|
|
fmt.Printf("===================================\n\n")
|
|
|
|
fmt.Printf("Runtime Statistics:\n")
|
|
fmt.Printf(" Goroutines: %d\n", analysis.GoroutineCount)
|
|
fmt.Printf(" Heap Size: %.2f MB\n", float64(analysis.HeapSize)/1024/1024)
|
|
fmt.Printf(" Memory Allocated: %.2f MB\n", float64(analysis.MemoryUsage.Alloc)/1024/1024)
|
|
totalAllocMB := float64(analysis.MemoryUsage.TotalAlloc) / 1024 / 1024
|
|
fmt.Printf(" Total Memory Allocated: %.2f MB\n", totalAllocMB)
|
|
|
|
fmt.Printf("\nGarbage Collection:\n")
|
|
fmt.Printf(" GC Cycles: %d\n", analysis.GCStats.NumGC)
|
|
fmt.Printf(" GC CPU Fraction: %.2f%%\n", analysis.GCStats.GCCPUFraction*100)
|
|
fmt.Printf(" Total GC Pause: %v\n", analysis.GCStats.PauseTotal)
|
|
|
|
if len(analysis.TopFunctions) > 0 {
|
|
fmt.Printf("\nTop CPU Functions:\n")
|
|
for i, fn := range analysis.TopFunctions {
|
|
fmt.Printf(" %d. %s: %.3fs (%.1f%%)\n", i+1, fn.Name, fn.Time, fn.Percentage)
|
|
}
|
|
}
|
|
|
|
fmt.Printf("\nRecommendations:\n")
|
|
for i, rec := range analysis.Recommendations {
|
|
fmt.Printf(" %d. %s\n", i+1, rec)
|
|
}
|
|
fmt.Println()
|
|
}
|
|
|
|
// ProfileWithFunction profiles a specific function execution
|
|
func ProfileWithFunction(profileName string, fn func() error) error {
|
|
config := ProfileConfig{
|
|
CPUProfile: fmt.Sprintf("%s-cpu.prof", profileName),
|
|
MemProfile: fmt.Sprintf("%s-mem.prof", profileName),
|
|
BlockProfile: fmt.Sprintf("%s-block.prof", profileName),
|
|
EnableCPU: true,
|
|
EnableMem: true,
|
|
EnableBlock: true,
|
|
}
|
|
|
|
profiler := NewProfiler(config)
|
|
|
|
if err := profiler.Start(); err != nil {
|
|
return fmt.Errorf("failed to start profiler: %w", err)
|
|
}
|
|
|
|
defer func() {
|
|
if err := profiler.Stop(); err != nil {
|
|
fmt.Printf("Warning: failed to stop profiler: %v\n", err)
|
|
}
|
|
|
|
analysis, err := profiler.AnalyzeProfiles()
|
|
if err == nil {
|
|
profiler.PrintAnalysis(analysis)
|
|
}
|
|
}()
|
|
|
|
return fn()
|
|
}
|