package prommetrics import ( "net/http" "time" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" ) // Metrics holds all Prometheus metrics for the application type Metrics struct { // WebSocket metrics wsConnections *prometheus.GaugeVec wsMessages *prometheus.CounterVec wsDuration *prometheus.HistogramVec wsErrors *prometheus.CounterVec // Job queue metrics jobsQueued prometheus.Counter jobsCompleted *prometheus.CounterVec jobsActive prometheus.Gauge jobDuration *prometheus.HistogramVec queueLength prometheus.Gauge // Jupyter metrics jupyterServices *prometheus.GaugeVec jupyterOps *prometheus.CounterVec // HTTP metrics httpRequests *prometheus.CounterVec httpDuration *prometheus.HistogramVec // Prewarm metrics prewarmSnapshotHit prometheus.Counter prewarmSnapshotMiss prometheus.Counter prewarmSnapshotBuilt prometheus.Counter prewarmSnapshotTime prometheus.Histogram registry *prometheus.Registry } // New creates a new Prometheus Metrics instance func New() *Metrics { m := &Metrics{ registry: prometheus.NewRegistry(), } m.initMetrics() return m } // initMetrics initializes all Prometheus metrics func (m *Metrics) initMetrics() { // WebSocket metrics m.wsConnections = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "fetchml_websocket_connections", Help: "Number of active WebSocket connections", }, []string{"status"}, ) m.wsMessages = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "fetchml_websocket_messages_total", Help: "Total number of WebSocket messages", }, []string{"opcode", "status"}, ) m.wsDuration = prometheus.NewHistogramVec( prometheus.HistogramOpts{ Name: "fetchml_websocket_duration_seconds", Help: "WebSocket message processing duration", Buckets: prometheus.DefBuckets, }, []string{"opcode"}, ) m.wsErrors = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "fetchml_websocket_errors_total", Help: "Total number of WebSocket errors", }, []string{"type"}, ) // Job queue metrics m.jobsQueued = prometheus.NewCounter( prometheus.CounterOpts{ Name: "fetchml_jobs_queued_total", Help: "Total number of jobs queued", }, ) m.jobsCompleted = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "fetchml_jobs_completed_total", Help: "Total number of completed jobs", }, []string{"status"}, ) m.jobsActive = prometheus.NewGauge( prometheus.GaugeOpts{ Name: "fetchml_jobs_active", Help: "Number of currently active jobs", }, ) m.jobDuration = prometheus.NewHistogramVec( prometheus.HistogramOpts{ Name: "fetchml_job_duration_seconds", Help: "Job execution duration", Buckets: []float64{1, 5, 10, 30, 60, 300, 600, 1800, 3600}, }, []string{"status"}, ) m.queueLength = prometheus.NewGauge( prometheus.GaugeOpts{ Name: "fetchml_queue_length", Help: "Current job queue length", }, ) // Jupyter metrics m.jupyterServices = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "fetchml_jupyter_services", Help: "Number of Jupyter services", }, []string{"status"}, ) m.jupyterOps = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "fetchml_jupyter_operations_total", Help: "Total number of Jupyter operations", }, []string{"operation", "status"}, ) // HTTP metrics m.httpRequests = prometheus.NewCounterVec( prometheus.CounterOpts{ Name: "fetchml_http_requests_total", Help: "Total number of HTTP requests", }, []string{"method", "endpoint", "status"}, ) m.httpDuration = prometheus.NewHistogramVec( prometheus.HistogramOpts{ Name: "fetchml_http_duration_seconds", Help: "HTTP request duration", Buckets: prometheus.DefBuckets, }, []string{"method", "endpoint"}, ) // Prewarm metrics m.prewarmSnapshotHit = prometheus.NewCounter( prometheus.CounterOpts{ Name: "fetchml_prewarm_snapshot_hit_total", Help: "Total number of prewarmed snapshot hits (snapshots found in .prewarm/)", }, ) m.prewarmSnapshotMiss = prometheus.NewCounter( prometheus.CounterOpts{ Name: "fetchml_prewarm_snapshot_miss_total", Help: "Total number of prewarmed snapshot misses (snapshots not found in .prewarm/)", }, ) m.prewarmSnapshotBuilt = prometheus.NewCounter( prometheus.CounterOpts{ Name: "fetchml_prewarm_snapshot_built_total", Help: "Total number of snapshots prewarmed into .prewarm/", }, ) m.prewarmSnapshotTime = prometheus.NewHistogram( prometheus.HistogramOpts{ Name: "fetchml_prewarm_snapshot_duration_seconds", Help: "Time spent prewarming snapshots", Buckets: []float64{0.1, 0.5, 1, 2, 5, 10, 30, 60, 120}, }, ) // Register all metrics m.registry.MustRegister( m.wsConnections, m.wsMessages, m.wsDuration, m.wsErrors, m.jobsQueued, m.jobsCompleted, m.jobsActive, m.jobDuration, m.queueLength, m.jupyterServices, m.jupyterOps, m.httpRequests, m.httpDuration, m.prewarmSnapshotHit, m.prewarmSnapshotMiss, m.prewarmSnapshotBuilt, m.prewarmSnapshotTime, ) } // Handler returns the Prometheus HTTP handler func (m *Metrics) Handler() http.Handler { return promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{}) } // WebSocket metrics methods func (m *Metrics) IncWSConnections(status string) { m.wsConnections.WithLabelValues(status).Inc() } func (m *Metrics) DecWSConnections(status string) { m.wsConnections.WithLabelValues(status).Dec() } func (m *Metrics) IncWSMessages(opcode, status string) { m.wsMessages.WithLabelValues(opcode, status).Inc() } func (m *Metrics) ObserveWSDuration(opcode string, duration time.Duration) { m.wsDuration.WithLabelValues(opcode).Observe(duration.Seconds()) } func (m *Metrics) IncWSErrors(errType string) { m.wsErrors.WithLabelValues(errType).Inc() } // Job queue metrics methods func (m *Metrics) IncJobsQueued() { m.jobsQueued.Inc() } func (m *Metrics) IncJobsCompleted(status string) { m.jobsCompleted.WithLabelValues(status).Inc() } func (m *Metrics) SetJobsActive(count float64) { m.jobsActive.Set(count) } func (m *Metrics) ObserveJobDuration(status string, duration time.Duration) { m.jobDuration.WithLabelValues(status).Observe(duration.Seconds()) } func (m *Metrics) SetQueueLength(length float64) { m.queueLength.Set(length) } // Jupyter metrics methods func (m *Metrics) SetJupyterServices(status string, count float64) { m.jupyterServices.WithLabelValues(status).Set(count) } func (m *Metrics) IncJupyterOps(operation, status string) { m.jupyterOps.WithLabelValues(operation, status).Inc() } // HTTP metrics methods func (m *Metrics) IncHTTPRequests(method, endpoint, status string) { m.httpRequests.WithLabelValues(method, endpoint, status).Inc() } func (m *Metrics) ObserveHTTPDuration(method, endpoint string, duration time.Duration) { m.httpDuration.WithLabelValues(method, endpoint).Observe(duration.Seconds()) } // Prewarm metrics methods func (m *Metrics) IncPrewarmSnapshotHit() { m.prewarmSnapshotHit.Inc() } func (m *Metrics) IncPrewarmSnapshotMiss() { m.prewarmSnapshotMiss.Inc() } func (m *Metrics) IncPrewarmSnapshotBuilt() { m.prewarmSnapshotBuilt.Inc() } func (m *Metrics) ObservePrewarmSnapshotDuration(duration time.Duration) { m.prewarmSnapshotTime.Observe(duration.Seconds()) }