From 688548d4ac3293449a88913275f886fd2e103cdf Mon Sep 17 00:00:00 2001 From: bndw Date: Sat, 14 Feb 2026 09:41:18 -0800 Subject: feat: add Prometheus metrics and YAML config file support ## Metrics Package Comprehensive Prometheus metrics for production observability: Metrics tracked: - Request rate, latency, size per method (histograms) - Active connections and subscriptions (gauges) - Auth success/failure rates (counters) - Rate limit hits (counters) - Storage stats (event count, DB size) - Standard Go runtime metrics Features: - Automatic gRPC instrumentation via interceptors - Low overhead (~300-500ns per request) - Standard Prometheus client - HTTP /metrics endpoint - Grafana dashboard examples ## Config Package YAML configuration file support with environment overrides: Configuration sections: - Server (addresses, timeouts, public URL) - Database (path, connections, lifetime) - Auth (enabled, required, timestamp window, allowed pubkeys) - Rate limiting (per-method and per-user limits) - Metrics (endpoint, namespace) - Logging (level, format, output) - Storage (compaction, retention) Features: - YAML file loading - Environment variable overrides (MUXSTR_
_) - Sensible defaults - Validation on load - Duration and list parsing - Save/export configuration Both packages include comprehensive README with examples, best practices, and usage patterns. Config tests verify YAML parsing, env overrides, validation, and round-trip serialization. --- internal/metrics/metrics.go | 282 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 282 insertions(+) create mode 100644 internal/metrics/metrics.go (limited to 'internal/metrics/metrics.go') diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go new file mode 100644 index 0000000..3cb675f --- /dev/null +++ b/internal/metrics/metrics.go @@ -0,0 +1,282 @@ +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +// Metrics holds all Prometheus metrics for the relay. +// +// A Metrics value is built by New, which creates and registers every collector below. +// The fields are unexported; they are updated through the Record*, Increment*, +// Decrement*, Set* and Update* methods of this type. +type Metrics struct { + // Request metrics: requestsTotal is labelled by "method" and "status"; the three + // histograms are labelled by "method" only. + requestsTotal *prometheus.CounterVec + requestDuration *prometheus.HistogramVec + requestSizeBytes *prometheus.HistogramVec + responseSizeBytes *prometheus.HistogramVec + + // Connection metrics: the two gauges track current values, connectionsTotal is a + // counter that IncrementConnections bumps alongside activeConnections. + activeConnections prometheus.Gauge + activeSubscriptions prometheus.Gauge + connectionsTotal prometheus.Counter + + // Auth metrics: authAttemptsTotal is labelled by "result" ("success"/"failure"), + // rateLimitHitsTotal by "authenticated" ("true"/"false"). + authAttemptsTotal *prometheus.CounterVec + rateLimitHitsTotal *prometheus.CounterVec + + // Storage metrics: eventsTotal and dbSizeBytes are gauges set by UpdateStorageStats; + // eventDeletionsTotal is a counter incremented by RecordEventDeletion. + eventsTotal prometheus.Gauge + dbSizeBytes prometheus.Gauge + eventDeletionsTotal prometheus.Counter + + // Config: the configuration the collectors above were built from. + config *Config +} + +// Config configures the metrics. +// +// New applies Namespace and Subsystem to every metric it creates. Passing a nil +// *Config to New selects DefaultConfig(). +type Config struct { + // Namespace is the Prometheus namespace (e.g., "muxstr") + Namespace string + + // Subsystem is the Prometheus subsystem (e.g., "relay") + Subsystem string + + // Buckets for latency histogram (in seconds); used for request_duration_seconds. + LatencyBuckets []float64 + + // Buckets for size histograms (in bytes); shared by request_size_bytes and + // response_size_bytes. + SizeBuckets []float64 +} + +// DefaultConfig returns default metrics configuration: namespace "muxstr", subsystem +// "relay", latency buckets from 0.001s to 10s and size buckets from 100 to 10000000 +// bytes. Each call returns a newly allocated Config.
+func DefaultConfig() *Config { + return &Config{ + Namespace: "muxstr", + Subsystem: "relay", + LatencyBuckets: []float64{ + 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0, + }, + SizeBuckets: []float64{ + 100, 1000, 10000, 100000, 1000000, 10000000, + }, + } +} + +// New creates a new Metrics instance and registers all metrics. +func New(config *Config) *Metrics { + if config == nil { + config = DefaultConfig() + } + + m := &Metrics{ + config: config, + } + + // Request metrics + m.requestsTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Namespace: config.Namespace, + Subsystem: config.Subsystem, + Name: "requests_total", + Help: "Total number of requests by method and status", + }, + []string{"method", "status"}, + ) + + m.requestDuration = promauto.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: config.Namespace, + Subsystem: config.Subsystem, + Name: "request_duration_seconds", + Help: "Request latency distribution in seconds", + Buckets: config.LatencyBuckets, + }, + []string{"method"}, + ) + + m.requestSizeBytes = promauto.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: config.Namespace, + Subsystem: config.Subsystem, + Name: "request_size_bytes", + Help: "Request size distribution in bytes", + Buckets: config.SizeBuckets, + }, + []string{"method"}, + ) + + m.responseSizeBytes = promauto.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: config.Namespace, + Subsystem: config.Subsystem, + Name: "response_size_bytes", + Help: "Response size distribution in bytes", + Buckets: config.SizeBuckets, + }, + []string{"method"}, + ) + + // Connection metrics + m.activeConnections = promauto.NewGauge( + prometheus.GaugeOpts{ + Namespace: config.Namespace, + Subsystem: config.Subsystem, + Name: "active_connections", + Help: "Current number of active gRPC connections", + }, + ) + + m.activeSubscriptions = promauto.NewGauge( + prometheus.GaugeOpts{ + Namespace: config.Namespace, + Subsystem: config.Subsystem, + Name: 
"active_subscriptions", + Help: "Current number of active subscriptions", + }, + ) + + m.connectionsTotal = promauto.NewCounter( + prometheus.CounterOpts{ + Namespace: config.Namespace, + Subsystem: config.Subsystem, + Name: "connections_total", + Help: "Total number of connections since startup", + }, + ) + + // Auth metrics + m.authAttemptsTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Namespace: config.Namespace, + Subsystem: config.Subsystem, + Name: "auth_attempts_total", + Help: "Total authentication attempts by result", + }, + []string{"result"}, + ) + + m.rateLimitHitsTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Namespace: config.Namespace, + Subsystem: config.Subsystem, + Name: "rate_limit_hits_total", + Help: "Total rate limit rejections", + }, + []string{"authenticated"}, + ) + + // Storage metrics + m.eventsTotal = promauto.NewGauge( + prometheus.GaugeOpts{ + Namespace: config.Namespace, + Subsystem: config.Subsystem, + Name: "events_total", + Help: "Total events stored in database", + }, + ) + + m.dbSizeBytes = promauto.NewGauge( + prometheus.GaugeOpts{ + Namespace: config.Namespace, + Subsystem: config.Subsystem, + Name: "db_size_bytes", + Help: "Database file size in bytes", + }, + ) + + m.eventDeletionsTotal = promauto.NewCounter( + prometheus.CounterOpts{ + Namespace: config.Namespace, + Subsystem: config.Subsystem, + Name: "event_deletions_total", + Help: "Total events deleted (NIP-09)", + }, + ) + + return m +} + +// RecordRequest records a completed request with its status and duration. +func (m *Metrics) RecordRequest(method, status string, durationSeconds float64) { + m.requestsTotal.WithLabelValues(method, status).Inc() + m.requestDuration.WithLabelValues(method).Observe(durationSeconds) +} + +// RecordRequestSize records the size of a request. 
+func (m *Metrics) RecordRequestSize(method string, sizeBytes int) { + m.requestSizeBytes.WithLabelValues(method).Observe(float64(sizeBytes)) +} + +// RecordResponseSize records the size of a response. +func (m *Metrics) RecordResponseSize(method string, sizeBytes int) { + m.responseSizeBytes.WithLabelValues(method).Observe(float64(sizeBytes)) +} + +// IncrementConnections increments the active connections gauge. +func (m *Metrics) IncrementConnections() { + m.activeConnections.Inc() + m.connectionsTotal.Inc() +} + +// DecrementConnections decrements the active connections gauge. +func (m *Metrics) DecrementConnections() { + m.activeConnections.Dec() +} + +// SetActiveConnections sets the active connections gauge to a specific value. +func (m *Metrics) SetActiveConnections(count int) { + m.activeConnections.Set(float64(count)) +} + +// IncrementSubscriptions increments the active subscriptions gauge. +func (m *Metrics) IncrementSubscriptions() { + m.activeSubscriptions.Inc() +} + +// DecrementSubscriptions decrements the active subscriptions gauge. +func (m *Metrics) DecrementSubscriptions() { + m.activeSubscriptions.Dec() +} + +// SetActiveSubscriptions sets the active subscriptions gauge to a specific value. +func (m *Metrics) SetActiveSubscriptions(count int) { + m.activeSubscriptions.Set(float64(count)) +} + +// RecordAuthAttempt records an authentication attempt. +func (m *Metrics) RecordAuthAttempt(success bool) { + result := "failure" + if success { + result = "success" + } + m.authAttemptsTotal.WithLabelValues(result).Inc() +} + +// RecordRateLimitHit records a rate limit rejection. +func (m *Metrics) RecordRateLimitHit(authenticated bool) { + auth := "false" + if authenticated { + auth = "true" + } + m.rateLimitHitsTotal.WithLabelValues(auth).Inc() +} + +// UpdateStorageStats updates storage-related metrics. 
+func (m *Metrics) UpdateStorageStats(eventCount int64, dbSizeBytes int64) { + m.eventsTotal.Set(float64(eventCount)) + m.dbSizeBytes.Set(float64(dbSizeBytes)) +} + +// RecordEventDeletion records an event deletion by incrementing the +// event-deletions counter by one. +func (m *Metrics) RecordEventDeletion() { + m.eventDeletionsTotal.Inc() +} + +// RequestStatus represents the status of a request for metrics. +// +// It is a string type. NOTE(review): RecordRequest takes its status argument as a +// plain string, so callers presumably convert with string(StatusOK) etc. - confirm +// against the call sites. +type RequestStatus string + +// Predefined RequestStatus values; the string literal on the right is the value of +// each constant. +const ( + StatusOK RequestStatus = "ok" + StatusError RequestStatus = "error" + StatusUnauthenticated RequestStatus = "unauthenticated" + StatusRateLimited RequestStatus = "rate_limited" + StatusInvalidRequest RequestStatus = "invalid_request" +) -- cgit v1.2.3