summaryrefslogtreecommitdiffstats
path: root/internal/metrics/metrics.go
diff options
context:
space:
mode:
authorbndw <ben@bdw.to>2026-02-14 09:41:18 -0800
committerbndw <ben@bdw.to>2026-02-14 09:41:18 -0800
commit688548d4ac3293449a88913275f886fd2e103cdf (patch)
tree5bf83c9a9b50863b6201ebf5066ee6855fefe725 /internal/metrics/metrics.go
parentf0169fa1f9d2e2a5d1c292b9080da10ef0878953 (diff)
feat: add Prometheus metrics and YAML config file support
## Metrics Package

Comprehensive Prometheus metrics for production observability.

Metrics tracked:
- Request rate, latency, size per method (histograms)
- Active connections and subscriptions (gauges)
- Auth success/failure rates (counters)
- Rate limit hits (counters)
- Storage stats (event count, DB size)
- Standard Go runtime metrics

Features:
- Automatic gRPC instrumentation via interceptors
- Low overhead (~300-500ns per request)
- Standard Prometheus client
- HTTP /metrics endpoint
- Grafana dashboard examples

## Config Package

YAML configuration file support with environment overrides.

Configuration sections:
- Server (addresses, timeouts, public URL)
- Database (path, connections, lifetime)
- Auth (enabled, required, timestamp window, allowed pubkeys)
- Rate limiting (per-method and per-user limits)
- Metrics (endpoint, namespace)
- Logging (level, format, output)
- Storage (compaction, retention)

Features:
- YAML file loading
- Environment variable overrides (MUXSTR_<SECTION>_<KEY>)
- Sensible defaults
- Validation on load
- Duration and list parsing
- Save/export configuration

Both packages include a comprehensive README with examples, best practices, and usage patterns. Config tests verify YAML parsing, env overrides, validation, and round-trip serialization.
Diffstat (limited to 'internal/metrics/metrics.go')
-rw-r--r--internal/metrics/metrics.go282
1 files changed, 282 insertions, 0 deletions
diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go
new file mode 100644
index 0000000..3cb675f
--- /dev/null
+++ b/internal/metrics/metrics.go
@@ -0,0 +1,282 @@
1package metrics
2
3import (
4 "github.com/prometheus/client_golang/prometheus"
5 "github.com/prometheus/client_golang/prometheus/promauto"
6)
7
8// Metrics holds all Prometheus metrics for the relay.
9type Metrics struct {
10 // Request metrics
11 requestsTotal *prometheus.CounterVec
12 requestDuration *prometheus.HistogramVec
13 requestSizeBytes *prometheus.HistogramVec
14 responseSizeBytes *prometheus.HistogramVec
15
16 // Connection metrics
17 activeConnections prometheus.Gauge
18 activeSubscriptions prometheus.Gauge
19 connectionsTotal prometheus.Counter
20
21 // Auth metrics
22 authAttemptsTotal *prometheus.CounterVec
23 rateLimitHitsTotal *prometheus.CounterVec
24
25 // Storage metrics
26 eventsTotal prometheus.Gauge
27 dbSizeBytes prometheus.Gauge
28 eventDeletionsTotal prometheus.Counter
29
30 // Config
31 config *Config
32}
33
// Config carries the naming and histogram-bucket settings used when the
// collectors are created.
type Config struct {
	// Namespace and Subsystem are the Prometheus namespace (e.g., "muxstr")
	// and subsystem (e.g., "relay") applied to every metric.
	Namespace, Subsystem string

	// LatencyBuckets are the latency histogram bucket bounds, in seconds;
	// SizeBuckets are the size histogram bucket bounds, in bytes.
	LatencyBuckets, SizeBuckets []float64
}
48
49// DefaultConfig returns default metrics configuration.
50func DefaultConfig() *Config {
51 return &Config{
52 Namespace: "muxstr",
53 Subsystem: "relay",
54 LatencyBuckets: []float64{
55 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0,
56 },
57 SizeBuckets: []float64{
58 100, 1000, 10000, 100000, 1000000, 10000000,
59 },
60 }
61}
62
63// New creates a new Metrics instance and registers all metrics.
64func New(config *Config) *Metrics {
65 if config == nil {
66 config = DefaultConfig()
67 }
68
69 m := &Metrics{
70 config: config,
71 }
72
73 // Request metrics
74 m.requestsTotal = promauto.NewCounterVec(
75 prometheus.CounterOpts{
76 Namespace: config.Namespace,
77 Subsystem: config.Subsystem,
78 Name: "requests_total",
79 Help: "Total number of requests by method and status",
80 },
81 []string{"method", "status"},
82 )
83
84 m.requestDuration = promauto.NewHistogramVec(
85 prometheus.HistogramOpts{
86 Namespace: config.Namespace,
87 Subsystem: config.Subsystem,
88 Name: "request_duration_seconds",
89 Help: "Request latency distribution in seconds",
90 Buckets: config.LatencyBuckets,
91 },
92 []string{"method"},
93 )
94
95 m.requestSizeBytes = promauto.NewHistogramVec(
96 prometheus.HistogramOpts{
97 Namespace: config.Namespace,
98 Subsystem: config.Subsystem,
99 Name: "request_size_bytes",
100 Help: "Request size distribution in bytes",
101 Buckets: config.SizeBuckets,
102 },
103 []string{"method"},
104 )
105
106 m.responseSizeBytes = promauto.NewHistogramVec(
107 prometheus.HistogramOpts{
108 Namespace: config.Namespace,
109 Subsystem: config.Subsystem,
110 Name: "response_size_bytes",
111 Help: "Response size distribution in bytes",
112 Buckets: config.SizeBuckets,
113 },
114 []string{"method"},
115 )
116
117 // Connection metrics
118 m.activeConnections = promauto.NewGauge(
119 prometheus.GaugeOpts{
120 Namespace: config.Namespace,
121 Subsystem: config.Subsystem,
122 Name: "active_connections",
123 Help: "Current number of active gRPC connections",
124 },
125 )
126
127 m.activeSubscriptions = promauto.NewGauge(
128 prometheus.GaugeOpts{
129 Namespace: config.Namespace,
130 Subsystem: config.Subsystem,
131 Name: "active_subscriptions",
132 Help: "Current number of active subscriptions",
133 },
134 )
135
136 m.connectionsTotal = promauto.NewCounter(
137 prometheus.CounterOpts{
138 Namespace: config.Namespace,
139 Subsystem: config.Subsystem,
140 Name: "connections_total",
141 Help: "Total number of connections since startup",
142 },
143 )
144
145 // Auth metrics
146 m.authAttemptsTotal = promauto.NewCounterVec(
147 prometheus.CounterOpts{
148 Namespace: config.Namespace,
149 Subsystem: config.Subsystem,
150 Name: "auth_attempts_total",
151 Help: "Total authentication attempts by result",
152 },
153 []string{"result"},
154 )
155
156 m.rateLimitHitsTotal = promauto.NewCounterVec(
157 prometheus.CounterOpts{
158 Namespace: config.Namespace,
159 Subsystem: config.Subsystem,
160 Name: "rate_limit_hits_total",
161 Help: "Total rate limit rejections",
162 },
163 []string{"authenticated"},
164 )
165
166 // Storage metrics
167 m.eventsTotal = promauto.NewGauge(
168 prometheus.GaugeOpts{
169 Namespace: config.Namespace,
170 Subsystem: config.Subsystem,
171 Name: "events_total",
172 Help: "Total events stored in database",
173 },
174 )
175
176 m.dbSizeBytes = promauto.NewGauge(
177 prometheus.GaugeOpts{
178 Namespace: config.Namespace,
179 Subsystem: config.Subsystem,
180 Name: "db_size_bytes",
181 Help: "Database file size in bytes",
182 },
183 )
184
185 m.eventDeletionsTotal = promauto.NewCounter(
186 prometheus.CounterOpts{
187 Namespace: config.Namespace,
188 Subsystem: config.Subsystem,
189 Name: "event_deletions_total",
190 Help: "Total events deleted (NIP-09)",
191 },
192 )
193
194 return m
195}
196
197// RecordRequest records a completed request with its status and duration.
198func (m *Metrics) RecordRequest(method, status string, durationSeconds float64) {
199 m.requestsTotal.WithLabelValues(method, status).Inc()
200 m.requestDuration.WithLabelValues(method).Observe(durationSeconds)
201}
202
203// RecordRequestSize records the size of a request.
204func (m *Metrics) RecordRequestSize(method string, sizeBytes int) {
205 m.requestSizeBytes.WithLabelValues(method).Observe(float64(sizeBytes))
206}
207
208// RecordResponseSize records the size of a response.
209func (m *Metrics) RecordResponseSize(method string, sizeBytes int) {
210 m.responseSizeBytes.WithLabelValues(method).Observe(float64(sizeBytes))
211}
212
213// IncrementConnections increments the active connections gauge.
214func (m *Metrics) IncrementConnections() {
215 m.activeConnections.Inc()
216 m.connectionsTotal.Inc()
217}
218
219// DecrementConnections decrements the active connections gauge.
220func (m *Metrics) DecrementConnections() {
221 m.activeConnections.Dec()
222}
223
224// SetActiveConnections sets the active connections gauge to a specific value.
225func (m *Metrics) SetActiveConnections(count int) {
226 m.activeConnections.Set(float64(count))
227}
228
229// IncrementSubscriptions increments the active subscriptions gauge.
230func (m *Metrics) IncrementSubscriptions() {
231 m.activeSubscriptions.Inc()
232}
233
234// DecrementSubscriptions decrements the active subscriptions gauge.
235func (m *Metrics) DecrementSubscriptions() {
236 m.activeSubscriptions.Dec()
237}
238
239// SetActiveSubscriptions sets the active subscriptions gauge to a specific value.
240func (m *Metrics) SetActiveSubscriptions(count int) {
241 m.activeSubscriptions.Set(float64(count))
242}
243
244// RecordAuthAttempt records an authentication attempt.
245func (m *Metrics) RecordAuthAttempt(success bool) {
246 result := "failure"
247 if success {
248 result = "success"
249 }
250 m.authAttemptsTotal.WithLabelValues(result).Inc()
251}
252
253// RecordRateLimitHit records a rate limit rejection.
254func (m *Metrics) RecordRateLimitHit(authenticated bool) {
255 auth := "false"
256 if authenticated {
257 auth = "true"
258 }
259 m.rateLimitHitsTotal.WithLabelValues(auth).Inc()
260}
261
262// UpdateStorageStats updates storage-related metrics.
263func (m *Metrics) UpdateStorageStats(eventCount int64, dbSizeBytes int64) {
264 m.eventsTotal.Set(float64(eventCount))
265 m.dbSizeBytes.Set(float64(dbSizeBytes))
266}
267
268// RecordEventDeletion records an event deletion.
269func (m *Metrics) RecordEventDeletion() {
270 m.eventDeletionsTotal.Inc()
271}
272
// RequestStatus names the outcome of a request for metrics purposes.
//
// NOTE(review): presumably these values are meant for the status label of
// RecordRequest, which takes a plain string, so callers would need to convert
// with string(...) — confirm against the call sites.
type RequestStatus string

// Known request outcomes.
const (
	StatusOK              = RequestStatus("ok")
	StatusError           = RequestStatus("error")
	StatusUnauthenticated = RequestStatus("unauthenticated")
	StatusRateLimited     = RequestStatus("rate_limited")
	StatusInvalidRequest  = RequestStatus("invalid_request")
)