grpc: client/server histogram bucket change (#7591)

Changes the grpc client/server handling-time histogram buckets from the
Prometheus defaults to a custom set that better tracks the long tail of
slow requests. Removes the `.005` and `.25` buckets and adds the larger
`45` and `90` buckets in their place, so the number of buckets (and
therefore the metric cardinality) stays the same.

```
# Before, the default prometheus buckets
[]float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10}

# After
[]float64{.01, .025, .05, .1, .5, 1, 2.5, 5, 10, 45, 90}
```

Fixes https://github.com/letsencrypt/boulder/issues/6384
This commit is contained in:
Phil Porada 2024-07-16 13:36:57 -04:00 committed by GitHub
parent f2e46486f9
commit 07d6713736
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 6 additions and 2 deletions

View File

@ -85,7 +85,9 @@ type clientMetrics struct {
func newClientMetrics(stats prometheus.Registerer) (clientMetrics, error) {
// Create the grpc prometheus client metrics instance and register it
grpcMetrics := grpc_prometheus.NewClientMetrics(
grpc_prometheus.WithClientHandlingTimeHistogram(),
grpc_prometheus.WithClientHandlingTimeHistogram(
grpc_prometheus.WithHistogramBuckets([]float64{.01, .025, .05, .1, .5, 1, 2.5, 5, 10, 45, 90}),
),
)
err := stats.Register(grpcMetrics)
if err != nil {

View File

@ -292,7 +292,9 @@ type serverMetrics struct {
func newServerMetrics(stats prometheus.Registerer) (serverMetrics, error) {
// Create the grpc prometheus server metrics instance and register it
grpcMetrics := grpc_prometheus.NewServerMetrics(
grpc_prometheus.WithServerHandlingTimeHistogram(),
grpc_prometheus.WithServerHandlingTimeHistogram(
grpc_prometheus.WithHistogramBuckets([]float64{.01, .025, .05, .1, .5, 1, 2.5, 5, 10, 45, 90}),
),
)
err := stats.Register(grpcMetrics)
if err != nil {