ratelimits: Export override utilization metrics (#7044)

Fixes #7036
This commit is contained in:
Samantha 2023-08-23 13:40:23 -04:00 committed by GitHub
parent 4ed54ff9c6
commit 077a4e2dc4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 38 additions and 5 deletions

View File

@ -34,6 +34,10 @@ type limit struct {
// bucket to go from empty to full (burst * (period / count)). This is
// precomputed to avoid doing the same calculation on every request.
burstOffset int64
// isOverride is true if this limit is an override limit, false if it is a
// default limit.
isOverride bool
}
func precomputeLimit(l limit) limit {
@ -131,6 +135,7 @@ func loadAndParseOverrideLimits(path string) (limits, error) {
fqdnSet := core.HashNames(domains)
id = fmt.Sprintf("%s:%s", regId, fqdnSet)
}
v.isOverride = true
parsed[bucketKey(name, id)] = precomputeLimit(v)
}
return parsed, nil

View File

@ -7,6 +7,7 @@ import (
"time"
"github.com/jmhodges/clock"
"github.com/prometheus/client_golang/prometheus"
)
// ErrInvalidCost indicates that the cost specified was <= 0.
@ -34,13 +35,15 @@ type Limiter struct {
// source is used to store buckets. It must be safe for concurrent use.
source source
clk clock.Clock
overrideUsageGauge *prometheus.GaugeVec
}
// NewLimiter returns a new *Limiter. The provided source must be safe for
// concurrent use. The defaults and overrides paths are expected to be paths to
// YAML files that contain the default and override limits, respectively. The
// overrides file is optional; all other arguments are required. The limiter's
// override usage gauge is registered with the provided stats registerer.
func NewLimiter(clk clock.Clock, source source, defaults, overrides string) (*Limiter, error) {
func NewLimiter(clk clock.Clock, source source, defaults, overrides string, stats prometheus.Registerer) (*Limiter, error) {
limiter := &Limiter{source: source, clk: clk}
var err error
@ -60,6 +63,12 @@ func NewLimiter(clk clock.Clock, source source, defaults, overrides string) (*Li
return nil, err
}
limiter.overrideUsageGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "ratelimits_override_usage",
Help: "Proportion of override limit used, by limit name and client id.",
}, []string{"limit_name", "client_id"})
stats.MustRegister(limiter.overrideUsageGauge)
return limiter, nil
}
@ -160,6 +169,13 @@ func (l *Limiter) Spend(ctx context.Context, name Name, id string, cost int64) (
d := maybeSpend(l.clk, limit, tat, cost)
if limit.isOverride {
// Calculate the current utilization of the override limit for the
// specified client id.
utilization := float64(limit.Burst-d.Remaining) / float64(limit.Burst)
l.overrideUsageGauge.WithLabelValues(nameToString[name], id).Set(utilization)
}
if !d.Allowed {
return d, nil
}

View File

@ -8,7 +8,9 @@ import (
"time"
"github.com/jmhodges/clock"
"github.com/letsencrypt/boulder/metrics"
"github.com/letsencrypt/boulder/test"
"github.com/prometheus/client_golang/prometheus"
)
// tenZeroZeroTwo is overridden in 'testdata/working_override.yml' to have
@ -19,7 +21,7 @@ const tenZeroZeroTwo = "10.0.0.2"
// - 'NewRegistrationsPerIPAddress' burst: 20 count: 20 period: 1s
// - 'NewRegistrationsPerIPAddress:10.0.0.2' burst: 40 count: 40 period: 1s
func newTestLimiter(t *testing.T, s source, clk clock.FakeClock) *Limiter {
l, err := NewLimiter(clk, s, "testdata/working_default.yml", "testdata/working_override.yml")
l, err := NewLimiter(clk, s, "testdata/working_default.yml", "testdata/working_override.yml", metrics.NoopRegisterer)
test.AssertNotError(t, err, "should not error")
return l
}
@ -44,16 +46,16 @@ func setup(t *testing.T) (context.Context, map[string]*Limiter, clock.FakeClock,
func Test_Limiter_WithBadLimitsPath(t *testing.T) {
t.Parallel()
_, err := NewLimiter(clock.NewFake(), newInmem(), "testdata/does-not-exist.yml", "")
_, err := NewLimiter(clock.NewFake(), newInmem(), "testdata/does-not-exist.yml", "", metrics.NoopRegisterer)
test.AssertError(t, err, "should error")
_, err = NewLimiter(clock.NewFake(), newInmem(), "testdata/defaults.yml", "testdata/does-not-exist.yml")
_, err = NewLimiter(clock.NewFake(), newInmem(), "testdata/defaults.yml", "testdata/does-not-exist.yml", metrics.NoopRegisterer)
test.AssertError(t, err, "should error")
}
func Test_Limiter_getLimitNoExist(t *testing.T) {
t.Parallel()
l, err := NewLimiter(clock.NewFake(), newInmem(), "testdata/working_default.yml", "")
l, err := NewLimiter(clock.NewFake(), newInmem(), "testdata/working_default.yml", "", metrics.NoopRegisterer)
test.AssertNotError(t, err, "should not error")
_, err = l.getLimit(Name(9999), "")
test.AssertError(t, err, "should error")
@ -76,6 +78,11 @@ func Test_Limiter_CheckWithLimitOverrides(t *testing.T) {
testCtx, limiters, clk, _ := setup(t)
for name, l := range limiters {
t.Run(name, func(t *testing.T) {
// Verify our overrideUsageGauge is being set correctly. 0.0 == 0% of
// the bucket has been consumed.
test.AssertMetricWithLabelsEquals(t, l.overrideUsageGauge, prometheus.Labels{
"limit_name": nameToString[NewRegistrationsPerIPAddress], "client_id": tenZeroZeroTwo}, 0)
// Attempt to check a spend of 41 requests (a cost greater than the
// limit's burst capacity). This should fail with a specific error.
_, err := l.Check(testCtx, NewRegistrationsPerIPAddress, tenZeroZeroTwo, 41)
@ -98,6 +105,11 @@ func Test_Limiter_CheckWithLimitOverrides(t *testing.T) {
test.AssertEquals(t, d.Remaining, int64(0))
test.AssertEquals(t, d.ResetIn, time.Second)
// Verify our overrideUsageGauge is being set correctly. 1.0 == 100% of
// the bucket has been consumed.
test.AssertMetricWithLabelsEquals(t, l.overrideUsageGauge, prometheus.Labels{
"limit_name": nameToString[NewRegistrationsPerIPAddress], "client_id": tenZeroZeroTwo}, 1.0)
// Verify our RetryIn is correct. 1 second == 1000 milliseconds and
// 1000/40 = 25 milliseconds per request.
test.AssertEquals(t, d.RetryIn, time.Millisecond*25)