RA: Use the same key/id as the override (#7098)
Fix the overrideUsageGauge implementation added in #7076 that resulted in metric cardinality explosion.
This commit is contained in:
parent
574c5cfa9b
commit
162458ff52
38
ra/ra.go
38
ra/ra.go
|
|
@ -385,11 +385,11 @@ func (ra *RegistrationAuthorityImpl) checkRegistrationIPLimit(ctx context.Contex
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
threshold, isOverridden := limit.GetThreshold(ip.String(), noRegistrationID)
|
threshold, overrideKey := limit.GetThreshold(ip.String(), noRegistrationID)
|
||||||
if isOverridden {
|
if overrideKey != "" {
|
||||||
// We do not support overrides for the NewRegistrationsPerIPRange limit.
|
// We do not support overrides for the NewRegistrationsPerIPRange limit.
|
||||||
utilization := float64(count.Count) / float64(threshold)
|
utilization := float64(count.Count) / float64(threshold)
|
||||||
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.RegistrationsPerIP, ip.String()).Set(utilization)
|
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.RegistrationsPerIP, overrideKey).Set(utilization)
|
||||||
}
|
}
|
||||||
|
|
||||||
if count.Count >= threshold {
|
if count.Count >= threshold {
|
||||||
|
|
@ -582,7 +582,7 @@ func (ra *RegistrationAuthorityImpl) validateContacts(contacts []string) error {
|
||||||
func (ra *RegistrationAuthorityImpl) checkPendingAuthorizationLimit(ctx context.Context, regID int64, limit ratelimit.RateLimitPolicy) error {
|
func (ra *RegistrationAuthorityImpl) checkPendingAuthorizationLimit(ctx context.Context, regID int64, limit ratelimit.RateLimitPolicy) error {
|
||||||
// This rate limit's threshold can only be overridden on a per-regID basis,
|
// This rate limit's threshold can only be overridden on a per-regID basis,
|
||||||
// not based on any other key.
|
// not based on any other key.
|
||||||
threshold, isOverridden := limit.GetThreshold("", regID)
|
threshold, overrideKey := limit.GetThreshold("", regID)
|
||||||
if threshold == -1 {
|
if threshold == -1 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
@ -592,9 +592,9 @@ func (ra *RegistrationAuthorityImpl) checkPendingAuthorizationLimit(ctx context.
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if isOverridden {
|
if overrideKey != "" {
|
||||||
utilization := float64(countPB.Count) / float64(threshold)
|
utilization := float64(countPB.Count) / float64(threshold)
|
||||||
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.PendingAuthorizationsPerAccount, strconv.FormatInt(regID, 10)).Set(utilization)
|
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.PendingAuthorizationsPerAccount, overrideKey).Set(utilization)
|
||||||
}
|
}
|
||||||
if countPB.Count >= threshold {
|
if countPB.Count >= threshold {
|
||||||
ra.log.Infof("Rate limit exceeded, PendingAuthorizationsByRegID, regID: %d", regID)
|
ra.log.Infof("Rate limit exceeded, PendingAuthorizationsByRegID, regID: %d", regID)
|
||||||
|
|
@ -642,10 +642,10 @@ func (ra *RegistrationAuthorityImpl) checkInvalidAuthorizationLimit(ctx context.
|
||||||
// Most rate limits have a key for overrides, but there is no meaningful key
|
// Most rate limits have a key for overrides, but there is no meaningful key
|
||||||
// here.
|
// here.
|
||||||
noKey := ""
|
noKey := ""
|
||||||
threshold, isOverridden := limit.GetThreshold(noKey, regID)
|
threshold, overrideKey := limit.GetThreshold(noKey, regID)
|
||||||
if isOverridden {
|
if overrideKey != "" {
|
||||||
utilization := float64(count.Count) / float64(threshold)
|
utilization := float64(count.Count) / float64(threshold)
|
||||||
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.InvalidAuthorizationsPerAccount, strconv.FormatInt(regID, 10)).Set(utilization)
|
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.InvalidAuthorizationsPerAccount, overrideKey).Set(utilization)
|
||||||
}
|
}
|
||||||
if count.Count >= threshold {
|
if count.Count >= threshold {
|
||||||
ra.log.Infof("Rate limit exceeded, InvalidAuthorizationsByRegID, regID: %d", regID)
|
ra.log.Infof("Rate limit exceeded, InvalidAuthorizationsByRegID, regID: %d", regID)
|
||||||
|
|
@ -671,10 +671,10 @@ func (ra *RegistrationAuthorityImpl) checkNewOrdersPerAccountLimit(ctx context.C
|
||||||
}
|
}
|
||||||
// There is no meaningful override key to use for this rate limit
|
// There is no meaningful override key to use for this rate limit
|
||||||
noKey := ""
|
noKey := ""
|
||||||
threshold, isOverridden := limit.GetThreshold(noKey, acctID)
|
threshold, overrideKey := limit.GetThreshold(noKey, acctID)
|
||||||
if isOverridden {
|
if overrideKey != "" {
|
||||||
utilization := float64(count.Count) / float64(threshold)
|
utilization := float64(count.Count) / float64(threshold)
|
||||||
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.NewOrdersPerAccount, strconv.FormatInt(acctID, 10)).Set(utilization)
|
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.NewOrdersPerAccount, overrideKey).Set(utilization)
|
||||||
}
|
}
|
||||||
|
|
||||||
if count.Count >= threshold {
|
if count.Count >= threshold {
|
||||||
|
|
@ -1413,11 +1413,10 @@ func (ra *RegistrationAuthorityImpl) enforceNameCounts(ctx context.Context, name
|
||||||
// over the names slice input to ensure the order of badNames will
|
// over the names slice input to ensure the order of badNames will
|
||||||
// return the badNames in the same order they were input.
|
// return the badNames in the same order they were input.
|
||||||
for _, name := range names {
|
for _, name := range names {
|
||||||
threshold, isOverridden := limit.GetThreshold(name, regID)
|
threshold, overrideKey := limit.GetThreshold(name, regID)
|
||||||
if isOverridden {
|
if overrideKey != "" {
|
||||||
clientId := fmt.Sprintf("%d:%s", regID, name)
|
|
||||||
utilization := float64(response.Counts[name]) / float64(threshold)
|
utilization := float64(response.Counts[name]) / float64(threshold)
|
||||||
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.CertificatesPerName, clientId).Set(utilization)
|
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.CertificatesPerName, overrideKey).Set(utilization)
|
||||||
}
|
}
|
||||||
if response.Counts[name] >= threshold {
|
if response.Counts[name] >= threshold {
|
||||||
badNames = append(badNames, name)
|
badNames = append(badNames, name)
|
||||||
|
|
@ -1470,7 +1469,7 @@ func (ra *RegistrationAuthorityImpl) checkCertificatesPerNameLimit(ctx context.C
|
||||||
|
|
||||||
func (ra *RegistrationAuthorityImpl) checkCertificatesPerFQDNSetLimit(ctx context.Context, names []string, limit ratelimit.RateLimitPolicy, regID int64) error {
|
func (ra *RegistrationAuthorityImpl) checkCertificatesPerFQDNSetLimit(ctx context.Context, names []string, limit ratelimit.RateLimitPolicy, regID int64) error {
|
||||||
names = core.UniqueLowerNames(names)
|
names = core.UniqueLowerNames(names)
|
||||||
threshold, isOverridden := limit.GetThreshold(strings.Join(names, ","), regID)
|
threshold, overrideKey := limit.GetThreshold(strings.Join(names, ","), regID)
|
||||||
if threshold <= 0 {
|
if threshold <= 0 {
|
||||||
// No limit configured.
|
// No limit configured.
|
||||||
return nil
|
return nil
|
||||||
|
|
@ -1484,10 +1483,9 @@ func (ra *RegistrationAuthorityImpl) checkCertificatesPerFQDNSetLimit(ctx contex
|
||||||
return fmt.Errorf("checking duplicate certificate limit for %q: %s", names, err)
|
return fmt.Errorf("checking duplicate certificate limit for %q: %s", names, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if isOverridden {
|
if overrideKey != "" {
|
||||||
clientId := fmt.Sprintf("%d:%s", regID, strings.Join(names, ","))
|
|
||||||
utilization := float64(len(prevIssuances.TimestampsNS)) / float64(threshold)
|
utilization := float64(len(prevIssuances.TimestampsNS)) / float64(threshold)
|
||||||
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.CertificatesPerFQDNSet, clientId).Set(utilization)
|
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.CertificatesPerFQDNSet, overrideKey).Set(utilization)
|
||||||
}
|
}
|
||||||
|
|
||||||
if int64(len(prevIssuances.TimestampsNS)) < threshold {
|
if int64(len(prevIssuances.TimestampsNS)) < threshold {
|
||||||
|
|
|
||||||
|
|
@ -1235,9 +1235,9 @@ func TestCheckCertificatesPerNameLimit(t *testing.T) {
|
||||||
err = ra.checkCertificatesPerNameLimit(ctx, []string{"www.example.com", "example.com", "good-example.com"}, rlp, 99)
|
err = ra.checkCertificatesPerNameLimit(ctx, []string{"www.example.com", "example.com", "good-example.com"}, rlp, 99)
|
||||||
test.AssertError(t, err, "incorrectly failed to rate limit example.com")
|
test.AssertError(t, err, "incorrectly failed to rate limit example.com")
|
||||||
test.AssertErrorIs(t, err, berrors.RateLimit)
|
test.AssertErrorIs(t, err, berrors.RateLimit)
|
||||||
// There are no overrides for "99:example.com", so the override usage gauge
|
// There are no overrides for "example.com", so the override usage gauge
|
||||||
// should contain 0 entries with labels matching it.
|
// should contain 0 entries with labels matching it.
|
||||||
test.AssertMetricWithLabelsEquals(t, ra.rlOverrideUsageGauge, prometheus.Labels{"limit": ratelimit.CertificatesPerName, "override_key": "99:example.com"}, 0)
|
test.AssertMetricWithLabelsEquals(t, ra.rlOverrideUsageGauge, prometheus.Labels{"limit": ratelimit.CertificatesPerName, "override_key": "example.com"}, 0)
|
||||||
// Verify it has no sub errors as there is only one bad name
|
// Verify it has no sub errors as there is only one bad name
|
||||||
test.AssertEquals(t, err.Error(), "too many certificates already issued for \"example.com\". Retry after 1970-01-01T23:00:00Z: see https://letsencrypt.org/docs/rate-limits/")
|
test.AssertEquals(t, err.Error(), "too many certificates already issued for \"example.com\". Retry after 1970-01-01T23:00:00Z: see https://letsencrypt.org/docs/rate-limits/")
|
||||||
var bErr *berrors.BoulderError
|
var bErr *berrors.BoulderError
|
||||||
|
|
@ -1265,10 +1265,10 @@ func TestCheckCertificatesPerNameLimit(t *testing.T) {
|
||||||
mockSA.nameCounts.Counts["bigissuer.com"] = 50
|
mockSA.nameCounts.Counts["bigissuer.com"] = 50
|
||||||
err = ra.checkCertificatesPerNameLimit(ctx, []string{"www.example.com", "subdomain.bigissuer.com"}, rlp, 99)
|
err = ra.checkCertificatesPerNameLimit(ctx, []string{"www.example.com", "subdomain.bigissuer.com"}, rlp, 99)
|
||||||
test.AssertNotError(t, err, "incorrectly rate limited bigissuer")
|
test.AssertNotError(t, err, "incorrectly rate limited bigissuer")
|
||||||
// "99:bigissuer.com" has an override of 100 and they've issued 50. We do
|
// "bigissuer.com" has an override of 100 and they've issued 50. We do
|
||||||
// not count issuance that has yet to occur, so we expect to see 50%
|
// not count issuance that has yet to occur, so we expect to see 50%
|
||||||
// utilization.
|
// utilization.
|
||||||
test.AssertMetricWithLabelsEquals(t, ra.rlOverrideUsageGauge, prometheus.Labels{"limit": ratelimit.CertificatesPerName, "override_key": "99:bigissuer.com"}, .5)
|
test.AssertMetricWithLabelsEquals(t, ra.rlOverrideUsageGauge, prometheus.Labels{"limit": ratelimit.CertificatesPerName, "override_key": "bigissuer.com"}, .5)
|
||||||
|
|
||||||
// Two base domains, one above its override
|
// Two base domains, one above its override
|
||||||
mockSA.nameCounts.Counts["example.com"] = 10
|
mockSA.nameCounts.Counts["example.com"] = 10
|
||||||
|
|
@ -1276,18 +1276,18 @@ func TestCheckCertificatesPerNameLimit(t *testing.T) {
|
||||||
err = ra.checkCertificatesPerNameLimit(ctx, []string{"www.example.com", "subdomain.bigissuer.com"}, rlp, 99)
|
err = ra.checkCertificatesPerNameLimit(ctx, []string{"www.example.com", "subdomain.bigissuer.com"}, rlp, 99)
|
||||||
test.AssertError(t, err, "incorrectly failed to rate limit bigissuer")
|
test.AssertError(t, err, "incorrectly failed to rate limit bigissuer")
|
||||||
test.AssertErrorIs(t, err, berrors.RateLimit)
|
test.AssertErrorIs(t, err, berrors.RateLimit)
|
||||||
// "99:bigissuer.com" has an override of 100 and they've issued 100. So we
|
// "bigissuer.com" has an override of 100 and they've issued 100. So we
|
||||||
// expect to see 100% utilization.
|
// expect to see 100% utilization.
|
||||||
test.AssertMetricWithLabelsEquals(t, ra.rlOverrideUsageGauge, prometheus.Labels{"limit": ratelimit.CertificatesPerName, "override_key": "99:bigissuer.com"}, 1)
|
test.AssertMetricWithLabelsEquals(t, ra.rlOverrideUsageGauge, prometheus.Labels{"limit": ratelimit.CertificatesPerName, "override_key": "bigissuer.com"}, 1)
|
||||||
|
|
||||||
// One base domain, above its override (which is below threshold)
|
// One base domain, above its override (which is below threshold)
|
||||||
mockSA.nameCounts.Counts["smallissuer.co.uk"] = 1
|
mockSA.nameCounts.Counts["smallissuer.co.uk"] = 1
|
||||||
err = ra.checkCertificatesPerNameLimit(ctx, []string{"www.smallissuer.co.uk"}, rlp, 99)
|
err = ra.checkCertificatesPerNameLimit(ctx, []string{"www.smallissuer.co.uk"}, rlp, 99)
|
||||||
test.AssertError(t, err, "incorrectly failed to rate limit smallissuer")
|
test.AssertError(t, err, "incorrectly failed to rate limit smallissuer")
|
||||||
test.AssertErrorIs(t, err, berrors.RateLimit)
|
test.AssertErrorIs(t, err, berrors.RateLimit)
|
||||||
// "99:smallissuer.co.uk" has an override of 1 and they've issued 1. So we
|
// "smallissuer.co.uk" has an override of 1 and they've issued 1. So we
|
||||||
// expect to see 100% utilization.
|
// expect to see 100% utilization.
|
||||||
test.AssertMetricWithLabelsEquals(t, ra.rlOverrideUsageGauge, prometheus.Labels{"limit": ratelimit.CertificatesPerName, "override_key": "99:smallissuer.co.uk"}, 1)
|
test.AssertMetricWithLabelsEquals(t, ra.rlOverrideUsageGauge, prometheus.Labels{"limit": ratelimit.CertificatesPerName, "override_key": "smallissuer.co.uk"}, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestCheckExactCertificateLimit tests that the duplicate certificate limit
|
// TestCheckExactCertificateLimit tests that the duplicate certificate limit
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package ratelimit
|
package ratelimit
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"strconv"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
|
@ -223,32 +224,33 @@ func (rlp *RateLimitPolicy) Enabled() bool {
|
||||||
return rlp.Threshold != 0
|
return rlp.Threshold != 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetThreshold returns the threshold for this rate limit and true if that
|
// GetThreshold returns the threshold for this rate limit and the override
|
||||||
// threshold is an override for the default limit, false otherwise. The
|
// Id/Key if that threshold is the result of an override for the default limit,
|
||||||
// threshold returned takes into account any overrides for `key` or `regID`. If
|
// empty-string otherwise. The threshold returned takes into account any
|
||||||
// both `key` and `regID` have an override the largest of the two will be used.
|
// overrides for `key` or `regID`. If both `key` and `regID` have an override
|
||||||
func (rlp *RateLimitPolicy) GetThreshold(key string, regID int64) (int64, bool) {
|
// the largest of the two will be used.
|
||||||
|
func (rlp *RateLimitPolicy) GetThreshold(key string, regID int64) (int64, string) {
|
||||||
regOverride, regOverrideExists := rlp.RegistrationOverrides[regID]
|
regOverride, regOverrideExists := rlp.RegistrationOverrides[regID]
|
||||||
keyOverride, keyOverrideExists := rlp.Overrides[key]
|
keyOverride, keyOverrideExists := rlp.Overrides[key]
|
||||||
|
|
||||||
if regOverrideExists && !keyOverrideExists {
|
if regOverrideExists && !keyOverrideExists {
|
||||||
// If there is a regOverride and no keyOverride use the regOverride
|
// If there is a regOverride and no keyOverride use the regOverride
|
||||||
return regOverride, true
|
return regOverride, strconv.FormatInt(regID, 10)
|
||||||
} else if !regOverrideExists && keyOverrideExists {
|
} else if !regOverrideExists && keyOverrideExists {
|
||||||
// If there is a keyOverride and no regOverride use the keyOverride
|
// If there is a keyOverride and no regOverride use the keyOverride
|
||||||
return keyOverride, true
|
return keyOverride, key
|
||||||
} else if regOverrideExists && keyOverrideExists {
|
} else if regOverrideExists && keyOverrideExists {
|
||||||
// If there is both a regOverride and a keyOverride use whichever is larger.
|
// If there is both a regOverride and a keyOverride use whichever is larger.
|
||||||
if regOverride > keyOverride {
|
if regOverride > keyOverride {
|
||||||
return regOverride, true
|
return regOverride, strconv.FormatInt(regID, 10)
|
||||||
} else {
|
} else {
|
||||||
return keyOverride, true
|
return keyOverride, key
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Otherwise there was no regOverride and no keyOverride, use the base
|
// Otherwise there was no regOverride and no keyOverride, use the base
|
||||||
// Threshold
|
// Threshold
|
||||||
return rlp.Threshold, false
|
return rlp.Threshold, ""
|
||||||
}
|
}
|
||||||
|
|
||||||
// WindowBegin returns the time that a RateLimitPolicy's window begins, given a
|
// WindowBegin returns the time that a RateLimitPolicy's window begins, given a
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue