Remove `TotalCertificates` rate limit. (#3638)
The `TotalCertificates` rate limit serves to ensure we don't accidentally exceed our OCSP signing capacity by issuing too many certificates within a fixed period. In practice this rate limit has been fragile and the associated queries have been linked to performance problems. Since we now have better means of monitoring our OCSP signing capacity this commit removes the rate limit and associated code.
This commit is contained in:
parent
f235b96339
commit
74d5decc67
|
@ -249,9 +249,6 @@ func main() {
|
|||
rai.CA = cac
|
||||
rai.SA = sac
|
||||
|
||||
err = rai.UpdateIssuedCountForever()
|
||||
cmd.FailOnError(err, "Updating total issuance count")
|
||||
|
||||
serverMetrics := bgrpc.NewServerMetrics(scope)
|
||||
grpcSrv, listener, err := bgrpc.NewServer(c.RA.GRPC, tlsConfig, serverMetrics)
|
||||
cmd.FailOnError(err, "Unable to setup RA gRPC server")
|
||||
|
|
86
ra/ra.go
86
ra/ra.go
|
@ -77,15 +77,11 @@ type RegistrationAuthorityImpl struct {
|
|||
authorizationLifetime time.Duration
|
||||
pendingAuthorizationLifetime time.Duration
|
||||
rlPolicies ratelimit.Limits
|
||||
// tiMu protects totalIssuedCount and totalIssuedLastUpdate
|
||||
tiMu *sync.RWMutex
|
||||
totalIssuedCount int
|
||||
totalIssuedLastUpdate time.Time
|
||||
maxContactsPerReg int
|
||||
maxNames int
|
||||
forceCNFromSAN bool
|
||||
reuseValidAuthz bool
|
||||
orderLifetime time.Duration
|
||||
maxContactsPerReg int
|
||||
maxNames int
|
||||
forceCNFromSAN bool
|
||||
reuseValidAuthz bool
|
||||
orderLifetime time.Duration
|
||||
|
||||
regByIPStats metrics.Scope
|
||||
regByIPRangeStats metrics.Scope
|
||||
|
@ -93,7 +89,6 @@ type RegistrationAuthorityImpl struct {
|
|||
pendOrdersByRegIDStats metrics.Scope
|
||||
newOrderByRegIDStats metrics.Scope
|
||||
certsForDomainStats metrics.Scope
|
||||
totalCertsStats metrics.Scope
|
||||
|
||||
ctpolicy *ctpolicy.CTPolicy
|
||||
ctpolicyResults *prometheus.HistogramVec
|
||||
|
@ -133,7 +128,6 @@ func NewRegistrationAuthorityImpl(
|
|||
authorizationLifetime: authorizationLifetime,
|
||||
pendingAuthorizationLifetime: pendingAuthorizationLifetime,
|
||||
rlPolicies: ratelimit.New(),
|
||||
tiMu: new(sync.RWMutex),
|
||||
maxContactsPerReg: maxContactsPerReg,
|
||||
keyPolicy: keyPolicy,
|
||||
maxNames: maxNames,
|
||||
|
@ -145,7 +139,6 @@ func NewRegistrationAuthorityImpl(
|
|||
pendOrdersByRegIDStats: stats.NewScope("RateLimit", "PendingOrdersByRegID"),
|
||||
newOrderByRegIDStats: stats.NewScope("RateLimit", "NewOrdersByRegID"),
|
||||
certsForDomainStats: stats.NewScope("RateLimit", "CertificatesForDomain"),
|
||||
totalCertsStats: stats.NewScope("RateLimit", "TotalCertificates"),
|
||||
publisher: pubc,
|
||||
caa: caaClient,
|
||||
orderLifetime: orderLifetime,
|
||||
|
@ -168,45 +161,6 @@ func (ra *RegistrationAuthorityImpl) rateLimitPoliciesLoadError(err error) {
|
|||
ra.log.Err(fmt.Sprintf("error reloading rate limit policy: %s", err))
|
||||
}
|
||||
|
||||
// Run this to continually update the totalIssuedCount field of this
|
||||
// RA by calling out to the SA. It will run one update before returning, and
|
||||
// return an error if that update failed.
|
||||
func (ra *RegistrationAuthorityImpl) UpdateIssuedCountForever() error {
|
||||
if err := ra.updateIssuedCount(); err != nil {
|
||||
return err
|
||||
}
|
||||
go func() {
|
||||
for {
|
||||
_ = ra.updateIssuedCount()
|
||||
time.Sleep(1 * time.Minute)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ra *RegistrationAuthorityImpl) updateIssuedCount() error {
|
||||
totalCertLimit := ra.rlPolicies.TotalCertificates()
|
||||
if totalCertLimit.Enabled() {
|
||||
now := ra.clk.Now()
|
||||
// We don't have a Context here, so use the background context. Note that a
|
||||
// timeout is still imposed by our RPC layer.
|
||||
count, err := ra.SA.CountCertificatesRange(
|
||||
context.Background(),
|
||||
now.Add(-totalCertLimit.Window.Duration),
|
||||
now,
|
||||
)
|
||||
if err != nil {
|
||||
ra.log.AuditErr(fmt.Sprintf("updating total issued count: %s", err))
|
||||
return err
|
||||
}
|
||||
ra.tiMu.Lock()
|
||||
ra.totalIssuedCount = int(count)
|
||||
ra.totalIssuedLastUpdate = ra.clk.Now()
|
||||
ra.tiMu.Unlock()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
unparseableEmailError = berrors.InvalidEmailError("not a valid e-mail address")
|
||||
emptyDNSResponseError = berrors.InvalidEmailError(
|
||||
|
@ -1352,37 +1306,7 @@ func (ra *RegistrationAuthorityImpl) checkCertificatesPerFQDNSetLimit(ctx contex
|
|||
return nil
|
||||
}
|
||||
|
||||
func (ra *RegistrationAuthorityImpl) checkTotalCertificatesLimit() error {
|
||||
totalCertLimits := ra.rlPolicies.TotalCertificates()
|
||||
ra.tiMu.RLock()
|
||||
defer ra.tiMu.RUnlock()
|
||||
// If last update of the total issued count was more than five minutes ago,
|
||||
// or not yet updated, fail.
|
||||
if ra.clk.Now().After(ra.totalIssuedLastUpdate.Add(5*time.Minute)) ||
|
||||
ra.totalIssuedLastUpdate.IsZero() {
|
||||
return berrors.InternalServerError(
|
||||
"Total certificate count out of date: updated %s",
|
||||
ra.totalIssuedLastUpdate,
|
||||
)
|
||||
}
|
||||
if ra.totalIssuedCount >= totalCertLimits.Threshold {
|
||||
ra.totalCertsStats.Inc("Exceeded", 1)
|
||||
ra.log.Info(fmt.Sprintf("Rate limit exceeded, TotalCertificates, totalIssued: %d, lastUpdated %s", ra.totalIssuedCount, ra.totalIssuedLastUpdate))
|
||||
return berrors.RateLimitError("global certificate issuance limit reached. Try again in an hour")
|
||||
}
|
||||
ra.totalCertsStats.Inc("Pass", 1)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ra *RegistrationAuthorityImpl) checkLimits(ctx context.Context, names []string, regID int64) error {
|
||||
totalCertLimits := ra.rlPolicies.TotalCertificates()
|
||||
if totalCertLimits.Enabled() {
|
||||
err := ra.checkTotalCertificatesLimit()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
certNameLimits := ra.rlPolicies.CertificatesPerName()
|
||||
if certNameLimits.Enabled() {
|
||||
err := ra.checkCertificatesPerNameLimit(ctx, names, certNameLimits, regID)
|
||||
|
|
|
@ -1080,10 +1080,6 @@ func TestNewCertificate(t *testing.T) {
|
|||
CSR: ExampleCSR,
|
||||
}
|
||||
|
||||
if err := ra.updateIssuedCount(); err != nil {
|
||||
t.Fatal("Updating issuance count:", err)
|
||||
}
|
||||
|
||||
cert, err := ra.NewCertificate(ctx, certRequest, Registration.ID)
|
||||
test.AssertNotError(t, err, "Failed to issue certificate")
|
||||
|
||||
|
@ -1095,64 +1091,6 @@ func TestNewCertificate(t *testing.T) {
|
|||
test.AssertNotError(t, err, "Failed to parse certificate")
|
||||
}
|
||||
|
||||
func TestTotalCertRateLimit(t *testing.T) {
|
||||
_, sa, ra, fc, cleanUp := initAuthorities(t)
|
||||
defer cleanUp()
|
||||
|
||||
ra.rlPolicies = &dummyRateLimitConfig{
|
||||
TotalCertificatesPolicy: ratelimit.RateLimitPolicy{
|
||||
Threshold: 1,
|
||||
Window: cmd.ConfigDuration{Duration: 24 * 90 * time.Hour},
|
||||
},
|
||||
}
|
||||
fc.Add(24 * 90 * time.Hour)
|
||||
|
||||
AuthzFinal.RegistrationID = Registration.ID
|
||||
AuthzFinal, err := sa.NewPendingAuthorization(ctx, AuthzFinal)
|
||||
test.AssertNotError(t, err, "Could not store test data")
|
||||
err = sa.FinalizeAuthorization(ctx, AuthzFinal)
|
||||
|
||||
// Inject another final authorization to cover www.not-example.com
|
||||
authzFinalWWW := AuthzFinal
|
||||
authzFinalWWW.Identifier.Value = "www.not-example.com"
|
||||
authzFinalWWW, err = sa.NewPendingAuthorization(ctx, authzFinalWWW)
|
||||
test.AssertNotError(t, err, "Could not store test data")
|
||||
err = sa.FinalizeAuthorization(ctx, authzFinalWWW)
|
||||
test.AssertNotError(t, err, "Could not store test data")
|
||||
|
||||
ExampleCSR.Subject.CommonName = "www.NOT-example.com"
|
||||
certRequest := core.CertificateRequest{
|
||||
CSR: ExampleCSR,
|
||||
}
|
||||
|
||||
_, err = ra.NewCertificate(ctx, certRequest, Registration.ID)
|
||||
test.AssertError(t, err, "Expected to fail issuance when updateIssuedCount not yet called")
|
||||
|
||||
if err := ra.updateIssuedCount(); err != nil {
|
||||
t.Fatal("Updating issuance count:", err)
|
||||
}
|
||||
|
||||
// TODO(jsha): Since we're using a real SA rather than a mock, we call
|
||||
// NewCertificate twice and insert the first result into the SA. Instead we
|
||||
// should mock out the SA and have it return the cert count that we want.
|
||||
cert, err := ra.NewCertificate(ctx, certRequest, Registration.ID)
|
||||
test.AssertNotError(t, err, "Failed to issue certificate")
|
||||
_, err = sa.AddCertificate(ctx, cert.DER, Registration.ID, nil)
|
||||
test.AssertNotError(t, err, "Failed to store certificate")
|
||||
|
||||
fc.Add(time.Hour)
|
||||
if err := ra.updateIssuedCount(); err != nil {
|
||||
t.Fatal("Updating issuance count:", err)
|
||||
}
|
||||
|
||||
_, err = ra.NewCertificate(ctx, certRequest, Registration.ID)
|
||||
test.AssertError(t, err, "Total certificate rate limit failed")
|
||||
|
||||
fc.Add(time.Hour)
|
||||
_, err = ra.NewCertificate(ctx, certRequest, Registration.ID)
|
||||
test.AssertError(t, err, "Expected to fail issuance when updateIssuedCount too long out of date")
|
||||
}
|
||||
|
||||
func TestAuthzRateLimiting(t *testing.T) {
|
||||
_, _, ra, fc, cleanUp := initAuthorities(t)
|
||||
defer cleanUp()
|
||||
|
@ -1334,7 +1272,6 @@ func TestRateLimitLiveReload(t *testing.T) {
|
|||
test.AssertNotError(t, err, "failed to SetRateLimitPoliciesFile")
|
||||
|
||||
// Test some fields of the initial policy to ensure it loaded correctly
|
||||
test.AssertEquals(t, ra.rlPolicies.TotalCertificates().Threshold, 100000)
|
||||
test.AssertEquals(t, ra.rlPolicies.CertificatesPerName().Overrides["le.wtf"], 10000)
|
||||
test.AssertEquals(t, ra.rlPolicies.RegistrationsPerIP().Overrides["127.0.0.1"], 1000000)
|
||||
test.AssertEquals(t, ra.rlPolicies.PendingAuthorizationsPerAccount().Threshold, 150)
|
||||
|
@ -1355,7 +1292,6 @@ func TestRateLimitLiveReload(t *testing.T) {
|
|||
|
||||
// Test fields of the policy to make sure writing the new policy to the monitored file
|
||||
// resulted in the runtime values being updated
|
||||
test.AssertEquals(t, ra.rlPolicies.TotalCertificates().Threshold, 99999)
|
||||
test.AssertEquals(t, ra.rlPolicies.CertificatesPerName().Overrides["le.wtf"], 9999)
|
||||
test.AssertEquals(t, ra.rlPolicies.CertificatesPerName().Overrides["le4.wtf"], 9999)
|
||||
test.AssertEquals(t, ra.rlPolicies.RegistrationsPerIP().Overrides["127.0.0.1"], 999990)
|
||||
|
|
|
@ -12,7 +12,6 @@ import (
|
|||
// Limits is defined to allow mock implementations be provided during unit
|
||||
// testing
|
||||
type Limits interface {
|
||||
TotalCertificates() RateLimitPolicy
|
||||
CertificatesPerName() RateLimitPolicy
|
||||
RegistrationsPerIP() RateLimitPolicy
|
||||
RegistrationsPerIPRange() RateLimitPolicy
|
||||
|
@ -33,15 +32,6 @@ type limitsImpl struct {
|
|||
rlPolicy *rateLimitConfig
|
||||
}
|
||||
|
||||
func (r *limitsImpl) TotalCertificates() RateLimitPolicy {
|
||||
r.RLock()
|
||||
defer r.RUnlock()
|
||||
if r.rlPolicy == nil {
|
||||
return RateLimitPolicy{}
|
||||
}
|
||||
return r.rlPolicy.TotalCertificates
|
||||
}
|
||||
|
||||
func (r *limitsImpl) CertificatesPerName() RateLimitPolicy {
|
||||
r.RLock()
|
||||
defer r.RUnlock()
|
||||
|
@ -136,11 +126,6 @@ func New() Limits {
|
|||
// rateLimitConfig contains all application layer rate limiting policies. It is
|
||||
// unexported and clients are expected to use the exported container struct
|
||||
type rateLimitConfig struct {
|
||||
// Total number of certificates that can be extant at any given time.
|
||||
// The 2160h window, 90 days, is chosen to match certificate lifetime, since the
|
||||
// main capacity factor is how many OCSP requests we can sign with available
|
||||
// hardware.
|
||||
TotalCertificates RateLimitPolicy `yaml:"totalCertificates"`
|
||||
// Number of certificates that can be extant containing any given name.
|
||||
// These are counted by "base domain" aka eTLD+1, so any entries in the
|
||||
// overrides section must be an eTLD+1 according to the publicsuffix package.
|
||||
|
|
|
@ -73,12 +73,6 @@ func TestLoadPolicies(t *testing.T) {
|
|||
err := policy.LoadPolicies(policyContent)
|
||||
test.AssertNotError(t, err, "Failed to parse rate-limit-policies.yml")
|
||||
|
||||
// Test that the TotalCertificates section parsed correctly
|
||||
totalCerts := policy.TotalCertificates()
|
||||
test.AssertEquals(t, totalCerts.Threshold, 100000)
|
||||
test.AssertEquals(t, len(totalCerts.Overrides), 0)
|
||||
test.AssertEquals(t, len(totalCerts.RegistrationOverrides), 0)
|
||||
|
||||
// Test that the CertificatesPerName section parsed correctly
|
||||
certsPerName := policy.CertificatesPerName()
|
||||
test.AssertEquals(t, certsPerName.Threshold, 2)
|
||||
|
@ -142,7 +136,6 @@ func TestLoadPolicies(t *testing.T) {
|
|||
// `LoadPolicy` call, and instead return empty RateLimitPolicy objects with default
|
||||
// values.
|
||||
emptyPolicy := New()
|
||||
test.AssertEquals(t, emptyPolicy.TotalCertificates().Threshold, 0)
|
||||
test.AssertEquals(t, emptyPolicy.CertificatesPerName().Threshold, 0)
|
||||
test.AssertEquals(t, emptyPolicy.RegistrationsPerIP().Threshold, 0)
|
||||
test.AssertEquals(t, emptyPolicy.RegistrationsPerIP().Threshold, 0)
|
||||
|
|
|
@ -1,7 +1,4 @@
|
|||
# See cmd/shell.go for definitions of these rate limits.
|
||||
totalCertificates:
|
||||
window: 9999h
|
||||
threshold: 99999
|
||||
certificatesPerName:
|
||||
window: 2160h
|
||||
threshold: 99
|
||||
|
|
|
@ -1,7 +1,4 @@
|
|||
# See cmd/shell.go for definitions of these rate limits.
|
||||
totalCertificates:
|
||||
window: 2160h
|
||||
threshold: 100000
|
||||
certificatesPerName:
|
||||
window: 2160h
|
||||
threshold: 2
|
||||
|
|
Loading…
Reference in New Issue