Remove `TotalCertificates` rate limit. (#3638)

The `TotalCertificates` rate limit serves to ensure we don't
accidentally exceed our OCSP signing capacity by issuing too many
certificates within a fixed period. In practice this rate limit has been
fragile and the associated queries have been linked to performance
problems.

Since we now have better means of monitoring our OCSP signing capacity
this commit removes the rate limit and associated code.
This commit is contained in:
Daniel McCarney 2018-04-12 16:25:47 -04:00 committed by Jacob Hoffman-Andrews
parent f235b96339
commit 74d5decc67
7 changed files with 5 additions and 176 deletions

View File

@ -249,9 +249,6 @@ func main() {
rai.CA = cac
rai.SA = sac
err = rai.UpdateIssuedCountForever()
cmd.FailOnError(err, "Updating total issuance count")
serverMetrics := bgrpc.NewServerMetrics(scope)
grpcSrv, listener, err := bgrpc.NewServer(c.RA.GRPC, tlsConfig, serverMetrics)
cmd.FailOnError(err, "Unable to setup RA gRPC server")

View File

@ -77,15 +77,11 @@ type RegistrationAuthorityImpl struct {
authorizationLifetime time.Duration
pendingAuthorizationLifetime time.Duration
rlPolicies ratelimit.Limits
// tiMu protects totalIssuedCount and totalIssuedLastUpdate
tiMu *sync.RWMutex
totalIssuedCount int
totalIssuedLastUpdate time.Time
maxContactsPerReg int
maxNames int
forceCNFromSAN bool
reuseValidAuthz bool
orderLifetime time.Duration
maxContactsPerReg int
maxNames int
forceCNFromSAN bool
reuseValidAuthz bool
orderLifetime time.Duration
regByIPStats metrics.Scope
regByIPRangeStats metrics.Scope
@ -93,7 +89,6 @@ type RegistrationAuthorityImpl struct {
pendOrdersByRegIDStats metrics.Scope
newOrderByRegIDStats metrics.Scope
certsForDomainStats metrics.Scope
totalCertsStats metrics.Scope
ctpolicy *ctpolicy.CTPolicy
ctpolicyResults *prometheus.HistogramVec
@ -133,7 +128,6 @@ func NewRegistrationAuthorityImpl(
authorizationLifetime: authorizationLifetime,
pendingAuthorizationLifetime: pendingAuthorizationLifetime,
rlPolicies: ratelimit.New(),
tiMu: new(sync.RWMutex),
maxContactsPerReg: maxContactsPerReg,
keyPolicy: keyPolicy,
maxNames: maxNames,
@ -145,7 +139,6 @@ func NewRegistrationAuthorityImpl(
pendOrdersByRegIDStats: stats.NewScope("RateLimit", "PendingOrdersByRegID"),
newOrderByRegIDStats: stats.NewScope("RateLimit", "NewOrdersByRegID"),
certsForDomainStats: stats.NewScope("RateLimit", "CertificatesForDomain"),
totalCertsStats: stats.NewScope("RateLimit", "TotalCertificates"),
publisher: pubc,
caa: caaClient,
orderLifetime: orderLifetime,
@ -168,45 +161,6 @@ func (ra *RegistrationAuthorityImpl) rateLimitPoliciesLoadError(err error) {
ra.log.Err(fmt.Sprintf("error reloading rate limit policy: %s", err))
}
// Run this to continually update the totalIssuedCount field of this
// RA by calling out to the SA. It will run one update before returning, and
// return an error if that update failed.
func (ra *RegistrationAuthorityImpl) UpdateIssuedCountForever() error {
if err := ra.updateIssuedCount(); err != nil {
return err
}
go func() {
for {
_ = ra.updateIssuedCount()
time.Sleep(1 * time.Minute)
}
}()
return nil
}
func (ra *RegistrationAuthorityImpl) updateIssuedCount() error {
totalCertLimit := ra.rlPolicies.TotalCertificates()
if totalCertLimit.Enabled() {
now := ra.clk.Now()
// We don't have a Context here, so use the background context. Note that a
// timeout is still imposed by our RPC layer.
count, err := ra.SA.CountCertificatesRange(
context.Background(),
now.Add(-totalCertLimit.Window.Duration),
now,
)
if err != nil {
ra.log.AuditErr(fmt.Sprintf("updating total issued count: %s", err))
return err
}
ra.tiMu.Lock()
ra.totalIssuedCount = int(count)
ra.totalIssuedLastUpdate = ra.clk.Now()
ra.tiMu.Unlock()
}
return nil
}
var (
unparseableEmailError = berrors.InvalidEmailError("not a valid e-mail address")
emptyDNSResponseError = berrors.InvalidEmailError(
@ -1352,37 +1306,7 @@ func (ra *RegistrationAuthorityImpl) checkCertificatesPerFQDNSetLimit(ctx contex
return nil
}
func (ra *RegistrationAuthorityImpl) checkTotalCertificatesLimit() error {
totalCertLimits := ra.rlPolicies.TotalCertificates()
ra.tiMu.RLock()
defer ra.tiMu.RUnlock()
// If last update of the total issued count was more than five minutes ago,
// or not yet updated, fail.
if ra.clk.Now().After(ra.totalIssuedLastUpdate.Add(5*time.Minute)) ||
ra.totalIssuedLastUpdate.IsZero() {
return berrors.InternalServerError(
"Total certificate count out of date: updated %s",
ra.totalIssuedLastUpdate,
)
}
if ra.totalIssuedCount >= totalCertLimits.Threshold {
ra.totalCertsStats.Inc("Exceeded", 1)
ra.log.Info(fmt.Sprintf("Rate limit exceeded, TotalCertificates, totalIssued: %d, lastUpdated %s", ra.totalIssuedCount, ra.totalIssuedLastUpdate))
return berrors.RateLimitError("global certificate issuance limit reached. Try again in an hour")
}
ra.totalCertsStats.Inc("Pass", 1)
return nil
}
func (ra *RegistrationAuthorityImpl) checkLimits(ctx context.Context, names []string, regID int64) error {
totalCertLimits := ra.rlPolicies.TotalCertificates()
if totalCertLimits.Enabled() {
err := ra.checkTotalCertificatesLimit()
if err != nil {
return err
}
}
certNameLimits := ra.rlPolicies.CertificatesPerName()
if certNameLimits.Enabled() {
err := ra.checkCertificatesPerNameLimit(ctx, names, certNameLimits, regID)

View File

@ -1080,10 +1080,6 @@ func TestNewCertificate(t *testing.T) {
CSR: ExampleCSR,
}
if err := ra.updateIssuedCount(); err != nil {
t.Fatal("Updating issuance count:", err)
}
cert, err := ra.NewCertificate(ctx, certRequest, Registration.ID)
test.AssertNotError(t, err, "Failed to issue certificate")
@ -1095,64 +1091,6 @@ func TestNewCertificate(t *testing.T) {
test.AssertNotError(t, err, "Failed to parse certificate")
}
func TestTotalCertRateLimit(t *testing.T) {
_, sa, ra, fc, cleanUp := initAuthorities(t)
defer cleanUp()
ra.rlPolicies = &dummyRateLimitConfig{
TotalCertificatesPolicy: ratelimit.RateLimitPolicy{
Threshold: 1,
Window: cmd.ConfigDuration{Duration: 24 * 90 * time.Hour},
},
}
fc.Add(24 * 90 * time.Hour)
AuthzFinal.RegistrationID = Registration.ID
AuthzFinal, err := sa.NewPendingAuthorization(ctx, AuthzFinal)
test.AssertNotError(t, err, "Could not store test data")
err = sa.FinalizeAuthorization(ctx, AuthzFinal)
// Inject another final authorization to cover www.not-example.com
authzFinalWWW := AuthzFinal
authzFinalWWW.Identifier.Value = "www.not-example.com"
authzFinalWWW, err = sa.NewPendingAuthorization(ctx, authzFinalWWW)
test.AssertNotError(t, err, "Could not store test data")
err = sa.FinalizeAuthorization(ctx, authzFinalWWW)
test.AssertNotError(t, err, "Could not store test data")
ExampleCSR.Subject.CommonName = "www.NOT-example.com"
certRequest := core.CertificateRequest{
CSR: ExampleCSR,
}
_, err = ra.NewCertificate(ctx, certRequest, Registration.ID)
test.AssertError(t, err, "Expected to fail issuance when updateIssuedCount not yet called")
if err := ra.updateIssuedCount(); err != nil {
t.Fatal("Updating issuance count:", err)
}
// TODO(jsha): Since we're using a real SA rather than a mock, we call
// NewCertificate twice and insert the first result into the SA. Instead we
// should mock out the SA and have it return the cert count that we want.
cert, err := ra.NewCertificate(ctx, certRequest, Registration.ID)
test.AssertNotError(t, err, "Failed to issue certificate")
_, err = sa.AddCertificate(ctx, cert.DER, Registration.ID, nil)
test.AssertNotError(t, err, "Failed to store certificate")
fc.Add(time.Hour)
if err := ra.updateIssuedCount(); err != nil {
t.Fatal("Updating issuance count:", err)
}
_, err = ra.NewCertificate(ctx, certRequest, Registration.ID)
test.AssertError(t, err, "Total certificate rate limit failed")
fc.Add(time.Hour)
_, err = ra.NewCertificate(ctx, certRequest, Registration.ID)
test.AssertError(t, err, "Expected to fail issuance when updateIssuedCount too long out of date")
}
func TestAuthzRateLimiting(t *testing.T) {
_, _, ra, fc, cleanUp := initAuthorities(t)
defer cleanUp()
@ -1334,7 +1272,6 @@ func TestRateLimitLiveReload(t *testing.T) {
test.AssertNotError(t, err, "failed to SetRateLimitPoliciesFile")
// Test some fields of the initial policy to ensure it loaded correctly
test.AssertEquals(t, ra.rlPolicies.TotalCertificates().Threshold, 100000)
test.AssertEquals(t, ra.rlPolicies.CertificatesPerName().Overrides["le.wtf"], 10000)
test.AssertEquals(t, ra.rlPolicies.RegistrationsPerIP().Overrides["127.0.0.1"], 1000000)
test.AssertEquals(t, ra.rlPolicies.PendingAuthorizationsPerAccount().Threshold, 150)
@ -1355,7 +1292,6 @@ func TestRateLimitLiveReload(t *testing.T) {
// Test fields of the policy to make sure writing the new policy to the monitored file
// resulted in the runtime values being updated
test.AssertEquals(t, ra.rlPolicies.TotalCertificates().Threshold, 99999)
test.AssertEquals(t, ra.rlPolicies.CertificatesPerName().Overrides["le.wtf"], 9999)
test.AssertEquals(t, ra.rlPolicies.CertificatesPerName().Overrides["le4.wtf"], 9999)
test.AssertEquals(t, ra.rlPolicies.RegistrationsPerIP().Overrides["127.0.0.1"], 999990)

View File

@ -12,7 +12,6 @@ import (
// Limits is defined to allow mock implementations be provided during unit
// testing
type Limits interface {
TotalCertificates() RateLimitPolicy
CertificatesPerName() RateLimitPolicy
RegistrationsPerIP() RateLimitPolicy
RegistrationsPerIPRange() RateLimitPolicy
@ -33,15 +32,6 @@ type limitsImpl struct {
rlPolicy *rateLimitConfig
}
func (r *limitsImpl) TotalCertificates() RateLimitPolicy {
r.RLock()
defer r.RUnlock()
if r.rlPolicy == nil {
return RateLimitPolicy{}
}
return r.rlPolicy.TotalCertificates
}
func (r *limitsImpl) CertificatesPerName() RateLimitPolicy {
r.RLock()
defer r.RUnlock()
@ -136,11 +126,6 @@ func New() Limits {
// rateLimitConfig contains all application layer rate limiting policies. It is
// unexported and clients are expected to use the exported container struct
type rateLimitConfig struct {
// Total number of certificates that can be extant at any given time.
// The 2160h window, 90 days, is chosen to match certificate lifetime, since the
// main capacity factor is how many OCSP requests we can sign with available
// hardware.
TotalCertificates RateLimitPolicy `yaml:"totalCertificates"`
// Number of certificates that can be extant containing any given name.
// These are counted by "base domain" aka eTLD+1, so any entries in the
// overrides section must be an eTLD+1 according to the publicsuffix package.

View File

@ -73,12 +73,6 @@ func TestLoadPolicies(t *testing.T) {
err := policy.LoadPolicies(policyContent)
test.AssertNotError(t, err, "Failed to parse rate-limit-policies.yml")
// Test that the TotalCertificates section parsed correctly
totalCerts := policy.TotalCertificates()
test.AssertEquals(t, totalCerts.Threshold, 100000)
test.AssertEquals(t, len(totalCerts.Overrides), 0)
test.AssertEquals(t, len(totalCerts.RegistrationOverrides), 0)
// Test that the CertificatesPerName section parsed correctly
certsPerName := policy.CertificatesPerName()
test.AssertEquals(t, certsPerName.Threshold, 2)
@ -142,7 +136,6 @@ func TestLoadPolicies(t *testing.T) {
// `LoadPolicy` call, and instead return empty RateLimitPolicy objects with default
// values.
emptyPolicy := New()
test.AssertEquals(t, emptyPolicy.TotalCertificates().Threshold, 0)
test.AssertEquals(t, emptyPolicy.CertificatesPerName().Threshold, 0)
test.AssertEquals(t, emptyPolicy.RegistrationsPerIP().Threshold, 0)
test.AssertEquals(t, emptyPolicy.RegistrationsPerIP().Threshold, 0)

View File

@ -1,7 +1,4 @@
# See cmd/shell.go for definitions of these rate limits.
totalCertificates:
window: 9999h
threshold: 99999
certificatesPerName:
window: 2160h
threshold: 99

View File

@ -1,7 +1,4 @@
# See cmd/shell.go for definitions of these rate limits.
totalCertificates:
window: 2160h
threshold: 100000
certificatesPerName:
window: 2160h
threshold: 2