From 8c9d4f87b2b61a89c18289a2b61ef841915015a7 Mon Sep 17 00:00:00 2001 From: Daniel McCarney Date: Tue, 3 Dec 2019 12:58:31 -0500 Subject: [PATCH] SA: separate out RL update transaction in AddCertificate. (#4587) The AddCertificate processing related to updating the fqdnSets and certificatesPerNames tables can be done in a separate transaction from the inserts to issuedNames and certificates. This has the advantage of letting the overall AddCertificate request succeed when the primary transaction succeeds but the rate limit update transaction fails. We are OK with slightly incorrect rate limit results if it means more AddCertificate requests succeed and there are fewer orphaned final certificates. To maintain visibility we audit log when the rate limit transaction fails and also increment a new failedAddCertRLTransactions prometheus counter. Resolves #4566 --- sa/sa.go | 64 +++++++++++++++++++++++++++++++++++++++++++-------- sa/sa_test.go | 31 +++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 10 deletions(-) diff --git a/sa/sa.go b/sa/sa.go index 2aada09c5..ee484fae1 100644 --- a/sa/sa.go +++ b/sa/sa.go @@ -14,6 +14,7 @@ import ( "time" "github.com/jmhodges/clock" + "github.com/prometheus/client_golang/prometheus" "gopkg.in/go-gorp/gorp.v2" jose "gopkg.in/square/go-jose.v2" @@ -45,6 +46,13 @@ type SQLStorageAuthority struct { // We use function types here so we can mock out this internal function in // unittests. countCertificatesByName certCountFunc + + // rateLimitWriteErrors is a Counter for the number of times + // a ratelimit update transaction failed during AddCertificate request + // processing. We do not fail the overall AddCertificate call when ratelimit + // transactions fail and so use this stat to maintain visibility into the rate + // this occurs. + rateLimitWriteErrors prometheus.Counter } func digest256(data []byte) []byte { @@ -97,11 +105,18 @@ func NewSQLStorageAuthority( ) (*SQLStorageAuthority, error) { SetSQLDebug(dbMap, logger) + rateLimitWriteErrors := prometheus.NewCounter(prometheus.CounterOpts{ + Name: "rate_limit_write_errors", + Help: "number of failed ratelimit update transactions during AddCertificate", + }) + scope.MustRegister(rateLimitWriteErrors) + ssa := &SQLStorageAuthority{ - dbMap: dbMap, - clk: clk, - log: logger, - parallelismPerRPC: parallelismPerRPC, + dbMap: dbMap, + clk: clk, + log: logger, + parallelismPerRPC: parallelismPerRPC, + rateLimitWriteErrors: rateLimitWriteErrors, } ssa.countCertificatesByName = ssa.countCertificates @@ -462,7 +477,7 @@ func (ssa *SQLStorageAuthority) AddCertificate( Expires: parsedCertificate.NotAfter, } - _, overallError := db.WithTransaction(ctx, ssa.dbMap, func(txWithCtx db.Transaction) (interface{}, error) { + isRenewalRaw, overallError := db.WithTransaction(ctx, ssa.dbMap, func(txWithCtx db.Transaction) (interface{}, error) { // Save the final certificate err = txWithCtx.Insert(cert) if err != nil { @@ -501,6 +516,28 @@ func (ssa *SQLStorageAuthority) AddCertificate( } } + return isRenewal, err + }) + if overallError != nil { + return "", overallError + } + + // Recast the interface{} return from db.WithTransaction as a bool, returning + // an error if we can't. + var isRenewal bool + if boolVal, ok := isRenewalRaw.(bool); !ok { + return "", fmt.Errorf( + "AddCertificate db.WithTransaction returned %T out var, expected bool", + isRenewalRaw) + } else { + isRenewal = boolVal + } + + // In a separate transaction perform the work required to update tables used + // for rate limits. Since the effects of failing these writes is slight + // miscalculation of rate limits we choose to not fail the AddCertificate + // operation if the rate limit update transaction fails. + _, rlTransactionErr := db.WithTransaction(ctx, ssa.dbMap, func(txWithCtx db.Transaction) (interface{}, error) { // Add to the rate limit table, but only for new certificates. Renewals // don't count against the certificatesPerName limit. if !isRenewal { @@ -512,18 +549,25 @@ func (ssa *SQLStorageAuthority) AddCertificate( // Update the FQDN sets now that there is a final certificate to ensure rate // limits are calculated correctly. - err = addFQDNSet( + if err := addFQDNSet( txWithCtx, parsedCertificate.DNSNames, core.SerialToString(parsedCertificate.SerialNumber), parsedCertificate.NotBefore, parsedCertificate.NotAfter, - ) - return nil, err + ); err != nil { + return nil, err + } + + return nil, nil }) - if overallError != nil { - return "", overallError + // If the ratelimit transaction failed increment a stat and log a warning + // but don't return an error from AddCertificate. + if rlTransactionErr != nil { + ssa.rateLimitWriteErrors.Inc() + ssa.log.AuditErrf("failed AddCertificate ratelimit update transaction: %v", rlTransactionErr) } + return digest, nil } diff --git a/sa/sa_test.go b/sa/sa_test.go index 1a2173ff5..7c5fd0a5f 100644 --- a/sa/sa_test.go +++ b/sa/sa_test.go @@ -1696,6 +1696,37 @@ func TestAddCertificateRenewalBit(t *testing.T) { } } +func TestAddCertificateBadRatelimitUpdate(t *testing.T) { + sa, _, cleanUp := initSA(t) + defer cleanUp() + + reg := satest.CreateWorkingRegistration(t, sa) + issued := sa.clk.Now() + serial, cert := test.ThrowAwayCert(t, 1) + + // Manually add an fqdn set for the certificate serial. This will cause the + // real fqdn set update in AddCertificate to fail due to the duplicate serial. + err := addFQDNSet( + sa.dbMap, + cert.DNSNames, + serial, + cert.NotBefore, + cert.NotAfter) + test.AssertNotError(t, err, "Couldn't manually add fqdnSet") + + // Add the test certificate, it shouldn't error even though part of the rate + // limit updates failed. + _, err = sa.AddCertificate(ctx, cert.Raw, reg.ID, nil, &issued) + test.AssertNotError(t, err, "Couldn't add testCert") + + // The rate limit transaction failure stat should have been incremented + test.AssertEquals(t, test.CountCounter(sa.rateLimitWriteErrors), 1) + + // The rate limit transaction failure should have been audit logged + logLines := log.GetAllMatching(`\[AUDIT\] failed AddCertificate ratelimit update transaction: Error 1062: Duplicate entry '.*' for key 'serial'`) + test.AssertEquals(t, len(logLines), 1) +} + func TestCountCertificatesRenewalBit(t *testing.T) { sa, fc, cleanUp := initSA(t) defer cleanUp()