SA: separate out RL update transaction in AddCertificate. (#4587)

The AddCertificate processing related to updating the fqdnSets and certificatesPerNames tables can be done in a separate transaction from the inserts to issuedNames and certificates.

This has the advantage of letting the overall AddCertificate request succeed when the primary transaction succeeds but the rate limit update transaction fails. We are OK with slightly incorrect rate limit results if it means more AddCertificate requests succeed and there are fewer orphaned final certificates.

To maintain visibility we audit log when the rate limit transaction fails and also increment a new failedAddCertRLTransactions prometheus counter.

Resolves #4566
This commit is contained in:
Daniel McCarney 2019-12-03 12:58:31 -05:00 committed by Roland Bracewell Shoemaker
parent e29830ac33
commit 8c9d4f87b2
2 changed files with 85 additions and 10 deletions

View File

@ -14,6 +14,7 @@ import (
"time"
"github.com/jmhodges/clock"
"github.com/prometheus/client_golang/prometheus"
"gopkg.in/go-gorp/gorp.v2"
jose "gopkg.in/square/go-jose.v2"
@ -45,6 +46,13 @@ type SQLStorageAuthority struct {
// We use function types here so we can mock out this internal function in
// unittests.
countCertificatesByName certCountFunc
// rateLimitWriteErrors is a Counter for the number of times
// a ratelimit update transaction failed during AddCertificate request
// processing. We do not fail the overall AddCertificate call when ratelimit
// transactions fail and so use this stat to maintain visibility into the rate
// this occurs.
rateLimitWriteErrors prometheus.Counter
}
func digest256(data []byte) []byte {
@ -97,11 +105,18 @@ func NewSQLStorageAuthority(
) (*SQLStorageAuthority, error) {
SetSQLDebug(dbMap, logger)
rateLimitWriteErrors := prometheus.NewCounter(prometheus.CounterOpts{
Name: "rate_limit_write_errors",
Help: "number of failed ratelimit update transactions during AddCertificate",
})
scope.MustRegister(rateLimitWriteErrors)
ssa := &SQLStorageAuthority{
dbMap: dbMap,
clk: clk,
log: logger,
parallelismPerRPC: parallelismPerRPC,
dbMap: dbMap,
clk: clk,
log: logger,
parallelismPerRPC: parallelismPerRPC,
rateLimitWriteErrors: rateLimitWriteErrors,
}
ssa.countCertificatesByName = ssa.countCertificates
@ -462,7 +477,7 @@ func (ssa *SQLStorageAuthority) AddCertificate(
Expires: parsedCertificate.NotAfter,
}
_, overallError := db.WithTransaction(ctx, ssa.dbMap, func(txWithCtx db.Transaction) (interface{}, error) {
isRenewalRaw, overallError := db.WithTransaction(ctx, ssa.dbMap, func(txWithCtx db.Transaction) (interface{}, error) {
// Save the final certificate
err = txWithCtx.Insert(cert)
if err != nil {
@ -501,6 +516,28 @@ func (ssa *SQLStorageAuthority) AddCertificate(
}
}
return isRenewal, err
})
if overallError != nil {
return "", overallError
}
// Recast the interface{} return from db.WithTransaction as a bool, returning
// an error if we can't.
var isRenewal bool
if boolVal, ok := isRenewalRaw.(bool); !ok {
return "", fmt.Errorf(
"AddCertificate db.WithTransaction returned %T out var, expected bool",
isRenewalRaw)
} else {
isRenewal = boolVal
}
// In a separate transaction perform the work required to update tables used
// for rate limits. Since the effects of failing these writes is slight
// miscalculation of rate limits we choose to not fail the AddCertificate
// operation if the rate limit update transaction fails.
_, rlTransactionErr := db.WithTransaction(ctx, ssa.dbMap, func(txWithCtx db.Transaction) (interface{}, error) {
// Add to the rate limit table, but only for new certificates. Renewals
// don't count against the certificatesPerName limit.
if !isRenewal {
@ -512,18 +549,25 @@ func (ssa *SQLStorageAuthority) AddCertificate(
// Update the FQDN sets now that there is a final certificate to ensure rate
// limits are calculated correctly.
err = addFQDNSet(
if err := addFQDNSet(
txWithCtx,
parsedCertificate.DNSNames,
core.SerialToString(parsedCertificate.SerialNumber),
parsedCertificate.NotBefore,
parsedCertificate.NotAfter,
)
return nil, err
); err != nil {
return nil, err
}
return nil, nil
})
if overallError != nil {
return "", overallError
// If the ratelimit transaction failed increment a stat and log a warning
// but don't return an error from AddCertificate.
if rlTransactionErr != nil {
ssa.rateLimitWriteErrors.Inc()
ssa.log.AuditErrf("failed AddCertificate ratelimit update transaction: %v", rlTransactionErr)
}
return digest, nil
}

View File

@ -1696,6 +1696,37 @@ func TestAddCertificateRenewalBit(t *testing.T) {
}
}
func TestAddCertificateBadRatelimitUpdate(t *testing.T) {
sa, _, cleanUp := initSA(t)
defer cleanUp()
reg := satest.CreateWorkingRegistration(t, sa)
issued := sa.clk.Now()
serial, cert := test.ThrowAwayCert(t, 1)
// Manually add an fqdn set for the certificate serial. This will cause the
// real fqdn set update in AddCertificate to fail due to the duplicate serial.
err := addFQDNSet(
sa.dbMap,
cert.DNSNames,
serial,
cert.NotBefore,
cert.NotAfter)
test.AssertNotError(t, err, "Couldn't manually add fqdnSet")
// Add the test certificate, it shouldn't error even though part of the rate
// limit updates failed.
_, err = sa.AddCertificate(ctx, cert.Raw, reg.ID, nil, &issued)
test.AssertNotError(t, err, "Couldn't add testCert")
// The rate limit transaction failure stat should have been incremented
test.AssertEquals(t, test.CountCounter(sa.rateLimitWriteErrors), 1)
// The rate limit transaction failure should have been audit logged
logLines := log.GetAllMatching(`\[AUDIT\] failed AddCertificate ratelimit update transaction: Error 1062: Duplicate entry '.*' for key 'serial'`)
test.AssertEquals(t, len(logLines), 1)
}
func TestCountCertificatesRenewalBit(t *testing.T) {
sa, fc, cleanUp := initSA(t)
defer cleanUp()