From c902b3068f07ad7ac987219f58130868804d3ad4 Mon Sep 17 00:00:00 2001 From: Jacob Hoffman-Andrews Date: Wed, 20 Jul 2022 17:12:51 -0700 Subject: [PATCH] cert-checker: retry failed selects (#6238) Fixes #6229. --- cmd/cert-checker/main.go | 41 ++++++++++++++++++++++++++++------- cmd/cert-checker/main_test.go | 16 +++++++------- 2 files changed, 41 insertions(+), 16 deletions(-) diff --git a/cmd/cert-checker/main.go b/cmd/cert-checker/main.go index b1fccdbd6..3fe4880a4 100644 --- a/cmd/cert-checker/main.go +++ b/cmd/cert-checker/main.go @@ -97,9 +97,17 @@ type certChecker struct { issuedReport report checkPeriod time.Duration acceptableValidityDurations map[time.Duration]bool + logger blog.Logger } -func newChecker(saDbMap certDB, clk clock.Clock, pa core.PolicyAuthority, kp goodkey.KeyPolicy, period time.Duration, avd map[time.Duration]bool) certChecker { +func newChecker(saDbMap certDB, + clk clock.Clock, + pa core.PolicyAuthority, + kp goodkey.KeyPolicy, + period time.Duration, + avd map[time.Duration]bool, + logger blog.Logger, +) certChecker { return certChecker{ pa: pa, kp: kp, @@ -110,6 +118,7 @@ func newChecker(saDbMap certDB, clk clock.Clock, pa core.PolicyAuthority, kp goo issuedReport: report{Entries: make(map[string]reportEntry)}, checkPeriod: period, acceptableValidityDurations: avd, + logger: logger, } } @@ -122,12 +131,22 @@ func (c *certChecker) getCerts(unexpiredOnly bool) error { args["now"] = c.clock.Now() } - sni, err := c.dbMap.SelectNullInt( - "SELECT MIN(id) FROM certificates WHERE issued >= :issued AND expires >= :now", - args, - ) - if err != nil { - return err + var sni sql.NullInt64 + var err error + var retries int + for { + sni, err = c.dbMap.SelectNullInt( + "SELECT MIN(id) FROM certificates WHERE issued >= :issued AND expires >= :now", + args, + ) + if err != nil { + c.logger.AuditErrf("finding starting certificate: %s", err) + retries++ + time.Sleep(core.RetryBackoff(retries, time.Second, time.Minute, 2)) + continue + } + retries = 0 + break } if !sni.Valid { // a nil response was returned by the DB, so return error and fail @@ -145,6 +164,7 @@ func (c *certChecker) getCerts(unexpiredOnly bool) error { // packet limit. args["limit"] = batchSize args["id"] = initialID + for { certs, err := sa.SelectCertificates( c.dbMap, @@ -152,8 +172,12 @@ func (c *certChecker) getCerts(unexpiredOnly bool) error { args, ) if err != nil { - return err + c.logger.AuditErrf("selecting certificates: %s", err) + retries++ + time.Sleep(core.RetryBackoff(retries, time.Second, time.Minute, 2)) + continue } + retries = 0 for _, cert := range certs { c.certs <- cert.Certificate } @@ -445,6 +469,7 @@ func main() { kp, config.CertChecker.CheckPeriod.Duration, acceptableValidityDurations, + logger, ) fmt.Fprintf(os.Stderr, "# Getting certificates issued in the last %s\n", config.CertChecker.CheckPeriod) diff --git a/cmd/cert-checker/main_test.go b/cmd/cert-checker/main_test.go index 398b11ffd..219abdc6f 100644 --- a/cmd/cert-checker/main_test.go +++ b/cmd/cert-checker/main_test.go @@ -63,7 +63,7 @@ func init() { } func BenchmarkCheckCert(b *testing.B) { - checker := newChecker(nil, clock.New(), pa, kp, time.Hour, testValidityDurations) + checker := newChecker(nil, clock.New(), pa, kp, time.Hour, testValidityDurations, blog.NewMock()) testKey, _ := rsa.GenerateKey(rand.Reader, 1024) expiry := time.Now().AddDate(0, 0, 1) serial := big.NewInt(1337) @@ -99,7 +99,7 @@ func TestCheckWildcardCert(t *testing.T) { testKey, _ := rsa.GenerateKey(rand.Reader, 2048) fc := clock.NewFake() - checker := newChecker(saDbMap, fc, pa, kp, time.Hour, testValidityDurations) + checker := newChecker(saDbMap, fc, pa, kp, time.Hour, testValidityDurations, blog.NewMock()) issued := checker.clock.Now().Add(-time.Minute) goodExpiry := issued.Add(testValidityDuration - time.Second) serial := big.NewInt(1337) @@ -142,7 +142,7 @@ func TestCheckCertReturnsDNSNames(t *testing.T) { defer func() { saCleanup() }() - checker := newChecker(saDbMap, clock.NewFake(), pa, kp, time.Hour, testValidityDurations) + checker := newChecker(saDbMap, clock.NewFake(), pa, kp, time.Hour, testValidityDurations, blog.NewMock()) certPEM, err := ioutil.ReadFile("testdata/quite_invalid.pem") if err != nil { @@ -209,7 +209,7 @@ func TestCheckCert(t *testing.T) { t.Run(tc.name, func(t *testing.T) { testKey, _ := tc.key.genKey() - checker := newChecker(saDbMap, clock.NewFake(), pa, kp, time.Hour, testValidityDurations) + checker := newChecker(saDbMap, clock.NewFake(), pa, kp, time.Hour, testValidityDurations, blog.NewMock()) // Create a RFC 7633 OCSP Must Staple Extension. // OID 1.3.6.1.5.5.7.1.24 @@ -330,7 +330,7 @@ func TestGetAndProcessCerts(t *testing.T) { fc := clock.NewFake() fc.Set(fc.Now().Add(time.Hour)) - checker := newChecker(saDbMap, fc, pa, kp, time.Hour, testValidityDurations) + checker := newChecker(saDbMap, fc, pa, kp, time.Hour, testValidityDurations, blog.NewMock()) sa, err := sa.NewSQLStorageAuthority(saDbMap, saDbMap, nil, fc, blog.NewMock(), metrics.NoopRegisterer, 1) test.AssertNotError(t, err, "Couldn't create SA to insert certificates") saCleanUp := test.ResetSATestDatabase(t) @@ -420,7 +420,7 @@ func (db mismatchedCountDB) Select(output interface{}, _ string, _ ...interface{ func TestGetCertsEmptyResults(t *testing.T) { saDbMap, err := sa.NewDbMap(vars.DBConnSA, sa.DbSettings{}) test.AssertNotError(t, err, "Couldn't connect to database") - checker := newChecker(saDbMap, clock.NewFake(), pa, kp, time.Hour, testValidityDurations) + checker := newChecker(saDbMap, clock.NewFake(), pa, kp, time.Hour, testValidityDurations, blog.NewMock()) checker.dbMap = mismatchedCountDB{} batchSize = 3 @@ -448,7 +448,7 @@ func (db emptyDB) SelectNullInt(_ string, _ ...interface{}) (sql.NullInt64, erro func TestGetCertsNullResults(t *testing.T) { saDbMap, err := sa.NewDbMap(vars.DBConnSA, sa.DbSettings{}) test.AssertNotError(t, err, "Couldn't connect to database") - checker := newChecker(saDbMap, clock.NewFake(), pa, kp, time.Hour, testValidityDurations) + checker := newChecker(saDbMap, clock.NewFake(), pa, kp, time.Hour, testValidityDurations, blog.NewMock()) checker.dbMap = emptyDB{} err = checker.getCerts(false) @@ -530,7 +530,7 @@ func TestIgnoredLint(t *testing.T) { err = loglist.InitLintList("../../test/ct-test-srv/log_list.json") test.AssertNotError(t, err, "failed to load ct log list") testKey, _ := rsa.GenerateKey(rand.Reader, 2048) - checker := newChecker(saDbMap, clock.NewFake(), pa, kp, time.Hour, testValidityDurations) + checker := newChecker(saDbMap, clock.NewFake(), pa, kp, time.Hour, testValidityDurations, blog.NewMock()) serial := big.NewInt(1337) template := &x509.Certificate{