Reduce flakiness in crl-updater integration tests (#8044)
Remove crl-updater from the list of services run by startservers.py, so that it isn't running at the same time as the crl-updater instances run by specific integration tests. To preserve the startup coverage this removes, add a new integration test which starts crl-updater and waits for it to listen on its debug port, just as startservers does. Also make the existing crl-updater integration tests more robust and more parallelizable by having the shared runUpdater helper always reset the leasedUntil column before executing the updater, instead of requiring each individual test to perform that reset. Fixes https://github.com/letsencrypt/boulder/issues/7590
This commit is contained in:
parent
7aebcb1aeb
commit
12e660874d
|
|
@ -3,12 +3,15 @@
|
||||||
package integration
|
package integration
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"crypto/ecdsa"
|
"crypto/ecdsa"
|
||||||
"crypto/elliptic"
|
"crypto/elliptic"
|
||||||
"crypto/rand"
|
"crypto/rand"
|
||||||
"database/sql"
|
"database/sql"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
|
@ -16,6 +19,7 @@ import (
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
"syscall"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
|
@ -37,6 +41,14 @@ func runUpdater(t *testing.T, configFile string) {
|
||||||
crlUpdaterMu.Lock()
|
crlUpdaterMu.Lock()
|
||||||
defer crlUpdaterMu.Unlock()
|
defer crlUpdaterMu.Unlock()
|
||||||
|
|
||||||
|
// Reset the "leasedUntil" column so this can be done alongside other
|
||||||
|
// updater runs without worrying about unclean state.
|
||||||
|
fc := clock.NewFake()
|
||||||
|
db, err := sql.Open("mysql", vars.DBConnSAIntegrationFullPerms)
|
||||||
|
test.AssertNotError(t, err, "opening database connection")
|
||||||
|
_, err = db.Exec(`UPDATE crlShards SET leasedUntil = ?`, fc.Now().Add(-time.Minute))
|
||||||
|
test.AssertNotError(t, err, "resetting leasedUntil column")
|
||||||
|
|
||||||
binPath, err := filepath.Abs("bin/boulder")
|
binPath, err := filepath.Abs("bin/boulder")
|
||||||
test.AssertNotError(t, err, "computing boulder binary path")
|
test.AssertNotError(t, err, "computing boulder binary path")
|
||||||
|
|
||||||
|
|
@ -49,22 +61,73 @@ func runUpdater(t *testing.T, configFile string) {
|
||||||
test.AssertNotError(t, err, "crl-updater failed")
|
test.AssertNotError(t, err, "crl-updater failed")
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestCRLPipeline runs an end-to-end test of the crl issuance process, ensuring
|
// TestCRLUpdaterStartup ensures that the crl-updater can start in daemon mode.
|
||||||
// that the correct number of properly-formed and validly-signed CRLs are sent
|
// We do this here instead of in startservers so that we can shut it down after
|
||||||
// to our fake S3 service.
|
// we've confirmed it is running. It's important that it not be running while
|
||||||
func TestCRLPipeline(t *testing.T) {
|
// other CRL integration tests are running, because otherwise they fight over
|
||||||
// Basic setup.
|
// database leases, leading to flaky test failures.
|
||||||
fc := clock.NewFake()
|
func TestCRLUpdaterStartup(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
crlUpdaterMu.Lock()
|
||||||
|
defer crlUpdaterMu.Unlock()
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
|
||||||
|
binPath, err := filepath.Abs("bin/boulder")
|
||||||
|
test.AssertNotError(t, err, "computing boulder binary path")
|
||||||
|
|
||||||
configDir, ok := os.LookupEnv("BOULDER_CONFIG_DIR")
|
configDir, ok := os.LookupEnv("BOULDER_CONFIG_DIR")
|
||||||
test.Assert(t, ok, "failed to look up test config directory")
|
test.Assert(t, ok, "failed to look up test config directory")
|
||||||
configFile := path.Join(configDir, "crl-updater.json")
|
configFile := path.Join(configDir, "crl-updater.json")
|
||||||
|
|
||||||
// Reset the "leasedUntil" column so that this test isn't dependent on state
|
c := exec.CommandContext(ctx, binPath, "crl-updater", "-config", configFile, "-debug-addr", ":8021")
|
||||||
// like prior runs of this test.
|
|
||||||
db, err := sql.Open("mysql", vars.DBConnSAIntegrationFullPerms)
|
var wg sync.WaitGroup
|
||||||
test.AssertNotError(t, err, "opening database connection")
|
wg.Add(1)
|
||||||
_, err = db.Exec(`UPDATE crlShards SET leasedUntil = ?`, fc.Now().Add(-time.Minute))
|
go func() {
|
||||||
test.AssertNotError(t, err, "resetting leasedUntil column")
|
out, err := c.CombinedOutput()
|
||||||
|
// Log the output and error, but only if the main goroutine couldn't connect
|
||||||
|
// and declared the test failed.
|
||||||
|
for _, line := range strings.Split(string(out), "\n") {
|
||||||
|
t.Log(line)
|
||||||
|
}
|
||||||
|
t.Log(err)
|
||||||
|
wg.Done()
|
||||||
|
}()
|
||||||
|
|
||||||
|
for attempt := range 10 {
|
||||||
|
time.Sleep(core.RetryBackoff(attempt, 10*time.Millisecond, 1*time.Second, 2))
|
||||||
|
|
||||||
|
conn, err := net.DialTimeout("tcp", "localhost:8021", 100*time.Millisecond)
|
||||||
|
if errors.Is(err, syscall.ECONNREFUSED) {
|
||||||
|
t.Logf("Connection attempt %d failed: %s", attempt, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
t.Logf("Connection attempt %d failed unrecoverably: %s", attempt, err)
|
||||||
|
t.Fail()
|
||||||
|
break
|
||||||
|
}
|
||||||
|
t.Logf("Connection attempt %d succeeded", attempt)
|
||||||
|
defer conn.Close()
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
cancel()
|
||||||
|
wg.Wait()
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCRLPipeline runs an end-to-end test of the crl issuance process, ensuring
|
||||||
|
// that the correct number of properly-formed and validly-signed CRLs are sent
|
||||||
|
// to our fake S3 service.
|
||||||
|
func TestCRLPipeline(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
// Basic setup.
|
||||||
|
configDir, ok := os.LookupEnv("BOULDER_CONFIG_DIR")
|
||||||
|
test.Assert(t, ok, "failed to look up test config directory")
|
||||||
|
configFile := path.Join(configDir, "crl-updater.json")
|
||||||
|
|
||||||
// Issue a test certificate and save its serial number.
|
// Issue a test certificate and save its serial number.
|
||||||
client, err := makeClient()
|
client, err := makeClient()
|
||||||
|
|
@ -85,10 +148,6 @@ func TestCRLPipeline(t *testing.T) {
|
||||||
err = client.RevokeCertificate(client.Account, cert, client.PrivateKey, 5)
|
err = client.RevokeCertificate(client.Account, cert, client.PrivateKey, 5)
|
||||||
test.AssertNotError(t, err, "failed to revoke test certificate")
|
test.AssertNotError(t, err, "failed to revoke test certificate")
|
||||||
|
|
||||||
// Reset the "leasedUntil" column to prepare for another round of CRLs.
|
|
||||||
_, err = db.Exec(`UPDATE crlShards SET leasedUntil = ?`, fc.Now().Add(-time.Minute))
|
|
||||||
test.AssertNotError(t, err, "resetting leasedUntil column")
|
|
||||||
|
|
||||||
// Confirm that the cert now *does* show up in the CRLs.
|
// Confirm that the cert now *does* show up in the CRLs.
|
||||||
runUpdater(t, configFile)
|
runUpdater(t, configFile)
|
||||||
resp, err = http.Get("http://localhost:4501/query?serial=" + serial)
|
resp, err = http.Get("http://localhost:4501/query?serial=" + serial)
|
||||||
|
|
@ -103,6 +162,8 @@ func TestCRLPipeline(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCRLTemporalAndExplicitShardingCoexist(t *testing.T) {
|
func TestCRLTemporalAndExplicitShardingCoexist(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
db, err := sql.Open("mysql", vars.DBConnSAIntegrationFullPerms)
|
db, err := sql.Open("mysql", vars.DBConnSAIntegrationFullPerms)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("sql.Open: %s", err)
|
t.Fatalf("sql.Open: %s", err)
|
||||||
|
|
@ -158,13 +219,6 @@ func TestCRLTemporalAndExplicitShardingCoexist(t *testing.T) {
|
||||||
t.Fatalf("revoking: %s", err)
|
t.Fatalf("revoking: %s", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reset the "leasedUntil" column to prepare for another round of CRLs.
|
|
||||||
fc := clock.NewFake()
|
|
||||||
_, err = db.Exec(`UPDATE crlShards SET leasedUntil = ?`, fc.Now().Add(-time.Minute))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("resetting crlShards.leasedUntil: %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
runUpdater(t, path.Join(os.Getenv("BOULDER_CONFIG_DIR"), "crl-updater.json"))
|
runUpdater(t, path.Join(os.Getenv("BOULDER_CONFIG_DIR"), "crl-updater.json"))
|
||||||
|
|
||||||
allCRLs := getAllCRLs(t)
|
allCRLs := getAllCRLs(t)
|
||||||
|
|
|
||||||
|
|
@ -90,10 +90,6 @@ SERVICES = (
|
||||||
9667, None, None,
|
9667, None, None,
|
||||||
('./bin/boulder', 'crl-storer', '--config', os.path.join(config_dir, 'crl-storer.json'), '--addr', ':9309', '--debug-addr', ':9667'),
|
('./bin/boulder', 'crl-storer', '--config', os.path.join(config_dir, 'crl-storer.json'), '--addr', ':9309', '--debug-addr', ':9667'),
|
||||||
('s3-test-srv',)),
|
('s3-test-srv',)),
|
||||||
Service('crl-updater',
|
|
||||||
8021, None, None,
|
|
||||||
('./bin/boulder', 'crl-updater', '--config', os.path.join(config_dir, 'crl-updater.json'), '--debug-addr', ':8021'),
|
|
||||||
('boulder-ca-1', 'boulder-ca-2', 'boulder-sa-1', 'boulder-sa-2', 'crl-storer')),
|
|
||||||
Service('boulder-ra-1',
|
Service('boulder-ra-1',
|
||||||
8002, 9394, 'ra.boulder',
|
8002, 9394, 'ra.boulder',
|
||||||
('./bin/boulder', 'boulder-ra', '--config', os.path.join(config_dir, 'ra.json'), '--addr', ':9394', '--debug-addr', ':8002'),
|
('./bin/boulder', 'boulder-ra', '--config', os.path.join(config_dir, 'ra.json'), '--addr', ':9394', '--debug-addr', ':8002'),
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue