crl-updater: split temporal/explicit sharding by serial (#7990)

When we turn on explicit sharding, we'll change the CA serial prefix, so
we can know that all issuance from the new prefixes uses explicit
sharding, and all issuance from the old prefixes uses temporal sharding.
This lets us avoid putting a revoked cert in two different CRL shards
(the temporal one and the explicit one).

To achieve this, the crl-updater gets a list of temporally sharded
serial prefixes. When it queries the `certificateStatus` table by date
(`GetRevokedCerts`), it will filter out explicitly sharded certificates:
those that don't have their prefix on the list.

Part of #7094
This commit is contained in:
Jacob Hoffman-Andrews 2025-02-04 08:45:46 -08:00 committed by GitHub
parent 2f8c6bc522
commit eda496606d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 191 additions and 15 deletions

View File

@ -91,6 +91,19 @@ type Config struct {
// of magnitude greater than our p99 update latency.
UpdateTimeout config.Duration `validate:"-"`
// TemporallyShardedSerialPrefixes is a list of prefixes that were used to
// issue certificates with no CRLDistributionPoints extension, and which are
// therefore temporally sharded. If it's non-empty, the CRL Updater will
// require matching serials when querying by temporal shard. When querying
// by explicit shard, any prefix is allowed.
//
// This should be set to the current set of serial prefixes in production.
// When deploying explicit sharding (i.e. the CRLDistributionPoints extension),
// the CAs should be configured with a new set of serial prefixes that haven't
// been used before (and the OCSP Responder config should be updated to
// recognize the new prefixes as well as the old ones).
TemporallyShardedSerialPrefixes []string
// MaxParallelism controls how many workers may be running in parallel.
// A higher value reduces the total time necessary to update all CRL shards
// that this updater is responsible for, but also increases the memory used
@ -176,6 +189,7 @@ func main() {
c.CRLUpdater.UpdateTimeout.Duration,
c.CRLUpdater.MaxParallelism,
c.CRLUpdater.MaxAttempts,
c.CRLUpdater.TemporallyShardedSerialPrefixes,
sac,
cac,
csc,

View File

@ -26,6 +26,7 @@ func TestRunOnce(t *testing.T) {
[]*issuance.Certificate{e1, r3},
2, 18*time.Hour, 24*time.Hour,
6*time.Hour, time.Minute, 1, 1,
nil,
&fakeSAC{revokedCerts: revokedCertsStream{err: errors.New("db no worky")}, maxNotAfter: clk.Now().Add(90 * 24 * time.Hour)},
&fakeCA{gcc: generateCRLStream{}},
&fakeStorer{uploaderStream: &noopUploader{}},

View File

@ -7,6 +7,7 @@ import (
"fmt"
"io"
"math"
"slices"
"time"
"github.com/jmhodges/clock"
@ -35,6 +36,8 @@ type crlUpdater struct {
maxParallelism int
maxAttempts int
temporallyShardedPrefixes []string
sa sapb.StorageAuthorityClient
ca capb.CRLGeneratorClient
cs cspb.CRLStorerClient
@ -55,6 +58,7 @@ func NewUpdater(
updateTimeout time.Duration,
maxParallelism int,
maxAttempts int,
temporallyShardedPrefixes []string,
sa sapb.StorageAuthorityClient,
ca capb.CRLGeneratorClient,
cs cspb.CRLStorerClient,
@ -113,6 +117,7 @@ func NewUpdater(
updateTimeout,
maxParallelism,
maxAttempts,
temporallyShardedPrefixes,
sa,
ca,
cs,
@ -207,9 +212,11 @@ func reRevoked(a *proto.CRLEntry, b *proto.CRLEntry) (*proto.CRLEntry, error) {
// addFromStream pulls `proto.CRLEntry` objects from a stream, adding them to the crlEntries map.
//
// Consolidates duplicates and checks for internal consistency of the results.
// If allowedSerialPrefixes is non-empty, only serials with that one-byte prefix (two hex-encoded
// bytes) will be accepted.
//
// Returns the number of entries received from the stream, regardless of duplicate status.
func addFromStream(crlEntries map[string]*proto.CRLEntry, stream crlStream) (int, error) {
// Returns the number of entries received from the stream, regardless of whether they were accepted.
func addFromStream(crlEntries map[string]*proto.CRLEntry, stream crlStream, allowedSerialPrefixes []string) (int, error) {
var count int
for {
entry, err := stream.Recv()
@ -220,6 +227,10 @@ func addFromStream(crlEntries map[string]*proto.CRLEntry, stream crlStream) (int
return 0, fmt.Errorf("retrieving entry from SA: %w", err)
}
count++
serialPrefix := entry.Serial[0:2]
if len(allowedSerialPrefixes) > 0 && !slices.Contains(allowedSerialPrefixes, serialPrefix) {
continue
}
previousEntry := crlEntries[entry.Serial]
if previousEntry == nil {
crlEntries[entry.Serial] = entry
@ -284,7 +295,7 @@ func (cu *crlUpdater) updateShard(ctx context.Context, atTime time.Time, issuerN
return fmt.Errorf("GetRevokedCerts: %w", err)
}
n, err := addFromStream(crlEntries, saStream)
n, err := addFromStream(crlEntries, saStream, cu.temporallyShardedPrefixes)
if err != nil {
return fmt.Errorf("streaming GetRevokedCerts: %w", err)
}
@ -308,7 +319,7 @@ func (cu *crlUpdater) updateShard(ctx context.Context, atTime time.Time, issuerN
return fmt.Errorf("GetRevokedCertsByShard: %w", err)
}
n, err := addFromStream(crlEntries, saStream)
n, err := addFromStream(crlEntries, saStream, nil)
if err != nil {
return fmt.Errorf("streaming GetRevokedCertsByShard: %w", err)
}

View File

@ -225,6 +225,7 @@ func TestUpdateShard(t *testing.T) {
[]*issuance.Certificate{e1, r3},
2, 18*time.Hour, 24*time.Hour,
6*time.Hour, time.Minute, 1, 1,
nil,
&fakeSAC{
revokedCerts: revokedCertsStream{},
maxNotAfter: clk.Now().Add(90 * 24 * time.Hour),
@ -405,6 +406,7 @@ func TestUpdateShardWithRetry(t *testing.T) {
[]*issuance.Certificate{e1, r3},
2, 18*time.Hour, 24*time.Hour,
6*time.Hour, time.Minute, 1, 1,
nil,
&fakeSAC{revokedCerts: revokedCertsStream{err: sentinelErr}, maxNotAfter: clk.Now().Add(90 * 24 * time.Hour)},
&fakeCA{gcc: generateCRLStream{}},
&fakeStorer{uploaderStream: &noopUploader{}},
@ -643,7 +645,7 @@ func TestAddFromStream(t *testing.T) {
crlEntries := make(map[string]*corepb.CRLEntry)
var err error
for _, input := range tc.inputs {
_, err = addFromStream(crlEntries, &revokedCertsStream{entries: input})
_, err = addFromStream(crlEntries, &revokedCertsStream{entries: input}, nil)
if err != nil {
break
}
@ -664,3 +666,37 @@ func TestAddFromStream(t *testing.T) {
})
}
}
func TestAddFromStreamDisallowedSerialPrefix(t *testing.T) {
now := time.Now()
yesterday := now.Add(-24 * time.Hour)
input := []*corepb.CRLEntry{
{
Serial: "abcdefg",
Reason: ocsp.CessationOfOperation,
RevokedAt: timestamppb.New(yesterday),
},
{
Serial: "01020304",
Reason: ocsp.CessationOfOperation,
RevokedAt: timestamppb.New(yesterday),
},
}
crlEntries := make(map[string]*corepb.CRLEntry)
var err error
_, err = addFromStream(
crlEntries,
&revokedCertsStream{entries: input},
[]string{"ab"},
)
if err != nil {
t.Fatalf("addFromStream: %s", err)
}
expected := map[string]*corepb.CRLEntry{
"abcdefg": input[0],
}
if !reflect.DeepEqual(crlEntries, expected) {
t.Errorf("addFromStream=%+v, want %+v", crlEntries, expected)
}
}

View File

@ -26,15 +26,24 @@ where that issuer's CRLs will be served.
## Shard assignment
Certificates are assigned to shards two ways: temporally and explicitly.
Certificates are assigned to shards one of two ways: temporally or explicitly.
Temporal shard assignment places certificates into shards based on their
notAfter. Explicit shard assignment places certificates into shards based
on the (random) low bytes of their serial numbers. All certificates
implicitly have a temporal shard. Only certificates with the
CRLDistributionPoints extension are considered to have an explicit shard.
on the (random) low bytes of their serial numbers.
As of Jan 2025, we are planning to start assigning explicit shards at
issuance time and then, after a transition period, turn off temporal sharding.
Boulder distinguishes the two types of sharding by the one-byte (two hex
encoded bytes) prefix on the serial number, configured at the CA.
When enabling explicit sharding at the CA, operators should at the same
time change the CA's configured serial prefix. Also, the crl-updater should
be configured with `temporallyShardedPrefixes` set to the _old_ serial prefix.
An explicitly sharded certificate will always have the CRLDistributionPoints
extension, containing a URL that points to its CRL shard. A temporally sharded
certificate will never have that extension.
As of Jan 2025, we are planning to turn on explicit sharding for new
certificates soon. Once all temporally sharded certificates have expired, we
will remove the code for temporal sharding.
## Storage
@ -61,7 +70,13 @@ crl-updater de-duplicates by serial number.
Explicit sharding is enabled at the CA by configuring each issuer with a number
of CRL shards. This number must be the same across all issuers and must match
the number of shards configured on the crl-updater.
the number of shards configured on the crl-updater. As part of the same config
deploy, the CA must be updated to issue using a new serial prefix. Note: the
ocsp-responder must also be updated to recognize the new serial prefix.
The crl-updater must also be updated to add the `temporallyShardedPrefixes`
field, listing the _old_ serial prefixes (i.e., those that were issued by a CA
that did not include the CRLDistributionPoints extension).
Once we've turned on explicit sharding, we can turn it back off. However, for
the certificates we've already issued, we are still committed to serving their

View File

@ -172,7 +172,7 @@
}
]
},
"serialPrefixHex": "7f",
"serialPrefixHex": "6e",
"maxNames": 100,
"lifespanOCSP": "96h",
"goodkey": {},

View File

@ -48,6 +48,9 @@
"lookbackPeriod": "24h",
"updatePeriod": "10m",
"updateTimeout": "1m",
"temporallyShardedSerialPrefixes": [
"7f"
],
"maxParallelism": 10,
"maxAttempts": 2,
"features": {}

View File

@ -57,7 +57,8 @@
"maxInflightSignings": 20,
"maxSigningWaiters": 100,
"requiredSerialPrefixes": [
"7f"
"7f",
"6e"
],
"features": {}
},

View File

@ -3,7 +3,11 @@
package integration
import (
"crypto/ecdsa"
"crypto/elliptic"
"crypto/rand"
"database/sql"
"fmt"
"io"
"net/http"
"os"
@ -56,7 +60,7 @@ func TestCRLPipeline(t *testing.T) {
configFile := path.Join(configDir, "crl-updater.json")
// Reset the "leasedUntil" column so that this test isn't dependent on state
// like priors runs of this test.
// like prior runs of this test.
db, err := sql.Open("mysql", vars.DBConnSAIntegrationFullPerms)
test.AssertNotError(t, err, "opening database connection")
_, err = db.Exec(`UPDATE crlShards SET leasedUntil = ?`, fc.Now().Add(-time.Minute))
@ -97,3 +101,94 @@ func TestCRLPipeline(t *testing.T) {
test.AssertEquals(t, string(reason), "5")
resp.Body.Close()
}
func TestTemporalAndExplicitShardingCoexist(t *testing.T) {
db, err := sql.Open("mysql", vars.DBConnSAIntegrationFullPerms)
if err != nil {
t.Fatalf("sql.Open: %s", err)
}
// Insert an old, revoked certificate in the certificateStatus table. Importantly this
// serial has the 7f prefix, which is in test/config-next/crl-updater.json in the
// `temporallyShardedPrefixes` list.
// Random serial that is unique to this test.
oldSerial := "7faa39be44fc95f3d19befe3cb715848e601"
// This is hardcoded to match one of the issuer names in our integration test environment's
// ca.json.
issuerID := 43104258997432926
_, err = db.Exec(`DELETE FROM certificateStatus WHERE serial = ?`, oldSerial)
if err != nil {
t.Fatalf("deleting old certificateStatus row: %s", err)
}
_, err = db.Exec(`
INSERT INTO certificateStatus (serial, issuerID, notAfter, status, ocspLastUpdated, revokedDate, revokedReason, lastExpirationNagSent)
VALUES (?, ?, ?, "revoked", NOW(), NOW(), 0, 0);`,
oldSerial, issuerID, time.Now().Add(24*time.Hour).Format("2006-01-02 15:04:05"))
if err != nil {
t.Fatalf("inserting old certificateStatus row: %s", err)
}
client, err := makeClient()
if err != nil {
t.Fatalf("creating acme client: %s", err)
}
certKey, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
if err != nil {
t.Fatalf("creating cert key: %s", err)
}
// Issue and revoke a certificate. In the config-next world, this will be an explicitly
// sharded certificate. In the config world, this will be a temporally sharded certificate
// (until we move `config` to explicit sharding). This means that in the config world,
// this test only handles temporal sharding, but we don't config-gate it because it passes
// in both worlds.
result, err := authAndIssue(client, certKey, []string{random_domain()}, true)
if err != nil {
t.Fatalf("authAndIssue: %s", err)
}
cert := result.certs[0]
err = client.RevokeCertificate(
client.Account,
cert,
client.PrivateKey,
0,
)
if err != nil {
t.Fatalf("revoking: %s", err)
}
// Reset the "leasedUntil" column to prepare for another round of CRLs.
fc := clock.NewFake()
_, err = db.Exec(`UPDATE crlShards SET leasedUntil = ?`, fc.Now().Add(-time.Minute))
if err != nil {
t.Fatalf("resetting crlShards.leasedUntil: %s", err)
}
runUpdater(t, path.Join(os.Getenv("BOULDER_CONFIG_DIR"), "crl-updater.json"))
allCRLs := getAllCRLs(t)
seen := make(map[string]bool)
// Range over CRLs from all issuers, because the "old" certificate (7faa...) has a
// different issuer than the "new" certificate issued by `authAndIssue`, which
// has a random issuer.
for _, crls := range allCRLs {
for _, crl := range crls {
for _, entry := range crl.RevokedCertificateEntries {
serial := fmt.Sprintf("%x", entry.SerialNumber)
if seen[serial] {
t.Errorf("revoked certificate %s seen on multiple CRLs", serial)
}
seen[serial] = true
}
}
}
newSerial := fmt.Sprintf("%x", cert.SerialNumber)
if !seen[newSerial] {
t.Errorf("revoked certificate %s not seen on any CRL", newSerial)
}
if !seen[oldSerial] {
t.Errorf("revoked certificate %s not seen on any CRL", oldSerial)
}
}