Stabilize CRL shard boundaries (#6445)
Add two new config keys to the crl-updater:

* shardWidth, which controls the width of the chunks that we divide all of time into, with a default value of "16h" (approximately the same as today's shard width derived from 128 shards covering 90 days); and
* lookbackPeriod, which controls how far back into already-expired certificates our CRLs should reach, to ensure that even certificates which are revoked immediately before they expire still show up in at least one CRL, with a default value of "24h" (approximately the same as today's lookback period derived from our run frequency of 6h).

Use these two new values to change the way CRL shards are computed. Previously, we would compute the total time we care about based on the configured certificate lifetime (to determine how far forward to look) and the configured update period (to determine how far back to look), and then divide that time evenly by the number of shards. However, this method had two fatal flaws. First, if the certificate lifetime is configured incorrectly, the crl-updater will fail to query the database for some certs that should be included in the CRLs. Second, if the update period is changed, the lookback period changes with it, which in turn changes the shard width, causing all CRL entries to suddenly change which shard they're in.

Instead, first compute all chunk locations based only on the shard width and the number of shards. Then determine which chunks we need to care about based on the configured lookback period and by querying the database for the farthest-future expiration, to ensure we cover all extant certificates. This may mean that more than one chunk of time gets mapped to a single shard, but that's okay -- each chunk remains mapped to the same shard for the whole time we care about it.

Fixes #6438
Fixes #6440
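To make the stability guarantee concrete, here is a minimal, hypothetical Go sketch (not part of this change) showing how a certificate's chunk index can be derived from nothing but its notAfter timestamp, the shard width, and the number of shards; anchorTime matches the fixed epoch used by the updater, while the chunkIndex helper and the example values are invented for illustration.

package main

import (
	"fmt"
	"time"
)

// anchorTime mirrors the fixed epoch used by the crl-updater.
func anchorTime() time.Time {
	return time.Date(2015, time.June, 4, 11, 4, 38, 0, time.UTC)
}

// chunkIndex (hypothetical helper) counts whole shard-widths elapsed since the
// anchor time and wraps around the number of shards, so the result depends only
// on notAfter, shardWidth, and numShards.
func chunkIndex(notAfter time.Time, shardWidth time.Duration, numShards int) int {
	chunksSinceAnchor := notAfter.Sub(anchorTime()).Nanoseconds() / shardWidth.Nanoseconds()
	return int(chunksSinceAnchor) % numShards
}

func main() {
	notAfter := time.Date(2023, time.March, 1, 0, 0, 0, 0, time.UTC)
	// Changing the update period or lookback period never changes this index,
	// so a revoked certificate never migrates between CRL shards.
	fmt.Println(chunkIndex(notAfter, 16*time.Hour, 128))
}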
This commit is contained in:
parent 9213bd0993
commit 6efd941e3c
@@ -4,6 +4,7 @@ import (
 	"context"
 	"flag"
 	"os"
+	"time"
 
 	"github.com/honeycombio/beeline-go"
 
@@ -36,6 +37,22 @@ type Config struct {
 	// in CCADB MUST be updated.
 	NumShards int
 
+	// ShardWidth is the amount of time (width on a timeline) that a single
+	// shard should cover. Ideally, NumShards*ShardWidth should be an amount of
+	// time noticeably larger than the current longest certificate lifetime,
+	// but the updater will continue to work if this is not the case (albeit
+	// with more confusing mappings of serials to shards).
+	// WARNING: When this number is changed, revocation entries will move
+	// between shards.
+	ShardWidth cmd.ConfigDuration
+
+	// LookbackPeriod is how far back the updater should look for revoked expired
+	// certificates. We are required to include every revoked cert in at least
+	// one CRL, even if it is revoked seconds before it expires, so this must
+	// always be greater than the UpdatePeriod, and should be increased when
+	// recovering from an outage to ensure continuity of coverage.
+	LookbackPeriod cmd.ConfigDuration
+
 	// CertificateLifetime is the validity period (usually expressed in hours,
 	// like "2160h") of the longest-lived currently-unexpired certificate. For
 	// Let's Encrypt, this is usually ninety days. If the validity period of
@@ -43,6 +60,8 @@ type Config struct {
 	// immediately; if the validity period of the issued certificates ever
 	// changes downwards, the value must not change until after all certificates with
 	// the old validity period have expired.
+	// DEPRECATED: This config value is no longer used.
+	// TODO(#6438): Remove this value.
 	CertificateLifetime cmd.ConfigDuration
 
 	// UpdatePeriod controls how frequently the crl-updater runs and publishes
@@ -113,6 +132,13 @@ func main() {
 		issuers = append(issuers, cert)
 	}
 
+	if c.CRLUpdater.ShardWidth.Duration == 0 {
+		c.CRLUpdater.ShardWidth.Duration = 16 * time.Hour
+	}
+	if c.CRLUpdater.LookbackPeriod.Duration == 0 {
+		c.CRLUpdater.LookbackPeriod.Duration = 24 * time.Hour
+	}
+
 	clientMetrics := bgrpc.NewClientMetrics(scope)
 
 	saConn, err := bgrpc.ClientSetup(c.CRLUpdater.SAService, tlsConfig, clientMetrics, clk)
@@ -130,7 +156,8 @@ func main() {
 	u, err := updater.NewUpdater(
 		issuers,
 		c.CRLUpdater.NumShards,
-		c.CRLUpdater.CertificateLifetime.Duration,
+		c.CRLUpdater.ShardWidth.Duration,
+		c.CRLUpdater.LookbackPeriod.Duration,
 		c.CRLUpdater.UpdatePeriod.Duration,
 		c.CRLUpdater.UpdateOffset.Duration,
 		c.CRLUpdater.MaxParallelism,
@@ -14,6 +14,7 @@ import (
 
 	"github.com/jmhodges/clock"
 	"github.com/prometheus/client_golang/prometheus"
+	"google.golang.org/protobuf/types/known/emptypb"
 
 	capb "github.com/letsencrypt/boulder/ca/proto"
 	"github.com/letsencrypt/boulder/core/proto"
@@ -25,13 +26,13 @@
 )
 
 type crlUpdater struct {
-	issuers           map[issuance.IssuerNameID]*issuance.Certificate
-	numShards         int
-	lookbackPeriod    time.Duration
-	lookforwardPeriod time.Duration
-	updatePeriod      time.Duration
-	updateOffset      time.Duration
-	maxParallelism    int
+	issuers        map[issuance.IssuerNameID]*issuance.Certificate
+	numShards      int
+	shardWidth     time.Duration
+	lookbackPeriod time.Duration
+	updatePeriod   time.Duration
+	updateOffset   time.Duration
+	maxParallelism int
 
 	sa sapb.StorageAuthorityClient
 	ca capb.CRLGeneratorClient
@@ -47,7 +48,8 @@ type crlUpdater struct {
 func NewUpdater(
 	issuers []*issuance.Certificate,
 	numShards int,
-	certLifetime time.Duration,
+	shardWidth time.Duration,
+	lookbackPeriod time.Duration,
 	updatePeriod time.Duration,
 	updateOffset time.Duration,
 	maxParallelism int,
@@ -75,24 +77,8 @@ func NewUpdater(
 		return nil, fmt.Errorf("update offset must be less than period: %s !< %s", updateOffset, updatePeriod)
 	}
 
-	// Set the lookback period to be significantly greater than the update period.
-	// This guarantees that a certificate which was revoked very shortly before it
-	// expired will still appear on at least one CRL, as required by RFC 5280
-	// Section 3.3.
-	lookbackPeriod := 4 * updatePeriod
-
-	// Set the lookforward period to be greater than the lifetime of the longest
-	// currently-valid certificate. Ensure it overshoots by more than the width
-	// of one shard. See comment on getShardBoundaries for details.
-	tentativeShardWidth := (lookbackPeriod + certLifetime).Nanoseconds() / int64(numShards)
-	lookforwardPeriod := certLifetime + time.Duration(4*tentativeShardWidth)
-
-	// Ensure that the total window (lookback + lookforward) is evenly divisible
-	// by the number of shards, to make shard boundary calculations easy.
-	window := lookbackPeriod + lookforwardPeriod
-	offset := window.Nanoseconds() % int64(numShards)
-	if offset != 0 {
-		lookforwardPeriod += time.Duration(int64(numShards) - offset)
+	if lookbackPeriod < 2*updatePeriod {
+		return nil, fmt.Errorf("lookbackPeriod must be at least 2x updatePeriod: %s !< 2 * %s", lookbackPeriod, updatePeriod)
 	}
 
 	if maxParallelism <= 0 {
@@ -115,8 +101,8 @@ func NewUpdater(
 	return &crlUpdater{
 		issuersByNameID,
 		numShards,
+		shardWidth,
 		lookbackPeriod,
-		lookforwardPeriod,
 		updatePeriod,
 		updateOffset,
 		maxParallelism,
@@ -232,6 +218,11 @@ func (cu *crlUpdater) tickIssuer(ctx context.Context, atTime time.Time, issuerNa
 	}()
 	cu.log.Debugf("Ticking issuer %d at time %s", issuerNameID, atTime)
 
+	shardMap, err := cu.getShardMappings(ctx, atTime)
+	if err != nil {
+		return fmt.Errorf("computing shardmap: %w", err)
+	}
+
 	type shardResult struct {
 		shardIdx int
 		err      error
@@ -245,7 +236,7 @@ func (cu *crlUpdater) tickIssuer(ctx context.Context, atTime time.Time, issuerNa
 			default:
 				out <- shardResult{
 					shardIdx: idx,
-					err:      cu.tickShard(ctx, atTime, issuerNameID, idx),
+					err:      cu.tickShard(ctx, atTime, issuerNameID, idx, shardMap[idx]),
 				}
 			}
 		}
@@ -284,7 +275,7 @@ func (cu *crlUpdater) tickIssuer(ctx context.Context, atTime time.Time, issuerNa
 // the list of revoked certs in that shard from the SA, gets the CA to sign the
 // resulting CRL, and gets the crl-storer to upload it. It returns an error if
 // any of these operations fail.
-func (cu *crlUpdater) tickShard(ctx context.Context, atTime time.Time, issuerNameID issuance.IssuerNameID, shardIdx int) (err error) {
+func (cu *crlUpdater) tickShard(ctx context.Context, atTime time.Time, issuerNameID issuance.IssuerNameID, shardIdx int, chunks []chunk) (err error) {
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel()
 
@@ -301,41 +292,37 @@ func (cu *crlUpdater) tickShard(ctx context.Context, atTime time.Time, issuerNam
 		cu.updatedCounter.WithLabelValues(cu.issuers[issuerNameID].Subject.CommonName, result).Inc()
 	}()
 
-	expiresAfter, expiresBefore, err := cu.getShardBoundaries(atTime, shardIdx)
-	if err != nil {
-		return err
-	}
-
 	cu.log.Infof(
-		"Generating CRL shard: id=[%s] expiresAfter=[%s] expiresBefore=[%s]",
-		crlID, expiresAfter, expiresBefore)
+		"Generating CRL shard: id=[%s] numChunks=[%d]", crlID, len(chunks))
 
 	// Get the full list of CRL Entries for this shard from the SA.
-	saStream, err := cu.sa.GetRevokedCerts(ctx, &sapb.GetRevokedCertsRequest{
-		IssuerNameID:  int64(issuerNameID),
-		ExpiresAfter:  expiresAfter.UnixNano(),
-		ExpiresBefore: expiresBefore.UnixNano(),
-		RevokedBefore: atTime.UnixNano(),
-	})
-	if err != nil {
-		return fmt.Errorf("connecting to SA: %w", err)
-	}
-
-	var crlEntries []*proto.CRLEntry
-	for {
-		entry, err := saStream.Recv()
-		if err != nil {
-			if err == io.EOF {
-				break
-			}
-			return fmt.Errorf("retrieving entry from SA: %w", err)
-		}
-		crlEntries = append(crlEntries, entry)
-	}
-
-	cu.log.Infof(
-		"Queried SA for CRL shard: id=[%s] numEntries=[%d]",
-		crlID, len(crlEntries))
+	var crlEntries []*proto.CRLEntry
+	for _, chunk := range chunks {
+		saStream, err := cu.sa.GetRevokedCerts(ctx, &sapb.GetRevokedCertsRequest{
+			IssuerNameID:  int64(issuerNameID),
+			ExpiresAfter:  chunk.start.UnixNano(),
+			ExpiresBefore: chunk.end.UnixNano(),
+			RevokedBefore: atTime.UnixNano(),
+		})
+		if err != nil {
+			return fmt.Errorf("connecting to SA: %w", err)
+		}
+
+		for {
+			entry, err := saStream.Recv()
+			if err != nil {
+				if err == io.EOF {
+					break
+				}
+				return fmt.Errorf("retrieving entry from SA: %w", err)
+			}
+			crlEntries = append(crlEntries, entry)
+		}
+
+		cu.log.Infof(
+			"Queried SA for CRL shard: id=[%s] expiresAfter=[%s] expiresBefore=[%s] numEntries=[%d]",
+			crlID, chunk.start, chunk.end, len(crlEntries))
+	}
 
 	// Send the full list of CRL Entries to the CA.
 	caStream, err := cu.ca.GenerateCRL(ctx)
@@ -440,87 +427,116 @@ func anchorTime() time.Time {
 	return time.Date(2015, time.June, 04, 11, 04, 38, 0, time.UTC)
 }
 
-// getShardBoundaries computes the start (inclusive) and end (exclusive) times
-// for a given integer-indexed CRL shard. The idea here is that shards should be
-// stable. Picture a timeline, divided into chunks. Number those chunks from 0
-// (starting at the anchor time) up to cu.numShards, then repeat the cycle when
-// you run out of numbers:
-//
-//	chunk:  5     0     1     2     3     4     5     0     1     2     3
-//	...-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----...
-//	                          ^    ^-atTime                                  ^
-//	   atTime-lookbackPeriod-┘                      atTime+lookforwardPeriod-┘
-//
-// The width of each chunk is determined by dividing the total time window we
-// care about (lookbackPeriod+lookforwardPeriod) by the number of shards we
-// want (numShards).
-//
-// Even as "now" (atTime) moves forward, and the total window of expiration
-// times that we care about moves forward, the boundaries of each chunk remain
-// stable:
-//
-//	chunk:  5     0     1     2     3     4     5     0     1     2     3
-//	...-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----...
-//	                                ^    ^-atTime                                  ^
-//	         atTime-lookbackPeriod-┘                      atTime+lookforwardPeriod-┘
-//
-// However, note that at essentially all times the window includes parts of two
-// different instances of the chunk which appears at its ends. For example,
-// in the second diagram above, the window includes almost all of the middle
-// chunk labeled "3", but also includes just a little bit of the rightmost chunk
-// also labeled "3".
-//
-// In order to handle this case, this function always treats the *leftmost*
-// (i.e. earliest) chunk with the given ID that has *any* overlap with the
-// current window as the current shard. It returns the boundaries of this chunk
-// as the boundaries of the desired shard. In the diagram below, even though
-// there is another chunk with ID "1" near the right-hand edge of the window,
-// that chunk is ignored.
-//
-//	shard:        |  1  |  2  |  3  |  4  |  5  |  0  |
-//	...-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----...
-//	                                ^    ^-atTime                                  ^
-//	         atTime-lookbackPeriod-┘                      atTime+lookforwardPeriod-┘
-//
-// This means that the lookforwardPeriod MUST be configured large enough that
-// there is a buffer of at least one whole chunk width between the actual
-// furthest-future expiration (generally atTime+90d) and the right-hand edge of
-// the window (atTime+lookforwardPeriod).
-func (cu *crlUpdater) getShardBoundaries(atTime time.Time, shardIdx int) (time.Time, time.Time, error) {
-	// Ensure that the given shard index falls within the space of acceptable indices.
-	shardIdx = shardIdx % cu.numShards
+// chunk represents a fixed slice of time during which some certificates
+// presumably expired or will expire. Its non-unique index indicates which shard
+// it will be mapped to. The start boundary is inclusive, the end boundary is
+// exclusive.
+type chunk struct {
+	start time.Time
+	end   time.Time
+	idx   int
+}
 
-	// Compute the width of the full window.
-	windowWidth := cu.lookbackPeriod + cu.lookforwardPeriod
+// shardMap is a mapping of shard indices to the set of chunks which should be
+// included in that shard. Under most circumstances there is a one-to-one
+// mapping, but certain configuration (such as having very narrow shards, or
+// having a very long lookback period) can result in more than one chunk being
+// mapped to a single shard.
+type shardMap [][]chunk
 
+// getShardMappings determines which chunks are currently relevant, based on
+// the current time, the configured lookbackPeriod, and the farthest-future
+// certificate expiration in the database. It then maps all of those chunks to
+// their corresponding shards, and returns that mapping.
+//
+// The idea here is that shards should be stable. Picture a timeline, divided
+// into chunks. Number those chunks from 0 (starting at the anchor time) up to
+// numShards, then repeat the cycle when you run out of numbers:
+//
+//	chunk:  0     1     2     3     4     0     1     2     3     4     0
+//	|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----...
+//	^-anchorTime
+//
+// The total time window we care about goes from atTime-lookbackPeriod, forward
+// through the time of the farthest-future notAfter date found in the database.
+// The lookbackPeriod must be larger than the updatePeriod, to ensure that any
+// certificates which were both revoked *and* expired since the last time we
+// issued CRLs get included in this generation. Because these times are likely
+// to fall in the middle of chunks, we include the whole chunks surrounding
+// those times in our output CRLs:
+//
+//	included chunk:  4     0     1     2     3     4     0     1
+//	...--|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----...
+//	  atTime-lookbackPeriod-^     ^-atTime              lastExpiry-^
+//
+// Because this total period of time may include multiple chunks with the same
+// number, we then coalesce these chunks into a single shard. Ideally, this
+// will never happen: it should only happen if the lookbackPeriod is very
+// large, or if the shardWidth is small compared to the lastExpiry (such that
+// numShards * shardWidth is less than lastExpiry - atTime). In this example,
+// shards 0, 1, and 4 all get the contents of two chunks mapped to them, while
+// shards 2 and 3 get only one chunk each.
+//
+//	included chunk:  4     0     1     2     3     4     0     1
+//	...--|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----...
+//	        │     │     │     │     │     │     │     │
+//	shard 0: <────────────────┘─────────────────────────────┘     │
+//	shard 1: <──────────────────────┘─────────────────────────────┘
+//	shard 2: <────────────────────────────┘     │     │
+//	shard 3: <──────────────────────────────────┘     │
+//	shard 4: <──────────┘─────────────────────────────┘
+//
+// Under this scheme, the shard to which any given certificate will be mapped is
+// a function of only three things: that certificate's notAfter timestamp, the
+// chunk width, and the number of shards.
+func (cu *crlUpdater) getShardMappings(ctx context.Context, atTime time.Time) (shardMap, error) {
+	res := make(shardMap, cu.numShards)
+
+	// Get the farthest-future expiration timestamp to ensure we cover everything.
+	lastExpiry, err := cu.sa.GetMaxExpiration(ctx, &emptypb.Empty{})
+	if err != nil {
+		return nil, err
+	}
+
+	// Find the id number and boundaries of the earliest chunk we care about.
+	first := atTime.Add(-cu.lookbackPeriod)
+	c, err := cu.getChunkAtTime(first)
+	if err != nil {
+		return nil, err
+	}
+
+	// Iterate over chunks until we get completely beyond the farthest-future
+	// expiration.
+	for c.start.Before(lastExpiry.AsTime()) {
+		res[c.idx] = append(res[c.idx], c)
+		c = chunk{
+			start: c.end,
+			end:   c.end.Add(cu.shardWidth),
+			idx:   (c.idx + 1) % cu.numShards,
+		}
+	}
+
+	return res, nil
+}
+
+// getChunkAtTime returns the chunk whose boundaries contain the given time.
+// It is broken out solely for the purpose of unit testing.
+func (cu *crlUpdater) getChunkAtTime(atTime time.Time) (chunk, error) {
 	// Compute the amount of time between the current time and the anchor time.
 	timeSinceAnchor := atTime.Sub(anchorTime())
-	if timeSinceAnchor == time.Duration(math.MaxInt64) || timeSinceAnchor == time.Duration(math.MinInt64) {
-		return time.Time{}, time.Time{}, errors.New("shard boundary math broken: anchor time too far away")
+	if timeSinceAnchor == time.Duration(math.MaxInt64) || timeSinceAnchor < 0 {
+		return chunk{}, errors.New("shard boundary math broken: anchor time too far away")
 	}
 
-	// Compute the amount of time between the left-hand edge of the most recent
-	// "0" chunk and the current time.
-	timeSinceZeroChunk := time.Duration(timeSinceAnchor.Nanoseconds() % windowWidth.Nanoseconds())
-	// Compute the left-hand edge of the most recent "0" chunk.
-	zeroStart := atTime.Add(-timeSinceZeroChunk)
+	// Determine how many full chunks fit within that time, and from that the
+	// index number of the desired chunk.
+	chunksSinceAnchor := timeSinceAnchor.Nanoseconds() / cu.shardWidth.Nanoseconds()
+	chunkIdx := int(chunksSinceAnchor) % cu.numShards
 
-	// Compute the width of a single shard.
-	shardWidth := time.Duration(windowWidth.Nanoseconds() / int64(cu.numShards))
-	// Compute the amount of time between the left-hand edge of the most recent
-	// "0" chunk and the left-hand edge of the desired chunk.
-	shardOffset := time.Duration(int64(shardIdx) * shardWidth.Nanoseconds())
-	// Compute the left-hand edge of the most recent chunk with the given index.
-	shardStart := zeroStart.Add(shardOffset)
-	// Compute the right-hand edge of the most recent chunk with the given index.
-	shardEnd := shardStart.Add(shardWidth)
+	// Determine the boundaries of the chunk.
+	timeSinceChunk := time.Duration(timeSinceAnchor.Nanoseconds() % cu.shardWidth.Nanoseconds())
+	left := atTime.Add(-timeSinceChunk)
+	right := left.Add(cu.shardWidth)
 
-	// But the shard boundaries we just computed might be for a chunk that is
-	// completely behind the left-hand edge of our current window. If they are,
-	// bump them forward by one window width to bring them inside our window.
-	if shardEnd.Before(atTime.Add(-cu.lookbackPeriod)) {
-		shardStart = shardStart.Add(windowWidth)
-		shardEnd = shardEnd.Add(windowWidth)
-	}
-	return shardStart, shardEnd, nil
+	return chunk{left, right, chunkIdx}, nil
 }
@@ -9,6 +9,7 @@ import (
 
 	"google.golang.org/grpc"
+	"google.golang.org/protobuf/types/known/emptypb"
 	"google.golang.org/protobuf/types/known/timestamppb"
 
 	"github.com/jmhodges/clock"
 	capb "github.com/letsencrypt/boulder/ca/proto"
@@ -46,16 +47,22 @@ func (f *fakeGRCC) Recv() (*corepb.CRLEntry, error) {
 }
 
 // fakeSAC is a fake sapb.StorageAuthorityClient which can be populated with a
-// fakeGRCC to be used as the return value for calls to GetRevokedCerts.
+// fakeGRCC to be used as the return value for calls to GetRevokedCerts, and a
+// fake timestamp to serve as the database's maximum notAfter value.
 type fakeSAC struct {
 	mocks.StorageAuthority
-	grcc fakeGRCC
+	grcc        fakeGRCC
+	maxNotAfter time.Time
 }
 
 func (f *fakeSAC) GetRevokedCerts(ctx context.Context, _ *sapb.GetRevokedCertsRequest, _ ...grpc.CallOption) (sapb.StorageAuthority_GetRevokedCertsClient, error) {
 	return &f.grcc, nil
 }
 
+func (f *fakeSAC) GetMaxExpiration(_ context.Context, req *emptypb.Empty, _ ...grpc.CallOption) (*timestamppb.Timestamp, error) {
+	return timestamppb.New(f.maxNotAfter), nil
+}
+
 // fakeGCC is a fake capb.CRLGenerator_GenerateCRLClient which can be
 // populated with some CRL entries or an error for use as the return value of
 // a faked GenerateCRL call.
@@ -134,17 +141,25 @@ func TestTickShard(t *testing.T) {
 
 	sentinelErr := errors.New("oops")
 
+	clk := clock.NewFake()
+	clk.Set(time.Date(2020, time.January, 1, 0, 0, 0, 0, time.UTC))
 	cu, err := NewUpdater(
-		[]*issuance.Certificate{e1, r3}, 2, 10*24*time.Hour, 24*time.Hour, 0, 1,
-		&fakeSAC{grcc: fakeGRCC{}},
+		[]*issuance.Certificate{e1, r3},
+		2, 18*time.Hour, 24*time.Hour,
+		6*time.Hour, 1*time.Minute, 1,
+		&fakeSAC{grcc: fakeGRCC{}, maxNotAfter: clk.Now().Add(90 * 24 * time.Hour)},
 		&fakeCGC{gcc: fakeGCC{}},
 		&fakeCSC{ucc: fakeUCC{}},
-		metrics.NoopRegisterer, blog.NewMock(), clock.NewFake(),
+		metrics.NoopRegisterer, blog.NewMock(), clk,
 	)
 	test.AssertNotError(t, err, "building test crlUpdater")
 
+	testChunks := []chunk{
+		{clk.Now(), clk.Now().Add(18 * time.Hour), 0},
+	}
+
 	// Ensure that getting no results from the SA still works.
-	err = cu.tickShard(context.Background(), cu.clk.Now(), e1.NameID(), 0)
+	err = cu.tickShard(context.Background(), cu.clk.Now(), e1.NameID(), 0, testChunks)
 	test.AssertNotError(t, err, "empty CRL")
 	test.AssertMetricWithLabelsEquals(t, cu.updatedCounter, prometheus.Labels{
 		"issuer": "(TEST) Elegant Elephant E1", "result": "success",
@@ -153,7 +168,7 @@ func TestTickShard(t *testing.T) {
 
 	// Errors closing the Storer upload stream should bubble up.
 	cu.cs = &fakeCSC{ucc: fakeUCC{recvErr: sentinelErr}}
-	err = cu.tickShard(context.Background(), cu.clk.Now(), e1.NameID(), 0)
+	err = cu.tickShard(context.Background(), cu.clk.Now(), e1.NameID(), 0, testChunks)
 	test.AssertError(t, err, "storer error")
 	test.AssertContains(t, err.Error(), "closing CRLStorer upload stream")
 	test.AssertErrorIs(t, err, sentinelErr)
@@ -164,7 +179,7 @@ func TestTickShard(t *testing.T) {
 
 	// Errors sending to the Storer should bubble up sooner.
 	cu.cs = &fakeCSC{ucc: fakeUCC{sendErr: sentinelErr}}
-	err = cu.tickShard(context.Background(), cu.clk.Now(), e1.NameID(), 0)
+	err = cu.tickShard(context.Background(), cu.clk.Now(), e1.NameID(), 0, testChunks)
 	test.AssertError(t, err, "storer error")
 	test.AssertContains(t, err.Error(), "sending CRLStorer metadata")
 	test.AssertErrorIs(t, err, sentinelErr)
@@ -175,7 +190,7 @@ func TestTickShard(t *testing.T) {
 
 	// Errors reading from the CA should bubble up sooner.
 	cu.ca = &fakeCGC{gcc: fakeGCC{recvErr: sentinelErr}}
-	err = cu.tickShard(context.Background(), cu.clk.Now(), e1.NameID(), 0)
+	err = cu.tickShard(context.Background(), cu.clk.Now(), e1.NameID(), 0, testChunks)
 	test.AssertError(t, err, "CA error")
 	test.AssertContains(t, err.Error(), "receiving CRL bytes")
 	test.AssertErrorIs(t, err, sentinelErr)
@@ -186,7 +201,7 @@ func TestTickShard(t *testing.T) {
 
 	// Errors sending to the CA should bubble up sooner.
 	cu.ca = &fakeCGC{gcc: fakeGCC{sendErr: sentinelErr}}
-	err = cu.tickShard(context.Background(), cu.clk.Now(), e1.NameID(), 0)
+	err = cu.tickShard(context.Background(), cu.clk.Now(), e1.NameID(), 0, testChunks)
 	test.AssertError(t, err, "CA error")
 	test.AssertContains(t, err.Error(), "sending CA metadata")
 	test.AssertErrorIs(t, err, sentinelErr)
@@ -196,8 +211,8 @@ func TestTickShard(t *testing.T) {
 	cu.updatedCounter.Reset()
 
 	// Errors reading from the SA should bubble up soonest.
-	cu.sa = &fakeSAC{grcc: fakeGRCC{err: sentinelErr}}
-	err = cu.tickShard(context.Background(), cu.clk.Now(), e1.NameID(), 0)
+	cu.sa = &fakeSAC{grcc: fakeGRCC{err: sentinelErr}, maxNotAfter: clk.Now().Add(90 * 24 * time.Hour)}
+	err = cu.tickShard(context.Background(), cu.clk.Now(), e1.NameID(), 0, testChunks)
 	test.AssertError(t, err, "database error")
 	test.AssertContains(t, err.Error(), "retrieving entry from SA")
 	test.AssertErrorIs(t, err, sentinelErr)
@@ -214,12 +229,16 @@ func TestTickIssuer(t *testing.T) {
 	test.AssertNotError(t, err, "loading test issuer")
 
 	mockLog := blog.NewMock()
+	clk := clock.NewFake()
+	clk.Set(time.Date(2020, time.January, 1, 0, 0, 0, 0, time.UTC))
 	cu, err := NewUpdater(
-		[]*issuance.Certificate{e1, r3}, 2, 10*24*time.Hour, 24*time.Hour, 0, 1,
-		&fakeSAC{grcc: fakeGRCC{err: errors.New("db no worky")}},
+		[]*issuance.Certificate{e1, r3},
+		2, 18*time.Hour, 24*time.Hour,
+		6*time.Hour, 1*time.Minute, 1,
+		&fakeSAC{grcc: fakeGRCC{err: errors.New("db no worky")}, maxNotAfter: clk.Now().Add(90 * 24 * time.Hour)},
 		&fakeCGC{gcc: fakeGCC{}},
 		&fakeCSC{ucc: fakeUCC{}},
-		metrics.NoopRegisterer, mockLog, clock.NewFake(),
+		metrics.NoopRegisterer, mockLog, clk,
 	)
 	test.AssertNotError(t, err, "building test crlUpdater")
 
@@ -246,12 +265,16 @@ func TestTick(t *testing.T) {
 	test.AssertNotError(t, err, "loading test issuer")
 
 	mockLog := blog.NewMock()
+	clk := clock.NewFake()
+	clk.Set(time.Date(2020, time.January, 1, 0, 0, 0, 0, time.UTC))
 	cu, err := NewUpdater(
-		[]*issuance.Certificate{e1, r3}, 2, 10*24*time.Hour, 24*time.Hour, 0, 1,
-		&fakeSAC{grcc: fakeGRCC{err: errors.New("db no worky")}},
+		[]*issuance.Certificate{e1, r3},
+		2, 18*time.Hour, 24*time.Hour,
+		6*time.Hour, 1*time.Minute, 1,
+		&fakeSAC{grcc: fakeGRCC{err: errors.New("db no worky")}, maxNotAfter: clk.Now().Add(90 * 24 * time.Hour)},
 		&fakeCGC{gcc: fakeGCC{}},
 		&fakeCSC{ucc: fakeUCC{}},
-		metrics.NoopRegisterer, mockLog, clock.NewFake(),
+		metrics.NoopRegisterer, mockLog, clk,
 	)
 	test.AssertNotError(t, err, "building test crlUpdater")
 
@@ -277,48 +300,121 @@ func TestTick(t *testing.T) {
 	cu.tickHistogram.Reset()
 }
 
-func TestGetWindowForShard(t *testing.T) {
-	// Our test updater divides a 107-day window into 107 shards, resulting in a
-	// shard width of 24 hours.
-	tcu := crlUpdater{
-		numShards:         107,
-		lookbackPeriod:    7 * 24 * time.Hour,
-		lookforwardPeriod: 100 * 24 * time.Hour,
-	}
-
-	// At just a moment past the anchor time, the zeroth shard should start at
-	// time 0, and end exactly one day later.
-	start, end, err := tcu.getShardBoundaries(anchorTime().Add(time.Minute), 0)
-	test.AssertNotError(t, err, "")
-	test.Assert(t, start.Equal(anchorTime()), "start time should be the anchor time")
-	test.AssertEquals(t, end, anchorTime().Add(24*time.Hour))
-
-	// At the same moment, the 93rd shard should start 93 days later.
-	start, end, err = tcu.getShardBoundaries(anchorTime().Add(time.Minute), 93)
-	test.AssertNotError(t, err, "")
-	test.AssertEquals(t, start, anchorTime().Add(93*24*time.Hour))
-	test.AssertEquals(t, end, anchorTime().Add(94*24*time.Hour))
-
-	// If we jump 100 days into the future, now the 0th shard should start 107
-	// days after the zero time.
-	start, end, err = tcu.getShardBoundaries(anchorTime().Add(100*24*time.Hour+time.Minute), 0)
-	test.AssertNotError(t, err, "")
-	test.AssertEquals(t, start, anchorTime().Add(107*24*time.Hour))
-	test.AssertEquals(t, end, anchorTime().Add(108*24*time.Hour))
-
-	// During day 100, the 93rd shard should still start at the same time (just
-	// over 7 days ago), because we haven't fully left it behind yet. The 92nd
-	// shard, however, should have jumped into the future.
-	start, end, err = tcu.getShardBoundaries(anchorTime().Add(100*24*time.Hour+time.Minute), 93)
-	test.AssertNotError(t, err, "")
-	test.AssertEquals(t, start, anchorTime().Add(93*24*time.Hour))
-	test.AssertEquals(t, end, anchorTime().Add(94*24*time.Hour))
-	start, end, err = tcu.getShardBoundaries(anchorTime().Add(100*24*time.Hour+time.Minute), 92)
-	test.AssertNotError(t, err, "")
-	test.AssertEquals(t, start, anchorTime().Add(199*24*time.Hour))
-	test.AssertEquals(t, end, anchorTime().Add(200*24*time.Hour))
-
-	// If we jump more than 290 years into the future, the math should break.
-	_, _, err = tcu.getShardBoundaries(anchorTime().Add(150*365*24*time.Hour).Add(150*365*24*time.Hour), 0)
-	test.AssertError(t, err, "")
-}
+func TestGetShardMappings(t *testing.T) {
+	// We set atTime to be exactly one day (numShards * shardWidth) after the
+	// anchorTime for these tests, so that we know that the index of the first
+	// chunk we would normally (i.e. not taking lookback or overshoot into
+	// account) care about is 0.
+	atTime := anchorTime().Add(24 * time.Hour)
+
+	// When there is no lookback, and the maxNotAfter is exactly as far in the
+	// future as the numShards * shardWidth looks, every shard should be mapped to
+	// exactly one chunk.
+	tcu := crlUpdater{
+		numShards:      24,
+		shardWidth:     1 * time.Hour,
+		sa:             &fakeSAC{maxNotAfter: atTime.Add(23*time.Hour + 30*time.Minute)},
+		lookbackPeriod: 0,
+	}
+	m, err := tcu.getShardMappings(context.Background(), atTime)
+	test.AssertNotError(t, err, "getting aligned shards")
+	test.AssertEquals(t, len(m), 24)
+	for _, s := range m {
+		test.AssertEquals(t, len(s), 1)
+	}
+
+	// When there is 1.5 hours each of lookback and maxNotAfter overshoot, then
+	// there should be four shards which each get two chunks mapped to them.
+	tcu = crlUpdater{
+		numShards:      24,
+		shardWidth:     1 * time.Hour,
+		sa:             &fakeSAC{maxNotAfter: atTime.Add(24*time.Hour + 90*time.Minute)},
+		lookbackPeriod: 90 * time.Minute,
+	}
+	m, err = tcu.getShardMappings(context.Background(), atTime)
+	test.AssertNotError(t, err, "getting overshoot shards")
+	test.AssertEquals(t, len(m), 24)
+	for i, s := range m {
+		if i == 0 || i == 1 || i == 22 || i == 23 {
+			test.AssertEquals(t, len(s), 2)
+		} else {
+			test.AssertEquals(t, len(s), 1)
+		}
+	}
+
+	// When there is a massive amount of overshoot, many chunks should be mapped
+	// to each shard.
+	tcu = crlUpdater{
+		numShards:      24,
+		shardWidth:     1 * time.Hour,
+		sa:             &fakeSAC{maxNotAfter: atTime.Add(90 * 24 * time.Hour)},
+		lookbackPeriod: time.Minute,
+	}
+	m, err = tcu.getShardMappings(context.Background(), atTime)
+	test.AssertNotError(t, err, "getting overshoot shards")
+	test.AssertEquals(t, len(m), 24)
+	for i, s := range m {
+		if i == 23 {
+			test.AssertEquals(t, len(s), 91)
+		} else {
+			test.AssertEquals(t, len(s), 90)
+		}
+	}
+
+	// An arbitrarily-chosen chunk should always end up in the same shard no
+	// matter what the current time, lookback, and overshoot are, as long as the
+	// number of shards and the shard width remains constant.
+	tcu = crlUpdater{
+		numShards:      24,
+		shardWidth:     1 * time.Hour,
+		sa:             &fakeSAC{maxNotAfter: atTime.Add(24 * time.Hour)},
+		lookbackPeriod: time.Hour,
+	}
+	m, err = tcu.getShardMappings(context.Background(), atTime)
+	test.AssertNotError(t, err, "getting consistency shards")
+	test.AssertEquals(t, m[10][0].start, anchorTime().Add(34*time.Hour))
+	tcu.lookbackPeriod = 4 * time.Hour
+	m, err = tcu.getShardMappings(context.Background(), atTime)
+	test.AssertNotError(t, err, "getting consistency shards")
+	test.AssertEquals(t, m[10][0].start, anchorTime().Add(34*time.Hour))
+	tcu.sa = &fakeSAC{maxNotAfter: atTime.Add(300 * 24 * time.Hour)}
+	m, err = tcu.getShardMappings(context.Background(), atTime)
+	test.AssertNotError(t, err, "getting consistency shards")
+	test.AssertEquals(t, m[10][0].start, anchorTime().Add(34*time.Hour))
+	atTime = atTime.Add(6 * time.Hour)
+	m, err = tcu.getShardMappings(context.Background(), atTime)
+	test.AssertNotError(t, err, "getting consistency shards")
+	test.AssertEquals(t, m[10][0].start, anchorTime().Add(34*time.Hour))
+}
+
+func TestGetChunkAtTime(t *testing.T) {
+	// Our test updater divides time into chunks 1 day wide, numbered 0 through 9.
+	tcu := crlUpdater{
+		numShards:  10,
+		shardWidth: 24 * time.Hour,
+	}
+
+	// The chunk right at the anchor time should have index 0 and start at the
+	// anchor time. This also tests behavior when atTime is on a chunk boundary.
+	atTime := anchorTime()
+	c, err := tcu.getChunkAtTime(atTime)
+	test.AssertNotError(t, err, "getting chunk at anchor")
+	test.AssertEquals(t, c.idx, 0)
+	test.Assert(t, c.start.Equal(atTime), "getting chunk at anchor")
+	test.Assert(t, c.end.Equal(atTime.Add(24*time.Hour)), "getting chunk at anchor")
+
+	// The chunk a bit over a year in the future should have index 5.
+	atTime = anchorTime().Add(365 * 24 * time.Hour)
+	c, err = tcu.getChunkAtTime(atTime.Add(1 * time.Minute))
+	test.AssertNotError(t, err, "getting chunk")
+	test.AssertEquals(t, c.idx, 5)
+	test.Assert(t, c.start.Equal(atTime), "getting chunk")
+	test.Assert(t, c.end.Equal(atTime.Add(24*time.Hour)), "getting chunk")
+
+	// A chunk very far in the future should break the math. We have to add to
+	// the time twice, since the whole point of "very far in the future" is that
+	// it isn't representable by a time.Duration.
+	atTime = anchorTime().Add(200 * 365 * 24 * time.Hour).Add(200 * 365 * 24 * time.Hour)
+	c, err = tcu.getChunkAtTime(atTime)
+	test.AssertError(t, err, "getting far-future chunk")
+}
@@ -36,7 +36,8 @@
 			"/hierarchy/intermediate-cert-ecdsa-a.pem"
 		],
 		"numShards": 10,
-		"certificateLifetime": "2160h",
+		"shardWidth": "18h",
+		"lookbackPeriod": "24h",
 		"updatePeriod": "6h",
 		"updateOffset": "9120s",
 		"maxParallelism": 10