diff --git a/crl/crl.go b/crl/crl.go index 216e6335a..95c61b59e 100644 --- a/crl/crl.go +++ b/crl/crl.go @@ -2,7 +2,6 @@ package crl import ( "encoding/json" - "fmt" "math/big" "time" @@ -31,7 +30,7 @@ func Number(thisUpdate time.Time) number { type id string // Id is a utility function which constructs a new `id`. -func Id(issuerID issuance.IssuerNameID, crlNumber number, shardIdx int) (id, error) { +func Id(issuerID issuance.IssuerNameID, crlNumber number, shardIdx int) id { type info struct { IssuerID issuance.IssuerNameID `json:"issuerID"` CRLNumber number `json:"crlNumber"` @@ -39,7 +38,7 @@ func Id(issuerID issuance.IssuerNameID, crlNumber number, shardIdx int) (id, err } jsonBytes, err := json.Marshal(info{issuerID, crlNumber, shardIdx}) if err != nil { - return "", fmt.Errorf("computing CRL Id: %w", err) + panic(err) } - return id(jsonBytes), nil + return id(jsonBytes) } diff --git a/crl/crl_test.go b/crl/crl_test.go index 1c32fc635..8a745cf31 100644 --- a/crl/crl_test.go +++ b/crl/crl_test.go @@ -11,8 +11,7 @@ import ( func TestId(t *testing.T) { thisUpdate := time.Now() - out, err := Id(1337, Number(thisUpdate), 1) - test.AssertNotError(t, err, "Failed to create CRLId") + out := Id(1337, Number(thisUpdate), 1) expectCRLId := fmt.Sprintf("{\"issuerID\":1337,\"crlNumber\":%d,\"shardIdx\":1}", big.NewInt(thisUpdate.UnixNano())) test.AssertEquals(t, string(out), expectCRLId) } diff --git a/crl/storer/storer.go b/crl/storer/storer.go index 3083bf2e2..dfc497daa 100644 --- a/crl/storer/storer.go +++ b/crl/storer/storer.go @@ -133,10 +133,7 @@ func (cs *crlStorer) UploadCRL(stream cspb.CRLStorer_UploadCRLServer) error { return errors.New("got no metadata message") } - crlId, err := crl.Id(issuer.NameID(), crlNumber, int(shardIdx)) - if err != nil { - return err - } + crlId := crl.Id(issuer.NameID(), crlNumber, int(shardIdx)) cs.sizeHistogram.WithLabelValues(issuer.Subject.CommonName).Observe(float64(len(crlBytes))) diff --git a/crl/updater/updater.go b/crl/updater/updater.go index cb1e3a1ca..062a4875d 100644 --- a/crl/updater/updater.go +++ b/crl/updater/updater.go @@ -3,9 +3,9 @@ package updater import ( "context" "crypto/sha256" - "errors" "fmt" "io" + "sort" "strings" "time" @@ -166,7 +166,9 @@ func (cu *crlUpdater) Run(ctx context.Context) error { if err != nil { // We only log, rather than return, so that the long-lived process can // continue and try again at the next tick. - cu.log.AuditErrf("tick at time %s failed: %s", atTime, err) + cu.log.AuditErrf( + "Generating CRLs failed: number=[%d] err=[%s]", + crl.Number(atTime), err) } case <-ctx.Done(): ticker.Stop() @@ -181,31 +183,31 @@ func (cu *crlUpdater) Run(ctx context.Context) error { // error at the end. func (cu *crlUpdater) Tick(ctx context.Context, atTime time.Time) (err error) { defer func() { - // Each return statement in this function assigns its returned value to the - // named return variable `err`. This deferred function closes over that - // variable, and so can reference and modify it. + // This func closes over the named return value `err`, so can reference it. result := "success" if err != nil { result = "failed" - err = fmt.Errorf("tick %s: %w", atTime.Format(time.RFC3339Nano), err) } cu.tickHistogram.WithLabelValues("all", result).Observe(cu.clk.Since(atTime).Seconds()) }() cu.log.Debugf("Ticking at time %s", atTime) - var errStrs []string + var errIssuers []string for id := range cu.issuers { // For now, process each issuer serially. This keeps the worker pool system // simple, and processing all of the issuers in parallel likely wouldn't // meaningfully speed up the overall process. err := cu.tickIssuer(ctx, atTime, id) if err != nil { - errStrs = append(errStrs, err.Error()) + cu.log.AuditErrf( + "Generating CRLs for issuer failed: number=[%d] issuer=[%s] err=[%s]", + crl.Number(atTime), cu.issuers[id].Subject.CommonName, err) + errIssuers = append(errIssuers, cu.issuers[id].Subject.CommonName) } } - if len(errStrs) != 0 { - return errors.New(strings.Join(errStrs, "; ")) + if len(errIssuers) != 0 { + return fmt.Errorf("%d issuers failed: %v", len(errIssuers), strings.Join(errIssuers, ", ")) } return nil } @@ -217,13 +219,10 @@ func (cu *crlUpdater) Tick(ctx context.Context, atTime time.Time) (err error) { func (cu *crlUpdater) tickIssuer(ctx context.Context, atTime time.Time, issuerNameID issuance.IssuerNameID) (err error) { start := cu.clk.Now() defer func() { - // Each return statement in this function assigns its returned value to the - // named return variable `err`. This deferred function closes over that - // variable, and so can reference and modify it. + // This func closes over the named return value `err`, so can reference it. result := "success" if err != nil { result = "failed" - err = fmt.Errorf("%s: %w", cu.issuers[issuerNameID].Subject.CommonName, err) } cu.tickHistogram.WithLabelValues(cu.issuers[issuerNameID].Subject.CommonName+" (Overall)", result).Observe(cu.clk.Since(start).Seconds()) }() @@ -259,16 +258,20 @@ func (cu *crlUpdater) tickIssuer(ctx context.Context, atTime time.Time, issuerNa } close(shardIdxs) - var errStrs []string + var errShards []int for i := 0; i < cu.numShards; i++ { res := <-shardResults if res.err != nil { - errStrs = append(errStrs, res.err.Error()) + cu.log.AuditErrf( + "Generating CRL failed: id=[%s] err=[%s]", + crl.Id(issuerNameID, crl.Number(atTime), res.shardIdx), err) + errShards = append(errShards, res.shardIdx) } } - if len(errStrs) != 0 { - return errors.New(strings.Join(errStrs, "; ")) + if len(errShards) != 0 { + sort.Ints(errShards) + return fmt.Errorf("%d shards failed: %v", len(errShards), errShards) } return nil } @@ -281,21 +284,12 @@ func (cu *crlUpdater) tickShard(ctx context.Context, atTime time.Time, issuerNam ctx, cancel := context.WithCancel(ctx) defer cancel() - crlId, err := crl.Id(issuerNameID, crl.Number(atTime), shardIdx) - if err != nil { - return err - } - start := cu.clk.Now() defer func() { - // Each return statement in this function assigns its returned value to the - // named return variable `err`. This deferred function closes over that - // variable, and so can reference and modify it. + // This func closes over the named return value `err`, so can reference it. result := "success" if err != nil { - cu.log.AuditErrf("Generating CRL failed: id=[%s] err=[%s]", crlId, err) result = "failed" - err = fmt.Errorf("%d: %w", shardIdx, err) } cu.tickHistogram.WithLabelValues(cu.issuers[issuerNameID].Subject.CommonName, result).Observe(cu.clk.Since(start).Seconds()) cu.updatedCounter.WithLabelValues(cu.issuers[issuerNameID].Subject.CommonName, result).Inc() @@ -406,7 +400,9 @@ func (cu *crlUpdater) tickShard(ctx context.Context, atTime time.Time, issuerNam return fmt.Errorf("closing CRLStorer upload stream: %w", err) } - cu.log.Infof("Generated CRL: id=[%s] size=[%d] hash=[%x]", crlId, crlLen, crlHash.Sum(nil)) + cu.log.Infof( + "Generated CRL shard: id=[%s] size=[%d] hash=[%x]", + crl.Id(issuerNameID, crl.Number(atTime), shardIdx), crlLen, crlHash.Sum(nil)) return nil } diff --git a/crl/updater/updater_test.go b/crl/updater/updater_test.go index 65b9f4f07..9c882ed51 100644 --- a/crl/updater/updater_test.go +++ b/crl/updater/updater_test.go @@ -1,12 +1,282 @@ package updater import ( + "context" + "errors" + "io" "testing" "time" + "google.golang.org/grpc" + "google.golang.org/protobuf/types/known/emptypb" + + "github.com/jmhodges/clock" + capb "github.com/letsencrypt/boulder/ca/proto" + corepb "github.com/letsencrypt/boulder/core/proto" + cspb "github.com/letsencrypt/boulder/crl/storer/proto" + "github.com/letsencrypt/boulder/issuance" + blog "github.com/letsencrypt/boulder/log" + "github.com/letsencrypt/boulder/metrics" + "github.com/letsencrypt/boulder/mocks" + sapb "github.com/letsencrypt/boulder/sa/proto" "github.com/letsencrypt/boulder/test" + "github.com/prometheus/client_golang/prometheus" ) +// fakeGRCC is a fake sapb.StorageAuthority_GetRevokedCertsClient which can be +// populated with some CRL entries or an error for use as the return value of +// a faked GetRevokedCerts call. +type fakeGRCC struct { + grpc.ClientStream + entries []*corepb.CRLEntry + nextIdx int + err error +} + +func (f *fakeGRCC) Recv() (*corepb.CRLEntry, error) { + if f.err != nil { + return nil, f.err + } + if f.nextIdx < len(f.entries) { + res := f.entries[f.nextIdx] + f.nextIdx++ + return res, nil + } + return nil, io.EOF +} + +// fakeSAC is a fake sapb.StorageAuthorityClient which can be populated with a +// fakeGRCC to be used as the return value for calls to GetRevokedCerts. +type fakeSAC struct { + mocks.StorageAuthority + grcc fakeGRCC +} + +func (f *fakeSAC) GetRevokedCerts(ctx context.Context, _ *sapb.GetRevokedCertsRequest, _ ...grpc.CallOption) (sapb.StorageAuthority_GetRevokedCertsClient, error) { + return &f.grcc, nil +} + +// fakeGCC is a fake capb.CRLGenerator_GenerateCRLClient which can be +// populated with some CRL entries or an error for use as the return value of +// a faked GenerateCRL call. +type fakeGCC struct { + grpc.ClientStream + chunks [][]byte + nextIdx int + sendErr error + recvErr error +} + +func (f *fakeGCC) Send(*capb.GenerateCRLRequest) error { + return f.sendErr +} + +func (f *fakeGCC) CloseSend() error { + return nil +} + +func (f *fakeGCC) Recv() (*capb.GenerateCRLResponse, error) { + if f.recvErr != nil { + return nil, f.recvErr + } + if f.nextIdx < len(f.chunks) { + res := f.chunks[f.nextIdx] + f.nextIdx++ + return &capb.GenerateCRLResponse{Chunk: res}, nil + } + return nil, io.EOF +} + +// fakeCGC is a fake capb.CRLGeneratorClient which can be populated with a +// fakeGCC to be used as the return value for calls to GenerateCRL. +type fakeCGC struct { + gcc fakeGCC +} + +func (f *fakeCGC) GenerateCRL(ctx context.Context, opts ...grpc.CallOption) (capb.CRLGenerator_GenerateCRLClient, error) { + return &f.gcc, nil +} + +// fakeUCC is a fake cspb.CRLStorer_UploadCRLClient which can be populated with +// an error for use as the return value of a faked UploadCRL call. +type fakeUCC struct { + grpc.ClientStream + sendErr error + recvErr error +} + +func (f *fakeUCC) Send(*cspb.UploadCRLRequest) error { + return f.sendErr +} + +func (f *fakeUCC) CloseAndRecv() (*emptypb.Empty, error) { + if f.recvErr != nil { + return nil, f.recvErr + } + return &emptypb.Empty{}, nil +} + +// fakeCSC is a fake cspb.CRLStorerClient which can be populated with a +// fakeUCC for use as the return value for calls to UploadCRL. +type fakeCSC struct { + ucc fakeUCC +} + +func (f *fakeCSC) UploadCRL(ctx context.Context, opts ...grpc.CallOption) (cspb.CRLStorer_UploadCRLClient, error) { + return &f.ucc, nil +} + +func TestTickShard(t *testing.T) { + e1, err := issuance.LoadCertificate("../../test/hierarchy/int-e1.cert.pem") + test.AssertNotError(t, err, "loading test issuer") + r3, err := issuance.LoadCertificate("../../test/hierarchy/int-r3.cert.pem") + test.AssertNotError(t, err, "loading test issuer") + + sentinelErr := errors.New("oops") + + cu, err := NewUpdater( + []*issuance.Certificate{e1, r3}, 2, 10*24*time.Hour, 24*time.Hour, 0, 1, + &fakeSAC{grcc: fakeGRCC{}}, + &fakeCGC{gcc: fakeGCC{}}, + &fakeCSC{ucc: fakeUCC{}}, + metrics.NoopRegisterer, blog.NewMock(), clock.NewFake(), + ) + test.AssertNotError(t, err, "building test crlUpdater") + + // Ensure that getting no results from the SA still works. + err = cu.tickShard(context.Background(), cu.clk.Now(), e1.NameID(), 0) + test.AssertNotError(t, err, "empty CRL") + test.AssertMetricWithLabelsEquals(t, cu.updatedCounter, prometheus.Labels{ + "issuer": "(TEST) Elegant Elephant E1", "result": "success", + }, 1) + cu.updatedCounter.Reset() + + // Errors closing the Storer upload stream should bubble up. + cu.cs = &fakeCSC{ucc: fakeUCC{recvErr: sentinelErr}} + err = cu.tickShard(context.Background(), cu.clk.Now(), e1.NameID(), 0) + test.AssertError(t, err, "storer error") + test.AssertContains(t, err.Error(), "closing CRLStorer upload stream") + test.AssertErrorIs(t, err, sentinelErr) + test.AssertMetricWithLabelsEquals(t, cu.updatedCounter, prometheus.Labels{ + "issuer": "(TEST) Elegant Elephant E1", "result": "failed", + }, 1) + cu.updatedCounter.Reset() + + // Errors reading from the CA should bubble up sooner. + cu.ca = &fakeCGC{gcc: fakeGCC{recvErr: sentinelErr}} + err = cu.tickShard(context.Background(), cu.clk.Now(), e1.NameID(), 0) + test.AssertError(t, err, "CA error") + test.AssertContains(t, err.Error(), "receiving CRL bytes") + test.AssertErrorIs(t, err, sentinelErr) + test.AssertMetricWithLabelsEquals(t, cu.updatedCounter, prometheus.Labels{ + "issuer": "(TEST) Elegant Elephant E1", "result": "failed", + }, 1) + cu.updatedCounter.Reset() + + // Errors sending to the Storer should bubble up sooner. + cu.cs = &fakeCSC{ucc: fakeUCC{sendErr: sentinelErr}} + err = cu.tickShard(context.Background(), cu.clk.Now(), e1.NameID(), 0) + test.AssertError(t, err, "storer error") + test.AssertContains(t, err.Error(), "sending CRLStorer metadata") + test.AssertErrorIs(t, err, sentinelErr) + test.AssertMetricWithLabelsEquals(t, cu.updatedCounter, prometheus.Labels{ + "issuer": "(TEST) Elegant Elephant E1", "result": "failed", + }, 1) + cu.updatedCounter.Reset() + + // Errors reading from the SA should bubble up sooner. + cu.sa = &fakeSAC{grcc: fakeGRCC{err: sentinelErr}} + err = cu.tickShard(context.Background(), cu.clk.Now(), e1.NameID(), 0) + test.AssertError(t, err, "database error") + test.AssertContains(t, err.Error(), "retrieving entry from SA") + test.AssertErrorIs(t, err, sentinelErr) + test.AssertMetricWithLabelsEquals(t, cu.updatedCounter, prometheus.Labels{ + "issuer": "(TEST) Elegant Elephant E1", "result": "failed", + }, 1) + cu.updatedCounter.Reset() + + // Errors sending to the CA should bubble up soonest. + cu.ca = &fakeCGC{gcc: fakeGCC{sendErr: sentinelErr}} + err = cu.tickShard(context.Background(), cu.clk.Now(), e1.NameID(), 0) + test.AssertError(t, err, "CA error") + test.AssertContains(t, err.Error(), "sending CA metadata") + test.AssertErrorIs(t, err, sentinelErr) + test.AssertMetricWithLabelsEquals(t, cu.updatedCounter, prometheus.Labels{ + "issuer": "(TEST) Elegant Elephant E1", "result": "failed", + }, 1) + cu.updatedCounter.Reset() +} + +func TestTickIssuer(t *testing.T) { + e1, err := issuance.LoadCertificate("../../test/hierarchy/int-e1.cert.pem") + test.AssertNotError(t, err, "loading test issuer") + r3, err := issuance.LoadCertificate("../../test/hierarchy/int-r3.cert.pem") + test.AssertNotError(t, err, "loading test issuer") + + mockLog := blog.NewMock() + cu, err := NewUpdater( + []*issuance.Certificate{e1, r3}, 2, 10*24*time.Hour, 24*time.Hour, 0, 1, + &fakeSAC{grcc: fakeGRCC{err: errors.New("db no worky")}}, + &fakeCGC{gcc: fakeGCC{}}, + &fakeCSC{ucc: fakeUCC{}}, + metrics.NoopRegisterer, mockLog, clock.NewFake(), + ) + test.AssertNotError(t, err, "building test crlUpdater") + + // An error that affects all shards should have every shard reflected in the + // combined error message. + err = cu.tickIssuer(context.Background(), cu.clk.Now(), e1.NameID()) + test.AssertError(t, err, "database error") + test.AssertContains(t, err.Error(), "2 shards failed") + test.AssertContains(t, err.Error(), "[0 1]") + test.AssertEquals(t, len(mockLog.GetAllMatching("Generating CRL failed:")), 2) + test.AssertMetricWithLabelsEquals(t, cu.tickHistogram, prometheus.Labels{ + "issuer": "(TEST) Elegant Elephant E1", "result": "failed", + }, 2) + test.AssertMetricWithLabelsEquals(t, cu.tickHistogram, prometheus.Labels{ + "issuer": "(TEST) Elegant Elephant E1 (Overall)", "result": "failed", + }, 1) + cu.tickHistogram.Reset() +} + +func TestTick(t *testing.T) { + e1, err := issuance.LoadCertificate("../../test/hierarchy/int-e1.cert.pem") + test.AssertNotError(t, err, "loading test issuer") + r3, err := issuance.LoadCertificate("../../test/hierarchy/int-r3.cert.pem") + test.AssertNotError(t, err, "loading test issuer") + + mockLog := blog.NewMock() + cu, err := NewUpdater( + []*issuance.Certificate{e1, r3}, 2, 10*24*time.Hour, 24*time.Hour, 0, 1, + &fakeSAC{grcc: fakeGRCC{err: errors.New("db no worky")}}, + &fakeCGC{gcc: fakeGCC{}}, + &fakeCSC{ucc: fakeUCC{}}, + metrics.NoopRegisterer, mockLog, clock.NewFake(), + ) + test.AssertNotError(t, err, "building test crlUpdater") + + // An error that affects all issuers should have every issuer reflected in the + // combined error message. + now := cu.clk.Now() + err = cu.Tick(context.Background(), now) + test.AssertError(t, err, "database error") + test.AssertContains(t, err.Error(), "2 issuers failed") + test.AssertContains(t, err.Error(), "(TEST) Elegant Elephant E1") + test.AssertContains(t, err.Error(), "(TEST) Radical Rhino R3") + test.AssertEquals(t, len(mockLog.GetAllMatching("Generating CRL failed:")), 4) + test.AssertEquals(t, len(mockLog.GetAllMatching("Generating CRLs for issuer failed:")), 2) + test.AssertMetricWithLabelsEquals(t, cu.tickHistogram, prometheus.Labels{ + "issuer": "(TEST) Elegant Elephant E1 (Overall)", "result": "failed", + }, 1) + test.AssertMetricWithLabelsEquals(t, cu.tickHistogram, prometheus.Labels{ + "issuer": "(TEST) Radical Rhino R3 (Overall)", "result": "failed", + }, 1) + test.AssertMetricWithLabelsEquals(t, cu.tickHistogram, prometheus.Labels{ + "issuer": "all", "result": "failed", + }, 1) + cu.tickHistogram.Reset() +} + func TestGetWindowForShard(t *testing.T) { // Our test updater divides a 107-day window into 107 shards, resulting in a // shard width of 24 hours.