Sign OCSP in parallel for better performance. (#2422)

Previously all OCSP signing and storage would be serial, which meant it was hard
to exercise the full capacity of our HSM. In this change, we run a limited
number of update and store requests in parallel.

This change also updates stats generation in generateOCSPResponses so we can
distinguish stats produced by new OCSP requests from those for existing ones,
and adds a new stat that records how long the SQL query in findStaleOCSPResponses
takes.
This commit is contained in:
Jacob Hoffman-Andrews 2016-12-12 17:22:44 -08:00 committed by Roland Bracewell Shoemaker
parent 6ec93157f7
commit 26cf552ff9
5 changed files with 78 additions and 27 deletions

View File

@ -224,9 +224,10 @@ type OCSPUpdaterConfig struct {
MissingSCTBatchSize int MissingSCTBatchSize int
RevokedCertificateBatchSize int RevokedCertificateBatchSize int
OCSPMinTimeToExpiry ConfigDuration OCSPMinTimeToExpiry ConfigDuration
OCSPStaleMaxAge ConfigDuration OCSPStaleMaxAge ConfigDuration
OldestIssuedSCT ConfigDuration OldestIssuedSCT ConfigDuration
ParallelGenerateOCSPRequests int
AkamaiBaseURL string AkamaiBaseURL string
AkamaiClientToken string AkamaiClientToken string

View File

@ -59,6 +59,9 @@ type OCSPUpdater struct {
ocspStaleMaxAge time.Duration ocspStaleMaxAge time.Duration
// Used to calculate how far back missing SCT receipts should be looked for // Used to calculate how far back missing SCT receipts should be looked for
oldestIssuedSCT time.Duration oldestIssuedSCT time.Duration
// Maximum number of individual OCSP updates to attempt in parallel. Making
// these requests in parallel allows us to get higher total throughput.
parallelGenerateOCSPRequests int
// Logs we expect to have SCT receipts for. Missing logs will be resubmitted to. // Logs we expect to have SCT receipts for. Missing logs will be resubmitted to.
logs []*ctLog logs []*ctLog
@ -96,6 +99,10 @@ func newUpdater(
// Default to 30 days // Default to 30 days
config.OCSPStaleMaxAge = cmd.ConfigDuration{Duration: time.Hour * 24 * 30} config.OCSPStaleMaxAge = cmd.ConfigDuration{Duration: time.Hour * 24 * 30}
} }
if config.ParallelGenerateOCSPRequests == 0 {
// Default to 1
config.ParallelGenerateOCSPRequests = 1
}
logs := make([]*ctLog, len(logConfigs)) logs := make([]*ctLog, len(logConfigs))
for i, logConfig := range logConfigs { for i, logConfig := range logConfigs {
@ -107,17 +114,18 @@ func newUpdater(
} }
updater := OCSPUpdater{ updater := OCSPUpdater{
stats: stats, stats: stats,
clk: clk, clk: clk,
dbMap: dbMap, dbMap: dbMap,
cac: ca, cac: ca,
log: log, log: log,
sac: sac, sac: sac,
pubc: pub, pubc: pub,
logs: logs, logs: logs,
ocspMinTimeToExpiry: config.OCSPMinTimeToExpiry.Duration, ocspMinTimeToExpiry: config.OCSPMinTimeToExpiry.Duration,
ocspStaleMaxAge: config.OCSPStaleMaxAge.Duration, ocspStaleMaxAge: config.OCSPStaleMaxAge.Duration,
oldestIssuedSCT: config.OldestIssuedSCT.Duration, oldestIssuedSCT: config.OldestIssuedSCT.Duration,
parallelGenerateOCSPRequests: config.ParallelGenerateOCSPRequests,
} }
// Setup loops // Setup loops
@ -380,7 +388,7 @@ func (updater *OCSPUpdater) newCertificateTick(ctx context.Context, batchSize in
return err return err
} }
return updater.generateOCSPResponses(ctx, statuses) return updater.generateOCSPResponses(ctx, statuses, updater.stats.NewScope("newCertificateTick"))
} }
func (updater *OCSPUpdater) findRevokedCertificatesToUpdate(batchSize int) ([]core.CertificateStatus, error) { func (updater *OCSPUpdater) findRevokedCertificatesToUpdate(batchSize int) ([]core.CertificateStatus, error) {
@ -430,22 +438,46 @@ func (updater *OCSPUpdater) revokedCertificatesTick(ctx context.Context, batchSi
return nil return nil
} }
func (updater *OCSPUpdater) generateOCSPResponses(ctx context.Context, statuses []core.CertificateStatus) error { func (updater *OCSPUpdater) generateOCSPResponses(ctx context.Context, statuses []core.CertificateStatus, stats metrics.Scope) error {
for _, status := range statuses { // Use the semaphore pattern from
// https://github.com/golang/go/wiki/BoundingResourceUse to send a number of
// GenerateOCSP / storeResponse requests in parallel, while limiting the total number of
// outstanding requests. The number of outstanding requests equals the
// capacity of the channel.
sem := make(chan int, updater.parallelGenerateOCSPRequests)
wait := func() {
sem <- 1 // Block until there's capacity.
}
done := func() {
<-sem // Indicate there's more capacity.
}
work := func(status core.CertificateStatus) {
defer done()
meta, err := updater.generateResponse(ctx, status) meta, err := updater.generateResponse(ctx, status)
if err != nil { if err != nil {
updater.log.AuditErr(fmt.Sprintf("Failed to generate OCSP response: %s", err)) updater.log.AuditErr(fmt.Sprintf("Failed to generate OCSP response: %s", err))
updater.stats.Inc("Errors.ResponseGeneration", 1) stats.Inc("Errors.ResponseGeneration", 1)
return err return
} }
updater.stats.Inc("GeneratedResponses", 1) updater.stats.Inc("GeneratedResponses", 1)
err = updater.storeResponse(meta) err = updater.storeResponse(meta)
if err != nil { if err != nil {
updater.log.AuditErr(fmt.Sprintf("Failed to store OCSP response: %s", err)) updater.log.AuditErr(fmt.Sprintf("Failed to store OCSP response: %s", err))
updater.stats.Inc("Errors.StoreResponse", 1) stats.Inc("Errors.StoreResponse", 1)
continue return
} }
updater.stats.Inc("StoredResponses", 1) stats.Inc("StoredResponses", 1)
}
for _, status := range statuses {
wait()
go work(status)
}
// Block until the channel reaches its full capacity again, indicating each
// goroutine has completed.
for i := 0; i < updater.parallelGenerateOCSPRequests; i++ {
wait()
} }
return nil return nil
} }
@ -453,15 +485,16 @@ func (updater *OCSPUpdater) generateOCSPResponses(ctx context.Context, statuses
// oldOCSPResponsesTick looks for certificates with stale OCSP responses and // oldOCSPResponsesTick looks for certificates with stale OCSP responses and
// generates/stores new ones // generates/stores new ones
func (updater *OCSPUpdater) oldOCSPResponsesTick(ctx context.Context, batchSize int) error { func (updater *OCSPUpdater) oldOCSPResponsesTick(ctx context.Context, batchSize int) error {
now := time.Now() tickStart := time.Now()
statuses, err := updater.findStaleOCSPResponses(now.Add(-updater.ocspMinTimeToExpiry), batchSize) statuses, err := updater.findStaleOCSPResponses(tickStart.Add(-updater.ocspMinTimeToExpiry), batchSize)
if err != nil { if err != nil {
updater.stats.Inc("Errors.FindStaleResponses", 1) updater.stats.Inc("Errors.FindStaleResponses", 1)
updater.log.AuditErr(fmt.Sprintf("Failed to find stale OCSP responses: %s", err)) updater.log.AuditErr(fmt.Sprintf("Failed to find stale OCSP responses: %s", err))
return err return err
} }
updater.stats.TimingDuration("oldOCSPResponsesTick.QueryTime", time.Since(tickStart))
return updater.generateOCSPResponses(ctx, statuses) return updater.generateOCSPResponses(ctx, statuses, updater.stats.NewScope("oldOCSPResponsesTick"))
} }
func (updater *OCSPUpdater) getSerialsIssuedSince(since time.Time, batchSize int) ([]string, error) { func (updater *OCSPUpdater) getSerialsIssuedSince(since time.Time, batchSize int) ([]string, error) {

View File

@ -30,7 +30,9 @@ import (
var ctx = context.Background() var ctx = context.Background()
type mockCA struct{} type mockCA struct {
sleepTime time.Duration
}
func (ca *mockCA) IssueCertificate(_ context.Context, csr x509.CertificateRequest, regID int64) (core.Certificate, error) { func (ca *mockCA) IssueCertificate(_ context.Context, csr x509.CertificateRequest, regID int64) (core.Certificate, error) {
return core.Certificate{}, nil return core.Certificate{}, nil
@ -38,6 +40,7 @@ func (ca *mockCA) IssueCertificate(_ context.Context, csr x509.CertificateReques
func (ca *mockCA) GenerateOCSP(_ context.Context, xferObj core.OCSPSigningRequest) (ocsp []byte, err error) { func (ca *mockCA) GenerateOCSP(_ context.Context, xferObj core.OCSPSigningRequest) (ocsp []byte, err error) {
ocsp = []byte{1, 2, 3} ocsp = []byte{1, 2, 3}
time.Sleep(ca.sleepTime)
return return
} }
@ -220,8 +223,20 @@ func TestGenerateOCSPResponses(t *testing.T) {
test.AssertNotError(t, err, "Couldn't find stale responses") test.AssertNotError(t, err, "Couldn't find stale responses")
test.AssertEquals(t, len(certs), 2) test.AssertEquals(t, len(certs), 2)
err = updater.generateOCSPResponses(ctx, certs) // Hacky test of parallelism: Make each request to the CA take 1 second, and
// produce 2 requests to the CA. If the pair of requests complete in about a
// second, they were made in parallel.
// Note that this test also tests the basic functionality of
// generateOCSPResponses.
start := time.Now()
updater.cac = &mockCA{time.Second}
updater.parallelGenerateOCSPRequests = 10
err = updater.generateOCSPResponses(ctx, certs, metrics.NewNoopScope())
test.AssertNotError(t, err, "Couldn't generate OCSP responses") test.AssertNotError(t, err, "Couldn't generate OCSP responses")
elapsed := time.Since(start)
if elapsed > 1500*time.Millisecond {
t.Errorf("generateOCSPResponses took too long, expected it to make calls in parallel.")
}
certs, err = updater.findStaleOCSPResponses(earliest, 10) certs, err = updater.findStaleOCSPResponses(earliest, 10)
test.AssertNotError(t, err, "Failed to find stale responses") test.AssertNotError(t, err, "Failed to find stale responses")

View File

@ -9,6 +9,7 @@
"newCertificateBatchSize": 1000, "newCertificateBatchSize": 1000,
"oldOCSPBatchSize": 5000, "oldOCSPBatchSize": 5000,
"missingSCTBatchSize": 5000, "missingSCTBatchSize": 5000,
"parallelGenerateOCSPRequests": 10,
"revokedCertificateBatchSize": 1000, "revokedCertificateBatchSize": 1000,
"ocspMinTimeToExpiry": "72h", "ocspMinTimeToExpiry": "72h",
"ocspStaleMaxAge": "720h", "ocspStaleMaxAge": "720h",

View File

@ -8,6 +8,7 @@
"revokedCertificateWindow": "1s", "revokedCertificateWindow": "1s",
"newCertificateBatchSize": 1000, "newCertificateBatchSize": 1000,
"oldOCSPBatchSize": 5000, "oldOCSPBatchSize": 5000,
"parallelGenerateOCSPRequests": 10,
"missingSCTBatchSize": 5000, "missingSCTBatchSize": 5000,
"revokedCertificateBatchSize": 1000, "revokedCertificateBatchSize": 1000,
"ocspMinTimeToExpiry": "72h", "ocspMinTimeToExpiry": "72h",