// Package redis provides a Redis-based OCSP responder.
//
// This responder will first look for a response cached in Redis. If there is
// no response, or the response is too old, it will make a request to the RA
// for a freshly-signed response. If that succeeds, this responder will return
// the response to the user right away, while storing a copy to Redis in a
// separate goroutine.
//
// If the cached response is too old and the request to the RA fails, this
// responder returns an error rather than serving the stale copy: serving a
// response outside the Baseline Requirements limits would be a compliance
// problem, while returning an error is only an availability problem (see the
// note in Response for more detail).
//
// It's assumed that this will be wrapped in a responder.filterSource, which
// means that if a response is past its NextUpdate, we'll generate a 500.
package redis

import (
	"context"
	"errors"
	"time"

	"github.com/jmhodges/clock"
	"github.com/letsencrypt/boulder/core"
	blog "github.com/letsencrypt/boulder/log"
	"github.com/letsencrypt/boulder/ocsp/responder"
	"github.com/letsencrypt/boulder/rocsp"
	"github.com/prometheus/client_golang/prometheus"
	"golang.org/x/crypto/ocsp"

	berrors "github.com/letsencrypt/boulder/errors"
)

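// rocspClient is the subset of the rocsp client's methods that this source
// needs: fetching cached responses and storing freshly-signed ones.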
type rocspClient interface {
	GetResponse(ctx context.Context, serial string) ([]byte, error)
	StoreResponse(ctx context.Context, resp *ocsp.Response) error
}

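// redisSource is a responder.Source that serves OCSP responses from a Redis
// cache, falling back to the wrapped signer when a response is missing or
// too old.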
type redisSource struct {
	client             rocspClient
	signer             responder.Source
	counter            *prometheus.CounterVec
	signAndSaveCounter *prometheus.CounterVec
	cachedResponseAges prometheus.Histogram
	clk                clock.Clock
	liveSigningPeriod  time.Duration
	// Error logs will be emitted at a rate of 1 in logSampleRate.
	// If logSampleRate is 0, no logs will be emitted.
	logSampleRate int
	// Note: this logger is only used for sampled error logs; all audit log
	// events currently come from the dbSource.
	log blog.Logger
}

// NewRedisSource returns a responder.Source which will look up OCSP responses
// in Redis.
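//
// A rough wiring sketch (the variable names and the liveSigningPeriod value
// below are illustrative, not part of this package's API):
//
//	src, err := NewRedisSource(rwClient, raSigner, 60*time.Hour, clock.New(), stats, logger, 1000)
//	if err != nil {
//		// handle construction error
//	}
//	// src is then typically wrapped in further responder layers (such as the
//	// filtering source mentioned in the package comment) before serving HTTP.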
func NewRedisSource(
	client *rocsp.RWClient,
	signer responder.Source,
	liveSigningPeriod time.Duration,
	clk clock.Clock,
	stats prometheus.Registerer,
	log blog.Logger,
	logSampleRate int,
) (*redisSource, error) {
	counter := prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "ocsp_redis_responses",
		Help: "Count of OCSP requests/responses by action taken by the redisSource",
	}, []string{"result"})
	stats.MustRegister(counter)

	signAndSaveCounter := prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: "ocsp_redis_sign_and_save",
		Help: "Count of OCSP sign and save requests",
	}, []string{"cause", "result"})
	stats.MustRegister(signAndSaveCounter)

	// Set up 12-hour-wide buckets, measured in seconds, covering response ages
	// from 0 to 6.5 days.
	buckets := make([]float64, 14)
	for i := range buckets {
		buckets[i] = 43200 * float64(i)
	}

	cachedResponseAges := prometheus.NewHistogram(prometheus.HistogramOpts{
		Name:    "ocsp_redis_cached_response_ages",
		Help:    "How old are the cached OCSP responses when we successfully retrieve them.",
		Buckets: buckets,
	})
	stats.MustRegister(cachedResponseAges)

	// Assign through a local interface variable so that a nil *rocsp.RWClient
	// stays a nil interface value rather than becoming a non-nil interface
	// wrapping a nil pointer.
	var rocspReader rocspClient
	if client != nil {
		rocspReader = client
	}
	return &redisSource{
		client:             rocspReader,
		signer:             signer,
		counter:            counter,
		signAndSaveCounter: signAndSaveCounter,
		cachedResponseAges: cachedResponseAges,
		liveSigningPeriod:  liveSigningPeriod,
		clk:                clk,
		logSampleRate:      logSampleRate,
		log:                log,
	}, nil
}

// Response implements the responder.Source interface. It looks up the requested OCSP
// response in the redis cluster.
func (src *redisSource) Response(ctx context.Context, req *ocsp.Request) (*responder.Response, error) {
	serialString := core.SerialToString(req.SerialNumber)

	respBytes, err := src.client.GetResponse(ctx, serialString)
	if err != nil {
		if errors.Is(err, rocsp.ErrRedisNotFound) {
			src.counter.WithLabelValues("not_found").Inc()
		} else {
			src.counter.WithLabelValues("lookup_error").Inc()
			responder.SampledError(src.log, src.logSampleRate, "looking for cached response: %s", err)
			// Proceed despite the error; when Redis is down we'd like to limp
			// along with live signing rather than returning an error to the
			// client.
		}
		return src.signAndSave(ctx, req, causeNotFound)
	}

	resp, err := ocsp.ParseResponse(respBytes, nil)
	if err != nil {
		src.counter.WithLabelValues("parse_error").Inc()
		return nil, err
	}

	if src.isStale(resp) {
		src.counter.WithLabelValues("stale").Inc()
		freshResp, err := src.signAndSave(ctx, req, causeStale)
		// Note: we could choose to return the stale response (up to its actual
		// NextUpdate date), but if we pass the BR/root program limits, that
		// becomes a compliance problem; returning an error is an availability
		// problem and only becomes a compliance problem if we serve too many
		// of them for too long (the exact conditions are not clearly defined
		// by the BRs or root programs).
		if err != nil {
			return nil, err
		}
		return freshResp, nil
	}

	src.counter.WithLabelValues("success").Inc()
	return &responder.Response{Response: resp, Raw: respBytes}, nil
}

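// isStale records the age of a cached response on the ages histogram and
// reports whether it is older than liveSigningPeriod, i.e. whether a fresh
// response should be signed before serving.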
func (src *redisSource) isStale(resp *ocsp.Response) bool {
	age := src.clk.Since(resp.ThisUpdate)
	src.cachedResponseAges.Observe(age.Seconds())
	return age > src.liveSigningPeriod
}

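// signAndSaveCause describes why a freshly-signed response was requested, and
// is used as the "cause" label on the signAndSaveCounter metric.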
type signAndSaveCause string

const (
	causeStale    signAndSaveCause = "stale"
	causeNotFound signAndSaveCause = "not_found"
	causeMismatch signAndSaveCause = "mismatch"
)

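// signAndSave requests a freshly-signed response from the wrapped signer. On
// success it returns that response right away, while storing a copy to Redis
// in a separate goroutine. Failures are counted on signAndSaveCounter under
// the given cause.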
func (src *redisSource) signAndSave(ctx context.Context, req *ocsp.Request, cause signAndSaveCause) (*responder.Response, error) {
	resp, err := src.signer.Response(ctx, req)
	if errors.Is(err, responder.ErrNotFound) {
		src.signAndSaveCounter.WithLabelValues(string(cause), "certificate_not_found").Inc()
		return nil, responder.ErrNotFound
	} else if errors.Is(err, berrors.UnknownSerial) {
		// UnknownSerial is more interesting than NotFound, because it means we don't
		// have a record in the `serials` table, which is kept longer-term than the
		// `certificateStatus` table. That could mean someone is making up silly serial
		// numbers in their requests to us, or it could mean there's a site on the
		// internet using a certificate that we don't have a record of in the
		// `serials` table.
		src.signAndSaveCounter.WithLabelValues(string(cause), "unknown_serial").Inc()
		responder.SampledError(src.log, src.logSampleRate, "unknown serial: %s", core.SerialToString(req.SerialNumber))
		return nil, responder.ErrNotFound
	} else if err != nil {
		src.signAndSaveCounter.WithLabelValues(string(cause), "signing_error").Inc()
		return nil, err
	}
	src.signAndSaveCounter.WithLabelValues(string(cause), "signing_success").Inc()
	go func() {
		// We don't care about the error here, because if storing the response
		// fails, we'll just generate a new one on the next request.
		_ = src.client.StoreResponse(context.Background(), resp.Response)
	}()
	return resp, nil
}