rocsp: improve stats (#6257)
For multiSource, split out checkSecondary's metrics into their own counter. Treat NotFound as a separate error type (so we can more clearly distinguish the half-hourly pattern of fetches for expired certificates). In redisSource, add a histogram for the ages of responses fetched from cache (regardless of whether they are served or not). This parallels ocsp_respond_ages in ocsp/responder.go, but may show ages beyond the compliance limit, even under normal operations, because it is checked before signAndSave is called.
This commit is contained in:
parent
745eef159b
commit
e7bf6383d8
|
|
@ -13,11 +13,12 @@ import (
|
|||
)
|
||||
|
||||
type multiSource struct {
|
||||
primary Source
|
||||
secondary Source
|
||||
expectedFreshness time.Duration
|
||||
counter *prometheus.CounterVec
|
||||
log blog.Logger
|
||||
primary Source
|
||||
secondary Source
|
||||
expectedFreshness time.Duration
|
||||
counter *prometheus.CounterVec
|
||||
checkSecondaryCounter *prometheus.CounterVec
|
||||
log blog.Logger
|
||||
}
|
||||
|
||||
// NewMultiSource creates a source that combines a primary and a secondary source.
|
||||
|
|
@ -37,18 +38,26 @@ func NewMultiSource(primary, secondary Source, expectedFreshness time.Duration,
|
|||
if primary == nil || secondary == nil {
|
||||
return nil, errors.New("must provide both primary and secondary sources")
|
||||
}
|
||||
|
||||
counter := prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "ocsp_multiplex_responses",
|
||||
Help: "Count of OCSP requests/responses by action taken by the multiSource",
|
||||
}, []string{"result"})
|
||||
stats.MustRegister(counter)
|
||||
|
||||
checkSecondaryCounter := prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "ocsp_multiplex_check_secondary",
|
||||
Help: "Count of OCSP requests/responses by action taken by the multiSource",
|
||||
}, []string{"result"})
|
||||
stats.MustRegister(checkSecondaryCounter)
|
||||
|
||||
return &multiSource{
|
||||
primary: primary,
|
||||
secondary: secondary,
|
||||
expectedFreshness: expectedFreshness,
|
||||
counter: counter,
|
||||
log: log,
|
||||
primary: primary,
|
||||
secondary: secondary,
|
||||
expectedFreshness: expectedFreshness,
|
||||
counter: counter,
|
||||
checkSecondaryCounter: checkSecondaryCounter,
|
||||
log: log,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
|
@ -91,7 +100,11 @@ func (src *multiSource) Response(ctx context.Context, req *ocsp.Request) (*Respo
|
|||
// check the secondary's status against the (more reliable) primary's
|
||||
// status.
|
||||
if r.err != nil {
|
||||
src.counter.WithLabelValues("primary_error").Inc()
|
||||
if errors.Is(r.err, ErrNotFound) {
|
||||
src.counter.WithLabelValues("primary_not_found").Inc()
|
||||
} else {
|
||||
src.counter.WithLabelValues("primary_error").Inc()
|
||||
}
|
||||
return nil, r.err
|
||||
}
|
||||
primaryResponse = r.resp
|
||||
|
|
@ -156,14 +169,14 @@ func (src *multiSource) checkSecondary(primaryResponse *Response, secondaryChan
|
|||
if secondaryResult.err != nil {
|
||||
if errors.Is(secondaryResult.err, rocsp.ErrRedisNotFound) {
|
||||
// This case will happen for several hours after first issuance.
|
||||
src.counter.WithLabelValues("primary_good_secondary_not_found").Inc()
|
||||
src.checkSecondaryCounter.WithLabelValues("not_found").Inc()
|
||||
} else {
|
||||
src.counter.WithLabelValues("primary_good_secondary_error").Inc()
|
||||
src.checkSecondaryCounter.WithLabelValues("error").Inc()
|
||||
}
|
||||
}
|
||||
src.counter.WithLabelValues("primary_good_secondary_good").Inc()
|
||||
src.checkSecondaryCounter.WithLabelValues("good").Inc()
|
||||
default:
|
||||
src.counter.WithLabelValues("primary_good_secondary_slow").Inc()
|
||||
src.checkSecondaryCounter.WithLabelValues("slow").Inc()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -34,11 +34,12 @@ type rocspClient interface {
|
|||
}
|
||||
|
||||
type redisSource struct {
|
||||
client rocspClient
|
||||
signer responder.Source
|
||||
counter *prometheus.CounterVec
|
||||
clk clock.Clock
|
||||
liveSigningPeriod time.Duration
|
||||
client rocspClient
|
||||
signer responder.Source
|
||||
counter *prometheus.CounterVec
|
||||
cachedResponseAges prometheus.Histogram
|
||||
clk clock.Clock
|
||||
liveSigningPeriod time.Duration
|
||||
// Note: this logger is not currently used, as all audit log events are from
|
||||
// the dbSource right now, but it should and will be used in the future.
|
||||
log blog.Logger
|
||||
|
|
@ -60,17 +61,31 @@ func NewRedisSource(
|
|||
}, []string{"result"})
|
||||
stats.MustRegister(counter)
|
||||
|
||||
// Set up 12-hour-wide buckets, measured in seconds.
|
||||
buckets := make([]float64, 14)
|
||||
for i := range buckets {
|
||||
buckets[i] = 43200 * float64(i)
|
||||
}
|
||||
|
||||
cachedResponseAges := prometheus.NewHistogram(prometheus.HistogramOpts{
|
||||
Name: "ocsp_redis_cached_response_ages",
|
||||
Help: "How old are the cached OCSP responses when we successfully retrieve them.",
|
||||
Buckets: buckets,
|
||||
})
|
||||
stats.MustRegister(cachedResponseAges)
|
||||
|
||||
var rocspReader rocspClient
|
||||
if client != nil {
|
||||
rocspReader = client
|
||||
}
|
||||
return &redisSource{
|
||||
client: rocspReader,
|
||||
signer: signer,
|
||||
counter: counter,
|
||||
liveSigningPeriod: liveSigningPeriod,
|
||||
clk: clk,
|
||||
log: log,
|
||||
client: rocspReader,
|
||||
signer: signer,
|
||||
counter: counter,
|
||||
cachedResponseAges: cachedResponseAges,
|
||||
liveSigningPeriod: liveSigningPeriod,
|
||||
clk: clk,
|
||||
log: log,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
|
@ -108,7 +123,9 @@ func (src *redisSource) Response(ctx context.Context, req *ocsp.Request) (*respo
|
|||
}
|
||||
|
||||
func (src *redisSource) isStale(resp *ocsp.Response) bool {
|
||||
return src.clk.Since(resp.ThisUpdate) > src.liveSigningPeriod
|
||||
age := src.clk.Since(resp.ThisUpdate)
|
||||
src.cachedResponseAges.Observe(age.Seconds())
|
||||
return age > src.liveSigningPeriod
|
||||
}
|
||||
|
||||
func (src *redisSource) signAndSave(ctx context.Context, req *ocsp.Request, cause string) (*responder.Response, error) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue