rocsp: improve stats (#6257)
For multiSource, split out checkSecondary's metrics into their own counter. Treat NotFound as a separate error type (so we can more clearly distinguish the half-hourly pattern of fetches for expired certificates). In redisSource, add a histogram for the ages of responses fetched from cache (regardless of whether they are served or not). This parallels ocsp_respond_ages in ocsp/responder.go, but may show ages beyond the compliance limit, even under normal operations, because it is checked before signAndServe is called.
This commit is contained in:
parent
745eef159b
commit
e7bf6383d8
|
|
@ -13,11 +13,12 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
type multiSource struct {
|
type multiSource struct {
|
||||||
primary Source
|
primary Source
|
||||||
secondary Source
|
secondary Source
|
||||||
expectedFreshness time.Duration
|
expectedFreshness time.Duration
|
||||||
counter *prometheus.CounterVec
|
counter *prometheus.CounterVec
|
||||||
log blog.Logger
|
checkSecondaryCounter *prometheus.CounterVec
|
||||||
|
log blog.Logger
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewMultiSource creates a source that combines a primary and a secondary source.
|
// NewMultiSource creates a source that combines a primary and a secondary source.
|
||||||
|
|
@ -37,18 +38,26 @@ func NewMultiSource(primary, secondary Source, expectedFreshness time.Duration,
|
||||||
if primary == nil || secondary == nil {
|
if primary == nil || secondary == nil {
|
||||||
return nil, errors.New("must provide both primary and secondary sources")
|
return nil, errors.New("must provide both primary and secondary sources")
|
||||||
}
|
}
|
||||||
|
|
||||||
counter := prometheus.NewCounterVec(prometheus.CounterOpts{
|
counter := prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||||
Name: "ocsp_multiplex_responses",
|
Name: "ocsp_multiplex_responses",
|
||||||
Help: "Count of OCSP requests/responses by action taken by the multiSource",
|
Help: "Count of OCSP requests/responses by action taken by the multiSource",
|
||||||
}, []string{"result"})
|
}, []string{"result"})
|
||||||
stats.MustRegister(counter)
|
stats.MustRegister(counter)
|
||||||
|
|
||||||
|
checkSecondaryCounter := prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||||
|
Name: "ocsp_multiplex_check_secondary",
|
||||||
|
Help: "Count of OCSP requests/responses by action taken by the multiSource",
|
||||||
|
}, []string{"result"})
|
||||||
|
stats.MustRegister(checkSecondaryCounter)
|
||||||
|
|
||||||
return &multiSource{
|
return &multiSource{
|
||||||
primary: primary,
|
primary: primary,
|
||||||
secondary: secondary,
|
secondary: secondary,
|
||||||
expectedFreshness: expectedFreshness,
|
expectedFreshness: expectedFreshness,
|
||||||
counter: counter,
|
counter: counter,
|
||||||
log: log,
|
checkSecondaryCounter: checkSecondaryCounter,
|
||||||
|
log: log,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -91,7 +100,11 @@ func (src *multiSource) Response(ctx context.Context, req *ocsp.Request) (*Respo
|
||||||
// check the secondary's status against the (more reliable) primary's
|
// check the secondary's status against the (more reliable) primary's
|
||||||
// status.
|
// status.
|
||||||
if r.err != nil {
|
if r.err != nil {
|
||||||
src.counter.WithLabelValues("primary_error").Inc()
|
if errors.Is(r.err, ErrNotFound) {
|
||||||
|
src.counter.WithLabelValues("primary_not_found").Inc()
|
||||||
|
} else {
|
||||||
|
src.counter.WithLabelValues("primary_error").Inc()
|
||||||
|
}
|
||||||
return nil, r.err
|
return nil, r.err
|
||||||
}
|
}
|
||||||
primaryResponse = r.resp
|
primaryResponse = r.resp
|
||||||
|
|
@ -156,14 +169,14 @@ func (src *multiSource) checkSecondary(primaryResponse *Response, secondaryChan
|
||||||
if secondaryResult.err != nil {
|
if secondaryResult.err != nil {
|
||||||
if errors.Is(secondaryResult.err, rocsp.ErrRedisNotFound) {
|
if errors.Is(secondaryResult.err, rocsp.ErrRedisNotFound) {
|
||||||
// This case will happen for several hours after first issuance.
|
// This case will happen for several hours after first issuance.
|
||||||
src.counter.WithLabelValues("primary_good_secondary_not_found").Inc()
|
src.checkSecondaryCounter.WithLabelValues("not_found").Inc()
|
||||||
} else {
|
} else {
|
||||||
src.counter.WithLabelValues("primary_good_secondary_error").Inc()
|
src.checkSecondaryCounter.WithLabelValues("error").Inc()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
src.counter.WithLabelValues("primary_good_secondary_good").Inc()
|
src.checkSecondaryCounter.WithLabelValues("good").Inc()
|
||||||
default:
|
default:
|
||||||
src.counter.WithLabelValues("primary_good_secondary_slow").Inc()
|
src.checkSecondaryCounter.WithLabelValues("slow").Inc()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -34,11 +34,12 @@ type rocspClient interface {
|
||||||
}
|
}
|
||||||
|
|
||||||
type redisSource struct {
|
type redisSource struct {
|
||||||
client rocspClient
|
client rocspClient
|
||||||
signer responder.Source
|
signer responder.Source
|
||||||
counter *prometheus.CounterVec
|
counter *prometheus.CounterVec
|
||||||
clk clock.Clock
|
cachedResponseAges prometheus.Histogram
|
||||||
liveSigningPeriod time.Duration
|
clk clock.Clock
|
||||||
|
liveSigningPeriod time.Duration
|
||||||
// Note: this logger is not currently used, as all audit log events are from
|
// Note: this logger is not currently used, as all audit log events are from
|
||||||
// the dbSource right now, but it should and will be used in the future.
|
// the dbSource right now, but it should and will be used in the future.
|
||||||
log blog.Logger
|
log blog.Logger
|
||||||
|
|
@ -60,17 +61,31 @@ func NewRedisSource(
|
||||||
}, []string{"result"})
|
}, []string{"result"})
|
||||||
stats.MustRegister(counter)
|
stats.MustRegister(counter)
|
||||||
|
|
||||||
|
// Set up 12-hour-wide buckets, measured in seconds.
|
||||||
|
buckets := make([]float64, 14)
|
||||||
|
for i := range buckets {
|
||||||
|
buckets[i] = 43200 * float64(i)
|
||||||
|
}
|
||||||
|
|
||||||
|
cachedResponseAges := prometheus.NewHistogram(prometheus.HistogramOpts{
|
||||||
|
Name: "ocsp_redis_cached_response_ages",
|
||||||
|
Help: "How old are the cached OCSP responses when we successfully retrieve them.",
|
||||||
|
Buckets: buckets,
|
||||||
|
})
|
||||||
|
stats.MustRegister(cachedResponseAges)
|
||||||
|
|
||||||
var rocspReader rocspClient
|
var rocspReader rocspClient
|
||||||
if client != nil {
|
if client != nil {
|
||||||
rocspReader = client
|
rocspReader = client
|
||||||
}
|
}
|
||||||
return &redisSource{
|
return &redisSource{
|
||||||
client: rocspReader,
|
client: rocspReader,
|
||||||
signer: signer,
|
signer: signer,
|
||||||
counter: counter,
|
counter: counter,
|
||||||
liveSigningPeriod: liveSigningPeriod,
|
cachedResponseAges: cachedResponseAges,
|
||||||
clk: clk,
|
liveSigningPeriod: liveSigningPeriod,
|
||||||
log: log,
|
clk: clk,
|
||||||
|
log: log,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -108,7 +123,9 @@ func (src *redisSource) Response(ctx context.Context, req *ocsp.Request) (*respo
|
||||||
}
|
}
|
||||||
|
|
||||||
func (src *redisSource) isStale(resp *ocsp.Response) bool {
|
func (src *redisSource) isStale(resp *ocsp.Response) bool {
|
||||||
return src.clk.Since(resp.ThisUpdate) > src.liveSigningPeriod
|
age := src.clk.Since(resp.ThisUpdate)
|
||||||
|
src.cachedResponseAges.Observe(age.Seconds())
|
||||||
|
return age > src.liveSigningPeriod
|
||||||
}
|
}
|
||||||
|
|
||||||
func (src *redisSource) signAndSave(ctx context.Context, req *ocsp.Request, cause string) (*responder.Response, error) {
|
func (src *redisSource) signAndSave(ctx context.Context, req *ocsp.Request, cause string) (*responder.Response, error) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue