ctpolicy: More stats and monitoring (#6822)
Adds new Prometheus metrics, derived from the configured log list and the
configured CT logs, to the ctpolicy constructor. `ct_operator_group_size_gauge`
reports the number of configured logs managed by each operator in the
log list. `ct_shard_expiration_seconds` reports the `end_exclusive` field
of each configured log in the `sctLogs` list as a Unix timestamp (0 when
the field is unset). For reference, Boulder retrieves SCTs from the logs in
the `sctLogs` list.
```
ct_operator_group_size_gauge{operator="Operator A",source="finalLogs"} 2
ct_operator_group_size_gauge{operator="Operator A",source="sctLogs"} 4
ct_operator_group_size_gauge{operator="Operator B",source="sctLogs"} 2
ct_operator_group_size_gauge{operator="Operator D",source="sctLogs"} 1
ct_operator_group_size_gauge{operator="Operator F",source="finalLogs"} 1
ct_operator_group_size_gauge{operator="Operator F",source="infoLogs"} 1
ct_shard_expiration_seconds{logID="A1 Current",operator="Operator A"} 3.15576e+09
ct_shard_expiration_seconds{logID="A1 Future",operator="Operator A"} 3.47126688e+10
ct_shard_expiration_seconds{logID="A2 Current",operator="Operator A"} 3.15576e+09
ct_shard_expiration_seconds{logID="A2 Past",operator="Operator A"} 0
ct_shard_expiration_seconds{logID="B1",operator="Operator B"} 3.15576e+09
ct_shard_expiration_seconds{logID="B2",operator="Operator B"} 3.15576e+09
ct_shard_expiration_seconds{logID="D1",operator="Operator D"} 3.15576e+09
```
Fixes https://github.com/letsencrypt/boulder/issues/5705
This commit is contained in:
parent
33fc8c4b6f
commit
23a0a71b2d
|
|
@ -22,13 +22,15 @@ const (
|
|||
// CTPolicy is used to hold information about SCTs required from various
|
||||
// groupings
|
||||
type CTPolicy struct {
// NOTE(review): this hunk is rendered as a diff (-22,13 +22,15): the first
// seven field lines appear to be the pre-change declarations, and they are
// repeated below with new alignment next to the two added gauge fields.
|
||||
pub pubpb.PublisherClient
|
||||
sctLogs loglist.List
|
||||
infoLogs loglist.List
|
||||
finalLogs loglist.List
|
||||
stagger time.Duration
|
||||
log blog.Logger
|
||||
winnerCounter *prometheus.CounterVec
|
||||
pub pubpb.PublisherClient
|
||||
// sctLogs are the logs capable of providing SCTs; New also exports a
// per-log expiry gauge for every entry in this list.
sctLogs loglist.List
|
||||
// infoLogs are the informational logs.
infoLogs loglist.List
|
||||
// finalLogs are the logs that final certificates are submitted to.
finalLogs loglist.List
|
||||
stagger time.Duration
|
||||
log blog.Logger
|
||||
winnerCounter *prometheus.CounterVec
|
||||
// operatorGroupsGauge is the ct_operator_group_size_gauge vector, labeled by
// "operator" and "source". New sets it to the number of logs configured for
// each operator in sctLogs, infoLogs and finalLogs.
operatorGroupsGauge *prometheus.GaugeVec
|
||||
// shardExpiryGauge is the ct_shard_expiration_seconds vector, labeled by
// "operator" and "logID". New sets it, for each log in sctLogs, to the log's
// EndExclusive as Unix seconds, or to 0 when EndExclusive is the zero time.
shardExpiryGauge *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
// New creates a new CTPolicy struct
|
||||
|
|
@ -42,14 +44,55 @@ func New(pub pubpb.PublisherClient, sctLogs loglist.List, infoLogs loglist.List,
|
|||
)
|
||||
stats.MustRegister(winnerCounter)
|
||||
|
||||
operatorGroupsGauge := prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "ct_operator_group_size_gauge",
|
||||
Help: "Gauge for CT operators group size, by operator and log source (capable of providing SCT, informational logs, logs we submit final certs to).",
|
||||
},
|
||||
[]string{"operator", "source"},
|
||||
)
|
||||
stats.MustRegister(operatorGroupsGauge)
|
||||
|
||||
shardExpiryGauge := prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "ct_shard_expiration_seconds",
|
||||
Help: "CT shard end_exclusive field expressed as Unix epoch time, by operator and logID.",
|
||||
},
|
||||
[]string{"operator", "logID"},
|
||||
)
|
||||
stats.MustRegister(shardExpiryGauge)
|
||||
|
||||
for op, group := range sctLogs {
|
||||
operatorGroupsGauge.WithLabelValues(op, "sctLogs").Set(float64(len(group)))
|
||||
|
||||
for _, log := range group {
|
||||
if log.EndExclusive.IsZero() {
|
||||
// Handles the case for non-temporally sharded logs too.
|
||||
shardExpiryGauge.WithLabelValues(op, log.Name).Set(float64(0))
|
||||
} else {
|
||||
shardExpiryGauge.WithLabelValues(op, log.Name).Set(float64(log.EndExclusive.Unix()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for op, group := range infoLogs {
|
||||
operatorGroupsGauge.WithLabelValues(op, "infoLogs").Set(float64(len(group)))
|
||||
}
|
||||
|
||||
for op, group := range finalLogs {
|
||||
operatorGroupsGauge.WithLabelValues(op, "finalLogs").Set(float64(len(group)))
|
||||
}
|
||||
|
||||
return &CTPolicy{
|
||||
pub: pub,
|
||||
sctLogs: sctLogs,
|
||||
infoLogs: infoLogs,
|
||||
finalLogs: finalLogs,
|
||||
stagger: stagger,
|
||||
log: log,
|
||||
winnerCounter: winnerCounter,
|
||||
pub: pub,
|
||||
sctLogs: sctLogs,
|
||||
infoLogs: infoLogs,
|
||||
finalLogs: finalLogs,
|
||||
stagger: stagger,
|
||||
log: log,
|
||||
winnerCounter: winnerCounter,
|
||||
operatorGroupsGauge: operatorGroupsGauge,
|
||||
shardExpiryGauge: shardExpiryGauge,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/jmhodges/clock"
|
||||
"github.com/letsencrypt/boulder/core"
|
||||
"github.com/letsencrypt/boulder/ctpolicy/loglist"
|
||||
berrors "github.com/letsencrypt/boulder/errors"
|
||||
|
|
@ -182,3 +183,80 @@ func TestGetSCTsFailMetrics(t *testing.T) {
|
|||
test.AssertContains(t, err.Error(), context.DeadlineExceeded.Error())
|
||||
test.AssertMetricWithLabelsEquals(t, ctp.winnerCounter, prometheus.Labels{"url": "UrlA1", "result": failed}, 1)
|
||||
}
|
||||
|
||||
func TestLogListMetrics(t *testing.T) {
|
||||
// Multiple operator groups with configured logs.
|
||||
ctp := New(&mockPub{}, loglist.List{
|
||||
"OperA": {
|
||||
"LogA1": {Url: "UrlA1", Key: "KeyA1"},
|
||||
"LogA2": {Url: "UrlA2", Key: "KeyA2"},
|
||||
},
|
||||
"OperB": {
|
||||
"LogB1": {Url: "UrlB1", Key: "KeyB1"},
|
||||
},
|
||||
"OperC": {
|
||||
"LogC1": {Url: "UrlC1", Key: "KeyC1"},
|
||||
},
|
||||
}, nil, nil, 0, blog.NewMock(), metrics.NoopRegisterer)
|
||||
test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperA", "source": "sctLogs"}, 2)
|
||||
test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperB", "source": "sctLogs"}, 1)
|
||||
test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperC", "source": "sctLogs"}, 1)
|
||||
|
||||
// Multiple operator groups, no configured logs in one group
|
||||
ctp = New(&mockPub{}, loglist.List{
|
||||
"OperA": {
|
||||
"LogA1": {Url: "UrlA1", Key: "KeyA1"},
|
||||
"LogA2": {Url: "UrlA2", Key: "KeyA2"},
|
||||
},
|
||||
"OperB": {
|
||||
"LogB1": {Url: "UrlB1", Key: "KeyB1"},
|
||||
},
|
||||
"OperC": {},
|
||||
}, nil, loglist.List{
|
||||
"OperA": {
|
||||
"LogA1": {Url: "UrlA1", Key: "KeyA1"},
|
||||
},
|
||||
"OperB": {},
|
||||
"OperC": {
|
||||
"LogC1": {Url: "UrlC1", Key: "KeyC1"},
|
||||
},
|
||||
}, 0, blog.NewMock(), metrics.NoopRegisterer)
|
||||
test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperA", "source": "sctLogs"}, 2)
|
||||
test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperB", "source": "sctLogs"}, 1)
|
||||
test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperC", "source": "sctLogs"}, 0)
|
||||
test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperA", "source": "finalLogs"}, 1)
|
||||
test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperB", "source": "finalLogs"}, 0)
|
||||
test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperC", "source": "finalLogs"}, 1)
|
||||
|
||||
// Multiple operator groups with no configured logs.
|
||||
ctp = New(&mockPub{}, loglist.List{
|
||||
"OperA": {},
|
||||
"OperB": {},
|
||||
}, nil, nil, 0, blog.NewMock(), metrics.NoopRegisterer)
|
||||
test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperA", "source": "sctLogs"}, 0)
|
||||
test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperB", "source": "sctLogs"}, 0)
|
||||
|
||||
// Single operator group with no configured logs.
|
||||
ctp = New(&mockPub{}, loglist.List{
|
||||
"OperA": {},
|
||||
}, nil, nil, 0, blog.NewMock(), metrics.NoopRegisterer)
|
||||
test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperA", "source": "allLogs"}, 0)
|
||||
|
||||
fc := clock.NewFake()
|
||||
Tomorrow := fc.Now().Add(24 * time.Hour)
|
||||
NextWeek := fc.Now().Add(7 * 24 * time.Hour)
|
||||
|
||||
// Multiple operator groups with configured logs.
|
||||
ctp = New(&mockPub{}, loglist.List{
|
||||
"OperA": {
|
||||
"LogA1": {Url: "UrlA1", Key: "KeyA1", Name: "LogA1", EndExclusive: Tomorrow},
|
||||
"LogA2": {Url: "UrlA2", Key: "KeyA2", Name: "LogA2", EndExclusive: NextWeek},
|
||||
},
|
||||
"OperB": {
|
||||
"LogB1": {Url: "UrlB1", Key: "KeyB1", Name: "LogB1", EndExclusive: Tomorrow},
|
||||
},
|
||||
}, nil, nil, 0, blog.NewMock(), metrics.NoopRegisterer)
|
||||
test.AssertMetricWithLabelsEquals(t, ctp.shardExpiryGauge, prometheus.Labels{"operator": "OperA", "logID": "LogA1"}, 86400)
|
||||
test.AssertMetricWithLabelsEquals(t, ctp.shardExpiryGauge, prometheus.Labels{"operator": "OperA", "logID": "LogA2"}, 604800)
|
||||
test.AssertMetricWithLabelsEquals(t, ctp.shardExpiryGauge, prometheus.Labels{"operator": "OperB", "logID": "LogB1"}, 86400)
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue