ctpolicy: More stats and monitoring (#6822)

Adds new prometheus metrics from the configured log list and configured
CT logs to the ctpolicy constructor. `ct_operator_group_size_gauge`
reports the number of configured logs managed by each operator in each
log list. `ct_shard_expiration_seconds` reports the `end_exclusive`
field of each configured log in the `sctLogs` list as a Unix timestamp.
For context, `sctLogs` is the list of logs from which Boulder retrieves
SCTs.

```
ct_operator_group_size_gauge{operator="Operator A",source="finalLogs"} 2
ct_operator_group_size_gauge{operator="Operator A",source="sctLogs"} 4
ct_operator_group_size_gauge{operator="Operator B",source="sctLogs"} 2
ct_operator_group_size_gauge{operator="Operator D",source="sctLogs"} 1
ct_operator_group_size_gauge{operator="Operator F",source="finalLogs"} 1
ct_operator_group_size_gauge{operator="Operator F",source="infoLogs"} 1


ct_shard_expiration_seconds{logID="A1 Current",operator="Operator A"} 3.15576e+09
ct_shard_expiration_seconds{logID="A1 Future",operator="Operator A"} 3.47126688e+10
ct_shard_expiration_seconds{logID="A2 Current",operator="Operator A"} 3.15576e+09
ct_shard_expiration_seconds{logID="A2 Past",operator="Operator A"} 0
ct_shard_expiration_seconds{logID="B1",operator="Operator B"} 3.15576e+09
ct_shard_expiration_seconds{logID="B2",operator="Operator B"} 3.15576e+09
ct_shard_expiration_seconds{logID="D1",operator="Operator D"} 3.15576e+09
```

Fixes https://github.com/letsencrypt/boulder/issues/5705
This commit is contained in:
Phil Porada 2023-05-25 17:25:08 -04:00 committed by GitHub
parent 33fc8c4b6f
commit 23a0a71b2d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 135 additions and 14 deletions

View File

@ -22,13 +22,15 @@ const (
// CTPolicy is used to hold information about SCTs required from various
// groupings
type CTPolicy struct {
pub pubpb.PublisherClient
sctLogs loglist.List
infoLogs loglist.List
finalLogs loglist.List
stagger time.Duration
log blog.Logger
winnerCounter *prometheus.CounterVec
pub pubpb.PublisherClient
sctLogs loglist.List
infoLogs loglist.List
finalLogs loglist.List
stagger time.Duration
log blog.Logger
winnerCounter *prometheus.CounterVec
// operatorGroupsGauge reports the number of configured logs per operator,
// labelled by "operator" and "source" (sctLogs, infoLogs, or finalLogs).
operatorGroupsGauge *prometheus.GaugeVec
// shardExpiryGauge reports each sctLogs log's EndExclusive as Unix seconds
// (0 when EndExclusive is unset), labelled by "operator" and "logID".
shardExpiryGauge *prometheus.GaugeVec
}
// New creates a new CTPolicy struct
@ -42,14 +44,55 @@ func New(pub pubpb.PublisherClient, sctLogs loglist.List, infoLogs loglist.List,
)
stats.MustRegister(winnerCounter)
operatorGroupsGauge := prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "ct_operator_group_size_gauge",
Help: "Gauge for CT operators group size, by operator and log source (capable of providing SCT, informational logs, logs we submit final certs to).",
},
[]string{"operator", "source"},
)
stats.MustRegister(operatorGroupsGauge)
shardExpiryGauge := prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "ct_shard_expiration_seconds",
Help: "CT shard end_exclusive field expressed as Unix epoch time, by operator and logID.",
},
[]string{"operator", "logID"},
)
stats.MustRegister(shardExpiryGauge)
for op, group := range sctLogs {
operatorGroupsGauge.WithLabelValues(op, "sctLogs").Set(float64(len(group)))
for _, log := range group {
if log.EndExclusive.IsZero() {
// Handles the case for non-temporally sharded logs too.
shardExpiryGauge.WithLabelValues(op, log.Name).Set(float64(0))
} else {
shardExpiryGauge.WithLabelValues(op, log.Name).Set(float64(log.EndExclusive.Unix()))
}
}
}
for op, group := range infoLogs {
operatorGroupsGauge.WithLabelValues(op, "infoLogs").Set(float64(len(group)))
}
for op, group := range finalLogs {
operatorGroupsGauge.WithLabelValues(op, "finalLogs").Set(float64(len(group)))
}
return &CTPolicy{
pub: pub,
sctLogs: sctLogs,
infoLogs: infoLogs,
finalLogs: finalLogs,
stagger: stagger,
log: log,
winnerCounter: winnerCounter,
pub: pub,
sctLogs: sctLogs,
infoLogs: infoLogs,
finalLogs: finalLogs,
stagger: stagger,
log: log,
winnerCounter: winnerCounter,
operatorGroupsGauge: operatorGroupsGauge,
shardExpiryGauge: shardExpiryGauge,
}
}

View File

@ -7,6 +7,7 @@ import (
"testing"
"time"
"github.com/jmhodges/clock"
"github.com/letsencrypt/boulder/core"
"github.com/letsencrypt/boulder/ctpolicy/loglist"
berrors "github.com/letsencrypt/boulder/errors"
@ -182,3 +183,80 @@ func TestGetSCTsFailMetrics(t *testing.T) {
test.AssertContains(t, err.Error(), context.DeadlineExceeded.Error())
test.AssertMetricWithLabelsEquals(t, ctp.winnerCounter, prometheus.Labels{"url": "UrlA1", "result": failed}, 1)
}
// TestLogListMetrics checks that New populates the operator group size gauge
// and the shard expiry gauge from the log lists it is constructed with.
func TestLogListMetrics(t *testing.T) {
	// Multiple operator groups with configured logs.
	ctp := New(&mockPub{}, loglist.List{
		"OperA": {
			"LogA1": {Url: "UrlA1", Key: "KeyA1"},
			"LogA2": {Url: "UrlA2", Key: "KeyA2"},
		},
		"OperB": {
			"LogB1": {Url: "UrlB1", Key: "KeyB1"},
		},
		"OperC": {
			"LogC1": {Url: "UrlC1", Key: "KeyC1"},
		},
	}, nil, nil, 0, blog.NewMock(), metrics.NoopRegisterer)
	test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperA", "source": "sctLogs"}, 2)
	test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperB", "source": "sctLogs"}, 1)
	test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperC", "source": "sctLogs"}, 1)

	// Multiple operator groups, no configured logs in one group. The third
	// list argument (finalLogs) is populated here as well.
	ctp = New(&mockPub{}, loglist.List{
		"OperA": {
			"LogA1": {Url: "UrlA1", Key: "KeyA1"},
			"LogA2": {Url: "UrlA2", Key: "KeyA2"},
		},
		"OperB": {
			"LogB1": {Url: "UrlB1", Key: "KeyB1"},
		},
		"OperC": {},
	}, nil, loglist.List{
		"OperA": {
			"LogA1": {Url: "UrlA1", Key: "KeyA1"},
		},
		"OperB": {},
		"OperC": {
			"LogC1": {Url: "UrlC1", Key: "KeyC1"},
		},
	}, 0, blog.NewMock(), metrics.NoopRegisterer)
	test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperA", "source": "sctLogs"}, 2)
	test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperB", "source": "sctLogs"}, 1)
	test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperC", "source": "sctLogs"}, 0)
	test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperA", "source": "finalLogs"}, 1)
	test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperB", "source": "finalLogs"}, 0)
	test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperC", "source": "finalLogs"}, 1)

	// Multiple operator groups with no configured logs.
	ctp = New(&mockPub{}, loglist.List{
		"OperA": {},
		"OperB": {},
	}, nil, nil, 0, blog.NewMock(), metrics.NoopRegisterer)
	test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperA", "source": "sctLogs"}, 0)
	test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperB", "source": "sctLogs"}, 0)

	// Single operator group with no configured logs. The list is passed as
	// sctLogs, so the gauge is labelled with source "sctLogs"; New only ever
	// sets the sources "sctLogs", "infoLogs" and "finalLogs".
	ctp = New(&mockPub{}, loglist.List{
		"OperA": {},
	}, nil, nil, 0, blog.NewMock(), metrics.NoopRegisterer)
	test.AssertMetricWithLabelsEquals(t, ctp.operatorGroupsGauge, prometheus.Labels{"operator": "OperA", "source": "sctLogs"}, 0)

	fc := clock.NewFake()
	tomorrow := fc.Now().Add(24 * time.Hour)
	nextWeek := fc.Now().Add(7 * 24 * time.Hour)

	// Multiple operator groups with temporally sharded logs. The expected
	// values are derived from the same timestamps handed to New so the
	// assertions do not depend on the fake clock's starting instant.
	ctp = New(&mockPub{}, loglist.List{
		"OperA": {
			"LogA1": {Url: "UrlA1", Key: "KeyA1", Name: "LogA1", EndExclusive: tomorrow},
			"LogA2": {Url: "UrlA2", Key: "KeyA2", Name: "LogA2", EndExclusive: nextWeek},
		},
		"OperB": {
			"LogB1": {Url: "UrlB1", Key: "KeyB1", Name: "LogB1", EndExclusive: tomorrow},
		},
	}, nil, nil, 0, blog.NewMock(), metrics.NoopRegisterer)
	test.AssertMetricWithLabelsEquals(t, ctp.shardExpiryGauge, prometheus.Labels{"operator": "OperA", "logID": "LogA1"}, float64(tomorrow.Unix()))
	test.AssertMetricWithLabelsEquals(t, ctp.shardExpiryGauge, prometheus.Labels{"operator": "OperA", "logID": "LogA2"}, float64(nextWeek.Unix()))
	test.AssertMetricWithLabelsEquals(t, ctp.shardExpiryGauge, prometheus.Labels{"operator": "OperB", "logID": "LogB1"}, float64(tomorrow.Unix()))
}