SA: refactor DB stat collection & collect more stats. (#4096)
Go 1.11+ updated the `sql.DBStats` struct with new fields that are of interest to us. This PR routes these stats to Prometheus by replacing the existing autoprom stats code with new first-class Prometheus metrics. Resolves https://github.com/letsencrypt/boulder/issues/4095 The `max_db_connections` stat from the SA is removed because the Go 1.11+ `sql.DBStats.MaxOpenConnections` field will give us a better view of the same information. The autoprom "reused_authz" stat that was being incremented in `SA.GetPendingAuthorization` was also removed. It wasn't doing what it says it was (counting reused authorizations) and was instead counting the number of times `GetPendingAuthorization` returned an authz.
This commit is contained in:
parent
2f6626afca
commit
0ecdf80709
|
|
@ -9,7 +9,6 @@ import (
|
|||
bgrpc "github.com/letsencrypt/boulder/grpc"
|
||||
"github.com/letsencrypt/boulder/sa"
|
||||
sapb "github.com/letsencrypt/boulder/sa/proto"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
type config struct {
|
||||
|
|
@ -62,15 +61,8 @@ func main() {
|
|||
dbMap, err := sa.NewDbMap(dbURL, saConf.DBConfig.MaxDBConns)
|
||||
cmd.FailOnError(err, "Couldn't connect to SA database")
|
||||
|
||||
// Export the MaxDBConns
|
||||
dbConnStat := prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "max_db_connections",
|
||||
Help: "Maximum number of DB connections allowed.",
|
||||
})
|
||||
scope.MustRegister(dbConnStat)
|
||||
dbConnStat.Set(float64(saConf.DBConfig.MaxDBConns))
|
||||
|
||||
go sa.ReportDbConnCount(dbMap, scope)
|
||||
// Collect and periodically report DB metrics using the DBMap and prometheus scope.
|
||||
sa.InitDBMetrics(dbMap, scope)
|
||||
|
||||
clk := cmd.Clock()
|
||||
|
||||
|
|
|
|||
|
|
@ -370,7 +370,9 @@ func main() {
|
|||
saDbMap, err := sa.NewDbMap(saDbURL, config.CertChecker.DBConfig.MaxDBConns)
|
||||
cmd.FailOnError(err, "Could not connect to database")
|
||||
scope := metrics.NewPromScope(prometheus.DefaultRegisterer)
|
||||
go sa.ReportDbConnCount(saDbMap, scope)
|
||||
|
||||
// Collect and periodically report DB metrics using the DBMap and prometheus scope.
|
||||
sa.InitDBMetrics(saDbMap, scope)
|
||||
|
||||
pa, err := policy.New(config.PA.Challenges)
|
||||
cmd.FailOnError(err, "Failed to create PA")
|
||||
|
|
|
|||
|
|
@ -473,7 +473,9 @@ func main() {
|
|||
dbMap, err := sa.NewDbMap(dbURL, c.Mailer.DBConfig.MaxDBConns)
|
||||
cmd.FailOnError(err, "Could not connect to database")
|
||||
sa.SetSQLDebug(dbMap, logger)
|
||||
go sa.ReportDbConnCount(dbMap, scope)
|
||||
|
||||
// Collect and periodically report DB metrics using the DBMap and prometheus scope.
|
||||
sa.InitDBMetrics(dbMap, scope)
|
||||
|
||||
tlsConfig, err := c.Mailer.TLS.Load()
|
||||
cmd.FailOnError(err, "TLS config")
|
||||
|
|
|
|||
|
|
@ -262,7 +262,9 @@ as generated by Boulder's single-ocsp command.
|
|||
dbMap, err := sa.NewDbMap(dbConnect, config.DBConfig.MaxDBConns)
|
||||
cmd.FailOnError(err, "Could not connect to database")
|
||||
sa.SetSQLDebug(dbMap, logger)
|
||||
go sa.ReportDbConnCount(dbMap, scope)
|
||||
// Collect and periodically report DB metrics using the DBMap and prometheus scope.
|
||||
sa.InitDBMetrics(dbMap, scope)
|
||||
|
||||
source, err = makeDBSource(
|
||||
dbMap,
|
||||
c.Common.IssuerCert,
|
||||
|
|
|
|||
|
|
@ -564,7 +564,9 @@ func main() {
|
|||
cmd.FailOnError(err, "Couldn't load DB URL")
|
||||
dbMap, err := sa.NewDbMap(dbURL, conf.DBConfig.MaxDBConns)
|
||||
cmd.FailOnError(err, "Could not connect to database")
|
||||
go sa.ReportDbConnCount(dbMap, scope)
|
||||
|
||||
// Collect and periodically report DB metrics using the DBMap and prometheus scope.
|
||||
sa.InitDBMetrics(dbMap, scope)
|
||||
|
||||
clk := cmd.Clock()
|
||||
cac, sac, apc := setupClients(conf, scope, clk)
|
||||
|
|
|
|||
|
|
@ -3,14 +3,12 @@ package sa
|
|||
import (
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/go-sql-driver/mysql"
|
||||
"gopkg.in/go-gorp/gorp.v2"
|
||||
|
||||
"github.com/letsencrypt/boulder/core"
|
||||
blog "github.com/letsencrypt/boulder/log"
|
||||
"github.com/letsencrypt/boulder/metrics"
|
||||
)
|
||||
|
||||
// NewDbMap creates the root gorp mapping object. Create one of these for each
|
||||
|
|
@ -111,14 +109,6 @@ func (log *SQLLogger) Printf(format string, v ...interface{}) {
|
|||
log.Debugf(format, v...)
|
||||
}
|
||||
|
||||
func ReportDbConnCount(dbMap *gorp.DbMap, statter metrics.Scope) {
|
||||
db := dbMap.Db
|
||||
for {
|
||||
statter.Gauge("OpenConnections", int64(db.Stats().OpenConnections))
|
||||
time.Sleep(1 * time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
// initTables constructs the table map for the ORM.
|
||||
// NOTE: For tables with an auto-increment primary key (SetKeys(true, ...)),
|
||||
// it is very important to declare them as a such here. It produces a side
|
||||
|
|
|
|||
|
|
@ -0,0 +1,125 @@
|
|||
package sa
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"time"
|
||||
|
||||
"github.com/letsencrypt/boulder/metrics"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"gopkg.in/go-gorp/gorp.v2"
|
||||
)
|
||||
|
||||
// dbMetrics is a struct holding prometheus stats related to the dbMap. Each of
|
||||
// the prometheus stats corresponds to a field of sql.DBStats.
|
||||
type dbMetrics struct {
|
||||
dbMap *gorp.DbMap
|
||||
maxOpenConnections prometheus.Gauge
|
||||
openConnections prometheus.Gauge
|
||||
inUse prometheus.Gauge
|
||||
idle prometheus.Gauge
|
||||
waitCount prometheus.Counter
|
||||
waitDuration prometheus.Counter
|
||||
maxIdleClosed prometheus.Counter
|
||||
maxLifetimeClosed prometheus.Counter
|
||||
}
|
||||
|
||||
// InitDBMetrics will register prometheus stats for the provided dbMap under the
|
||||
// given metrics.Scope. Every 1 second in a separate go routine the prometheus
|
||||
// stats will be updated based on the gorp dbMap's inner sql.DBMap's DBStats
|
||||
// structure values.
|
||||
func InitDBMetrics(dbMap *gorp.DbMap, scope metrics.Scope) {
|
||||
// Create a dbMetrics instance and register prometheus metrics
|
||||
dbm := newDbMetrics(dbMap, scope)
|
||||
|
||||
// Start the metric reporting goroutine to update the metrics periodically.
|
||||
go dbm.reportDBMetrics()
|
||||
}
|
||||
|
||||
// newDbMetrics constructs a dbMetrics instance by registering prometheus stats.
|
||||
func newDbMetrics(dbMap *gorp.DbMap, scope metrics.Scope) *dbMetrics {
|
||||
maxOpenConns := prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "db_max_open_connections",
|
||||
Help: "Maximum number of DB connections allowed.",
|
||||
})
|
||||
scope.MustRegister(maxOpenConns)
|
||||
|
||||
openConns := prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "db_open_connections",
|
||||
Help: "Number of established DB connections (in-use and idle).",
|
||||
})
|
||||
scope.MustRegister(openConns)
|
||||
|
||||
inUse := prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "db_inuse",
|
||||
Help: "Number of DB connections currently in use.",
|
||||
})
|
||||
scope.MustRegister(inUse)
|
||||
|
||||
idle := prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "db_idle",
|
||||
Help: "Number of idle DB connections.",
|
||||
})
|
||||
scope.MustRegister(idle)
|
||||
|
||||
waitCount := prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: "db_wait_count",
|
||||
Help: "Total number of DB connections waited for.",
|
||||
})
|
||||
scope.MustRegister(waitCount)
|
||||
|
||||
waitDuration := prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: "db_wait_duration_seconds",
|
||||
Help: "The total time blocked waiting for a new connection.",
|
||||
})
|
||||
scope.MustRegister(waitDuration)
|
||||
|
||||
maxIdleClosed := prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: "db_max_idle_closed",
|
||||
Help: "Total number of connections closed due to SetMaxIdleConns.",
|
||||
})
|
||||
scope.MustRegister(maxIdleClosed)
|
||||
|
||||
maxLifetimeClosed := prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: "db_max_lifetime_closed",
|
||||
Help: "Total number of connections closed due to SetConnMaxLifetime.",
|
||||
})
|
||||
scope.MustRegister(maxLifetimeClosed)
|
||||
|
||||
// Construct a dbMetrics instance with all of the registered metrics and the
|
||||
// gorp DBMap
|
||||
return &dbMetrics{
|
||||
dbMap: dbMap,
|
||||
maxOpenConnections: maxOpenConns,
|
||||
openConnections: openConns,
|
||||
inUse: inUse,
|
||||
idle: idle,
|
||||
waitCount: waitCount,
|
||||
waitDuration: waitDuration,
|
||||
maxIdleClosed: maxIdleClosed,
|
||||
maxLifetimeClosed: maxLifetimeClosed,
|
||||
}
|
||||
}
|
||||
|
||||
// updateFrom updates the dbMetrics prometheus stats based on the provided
|
||||
// sql.DBStats object.
|
||||
func (dbm *dbMetrics) updateFrom(dbStats sql.DBStats) {
|
||||
dbm.maxOpenConnections.Set(float64(dbStats.MaxOpenConnections))
|
||||
dbm.openConnections.Set(float64(dbStats.OpenConnections))
|
||||
dbm.inUse.Set(float64(dbStats.InUse))
|
||||
dbm.idle.Set(float64(dbStats.InUse))
|
||||
dbm.waitCount.Set(float64(dbStats.WaitCount))
|
||||
dbm.waitDuration.Set(float64(dbStats.WaitDuration.Seconds()))
|
||||
dbm.maxIdleClosed.Set(float64(dbStats.MaxIdleClosed))
|
||||
dbm.maxLifetimeClosed.Set(float64(dbStats.MaxLifetimeClosed))
|
||||
}
|
||||
|
||||
// reportDBMetrics is an infinite loop that will update the dbm with the gorp
|
||||
// dbMap's inner sql.DBMap's DBStats structure every second. It is intended to
|
||||
// be run in a dedicated goroutine spawned by InitDBMetrics.
|
||||
func (dbm *dbMetrics) reportDBMetrics() {
|
||||
for {
|
||||
stats := dbm.dbMap.Db.Stats()
|
||||
dbm.updateFrom(stats)
|
||||
time.Sleep(1 * time.Second)
|
||||
}
|
||||
}
|
||||
3
sa/sa.go
3
sa/sa.go
|
|
@ -36,7 +36,6 @@ type SQLStorageAuthority struct {
|
|||
dbMap *gorp.DbMap
|
||||
clk clock.Clock
|
||||
log blog.Logger
|
||||
scope metrics.Scope
|
||||
|
||||
// For RPCs that generate multiple, parallelizable SQL queries, this is the
|
||||
// max parallelism they will use (to avoid consuming too many MariaDB
|
||||
|
|
@ -103,7 +102,6 @@ func NewSQLStorageAuthority(
|
|||
dbMap: dbMap,
|
||||
clk: clk,
|
||||
log: logger,
|
||||
scope: scope,
|
||||
parallelismPerRPC: parallelismPerRPC,
|
||||
}
|
||||
|
||||
|
|
@ -779,7 +777,6 @@ func (ssa *SQLStorageAuthority) GetPendingAuthorization(
|
|||
} else if err == nil {
|
||||
// We found an authz, but we still need to fetch its challenges. To
|
||||
// simplify things, just call GetAuthorization, which takes care of that.
|
||||
ssa.scope.Inc("reused_authz", 1)
|
||||
authz, err := ssa.GetAuthorization(ctx, pa.ID)
|
||||
return &authz, err
|
||||
} else {
|
||||
|
|
|
|||
Loading…
Reference in New Issue