SA: refactor DB stat collection & collect more stats. (#4096)

Go 1.11+ updated the `sql.DBStats` struct with new fields that are of
interest to us. This PR routes these stats to Prometheus by replacing
the existing autoprom stats code with new first-class Prometheus
metrics. Resolves https://github.com/letsencrypt/boulder/issues/4095

The `max_db_connections` stat from the SA is removed because the Go 1.11+
`sql.DBStats.MaxOpenConnections` field will give us a better view of
the same information.

The autoprom "reused_authz" stat that was being incremented in
`SA.GetPendingAuthorization` was also removed. It did not measure what its
name claims (the number of reused authorizations); instead it counted
the number of times `GetPendingAuthorization` returned an authz.
This commit is contained in:
Daniel McCarney 2019-03-06 20:08:53 -05:00 committed by Jacob Hoffman-Andrews
parent 2f6626afca
commit 0ecdf80709
8 changed files with 139 additions and 27 deletions

View File

@ -9,7 +9,6 @@ import (
bgrpc "github.com/letsencrypt/boulder/grpc"
"github.com/letsencrypt/boulder/sa"
sapb "github.com/letsencrypt/boulder/sa/proto"
"github.com/prometheus/client_golang/prometheus"
)
type config struct {
@ -62,15 +61,8 @@ func main() {
dbMap, err := sa.NewDbMap(dbURL, saConf.DBConfig.MaxDBConns)
cmd.FailOnError(err, "Couldn't connect to SA database")
// Export the MaxDBConns
dbConnStat := prometheus.NewGauge(prometheus.GaugeOpts{
Name: "max_db_connections",
Help: "Maximum number of DB connections allowed.",
})
scope.MustRegister(dbConnStat)
dbConnStat.Set(float64(saConf.DBConfig.MaxDBConns))
go sa.ReportDbConnCount(dbMap, scope)
// Collect and periodically report DB metrics using the DBMap and prometheus scope.
sa.InitDBMetrics(dbMap, scope)
clk := cmd.Clock()

View File

@ -370,7 +370,9 @@ func main() {
saDbMap, err := sa.NewDbMap(saDbURL, config.CertChecker.DBConfig.MaxDBConns)
cmd.FailOnError(err, "Could not connect to database")
scope := metrics.NewPromScope(prometheus.DefaultRegisterer)
go sa.ReportDbConnCount(saDbMap, scope)
// Collect and periodically report DB metrics using the DBMap and prometheus scope.
sa.InitDBMetrics(saDbMap, scope)
pa, err := policy.New(config.PA.Challenges)
cmd.FailOnError(err, "Failed to create PA")

View File

@ -473,7 +473,9 @@ func main() {
dbMap, err := sa.NewDbMap(dbURL, c.Mailer.DBConfig.MaxDBConns)
cmd.FailOnError(err, "Could not connect to database")
sa.SetSQLDebug(dbMap, logger)
go sa.ReportDbConnCount(dbMap, scope)
// Collect and periodically report DB metrics using the DBMap and prometheus scope.
sa.InitDBMetrics(dbMap, scope)
tlsConfig, err := c.Mailer.TLS.Load()
cmd.FailOnError(err, "TLS config")

View File

@ -262,7 +262,9 @@ as generated by Boulder's single-ocsp command.
dbMap, err := sa.NewDbMap(dbConnect, config.DBConfig.MaxDBConns)
cmd.FailOnError(err, "Could not connect to database")
sa.SetSQLDebug(dbMap, logger)
go sa.ReportDbConnCount(dbMap, scope)
// Collect and periodically report DB metrics using the DBMap and prometheus scope.
sa.InitDBMetrics(dbMap, scope)
source, err = makeDBSource(
dbMap,
c.Common.IssuerCert,

View File

@ -564,7 +564,9 @@ func main() {
cmd.FailOnError(err, "Couldn't load DB URL")
dbMap, err := sa.NewDbMap(dbURL, conf.DBConfig.MaxDBConns)
cmd.FailOnError(err, "Could not connect to database")
go sa.ReportDbConnCount(dbMap, scope)
// Collect and periodically report DB metrics using the DBMap and prometheus scope.
sa.InitDBMetrics(dbMap, scope)
clk := cmd.Clock()
cac, sac, apc := setupClients(conf, scope, clk)

View File

@ -3,14 +3,12 @@ package sa
import (
"database/sql"
"fmt"
"time"
"github.com/go-sql-driver/mysql"
"gopkg.in/go-gorp/gorp.v2"
"github.com/letsencrypt/boulder/core"
blog "github.com/letsencrypt/boulder/log"
"github.com/letsencrypt/boulder/metrics"
)
// NewDbMap creates the root gorp mapping object. Create one of these for each
@ -111,14 +109,6 @@ func (log *SQLLogger) Printf(format string, v ...interface{}) {
log.Debugf(format, v...)
}
// ReportDbConnCount loops forever, publishing the number of open connections
// of dbMap's underlying database handle to statter as the "OpenConnections"
// gauge once per second. It never returns, so callers run it in its own
// goroutine.
func ReportDbConnCount(dbMap *gorp.DbMap, statter metrics.Scope) {
	// The handle is read from the map once, before the loop starts.
	handle := dbMap.Db
	for {
		open := handle.Stats().OpenConnections
		statter.Gauge("OpenConnections", int64(open))
		time.Sleep(time.Second)
	}
}
// initTables constructs the table map for the ORM.
// NOTE: For tables with an auto-increment primary key (SetKeys(true, ...)),
// it is very important to declare them as a such here. It produces a side

125
sa/metrics.go Normal file
View File

@ -0,0 +1,125 @@
package sa
import (
"database/sql"
"time"
"github.com/letsencrypt/boulder/metrics"
"github.com/prometheus/client_golang/prometheus"
"gopkg.in/go-gorp/gorp.v2"
)
// dbMetrics bundles a gorp DbMap with the prometheus collectors used to
// publish its connection pool statistics. The collectors are named after the
// sql.DBStats fields they are meant to report.
type dbMetrics struct {
	// dbMap is the gorp mapping whose underlying database handle is polled.
	dbMap *gorp.DbMap

	// Gauges: point-in-time values describing the pool.
	maxOpenConnections prometheus.Gauge
	openConnections    prometheus.Gauge
	inUse              prometheus.Gauge
	idle               prometheus.Gauge

	// Counters: cumulative totals.
	waitCount         prometheus.Counter
	waitDuration      prometheus.Counter
	maxIdleClosed     prometheus.Counter
	maxLifetimeClosed prometheus.Counter
}
// InitDBMetrics registers prometheus stats describing dbMap with the given
// metrics.Scope and then launches a background goroutine that refreshes those
// stats from the statistics of dbMap's underlying database handle. The
// goroutine sleeps one second between refreshes and runs for the lifetime of
// the process.
func InitDBMetrics(dbMap *gorp.DbMap, scope metrics.Scope) {
	// Registration happens synchronously, before the reporter is started.
	reporter := newDbMetrics(dbMap, scope)

	// Only the periodic refresh loop runs in the background.
	go reporter.reportDBMetrics()
}
// newDbMetrics builds a dbMetrics for dbMap. One prometheus collector is
// created for each tracked statistic and registered with scope immediately
// after it is created.
func newDbMetrics(dbMap *gorp.DbMap, scope metrics.Scope) *dbMetrics {
	// registeredGauge creates a gauge with the given name and help text and
	// registers it with scope before handing it back.
	registeredGauge := func(name, help string) prometheus.Gauge {
		g := prometheus.NewGauge(prometheus.GaugeOpts{
			Name: name,
			Help: help,
		})
		scope.MustRegister(g)
		return g
	}

	// registeredCounter is the counter equivalent of registeredGauge.
	registeredCounter := func(name, help string) prometheus.Counter {
		c := prometheus.NewCounter(prometheus.CounterOpts{
			Name: name,
			Help: help,
		})
		scope.MustRegister(c)
		return c
	}

	// The helper calls below are evaluated top to bottom, so the collectors
	// are registered in the order they are listed here.
	return &dbMetrics{
		dbMap: dbMap,
		maxOpenConnections: registeredGauge(
			"db_max_open_connections",
			"Maximum number of DB connections allowed."),
		openConnections: registeredGauge(
			"db_open_connections",
			"Number of established DB connections (in-use and idle)."),
		inUse: registeredGauge(
			"db_inuse",
			"Number of DB connections currently in use."),
		idle: registeredGauge(
			"db_idle",
			"Number of idle DB connections."),
		waitCount: registeredCounter(
			"db_wait_count",
			"Total number of DB connections waited for."),
		waitDuration: registeredCounter(
			"db_wait_duration_seconds",
			"The total time blocked waiting for a new connection."),
		maxIdleClosed: registeredCounter(
			"db_max_idle_closed",
			"Total number of connections closed due to SetMaxIdleConns."),
		maxLifetimeClosed: registeredCounter(
			"db_max_lifetime_closed",
			"Total number of connections closed due to SetConnMaxLifetime."),
	}
}
// updateFrom copies the values of the provided sql.DBStats snapshot into the
// corresponding prometheus stats held by dbm. Every stat is overwritten with
// the snapshot's value rather than incremented.
func (dbm *dbMetrics) updateFrom(dbStats sql.DBStats) {
	// Gauges: current state of the connection pool.
	dbm.maxOpenConnections.Set(float64(dbStats.MaxOpenConnections))
	dbm.openConnections.Set(float64(dbStats.OpenConnections))
	dbm.inUse.Set(float64(dbStats.InUse))
	// The idle gauge must be fed from Idle; it was previously set from InUse,
	// which made db_idle a duplicate of db_inuse.
	dbm.idle.Set(float64(dbStats.Idle))

	// Counters: the snapshot already carries running totals, so they are
	// written with Set instead of being accumulated with Add.
	dbm.waitCount.Set(float64(dbStats.WaitCount))
	// Seconds already yields a float64, so no conversion is needed.
	dbm.waitDuration.Set(dbStats.WaitDuration.Seconds())
	dbm.maxIdleClosed.Set(float64(dbStats.MaxIdleClosed))
	dbm.maxLifetimeClosed.Set(float64(dbStats.MaxLifetimeClosed))
}
// reportDBMetrics never returns. On each pass it takes a fresh statistics
// snapshot from the database handle inside the gorp dbMap, pushes it into the
// prometheus stats via updateFrom, and then sleeps for one second. It is meant
// to run in the dedicated goroutine started by InitDBMetrics.
func (dbm *dbMetrics) reportDBMetrics() {
	for {
		// The handle is looked up through dbMap on every iteration.
		dbm.updateFrom(dbm.dbMap.Db.Stats())
		time.Sleep(time.Second)
	}
}

View File

@ -36,7 +36,6 @@ type SQLStorageAuthority struct {
dbMap *gorp.DbMap
clk clock.Clock
log blog.Logger
scope metrics.Scope
// For RPCs that generate multiple, parallelizable SQL queries, this is the
// max parallelism they will use (to avoid consuming too many MariaDB
@ -103,7 +102,6 @@ func NewSQLStorageAuthority(
dbMap: dbMap,
clk: clk,
log: logger,
scope: scope,
parallelismPerRPC: parallelismPerRPC,
}
@ -779,7 +777,6 @@ func (ssa *SQLStorageAuthority) GetPendingAuthorization(
} else if err == nil {
// We found an authz, but we still need to fetch its challenges. To
// simplify things, just call GetAuthorization, which takes care of that.
ssa.scope.Inc("reused_authz", 1)
authz, err := ssa.GetAuthorization(ctx, pa.ID)
return &authz, err
} else {