Remove global state from metrics gathering (#3167)

Previously, we used prometheus.DefaultRegisterer to register our stats, which uses global state to export its HTTP stats. We also used net/http/pprof's behavior of registering to the default global HTTP ServeMux, via DebugServer, which starts an HTTP server that uses that global ServeMux.

In this change, I merge DebugServer's functions into StatsAndLogging. StatsAndLogging now takes an address parameter and fires off an HTTP server in a goroutine. That HTTP server is newly defined, and doesn't use DefaultServeMux. On it is registered the Prometheus stats handler, and handlers for the various pprof traces. In the process I split StatsAndLogging internally into two functions: makeStats and MakeLogger. I didn't port across the expvar variable exporting, which serves a similar function to Prometheus stats but which we never use.

One nice immediate effect of this change: Since StatsAndLogging now requires and address, I noticed a bunch of commands that called StatsAndLogging, and passed around the resulting Scope, but never made use of it because they didn't run a DebugServer. Under the old StatsD world, these command still could have exported their stats by pushing, but since we moved to Prometheus their stats stopped being collected. We haven't used any of these stats, so instead of adding debug ports to all short-lived commands, or setting up a push gateway, I simply removed them and switched those commands to initialize only a Logger, no stats.
This commit is contained in:
Jacob Hoffman-Andrews 2017-10-13 11:58:01 -07:00 committed by Roland Bracewell Shoemaker
parent 0a72f768a7
commit f366e45756
18 changed files with 88 additions and 95 deletions

View File

@ -60,8 +60,8 @@ type config struct {
Syslog cmd.SyslogConfig
}
func setupContext(c config) (core.RegistrationAuthority, blog.Logger, *gorp.DbMap, core.StorageAuthority, metrics.Scope) {
scope, logger := cmd.StatsAndLogging(c.Syslog)
func setupContext(c config) (core.RegistrationAuthority, blog.Logger, *gorp.DbMap, core.StorageAuthority) {
logger := cmd.NewLogger(c.Syslog)
var tls *tls.Config
var err error
@ -70,7 +70,7 @@ func setupContext(c config) (core.RegistrationAuthority, blog.Logger, *gorp.DbMa
cmd.FailOnError(err, "TLS config")
}
raConn, err := bgrpc.ClientSetup(c.Revoker.RAService, tls, scope)
raConn, err := bgrpc.ClientSetup(c.Revoker.RAService, tls, metrics.NewNoopScope())
cmd.FailOnError(err, "Failed to load credentials and create gRPC connection to RA")
rac := bgrpc.NewRegistrationAuthorityClient(rapb.NewRegistrationAuthorityClient(raConn))
@ -78,13 +78,12 @@ func setupContext(c config) (core.RegistrationAuthority, blog.Logger, *gorp.DbMa
cmd.FailOnError(err, "Couldn't load DB URL")
dbMap, err := sa.NewDbMap(dbURL, c.Revoker.DBConfig.MaxDBConns)
cmd.FailOnError(err, "Couldn't setup database connection")
go sa.ReportDbConnCount(dbMap, scope)
saConn, err := bgrpc.ClientSetup(c.Revoker.SAService, tls, scope)
saConn, err := bgrpc.ClientSetup(c.Revoker.SAService, tls, metrics.NewNoopScope())
cmd.FailOnError(err, "Failed to load credentials and create gRPC connection to SA")
sac := bgrpc.NewStorageAuthorityClient(sapb.NewStorageAuthorityClient(saConn))
return rac, logger, dbMap, sac, scope
return rac, logger, dbMap, sac
}
func revokeBySerial(ctx context.Context, serial string, reasonCode revocation.Reason, rac core.RegistrationAuthority, logger blog.Logger, tx *gorp.Transaction) (err error) {
@ -172,7 +171,7 @@ func main() {
reasonCode, err := strconv.Atoi(args[1])
cmd.FailOnError(err, "Reason code argument must be an integer")
rac, logger, dbMap, _, _ := setupContext(c)
rac, logger, dbMap, _ := setupContext(c)
tx, err := dbMap.Begin()
if err != nil {
@ -194,7 +193,7 @@ func main() {
reasonCode, err := strconv.Atoi(args[1])
cmd.FailOnError(err, "Reason code argument must be an integer")
rac, logger, dbMap, sac, _ := setupContext(c)
rac, logger, dbMap, sac := setupContext(c)
defer logger.AuditPanic()
tx, err := dbMap.Begin()
@ -228,7 +227,7 @@ func main() {
case command == "auth-revoke" && len(args) == 1:
domain := args[0]
_, logger, _, sac, stats := setupContext(c)
_, logger, _, sac := setupContext(c)
ident := core.AcmeIdentifier{Value: domain, Type: core.IdentifierDNS}
authsRevoked, pendingAuthsRevoked, err := sac.RevokeAuthorizationsByDomain(ctx, ident)
cmd.FailOnError(err, fmt.Sprintf("Failed to revoke authorizations for %s", ident.Value))
@ -237,8 +236,6 @@ func main() {
pendingAuthsRevoked,
authsRevoked,
))
stats.Inc("RevokedAuthorizations", authsRevoked)
stats.Inc("RevokedPendingAuthorizations", pendingAuthsRevoked)
default:
usage()

View File

@ -135,7 +135,7 @@ func main() {
err = features.Set(c.CA.Features)
cmd.FailOnError(err, "Failed to set feature flags")
scope, logger := cmd.StatsAndLogging(c.Syslog)
scope, logger := cmd.StatsAndLogging(c.Syslog, c.CA.DebugAddr)
defer logger.AuditPanic()
logger.Info(cmd.VersionString())
@ -211,7 +211,5 @@ func main() {
}
})
go cmd.DebugServer(c.CA.DebugAddr)
select {}
}

View File

@ -49,7 +49,7 @@ func main() {
err = features.Set(c.Publisher.Features)
cmd.FailOnError(err, "Failed to set feature flags")
scope, logger := cmd.StatsAndLogging(c.Syslog)
scope, logger := cmd.StatsAndLogging(c.Syslog, c.Publisher.DebugAddr)
defer logger.AuditPanic()
logger.Info(cmd.VersionString())
@ -107,7 +107,5 @@ func main() {
}
})
go cmd.DebugServer(c.Publisher.DebugAddr)
select {}
}

View File

@ -103,7 +103,7 @@ func main() {
err = features.Set(c.RA.Features)
cmd.FailOnError(err, "Failed to set feature flags")
scope, logger := cmd.StatsAndLogging(c.Syslog)
scope, logger := cmd.StatsAndLogging(c.Syslog, c.RA.DebugAddr)
defer logger.AuditPanic()
logger.Info(cmd.VersionString())
@ -233,7 +233,5 @@ func main() {
}
})
go cmd.DebugServer(c.RA.DebugAddr)
select {}
}

View File

@ -43,7 +43,7 @@ func main() {
err = features.Set(c.SA.Features)
cmd.FailOnError(err, "Failed to set feature flags")
scope, logger := cmd.StatsAndLogging(c.Syslog)
scope, logger := cmd.StatsAndLogging(c.Syslog, c.SA.DebugAddr)
defer logger.AuditPanic()
logger.Info(cmd.VersionString())
@ -85,7 +85,5 @@ func main() {
}
})
go cmd.DebugServer(c.SA.DebugAddr)
select {}
}

View File

@ -66,7 +66,7 @@ func main() {
err = features.Set(c.VA.Features)
cmd.FailOnError(err, "Failed to set feature flags")
scope, logger := cmd.StatsAndLogging(c.Syslog)
scope, logger := cmd.StatsAndLogging(c.Syslog, c.VA.DebugAddr)
defer logger.AuditPanic()
logger.Info(cmd.VersionString())
@ -159,7 +159,5 @@ func main() {
}
})
go cmd.DebugServer(c.VA.DebugAddr)
select {}
}

View File

@ -98,7 +98,7 @@ func main() {
err = features.Set(c.WFE.Features)
cmd.FailOnError(err, "Failed to set feature flags")
scope, logger := cmd.StatsAndLogging(c.Syslog)
scope, logger := cmd.StatsAndLogging(c.Syslog, c.WFE.DebugAddr)
defer logger.AuditPanic()
logger.Info(cmd.VersionString())
@ -140,8 +140,6 @@ func main() {
Handler: wfe.Handler(),
}
go cmd.DebugServer(c.WFE.DebugAddr)
hd := &httpdown.HTTP{
StopTimeout: c.WFE.ShutdownStopTimeout.Duration,
KillTimeout: c.WFE.ShutdownKillTimeout.Duration,

View File

@ -98,7 +98,7 @@ func main() {
err = features.Set(c.WFE.Features)
cmd.FailOnError(err, "Failed to set feature flags")
scope, logger := cmd.StatsAndLogging(c.Syslog)
scope, logger := cmd.StatsAndLogging(c.Syslog, c.WFE.DebugAddr)
defer logger.AuditPanic()
logger.Info(cmd.VersionString())
@ -140,8 +140,6 @@ func main() {
Handler: wfe.Handler(),
}
go cmd.DebugServer(c.WFE.DebugAddr)
hd := &httpdown.HTTP{
StopTimeout: c.WFE.ShutdownStopTimeout.Duration,
KillTimeout: c.WFE.ShutdownKillTimeout.Duration,

View File

@ -453,7 +453,7 @@ func main() {
err = features.Set(c.Mailer.Features)
cmd.FailOnError(err, "Failed to set feature flags")
scope, logger := cmd.StatsAndLogging(c.Syslog)
scope, logger := cmd.StatsAndLogging(c.Syslog, c.Mailer.DebugAddr)
defer logger.AuditPanic()
logger.Info(cmd.VersionString())
@ -547,8 +547,6 @@ func main() {
stats: initStats(scope),
}
go cmd.DebugServer(c.Mailer.DebugAddr)
if *daemon {
if c.Mailer.Frequency.Duration == 0 {
fmt.Fprintln(os.Stderr, "mailer.runPeriod is not set")

View File

@ -18,7 +18,6 @@ import (
"github.com/letsencrypt/boulder/cmd"
"github.com/letsencrypt/boulder/features"
blog "github.com/letsencrypt/boulder/log"
"github.com/letsencrypt/boulder/metrics"
"github.com/letsencrypt/boulder/sa"
)
@ -37,10 +36,9 @@ type eapConfig struct {
}
type expiredAuthzPurger struct {
stats metrics.Scope
log blog.Logger
clk clock.Clock
db *gorp.DbMap
log blog.Logger
clk clock.Clock
db *gorp.DbMap
batchSize int64
}
@ -170,8 +168,7 @@ func main() {
err = features.Set(config.ExpiredAuthzPurger.Features)
cmd.FailOnError(err, "Failed to set feature flags")
// Set up logging
scope, auditlogger := cmd.StatsAndLogging(config.ExpiredAuthzPurger.Syslog)
auditlogger := cmd.NewLogger(config.ExpiredAuthzPurger.Syslog)
auditlogger.Info(cmd.VersionString())
defer auditlogger.AuditPanic()
@ -181,10 +178,8 @@ func main() {
cmd.FailOnError(err, "Couldn't load DB URL")
dbMap, err := sa.NewDbMap(dbURL, config.ExpiredAuthzPurger.DBConfig.MaxDBConns)
cmd.FailOnError(err, "Could not connect to database")
go sa.ReportDbConnCount(dbMap, scope)
purger := &expiredAuthzPurger{
stats: scope,
log: auditlogger,
clk: cmd.Clock(),
db: dbMap,

View File

@ -30,9 +30,8 @@ func TestPurgeAuthzs(t *testing.T) {
}
cleanUp := test.ResetSATestDatabase(t)
defer cleanUp()
stats := metrics.NewNoopScope()
p := expiredAuthzPurger{stats, log, fc, dbMap, 1}
p := expiredAuthzPurger{log, fc, dbMap, 1}
err = p.purgeAuthzs(time.Time{}, true, 10)
test.AssertNotError(t, err, "purgeAuthzs failed")

View File

@ -166,7 +166,7 @@ func main() {
os.Exit(1)
}
_, log := cmd.StatsAndLogging(cmd.SyslogConfig{StdoutLevel: 7})
log := cmd.NewLogger(cmd.SyslogConfig{StdoutLevel: 7})
configData, err := ioutil.ReadFile(*configFile)
cmd.FailOnError(err, fmt.Sprintf("Reading %q", *configFile))

View File

@ -16,6 +16,7 @@ import (
"github.com/letsencrypt/boulder/features"
blog "github.com/letsencrypt/boulder/log"
bmail "github.com/letsencrypt/boulder/mail"
"github.com/letsencrypt/boulder/metrics"
"github.com/letsencrypt/boulder/sa"
)
@ -313,14 +314,13 @@ func main() {
err = features.Set(cfg.NotifyMailer.Features)
cmd.FailOnError(err, "Failed to set feature flags")
scope, log := cmd.StatsAndLogging(cfg.Syslog)
log := cmd.NewLogger(cfg.Syslog)
defer log.AuditPanic()
dbURL, err := cfg.NotifyMailer.DBConfig.URL()
cmd.FailOnError(err, "Couldn't load DB URL")
dbMap, err := sa.NewDbMap(dbURL, 10)
cmd.FailOnError(err, "Could not connect to database")
go sa.ReportDbConnCount(dbMap, scope)
// Load email body
body, err := ioutil.ReadFile(*bodyFile)
@ -350,7 +350,7 @@ func main() {
smtpPassword,
*address,
log,
scope,
metrics.NewNoopScope(),
*reconnBase,
*reconnMax)
}

View File

@ -170,7 +170,7 @@ as generated by Boulder's single-ocsp command.
err = features.Set(c.OCSPResponder.Features)
cmd.FailOnError(err, "Failed to set feature flags")
scope, logger := cmd.StatsAndLogging(c.Syslog)
scope, logger := cmd.StatsAndLogging(c.Syslog, c.OCSPResponder.DebugAddr)
defer logger.AuditPanic()
logger.Info(cmd.VersionString())
@ -214,8 +214,6 @@ as generated by Boulder's single-ocsp command.
Handler: m,
}
go cmd.DebugServer(c.OCSPResponder.DebugAddr)
hd := &httpdown.HTTP{
StopTimeout: stopTimeout,
KillTimeout: killTimeout,

View File

@ -757,7 +757,7 @@ func main() {
err = features.Set(conf.Features)
cmd.FailOnError(err, "Failed to set feature flags")
scope, logger := cmd.StatsAndLogging(c.Syslog)
scope, logger := cmd.StatsAndLogging(c.Syslog, conf.DebugAddr)
defer logger.AuditPanic()
logger.Info(cmd.VersionString())
@ -796,7 +796,6 @@ func main() {
}
go cmd.CatchSignals(logger, nil)
go cmd.DebugServer(conf.DebugAddr)
// Sleep forever (until signaled)
select {}

View File

@ -117,7 +117,7 @@ func parseLogLine(sa certificateStorage, logger blog.Logger, line string) (found
return true, true
}
func setup(configFile string) (metrics.Scope, blog.Logger, core.StorageAuthority) {
func setup(configFile string) (blog.Logger, core.StorageAuthority) {
configJSON, err := ioutil.ReadFile(configFile)
cmd.FailOnError(err, "Failed to read config file")
var conf config
@ -125,7 +125,7 @@ func setup(configFile string) (metrics.Scope, blog.Logger, core.StorageAuthority
cmd.FailOnError(err, "Failed to parse config file")
err = features.Set(conf.Features)
cmd.FailOnError(err, "Failed to set feature flags")
scope, logger := cmd.StatsAndLogging(conf.Syslog)
logger := cmd.NewLogger(conf.Syslog)
var tls *tls.Config
if conf.TLS.CertFile != nil {
@ -133,10 +133,10 @@ func setup(configFile string) (metrics.Scope, blog.Logger, core.StorageAuthority
cmd.FailOnError(err, "TLS config")
}
conn, err := bgrpc.ClientSetup(conf.SAService, tls, scope)
conn, err := bgrpc.ClientSetup(conf.SAService, tls, metrics.NewNoopScope())
cmd.FailOnError(err, "Failed to load credentials and create gRPC connection to SA")
sac := bgrpc.NewStorageAuthorityClient(sapb.NewStorageAuthorityClient(conn))
return scope, logger, sac
return logger, sac
}
func main() {
@ -166,7 +166,7 @@ func main() {
switch command {
case "parse-ca-log":
stats, logger, sa := setup(*configFile)
logger, sa := setup(*configFile)
if *logPath == "" {
usage()
}
@ -186,13 +186,10 @@ func main() {
}
}
logger.Info(fmt.Sprintf("Found %d orphans and added %d to the database\n", orphansFound, orphansAdded))
stats.Inc("Found", orphansFound)
stats.Inc("Added", orphansAdded)
stats.Inc("AddingFailed", orphansFound-orphansAdded)
case "parse-der":
ctx := context.Background()
_, _, sa := setup(*configFile)
_, sa := setup(*configFile)
if *derPath == "" || *regID == 0 {
usage()
}

View File

@ -20,14 +20,12 @@ import (
"encoding/json"
"encoding/pem"
"errors"
"expvar" // For DebugServer, below.
"fmt"
"io/ioutil"
"log"
"log/syslog"
"net"
"net/http"
_ "net/http/pprof" // HTTP performance profiling, added transparently to HTTP APIs
"net/http/pprof"
"os"
"os/signal"
"path"
@ -43,7 +41,6 @@ import (
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/letsencrypt/boulder/core"
"github.com/letsencrypt/boulder/features"
blog "github.com/letsencrypt/boulder/log"
"github.com/letsencrypt/boulder/metrics"
)
@ -109,15 +106,29 @@ func (log grpcLogger) Println(args ...interface{}) {
log.AuditErr(fmt.Sprintln(args...))
}
type promLogger struct {
blog.Logger
}
func (log promLogger) Println(args ...interface{}) {
log.AuditErr(fmt.Sprintln(args...))
}
// StatsAndLogging constructs a metrics.Scope and an AuditLogger based on its config
// parameters, and return them both. Crashes if any setup fails.
// parameters, and return them both. It also spawns off an HTTP server on the
// provided port to report the stats and provide pprof profiling handlers.
// Crashes if any setup fails.
// Also sets the constructed AuditLogger as the default logger, and configures
// the cfssl, mysql, and grpc packages to use our logger.
// This must be called before any gRPC code is called, because gRPC's SetLogger
// doesn't use any locking.
func StatsAndLogging(logConf SyslogConfig) (metrics.Scope, blog.Logger) {
scope := metrics.NewPromScope(prometheus.DefaultRegisterer)
func StatsAndLogging(logConf SyslogConfig, addr string) (metrics.Scope, blog.Logger) {
logger := NewLogger(logConf)
scope := newScope(addr, logger)
return scope, logger
}
func NewLogger(logConf SyslogConfig) blog.Logger {
tag := path.Base(os.Args[0])
syslogger, err := syslog.Dial(
"",
@ -136,8 +147,43 @@ func StatsAndLogging(logConf SyslogConfig) (metrics.Scope, blog.Logger) {
cfsslLog.SetLogger(cfsslLogger{logger})
_ = mysql.SetLogger(mysqlLogger{logger})
grpclog.SetLogger(grpcLogger{logger})
return logger
}
return scope, logger
func newScope(addr string, logger blog.Logger) metrics.Scope {
registry := prometheus.NewRegistry()
registry.MustRegister(prometheus.NewGoCollector())
registry.MustRegister(prometheus.NewProcessCollector(os.Getpid(), "boulder"))
mux := http.NewServeMux()
// Register each of the available pprof handlers. These are all registered on
// DefaultServeMux just by importing pprof, but since we eschew
// DefaultServeMux, we need to explicitly register them on our own mux.
reg := func(name string) {
mux.Handle("/debug/pprof/"+name, pprof.Handler(name))
}
reg("block")
reg("goroutine")
reg("heap")
reg("mutex")
reg("profile")
reg("threadcreate")
reg("trace")
mux.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{
ErrorLog: promLogger{logger},
}))
server := http.Server{
Addr: addr,
Handler: mux,
}
go func() {
err := server.ListenAndServe()
if err != nil {
log.Fatalf("unable to boot debug server on %s: %v", addr, err)
}
}()
return metrics.NewPromScope(registry)
}
// FailOnError exits and prints an error message if we encountered a problem
@ -172,28 +218,6 @@ func LoadCert(path string) (cert []byte, err error) {
return
}
// DebugServer starts a server to receive debug information. Typical
// usage is to start it in a goroutine, configured with an address
// from the appropriate configuration object:
//
// go cmd.DebugServer(c.XA.DebugAddr)
func DebugServer(addr string) {
m := expvar.NewMap("enabled-features")
features.Export(m)
if addr == "" {
log.Fatalf("unable to boot debug server because no address was given for it. Set debugAddr.")
}
ln, err := net.Listen("tcp", addr)
if err != nil {
log.Fatalf("unable to boot debug server on %#v", addr)
}
http.Handle("/metrics", promhttp.Handler())
err = http.Serve(ln, nil)
if err != nil {
log.Fatalf("unable to boot debug server: %v", err)
}
}
// ReadConfigFile takes a file path as an argument and attempts to
// unmarshal the content of the file into a struct containing a
// configuration of a boulder component.

View File

@ -99,7 +99,7 @@ func main() {
limit = int(c.BatchSize)
}
_, log := cmd.StatsAndLogging(c.SyslogConfig)
log := cmd.NewLogger(c.SyslogConfig)
moreThan, err := time.Parse(time.RFC3339, c.IssuedMoreThan)
cmd.FailOnError(err, fmt.Sprintf("failed to parse issuedMoreThan %q", c.IssuedMoreThan))