// Package cmd provides utilities that underlie the specific commands.
package cmd

import (
	"context"
	"encoding/json"
	"errors"
	"expvar"
	"fmt"
	"io"
	"log"
	"log/syslog"
	"net/http"
	"net/http/pprof"
	"os"
	"os/signal"
	"runtime"
	"runtime/debug"
	"strings"
	"syscall"
	"time"

	"github.com/go-logr/stdr"
	"github.com/go-sql-driver/mysql"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/collectors"
	"github.com/prometheus/client_golang/prometheus/promhttp"
	"github.com/redis/go-redis/v9"
	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
	"go.opentelemetry.io/otel/propagation"
	"go.opentelemetry.io/otel/sdk/resource"
	"go.opentelemetry.io/otel/sdk/trace"
	semconv "go.opentelemetry.io/otel/semconv/v1.25.0"
	"google.golang.org/grpc/grpclog"

	"github.com/letsencrypt/boulder/config"
	"github.com/letsencrypt/boulder/core"
	blog "github.com/letsencrypt/boulder/log"
	"github.com/letsencrypt/boulder/strictyaml"
	"github.com/letsencrypt/validator/v10"
)

// Because we don't know when this init will be called with respect to
// flag.Parse() and other flag definitions, we can't rely on the regular
// flag mechanism. But this one is fine.
func init() {
	for _, v := range os.Args {
		if v == "--version" || v == "-version" {
			fmt.Println(VersionString())
			os.Exit(0)
		}
	}
}

// mysqlLogger implements the mysql.Logger interface.
type mysqlLogger struct {
	blog.Logger
}

func (m mysqlLogger) Print(v ...interface{}) {
	m.AuditErrf("[mysql] %s", fmt.Sprint(v...))
}

// grpcLogger implements the grpclog.LoggerV2 interface.
type grpcLogger struct {
	blog.Logger
}

// Ensure that fatal logs exit, because we use neither the gRPC default logger
// nor the stdlib default logger, both of which would call os.Exit(1) for us.
func (log grpcLogger) Fatal(args ...interface{}) {
	log.Error(args...)
	os.Exit(1)
}
func (log grpcLogger) Fatalf(format string, args ...interface{}) {
	log.Errorf(format, args...)
	os.Exit(1)
}
func (log grpcLogger) Fatalln(args ...interface{}) {
	log.Errorln(args...)
	os.Exit(1)
}

// Treat all gRPC error logs as potential audit events.
func (log grpcLogger) Error(args ...interface{}) {
	log.Logger.AuditErr(fmt.Sprint(args...))
}
func (log grpcLogger) Errorf(format string, args ...interface{}) {
	log.Logger.AuditErrf(format, args...)
}
func (log grpcLogger) Errorln(args ...interface{}) {
	log.Logger.AuditErr(fmt.Sprintln(args...))
}

// Pass through most Warnings, but filter out a few noisy ones.
func (log grpcLogger) Warning(args ...interface{}) {
	log.Logger.Warning(fmt.Sprint(args...))
}
func (log grpcLogger) Warningf(format string, args ...interface{}) {
	log.Logger.Warningf(format, args...)
}
func (log grpcLogger) Warningln(args ...interface{}) {
	msg := fmt.Sprintln(args...)
	// See https://github.com/letsencrypt/boulder/issues/4628
	if strings.Contains(msg, `ccResolverWrapper: error parsing service config: no JSON service config provided`) {
		return
	}
	// See https://github.com/letsencrypt/boulder/issues/4379
	if strings.Contains(msg, `Server.processUnaryRPC failed to write status: connection error: desc = "transport is closing"`) {
		return
	}
	// Since we've already formatted the message, just pass through to .Warning()
	log.Logger.Warning(msg)
}

// Don't log any INFO-level gRPC stuff. In practice this is all noise, like
// failed TXT lookups for service discovery (we only use A records).
func (log grpcLogger) Info(args ...interface{})                 {}
func (log grpcLogger) Infof(format string, args ...interface{}) {}
func (log grpcLogger) Infoln(args ...interface{})               {}

// V returns true if the verbosity level l is less than the verbosity we want to
// log at.
func (log grpcLogger) V(l int) bool {
	// We always return false. This causes gRPC to not log some things which are
	// only logged conditionally if the logLevel is set below a certain value.
	// TODO: Use the wrapped log.Logger.stdoutLevel and log.Logger.syslogLevel
	// to determine a correct return value here.
	return false
}

// promLogger implements the promhttp.Logger interface.
type promLogger struct {
	blog.Logger
}

func (log promLogger) Println(args ...interface{}) {
	log.AuditErr(fmt.Sprint(args...))
}

type redisLogger struct {
	blog.Logger
}

func (rl redisLogger) Printf(ctx context.Context, format string, v ...interface{}) {
	rl.Infof(format, v...)
}

// logWriter implements the io.Writer interface.
type logWriter struct {
	blog.Logger
}

func (lw logWriter) Write(p []byte) (n int, err error) {
	// Lines received by logWriter will always have a trailing newline.
	lw.Logger.Info(strings.Trim(string(p), "\n"))
	return
}

// logOutput implements the log.Logger interface's Output method for use with logr
type logOutput struct {
	blog.Logger
}

func (l logOutput) Output(calldepth int, logline string) error {
	l.Logger.Info(logline)
	return nil
}

// StatsAndLogging sets up an AuditLogger, Prometheus Registerer, and
// OpenTelemetry tracing. It returns the Registerer and AuditLogger, along
// with a graceful shutdown function to be deferred.
//
// It spawns off an HTTP server on the provided port to report the stats and
// provide pprof profiling handlers.
//
// It also sets the constructed AuditLogger as the default logger, and
// configures the mysql and grpc packages to use our logger. This must be
// called before any gRPC code is called, because gRPC's SetLogger doesn't
// use any locking.
//
// This function does not return an error, and will panic on problems.
func StatsAndLogging(logConf SyslogConfig, otConf OpenTelemetryConfig, addr string) (prometheus.Registerer, blog.Logger, func(context.Context)) {
	logger := NewLogger(logConf)

	shutdown := NewOpenTelemetry(otConf, logger)

	return newStatsRegistry(addr, logger), logger, shutdown
}
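
// Callers typically wire this up near the top of main(). A minimal caller-side
// sketch (the c.Syslog, c.OpenTelemetry, and c.DebugAddr config field names are
// illustrative, not defined in this file):
//
//	stats, logger, oteShutdown := cmd.StatsAndLogging(c.Syslog, c.OpenTelemetry, c.DebugAddr)
//	defer oteShutdown(context.Background())
//	logger.Info(cmd.VersionString())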

// NewLogger creates a logger object with the provided settings, sets it as
// the global logger, and returns it.
//
// It also sets the logging systems for various packages we use to go through
// the created logger, and sets up a periodic log event for the current timestamp.
func NewLogger(logConf SyslogConfig) blog.Logger {
	var logger blog.Logger
	if logConf.SyslogLevel >= 0 {
		syslogger, err := syslog.Dial(
			"",
			"",
			syslog.LOG_INFO, // default, not actually used
			core.Command())
		FailOnError(err, "Could not connect to Syslog")
		syslogLevel := int(syslog.LOG_INFO)
		if logConf.SyslogLevel != 0 {
			syslogLevel = logConf.SyslogLevel
		}
		logger, err = blog.New(syslogger, logConf.StdoutLevel, syslogLevel)
		FailOnError(err, "Could not connect to Syslog")
	} else {
		logger = blog.StdoutLogger(logConf.StdoutLevel)
	}

	_ = blog.Set(logger)
	_ = mysql.SetLogger(mysqlLogger{logger})
	grpclog.SetLoggerV2(grpcLogger{logger})
	log.SetOutput(logWriter{logger})
	redis.SetLogger(redisLogger{logger})

	// Periodically log the current timestamp, to ensure syslog timestamps match
	// Boulder's conception of time.
	go func() {
		for {
			time.Sleep(time.Hour)
			logger.Info(fmt.Sprintf("time=%s", time.Now().Format(time.RFC3339Nano)))
		}
	}()
	return logger
}

func newVersionCollector() prometheus.Collector {
	buildTime := core.Unspecified
	if core.GetBuildTime() != core.Unspecified {
		// core.BuildTime is set by our Makefile using the shell command 'date
		// -u' which outputs in a consistent format across all POSIX systems.
		bt, err := time.Parse(time.UnixDate, core.BuildTime)
		if err != nil {
			// Should never happen unless the Makefile is changed.
			buildTime = "Unparsable"
		} else {
			buildTime = bt.Format(time.RFC3339)
		}
	}
	return prometheus.NewGaugeFunc(
		prometheus.GaugeOpts{
			Name: "version",
			Help: fmt.Sprintf(
				"A metric with a constant value of '1' labeled by the short commit-id (buildId), build timestamp in RFC3339 format (buildTime), and Go release tag like 'go1.3' (goVersion) from which %s was built.",
				core.Command(),
			),
			ConstLabels: prometheus.Labels{
				"buildId":   core.GetBuildID(),
				"buildTime": buildTime,
				"goVersion": runtime.Version(),
			},
		},
		func() float64 { return 1 },
	)
}

func newStatsRegistry(addr string, logger blog.Logger) prometheus.Registerer {
	registry := prometheus.NewRegistry()

	if addr == "" {
		logger.Info("No debug listen address specified")
		return registry
	}

	registry.MustRegister(collectors.NewGoCollector())
	registry.MustRegister(collectors.NewProcessCollector(
		collectors.ProcessCollectorOpts{}))
	registry.MustRegister(newVersionCollector())

	mux := http.NewServeMux()
	// Register the available pprof handlers. These are all registered on
	// DefaultServeMux just by importing pprof, but since we eschew
	// DefaultServeMux, we need to explicitly register them on our own mux.
	mux.Handle("/debug/pprof/", http.HandlerFunc(pprof.Index))
	mux.Handle("/debug/pprof/profile", http.HandlerFunc(pprof.Profile))
	mux.Handle("/debug/pprof/symbol", http.HandlerFunc(pprof.Symbol))
	mux.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace))
	// These handlers are defined in runtime/pprof instead of net/http/pprof, and
	// have to be accessed through net/http/pprof's Handler func.
	mux.Handle("/debug/pprof/goroutine", pprof.Handler("goroutine"))
	mux.Handle("/debug/pprof/block", pprof.Handler("block"))
	mux.Handle("/debug/pprof/heap", pprof.Handler("heap"))
	mux.Handle("/debug/pprof/mutex", pprof.Handler("mutex"))
	mux.Handle("/debug/pprof/threadcreate", pprof.Handler("threadcreate"))

	mux.Handle("/debug/vars", expvar.Handler())
	mux.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{
		ErrorLog: promLogger{logger},
	}))

	logger.Infof("Debug server listening on %s", addr)

	server := http.Server{
		Addr:        addr,
		Handler:     mux,
		ReadTimeout: time.Minute,
	}
	go func() {
		err := server.ListenAndServe()
		if err != nil {
			logger.Errf("unable to boot debug server on %s: %v", addr, err)
			os.Exit(1)
		}
	}()
	return registry
}
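
// Once the debug server is up, its endpoints can be exercised with standard
// tooling against whatever address was passed as addr. For example (the port
// 8003 below is purely illustrative):
//
//	curl http://localhost:8003/metrics
//	go tool pprof http://localhost:8003/debug/pprof/heap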

// NewOpenTelemetry sets up our OpenTelemetry tracing.
// It returns a graceful shutdown function to be deferred.
func NewOpenTelemetry(config OpenTelemetryConfig, logger blog.Logger) func(ctx context.Context) {
	otel.SetLogger(stdr.New(logOutput{logger}))
	otel.SetErrorHandler(otel.ErrorHandlerFunc(func(err error) { logger.Errf("OpenTelemetry error: %v", err) }))

	resources := resource.NewWithAttributes(
		semconv.SchemaURL,
		semconv.ServiceName(core.Command()),
		semconv.ServiceVersion(core.GetBuildID()),
		semconv.ProcessPID(os.Getpid()),
	)

	opts := []trace.TracerProviderOption{
		trace.WithResource(resources),
		// Use a ParentBased sampler to respect the sample decisions on incoming
		// traces, and TraceIDRatioBased to randomly sample new traces.
		trace.WithSampler(trace.ParentBased(trace.TraceIDRatioBased(config.SampleRatio))),
	}

	if config.Endpoint != "" {
		exporter, err := otlptracegrpc.New(
			context.Background(),
			otlptracegrpc.WithInsecure(),
			otlptracegrpc.WithEndpoint(config.Endpoint))
		if err != nil {
			FailOnError(err, "Could not create OpenTelemetry OTLP exporter")
		}

		opts = append(opts, trace.WithBatcher(exporter))
	}

	tracerProvider := trace.NewTracerProvider(opts...)
	otel.SetTracerProvider(tracerProvider)
	otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(propagation.TraceContext{}, propagation.Baggage{}))

	return func(ctx context.Context) {
		err := tracerProvider.Shutdown(ctx)
		if err != nil {
			logger.Errf("Error while shutting down OpenTelemetry: %v", err)
		}
	}
}
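
// After this runs, any code in the process can start spans through the global
// tracer provider installed above. A minimal sketch (the tracer name
// "boulder/cmd-example" and the span name "do-work" are illustrative):
//
//	ctx, span := otel.Tracer("boulder/cmd-example").Start(ctx, "do-work")
//	defer span.End()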

// AuditPanic catches and logs panics, then exits with exit code 1.
// This method should be called in a defer statement as early as possible.
func AuditPanic() {
	err := recover()
	// No panic, no problem
	if err == nil {
		return
	}
	// Get the global logger if it's initialized, or create a default one if not.
	// We could wind up creating a default logger if we panic so early in a process'
	// lifetime that we haven't yet parsed the config and created a logger.
	log := blog.Get()
	// For the special type `failure`, audit log the message and exit quietly
	fail, ok := err.(failure)
	if ok {
		log.AuditErr(fail.msg)
	} else {
		// For all other values passed to `panic`, log them and a stack trace
		log.AuditErrf("Panic caused by err: %s", err)

		log.AuditErrf("Stack Trace (Current goroutine) %s", debug.Stack())
	}
	// Because this function is deferred as early as possible, there are no further
	// defers to run after this one, so it is safe to call os.Exit to set the exit
	// code without losing any deferred work.
	os.Exit(1)
}
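
// The intended caller-side pattern is a deferred call as the first statement of
// a command's main(), so the recover() above sees any later panic:
//
//	func main() {
//		defer cmd.AuditPanic()
//		// ... rest of the command
//	}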

// failure is a sentinel type that `Fail` passes to `panic` so `AuditPanic` can exit
// quietly and print the msg.
type failure struct {
	msg string
}

func (f failure) String() string {
	return f.msg
}

// Fail raises a panic with a special type that causes `AuditPanic` to audit log the
// provided message and then exit nonzero (without printing a stack trace).
func Fail(msg string) {
	panic(failure{msg})
}

// FailOnError calls Fail if the provided error is non-nil.
// This is useful for one-line error handling in top-level executables,
// but should generally be avoided in libraries. The message argument is optional.
func FailOnError(err error, msg string) {
	if err == nil {
		return
	}
	if msg == "" {
		Fail(err.Error())
	} else {
		Fail(fmt.Sprintf("%s: %s", msg, err))
	}
}
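
// Typical caller-side use, paired with a deferred cmd.AuditPanic() in main().
// someSetupStep and the "loading config" message are illustrative:
//
//	err := someSetupStep()
//	cmd.FailOnError(err, "loading config")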

func decodeJSONStrict(in io.Reader, out interface{}) error {
	decoder := json.NewDecoder(in)
	decoder.DisallowUnknownFields()

	return decoder.Decode(out)
}

// ReadConfigFile takes a file path as an argument and attempts to
// unmarshal the content of the file into a struct containing a
// configuration of a boulder component. Any config keys in the JSON
// file which do not correspond to expected keys in the config struct
// will result in errors.
func ReadConfigFile(filename string, out interface{}) error {
	file, err := os.Open(filename)
	if err != nil {
		return err
	}
	defer file.Close()

	return decodeJSONStrict(file, out)
}
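
// Caller-side sketch (the Config struct and the configFile flag are
// illustrative, not defined in this file):
//
//	var c Config
//	err := cmd.ReadConfigFile(*configFile, &c)
//	cmd.FailOnError(err, "Reading JSON config file into config structure")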

// ValidateJSONConfig takes a *ConfigValidator and an io.Reader containing a
// JSON representation of a config. The JSON data is unmarshaled into the
// *ConfigValidator's inner Config and then validated according to the
// 'validate' tags on each field. Callers can use cmd.LookupConfigValidator
// to get a *ConfigValidator for a given Boulder component. This is exported for
// use in SRE CI tooling.
func ValidateJSONConfig(cv *ConfigValidator, in io.Reader) error {
	if cv == nil {
		return errors.New("config validator cannot be nil")
	}

	// Initialize the validator and load any custom tags.
	validate := validator.New()
	for tag, v := range cv.Validators {
		err := validate.RegisterValidation(tag, v)
		if err != nil {
			return err
		}
	}

	// Register custom types for use with existing validation tags.
	validate.RegisterCustomTypeFunc(config.DurationCustomTypeFunc, config.Duration{})

	err := decodeJSONStrict(in, cv.Config)
	if err != nil {
		return err
	}
	err = validate.Struct(cv.Config)
	if err != nil {
		errs, ok := err.(validator.ValidationErrors)
		if !ok {
			// This should never happen.
			return err
		}
		if len(errs) > 0 {
			allErrs := []string{}
			for _, e := range errs {
				allErrs = append(allErrs, e.Error())
			}
			return errors.New(strings.Join(allErrs, ", "))
		}
	}
	return nil
}
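
// A hedged caller-side sketch. ConfigValidator is defined elsewhere in this
// package; the sketch assumes only the exported Config field referenced above,
// holding a pointer to the component's config struct. myConfig and rawJSON are
// illustrative names:
//
//	cv := &cmd.ConfigValidator{Config: &myConfig}
//	err := cmd.ValidateJSONConfig(cv, strings.NewReader(rawJSON))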

// ValidateYAMLConfig takes a *ConfigValidator and an io.Reader containing a
// YAML representation of a config. The YAML data is unmarshaled into the
// *ConfigValidator's inner Config and then validated according to the
// 'validate' tags on each field. Callers can use cmd.LookupConfigValidator
// to get a *ConfigValidator for a given Boulder component. This is exported for
// use in SRE CI tooling.
func ValidateYAMLConfig(cv *ConfigValidator, in io.Reader) error {
	if cv == nil {
		return errors.New("config validator cannot be nil")
	}

	// Initialize the validator and load any custom tags.
	validate := validator.New()
	for tag, v := range cv.Validators {
		err := validate.RegisterValidation(tag, v)
		if err != nil {
			return err
		}
	}

	// Register custom types for use with existing validation tags.
	validate.RegisterCustomTypeFunc(config.DurationCustomTypeFunc, config.Duration{})

	inBytes, err := io.ReadAll(in)
	if err != nil {
		return err
	}
	err = strictyaml.Unmarshal(inBytes, cv.Config)
	if err != nil {
		return err
	}
	err = validate.Struct(cv.Config)
	if err != nil {
		errs, ok := err.(validator.ValidationErrors)
		if !ok {
			// This should never happen.
			return err
		}
		if len(errs) > 0 {
			allErrs := []string{}
			for _, e := range errs {
				allErrs = append(allErrs, e.Error())
			}
			return errors.New(strings.Join(allErrs, ", "))
		}
	}
	return nil
}

// VersionString produces a friendly Application version string.
func VersionString() string {
	return fmt.Sprintf("Versions: %s=(%s %s) Golang=(%s) BuildHost=(%s)", core.Command(), core.GetBuildID(), core.GetBuildTime(), runtime.Version(), core.GetBuildHost())
}

// CatchSignals blocks until a SIGTERM, SIGINT, or SIGHUP is received, then
// executes the given callback. The callback should not block; it should simply
// signal other goroutines (particularly the main goroutine) to clean themselves
// up and exit. This function is intended to be called in its own goroutine,
// while the main goroutine waits for an indication that the other goroutines
// have exited cleanly.
func CatchSignals(callback func()) {
	WaitForSignal()
	callback()
}

// WaitForSignal blocks until a SIGTERM, SIGINT, or SIGHUP is received. It then
// returns, allowing execution to resume, generally allowing a main() function
// to return and trigger any deferred cleanup functions. This function is
// intended to be called directly from the main goroutine, while a gRPC or HTTP
// server runs in a background goroutine.
func WaitForSignal() {
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGTERM)
	signal.Notify(sigChan, syscall.SIGINT)
	signal.Notify(sigChan, syscall.SIGHUP)
	<-sigChan
}
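
// Caller-side sketch of the intended pattern: run the server in a background
// goroutine, block the main goroutine here, and let main's deferred cleanup run
// on return (srv is an illustrative server value):
//
//	go srv.Serve()      // background gRPC or HTTP server
//	cmd.WaitForSignal() // block main until SIGTERM, SIGINT, or SIGHUP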