// boulder/cmd/shell.go

// Package cmd provides utilities that underlie the specific commands.
package cmd

import (
	"context"
	"encoding/json"
	"errors"
	"expvar"
	"fmt"
	"io"
	"log"
	"log/syslog"
	"net/http"
	"net/http/pprof"
	"os"
	"os/signal"
	"runtime"
	"strings"
	"syscall"
	"time"

	"github.com/go-logr/stdr"
	"github.com/go-playground/validator/v10"
	"github.com/go-redis/redis/v8"
	"github.com/go-sql-driver/mysql"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/collectors"
	"github.com/prometheus/client_golang/prometheus/promhttp"
	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
	"go.opentelemetry.io/otel/propagation"
	"go.opentelemetry.io/otel/sdk/resource"
	"go.opentelemetry.io/otel/sdk/trace"
	semconv "go.opentelemetry.io/otel/semconv/v1.17.0"
	"google.golang.org/grpc/grpclog"

	"github.com/letsencrypt/boulder/core"
	blog "github.com/letsencrypt/boulder/log"
	"github.com/letsencrypt/boulder/strictyaml"
)
// Because we don't know when this init will be called with respect to
// flag.Parse() and other flag definitions, we can't rely on the regular
// flag mechanism. But this one is fine.
func init() {
for _, v := range os.Args {
if v == "--version" || v == "-version" {
fmt.Println(VersionString())
os.Exit(0)
}
}
}
// mysqlLogger implements the mysql.Logger interface.
type mysqlLogger struct {
blog.Logger
}
func (m mysqlLogger) Print(v ...interface{}) {
m.AuditErrf("[mysql] %s", fmt.Sprint(v...))
}
// grpcLogger implements the grpclog.LoggerV2 interface.
type grpcLogger struct {
blog.Logger
}
// Ensure that fatal logs exit, because we use neither the gRPC default logger
// nor the stdlib default logger, both of which would call os.Exit(1) for us.
func (log grpcLogger) Fatal(args ...interface{}) {
log.Error(args...)
os.Exit(1)
}
func (log grpcLogger) Fatalf(format string, args ...interface{}) {
log.Errorf(format, args...)
os.Exit(1)
}
func (log grpcLogger) Fatalln(args ...interface{}) {
log.Errorln(args...)
os.Exit(1)
}
// Treat all gRPC error logs as potential audit events.
func (log grpcLogger) Error(args ...interface{}) {
log.Logger.AuditErr(fmt.Sprint(args...))
}
func (log grpcLogger) Errorf(format string, args ...interface{}) {
log.Logger.AuditErrf(format, args...)
}
func (log grpcLogger) Errorln(args ...interface{}) {
log.Logger.AuditErr(fmt.Sprintln(args...))
}
// Pass through most Warnings, but filter out a few noisy ones.
func (log grpcLogger) Warning(args ...interface{}) {
log.Logger.Warning(fmt.Sprint(args...))
}
func (log grpcLogger) Warningf(format string, args ...interface{}) {
log.Logger.Warningf(format, args...)
}
func (log grpcLogger) Warningln(args ...interface{}) {
msg := fmt.Sprintln(args...)
// See https://github.com/letsencrypt/boulder/issues/4628
if strings.Contains(msg, `ccResolverWrapper: error parsing service config: no JSON service config provided`) {
return
}
// See https://github.com/letsencrypt/boulder/issues/4379
if strings.Contains(msg, `Server.processUnaryRPC failed to write status: connection error: desc = "transport is closing"`) {
return
}
// Since we've already formatted the message, just pass through to .Warning()
log.Logger.Warning(msg)
}
// Don't log any INFO-level gRPC stuff. In practice this is all noise, like
// failed TXT lookups for service discovery (we only use A records).
func (log grpcLogger) Info(args ...interface{}) {}
func (log grpcLogger) Infof(format string, args ...interface{}) {}
func (log grpcLogger) Infoln(args ...interface{}) {}
// V returns true if the verbosity level l is less than the verbosity we want to
// log at.
func (log grpcLogger) V(l int) bool {
// We always return false. This causes gRPC to not log some things which are
// only logged conditionally if the logLevel is set below a certain value.
// TODO: Use the wrapped log.Logger.stdoutLevel and log.Logger.syslogLevel
// to determine a correct return value here.
return false
}
// promLogger implements the promhttp.Logger interface.
type promLogger struct {
blog.Logger
}
func (log promLogger) Println(args ...interface{}) {
log.AuditErr(fmt.Sprint(args...))
}
type redisLogger struct {
blog.Logger
}
func (rl redisLogger) Printf(ctx context.Context, format string, v ...interface{}) {
rl.Infof(format, v...)
}
// logWriter implements the io.Writer interface.
type logWriter struct {
blog.Logger
}
func (lw logWriter) Write(p []byte) (n int, err error) {
	// Lines received by logWriter will always have a trailing newline.
	lw.Logger.Info(strings.Trim(string(p), "\n"))
	// Report the full length as consumed to satisfy the io.Writer contract.
	return len(p), nil
}
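
// Illustrative sketch, not part of the original file: logWriter is what lets
// the standard library's "log" package write through blog.Logger, as NewLogger
// does below via log.SetOutput(logWriter{logger}).
func exampleLogWriterUsage(logger blog.Logger) {
	log.SetOutput(logWriter{logger})
	// This now lands in blog.Logger.Info, with the trailing newline trimmed.
	log.Print("hello from the standard library logger")
}
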
// logOutput implements the log.Logger interface's Output method for use with logr
type logOutput struct {
blog.Logger
}
func (l logOutput) Output(calldepth int, logline string) error {
l.Logger.Info(logline)
return nil
}
// StatsAndLogging sets up an AuditLogger, Prometheus Registerer, and
// OpenTelemetry tracing. It returns the Registerer and AuditLogger, along
// with a graceful shutdown function to be deferred.
//
// It spawns off an HTTP server on the provided address to report the stats and
// provide pprof profiling handlers.
//
// It sets the constructed AuditLogger as the default logger, and configures the mysql
// and grpc packages to use our logger. This must be called before any gRPC code
// is called, because gRPC's SetLogger doesn't use any locking.
//
// This function does not return an error, and will panic on problems.
func StatsAndLogging(logConf SyslogConfig, otConf OpenTelemetryConfig, addr string) (prometheus.Registerer, blog.Logger, func(context.Context)) {
logger := NewLogger(logConf)
shutdown := NewOpenTelemetry(otConf, logger)
return newStatsRegistry(addr, logger), logger, shutdown
}
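
// Hypothetical usage sketch, not part of the original file: roughly how a
// Boulder command's main function might wire up StatsAndLogging. The literal
// config values and debug address are illustrative assumptions.
func exampleStatsAndLoggingUsage() {
	syslogConf := SyslogConfig{StdoutLevel: 6, SyslogLevel: -1} // stdout only; no syslog
	otConf := OpenTelemetryConfig{SampleRatio: 0.1}             // no OTLP endpoint: spans are not exported
	stats, logger, oTelShutdown := StatsAndLogging(syslogConf, otConf, ":8003")
	defer oTelShutdown(context.Background())
	logger.Info(VersionString())
	_ = stats // hand the prometheus.Registerer to components that export metrics
}
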
func NewLogger(logConf SyslogConfig) blog.Logger {
var logger blog.Logger
if logConf.SyslogLevel >= 0 {
syslogger, err := syslog.Dial(
"",
"",
syslog.LOG_INFO, // default, not actually used
core.Command())
FailOnError(err, "Could not connect to Syslog")
syslogLevel := int(syslog.LOG_INFO)
if logConf.SyslogLevel != 0 {
syslogLevel = logConf.SyslogLevel
}
logger, err = blog.New(syslogger, logConf.StdoutLevel, syslogLevel)
FailOnError(err, "Could not connect to Syslog")
} else {
logger = blog.StdoutLogger(logConf.StdoutLevel)
}
_ = blog.Set(logger)
_ = mysql.SetLogger(mysqlLogger{logger})
grpclog.SetLoggerV2(grpcLogger{logger})
log.SetOutput(logWriter{logger})
redis.SetLogger(redisLogger{logger})
// Periodically log the current timestamp, to ensure syslog timestamps match
// Boulder's conception of time.
go func() {
for {
time.Sleep(time.Minute)
logger.Info(fmt.Sprintf("time=%s", time.Now().Format(time.RFC3339Nano)))
}
}()
return logger
}
func newVersionCollector() prometheus.Collector {
buildTime := core.Unspecified
if core.GetBuildTime() != core.Unspecified {
// core.BuildTime is set by our Makefile using the shell command 'date
// -u' which outputs in a consistent format across all POSIX systems.
bt, err := time.Parse(time.UnixDate, core.BuildTime)
if err != nil {
// Should never happen unless the Makefile is changed.
buildTime = "Unparsable"
} else {
buildTime = bt.Format(time.RFC3339)
}
}
return prometheus.NewGaugeFunc(
prometheus.GaugeOpts{
Name: "version",
Help: fmt.Sprintf(
"A metric with a constant value of '1' labeled by the short commit-id (buildId), build timestamp in RFC3339 format (buildTime), and Go release tag like 'go1.3' (goVersion) from which %s was built.",
core.Command(),
),
ConstLabels: prometheus.Labels{
"buildId": core.GetBuildID(),
"buildTime": buildTime,
"goVersion": runtime.Version(),
},
},
func() float64 { return 1 },
)
}
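
// For illustration (not in the original file), the gauge registered above
// renders in the Prometheus exposition format roughly as follows, with
// made-up label values:
//
//	version{buildId="abc1234",buildTime="2023-01-02T03:04:05Z",goVersion="go1.21.3"} 1
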
func newStatsRegistry(addr string, logger blog.Logger) prometheus.Registerer {
registry := prometheus.NewRegistry()
registry.MustRegister(collectors.NewGoCollector())
registry.MustRegister(collectors.NewProcessCollector(
collectors.ProcessCollectorOpts{}))
registry.MustRegister(newVersionCollector())
mux := http.NewServeMux()
// Register the available pprof handlers. These are all registered on
// DefaultServeMux just by importing pprof, but since we eschew
// DefaultServeMux, we need to explicitly register them on our own mux.
mux.Handle("/debug/pprof/", http.HandlerFunc(pprof.Index))
mux.Handle("/debug/pprof/profile", http.HandlerFunc(pprof.Profile))
mux.Handle("/debug/pprof/symbol", http.HandlerFunc(pprof.Symbol))
mux.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace))
// These handlers are defined in runtime/pprof instead of net/http/pprof, and
// have to be accessed through net/http/pprof's Handler func.
mux.Handle("/debug/pprof/goroutine", pprof.Handler("goroutine"))
mux.Handle("/debug/pprof/block", pprof.Handler("block"))
mux.Handle("/debug/pprof/heap", pprof.Handler("heap"))
mux.Handle("/debug/pprof/mutex", pprof.Handler("mutex"))
mux.Handle("/debug/pprof/threadcreate", pprof.Handler("threadcreate"))
mux.Handle("/debug/vars", expvar.Handler())
mux.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{
ErrorLog: promLogger{logger},
}))
server := http.Server{
Addr: addr,
Handler: mux,
ReadTimeout: time.Minute,
}
go func() {
err := server.ListenAndServe()
if err != nil {
logger.Errf("unable to boot debug server on %s: %v", addr, err)
os.Exit(1)
}
}()
return registry
}
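
// Illustrative only (not in the original file): with the debug server bound to
// a hypothetical addr of ":8003", the handlers registered above are reachable
// at, for example:
//
//	curl http://localhost:8003/metrics
//	curl http://localhost:8003/debug/pprof/heap
//	curl http://localhost:8003/debug/vars
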
// NewOpenTelemetry sets up our OpenTelemetry tracing.
// It returns a graceful shutdown function to be deferred.
func NewOpenTelemetry(config OpenTelemetryConfig, logger blog.Logger) func(ctx context.Context) {
otel.SetLogger(stdr.New(logOutput{logger}))
otel.SetErrorHandler(otel.ErrorHandlerFunc(func(err error) { logger.Errf("OpenTelemetry error: %v", err) }))
r, err := resource.Merge(
resource.Default(),
resource.NewWithAttributes(
semconv.SchemaURL,
semconv.ServiceNameKey.String(core.Command()),
semconv.ServiceVersionKey.String(core.GetBuildID()),
),
)
if err != nil {
FailOnError(err, "Could not create OpenTelemetry resource")
}
opts := []trace.TracerProviderOption{
trace.WithResource(r),
// Use a ParentBased sampler to respect the sample decisions on incoming
// traces, and TraceIDRatioBased to randomly sample new traces.
trace.WithSampler(trace.ParentBased(trace.TraceIDRatioBased(config.SampleRatio))),
}
if config.Endpoint != "" {
exporter, err := otlptracegrpc.New(
context.Background(),
otlptracegrpc.WithInsecure(),
otlptracegrpc.WithEndpoint(config.Endpoint))
if err != nil {
FailOnError(err, "Could not create OpenTelemetry OTLP exporter")
}
opts = append(opts, trace.WithBatcher(exporter))
}
tracerProvider := trace.NewTracerProvider(opts...)
otel.SetTracerProvider(tracerProvider)
otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(propagation.TraceContext{}, propagation.Baggage{}))
return func(ctx context.Context) {
err := tracerProvider.Shutdown(ctx)
if err != nil {
logger.Errf("Error while shutting down OpenTelemetry: %v", err)
}
}
}
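
// Hypothetical usage sketch, not part of the original file: enabling tracing
// with an OTLP gRPC collector. The endpoint and sample ratio are illustrative
// assumptions.
func exampleOpenTelemetryUsage(logger blog.Logger) {
	shutdown := NewOpenTelemetry(OpenTelemetryConfig{
		Endpoint:    "localhost:4317", // plaintext OTLP gRPC collector
		SampleRatio: 0.25,             // sample roughly a quarter of new traces
	}, logger)
	defer shutdown(context.Background())
	// Spans created through otel.Tracer(...) are now batched to the exporter.
}
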
// Fail prints a message to the audit log, then panics, causing the process to exit but
// allowing deferred cleanup functions to run on the way out.
func Fail(msg string) {
logger := blog.Get()
logger.AuditErr(msg)
panic(msg)
}
// FailOnError prints an error message and panics, but only if the provided
// error is actually non-nil. This is useful for one-line error handling in
// top-level executables, but should generally be avoided in libraries. The
// message argument is optional.
func FailOnError(err error, msg string) {
if err == nil {
return
}
if msg == "" {
Fail(err.Error())
} else {
Fail(fmt.Sprintf("%s: %s", msg, err))
}
}
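
// Illustrative usage, not part of the original file: the one-line error
// handling pattern described above, with a hypothetical file path.
func exampleFailOnErrorUsage() {
	f, err := os.Open("/hypothetical/config.json")
	FailOnError(err, "Failed to open config file")
	defer f.Close()
}
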
func decodeJSONStrict(in io.Reader, out interface{}) error {
decoder := json.NewDecoder(in)
decoder.DisallowUnknownFields()
return decoder.Decode(out)
}
// ReadConfigFile takes a file path as an argument and attempts to
// unmarshal the content of the file into a struct containing a
// configuration of a boulder component. Any config keys in the JSON
// file which do not correspond to expected keys in the config struct
// will result in errors.
func ReadConfigFile(filename string, out interface{}) error {
file, err := os.Open(filename)
if err != nil {
return err
}
defer file.Close()
return decodeJSONStrict(file, out)
}
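
// Hypothetical sketch, not part of the original file: loading a component
// config with strict decoding, so unknown JSON keys are rejected. The struct
// shape and path are illustrative assumptions.
func exampleReadConfigFileUsage() {
	var c struct {
		DebugAddr string
		Syslog    SyslogConfig
	}
	err := ReadConfigFile("/hypothetical/path/config.json", &c)
	FailOnError(err, "Reading JSON config file into config structure")
}
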
// ValidateJSONConfig takes a *ConfigValidator and an io.Reader containing a
// JSON representation of a config. The JSON data is unmarshaled into the
// *ConfigValidator's inner Config and then validated according to the
// 'validate' tags on each field. Callers can use cmd.LookupConfigValidator
// to get a *ConfigValidator for a given Boulder component. This is exported for
// use in SRE CI tooling.
func ValidateJSONConfig(cv *ConfigValidator, in io.Reader) error {
if cv == nil {
return errors.New("config validator cannot be nil")
}
// Initialize the validator and load any custom tags.
validate := validator.New()
if cv.Validators != nil {
for tag, v := range cv.Validators {
err := validate.RegisterValidation(tag, v)
if err != nil {
return err
}
}
}
err := decodeJSONStrict(in, cv.Config)
if err != nil {
return err
}
err = validate.Struct(cv.Config)
if err != nil {
errs, ok := err.(validator.ValidationErrors)
if !ok {
// This should never happen.
return err
}
if len(errs) > 0 {
allErrs := []string{}
for _, e := range errs {
allErrs = append(allErrs, e.Error())
}
return errors.New(strings.Join(allErrs, ", "))
}
}
return nil
}
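
// Hypothetical sketch, not part of the original file: validating a JSON config
// against its struct tags. The example struct and its 'validate' tag are
// illustrative assumptions.
func exampleValidateJSONConfigUsage() {
	type exampleConfig struct {
		DebugAddr string `validate:"required,hostname_port"`
	}
	cv := &ConfigValidator{Config: &exampleConfig{}}
	err := ValidateJSONConfig(cv, strings.NewReader(`{"DebugAddr": "localhost:8003"}`))
	FailOnError(err, "Config failed validation")
}
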
// ValidateYAMLConfig takes a *ConfigValidator and an io.Reader containing a
// YAML representation of a config. The YAML data is unmarshaled into the
// *ConfigValidator's inner Config and then validated according to the
// 'validate' tags on each field. Callers can use cmd.LookupConfigValidator
// to get a *ConfigValidator for a given Boulder component. This is exported for
// use in SRE CI tooling.
func ValidateYAMLConfig(cv *ConfigValidator, in io.Reader) error {
if cv == nil {
return errors.New("config validator cannot be nil")
}
// Initialize the validator and load any custom tags.
validate := validator.New()
if cv.Validators != nil {
for tag, v := range cv.Validators {
err := validate.RegisterValidation(tag, v)
if err != nil {
return err
}
}
}
inBytes, err := io.ReadAll(in)
if err != nil {
return err
}
err = strictyaml.Unmarshal(inBytes, cv.Config)
if err != nil {
return err
}
err = validate.Struct(cv.Config)
if err != nil {
errs, ok := err.(validator.ValidationErrors)
if !ok {
// This should never happen.
return err
}
if len(errs) > 0 {
allErrs := []string{}
for _, e := range errs {
allErrs = append(allErrs, e.Error())
}
return errors.New(strings.Join(allErrs, ", "))
}
}
return nil
}
// VersionString produces a friendly Application version string.
func VersionString() string {
return fmt.Sprintf("Versions: %s=(%s %s) Golang=(%s) BuildHost=(%s)", core.Command(), core.GetBuildID(), core.GetBuildTime(), runtime.Version(), core.GetBuildHost())
}
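
// For illustration (not in the original file), the resulting string looks
// roughly like the following, with made-up values:
//
//	Versions: boulder-wfe2=(abc1234 Mon Jan  2 15:04:05 UTC 2023) Golang=(go1.21.3) BuildHost=(builder@example)
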
// CatchSignals blocks until a SIGTERM, SIGINT, or SIGHUP is received, then
// executes the given callback. The callback should not block, it should simply
// signal other goroutines (particularly the main goroutine) to clean themselves
// up and exit. This function is intended to be called in its own goroutine,
// while the main goroutine waits for an indication that the other goroutines
// have exited cleanly.
func CatchSignals(callback func()) {
WaitForSignal()
callback()
}
// WaitForSignal blocks until a SIGTERM, SIGINT, or SIGHUP is received. It then
// returns, allowing execution to resume, generally allowing a main() function
// to return and trigger any deferred cleanup functions. This function is
// intended to be called directly from the main goroutine, while a gRPC or HTTP
// server runs in a background goroutine.
func WaitForSignal() {
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGTERM)
signal.Notify(sigChan, syscall.SIGINT)
signal.Notify(sigChan, syscall.SIGHUP)
<-sigChan
}
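
// Hypothetical sketch, not part of the original file: the shutdown pattern the
// comment above describes, with an HTTP server running in the background while
// the main goroutine blocks. The server value is illustrative.
func exampleWaitForSignalUsage(srv *http.Server) {
	go func() {
		_ = srv.ListenAndServe()
	}()
	// Block until SIGTERM, SIGINT, or SIGHUP, then shut down and let any
	// deferred cleanup in the caller run.
	WaitForSignal()
	_ = srv.Shutdown(context.Background())
}
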