diff --git a/cmd/akamai-purger/main.go b/cmd/akamai-purger/main.go
index f0b63aa18..36fae1003 100644
--- a/cmd/akamai-purger/main.go
+++ b/cmd/akamai-purger/main.go
@@ -400,13 +400,9 @@ func daemon(c Config, ap *akamaiPurger, logger blog.Logger, scope prometheus.Reg
 		stopped <- true
 	}()
 
-	start, stopFn, err := bgrpc.NewServer(c.AkamaiPurger.GRPC).Add(
-		&akamaipb.AkamaiPurger_ServiceDesc, ap).Build(tlsConfig, scope, clk)
-	cmd.FailOnError(err, "Unable to setup Akamai purger gRPC server")
-
-	go cmd.CatchSignals(logger, func() {
-		stopFn()
-
+	// When the gRPC server finally exits, run a clean-up routine that stops the
+	// ticker and waits for the goroutine above to finish purging the stack.
+	defer func() {
 		// Stop the ticker and signal that we want to shutdown by writing to the
 		// stop channel. We wait 15 seconds for any remaining URLs to be emptied
 		// from the current stack, if we pass that deadline we exit early.
@@ -417,14 +413,13 @@ func daemon(c Config, ap *akamaiPurger, logger blog.Logger, scope prometheus.Reg
 			cmd.Fail("Timed out waiting for purger to finish work")
 		case <-stopped:
 		}
-	})
-	cmd.FailOnError(start(), "akamai-purger gRPC service failed")
+	}()
 
-	// When we get a SIGTERM, we will exit from grpcSrv.Serve as soon as all
-	// extant RPCs have been processed, but we want the process to stick around
-	// while we still have a goroutine purging the last elements from the stack.
-	// Once that's done, CatchSignals will call os.Exit().
-	select {}
+	start, err := bgrpc.NewServer(c.AkamaiPurger.GRPC).Add(
+		&akamaipb.AkamaiPurger_ServiceDesc, ap).Build(tlsConfig, scope, clk)
+	cmd.FailOnError(err, "Unable to setup Akamai purger gRPC server")
+
+	cmd.FailOnError(start(), "akamai-purger gRPC service failed")
 }
 
 func init() {
diff --git a/cmd/boulder-ca/main.go b/cmd/boulder-ca/main.go
index 1ca575e43..0164738e3 100644
--- a/cmd/boulder-ca/main.go
+++ b/cmd/boulder-ca/main.go
@@ -249,8 +249,8 @@ func main() {
 		var entries int
 		ecdsaAllowList, entries, err = ca.NewECDSAAllowListFromFile(c.CA.ECDSAAllowListFilename, logger, allowListGauge)
 		cmd.FailOnError(err, "Unable to load ECDSA allow list from YAML file")
+		defer ecdsaAllowList.Stop()
 		logger.Infof("Created a reloadable allow list, it was initialized with %d entries", entries)
-
 	}
 
 	srv := bgrpc.NewServer(c.CA.GRPCCA)
@@ -272,6 +272,7 @@ func main() {
 		)
 		cmd.FailOnError(err, "Failed to create OCSP impl")
 		go ocspi.LogOCSPLoop()
+		defer ocspi.Stop()
 		srv = srv.Add(&capb.OCSPGenerator_ServiceDesc, ocspi)
 	}
 
@@ -316,16 +317,9 @@ func main() {
 		srv = srv.Add(&capb.CertificateAuthority_ServiceDesc, cai)
 	}
 
-	start, stop, err := srv.Build(tlsConfig, scope, clk)
+	start, err := srv.Build(tlsConfig, scope, clk)
 	cmd.FailOnError(err, "Unable to setup CA gRPC server")
 
-	go cmd.CatchSignals(logger, func() {
-		stop()
-		ecdsaAllowList.Stop()
-		if ocspi != nil {
-			ocspi.Stop()
-		}
-	})
 	cmd.FailOnError(start(), "CA gRPC service failed")
 }
 
diff --git a/cmd/boulder-publisher/main.go b/cmd/boulder-publisher/main.go
index 8b8daa3fd..d2c9e5c8b 100644
--- a/cmd/boulder-publisher/main.go
+++ b/cmd/boulder-publisher/main.go
@@ -92,11 +92,10 @@ func main() {
 
 	pubi := publisher.New(bundles, c.Publisher.UserAgent, logger, scope)
 
-	start, stop, err := bgrpc.NewServer(c.Publisher.GRPC).Add(
+	start, err := bgrpc.NewServer(c.Publisher.GRPC).Add(
 		&pubpb.Publisher_ServiceDesc, pubi).Build(tlsConfig, scope, clk)
 	cmd.FailOnError(err, "Unable to setup Publisher gRPC server")
 
-	go cmd.CatchSignals(logger, stop)
 	cmd.FailOnError(start(), "Publisher gRPC service failed")
 }
 
diff --git a/cmd/boulder-ra/main.go b/cmd/boulder-ra/main.go
index a12127c49..205b79814 100644
--- a/cmd/boulder-ra/main.go
+++ b/cmd/boulder-ra/main.go
@@ -243,6 +243,7 @@ func main() {
 		apc,
 		issuerCerts,
 	)
+	defer rai.DrainFinalize()
 
 	policyErr := rai.SetRateLimitPoliciesFile(c.RA.RateLimitPoliciesFilename)
 	cmd.FailOnError(policyErr, "Couldn't load rate limit policies file")
@@ -253,14 +254,10 @@ func main() {
 	rai.OCSP = ocspc
 	rai.SA = sac
 
-	start, stop, err := bgrpc.NewServer(c.RA.GRPC).Add(
+	start, err := bgrpc.NewServer(c.RA.GRPC).Add(
 		&rapb.RegistrationAuthority_ServiceDesc, rai).Build(tlsConfig, scope, clk)
 	cmd.FailOnError(err, "Unable to setup RA gRPC server")
 
-	go cmd.CatchSignals(logger, func() {
-		stop()
-		rai.DrainFinalize()
-	})
 	cmd.FailOnError(start(), "RA gRPC service failed")
 }
 
diff --git a/cmd/boulder-sa/main.go b/cmd/boulder-sa/main.go
index 27dcbeaa8..35e4fbfb6 100644
--- a/cmd/boulder-sa/main.go
+++ b/cmd/boulder-sa/main.go
@@ -93,13 +93,12 @@ func main() {
 	sai, err := sa.NewSQLStorageAuthorityWrapping(saroi, dbMap, scope)
 	cmd.FailOnError(err, "Failed to create SA impl")
 
-	start, stop, err := bgrpc.NewServer(c.SA.GRPC).Add(
+	start, err := bgrpc.NewServer(c.SA.GRPC).Add(
 		&sapb.StorageAuthorityReadOnly_ServiceDesc, saroi).Add(
 		&sapb.StorageAuthority_ServiceDesc, sai).Build(
 		tls, scope, clk)
 	cmd.FailOnError(err, "Unable to setup SA gRPC server")
 
-	go cmd.CatchSignals(logger, stop)
 	cmd.FailOnError(start(), "SA gRPC service failed")
 }
 
diff --git a/cmd/boulder-va/main.go b/cmd/boulder-va/main.go
index c91be2a79..f3390749c 100644
--- a/cmd/boulder-va/main.go
+++ b/cmd/boulder-va/main.go
@@ -96,6 +96,7 @@ func main() {
 	}
 	servers, err = bdns.StartDynamicProvider(c.VA.DNSResolver, 60*time.Second)
 	cmd.FailOnError(err, "Couldn't start dynamic DNS server resolver")
+	defer servers.Stop()
 
 	var resolver bdns.Client
 	if !(c.VA.DNSAllowLoopbackAddresses || c.Common.DNSAllowLoopbackAddresses) {
@@ -147,16 +148,11 @@ func main() {
 		c.VA.AccountURIPrefixes)
 	cmd.FailOnError(err, "Unable to create VA server")
 
-	start, stop, err := bgrpc.NewServer(c.VA.GRPC).Add(
+	start, err := bgrpc.NewServer(c.VA.GRPC).Add(
 		&vapb.VA_ServiceDesc, vai).Add(
 		&vapb.CAA_ServiceDesc, vai).Build(tlsConfig, scope, clk)
 	cmd.FailOnError(err, "Unable to setup VA gRPC server")
 
-	go cmd.CatchSignals(logger, func() {
-		servers.Stop()
-		stop()
-	})
-
 	cmd.FailOnError(start(), "VA gRPC service failed")
 }
 
diff --git a/cmd/crl-storer/main.go b/cmd/crl-storer/main.go
index bc1339957..36b9d2fc6 100644
--- a/cmd/crl-storer/main.go
+++ b/cmd/crl-storer/main.go
@@ -123,11 +123,10 @@ func main() {
 	csi, err := storer.New(issuers, s3client, c.CRLStorer.S3Bucket, scope, logger, clk)
 	cmd.FailOnError(err, "Failed to create CRLStorer impl")
 
-	start, stop, err := bgrpc.NewServer(c.CRLStorer.GRPC).Add(
+	start, err := bgrpc.NewServer(c.CRLStorer.GRPC).Add(
 		&cspb.CRLStorer_ServiceDesc, csi).Build(tlsConfig, scope, clk)
 	cmd.FailOnError(err, "Unable to setup CRLStorer gRPC server")
 
-	go cmd.CatchSignals(logger, stop)
 	cmd.FailOnError(start(), "CRLStorer gRPC service failed")
 }
 
diff --git a/cmd/nonce-service/main.go b/cmd/nonce-service/main.go
index beeeab54c..97eee3fa9 100644
--- a/cmd/nonce-service/main.go
+++ b/cmd/nonce-service/main.go
@@ -109,11 +109,10 @@ func main() {
 	cmd.FailOnError(err, "tlsConfig config")
 
 	nonceServer := nonce.NewServer(ns)
-	start, stop, err := bgrpc.NewServer(c.NonceService.GRPC).Add(
+	start, err := bgrpc.NewServer(c.NonceService.GRPC).Add(
 		&noncepb.NonceService_ServiceDesc, nonceServer).Build(tlsConfig, scope,
 		cmd.Clock())
 	cmd.FailOnError(err, "Unable to setup nonce service gRPC server")
 
-	go cmd.CatchSignals(logger, stop)
 	cmd.FailOnError(start(), "Nonce service gRPC server failed")
 }
diff --git a/cmd/shell.go b/cmd/shell.go
index ac35dea42..e2e3ab859 100644
--- a/cmd/shell.go
+++ b/cmd/shell.go
@@ -157,7 +157,7 @@ func (lw logWriter) Write(p []byte) (n int, err error) {
 // StatsAndLogging constructs a prometheus registerer and an AuditLogger based
 // on its config parameters, and return them both. It also spawns off an HTTP
 // server on the provided port to report the stats and provide pprof profiling
-// handlers. NewLogger and newStatsRegistry will call os.Exit on errors.
+// handlers. NewLogger and newStatsRegistry will panic on errors.
 // Also sets the constructed AuditLogger as the default logger, and configures
 // the mysql and grpc packages to use our logger.
 // This must be called before any gRPC code is called, because gRPC's SetLogger
@@ -276,15 +276,18 @@ func newStatsRegistry(addr string, logger blog.Logger) prometheus.Registerer {
 	return registry
 }
 
-// Fail exits and prints an error message to stderr and the logger audit log.
+// Fail prints a message to the audit log, then panics, causing the process to exit but
+// allowing deferred cleanup functions to run on the way out.
 func Fail(msg string) {
 	logger := blog.Get()
 	logger.AuditErr(msg)
-	os.Exit(1)
+	panic(msg)
 }
 
-// FailOnError exits and prints an error message, but only if we encountered
-// a problem and err != nil. err is required but msg can be "".
+// FailOnError prints an error message and panics, but only if the provided
+// error is actually non-nil. This is useful for one-line error handling in
+// top-level executables, but should generally be avoided in libraries. The
+// message argument is optional.
 func FailOnError(err error, msg string) {
 	if err == nil {
 		return
diff --git a/grpc/server.go b/grpc/server.go
index 7d23b852c..bf8825c49 100644
--- a/grpc/server.go
+++ b/grpc/server.go
@@ -5,7 +5,10 @@ import (
 	"errors"
 	"fmt"
 	"net"
+	"os"
+	"os/signal"
 	"strings"
+	"syscall"
 
 	grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
 	"github.com/jmhodges/clock"
@@ -64,16 +67,18 @@ func (sb *serverBuilder) Add(desc *grpc.ServiceDesc, impl any) *serverBuilder {
 
 // Build creates a gRPC server that uses the provided *tls.Config and exposes
 // all of the services added to the builder. It also exposes a health check
-// service. It returns two functions, start() and stop(), which should be used
-// to start and gracefully stop the server.
-func (sb *serverBuilder) Build(tlsConfig *tls.Config, statsRegistry prometheus.Registerer, clk clock.Clock) (func() error, func(), error) {
+// service. It returns one function, start(), which should be used to start
+// the server. It spawns a goroutine which will listen for OS signals and
+// gracefully stop the server if one is caught, causing the start() function to
+// exit.
+func (sb *serverBuilder) Build(tlsConfig *tls.Config, statsRegistry prometheus.Registerer, clk clock.Clock) (func() error, error) {
 	// Add the health service to all servers.
 	healthSrv := health.NewServer()
 	sb = sb.Add(&healthpb.Health_ServiceDesc, healthSrv)
 
 	// Check to see if any of the calls to .Add() resulted in an error.
 	if sb.err != nil {
-		return nil, nil, sb.err
+		return nil, sb.err
 	}
 
 	// Ensure that every configured service also got added.
@@ -84,12 +89,12 @@ func (sb *serverBuilder) Build(tlsConfig *tls.Config, statsRegistry prometheus.R
 	for serviceName := range sb.cfg.Services {
 		_, ok := sb.services[serviceName]
 		if !ok {
-			return nil, nil, fmt.Errorf("gRPC service %q in config does not match any service: %s", serviceName, strings.Join(registeredServices, ", "))
+			return nil, fmt.Errorf("gRPC service %q in config does not match any service: %s", serviceName, strings.Join(registeredServices, ", "))
 		}
 	}
 
 	if tlsConfig == nil {
-		return nil, nil, errNilTLS
+		return nil, errNilTLS
 	}
 
 	// Collect all names which should be allowed to connect to the server at all.
@@ -108,14 +113,14 @@ func (sb *serverBuilder) Build(tlsConfig *tls.Config, statsRegistry prometheus.R
 
 	creds, err := bcreds.NewServerCredentials(tlsConfig, acceptedSANs)
 	if err != nil {
-		return nil, nil, err
+		return nil, err
 	}
 
 	// Set up all of our interceptors which handle metrics, traces, error
 	// propagation, and more.
 	metrics, err := newServerMetrics(statsRegistry)
 	if err != nil {
-		return nil, nil, err
+		return nil, err
 	}
 
 	var ai serverInterceptor
@@ -160,18 +165,27 @@ func (sb *serverBuilder) Build(tlsConfig *tls.Config, statsRegistry prometheus.R
 	// Finally return the functions which will start and stop the server.
 	listener, err := net.Listen("tcp", sb.cfg.Address)
 	if err != nil {
-		return nil, nil, err
+		return nil, err
 	}
 
 	start := func() error {
-		return filterShutdownErrors(server.Serve(listener))
-	}
-	stop := func() {
-		healthSrv.Shutdown()
-		server.GracefulStop()
+		return server.Serve(listener)
 	}
 
-	return start, stop, nil
+	// Start a goroutine which listens for a termination signal, and then
+	// gracefully stops the gRPC server. This in turn causes the start() function
+	// to exit, allowing its caller (generally a main() function) to exit.
+	go func() {
+		sigChan := make(chan os.Signal, 1)
+		signal.Notify(sigChan, syscall.SIGTERM)
+		signal.Notify(sigChan, syscall.SIGINT)
+		signal.Notify(sigChan, syscall.SIGHUP)
+		<-sigChan
+		healthSrv.Shutdown()
+		server.GracefulStop()
+	}()
+
+	return start, nil
 }
 
 // serverMetrics is a struct type used to return a few registered metrics from
@@ -222,19 +236,3 @@ func newServerMetrics(stats prometheus.Registerer) (serverMetrics, error) {
 		rpcLag: rpcLag,
 	}, nil
 }
-
-// filterShutdownErrors returns the input error, with the exception of "use of
-// closed network connection," on which it returns nil
-// Per https://github.com/grpc/grpc-go/issues/1017, a gRPC server's `Serve()`
-// will always return an error, even when GracefulStop() is called. We don't
-// want to log graceful stops as errors, so we filter out the meaningless
-// error we get in that situation.
-func filterShutdownErrors(err error) error {
-	if err == nil {
-		return nil
-	}
-	if strings.Contains(err.Error(), "use of closed network connection") {
-		return nil
-	}
-	return err
-}
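Taken together, the per-binary changes collapse shutdown handling into a single pattern: register cleanup with defer, build the server, and block on start() until the signal-watching goroutine inside Build triggers a graceful stop, at which point main returns and the defers run (Fail/FailOnError now panic rather than os.Exit, so defers also run on error). The following is a minimal, self-contained sketch of that mechanism using plain grpc-go and the standard library; the listener address and log messages are illustrative only and not taken from Boulder.

```go
// Sketch of the shutdown pattern this diff moves into grpc/server.go: a
// goroutine watches for termination signals, marks the health service as
// NOT_SERVING, and calls GracefulStop, which unblocks Serve so that main can
// return normally and run its deferred cleanup.
package main

import (
	"log"
	"net"
	"os"
	"os/signal"
	"syscall"

	"google.golang.org/grpc"
	"google.golang.org/grpc/health"
	healthpb "google.golang.org/grpc/health/grpc_health_v1"
)

func main() {
	// Deferred cleanup runs because we exit by returning from main, not by
	// calling os.Exit from a signal handler.
	defer log.Print("running deferred cleanup")

	listener, err := net.Listen("tcp", "127.0.0.1:9090")
	if err != nil {
		log.Print(err)
		return
	}

	server := grpc.NewServer()
	healthSrv := health.NewServer()
	healthpb.RegisterHealthServer(server, healthSrv)

	// Watch for the same signals the diff registers, then stop gracefully.
	go func() {
		sigChan := make(chan os.Signal, 1)
		signal.Notify(sigChan, syscall.SIGTERM, syscall.SIGINT, syscall.SIGHUP)
		<-sigChan
		healthSrv.Shutdown()
		server.GracefulStop()
	}()

	// Serve blocks until GracefulStop and then returns nil on current grpc-go
	// versions, which is why the diff can delete filterShutdownErrors.
	if err := server.Serve(listener); err != nil {
		log.Print(err)
	}
}
```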