diff --git a/cmd/config.go b/cmd/config.go index b2c646223..13842fdf9 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -465,7 +465,7 @@ type GRPCServerConfig struct { // These service names must match the service names advertised by gRPC itself, // which are identical to the names set in our gRPC .proto files prefixed by // the package names set in those files (e.g. "ca.CertificateAuthority"). - Services map[string]GRPCServiceConfig `json:"services" validate:"required,dive,required"` + Services map[string]*GRPCServiceConfig `json:"services" validate:"required,dive,required"` // MaxConnectionAge specifies how long a connection may live before the server sends a GoAway to the // client. Because gRPC connections re-resolve DNS after a connection close, // this controls how long it takes before a client learns about changes to its @@ -476,10 +476,10 @@ type GRPCServerConfig struct { // GRPCServiceConfig contains the information needed to configure a gRPC service. type GRPCServiceConfig struct { - // PerServiceClientNames is a map of gRPC service names to client certificate - // SANs. The upstream listening server will reject connections from clients - // which do not appear in this list, and the server interceptor will reject - // RPC calls for this service from clients which are not listed here. + // ClientNames is the list of accepted gRPC client certificate SANs. + // Connections from clients not in this list will be rejected by the + // upstream listener, and RPCs from unlisted clients will be denied by the + // server interceptor. ClientNames []string `json:"clientNames" validate:"min=1,dive,hostname,required"` } diff --git a/grpc/client.go b/grpc/client.go index b7773cdf0..87ff82f79 100644 --- a/grpc/client.go +++ b/grpc/client.go @@ -14,11 +14,13 @@ import ( "github.com/letsencrypt/boulder/cmd" bcreds "github.com/letsencrypt/boulder/grpc/creds" - // 'grpc/health' is imported for its init function, which causes clients to - // rely on the Health Service for load-balancing. // 'grpc/internal/resolver/dns' is imported for its init function, which // registers the SRV resolver. "google.golang.org/grpc/balancer/roundrobin" + + // 'grpc/health' is imported for its init function, which causes clients to + // rely on the Health Service for load-balancing as long as a + // "healthCheckConfig" is specified in the gRPC service config. _ "google.golang.org/grpc/health" _ "github.com/letsencrypt/boulder/grpc/internal/resolver/dns" @@ -61,7 +63,21 @@ func ClientSetup(c *cmd.GRPCClientConfig, tlsConfig *tls.Config, statsRegistry p creds := bcreds.NewClientCredentials(tlsConfig.RootCAs, tlsConfig.Certificates, hostOverride) return grpc.NewClient( target, - grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, roundrobin.Name)), + grpc.WithDefaultServiceConfig( + fmt.Sprintf( + // By setting the service name to an empty string in + // healthCheckConfig, we're instructing the gRPC client to query + // the overall health status of each server. The grpc-go health + // server, as constructed by health.NewServer(), unconditionally + // sets the overall service (e.g. "") status to SERVING. If a + // specific service name were set, the server would need to + // explicitly transition that service to SERVING; otherwise, + // clients would receive a NOT_FOUND status and the connection + // would be marked as unhealthy (TRANSIENT_FAILURE). + `{"healthCheckConfig": {"serviceName": ""},"loadBalancingConfig": [{"%s":{}}]}`, + roundrobin.Name, + ), + ), grpc.WithTransportCredentials(creds), grpc.WithChainUnaryInterceptor(unaryInterceptors...), grpc.WithChainStreamInterceptor(streamInterceptors...), diff --git a/grpc/server.go b/grpc/server.go index 4029c77d2..2fb09f7f0 100644 --- a/grpc/server.go +++ b/grpc/server.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "net" + "slices" "strings" "time" @@ -123,12 +124,21 @@ func (sb *serverBuilder) Build(tlsConfig *tls.Config, statsRegistry prometheus.R // This is the names which are allowlisted at the server level, plus the union // of all names which are allowlisted for any individual service. acceptedSANs := make(map[string]struct{}) + var acceptedSANsSlice []string for _, service := range sb.cfg.Services { for _, name := range service.ClientNames { acceptedSANs[name] = struct{}{} + if !slices.Contains(acceptedSANsSlice, name) { + acceptedSANsSlice = append(acceptedSANsSlice, name) + } } } + // Ensure that the health service has the same ClientNames as the other + // services, so that health checks can be performed by clients which are + // allowed to connect to the server. + sb.cfg.Services[healthpb.Health_ServiceDesc.ServiceName].ClientNames = acceptedSANsSlice + creds, err := bcreds.NewServerCredentials(tlsConfig, acceptedSANs) if err != nil { return nil, err @@ -224,8 +234,12 @@ func (sb *serverBuilder) Build(tlsConfig *tls.Config, statsRegistry prometheus.R // initLongRunningCheck initializes a goroutine which will periodically check // the health of the provided service and update the health server accordingly. +// +// TODO(#8255): Remove the service parameter and instead rely on transitioning +// the overall health of the server (e.g. "") instead of individual services. func (sb *serverBuilder) initLongRunningCheck(shutdownCtx context.Context, service string, checkImpl func(context.Context) error) { // Set the initial health status for the service. + sb.healthSrv.SetServingStatus("", healthpb.HealthCheckResponse_NOT_SERVING) sb.healthSrv.SetServingStatus(service, healthpb.HealthCheckResponse_NOT_SERVING) // check is a helper function that checks the health of the service and, if @@ -249,10 +263,13 @@ func (sb *serverBuilder) initLongRunningCheck(shutdownCtx context.Context, servi } if next != healthpb.HealthCheckResponse_SERVING { + sb.logger.Errf("transitioning overall health from %q to %q, due to: %s", last, next, err) sb.logger.Errf("transitioning health of %q from %q to %q, due to: %s", service, last, next, err) } else { + sb.logger.Infof("transitioning overall health from %q to %q", last, next) sb.logger.Infof("transitioning health of %q from %q to %q", service, last, next) } + sb.healthSrv.SetServingStatus("", next) sb.healthSrv.SetServingStatus(service, next) return next } diff --git a/grpc/server_test.go b/grpc/server_test.go index 7553e24c7..16c2e86a4 100644 --- a/grpc/server_test.go +++ b/grpc/server_test.go @@ -11,7 +11,7 @@ import ( "google.golang.org/grpc/health" ) -func Test_serverBuilder_initLongRunningCheck(t *testing.T) { +func TestServerBuilderInitLongRunningCheck(t *testing.T) { t.Parallel() hs := health.NewServer() mockLogger := blog.NewMock() @@ -41,8 +41,8 @@ func Test_serverBuilder_initLongRunningCheck(t *testing.T) { // - ~100ms 3rd check failed, SERVING to NOT_SERVING serving := mockLogger.GetAllMatching(".*\"NOT_SERVING\" to \"SERVING\"") notServing := mockLogger.GetAllMatching((".*\"SERVING\" to \"NOT_SERVING\"")) - test.Assert(t, len(serving) == 1, "expected one serving log line") - test.Assert(t, len(notServing) == 1, "expected one not serving log line") + test.Assert(t, len(serving) == 2, "expected two serving log lines") + test.Assert(t, len(notServing) == 2, "expected two not serving log lines") mockLogger.Clear() @@ -67,6 +67,6 @@ func Test_serverBuilder_initLongRunningCheck(t *testing.T) { // - ~100ms 3rd check passed, NOT_SERVING to SERVING serving = mockLogger.GetAllMatching(".*\"NOT_SERVING\" to \"SERVING\"") notServing = mockLogger.GetAllMatching((".*\"SERVING\" to \"NOT_SERVING\"")) - test.Assert(t, len(serving) == 2, "expected two serving log lines") - test.Assert(t, len(notServing) == 1, "expected one not serving log line") + test.Assert(t, len(serving) == 4, "expected four serving log lines") + test.Assert(t, len(notServing) == 2, "expected two not serving log lines") }