Remove `service1` / `service2` names in consul (#7266)

These names corresponded to single instances of a service, and were
primarily used for (a) specifying which interface to bind a gRPC port on
and (b) allowing `health-checker` to check individual instances rather
than a service as a whole.

For (a), change the `--addr` (gRPC listen address) flags to bind to "all
interfaces." For (b), provide a specific IP address and port for health
checking. This required adding a `--host-override` flag for `health-checker`
because the service certificates contain hostname SANs, not IP address SANs.

Clarify the situation with nonce services a little bit. Previously we
had one nonce "service" in Consul and got nonces from that (i.e.
randomly between the two nonce-service instances). Now we have two nonce
services in Consul, `nonce-taro` and `nonce-zinc`, representing multiple
datacenters. One of them (`nonce-taro`) is explicitly configured as the
"get" service, while both are configured as "redeem" services.

Part of #7245.

Note this change does not yet get rid of the rednet/bluenet distinction,
nor does it get rid of all use of 10.88.88.88. That will be a follow-up
change.
This commit is contained in:
Jacob Hoffman-Andrews 2024-01-22 09:34:20 -08:00 committed by GitHub
parent f54292e7d1
commit ce5632b480
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 109 additions and 171 deletions

View File

@ -52,17 +52,23 @@ func derivePrefix(key string, grpcAddr string) (string, error) {
if err != nil {
return "", fmt.Errorf("parsing gRPC listen address: %w", err)
}
if host == "" {
return "", fmt.Errorf("nonce service gRPC address must include an IP address: got %q", grpcAddr)
}
if host != "" && port != "" {
hostIP := net.ParseIP(host)
if hostIP == nil {
return "", fmt.Errorf("parsing IP from gRPC listen address: %w", err)
return "", fmt.Errorf("gRPC address host part was not an IP address")
}
if hostIP.IsUnspecified() {
return "", fmt.Errorf("nonce service gRPC address must be a specific IP address: got %q", grpcAddr)
}
}
return nonce.DerivePrefix(grpcAddr, key), nil
}
func main() {
grpcAddr := flag.String("addr", "", "gRPC listen address override")
grpcAddr := flag.String("addr", "", "gRPC listen address override. Also used to derive the nonce prefix.")
debugAddr := flag.String("debug-addr", "", "Debug server address override")
configFile := flag.String("config", "", "File path to the configuration file for this service")
flag.Parse()

View File

@ -46,7 +46,7 @@
"getNonceService": {
"dnsAuthority": "consul.service.consul",
"srvLookup": {
"service": "nonce",
"service": "nonce-taro",
"domain": "service.consul"
},
"timeout": "15s",
@ -57,11 +57,11 @@
"dnsAuthority": "consul.service.consul",
"srvLookups": [
{
"service": "nonce1",
"service": "nonce-taro",
"domain": "service.consul"
},
{
"service": "nonce2",
"service": "nonce-zinc",
"domain": "service.consul"
}
],

View File

@ -48,7 +48,7 @@
"getNonceService": {
"dnsAuthority": "consul.service.consul",
"srvLookup": {
"service": "nonce",
"service": "nonce-taro",
"domain": "service.consul"
},
"timeout": "15s",
@ -59,11 +59,11 @@
"dnsAuthority": "consul.service.consul",
"srvLookups": [
{
"service": "nonce1",
"service": "nonce-taro",
"domain": "service.consul"
},
{
"service": "nonce2",
"service": "nonce-zinc",
"domain": "service.consul"
}
],

View File

@ -61,22 +61,6 @@ services {
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
}
services {
id = "ca1"
name = "ca1"
address = "10.77.77.77"
port = 9393
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
}
services {
id = "ca2"
name = "ca2"
address = "10.88.88.88"
port = 9493
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
}
services {
id = "crl-storer-a"
name = "crl-storer"
@ -117,34 +101,29 @@ services {
tags = ["tcp"]
}
# Unlike most components, we have two completely independent nonce services,
# simulating two sets of nonce servers running in two different datacenters:
# taro and zinc.
services {
id = "nonce-a"
name = "nonce"
id = "nonce-taro-a"
name = "nonce-taro"
address = "10.77.77.77"
port = 9301
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
}
services {
id = "nonce-b"
name = "nonce"
address = "10.88.88.88"
port = 9401
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
}
services {
id = "nonce1"
name = "nonce1"
id = "nonce-taro-b"
name = "nonce-taro"
address = "10.77.77.77"
port = 9301
port = 9501
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
}
services {
id = "nonce2"
name = "nonce2"
address = "10.88.88.88"
id = "nonce-zinc"
name = "nonce-zinc"
address = "10.77.77.77"
port = 9401
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
}
@ -165,22 +144,6 @@ services {
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
}
services {
id = "publisher1"
name = "publisher1"
address = "10.77.77.77"
port = 9391
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
}
services {
id = "publisher2"
name = "publisher2"
address = "10.88.88.88"
port = 9491
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
}
services {
id = "ra-a"
name = "ra"
@ -197,22 +160,6 @@ services {
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
}
services {
id = "ra1"
name = "ra1"
address = "10.77.77.77"
port = 9394
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
}
services {
id = "ra2"
name = "ra2"
address = "10.88.88.88"
port = 9494
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
}
services {
id = "rva1-a"
name = "rva1"
@ -303,22 +250,6 @@ services {
]
}
services {
id = "sa1"
name = "sa1"
address = "10.77.77.77"
port = 9395
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
}
services {
id = "sa2"
name = "sa2"
address = "10.88.88.88"
port = 9495
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
}
services {
id = "va-a"
name = "va"
@ -335,22 +266,6 @@ services {
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
}
services {
id = "va1"
name = "va1"
address = "10.77.77.77"
port = 9392
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
}
services {
id = "va2"
name = "va2"
address = "10.88.88.88"
port = 9492
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
}
services {
id = "bredis3"
name = "redisratelimits"
@ -395,7 +310,7 @@ services {
services {
id = "case1b"
name = "case1b"
address = "10.88.88.88"
address = "10.77.77.77"
port = 9401
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
}
@ -405,7 +320,7 @@ services {
services {
id = "case2b"
name = "case2b"
address = "10.88.88.88"
address = "10.77.77.77"
port = 9401
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
}
@ -436,7 +351,7 @@ services {
services {
id = "case4b"
name = "case4b"
address = "10.88.88.88"
address = "10.77.77.77"
port = 9401
tags = ["tcp"] // Required for SRV RR support in gRPC DNS resolution.
checks = [

View File

@ -26,6 +26,7 @@ func main() {
// Flag and config parsing and validation.
configFile := flag.String("config", "", "Path to the TLS configuration file")
serverAddr := flag.String("addr", "", "Address of the gRPC server to check")
hostOverride := flag.String("host-override", "", "Hostname to use for TLS certificate validation")
flag.Parse()
if *configFile == "" {
flag.Usage()
@ -47,6 +48,10 @@ func main() {
tlsConfig, err := c.TLS.Load(metrics.NoopRegisterer)
cmd.FailOnError(err, "failed to load TLS credentials")
if *hostOverride != "" {
c.GRPC.HostOverride = *hostOverride
}
// GRPC connection prerequisites.
clk := cmd.Clock()
@ -58,12 +63,12 @@ func main() {
for {
select {
case <-ticker.C:
fmt.Fprintf(os.Stderr, "Connecting to %s health service\n", *serverAddr)
_, hostOverride, err := c.GRPC.MakeTargetAndHostOverride()
cmd.FailOnError(err, "")
// Set the hostOverride to match the dNSName in the server certificate.
c.GRPC.HostOverride = strings.Replace(hostOverride, ".service.consul", ".boulder", 1)
fmt.Fprintf(os.Stderr, "health checking %s (%s)\n", c.GRPC.HostOverride, *serverAddr)
// Set up the GRPC connection.
conn, err := bgrpc.ClientSetup(c.GRPC, tlsConfig, metrics.NoopRegisterer, clk)
@ -79,9 +84,9 @@ func main() {
resp, err := client.Check(ctx2, req)
if err != nil {
if strings.Contains(err.Error(), "authentication handshake failed") {
cmd.Fail(fmt.Sprintf("error connecting to health service %s: %s\n", *serverAddr, err))
cmd.Fail(fmt.Sprintf("health checking %s (%s): %s\n", c.GRPC.HostOverride, *serverAddr, err))
}
fmt.Fprintf(os.Stderr, "got error connecting to health service %s: %s\n", *serverAddr, err)
fmt.Fprintf(os.Stderr, "health checking %s (%s): %s\n", c.GRPC.HostOverride, *serverAddr, err)
} else if resp.Status == healthpb.HealthCheckResponse_SERVING {
return
} else {

View File

@ -177,8 +177,9 @@ def waitport(port, prog, perTickCheck=None):
raise
raise(Exception("timed out waiting for debug port %d (%s)" % (port, prog)))
def waithealth(prog, addr):
def waithealth(prog, port, host_override):
subprocess.check_call([
'./bin/health-checker',
'-addr', addr,
'-addr', ("localhost:%d" % (port)),
'-host-override', host_override,
'-config', os.path.join(config_dir, 'health-checker.json')])

View File

@ -125,16 +125,16 @@ def check_balance():
metric is present, which means that backend handled at least one request.
"""
addresses = [
"sa1.service.consul:8003",
"sa2.service.consul:8103",
"publisher1.service.consul:8009",
"publisher2.service.consul:8109",
"va1.service.consul:8004",
"va2.service.consul:8104",
"ca1.service.consul:8001",
"ca2.service.consul:8101",
"ra1.service.consul:8002",
"ra2.service.consul:8102",
"localhost:8003", # SA
"localhost:8103", # SA
"localhost:8009", # publisher
"localhost:8109", # publisher
"localhost:8004", # VA
"localhost:8104", # VA
"localhost:8001", # CA
"localhost:8101", # CA
"localhost:8002", # RA
"localhost:8102", # RA
]
for address in addresses:
metrics = requests.get("http://%s/metrics" % address)

View File

@ -8,7 +8,7 @@
"getNonceService": {
"dnsAuthority": "consul.service.consul",
"srvLookup": {
"service": "nonce",
"service": "nonce-taro",
"domain": "service.consul"
},
"timeout": "15s",
@ -19,11 +19,11 @@
"dnsAuthority": "consul.service.consul",
"srvLookups": [
{
"service": "nonce1",
"service": "nonce-taro",
"domain": "service.consul"
},
{
"service": "nonce2",
"service": "nonce-zinc",
"domain": "service.consul"
}
],

View File

@ -12,107 +12,118 @@ import time
from helpers import waithealth, waitport, config_dir, CONFIG_NEXT
Service = collections.namedtuple('Service', ('name', 'debug_port', 'grpc_addr', 'cmd', 'deps'))
Service = collections.namedtuple('Service', ('name', 'debug_port', 'grpc_port', 'host_override', 'cmd', 'deps'))
# Keep these ports in sync with consul/config.hcl
SERVICES = (
Service('boulder-remoteva-a',
8011, 'rva1.service.consul:9397',
8011, 9397, 'rva.boulder',
('./bin/boulder', 'boulder-remoteva', '--config', os.path.join(config_dir, 'va-remote-a.json'), '--addr', ':9397', '--debug-addr', ':8011'),
None),
Service('boulder-remoteva-b',
8012, 'rva1.service.consul:9498',
8012, 9498, 'rva.boulder',
('./bin/boulder', 'boulder-remoteva', '--config', os.path.join(config_dir, 'va-remote-b.json'), '--addr', ':9498', '--debug-addr', ':8012'),
None),
Service('boulder-sa-1',
8003, 'sa1.service.consul:9395',
('./bin/boulder', 'boulder-sa', '--config', os.path.join(config_dir, 'sa.json'), '--addr', 'sa1.service.consul:9395', '--debug-addr', ':8003'),
8003, 9395, 'sa.boulder',
('./bin/boulder', 'boulder-sa', '--config', os.path.join(config_dir, 'sa.json'), '--addr', ':9395', '--debug-addr', ':8003'),
None),
Service('boulder-sa-2',
8103, 'sa2.service.consul:9495',
('./bin/boulder', 'boulder-sa', '--config', os.path.join(config_dir, 'sa.json'), '--addr', 'sa2.service.consul:9495', '--debug-addr', ':8103'),
8103, 9495, 'sa.boulder',
('./bin/boulder', 'boulder-sa', '--config', os.path.join(config_dir, 'sa.json'), '--addr', ':9495', '--debug-addr', ':8103'),
None),
Service('ct-test-srv',
4500, None,
4500, None, None,
('./bin/ct-test-srv', '--config', 'test/ct-test-srv/ct-test-srv.json'), None),
Service('boulder-publisher-1',
8009, 'publisher1.service.consul:9391',
('./bin/boulder', 'boulder-publisher', '--config', os.path.join(config_dir, 'publisher.json'), '--addr', 'publisher1.service.consul:9391', '--debug-addr', ':8009'),
8009, 9391, 'publisher.boulder',
('./bin/boulder', 'boulder-publisher', '--config', os.path.join(config_dir, 'publisher.json'), '--addr', ':9391', '--debug-addr', ':8009'),
None),
Service('boulder-publisher-2',
8109, 'publisher2.service.consul:9491',
('./bin/boulder', 'boulder-publisher', '--config', os.path.join(config_dir, 'publisher.json'), '--addr', 'publisher2.service.consul:9491', '--debug-addr', ':8109'),
8109, 9491, 'publisher.boulder',
('./bin/boulder', 'boulder-publisher', '--config', os.path.join(config_dir, 'publisher.json'), '--addr', ':9491', '--debug-addr', ':8109'),
None),
Service('mail-test-srv',
9380, None,
9380, None, None,
('./bin/mail-test-srv', '--closeFirst', '5', '--cert', 'test/mail-test-srv/localhost/cert.pem', '--key', 'test/mail-test-srv/localhost/key.pem'),
None),
Service('ocsp-responder',
8005, None,
('./bin/boulder', 'ocsp-responder', '--config', os.path.join(config_dir, 'ocsp-responder.json'), '--addr', '0.0.0.0:4002', '--debug-addr', ':8005'),
8005, None, None,
('./bin/boulder', 'ocsp-responder', '--config', os.path.join(config_dir, 'ocsp-responder.json'), '--addr', ':4002', '--debug-addr', ':8005'),
('boulder-ra-1', 'boulder-ra-2')),
Service('boulder-va-1',
8004, 'va1.service.consul:9392',
('./bin/boulder', 'boulder-va', '--config', os.path.join(config_dir, 'va.json'), '--addr', 'va1.service.consul:9392', '--debug-addr', ':8004'),
8004, 9392, 'va.boulder',
('./bin/boulder', 'boulder-va', '--config', os.path.join(config_dir, 'va.json'), '--addr', ':9392', '--debug-addr', ':8004'),
('boulder-remoteva-a', 'boulder-remoteva-b')),
Service('boulder-va-2',
8104, 'va2.service.consul:9492',
('./bin/boulder', 'boulder-va', '--config', os.path.join(config_dir, 'va.json'), '--addr', 'va2.service.consul:9492', '--debug-addr', ':8104'),
8104, 9492, 'va.boulder',
('./bin/boulder', 'boulder-va', '--config', os.path.join(config_dir, 'va.json'), '--addr', ':9492', '--debug-addr', ':8104'),
('boulder-remoteva-a', 'boulder-remoteva-b')),
Service('boulder-ca-1',
8001, 'ca1.service.consul:9393',
('./bin/boulder', 'boulder-ca', '--config', os.path.join(config_dir, 'ca.json'), '--addr', 'ca1.service.consul:9393', '--debug-addr', ':8001'),
8001, 9393, 'ca.boulder',
('./bin/boulder', 'boulder-ca', '--config', os.path.join(config_dir, 'ca.json'), '--addr', ':9393', '--debug-addr', ':8001'),
('boulder-sa-1', 'boulder-sa-2')),
Service('boulder-ca-2',
8101, 'ca2.service.consul:9493',
('./bin/boulder', 'boulder-ca', '--config', os.path.join(config_dir, 'ca.json'), '--addr', 'ca2.service.consul:9493', '--debug-addr', ':8101'),
8101, 9493, 'ca.boulder',
('./bin/boulder', 'boulder-ca', '--config', os.path.join(config_dir, 'ca.json'), '--addr', ':9493', '--debug-addr', ':8101'),
('boulder-sa-1', 'boulder-sa-2')),
Service('akamai-test-srv',
6789, None,
6789, None, None,
('./bin/akamai-test-srv', '--listen', 'localhost:6789', '--secret', 'its-a-secret'),
None),
Service('akamai-purger',
9666, None,
9666, None, None,
('./bin/boulder', 'akamai-purger', '--addr', ':9399', '--config', os.path.join(config_dir, 'akamai-purger.json'), '--debug-addr', ':9666'),
('akamai-test-srv',)),
Service('s3-test-srv',
7890, None,
7890, None, None,
('./bin/s3-test-srv', '--listen', 'localhost:7890'),
None),
Service('crl-storer',
9667, None,
9667, None, None,
('./bin/boulder', 'crl-storer', '--config', os.path.join(config_dir, 'crl-storer.json'), '--addr', ':9309', '--debug-addr', ':9667'),
('s3-test-srv',)),
Service('crl-updater',
8021, None,
8021, None, None,
('./bin/boulder', 'crl-updater', '--config', os.path.join(config_dir, 'crl-updater.json'), '--debug-addr', ':8021'),
('boulder-ca-1', 'boulder-ca-2', 'boulder-sa-1', 'boulder-sa-2', 'crl-storer')),
Service('boulder-ra-1',
8002, 'ra1.service.consul:9394',
('./bin/boulder', 'boulder-ra', '--config', os.path.join(config_dir, 'ra.json'), '--addr', 'ra1.service.consul:9394', '--debug-addr', ':8002'),
8002, 9394, 'ra.boulder',
('./bin/boulder', 'boulder-ra', '--config', os.path.join(config_dir, 'ra.json'), '--addr', ':9394', '--debug-addr', ':8002'),
('boulder-sa-1', 'boulder-sa-2', 'boulder-ca-1', 'boulder-ca-2', 'boulder-va-1', 'boulder-va-2', 'akamai-purger', 'boulder-publisher-1', 'boulder-publisher-2')),
Service('boulder-ra-2',
8102, 'ra2.service.consul:9494',
('./bin/boulder', 'boulder-ra', '--config', os.path.join(config_dir, 'ra.json'), '--addr', 'ra2.service.consul:9494', '--debug-addr', ':8102'),
8102, 9494, 'ra.boulder',
('./bin/boulder', 'boulder-ra', '--config', os.path.join(config_dir, 'ra.json'), '--addr', ':9494', '--debug-addr', ':8102'),
('boulder-sa-1', 'boulder-sa-2', 'boulder-ca-1', 'boulder-ca-2', 'boulder-va-1', 'boulder-va-2', 'akamai-purger', 'boulder-publisher-1', 'boulder-publisher-2')),
Service('bad-key-revoker',
8020, None,
8020, None, None,
('./bin/boulder', 'bad-key-revoker', '--config', os.path.join(config_dir, 'bad-key-revoker.json'), '--debug-addr', ':8020'),
('boulder-ra-1', 'boulder-ra-2', 'mail-test-srv')),
Service('nonce-service-taro',
8111, 'nonce1.service.consul:9301',
# Note: the nonce-service instances bind to a specific IP address, not "all interfaces",
# because they use their explicitly bound address (IP and port) in calculating
# the nonce prefix, which is used by WFEs when deciding where to redeem nonces.
# The `taro` and `zinc` instances simulate nonce services in two different
# datacenters. The WFE is configured to get nonces from one of these
# services, and potentially redeem from either service (though in practice
# it will only redeem from the one that is configured for getting nonces).
Service('nonce-service-taro-1',
8111, None, None,
('./bin/boulder', 'nonce-service', '--config', os.path.join(config_dir, 'nonce-a.json'), '--addr', '10.77.77.77:9301', '--debug-addr', ':8111',),
None),
Service('nonce-service-zinc',
8112, 'nonce2.service.consul:9401',
('./bin/boulder', 'nonce-service', '--config', os.path.join(config_dir, 'nonce-b.json'), '--addr', '10.88.88.88:9401', '--debug-addr', ':8112',),
Service('nonce-service-taro-2',
8113, None, None,
('./bin/boulder', 'nonce-service', '--config', os.path.join(config_dir, 'nonce-a.json'), '--addr', '10.77.77.77:9501', '--debug-addr', ':8113',),
None),
Service('nonce-service-zinc-1',
8112, None, None,
('./bin/boulder', 'nonce-service', '--config', os.path.join(config_dir, 'nonce-b.json'), '--addr', '10.77.77.77:9401', '--debug-addr', ':8112',),
None),
Service('boulder-wfe2',
4001, None,
('./bin/boulder', 'boulder-wfe2', '--config', os.path.join(config_dir, 'wfe2.json'), '--addr', '0.0.0.0:4001', '--tls-addr', '0.0.0.0:4431', '--debug-addr', ':8013'),
('boulder-ra-1', 'boulder-ra-2', 'boulder-sa-1', 'boulder-sa-2', 'nonce-service-taro', 'nonce-service-zinc')),
4001, None, None,
('./bin/boulder', 'boulder-wfe2', '--config', os.path.join(config_dir, 'wfe2.json'), '--addr', ':4001', '--tls-addr', ':4431', '--debug-addr', ':8013'),
('boulder-ra-1', 'boulder-ra-2', 'boulder-sa-1', 'boulder-sa-2', 'nonce-service-taro-1', 'nonce-service-taro-2', 'nonce-service-zinc-1')),
Service('log-validator',
8016, None,
8016, None, None,
('./bin/boulder', 'log-validator', '--config', os.path.join(config_dir, 'log-validator.json'), '--debug-addr', ':8016'),
None),
)
@ -189,7 +200,7 @@ def start(fakeclock):
# Check that we can resolve the service names before we try to start any
# services. This prevents a confusing error (timed out health check).
try:
socket.getaddrinfo('publisher1.service.consul', None)
socket.getaddrinfo('publisher.service.consul', None)
except Exception as e:
print("Error querying DNS. Is consul running? `docker compose ps bconsul`. %s" % (e))
return False
@ -207,8 +218,8 @@ def start(fakeclock):
global processes
p = run(service.cmd, fakeclock)
processes.append(p)
if service.grpc_addr is not None:
waithealth(' '.join(p.args), service.grpc_addr)
if service.grpc_port is not None:
waithealth(' '.join(p.args), service.grpc_port, service.host_override)
else:
if not waitport(service.debug_port, ' '.join(p.args), perTickCheck=check):
return False
@ -259,7 +270,7 @@ def startChallSrv():
'--defaultIPv4', os.environ.get("FAKE_DNS"),
'-defaultIPv6', '',
'--dns01', ':8053,:8054',
'--doh', '10.77.77.77:8343,10.88.88.88:8443',
'--doh', ':8343,:8443',
'--doh-cert', 'test/grpc-creds/10.77.77.77/cert.pem',
'--doh-cert-key', 'test/grpc-creds/10.77.77.77/key.pem',
'--management', ':8055',