# boulder/test/startservers.py
# (312 lines, 14 KiB, Python)


import atexit
import collections
import os
import shutil
import signal
import socket
import subprocess
import sys
import tempfile
import threading
import time
from helpers import waithealth, waitport, config_dir, CONFIG_NEXT
# Description of one process launched for the integration-test environment.
#   name          - unique label; other services refer to it in their `deps`
#   debug_port    - port waited on at start-up when grpc_port is None
#   grpc_port     - port handed to the gRPC health check, or None
#   host_override - host name handed to the health check with grpc_port, or None
#   cmd           - argv sequence used to launch the process
#   deps          - names of services that must be started first, or None
Service = collections.namedtuple(
    'Service',
    ['name', 'debug_port', 'grpc_port', 'host_override', 'cmd', 'deps'],
)
# Keep these ports in sync with consul/config.hcl
#
# Every entry is a Service tuple: (name, debug_port, grpc_port, host_override,
# cmd, deps). Entries whose grpc_port is None are waited on via debug_port
# instead of a gRPC health check. `deps` names the services that must be
# started first; the order of entries in this table is not the start order.
SERVICES = (
    Service('boulder-remoteva-a',
        8011, 9397, 'rva.boulder',
        ('./bin/boulder', 'boulder-va', '--config', os.path.join(config_dir, 'va-remote-a.json'), '--addr', ':9397', '--debug-addr', ':8011'),
        None),
    Service('boulder-remoteva-b',
        8012, 9498, 'rva.boulder',
        ('./bin/boulder', 'boulder-va', '--config', os.path.join(config_dir, 'va-remote-b.json'), '--addr', ':9498', '--debug-addr', ':8012'),
        None),
    Service('remoteva-a',
        8211, 9897, 'rva.boulder',
        ('./bin/boulder', 'remoteva', '--config', os.path.join(config_dir, 'remoteva-a.json'), '--addr', ':9897', '--debug-addr', ':8211'),
        None),
    Service('remoteva-b',
        8212, 9998, 'rva.boulder',
        ('./bin/boulder', 'remoteva', '--config', os.path.join(config_dir, 'remoteva-b.json'), '--addr', ':9998', '--debug-addr', ':8212'),
        None),
    Service('boulder-sa-1',
        8003, 9395, 'sa.boulder',
        ('./bin/boulder', 'boulder-sa', '--config', os.path.join(config_dir, 'sa.json'), '--addr', ':9395', '--debug-addr', ':8003'),
        None),
    Service('boulder-sa-2',
        8103, 9495, 'sa.boulder',
        ('./bin/boulder', 'boulder-sa', '--config', os.path.join(config_dir, 'sa.json'), '--addr', ':9495', '--debug-addr', ':8103'),
        None),
    Service('aia-test-srv',
        4502, None, None,
        ('./bin/aia-test-srv', '--addr', ':4502', '--hierarchy', 'test/certs/webpki/'), None),
    Service('ct-test-srv',
        4600, None, None,
        ('./bin/ct-test-srv', '--config', 'test/ct-test-srv/ct-test-srv.json'), None),
    Service('boulder-publisher-1',
        8009, 9391, 'publisher.boulder',
        ('./bin/boulder', 'boulder-publisher', '--config', os.path.join(config_dir, 'publisher.json'), '--addr', ':9391', '--debug-addr', ':8009'),
        None),
    Service('boulder-publisher-2',
        8109, 9491, 'publisher.boulder',
        ('./bin/boulder', 'boulder-publisher', '--config', os.path.join(config_dir, 'publisher.json'), '--addr', ':9491', '--debug-addr', ':8109'),
        None),
    Service('mail-test-srv',
        9380, None, None,
        ('./bin/mail-test-srv', '--closeFirst', '5', '--cert', 'test/certs/ipki/localhost/cert.pem', '--key', 'test/certs/ipki/localhost/key.pem'),
        None),
    Service('ocsp-responder',
        8005, None, None,
        ('./bin/boulder', 'ocsp-responder', '--config', os.path.join(config_dir, 'ocsp-responder.json'), '--addr', ':4002', '--debug-addr', ':8005'),
        ('boulder-ra-1', 'boulder-ra-2')),
    Service('boulder-va-1',
        8004, 9392, 'va.boulder',
        ('./bin/boulder', 'boulder-va', '--config', os.path.join(config_dir, 'va.json'), '--addr', ':9392', '--debug-addr', ':8004'),
        ('boulder-remoteva-a', 'boulder-remoteva-b', 'remoteva-a', 'remoteva-b')),
    Service('boulder-va-2',
        8104, 9492, 'va.boulder',
        ('./bin/boulder', 'boulder-va', '--config', os.path.join(config_dir, 'va.json'), '--addr', ':9492', '--debug-addr', ':8104'),
        ('boulder-remoteva-a', 'boulder-remoteva-b', 'remoteva-a', 'remoteva-b')),
    Service('boulder-ca-1',
        8001, 9393, 'ca.boulder',
        ('./bin/boulder', 'boulder-ca', '--config', os.path.join(config_dir, 'ca.json'), '--addr', ':9393', '--debug-addr', ':8001'),
        ('boulder-sa-1', 'boulder-sa-2')),
    Service('boulder-ca-2',
        8101, 9493, 'ca.boulder',
        ('./bin/boulder', 'boulder-ca', '--config', os.path.join(config_dir, 'ca.json'), '--addr', ':9493', '--debug-addr', ':8101'),
        ('boulder-sa-1', 'boulder-sa-2')),
    Service('akamai-test-srv',
        6789, None, None,
        ('./bin/akamai-test-srv', '--listen', 'localhost:6789', '--secret', 'its-a-secret'),
        None),
    Service('akamai-purger',
        9666, None, None,
        ('./bin/boulder', 'akamai-purger', '--addr', ':9399', '--config', os.path.join(config_dir, 'akamai-purger.json'), '--debug-addr', ':9666'),
        ('akamai-test-srv',)),
    Service('s3-test-srv',
        4501, None, None,
        ('./bin/s3-test-srv', '--listen', 'localhost:4501'),
        None),
    Service('crl-storer',
        9667, None, None,
        ('./bin/boulder', 'crl-storer', '--config', os.path.join(config_dir, 'crl-storer.json'), '--addr', ':9309', '--debug-addr', ':9667'),
        ('s3-test-srv',)),
    Service('crl-updater',
        8021, None, None,
        ('./bin/boulder', 'crl-updater', '--config', os.path.join(config_dir, 'crl-updater.json'), '--debug-addr', ':8021'),
        ('boulder-ca-1', 'boulder-ca-2', 'boulder-sa-1', 'boulder-sa-2', 'crl-storer')),
    Service('boulder-ra-1',
        8002, 9394, 'ra.boulder',
        ('./bin/boulder', 'boulder-ra', '--config', os.path.join(config_dir, 'ra.json'), '--addr', ':9394', '--debug-addr', ':8002'),
        ('boulder-sa-1', 'boulder-sa-2', 'boulder-ca-1', 'boulder-ca-2', 'boulder-va-1', 'boulder-va-2', 'akamai-purger', 'boulder-publisher-1', 'boulder-publisher-2')),
    Service('boulder-ra-2',
        8102, 9494, 'ra.boulder',
        ('./bin/boulder', 'boulder-ra', '--config', os.path.join(config_dir, 'ra.json'), '--addr', ':9494', '--debug-addr', ':8102'),
        ('boulder-sa-1', 'boulder-sa-2', 'boulder-ca-1', 'boulder-ca-2', 'boulder-va-1', 'boulder-va-2', 'akamai-purger', 'boulder-publisher-1', 'boulder-publisher-2')),
    Service('bad-key-revoker',
        8020, None, None,
        ('./bin/boulder', 'bad-key-revoker', '--config', os.path.join(config_dir, 'bad-key-revoker.json'), '--debug-addr', ':8020'),
        ('boulder-ra-1', 'boulder-ra-2', 'mail-test-srv')),
    # Note: the nonce-service instances bind to specific ports, not "all interfaces",
    # because they use their explicitly bound port in calculating the nonce
    # prefix, which is used by WFEs when deciding where to redeem nonces.
    # The `taro` and `zinc` instances simulate nonce services in two different
    # datacenters. The WFE is configured to get nonces from one of these
    # services, and potentially redeem from either service (though in practice
    # it will only redeem from the one that is configured for getting nonces).
    Service('nonce-service-taro-1',
        8111, None, None,
        ('./bin/boulder', 'nonce-service', '--config', os.path.join(config_dir, 'nonce-a.json'), '--addr', '10.77.77.77:9301', '--debug-addr', ':8111',),
        None),
    Service('nonce-service-taro-2',
        8113, None, None,
        ('./bin/boulder', 'nonce-service', '--config', os.path.join(config_dir, 'nonce-a.json'), '--addr', '10.77.77.77:9501', '--debug-addr', ':8113',),
        None),
    Service('nonce-service-zinc-1',
        8112, None, None,
        ('./bin/boulder', 'nonce-service', '--config', os.path.join(config_dir, 'nonce-b.json'), '--addr', '10.77.77.77:9401', '--debug-addr', ':8112',),
        None),
    # NOTE(review): the debug_port field below (4001) is the --addr listen
    # port, not the --debug-addr port (8013), so start-up waits on 4001.
    # Confirm this is intentional.
    Service('boulder-wfe2',
        4001, None, None,
        ('./bin/boulder', 'boulder-wfe2', '--config', os.path.join(config_dir, 'wfe2.json'), '--addr', ':4001', '--tls-addr', ':4431', '--debug-addr', ':8013'),
        ('boulder-ra-1', 'boulder-ra-2', 'boulder-sa-1', 'boulder-sa-2', 'nonce-service-taro-1', 'nonce-service-taro-2', 'nonce-service-zinc-1')),
    Service('log-validator',
        8016, None, None,
        ('./bin/boulder', 'log-validator', '--config', os.path.join(config_dir, 'log-validator.json'), '--debug-addr', ':8016'),
        None),
    )
def _service_toposort(services):
    """Yield Service objects in topologically sorted order.

    No service is yielded until every service named in its ``deps`` value
    has been yielded. Among services that are ready at the same time, the
    one appearing earliest in ``services`` is yielded first, so the order is
    identical on every run (it does not depend on set/hash ordering).

    Args:
        services: iterable of hashable objects with a ``name`` attribute and
            a ``deps`` attribute (a sequence of names, or None/empty for no
            dependencies). Duplicate entries are yielded only once.

    Yields:
        Each distinct service, dependencies first.

    Raises:
        Exception: once every startable service has been yielded, if any
            service is left whose dependencies can never be satisfied
            (an unknown dependency name or a dependency cycle).
    """
    # dict.fromkeys drops duplicates while keeping the caller's ordering.
    pending = list(dict.fromkeys(services))
    done = set()
    while pending:
        # Pick the earliest pending service whose dependencies are all done.
        service = next(
            (s for s in pending if all(d in done for d in (s.deps or ()))),
            None,
        )
        if service is None:
            # Everything left is blocked; report it below.
            break
        pending.remove(service)
        yield service
        done.add(service.name)
    if pending:
        print("WARNING: services with unsatisfied dependencies:")
        for s in pending:
            print(s.name, ":", s.deps)
        raise Exception("Unable to satisfy service dependencies")
# Popen handles for the services launched by start(), in launch order. stop()
# signals them in reverse order, and check() drops entries that have exited.
processes = []

# NOTE(@cpu): We manage the challSrvProcess separately from the other global
# processes because we want integration tests to be able to stop/start it (e.g.
# to run the load-generator).
# Holds the Popen handle of the running pebble-challtestsrv, or None when it
# has not been started (or has been stopped).
challSrvProcess = None
def install(race_detection):
    """Run the make-based build and report whether it succeeded.

    The Go build flags always carry the ``integration`` tag; ``-race`` is
    appended when ``race_detection`` is truthy. The flags are handed to
    ``/usr/bin/make`` through the ``GO_BUILD_FLAGS`` variable.

    Returns True when make exits with status 0, False otherwise.
    """
    # NOTE(review): an earlier comment here mentioned passing empty BUILD_TIME
    # and BUILD_ID flags to protect the build cache; no such flags are passed
    # by this function -- confirm whether that is handled elsewhere.
    flag_parts = ['-tags "integration"']
    if race_detection:
        flag_parts.append('-race')
    make_argv = ["/usr/bin/make", "GO_BUILD_FLAGS=%s" % ' '.join(flag_parts)]
    exit_status = subprocess.call(make_argv)
    return exit_status == 0
def run(cmd, fakeclock):
    """Launch ``cmd`` as a child process and return its Popen handle.

    The child receives a copy of this process's environment in which
    ``GORACE`` defaults to ``halt_on_error=1`` and, when ``fakeclock`` is
    truthy, ``FAKECLOCK`` defaults to ``fakeclock``. Values already present
    in the environment are left untouched. The ``cmd`` argument is also
    stored on the returned handle as ``.cmd``.
    """
    env_defaults = {"GORACE": "halt_on_error=1"}
    if fakeclock:
        env_defaults["FAKECLOCK"] = fakeclock

    child_env = dict(os.environ)
    for key, value in env_defaults.items():
        # setdefault: never override what the caller's environment provides.
        child_env.setdefault(key, value)

    proc = subprocess.Popen(cmd, env=child_env)
    proc.cmd = cmd
    return proc
def start(fakeclock):
    """Start the challenge test server and every service in SERVICES.

    Installs SIGTERM/SIGINT handlers that call stop(), verifies that consul
    DNS names resolve, starts pebble-challtestsrv, then launches each service
    in dependency order and waits for it to come up.

    Args:
        fakeclock: value passed through to run() for each service; when
            truthy it is exported to the child as FAKECLOCK.

    Returns:
        True if everything starts. False if DNS resolution fails, the
        challenge test server does not come up, or any service dies or
        fails its start-up wait. Anything that did start before that point
        can be cleaned up explicitly by calling stop(), or automatically
        atexit.

    Raises:
        Exception: propagated from _service_toposort() when the dependency
            graph in SERVICES cannot be satisfied, or from startChallSrv()
            when it refuses to start (e.g. it was already started).
    """
    signal.signal(signal.SIGTERM, lambda _, __: stop())
    signal.signal(signal.SIGINT, lambda _, __: stop())

    # Check that we can resolve the service names before we try to start any
    # services. This prevents a confusing error (timed out health check).
    try:
        socket.getaddrinfo('publisher.service.consul', None)
    except Exception as e:
        print("Error querying DNS. Is consul running? `docker compose ps bconsul`. %s" % (e))
        return False

    # Start the pebble-challtestsrv first so it can be used to resolve DNS for
    # gRPC. startChallSrv() returns False when its management port never
    # becomes available; bail out here rather than letting every later
    # service fail with an unrelated-looking health-check timeout.
    if startChallSrv() is False:
        print("Error starting pebble-challtestsrv")
        return False

    # Processes are in order of dependency: Each process should be started
    # before any services that intend to send it RPCs. On shutdown they will be
    # killed in reverse order.
    for service in _service_toposort(SERVICES):
        print("Starting service", service.name)
        try:
            p = run(service.cmd, fakeclock)
            # Record the process before waiting so stop() can clean it up
            # even if the wait below fails.
            processes.append(p)
            if service.grpc_port is not None:
                waithealth(' '.join(p.args), service.grpc_port, service.host_override)
            elif not waitport(service.debug_port, ' '.join(p.args), perTickCheck=check):
                return False
        except Exception as e:
            print("Error starting service %s: %s" % (service.name, e))
            return False

    print("All servers running. Hit ^C to kill.")
    return True
def check():
    """Report whether every tracked process is still running.

    Polls each entry of the global ``processes`` list once. Processes that
    have exited are logged and removed from ``processes``; the survivors are
    kept. The pebble-challtestsrv is tracked separately and is not examined
    here.

    Returns True when no tracked process has exited, False otherwise.
    """
    global processes
    alive, exited = [], []
    for proc in processes:
        # poll() returns None while the child is still running.
        bucket = alive if proc.poll() is None else exited
        bucket.append(proc)

    if exited:
        print("\n\nThese processes exited early (check above for their output):")
        for proc in exited:
            print("\t'%s' with pid %d exited %d" % (proc.cmd, proc.pid, proc.returncode))

    processes = alive
    return len(exited) == 0
def startChallSrv():
    """
    Start the pebble-challtestsrv and wait for it to become available. See also
    stopChallSrv.

    Returns True when waitport() reports the management port (8055) as
    available, and False otherwise. On a False return the process handle is
    still kept in challSrvProcess so that stopChallSrv() can terminate it.

    Raises an Exception if the server was already started, or if the
    FAKE_DNS environment variable (used as the default IPv4 answer) is unset.
    """
    global challSrvProcess
    if challSrvProcess is not None:
        raise Exception("startChallSrv called more than once")

    # Fail with a clear message instead of handing None to Popen, which
    # would otherwise surface as an opaque TypeError about argument types.
    default_ipv4 = os.environ.get("FAKE_DNS")
    if default_ipv4 is None:
        raise Exception("FAKE_DNS environment variable must be set to start pebble-challtestsrv")

    # NOTE(@cpu): We specify explicit bind addresses for -https01 and
    # --tlsalpn01 here to allow HTTPS HTTP-01 responses on 443 for one interface
    # and TLS-ALPN-01 responses on 443 for another interface. The choice of
    # which is used is controlled by mock DNS data added by the relevant
    # integration tests.
    challSrvProcess = run([
        'pebble-challtestsrv',
        '--defaultIPv4', default_ipv4,
        '-defaultIPv6', '',
        '--dns01', ':8053,:8054',
        '--doh', ':8343,:8443',
        '--doh-cert', 'test/certs/ipki/10.77.77.77/cert.pem',
        '--doh-cert-key', 'test/certs/ipki/10.77.77.77/key.pem',
        '--management', ':8055',
        '--http01', '10.77.77.77:80',
        '-https01', '10.77.77.77:443',
        '--tlsalpn01', '10.88.88.88:443'],
        None)

    # Wait for the pebble-challtestsrv management port.
    if not waitport(8055, ' '.join(challSrvProcess.args)):
        return False
    # Explicit success value so callers can tell success from failure.
    return True
def stopChallSrv():
    """
    Shut down the pebble-challtestsrv started by startChallSrv, if there is
    one, and forget its handle.

    A server that is still running is sent SIGTERM and waited for. Calling
    this when no server has been started is a no-op.
    """
    global challSrvProcess
    proc = challSrvProcess
    if proc is not None:
        still_running = proc.poll() is None
        if still_running:
            proc.send_signal(signal.SIGTERM)
            proc.wait()
        # Clear the handle so startChallSrv may be called again.
        challSrvProcess = None
@atexit.register
def stop():
    """Terminate every tracked service, then the challenge test server.

    Registered with atexit so it also runs automatically at interpreter
    exit. Services are visited in the reverse of the order in which they
    were started; each one still running is sent SIGTERM and waited for
    before moving on. This reflects the restart process in prod and
    exercises the graceful shutdown code paths.
    """
    global processes
    for proc in reversed(processes):
        if proc.poll() is not None:
            # Already exited; nothing to signal.
            continue
        proc.send_signal(signal.SIGTERM)
        proc.wait()
    processes = []

    # Also stop the challenge test server
    stopChallSrv()