From dbcb16543e18e236466b6f4945c1cc09018dd24a Mon Sep 17 00:00:00 2001 From: Jacob Hoffman-Andrews Date: Wed, 23 May 2018 06:47:14 -0700 Subject: [PATCH] Start using multiple-IP hostnames for load balancing (#3687) We'd like to start using the DNS load balancer in the latest version of gRPC. That means putting all IPs for a service under a single hostname (or using an SRV record, but we're not taking that path). This change adds an sd-test-srv to act as our service discovery DNS service. It returns both Boulder IP addresses for any A lookup ending in ".boulder". This change also sets up the Docker DNS for our boulder container to defer to sd-test-srv when it doesn't know an answer. sd-test-srv doesn't know how to resolve public Internet names like `github.com`. Resolving public names is required for the `godep-restore` test phase, so this change breaks out a copy of the boulder container (`netaccess`) that is used for the `godep-restore` and `coverage` test phases. This change implements a shim of a DNS resolver for gRPC, so that we can switch to DNS-based load balancing with the currently vendored gRPC, then when we upgrade to the latest gRPC we won't need a simultaneous config update. Also, this change introduces a check at the end of the integration test that each backend received at least one RPC, ensuring that we are not sending all load to a single backend. 
--- .travis.yml | 12 ++-- docker-compose.yml | 17 +++++ grpc/client.go | 33 +++++++--- grpc/creds/creds.go | 23 ++++--- grpc/creds/creds_test.go | 4 +- grpc/dns_resolver.go | 53 ++++++++++++++++ test/config-next/admin-revoker.json | 4 +- test/config-next/ca.json | 2 +- test/config-next/expiration-mailer.json | 2 +- test/config-next/ocsp-updater.json | 6 +- test/config-next/orphan-finder.json | 2 +- test/config-next/publisher.json | 2 +- test/config-next/ra.json | 8 +-- test/config-next/va-remote-a.json | 5 +- test/config-next/va-remote-b.json | 5 +- test/config-next/wfe.json | 4 +- test/config-next/wfe2.json | 4 +- test/entrypoint.sh | 22 ++++--- test/integration-test.py | 27 +++++++- test/sd-test-srv/main.go | 84 +++++++++++++++++++++++++ test/startservers.py | 7 ++- 21 files changed, 272 insertions(+), 54 deletions(-) create mode 100644 grpc/dns_resolver.go create mode 100644 test/sd-test-srv/main.go diff --git a/.travis.yml b/.travis.yml index 79d9aa87b..96bee8e01 100644 --- a/.travis.yml +++ b/.travis.yml @@ -34,18 +34,22 @@ env: # # Current Go version build tasks: # - - RUN="vet fmt migrations integration godep-restore errcheck generate dashlint rpm" + - RUN="vet fmt migrations integration errcheck generate dashlint rpm" # Config changes that have landed in master but not yet been applied to # production can be made in boulder-config-next.json. - RUN="integration" BOULDER_CONFIG_DIR="test/config-next" - RUN="unit" - RUN="unit-next" BOULDER_CONFIG_DIR="test/config-next" - - RUN="coverage" + # godep-restore runs with a separate container because it needs to fetch + # packages from GitHub et al., which is incompatible with the DNS server + # override in the boulder container (used for service discovery). 
+ - RUN="godep-restore" CONTAINER="netaccess" + - RUN="coverage" CONTAINER="netaccess" matrix: fast_finish: true allow_failures: - - env: RUN="coverage" + - env: RUN="coverage" CONTAINER="netaccess" # We require a newer version of docker-compose than is installed by way of the # "services: docker" directive. Per the travis docs[0] this is best remedied @@ -63,4 +67,4 @@ install: - $HOME/bin/docker-compose pull script: - - $HOME/bin/docker-compose run --use-aliases -e BOULDER_CONFIG_DIR="${BOULDER_CONFIG_DIR}" -e RUN="${RUN}" -e TRAVIS="${TRAVIS}" -e TRAVIS_COMMIT="${TRAVIS_COMMIT}" -e TRAVIS_PULL_REQUEST="${TRAVIS_PULL_REQUEST}" -e TRAVIS_PULL_REQUEST="${TRAVIS_PULL_REQUEST}" -e TRAVIS_JOB_ID="${TRAVIS_JOB_ID}" -e COVERALLS_TOKEN="${COVERALLS_TOKEN}" boulder ./test.sh + - $HOME/bin/docker-compose run --use-aliases -e BOULDER_CONFIG_DIR="${BOULDER_CONFIG_DIR}" -e RUN="${RUN}" -e TRAVIS="${TRAVIS}" -e TRAVIS_COMMIT="${TRAVIS_COMMIT}" -e TRAVIS_PULL_REQUEST="${TRAVIS_PULL_REQUEST}" -e TRAVIS_PULL_REQUEST="${TRAVIS_PULL_REQUEST}" -e TRAVIS_JOB_ID="${TRAVIS_JOB_ID}" -e COVERALLS_TOKEN="${COVERALLS_TOKEN}" ${CONTAINER:-boulder} ./test.sh diff --git a/docker-compose.yml b/docker-compose.yml index 928b42904..bc1e9c3d2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -29,6 +29,13 @@ services: - ra2.boulder - va2.boulder - publisher2.boulder + # Use sd-test-srv as a backup to Docker's embedded DNS server + # (https://docs.docker.com/config/containers/container-networking/#dns-services). + # If there's a name Docker's DNS server doesn't know about, it will + # forward the query to this IP (running sd-test-srv). We have + # special logic there that will return multiple IP addresses for + # service names. 
+ dns: 10.77.77.77 ports: - 4000:4000 # ACME - 4001:4001 # ACMEv2 @@ -65,6 +72,16 @@ services: command: mysqld --bind-address=0.0.0.0 logging: driver: none + netaccess: + image: letsencrypt/boulder-tools-go${TRAVIS_GO_VERSION:-1.10.2}:2018-05-04 + networks: + - bluenet + volumes: + - .:/go/src/github.com/letsencrypt/boulder + working_dir: /go/src/github.com/letsencrypt/boulder + entrypoint: test/entrypoint.sh + depends_on: + - bmysql networks: bluenet: diff --git a/grpc/client.go b/grpc/client.go index 3ecd2c849..8069b6ecb 100644 --- a/grpc/client.go +++ b/grpc/client.go @@ -3,6 +3,7 @@ package grpc import ( "crypto/tls" "fmt" + "net" "github.com/grpc-ecosystem/go-grpc-prometheus" "github.com/jmhodges/clock" @@ -26,13 +27,31 @@ func ClientSetup(c *cmd.GRPCClientConfig, tls *tls.Config, metrics clientMetrics } ci := clientInterceptor{c.Timeout.Duration, metrics, clk} - creds := bcreds.NewClientCredentials(tls.RootCAs, tls.Certificates) - return grpc.Dial( - "", // Since our staticResolver provides addresses we don't need to pass an address here - grpc.WithTransportCredentials(creds), - grpc.WithBalancer(grpc.RoundRobin(newStaticResolver(c.ServerAddresses))), - grpc.WithUnaryInterceptor(ci.intercept), - ) + // When there's only one server address, we use our custom newDNSResolver, + // intended as a temporary shim until we upgrade to a version of gRPC that has + // its own built-in DNS resolver. This works equally well when there's only + // one IP for a hostname or when there are multiple IPs for the hostname. 
+ if len(c.ServerAddresses) == 1 { + host, port, err := net.SplitHostPort(c.ServerAddresses[0]) + if err != nil { + return nil, err + } + creds := bcreds.NewClientCredentials(tls.RootCAs, tls.Certificates, host) + return grpc.Dial( + c.ServerAddresses[0], + grpc.WithTransportCredentials(creds), + grpc.WithBalancer(grpc.RoundRobin(newDNSResolver(host, port))), + grpc.WithUnaryInterceptor(ci.intercept), + ) + } else { + creds := bcreds.NewClientCredentials(tls.RootCAs, tls.Certificates, "") + return grpc.Dial( + "", // Since our staticResolver provides addresses we don't need to pass an address here + grpc.WithTransportCredentials(creds), + grpc.WithBalancer(grpc.RoundRobin(newStaticResolver(c.ServerAddresses))), + grpc.WithUnaryInterceptor(ci.intercept), + ) + } } type registry interface { diff --git a/grpc/creds/creds.go b/grpc/creds/creds.go index 3ddf4e0db..b901227fd 100644 --- a/grpc/creds/creds.go +++ b/grpc/creds/creds.go @@ -40,11 +40,14 @@ func (e SANNotAcceptedErr) Error() string { type clientTransportCredentials struct { roots *x509.CertPool clients []tls.Certificate + // If set, this is used as the hostname to validate on certificates, instead + // of the value passed to ClientHandshake by grpc. 
+ hostOverride string } // NewClientCredentials returns a new initialized grpc/credentials.TransportCredentials for client usage -func NewClientCredentials(rootCAs *x509.CertPool, clientCerts []tls.Certificate) credentials.TransportCredentials { - return &clientTransportCredentials{rootCAs, clientCerts} +func NewClientCredentials(rootCAs *x509.CertPool, clientCerts []tls.Certificate, hostOverride string) credentials.TransportCredentials { + return &clientTransportCredentials{rootCAs, clientCerts, hostOverride} } // ClientHandshake does the authentication handshake specified by the corresponding @@ -52,11 +55,15 @@ func NewClientCredentials(rootCAs *x509.CertPool, clientCerts []tls.Certificate) // connection and the corresponding auth information about the connection. // Implementations must use the provided context to implement timely cancellation. func (tc *clientTransportCredentials) ClientHandshake(ctx context.Context, addr string, rawConn net.Conn) (net.Conn, credentials.AuthInfo, error) { - // IMPORTANT: Don't wrap the errors returned from this method. gRPC expects to be - // able to check err.Temporary to spot temporary errors and reconnect when they happen. - host, _, err := net.SplitHostPort(addr) - if err != nil { - return nil, nil, err + var err error + host := tc.hostOverride + if host == "" { + // IMPORTANT: Don't wrap the errors returned from this method. gRPC expects to be + // able to check err.Temporary to spot temporary errors and reconnect when they happen. 
+ host, _, err = net.SplitHostPort(addr) + if err != nil { + return nil, nil, err + } } conn := tls.Client(rawConn, &tls.Config{ ServerName: host, @@ -107,7 +114,7 @@ func (tc *clientTransportCredentials) RequireTransportSecurity() bool { // Clone returns a copy of the clientTransportCredentials func (tc *clientTransportCredentials) Clone() credentials.TransportCredentials { - return NewClientCredentials(tc.roots, tc.clients) + return NewClientCredentials(tc.roots, tc.clients, tc.hostOverride) } // OverrideServerName is not implemented and here only to satisfy the interface diff --git a/grpc/creds/creds_test.go b/grpc/creds/creds_test.go index 91e0dbeba..78ef7c3c4 100644 --- a/grpc/creds/creds_test.go +++ b/grpc/creds/creds_test.go @@ -113,7 +113,7 @@ func TestClientTransportCredentials(t *testing.T) { serverB := httptest.NewUnstartedServer(nil) serverB.TLS = &tls.Config{Certificates: []tls.Certificate{{Certificate: [][]byte{derB}, PrivateKey: priv}}} - tc := NewClientCredentials(roots, []tls.Certificate{}) + tc := NewClientCredentials(roots, []tls.Certificate{}, "") serverA.StartTLS() defer serverA.Close() @@ -195,7 +195,7 @@ func (bc *brokenConn) SetReadDeadline(time.Time) error { return nil } func (bc *brokenConn) SetWriteDeadline(time.Time) error { return nil } func TestClientReset(t *testing.T) { - tc := NewClientCredentials(nil, []tls.Certificate{}) + tc := NewClientCredentials(nil, []tls.Certificate{}, "") _, _, err := tc.ClientHandshake(context.Background(), "T:1010", &brokenConn{}) test.AssertError(t, err, "ClientHandshake succeeded with brokenConn") _, ok := err.(interface { diff --git a/grpc/dns_resolver.go b/grpc/dns_resolver.go new file mode 100644 index 000000000..94dc67380 --- /dev/null +++ b/grpc/dns_resolver.go @@ -0,0 +1,53 @@ +package grpc + +import ( + "context" + "net" + + "google.golang.org/grpc/naming" +) + +// dnsResolver implements both the naming.Resolver and naming.Watcher +// interfaces. 
It's a temporary shim until we upgrade to the latest gRPC, which +// has a built-in DNS resolver. It looks up the hostname only once; it doesn't +// monitor for changes. +type dnsResolver struct { + host, port string + // ch is used to enforce the "lookup only once" behavior. + ch chan bool +} + +func newDNSResolver(host, port string) *dnsResolver { + return &dnsResolver{ + host: host, + port: port, + ch: make(chan bool, 1), + } +} + +func (dr *dnsResolver) Resolve(target string) (naming.Watcher, error) { + return dr, nil +} + +// Next is called in a loop by grpc.RoundRobin expecting updates. We provide a +// single update then block forever. +func (dr *dnsResolver) Next() ([]*naming.Update, error) { + // Stick a value on the channel, which has capacity 1. This will succeed once, + // then each subsequent call will block forever. + dr.ch <- true + addrs, err := net.DefaultResolver.LookupHost(context.Background(), dr.host) + if err != nil { + return nil, err + } + var updates []*naming.Update + for _, ip := range addrs { + updates = append(updates, &naming.Update{ + Op: naming.Add, + Addr: net.JoinHostPort(ip, dr.port), + }) + } + return updates, nil +} + +// Close does nothing +func (dr *dnsResolver) Close() {} diff --git a/test/config-next/admin-revoker.json b/test/config-next/admin-revoker.json index d4e52de4f..b2d27971b 100644 --- a/test/config-next/admin-revoker.json +++ b/test/config-next/admin-revoker.json @@ -8,11 +8,11 @@ "keyFile": "test/grpc-creds/admin-revoker.boulder/key.pem" }, "raService": { - "serverAddresses": ["ra1.boulder:9094", "ra2.boulder:9094"], + "serverAddresses": ["ra.boulder:9094"], "timeout": "15s" }, "saService": { - "serverAddresses": ["sa1.boulder:9095", "sa2.boulder:9095"], + "serverAddresses": ["sa.boulder:9095"], "timeout": "15s" } }, diff --git a/test/config-next/ca.json b/test/config-next/ca.json index 44126e931..649a464ad 100644 --- a/test/config-next/ca.json +++ b/test/config-next/ca.json @@ -11,7 +11,7 @@ "keyFile": 
"test/grpc-creds/ca.boulder/key.pem" }, "saService": { - "serverAddresses": ["sa1.boulder:9095", "sa2.boulder:9095"], + "serverAddresses": ["sa.boulder:9095"], "timeout": "15s" }, "grpcCA": { diff --git a/test/config-next/expiration-mailer.json b/test/config-next/expiration-mailer.json index b98956e46..bf21fb6ea 100644 --- a/test/config-next/expiration-mailer.json +++ b/test/config-next/expiration-mailer.json @@ -18,7 +18,7 @@ "keyFile": "test/grpc-creds/expiration-mailer.boulder/key.pem" }, "saService": { - "serverAddresses": ["sa1.boulder:9095", "sa2.boulder:9095"], + "serverAddresses": ["sa.boulder:9095"], "timeout": "15s" }, "SMTPTrustedRootFile": "test/mail-test-srv/minica.pem", diff --git a/test/config-next/ocsp-updater.json b/test/config-next/ocsp-updater.json index cd455b6a7..a435bc7fc 100644 --- a/test/config-next/ocsp-updater.json +++ b/test/config-next/ocsp-updater.json @@ -23,15 +23,15 @@ "keyFile": "test/grpc-creds/ocsp-updater.boulder/key.pem" }, "publisher": { - "serverAddresses": ["publisher1.boulder:9091", "publisher2.boulder:9091"], + "serverAddresses": ["publisher.boulder:9091"], "timeout": "10s" }, "saService": { - "serverAddresses": ["sa1.boulder:9095", "sa2.boulder:9095"], + "serverAddresses": ["sa.boulder:9095"], "timeout": "15s" }, "ocspGeneratorService": { - "serverAddresses": ["ca1.boulder:9096", "ca2.boulder:9096"], + "serverAddresses": ["ca.boulder:9096"], "timeout": "15s" }, "features": { diff --git a/test/config-next/orphan-finder.json b/test/config-next/orphan-finder.json index 25ccceff4..1f76b8b8a 100644 --- a/test/config-next/orphan-finder.json +++ b/test/config-next/orphan-finder.json @@ -12,7 +12,7 @@ }, "saService": { - "serverAddresses": ["sa1.boulder:9095", "sa2.boulder:9095"], + "serverAddresses": ["sa.boulder:9095"], "timeout": "15s" } } diff --git a/test/config-next/publisher.json b/test/config-next/publisher.json index 2170fcf2d..5dd394799 100644 --- a/test/config-next/publisher.json +++ b/test/config-next/publisher.json @@ 
-17,7 +17,7 @@ "keyFile": "test/grpc-creds/publisher.boulder/key.pem" }, "saService": { - "serverAddresses": ["sa1.boulder:9095", "sa2.boulder:9095"], + "serverAddresses": ["sa.boulder:9095"], "timeout": "15s" }, "features": { diff --git a/test/config-next/ra.json b/test/config-next/ra.json index 3c53b1eb5..7cc615658 100644 --- a/test/config-next/ra.json +++ b/test/config-next/ra.json @@ -22,19 +22,19 @@ "keyFile": "test/grpc-creds/ra.boulder/key.pem" }, "vaService": { - "serverAddresses": ["va1.boulder:9092", "va1.boulder:9092"], + "serverAddresses": ["va.boulder:9092"], "timeout": "20s" }, "caService": { - "serverAddresses": ["ca1.boulder:9093", "ca1.boulder:9093"], + "serverAddresses": ["ca.boulder:9093"], "timeout": "15s" }, "publisherService": { - "serverAddresses": ["publisher1.boulder:9091", "publisher1.boulder:9091"], + "serverAddresses": ["publisher.boulder:9091"], "timeout": "300s" }, "saService": { - "serverAddresses": ["sa1.boulder:9095", "sa1.boulder:9095"], + "serverAddresses": ["sa.boulder:9095"], "timeout": "15s" }, "grpc": { diff --git a/test/config-next/va-remote-a.json b/test/config-next/va-remote-a.json index 00147d695..7e3026a51 100644 --- a/test/config-next/va-remote-a.json +++ b/test/config-next/va-remote-a.json @@ -9,6 +9,10 @@ "tlsPort": 5001 }, "dnsTries": 3, + "dnsResolvers": [ + "127.0.0.1:8053", + "127.0.0.1:8054" + ], "issuerDomain": "happy-hacker-ca.invalid", "tls": { "caCertfile": "test/grpc-creds/minica.pem", @@ -34,7 +38,6 @@ }, "common": { - "dnsResolver": "127.0.0.1:8053", "dnsTimeout": "1s", "dnsAllowLoopbackAddresses": true } diff --git a/test/config-next/va-remote-b.json b/test/config-next/va-remote-b.json index d75b60cb7..5f8dd9b0b 100644 --- a/test/config-next/va-remote-b.json +++ b/test/config-next/va-remote-b.json @@ -9,6 +9,10 @@ "tlsPort": 5001 }, "dnsTries": 3, + "dnsResolvers": [ + "127.0.0.1:8053", + "127.0.0.1:8054" + ], "issuerDomain": "happy-hacker-ca.invalid", "tls": { "caCertfile": "test/grpc-creds/minica.pem", 
@@ -34,7 +38,6 @@ }, "common": { - "dnsResolver": "127.0.0.1:8053", "dnsTimeout": "1s", "dnsAllowLoopbackAddresses": true } diff --git a/test/config-next/wfe.json b/test/config-next/wfe.json index ee33ceb5c..1a8ee44d1 100644 --- a/test/config-next/wfe.json +++ b/test/config-next/wfe.json @@ -23,11 +23,11 @@ "keyFile": "test/grpc-creds/wfe.boulder/key.pem" }, "raService": { - "serverAddresses": ["ra1.boulder:9094", "ra2.boulder:9094"], + "serverAddresses": ["ra.boulder:9094"], "timeout": "20s" }, "saService": { - "serverAddresses": ["sa1.boulder:9095", "sa2.boulder:9095"], + "serverAddresses": ["sa.boulder:9095"], "timeout": "15s" }, "features": { diff --git a/test/config-next/wfe2.json b/test/config-next/wfe2.json index 66d575167..431305655 100644 --- a/test/config-next/wfe2.json +++ b/test/config-next/wfe2.json @@ -24,11 +24,11 @@ "keyFile": "test/grpc-creds/wfe.boulder/key.pem" }, "raService": { - "serverAddresses": ["ra1.boulder:9094", "ra2.boulder:9094"], + "serverAddresses": ["ra.boulder:9094"], "timeout": "15s" }, "saService": { - "serverAddresses": ["sa1.boulder:9095", "sa2.boulder:9095"], + "serverAddresses": ["sa.boulder:9095"], "timeout": "15s" }, "certificateChains": { diff --git a/test/entrypoint.sh b/test/entrypoint.sh index 1c70754bf..73c6e66ff 100755 --- a/test/entrypoint.sh +++ b/test/entrypoint.sh @@ -31,17 +31,19 @@ wait_tcp_port boulder-mysql 3306 # create the database MYSQL_CONTAINER=1 $DIR/create_db.sh -# Delaying loading private key into SoftHSM container until now so that switching -# out the signing key doesn't require rebuilding the boulder-tools image. Only -# convert key to DER once per container. -wait_tcp_port boulder-hsm 5657 +if [ -n "${PKCS11_PROXY_SOCKET:-}" ]; then + # Delaying loading private key into SoftHSM container until now so that switching + # out the signing key doesn't require rebuilding the boulder-tools image. Only + # convert key to DER once per container. 
+ wait_tcp_port boulder-hsm 5657 -addkey() { - pkcs11-tool --module=/usr/local/lib/libpkcs11-proxy.so \ - --type privkey --pin 5678 --login --so-pin 1234 "$@"; -} -addkey --token-label intermediate --write-object test/test-ca.key.der --label intermediate_key -addkey --token-label root --write-object test/test-root.key.der --label root_key + addkey() { + pkcs11-tool --module=/usr/local/lib/libpkcs11-proxy.so \ + --type privkey --pin 5678 --login --so-pin 1234 "$@"; + } + addkey --token-label intermediate --write-object test/test-ca.key.der --label intermediate_key + addkey --token-label root --write-object test/test-root.key.der --label root_key +fi if [[ $# -eq 0 ]]; then exec ./start.py diff --git a/test/integration-test.py b/test/integration-test.py index 5b36c6acc..f60e2a6b0 100644 --- a/test/integration-test.py +++ b/test/integration-test.py @@ -270,7 +270,7 @@ def random_domain(): return "rand.%x.xyz" % random.randrange(2**32) def test_expiration_mailer(): - email_addr = "integration.%x@boulder.local" % random.randrange(2**16) + email_addr = "integration.%x@boulder" % random.randrange(2**16) cert, _ = auth_and_issue([random_domain()], email=email_addr) # Check that the expiration mailer sends a reminder expiry = datetime.datetime.strptime(cert.body.get_notAfter(), '%Y%m%d%H%M%SZ') @@ -616,6 +616,7 @@ def main(): run(args.custom) run_cert_checker() + check_balance() run_expired_authz_purger() if not startservers.check(): @@ -641,6 +642,30 @@ def run_loadtest(): -config test/load-generator/config/v2-integration-test-config.json\ -results %s" % latency_data_file) +def check_balance(): + """Verify that gRPC load balancing across backends is working correctly. + + Fetch metrics from each backend and ensure the grpc_server_handled_total + metric is present, which means that backend handled at least one request. 
+ """ + addresses = [ + "sa1.boulder:8003", + "sa2.boulder:8103", + "publisher1.boulder:8009", + "publisher2.boulder:8109", + "va1.boulder:8004", + "va2.boulder:8104", + "ca1.boulder:8001", + "ca2.boulder:8104", + "ra1.boulder:8002", + "ra2.boulder:8102", + ] + for address in addresses: + metrics = requests.get("http://%s/metrics" % address) + if not "grpc_server_handled_total" in metrics.text: + raise Exception("no gRPC traffic processed by %s; load balancing problem?" + % address) + def run_cert_checker(): run("./bin/cert-checker -config %s/cert-checker.json" % default_config_dir) diff --git a/test/sd-test-srv/main.go b/test/sd-test-srv/main.go new file mode 100644 index 000000000..a28683b1a --- /dev/null +++ b/test/sd-test-srv/main.go @@ -0,0 +1,84 @@ +// sd-test-srv runs a simple service discovery system; it returns two hardcoded +// IP addresses for every query whose name ends in ".boulder". +package main + +import ( + "flag" + "log" + "net" + "strings" + "time" + + "github.com/miekg/dns" +) + +func dnsHandler(w dns.ResponseWriter, r *dns.Msg) { + m := new(dns.Msg) + m.SetReply(r) + m.Compress = false + + if len(r.Question) != 1 { + m.Rcode = dns.RcodeServerFailure + w.WriteMsg(m) + return + } + if !strings.HasSuffix(r.Question[0].Name, ".boulder.") { + m.Rcode = dns.RcodeServerFailure + w.WriteMsg(m) + return + } + + hdr := dns.RR_Header{ + Name: r.Question[0].Name, + Rrtype: dns.TypeA, + Class: dns.ClassINET, + Ttl: 0, + } + + // These two hardcoded IPs correspond to the configured addresses for boulder + // in docker-compose.yml. In our Docker setup, boulder is present on two + // networks, rednet and bluenet, with a different IP address on each. This + // allows us to test load balancing across gRPC backends. 
+ m.Answer = append(m.Answer, &dns.A{ + A: net.ParseIP("10.77.77.77"), + Hdr: hdr, + }, &dns.A{ + A: net.ParseIP("10.88.88.88"), + Hdr: hdr, + }) + + w.WriteMsg(m) + return +} + +func main() { + listen := flag.String("listen", ":53", "Address and port to listen on.") + flag.Parse() + if *listen == "" { + flag.Usage() + return + } + dns.HandleFunc(".", dnsHandler) + go func() { + srv := dns.Server{ + Addr: *listen, + Net: "tcp", + ReadTimeout: time.Second, + WriteTimeout: time.Second, + } + err := srv.ListenAndServe() + if err != nil { + log.Fatal(err) + } + }() + srv := dns.Server{ + Addr: *listen, + Net: "udp", + ReadTimeout: time.Second, + WriteTimeout: time.Second, + } + err := srv.ListenAndServe() + if err != nil { + log.Fatal(err) + } +} diff --git a/test/startservers.py b/test/startservers.py index e63f6bc16..05105b603 100644 --- a/test/startservers.py +++ b/test/startservers.py @@ -40,7 +40,7 @@ def run(cmd, race_detection, fakeclock): def waitport(port, prog): """Wait until a port on localhost is open.""" - while True: + for _ in range(1000): try: time.sleep(0.1) # If one of the servers has died, quit immediately. @@ -49,13 +49,13 @@ def waitport(port, prog): s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.connect(('localhost', port)) s.close() - break + return True except socket.error as e: if e.errno == errno.ECONNREFUSED: print "Waiting for debug port %d (%s)" % (port, prog) else: raise - return True + raise Exception("timed out waiting for debug port %d (%s)" % (port, prog)) def start(race_detection, fakeclock=None): """Return True if everything builds and starts. 
@@ -79,6 +79,7 @@ def start(race_detection, fakeclock=None): [8012, 'boulder-va --config %s' % os.path.join(default_config_dir, "va-remote-b.json")], ]) progs.extend([ + [53, 'sd-test-srv --listen :53'], # Service discovery DNS server [8003, 'boulder-sa --config %s --addr sa1.boulder:9095 --debug-addr :8003' % os.path.join(default_config_dir, "sa.json")], [8103, 'boulder-sa --config %s --addr sa2.boulder:9095 --debug-addr :8103' % os.path.join(default_config_dir, "sa.json")], [4500, 'ct-test-srv --config test/ct-test-srv/ct-test-srv.json'],