From fcf817f67c28e269b142a35c2c3cd488445d595a Mon Sep 17 00:00:00 2001 From: Doug Fawley Date: Thu, 19 Dec 2019 08:53:07 -0800 Subject: [PATCH] dns: report errors from A record lookups instead of zero addresses (#3258) --- internal/resolver/dns/dns_resolver.go | 85 +++++++++++++--------- internal/resolver/dns/dns_resolver_test.go | 55 ++++++++++++-- 2 files changed, 98 insertions(+), 42 deletions(-) diff --git a/internal/resolver/dns/dns_resolver.go b/internal/resolver/dns/dns_resolver.go index 7705ca22e..1505754b0 100644 --- a/internal/resolver/dns/dns_resolver.go +++ b/internal/resolver/dns/dns_resolver.go @@ -204,8 +204,12 @@ func (d *dnsResolver) watcher() { case <-d.rn: } - state := d.lookup() - d.cc.UpdateState(*state) + state, err := d.lookup() + if err != nil { + d.cc.ReportError(err) + } else { + d.cc.UpdateState(*state) + } // Sleep to prevent excessive re-resolutions. Incoming resolution requests // will be queued in d.rn. @@ -219,33 +223,37 @@ func (d *dnsResolver) watcher() { } } -func (d *dnsResolver) lookupSRV() []resolver.Address { +func (d *dnsResolver) lookupSRV() ([]resolver.Address, error) { if !EnableSRVLookups { - return nil + return nil, nil } var newAddrs []resolver.Address _, srvs, err := d.resolver.LookupSRV(d.ctx, "grpclb", "tcp", d.host) if err != nil { - grpclog.Infof("grpc: failed dns SRV record lookup due to %v.\n", err) - return nil + err = handleDNSError(err, "SRV") // may become nil + return nil, err } for _, s := range srvs { lbAddrs, err := d.resolver.LookupHost(d.ctx, s.Target) if err != nil { - grpclog.Infof("grpc: failed load balancer address dns lookup due to %v.\n", err) - continue - } - for _, a := range lbAddrs { - a, ok := formatIP(a) - if !ok { - grpclog.Errorf("grpc: failed IP parsing due to %v.\n", err) + err = handleDNSError(err, "A") // may become nil + if err == nil { + // If there are other SRV records, look them up and ignore this + // one that does not exist. continue } - addr := a + ":" + strconv.Itoa(int(s.Port)) + return nil, err + } + for _, a := range lbAddrs { + ip, ok := formatIP(a) + if !ok { + return nil, fmt.Errorf("dns: error parsing A record IP address %v", a) + } + addr := ip + ":" + strconv.Itoa(int(s.Port)) newAddrs = append(newAddrs, resolver.Address{Addr: addr, Type: resolver.GRPCLB, ServerName: s.Target}) } } - return newAddrs + return newAddrs, nil } var filterError = func(err error) error { @@ -258,13 +266,19 @@ var filterError = func(err error) error { return err } +func handleDNSError(err error, lookupType string) error { + err = filterError(err) + if err != nil { + err = fmt.Errorf("dns: %v record lookup error: %v", lookupType, err) + grpclog.Infoln(err) + } + return err +} + func (d *dnsResolver) lookupTXT() *serviceconfig.ParseResult { ss, err := d.resolver.LookupTXT(d.ctx, txtPrefix+d.host) if err != nil { - err = filterError(err) - if err != nil { - err = fmt.Errorf("error from DNS TXT record lookup: %v", err) - grpclog.Infoln("grpc:", err) + if err = handleDNSError(err, "TXT"); err != nil { return &serviceconfig.ParseResult{Err: err} } return nil @@ -276,7 +290,7 @@ func (d *dnsResolver) lookupTXT() *serviceconfig.ParseResult { // TXT record must have "grpc_config=" attribute in order to be used as service config. if !strings.HasPrefix(res, txtAttribute) { - grpclog.Warningf("grpc: DNS TXT record %v missing %v attribute", res, txtAttribute) + grpclog.Warningf("dns: TXT record %v missing %v attribute", res, txtAttribute) // This is not an error; it is the equivalent of not having a service config. return nil } @@ -284,34 +298,37 @@ func (d *dnsResolver) lookupTXT() *serviceconfig.ParseResult { return d.cc.ParseServiceConfig(sc) } -func (d *dnsResolver) lookupHost() []resolver.Address { +func (d *dnsResolver) lookupHost() ([]resolver.Address, error) { var newAddrs []resolver.Address addrs, err := d.resolver.LookupHost(d.ctx, d.host) if err != nil { - grpclog.Warningf("grpc: failed dns A record lookup due to %v.\n", err) - return nil + err = handleDNSError(err, "A") + return nil, err } for _, a := range addrs { - a, ok := formatIP(a) + ip, ok := formatIP(a) if !ok { - grpclog.Errorf("grpc: failed IP parsing due to %v.\n", err) - continue + return nil, fmt.Errorf("dns: error parsing A record IP address %v", a) } - addr := a + ":" + d.port + addr := ip + ":" + d.port newAddrs = append(newAddrs, resolver.Address{Addr: addr}) } - return newAddrs + return newAddrs, nil } -func (d *dnsResolver) lookup() *resolver.State { - srv := d.lookupSRV() +func (d *dnsResolver) lookup() (*resolver.State, error) { + srv, srvErr := d.lookupSRV() + addrs, hostErr := d.lookupHost() + if hostErr != nil && (srvErr != nil || len(srv) == 0) { + return nil, hostErr + } state := &resolver.State{ - Addresses: append(d.lookupHost(), srv...), + Addresses: append(addrs, srv...), } if !d.disableServiceConfig { state.ServiceConfig = d.lookupTXT() } - return state + return state, nil } // formatIP returns ok = false if addr is not a valid textual representation of an IP address. @@ -397,12 +414,12 @@ func canaryingSC(js string) string { var rcs []rawChoice err := json.Unmarshal([]byte(js), &rcs) if err != nil { - grpclog.Warningf("grpc: failed to parse service config json string due to %v.\n", err) + grpclog.Warningf("dns: error parsing service config json: %v", err) return "" } cliHostname, err := os.Hostname() if err != nil { - grpclog.Warningf("grpc: failed to get client hostname due to %v.\n", err) + grpclog.Warningf("dns: error getting client hostname: %v", err) return "" } var sc string diff --git a/internal/resolver/dns/dns_resolver_test.go b/internal/resolver/dns/dns_resolver_test.go index 829d7b97f..aeb16de60 100644 --- a/internal/resolver/dns/dns_resolver_test.go +++ b/internal/resolver/dns/dns_resolver_test.go @@ -25,6 +25,7 @@ import ( "net" "os" "reflect" + "strings" "sync" "testing" "time" @@ -53,6 +54,7 @@ type testClientConn struct { m1 sync.Mutex state resolver.State updateStateCalls int + errChan chan error } func (t *testClientConn) UpdateState(s resolver.State) { @@ -87,8 +89,8 @@ func (t *testClientConn) ParseServiceConfig(s string) *serviceconfig.ParseResult return &serviceconfig.ParseResult{Config: unparsedServiceConfig{config: s}} } -func (t *testClientConn) ReportError(error) { - panic("not implemented") +func (t *testClientConn) ReportError(err error) { + t.errChan <- err } type testResolver struct { @@ -139,6 +141,9 @@ var hostLookupTbl = struct { "foo.bar.com": {"1.2.3.4", "5.6.7.8"}, "ipv4.single.fake": {"1.2.3.4"}, "srv.ipv4.single.fake": {"2.4.6.8"}, + "srv.ipv4.multi.fake": {}, + "srv.ipv6.single.fake": {}, + "srv.ipv6.multi.fake": {}, "ipv4.multi.fake": {"1.2.3.4", "5.6.7.8", "9.10.11.12"}, "ipv6.single.fake": {"2607:f8b0:400a:801::1001"}, "ipv6.multi.fake": {"2607:f8b0:400a:801::1001", "2607:f8b0:400a:801::1002", "2607:f8b0:400a:801::1003"}, @@ -148,10 +153,15 @@ var hostLookupTbl = struct { func hostLookup(host string) ([]string, error) { hostLookupTbl.Lock() defer hostLookupTbl.Unlock() - if addrs, cnt := hostLookupTbl.tbl[host]; cnt { + if addrs, ok := hostLookupTbl.tbl[host]; ok { return addrs, nil } - return nil, fmt.Errorf("failed to lookup host:%s resolution in hostLookupTbl", host) + return nil, &net.DNSError{ + Err: "hostLookup error", + Name: host, + Server: "fake", + IsTemporary: true, + } } var srvLookupTbl = struct { @@ -173,7 +183,12 @@ func srvLookup(service, proto, name string) (string, []*net.SRV, error) { if srvs, cnt := srvLookupTbl.tbl[cname]; cnt { return cname, srvs, nil } - return "", nil, fmt.Errorf("failed to lookup srv record for %s in srvLookupTbl", cname) + return "", nil, &net.DNSError{ + Err: "srvLookup error", + Name: cname, + Server: "fake", + IsTemporary: true, + } } // scfs contains an array of service config file string in JSON format. @@ -635,7 +650,12 @@ func txtLookup(host string) ([]string, error) { if scs, cnt := txtLookupTbl.tbl[host]; cnt { return scs, nil } - return nil, fmt.Errorf("failed to lookup TXT:%s resolution in txtLookupTbl", host) + return nil, &net.DNSError{ + Err: "txtLookup error", + Name: host, + Server: "fake", + IsTemporary: true, + } } func TestResolve(t *testing.T) { @@ -962,7 +982,7 @@ func TestResolveFunc(t *testing.T) { b := NewBuilder() for _, v := range tests { - cc := &testClientConn{target: v.addr} + cc := &testClientConn{target: v.addr, errChan: make(chan error, 1)} r, err := b.Build(resolver.Target{Endpoint: v.addr}, cc, resolver.BuildOptions{}) if err == nil { r.Close() @@ -1155,7 +1175,7 @@ func TestCustomAuthority(t *testing.T) { } b := NewBuilder() - cc := &testClientConn{target: "foo.bar.com"} + cc := &testClientConn{target: "foo.bar.com", errChan: make(chan error, 1)} r, err := b.Build(resolver.Target{Endpoint: "foo.bar.com", Authority: a.authority}, cc, resolver.BuildOptions{}) if err == nil { @@ -1276,3 +1296,22 @@ func TestRateLimitedResolve(t *testing.T) { t.Errorf("Resolved addresses of target: %q = %+v, want %+v\n", target, state.Addresses, wantAddrs) } } + +func TestReportError(t *testing.T) { + const target = "notfoundaddress" + cc := &testClientConn{target: target, errChan: make(chan error)} + b := NewBuilder() + r, err := b.Build(resolver.Target{Endpoint: target}, cc, resolver.BuildOptions{}) + if err != nil { + t.Fatalf("%v\n", err) + } + defer r.Close() + select { + case err := <-cc.errChan: + if !strings.Contains(err.Error(), "hostLookup error") { + t.Fatalf(`ReportError(err=%v) called; want err contains "hostLookupError"`, err) + } + case <-time.After(time.Second): + t.Fatalf("did not receive error after 1s") + } +}