boulder/cdr/resolver.go

248 lines
7.5 KiB
Go

package cdr
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"io/ioutil"
"net/http"
"net/url"
"sort"
"strconv"
"time"
"github.com/miekg/dns"
"golang.org/x/net/context"
"golang.org/x/net/context/ctxhttp"
"github.com/letsencrypt/boulder/core"
blog "github.com/letsencrypt/boulder/log"
"github.com/letsencrypt/boulder/metrics"
)
// We have found a number of network operators which block or drop CAA
// queries that pass through their network which leads to consistent
// timeout failures from certain network perspectives. We have been
// unable to find a network solution to this so we are required to
// implement a multi-path resolution technique. This is a real hack and
// to be honest probably not the best solution to this problem. Ideally
// we would control our own distributed multi-path resolver but there
// are no publicly available ones.
//
// This implementation talks to the Google Public DNS resolver over
// multiple paths using HTTP proxies with geographically distributed
// endpoints. In case the Google resolver encounters the same issues we do
// multiple queries for the same name in parallel and we require a M of N
// quorum of responses to return the SUCCESS return code. In order to prevent
// the case where an attacker may be able to exploit the Google resolver in
// some way we also require that the records returned from all requests are
// the same (as far as I can tell the Google DNS implementation doesn't share
// cache state between the distributed nodes so this should be safe).
//
// Since DNS isn't a super secure protocol and Google has recently introduced
// a public HTTPS API for their DNS resolver we use that instead.
//
// API reference:
// https://developers.google.com/speed/public-dns/docs/dns-over-https#api_specification
var apiURI = "https://dns.google.com/resolve"
func parseAnswer(as []core.GPDNSAnswer) ([]*dns.CAA, error) {
rrs := []*dns.CAA{}
// only bother parsing out CAA records
for _, a := range as {
if a.Type != dns.TypeCAA {
continue
}
rr, err := dns.NewRR(fmt.Sprintf("%s %d IN CAA %s", a.Name, a.TTL, a.Data))
if err != nil {
return nil, err
}
if caaRR, ok := rr.(*dns.CAA); ok {
rrs = append(rrs, caaRR)
}
}
return rrs, nil
}
func createClient(proxy string) (*http.Client, string, error) {
u, err := url.Parse(proxy)
if err != nil {
return nil, "", err
}
transport := &http.Transport{
Proxy: http.ProxyURL(u),
TLSHandshakeTimeout: 10 * time.Second, // Same as http.DefaultTransport, doesn't override context
}
return &http.Client{
Transport: transport,
}, u.Host, nil
}
// CAADistributedResolver holds state needed to talk to GPDNS
type CAADistributedResolver struct {
URI string
Clients map[string]*http.Client
stats metrics.Scope
maxFailures int
timeout time.Duration
logger blog.Logger
}
// New returns an initialized CAADistributedResolver which requires a M of N
// quorum to succeed where N = len(proxies) and M = N - maxFailures
func New(scope metrics.Scope, timeout time.Duration, maxFailures int, proxies []string, logger blog.Logger) (*CAADistributedResolver, error) {
cdr := &CAADistributedResolver{
Clients: make(map[string]*http.Client, len(proxies)),
URI: apiURI,
stats: scope,
maxFailures: maxFailures,
timeout: timeout,
logger: logger,
}
for _, p := range proxies {
c, h, err := createClient(p)
if err != nil {
return nil, err
}
cdr.Clients[h] = c
}
return cdr, nil
}
// queryCAA sends the query request to the GPD API. If the return code is
// dns.RcodeSuccess the 'Answer' section is parsed for CAA records, otherwise
// an error is returned. Unlike bdns.DNSResolver.LookupCAA it will not repeat
// failed queries if the context has not expired as we expect to be running
// multiple queries in parallel and only need a M of N quorum (we also expect
// GPD to have quite good availability)
func (cdr *CAADistributedResolver) queryCAA(ctx context.Context, url string, ic *http.Client) ([]*dns.CAA, error) {
apiResp, err := ctxhttp.Get(ctx, ic, url)
if err != nil {
return nil, err
}
defer func() {
_ = apiResp.Body.Close()
}()
body, err := ioutil.ReadAll(&io.LimitedReader{R: apiResp.Body, N: 1024})
if err != nil {
return nil, err
}
if apiResp.StatusCode != http.StatusOK {
if string(body) != "" {
return nil, fmt.Errorf("Unexpected HTTP status code %d, body: %s", apiResp.StatusCode, body)
}
return nil, fmt.Errorf("Unexpected HTTP status code %d", apiResp.StatusCode)
}
var respObj core.GPDNSResponse
err = json.Unmarshal(body, &respObj)
if err != nil {
return nil, err
}
if respObj.Status != dns.RcodeSuccess {
if respObj.Comment != "" {
return nil, fmt.Errorf("Query failed with %s: %s", dns.RcodeToString[respObj.Status], respObj.Comment)
}
return nil, fmt.Errorf("Query failed wtih %s", dns.RcodeToString[respObj.Status])
}
return parseAnswer(respObj.Answer)
}
type queryResult struct {
records []*dns.CAA
err error
}
type caaSet []*dns.CAA
func (cs caaSet) Len() int { return len(cs) }
func (cs caaSet) Less(i, j int) bool { return cs[i].Value < cs[j].Value } // sort by value...?
func (cs caaSet) Swap(i, j int) { cs[i], cs[j] = cs[j], cs[i] }
func marshalCanonicalCAASet(set []*dns.CAA) ([]byte, error) {
var err error
offset, size := 0, 0
sortedSet := caaSet(set)
sort.Sort(sortedSet)
for _, rr := range sortedSet {
size += dns.Len(rr)
}
tbh := make([]byte, size)
for _, rr := range sortedSet {
ttl := rr.Hdr.Ttl
rr.Hdr.Ttl = 0 // only variable that should jitter
offset, err = dns.PackRR(rr, tbh, offset, nil, false)
if err != nil {
return nil, err
}
rr.Hdr.Ttl = ttl
}
return tbh, nil
}
// LookupCAA performs a multipath CAA DNS lookup using GPDNS
func (cdr *CAADistributedResolver) LookupCAA(ctx context.Context, domain string) ([]*dns.CAA, error) {
query := make(url.Values)
query.Add("name", domain)
query.Add("type", strconv.Itoa(int(dns.TypeCAA)))
uri, err := url.Parse(cdr.URI)
if err != nil {
return nil, err
}
uri.RawQuery = query.Encode()
uriStr := uri.String()
// min of ctx deadline and time.Now().Add(cdr.timeout)
caaCtx, cancel := context.WithTimeout(ctx, cdr.timeout)
defer cancel()
results := make(chan queryResult, len(cdr.Clients))
for addr, interfaceClient := range cdr.Clients {
go func(ic *http.Client, ia string) {
started := time.Now()
records, err := cdr.queryCAA(caaCtx, uriStr, ic)
cdr.stats.TimingDuration(fmt.Sprintf("CDR.GPDNS.Latency.%s", ia), time.Since(started))
if err != nil {
cdr.stats.Inc(fmt.Sprintf("CDR.GPDNS.Failures.%s", ia), 1)
cdr.logger.AuditErr(fmt.Sprintf("queryCAA failed [via %s]: %s", ia, err))
}
results <- queryResult{records, err}
}(interfaceClient, addr)
}
// collect everything
failed := 0
var CAAs []*dns.CAA
var canonicalSet []byte
for i := 0; i < len(cdr.Clients); i++ {
r := <-results
if r.err != nil {
failed++
if failed > cdr.maxFailures {
cdr.stats.Inc("CDR.QuorumFailed", 1)
cdr.logger.AuditErr(fmt.Sprintf("%d out of %d CAA queries failed", len(cdr.Clients), failed))
return nil, r.err
}
}
if CAAs == nil {
CAAs = r.records
canonicalSet, err = marshalCanonicalCAASet(CAAs)
if err != nil {
return nil, err
}
} else {
thisSet, err := marshalCanonicalCAASet(r.records)
if err != nil {
return nil, err
}
if len(r.records) != len(CAAs) || !bytes.Equal(thisSet, canonicalSet) {
cdr.stats.Inc("CDR.MismatchedSet", 1)
return nil, errors.New("mismatching CAA record sets were returned")
}
}
}
cdr.stats.Inc("CDR.Quorum", 1)
return CAAs, nil
}