RA: fix suberror identifier bug in CAA recheck. (#4259)

In the RA's recheckCAA function we loop through a list of *core.Authorizations, dispatching each to a Go routine that checks CAA for the authz and writes an error to a results channel.

Later, we iterate the same *core.Authorization list and read errors from the channel. If we get a non-nil error, then the current iteration's *core.Authorization is used as the identifier for the suberror created with the non-nil error.

This is a flawed approach and relies on the scheduling of recheck goroutines matching the iteration of the authorizations. When the goroutines write error results to the channel in an order that doesn't match the loop over the authorizations the RA will construct a suberror with the wrong identifier. This manifests as making the TestRecheckCAAFail unit test appear flaky, because it specifically checks the expected identifiers in the returned subproblems.

The fix involves writing both the checked authorization and the error result to the results channel. Later instead of iterating the authorizations we just read the correct number of results from the channel and use the attached authorization from the result when constructing a suberror.

Resolves #4248

Take away lessons:

Write unit tests and always verify expected values!
Always investigate flaky unit tests! Sometimes there's a real bug and not just a subpar test :-)
This commit is contained in:
Daniel McCarney 2019-06-12 12:13:47 -04:00 committed by Roland Bracewell Shoemaker
parent 098a761c02
commit daf311f41b
2 changed files with 31 additions and 13 deletions

View File

@ -854,7 +854,12 @@ func (ra *RegistrationAuthorityImpl) checkAuthorizationsCAA(
func (ra *RegistrationAuthorityImpl) recheckCAA(ctx context.Context, authzs []*core.Authorization) error {
ra.stats.Inc("recheck_caa", 1)
ra.stats.Inc("recheck_caa_authzs", int64(len(authzs)))
ch := make(chan error, len(authzs))
type authzCAAResult struct {
authz *core.Authorization
err error
}
ch := make(chan authzCAAResult, len(authzs))
for _, authz := range authzs {
go func(authz *core.Authorization) {
name := authz.Identifier.Value
@ -870,10 +875,12 @@ func (ra *RegistrationAuthorityImpl) recheckCAA(ctx context.Context, authzs []*c
}
}
if method == "" {
ch <- berrors.InternalServerError(
"Internal error determining validation method for authorization ID %v (%v)",
authz.ID, name,
)
ch <- authzCAAResult{
authz: authz,
err: berrors.InternalServerError(
"Internal error determining validation method for authorization ID %v (%v)",
authz.ID, name),
}
return
}
@ -891,16 +898,22 @@ func (ra *RegistrationAuthorityImpl) recheckCAA(ctx context.Context, authzs []*c
} else if resp.Problem != nil {
err = berrors.CAAError(*resp.Problem.Detail)
}
ch <- err
ch <- authzCAAResult{
authz: authz,
err: err,
}
}(authz)
}
var subErrors []berrors.SubBoulderError
for _, authz := range authzs {
err := <-ch
if err != nil {
// Read a recheckResult for each authz from the results channel
for i := 0; i < len(authzs); i++ {
recheckResult := <-ch
// If the result had a CAA boulder error, construct a suberror with the
// identifier from the authorization that was checked.
if err := recheckResult.err; err != nil {
if bErr, _ := err.(*berrors.BoulderError); berrors.Is(err, berrors.CAA) {
subErrors = append(subErrors, berrors.SubBoulderError{
Identifier: authz.Identifier,
Identifier: recheckResult.authz.Identifier,
BoulderError: bErr})
} else {
return err

View File

@ -17,6 +17,7 @@ import (
"net"
"net/url"
"os"
"regexp"
"sort"
"strconv"
"strings"
@ -2078,9 +2079,13 @@ func TestRecheckCAAFail(t *testing.T) {
// There should be two sub errors
test.AssertEquals(t, len(berr.SubErrors), 2)
// The top level error should reference the first failing identifier
if !strings.Contains(berr.Detail, `Rechecking CAA for "a.com" and 1 more identifiers`) {
t.Errorf("expected error to ref first failed identiifer, got %q", err)
// We don't know whether the asynchronous a.com or c.com CAA recheck will fail
// first. Whichever does will be mentioned in the top level problem detail.
expectedDetailRegex := regexp.MustCompile(
`Rechecking CAA for "(?:a\.com|c\.com)" and 1 more identifiers failed. Refer to sub-problems for more information`,
)
if !expectedDetailRegex.MatchString(berr.Detail) {
t.Errorf("expected suberror detail to match expected regex, got %q", err)
}
// There should be a sub error for both a.com and c.com with the correct type