Deprecate EnforceMultiVA and MultiVAFullResults feature flags (#7520)

These flags have been true and false, respectively, for years. We do not
expect to change them at any time in the future, and their continued
existence makes certain parts of the VA code significantly more complex.

Remove all references to them, preserving behavior in the "enforce, but
not full results" configuration.

IN-10358 tracks the corresponding config changes
This commit is contained in:
Aaron Gable 2024-06-04 11:57:03 -07:00 committed by GitHub
parent f84050a20e
commit c3c278a1a2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 99 additions and 299 deletions

View File

@ -37,19 +37,12 @@ and
[`test/config-next/remoteva-b.json`](https://github.com/letsencrypt/boulder/blob/5c27eadb1db0605f380e41c8bd444a7f4ffe3c08/test/config-next/remoteva-b.json)
as their config files.
There are two feature flags that control whether multi-VA takes effect:
MultiVAFullResults and EnforceMultiVA. If MultiVAFullResults is enabled
then each primary validation will also send out remote validation requests, and
wait for all the results to come in, so we can log the results for analysis. If
EnforceMultiVA is enabled, we require that almost all remote validation requests
succeed. The primary VA's "maxRemoteValidationFailures" config field specifies
how many remote VAs can fail before the primary VA considers overall validation
a failure. It should be strictly less than the number of remote VAs.
Validation is also controlled by the "multiVAPolicyFile" config field on the
primary VA. This specifies a file that can contain temporary overrides for
domains or accounts that fail under multi-va. Over time those temporary
overrides will be removed.
We require that almost all remote validation requests succeed; the exact number
is controlled by the VA's `maxRemoteFailures` config variable. If the number of
failing remote VAs exceeds that threshold, validation is terminated. If the
number of successful remote VAs is high enough that it would be impossible for
the outstanding remote VAs to exceed that threshold, validation immediately
succeeds.
There are some integration tests that test this end to end. The most relevant is
probably

View File

@ -20,13 +20,8 @@ type Config struct {
CAAAfterValidation bool
AllowNoCommonName bool
SHA256SubjectKeyIdentifier bool
// EnforceMultiVA causes the VA to block on remote VA PerformValidation
// requests in order to make a valid/invalid decision with the results.
EnforceMultiVA bool
// MultiVAFullResults will cause the main VA to wait for all of the remote VA
// results, not just the threshold required to make a decision.
MultiVAFullResults bool
EnforceMultiVA bool
MultiVAFullResults bool
// ECDSAForAll enables all accounts, regardless of their presence in the CA's
// ecdsaAllowedAccounts config value, to get issuance from ECDSA issuers.

View File

@ -38,8 +38,6 @@
}
},
"features": {
"EnforceMultiVA": true,
"MultiVAFullResults": true,
"EnforceMultiCAA": true,
"MultiCAAFullResults": true,
"DOH": true

View File

@ -38,10 +38,7 @@
}
}
},
"features": {
"EnforceMultiVA": true,
"MultiVAFullResults": true
},
"features": {},
"remoteVAs": [
{
"serverAddress": "rva1.service.consul:9397",

View File

@ -212,7 +212,7 @@ func (va *ValidationAuthorityImpl) processRemoteCAAResults(
// If we are using `features.MultiCAAFullResults` then we haven't returned
// early and can now log the differential between what the primary VA saw and
// what all of the remote VAs saw.
va.logRemoteDifferentials(
va.logRemoteResults(
domain,
acctID,
challengeType,

186
va/va.go
View File

@ -23,7 +23,6 @@ import (
"github.com/letsencrypt/boulder/canceled"
"github.com/letsencrypt/boulder/core"
berrors "github.com/letsencrypt/boulder/errors"
"github.com/letsencrypt/boulder/features"
bgrpc "github.com/letsencrypt/boulder/grpc"
"github.com/letsencrypt/boulder/identifier"
blog "github.com/letsencrypt/boulder/log"
@ -82,21 +81,20 @@ type RemoteVA struct {
}
type vaMetrics struct {
validationTime *prometheus.HistogramVec
localValidationTime *prometheus.HistogramVec
remoteValidationTime *prometheus.HistogramVec
remoteValidationFailures prometheus.Counter
prospectiveRemoteValidationFailures prometheus.Counter
caaCheckTime *prometheus.HistogramVec
localCAACheckTime *prometheus.HistogramVec
remoteCAACheckTime *prometheus.HistogramVec
remoteCAACheckFailures prometheus.Counter
prospectiveRemoteCAACheckFailures prometheus.Counter
tlsALPNOIDCounter *prometheus.CounterVec
http01Fallbacks prometheus.Counter
http01Redirects prometheus.Counter
caaCounter *prometheus.CounterVec
ipv4FallbackCounter prometheus.Counter
validationTime *prometheus.HistogramVec
localValidationTime *prometheus.HistogramVec
remoteValidationTime *prometheus.HistogramVec
remoteValidationFailures prometheus.Counter
caaCheckTime *prometheus.HistogramVec
localCAACheckTime *prometheus.HistogramVec
remoteCAACheckTime *prometheus.HistogramVec
remoteCAACheckFailures prometheus.Counter
prospectiveRemoteCAACheckFailures prometheus.Counter
tlsALPNOIDCounter *prometheus.CounterVec
http01Fallbacks prometheus.Counter
http01Redirects prometheus.Counter
caaCounter *prometheus.CounterVec
ipv4FallbackCounter prometheus.Counter
}
func initMetrics(stats prometheus.Registerer) *vaMetrics {
@ -130,12 +128,6 @@ func initMetrics(stats prometheus.Registerer) *vaMetrics {
Help: "Number of validations failed due to remote VAs returning failure when consensus is enforced",
})
stats.MustRegister(remoteValidationFailures)
prospectiveRemoteValidationFailures := prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prospective_remote_validation_failures",
Help: "Number of validations that would have failed due to remote VAs returning failure if consesus were enforced",
})
stats.MustRegister(prospectiveRemoteValidationFailures)
caaCheckTime := prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "caa_check_time",
@ -204,21 +196,20 @@ func initMetrics(stats prometheus.Registerer) *vaMetrics {
stats.MustRegister(ipv4FallbackCounter)
return &vaMetrics{
validationTime: validationTime,
remoteValidationTime: remoteValidationTime,
localValidationTime: localValidationTime,
remoteValidationFailures: remoteValidationFailures,
prospectiveRemoteValidationFailures: prospectiveRemoteValidationFailures,
caaCheckTime: caaCheckTime,
localCAACheckTime: localCAACheckTime,
remoteCAACheckTime: remoteCAACheckTime,
remoteCAACheckFailures: remoteCAACheckFailures,
prospectiveRemoteCAACheckFailures: prospectiveRemoteCAACheckFailures,
tlsALPNOIDCounter: tlsALPNOIDCounter,
http01Fallbacks: http01Fallbacks,
http01Redirects: http01Redirects,
caaCounter: caaCounter,
ipv4FallbackCounter: ipv4FallbackCounter,
validationTime: validationTime,
remoteValidationTime: remoteValidationTime,
localValidationTime: localValidationTime,
remoteValidationFailures: remoteValidationFailures,
caaCheckTime: caaCheckTime,
localCAACheckTime: localCAACheckTime,
remoteCAACheckTime: remoteCAACheckTime,
remoteCAACheckFailures: remoteCAACheckFailures,
prospectiveRemoteCAACheckFailures: prospectiveRemoteCAACheckFailures,
tlsALPNOIDCounter: tlsALPNOIDCounter,
http01Fallbacks: http01Fallbacks,
http01Redirects: http01Redirects,
caaCounter: caaCounter,
ipv4FallbackCounter: ipv4FallbackCounter,
}
}
@ -528,23 +519,9 @@ func (va *ValidationAuthorityImpl) performRemoteValidation(
// processRemoteValidationResults evaluates a primary VA result, and a channel
// of remote VA problems to produce a single overall validation result based on
// configured feature flags. The overall result is calculated based on the VA's
// configured `maxRemoteFailures` value.
//
// If the `MultiVAFullResults` feature is enabled then
// `processRemoteValidationResults` will expect to read a result from the
// `remoteErrors` channel for each VA and will not produce an overall result
// until all remote VAs have responded. In this case `logRemoteDifferentials`
// will also be called to describe the differential between the primary and all
// of the remote VAs.
//
// If the `MultiVAFullResults` feature flag is not enabled then
// `processRemoteValidationResults` will potentially return before all remote
// VAs have had a chance to respond. This happens if the success or failure
// threshold is met. This doesn't allow for logging the differential between the
// primary and remote VAs but is more performant.
// configured `maxRemoteFailures` value, and the function returns as soon as
// that threshold has been exceeded or cannot possibly be exceeded.
func (va *ValidationAuthorityImpl) processRemoteValidationResults(
domain string,
acctID int64,
challengeType string,
remoteResultsChan <-chan *remoteVAResult) *probs.ProblemDetails {
@ -575,60 +552,36 @@ func (va *ValidationAuthorityImpl) processRemoteValidationResults(
} else {
bad++
}
// Store the first non-nil problem to return later (if `MultiVAFullResults`
// is enabled).
// Store the first non-nil problem to return later.
if firstProb == nil && result.Problem != nil {
firstProb = result.Problem
}
// If MultiVAFullResults isn't enabled then return early whenever the
// success or failure threshold is met.
if !features.Get().MultiVAFullResults {
if good >= required {
state = "success"
return nil
} else if bad > va.maxRemoteFailures {
modifiedProblem := *result.Problem
modifiedProblem.Detail = "During secondary validation: " + firstProb.Detail
return &modifiedProblem
}
// Return as soon as we have enough successes or failures for a definitive result.
if good >= required {
state = "success"
return nil
} else if bad > va.maxRemoteFailures {
modifiedProblem := *result.Problem
modifiedProblem.Detail = "During secondary validation: " + firstProb.Detail
return &modifiedProblem
}
// If we haven't returned early because of MultiVAFullResults being enabled
// we need to break the loop once all of the VAs have returned a result.
// If we somehow haven't returned early, we need to break the loop once all
// of the VAs have returned a result.
if len(remoteResults) == len(va.remoteVAs) {
break
}
}
// If we are using `features.MultiVAFullResults` then we haven't returned
// early and can now log the differential between what the primary VA saw and
// what all of the remote VAs saw.
va.logRemoteDifferentials(
domain,
acctID,
challengeType,
remoteResults)
// Based on the threshold of good/bad return nil or a problem.
if good >= required {
state = "success"
return nil
} else if bad > va.maxRemoteFailures {
modifiedProblem := *firstProb
modifiedProblem.Detail = "During secondary validation: " + firstProb.Detail
va.metrics.prospectiveRemoteValidationFailures.Inc()
return &modifiedProblem
}
// This condition should not occur - it indicates the good/bad counts didn't
// meet either the required threshold or the maxRemoteFailures threshold.
return probs.ServerInternal("Too few remote PerformValidation RPC results")
}
// logRemoteDifferentials is called by `processRemoteValidationResults` when the
// `MultiVAFullResults` feature flag is enabled and `processRemoteCAAResults`
// logRemoteResults is called by `processRemoteCAAResults` when the
// `MultiCAAFullResults` feature flag is enabled. It produces a JSON log line
// that contains the primary VA result and the results each remote VA returned.
func (va *ValidationAuthorityImpl) logRemoteDifferentials(
// that contains the results each remote VA returned.
func (va *ValidationAuthorityImpl) logRemoteResults(
domain string,
acctID int64,
challengeType string,
@ -726,41 +679,24 @@ func (va *ValidationAuthorityImpl) PerformValidation(ctx context.Context, req *v
logEvent.Error = prob.Error()
logEvent.InternalError = err.Error()
} else if remoteResults != nil {
if !features.Get().EnforceMultiVA && features.Get().MultiVAFullResults {
go func() {
_ = va.processRemoteValidationResults(
req.Domain,
req.Authz.RegID,
string(challenge.Type),
remoteResults)
}()
// Since prob was nil and we're not enforcing the results from
// `processRemoteValidationResults` set the challenge status to
// valid so the validationTime metrics increment has the correct
// result label.
challenge.Status = core.StatusValid
} else if features.Get().EnforceMultiVA {
remoteProb := va.processRemoteValidationResults(
req.Domain,
req.Authz.RegID,
string(challenge.Type),
remoteResults)
remoteProb := va.processRemoteValidationResults(
string(challenge.Type),
remoteResults)
// If the remote result was a non-nil problem then fail the validation
if remoteProb != nil {
prob = remoteProb
challenge.Status = core.StatusInvalid
challenge.Error = remoteProb
// We only set .Error here, not .InternalError, because the
// remote VA doesn't send us the internal error. But that's ok,
// it got logged at the remote VA.
logEvent.Error = remoteProb.Error()
va.log.Infof("Validation failed due to remote failures: identifier=%v err=%s",
req.Domain, remoteProb)
va.metrics.remoteValidationFailures.Inc()
} else {
challenge.Status = core.StatusValid
}
// If the remote result was a non-nil problem then fail the validation
if remoteProb != nil {
prob = remoteProb
challenge.Status = core.StatusInvalid
challenge.Error = remoteProb
// We only set .Error here, not .InternalError, because the
// remote VA doesn't send us the internal error. But that's ok,
// it got logged at the remote VA.
logEvent.Error = remoteProb.Error()
va.log.Infof("Validation failed due to remote failures: identifier=%v err=%s",
req.Domain, remoteProb)
va.metrics.remoteValidationFailures.Inc()
} else {
challenge.Status = core.StatusValid
}
} else {
challenge.Status = core.StatusValid

View File

@ -395,20 +395,6 @@ func TestMultiVA(t *testing.T) {
VAClient: cancelledVA{},
CAAClient: cancelledVA{},
}
enforceMultiVA := features.Config{
EnforceMultiVA: true,
}
enforceMultiVAFullResults := features.Config{
EnforceMultiVA: true,
MultiVAFullResults: true,
}
noEnforceMultiVA := features.Config{
EnforceMultiVA: false,
}
noEnforceMultiVAFullResults := features.Config{
EnforceMultiVA: false,
MultiVAFullResults: true,
}
unauthorized := probs.Unauthorized(fmt.Sprintf(
`The key authorization file from the server did not match this challenge. Expected %q (got "???")`,
@ -420,124 +406,68 @@ func TestMultiVA(t *testing.T) {
Name string
RemoteVAs []RemoteVA
AllowedUAs map[string]bool
Features features.Config
ExpectedProb *probs.ProblemDetails
ExpectedLog string
}{
{
// With local and both remote VAs working there should be no problem.
Name: "Local and remote VAs OK, enforce multi VA",
Name: "Local and remote VAs OK",
RemoteVAs: remoteVAs,
AllowedUAs: allowedUAs,
Features: enforceMultiVA,
},
{
// Ditto if multi VA enforcement is disabled
Name: "Local and remote VAs OK, no enforce multi VA",
RemoteVAs: remoteVAs,
AllowedUAs: allowedUAs,
Features: noEnforceMultiVA,
},
{
// If the local VA fails everything should fail
Name: "Local VA bad, remote VAs OK, no enforce multi VA",
Name: "Local VA bad, remote VAs OK",
RemoteVAs: remoteVAs,
AllowedUAs: map[string]bool{remoteUA1: true, remoteUA2: true},
Features: noEnforceMultiVA,
ExpectedProb: unauthorized,
},
{
// Ditto when enforcing remote VA
Name: "Local VA bad, remote VAs OK, enforce multi VA",
RemoteVAs: remoteVAs,
AllowedUAs: map[string]bool{remoteUA1: true, remoteUA2: true},
Features: enforceMultiVA,
ExpectedProb: unauthorized,
},
{
// If a remote VA fails with an internal err it should fail when enforcing multi VA
Name: "Local VA ok, remote VA internal err, enforce multi VA",
// If a remote VA fails with an internal err it should fail
Name: "Local VA ok, remote VA internal err",
RemoteVAs: []RemoteVA{
{remoteVA1, remoteUA1},
{brokenVA, "broken"},
},
AllowedUAs: allowedUAs,
Features: enforceMultiVA,
ExpectedProb: probs.ServerInternal("During secondary validation: Remote PerformValidation RPC failed"),
// The real failure cause should be logged
ExpectedLog: expectedInternalErrLine,
},
{
// If a remote VA fails with an internal err it should not fail when not
// enforcing multi VA
Name: "Local VA ok, remote VA internal err, no enforce multi VA",
RemoteVAs: []RemoteVA{
{remoteVA1, remoteUA1},
{brokenVA, "broken"},
},
AllowedUAs: allowedUAs,
Features: noEnforceMultiVA,
// Like above, the real failure cause will be logged eventually, but that
// will happen asynchronously. It's not guaranteed to happen before the
// test case exits, so we don't check for it here.
},
{
// With only one working remote VA there should *not* be a validation
// failure when not enforcing multi VA.
Name: "Local VA and one remote VA OK, no enforce multi VA",
RemoteVAs: remoteVAs,
AllowedUAs: map[string]bool{localUA: true, remoteUA2: true},
Features: noEnforceMultiVA,
},
{
// With only one working remote VA there should be a validation failure
// when enforcing multi VA.
Name: "Local VA and one remote VA OK, enforce multi VA",
Name: "Local VA and one remote VA OK",
RemoteVAs: remoteVAs,
AllowedUAs: map[string]bool{localUA: true, remoteUA2: true},
Features: enforceMultiVA,
ExpectedProb: probs.Unauthorized(fmt.Sprintf(
`During secondary validation: The key authorization file from the server did not match this challenge. Expected %q (got "???")`,
expectedKeyAuthorization)),
},
{
// When enforcing multi-VA, any cancellations are a problem.
Name: "Local VA and one remote VA OK, one cancelled VA, enforce multi VA",
// Any remote VA cancellations are a problem.
Name: "Local VA and one remote VA OK, one cancelled VA",
RemoteVAs: []RemoteVA{
{remoteVA1, remoteUA1},
{cancelledVA, remoteUA2},
},
AllowedUAs: allowedUAs,
Features: enforceMultiVA,
ExpectedProb: probs.ServerInternal("During secondary validation: Remote PerformValidation RPC canceled"),
},
{
// When enforcing multi-VA, any cancellations are a problem.
Name: "Local VA OK, two cancelled remote VAs, enforce multi VA",
// Any remote VA cancellations are a problem.
Name: "Local VA OK, two cancelled remote VAs",
RemoteVAs: []RemoteVA{
{cancelledVA, remoteUA1},
{cancelledVA, remoteUA2},
},
AllowedUAs: allowedUAs,
Features: enforceMultiVA,
ExpectedProb: probs.ServerInternal("During secondary validation: Remote PerformValidation RPC canceled"),
},
{
// With the local and remote VAs seeing diff problems and the full results
// feature flag on but multi VA enforcement off we expect
// no problem.
Name: "Local and remote VA differential, full results, no enforce multi VA",
RemoteVAs: remoteVAs,
AllowedUAs: map[string]bool{localUA: true},
Features: noEnforceMultiVAFullResults,
},
{
// With the local and remote VAs seeing diff problems and the full results
// feature flag on and multi VA enforcement on we expect a problem.
// With the local and remote VAs seeing diff problems, we expect a problem.
Name: "Local and remote VA differential, full results, enforce multi VA",
RemoteVAs: remoteVAs,
AllowedUAs: map[string]bool{localUA: true},
Features: enforceMultiVAFullResults,
ExpectedProb: probs.Unauthorized(fmt.Sprintf(
`During secondary validation: The key authorization file from the server did not match this challenge. Expected %q (got "???")`,
expectedKeyAuthorization)),
@ -552,9 +482,6 @@ func TestMultiVA(t *testing.T) {
// Configure a primary VA with testcase remote VAs.
localVA, mockLog := setup(ms.Server, 0, localUA, tc.RemoteVAs, nil)
features.Set(tc.Features)
defer features.Reset()
// Perform all validations
res, _ := localVA.PerformValidation(ctx, req)
if res.Problems == nil && tc.ExpectedProb != nil {
@ -601,66 +528,28 @@ func TestMultiVAEarlyReturn(t *testing.T) {
}
// Create a local test VA with the two remote VAs
localVA, mockLog := setup(ms.Server, 0, localUA, remoteVAs, nil)
testCases := []struct {
Name string
EarlyReturn bool
}{
{
Name: "One slow remote VA, no early return",
},
{
Name: "One slow remote VA, early return",
EarlyReturn: true,
},
}
earlyReturnFeatures := features.Config{
EnforceMultiVA: true,
MultiVAFullResults: false,
}
noEarlyReturnFeatures := features.Config{
EnforceMultiVA: true,
MultiVAFullResults: true,
}
localVA, _ := setup(ms.Server, 0, localUA, remoteVAs, nil)
// Perform all validations
start := time.Now()
req := createValidationRequest("localhost", core.ChallengeTypeHTTP01)
for _, tc := range testCases {
t.Run(tc.Name, func(t *testing.T) {
mockLog.Clear()
res, _ := localVA.PerformValidation(ctx, req)
var err error
if tc.EarlyReturn {
features.Set(earlyReturnFeatures)
} else {
features.Set(noEarlyReturnFeatures)
}
test.AssertNotError(t, err, "Failed to set MultiVAFullResults feature flag")
defer features.Reset()
// It should always fail
if res.Problems == nil {
t.Error("expected prob from PerformValidation, got nil")
}
start := time.Now()
elapsed := time.Since(start).Round(time.Millisecond).Milliseconds()
// Perform all validations
res, _ := localVA.PerformValidation(ctx, req)
// It should always fail
if res.Problems == nil {
t.Error("expected prob from PerformValidation, got nil")
}
elapsed := time.Since(start).Round(time.Millisecond).Milliseconds()
// The slow UA should sleep for `slowRemoteSleepMillis`. In the early return
// case the first remote VA should fail the overall validation and a prob
// should be returned quickly (i.e. in less than half of `slowRemoteSleepMillis`).
// In the non-early return case we don't expect a problem until
// `slowRemoteSleepMillis`.
if tc.EarlyReturn && elapsed > slowRemoteSleepMillis/2 {
t.Errorf(
"Expected an early return from PerformValidation in < %d ms, took %d ms",
slowRemoteSleepMillis/2, elapsed)
}
})
// The slow UA should sleep for `slowRemoteSleepMillis`. But the first remote
// VA should fail quickly and the early-return code should cause the overall
// overall validation to return a prob quickly (i.e. in less than half of
// `slowRemoteSleepMillis`).
if elapsed > slowRemoteSleepMillis/2 {
t.Errorf(
"Expected an early return from PerformValidation in < %d ms, took %d ms",
slowRemoteSleepMillis/2, elapsed)
}
}
@ -691,14 +580,6 @@ func TestMultiVAPolicy(t *testing.T) {
// Create a local test VA with the two remote VAs
localVA, _ := setup(ms.Server, 0, localUA, remoteVAs, nil)
// Ensure multi VA enforcement is enabled, don't wait for full multi VA
// results.
features.Set(features.Config{
EnforceMultiVA: true,
MultiVAFullResults: false,
})
defer features.Reset()
// Perform validation for a domain not in the disabledDomains list
req := createValidationRequest("letsencrypt.org", core.ChallengeTypeHTTP01)
res, _ := localVA.PerformValidation(ctx, req)
@ -814,7 +695,7 @@ func TestLogRemoteDifferentials(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
mockLog.Clear()
localVA.logRemoteDifferentials(
localVA.logRemoteResults(
"example.com", 1999, "blorpus-01", tc.remoteProbs)
lines := mockLog.GetAllMatching("remoteVADifferentials JSON=.*")