Remote VA logging and stats (#3063)
Add a logging statement that fires when a remote VA fail causes overall failure. Also change remoteValidationFailures into a counter that counts the same thing, instead of a histogram. Since the histogram had the default bucket sizes, it failed to collect what we needed, and produced more metrics than necessary.
This commit is contained in:
parent
3b6a5ff63c
commit
568407e5b8
|
@ -601,7 +601,7 @@ func TestRetry(t *testing.T) {
|
|||
t.Errorf("#%d, error, expectedCount %v, got %v", i, tc.expectedCount, tc.te.count)
|
||||
}
|
||||
if tc.metricsAllRetries > 0 {
|
||||
test.AssertEquals(t, test.CountCounter(
|
||||
test.AssertEquals(t, test.CountCounterVec(
|
||||
"qtype",
|
||||
"TXT",
|
||||
dr.usedAllRetriesCounter), tc.metricsAllRetries)
|
||||
|
@ -635,7 +635,7 @@ func TestRetry(t *testing.T) {
|
|||
t.Errorf("expected %s, got %s", context.DeadlineExceeded, err)
|
||||
}
|
||||
|
||||
test.AssertEquals(t, test.CountCounter(
|
||||
test.AssertEquals(t, test.CountCounterVec(
|
||||
"qtype",
|
||||
"TXT",
|
||||
dr.cancelCounter), 3)
|
||||
|
|
|
@ -766,8 +766,8 @@ func countMustStaple(t *testing.T, cert *x509.Certificate) (count int) {
|
|||
func issueCertificateSubTestMustStapleWhenDisabled(t *testing.T, i *TestCertificateIssuance) {
|
||||
// With ca.enableMustStaple = false, should issue successfully and not add
|
||||
// Must Staple.
|
||||
test.AssertEquals(t, test.CountCounter(csrExtensionCategory, csrExtensionTLSFeature, i.ca.csrExtensionCount), 1)
|
||||
test.AssertEquals(t, test.CountCounter(csrExtensionCategory, csrExtensionTLSFeatureInvalid, i.ca.csrExtensionCount), 0)
|
||||
test.AssertEquals(t, test.CountCounterVec(csrExtensionCategory, csrExtensionTLSFeature, i.ca.csrExtensionCount), 1)
|
||||
test.AssertEquals(t, test.CountCounterVec(csrExtensionCategory, csrExtensionTLSFeatureInvalid, i.ca.csrExtensionCount), 0)
|
||||
test.AssertEquals(t, signatureCountByPurpose("cert", i.ca.signatureCount), 1)
|
||||
test.AssertEquals(t, countMustStaple(t, i.cert), 0)
|
||||
}
|
||||
|
@ -776,20 +776,20 @@ func issueCertificateSubTestMustStapleWhenEnabled(t *testing.T, i *TestCertifica
|
|||
// With ca.enableMustStaple = true, a TLS feature extension should put a must-staple
|
||||
// extension into the cert. Even if there are multiple TLS Feature extensions, only
|
||||
// one extension should be included.
|
||||
test.AssertEquals(t, test.CountCounter(csrExtensionCategory, csrExtensionTLSFeature, i.ca.csrExtensionCount), 1)
|
||||
test.AssertEquals(t, test.CountCounter(csrExtensionCategory, csrExtensionTLSFeatureInvalid, i.ca.csrExtensionCount), 0)
|
||||
test.AssertEquals(t, test.CountCounterVec(csrExtensionCategory, csrExtensionTLSFeature, i.ca.csrExtensionCount), 1)
|
||||
test.AssertEquals(t, test.CountCounterVec(csrExtensionCategory, csrExtensionTLSFeatureInvalid, i.ca.csrExtensionCount), 0)
|
||||
test.AssertEquals(t, signatureCountByPurpose("cert", i.ca.signatureCount), 1)
|
||||
test.AssertEquals(t, countMustStaple(t, i.cert), 1)
|
||||
}
|
||||
|
||||
func issueCertificateSubTestTLSFeatureUnknown(t *testing.T, ca *CertificateAuthorityImpl, _ *mockSA) {
|
||||
test.AssertEquals(t, test.CountCounter(csrExtensionCategory, csrExtensionTLSFeature, ca.csrExtensionCount), 1)
|
||||
test.AssertEquals(t, test.CountCounter(csrExtensionCategory, csrExtensionTLSFeatureInvalid, ca.csrExtensionCount), 1)
|
||||
test.AssertEquals(t, test.CountCounterVec(csrExtensionCategory, csrExtensionTLSFeature, ca.csrExtensionCount), 1)
|
||||
test.AssertEquals(t, test.CountCounterVec(csrExtensionCategory, csrExtensionTLSFeatureInvalid, ca.csrExtensionCount), 1)
|
||||
}
|
||||
|
||||
func issueCertificateSubTestUnknownExtension(t *testing.T, i *TestCertificateIssuance) {
|
||||
// Unsupported extensions in the CSR should be silently ignored.
|
||||
test.AssertEquals(t, test.CountCounter(csrExtensionCategory, csrExtensionOther, i.ca.csrExtensionCount), 1)
|
||||
test.AssertEquals(t, test.CountCounterVec(csrExtensionCategory, csrExtensionOther, i.ca.csrExtensionCount), 1)
|
||||
test.AssertEquals(t, signatureCountByPurpose("cert", i.ca.signatureCount), 1)
|
||||
|
||||
// NOTE: The hard-coded value here will have to change over time as Boulder
|
||||
|
@ -806,7 +806,7 @@ func issueCertificateSubTestCTPoisonExtension(t *testing.T, i *TestCertificateIs
|
|||
// unknown extension, whether it has a valid or invalid value. The check
|
||||
// for whether or not the poison extension is present in the issued
|
||||
// certificate/precertificate is done in the caller.
|
||||
test.AssertEquals(t, test.CountCounter(csrExtensionCategory, csrExtensionOther, i.ca.csrExtensionCount), 1)
|
||||
test.AssertEquals(t, test.CountCounterVec(csrExtensionCategory, csrExtensionOther, i.ca.csrExtensionCount), 1)
|
||||
test.AssertEquals(t, signatureCountByPurpose("cert", i.ca.signatureCount), 1)
|
||||
}
|
||||
|
||||
|
@ -820,5 +820,5 @@ func findExtension(extensions []pkix.Extension, id asn1.ObjectIdentifier) *pkix.
|
|||
}
|
||||
|
||||
func signatureCountByPurpose(signatureType string, signatureCount *prometheus.CounterVec) int {
|
||||
return test.CountCounter("purpose", signatureType, signatureCount)
|
||||
return test.CountCounterVec("purpose", signatureType, signatureCount)
|
||||
}
|
||||
|
|
|
@ -10,6 +10,7 @@ import (
|
|||
"runtime"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_model/go"
|
||||
|
@ -156,11 +157,22 @@ func AssertBetween(t *testing.T, a, b, c int64) {
|
|||
}
|
||||
}
|
||||
|
||||
// CountCounterVecVec returns the count by label and value of a prometheus metric
|
||||
func CountCounterVec(labelName string, value string, counterVec *prometheus.CounterVec) int {
|
||||
return CountCounter(counterVec.With(prometheus.Labels{labelName: value}))
|
||||
}
|
||||
|
||||
// CountCounter returns the count by label and value of a prometheus metric
|
||||
func CountCounter(labelName string, value string, counter *prometheus.CounterVec) int {
|
||||
func CountCounter(counter prometheus.Counter) int {
|
||||
ch := make(chan prometheus.Metric, 10)
|
||||
counter.With(prometheus.Labels{labelName: value}).Collect(ch)
|
||||
m := <-ch
|
||||
counter.Collect(ch)
|
||||
var m prometheus.Metric
|
||||
select {
|
||||
case <-time.After(time.Second):
|
||||
fmt.Println("timed out collecting metrics")
|
||||
return 0
|
||||
case m = <-ch:
|
||||
}
|
||||
var iom io_prometheus_client.Metric
|
||||
_ = m.Write(&iom)
|
||||
return int(iom.Counter.GetValue())
|
||||
|
|
15
va/va.go
15
va/va.go
|
@ -59,7 +59,7 @@ type RemoteVA struct {
|
|||
type vaMetrics struct {
|
||||
validationTime *prometheus.HistogramVec
|
||||
remoteValidationTime *prometheus.HistogramVec
|
||||
remoteValidationFailures *prometheus.HistogramVec
|
||||
remoteValidationFailures prometheus.Counter
|
||||
}
|
||||
|
||||
func initMetrics(stats metrics.Scope) *vaMetrics {
|
||||
|
@ -77,11 +77,11 @@ func initMetrics(stats metrics.Scope) *vaMetrics {
|
|||
},
|
||||
[]string{"type", "result"})
|
||||
stats.MustRegister(remoteValidationTime)
|
||||
remoteValidationFailures := prometheus.NewHistogramVec(
|
||||
prometheus.HistogramOpts{
|
||||
remoteValidationFailures := prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "remote_validation_failures",
|
||||
Help: "Number of remote VAs that failed during challenge validation",
|
||||
}, nil)
|
||||
Help: "Number of validations failed due to remote VAs returning failure",
|
||||
})
|
||||
stats.MustRegister(remoteValidationFailures)
|
||||
|
||||
return &vaMetrics{
|
||||
|
@ -851,7 +851,6 @@ func (va *ValidationAuthorityImpl) performRemoteValidation(ctx context.Context,
|
|||
"type": string(challenge.Type),
|
||||
"result": state,
|
||||
}).Observe(va.clk.Since(s).Seconds())
|
||||
va.metrics.remoteValidationFailures.With(prometheus.Labels{}).Observe(float64(bad))
|
||||
}
|
||||
|
||||
// PerformValidation validates the given challenge. It always returns a list of
|
||||
|
@ -893,6 +892,10 @@ func (va *ValidationAuthorityImpl) PerformValidation(ctx context.Context, domain
|
|||
challenge.Status = core.StatusInvalid
|
||||
challenge.Error = prob
|
||||
logEvent.Error = prob.Error()
|
||||
va.log.Info(fmt.Sprintf(
|
||||
"Validation failed due to remote failures: identifier=%v err=%s",
|
||||
authz.Identifier, prob))
|
||||
va.metrics.remoteValidationFailures.Inc()
|
||||
} else {
|
||||
challenge.Status = core.StatusValid
|
||||
}
|
||||
|
|
|
@ -1305,6 +1305,7 @@ func TestPerformRemoteValidation(t *testing.T) {
|
|||
ms.mu.Lock()
|
||||
delete(ms.allowedUAs, "remote 1")
|
||||
ms.mu.Unlock()
|
||||
mockLog := blog.NewMock()
|
||||
localVA.performRemoteValidation(context.Background(), "localhost", chall, core.Authorization{}, probCh)
|
||||
prob = <-probCh
|
||||
if prob == nil {
|
||||
|
@ -1333,6 +1334,7 @@ func TestPerformRemoteValidation(t *testing.T) {
|
|||
}
|
||||
|
||||
// Local and remote 2 working, should fail
|
||||
localVA.log = mockLog
|
||||
ms.mu.Lock()
|
||||
ms.allowedUAs["local"] = struct{}{}
|
||||
delete(ms.allowedUAs, "remote 1")
|
||||
|
@ -1341,6 +1343,14 @@ func TestPerformRemoteValidation(t *testing.T) {
|
|||
if err == nil {
|
||||
t.Error("PerformValidation didn't fail when one 'remote' validation failed")
|
||||
}
|
||||
failLogs := mockLog.GetAllMatching(`Validation failed due to remote failures`)
|
||||
if len(failLogs) == 0 {
|
||||
t.Error("Expected log line about failure due to remote failures, didn't get it")
|
||||
}
|
||||
remoteFailMetric := test.CountCounter(localVA.metrics.remoteValidationFailures)
|
||||
if remoteFailMetric != 1 {
|
||||
t.Errorf("Expected remote_validation_failures to be incremented, but it wasn't")
|
||||
}
|
||||
|
||||
// Local and remote 2 working with maxRemoteFailures == 1, should succeed
|
||||
localVA, _ = setup(ms.Server, 1)
|
||||
|
|
|
@ -1021,7 +1021,7 @@ func TestIssueCertificate(t *testing.T) {
|
|||
`{"type":"`+probs.V1ErrorNS+`malformed","detail":"CSR generated using a pre-1.0.2 OpenSSL with a client that doesn't properly specify the CSR version. See https://community.letsencrypt.org/t/openssl-bug-information/19591","status":400}`)
|
||||
|
||||
// Test the CSR signature type counter works
|
||||
test.AssertEquals(t, test.CountCounter("type", "SHA256-RSA", wfe.csrSignatureAlgs), 4)
|
||||
test.AssertEquals(t, test.CountCounterVec("type", "SHA256-RSA", wfe.csrSignatureAlgs), 4)
|
||||
}
|
||||
|
||||
func TestGetChallenge(t *testing.T) {
|
||||
|
|
|
@ -414,7 +414,7 @@ func TestValidPOSTRequest(t *testing.T) {
|
|||
test.AssertEquals(t, prob.Type, probs.MalformedProblem)
|
||||
test.AssertEquals(t, prob.HTTPStatus, tc.HTTPStatus)
|
||||
test.AssertEquals(t, prob.Detail, tc.ProblemDetail)
|
||||
test.AssertEquals(t, test.CountCounter(
|
||||
test.AssertEquals(t, test.CountCounterVec(
|
||||
"type", tc.ErrorStatType, wfe.stats.httpErrorCount), 1)
|
||||
})
|
||||
}
|
||||
|
@ -512,7 +512,7 @@ func TestEnforceJWSAuthType(t *testing.T) {
|
|||
test.AssertMarshaledEquals(t, prob, tc.ExpectedResult)
|
||||
}
|
||||
if tc.ErrorStatType != "" {
|
||||
test.AssertEquals(t, test.CountCounter(
|
||||
test.AssertEquals(t, test.CountCounterVec(
|
||||
"type", tc.ErrorStatType, wfe.stats.joseErrorCount), 1)
|
||||
}
|
||||
})
|
||||
|
@ -582,7 +582,7 @@ func TestValidNonce(t *testing.T) {
|
|||
test.AssertMarshaledEquals(t, prob, tc.ExpectedResult)
|
||||
}
|
||||
if tc.ErrorStatType != "" {
|
||||
test.AssertEquals(t, test.CountCounter(
|
||||
test.AssertEquals(t, test.CountCounterVec(
|
||||
"type", tc.ErrorStatType, wfe.stats.joseErrorCount), 1)
|
||||
}
|
||||
})
|
||||
|
@ -701,7 +701,7 @@ func TestValidPOSTURL(t *testing.T) {
|
|||
test.AssertMarshaledEquals(t, prob, tc.ExpectedResult)
|
||||
}
|
||||
if tc.ErrorStatType != "" {
|
||||
test.AssertEquals(t, test.CountCounter(
|
||||
test.AssertEquals(t, test.CountCounterVec(
|
||||
"type", tc.ErrorStatType, wfe.stats.joseErrorCount), 1)
|
||||
}
|
||||
})
|
||||
|
@ -857,7 +857,7 @@ func TestParseJWSRequest(t *testing.T) {
|
|||
test.AssertMarshaledEquals(t, prob, tc.ExpectedProblem)
|
||||
}
|
||||
if tc.ErrorStatType != "" {
|
||||
test.AssertEquals(t, test.CountCounter(
|
||||
test.AssertEquals(t, test.CountCounterVec(
|
||||
"type", tc.ErrorStatType, wfe.stats.joseErrorCount), 1)
|
||||
}
|
||||
})
|
||||
|
@ -1047,7 +1047,7 @@ func TestLookupJWK(t *testing.T) {
|
|||
test.AssertMarshaledEquals(t, prob, tc.ExpectedProblem)
|
||||
}
|
||||
if tc.ErrorStatType != "" {
|
||||
test.AssertEquals(t, test.CountCounter(
|
||||
test.AssertEquals(t, test.CountCounterVec(
|
||||
"type", tc.ErrorStatType, wfe.stats.joseErrorCount), 1)
|
||||
}
|
||||
})
|
||||
|
@ -1178,7 +1178,7 @@ func TestValidJWSForKey(t *testing.T) {
|
|||
test.AssertMarshaledEquals(t, prob, tc.ExpectedProblem)
|
||||
}
|
||||
if tc.ErrorStatType != "" {
|
||||
test.AssertEquals(t, test.CountCounter(
|
||||
test.AssertEquals(t, test.CountCounterVec(
|
||||
"type", tc.ErrorStatType, wfe.stats.joseErrorCount), 1)
|
||||
}
|
||||
})
|
||||
|
@ -1274,7 +1274,7 @@ func TestValidPOSTForAccount(t *testing.T) {
|
|||
test.AssertMarshaledEquals(t, prob, tc.ExpectedProblem)
|
||||
}
|
||||
if tc.ErrorStatType != "" {
|
||||
test.AssertEquals(t, test.CountCounter(
|
||||
test.AssertEquals(t, test.CountCounterVec(
|
||||
"type", tc.ErrorStatType, wfe.stats.joseErrorCount), 1)
|
||||
}
|
||||
})
|
||||
|
@ -1379,7 +1379,7 @@ func TestValidSelfAuthenticatedPOST(t *testing.T) {
|
|||
test.AssertMarshaledEquals(t, prob, tc.ExpectedProblem)
|
||||
}
|
||||
if tc.ErrorStatType != "" {
|
||||
test.AssertEquals(t, test.CountCounter(
|
||||
test.AssertEquals(t, test.CountCounterVec(
|
||||
"type", tc.ErrorStatType, wfe.stats.joseErrorCount), 1)
|
||||
}
|
||||
})
|
||||
|
@ -1462,7 +1462,7 @@ func TestMatchJWSURLs(t *testing.T) {
|
|||
test.AssertMarshaledEquals(t, prob, tc.ExpectedProblem)
|
||||
}
|
||||
if tc.ErrorStatType != "" {
|
||||
test.AssertEquals(t, test.CountCounter(
|
||||
test.AssertEquals(t, test.CountCounterVec(
|
||||
"type", tc.ErrorStatType, wfe.stats.joseErrorCount), 1)
|
||||
}
|
||||
})
|
||||
|
|
|
@ -1911,7 +1911,7 @@ func TestKeyRollover(t *testing.T) {
|
|||
wfe.KeyRollover(ctx, newRequestEvent(), responseWriter, makePostRequestWithPath("key-change", outer))
|
||||
test.AssertUnmarshaledEquals(t, responseWriter.Body.String(), tc.ExpectedResponse)
|
||||
if tc.ErrorStatType != "" {
|
||||
test.AssertEquals(t, test.CountCounter(
|
||||
test.AssertEquals(t, test.CountCounterVec(
|
||||
"type", tc.ErrorStatType, wfe.stats.joseErrorCount), 1)
|
||||
}
|
||||
})
|
||||
|
|
Loading…
Reference in New Issue