Remote VA logging and stats (#3063)

Add a logging statement that fires when a remote VA fail causes
overall failure. Also change remoteValidationFailures into a
counter that counts the same thing, instead of a histogram. Since
the histogram had the default bucket sizes, it failed to collect
what we needed, and produced more metrics than necessary.
This commit is contained in:
Jacob Hoffman-Andrews 2017-09-11 12:50:50 -07:00 committed by Roland Bracewell Shoemaker
parent 3b6a5ff63c
commit 568407e5b8
8 changed files with 57 additions and 32 deletions

View File

@ -601,7 +601,7 @@ func TestRetry(t *testing.T) {
t.Errorf("#%d, error, expectedCount %v, got %v", i, tc.expectedCount, tc.te.count)
}
if tc.metricsAllRetries > 0 {
test.AssertEquals(t, test.CountCounter(
test.AssertEquals(t, test.CountCounterVec(
"qtype",
"TXT",
dr.usedAllRetriesCounter), tc.metricsAllRetries)
@ -635,7 +635,7 @@ func TestRetry(t *testing.T) {
t.Errorf("expected %s, got %s", context.DeadlineExceeded, err)
}
test.AssertEquals(t, test.CountCounter(
test.AssertEquals(t, test.CountCounterVec(
"qtype",
"TXT",
dr.cancelCounter), 3)

View File

@ -766,8 +766,8 @@ func countMustStaple(t *testing.T, cert *x509.Certificate) (count int) {
func issueCertificateSubTestMustStapleWhenDisabled(t *testing.T, i *TestCertificateIssuance) {
// With ca.enableMustStaple = false, should issue successfully and not add
// Must Staple.
test.AssertEquals(t, test.CountCounter(csrExtensionCategory, csrExtensionTLSFeature, i.ca.csrExtensionCount), 1)
test.AssertEquals(t, test.CountCounter(csrExtensionCategory, csrExtensionTLSFeatureInvalid, i.ca.csrExtensionCount), 0)
test.AssertEquals(t, test.CountCounterVec(csrExtensionCategory, csrExtensionTLSFeature, i.ca.csrExtensionCount), 1)
test.AssertEquals(t, test.CountCounterVec(csrExtensionCategory, csrExtensionTLSFeatureInvalid, i.ca.csrExtensionCount), 0)
test.AssertEquals(t, signatureCountByPurpose("cert", i.ca.signatureCount), 1)
test.AssertEquals(t, countMustStaple(t, i.cert), 0)
}
@ -776,20 +776,20 @@ func issueCertificateSubTestMustStapleWhenEnabled(t *testing.T, i *TestCertifica
// With ca.enableMustStaple = true, a TLS feature extension should put a must-staple
// extension into the cert. Even if there are multiple TLS Feature extensions, only
// one extension should be included.
test.AssertEquals(t, test.CountCounter(csrExtensionCategory, csrExtensionTLSFeature, i.ca.csrExtensionCount), 1)
test.AssertEquals(t, test.CountCounter(csrExtensionCategory, csrExtensionTLSFeatureInvalid, i.ca.csrExtensionCount), 0)
test.AssertEquals(t, test.CountCounterVec(csrExtensionCategory, csrExtensionTLSFeature, i.ca.csrExtensionCount), 1)
test.AssertEquals(t, test.CountCounterVec(csrExtensionCategory, csrExtensionTLSFeatureInvalid, i.ca.csrExtensionCount), 0)
test.AssertEquals(t, signatureCountByPurpose("cert", i.ca.signatureCount), 1)
test.AssertEquals(t, countMustStaple(t, i.cert), 1)
}
func issueCertificateSubTestTLSFeatureUnknown(t *testing.T, ca *CertificateAuthorityImpl, _ *mockSA) {
test.AssertEquals(t, test.CountCounter(csrExtensionCategory, csrExtensionTLSFeature, ca.csrExtensionCount), 1)
test.AssertEquals(t, test.CountCounter(csrExtensionCategory, csrExtensionTLSFeatureInvalid, ca.csrExtensionCount), 1)
test.AssertEquals(t, test.CountCounterVec(csrExtensionCategory, csrExtensionTLSFeature, ca.csrExtensionCount), 1)
test.AssertEquals(t, test.CountCounterVec(csrExtensionCategory, csrExtensionTLSFeatureInvalid, ca.csrExtensionCount), 1)
}
func issueCertificateSubTestUnknownExtension(t *testing.T, i *TestCertificateIssuance) {
// Unsupported extensions in the CSR should be silently ignored.
test.AssertEquals(t, test.CountCounter(csrExtensionCategory, csrExtensionOther, i.ca.csrExtensionCount), 1)
test.AssertEquals(t, test.CountCounterVec(csrExtensionCategory, csrExtensionOther, i.ca.csrExtensionCount), 1)
test.AssertEquals(t, signatureCountByPurpose("cert", i.ca.signatureCount), 1)
// NOTE: The hard-coded value here will have to change over time as Boulder
@ -806,7 +806,7 @@ func issueCertificateSubTestCTPoisonExtension(t *testing.T, i *TestCertificateIs
// unknown extension, whether it has a valid or invalid value. The check
// for whether or not the poison extension is present in the issued
// certificate/precertificate is done in the caller.
test.AssertEquals(t, test.CountCounter(csrExtensionCategory, csrExtensionOther, i.ca.csrExtensionCount), 1)
test.AssertEquals(t, test.CountCounterVec(csrExtensionCategory, csrExtensionOther, i.ca.csrExtensionCount), 1)
test.AssertEquals(t, signatureCountByPurpose("cert", i.ca.signatureCount), 1)
}
@ -820,5 +820,5 @@ func findExtension(extensions []pkix.Extension, id asn1.ObjectIdentifier) *pkix.
}
func signatureCountByPurpose(signatureType string, signatureCount *prometheus.CounterVec) int {
return test.CountCounter("purpose", signatureType, signatureCount)
return test.CountCounterVec("purpose", signatureType, signatureCount)
}

View File

@ -10,6 +10,7 @@ import (
"runtime"
"strings"
"testing"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_model/go"
@ -156,11 +157,22 @@ func AssertBetween(t *testing.T, a, b, c int64) {
}
}
// CountCounterVecVec returns the count by label and value of a prometheus metric
func CountCounterVec(labelName string, value string, counterVec *prometheus.CounterVec) int {
return CountCounter(counterVec.With(prometheus.Labels{labelName: value}))
}
// CountCounter returns the count by label and value of a prometheus metric
func CountCounter(labelName string, value string, counter *prometheus.CounterVec) int {
func CountCounter(counter prometheus.Counter) int {
ch := make(chan prometheus.Metric, 10)
counter.With(prometheus.Labels{labelName: value}).Collect(ch)
m := <-ch
counter.Collect(ch)
var m prometheus.Metric
select {
case <-time.After(time.Second):
fmt.Println("timed out collecting metrics")
return 0
case m = <-ch:
}
var iom io_prometheus_client.Metric
_ = m.Write(&iom)
return int(iom.Counter.GetValue())

View File

@ -59,7 +59,7 @@ type RemoteVA struct {
type vaMetrics struct {
validationTime *prometheus.HistogramVec
remoteValidationTime *prometheus.HistogramVec
remoteValidationFailures *prometheus.HistogramVec
remoteValidationFailures prometheus.Counter
}
func initMetrics(stats metrics.Scope) *vaMetrics {
@ -77,11 +77,11 @@ func initMetrics(stats metrics.Scope) *vaMetrics {
},
[]string{"type", "result"})
stats.MustRegister(remoteValidationTime)
remoteValidationFailures := prometheus.NewHistogramVec(
prometheus.HistogramOpts{
remoteValidationFailures := prometheus.NewCounter(
prometheus.CounterOpts{
Name: "remote_validation_failures",
Help: "Number of remote VAs that failed during challenge validation",
}, nil)
Help: "Number of validations failed due to remote VAs returning failure",
})
stats.MustRegister(remoteValidationFailures)
return &vaMetrics{
@ -851,7 +851,6 @@ func (va *ValidationAuthorityImpl) performRemoteValidation(ctx context.Context,
"type": string(challenge.Type),
"result": state,
}).Observe(va.clk.Since(s).Seconds())
va.metrics.remoteValidationFailures.With(prometheus.Labels{}).Observe(float64(bad))
}
// PerformValidation validates the given challenge. It always returns a list of
@ -893,6 +892,10 @@ func (va *ValidationAuthorityImpl) PerformValidation(ctx context.Context, domain
challenge.Status = core.StatusInvalid
challenge.Error = prob
logEvent.Error = prob.Error()
va.log.Info(fmt.Sprintf(
"Validation failed due to remote failures: identifier=%v err=%s",
authz.Identifier, prob))
va.metrics.remoteValidationFailures.Inc()
} else {
challenge.Status = core.StatusValid
}

View File

@ -1305,6 +1305,7 @@ func TestPerformRemoteValidation(t *testing.T) {
ms.mu.Lock()
delete(ms.allowedUAs, "remote 1")
ms.mu.Unlock()
mockLog := blog.NewMock()
localVA.performRemoteValidation(context.Background(), "localhost", chall, core.Authorization{}, probCh)
prob = <-probCh
if prob == nil {
@ -1333,6 +1334,7 @@ func TestPerformRemoteValidation(t *testing.T) {
}
// Local and remote 2 working, should fail
localVA.log = mockLog
ms.mu.Lock()
ms.allowedUAs["local"] = struct{}{}
delete(ms.allowedUAs, "remote 1")
@ -1341,6 +1343,14 @@ func TestPerformRemoteValidation(t *testing.T) {
if err == nil {
t.Error("PerformValidation didn't fail when one 'remote' validation failed")
}
failLogs := mockLog.GetAllMatching(`Validation failed due to remote failures`)
if len(failLogs) == 0 {
t.Error("Expected log line about failure due to remote failures, didn't get it")
}
remoteFailMetric := test.CountCounter(localVA.metrics.remoteValidationFailures)
if remoteFailMetric != 1 {
t.Errorf("Expected remote_validation_failures to be incremented, but it wasn't")
}
// Local and remote 2 working with maxRemoteFailures == 1, should succeed
localVA, _ = setup(ms.Server, 1)

View File

@ -1021,7 +1021,7 @@ func TestIssueCertificate(t *testing.T) {
`{"type":"`+probs.V1ErrorNS+`malformed","detail":"CSR generated using a pre-1.0.2 OpenSSL with a client that doesn't properly specify the CSR version. See https://community.letsencrypt.org/t/openssl-bug-information/19591","status":400}`)
// Test the CSR signature type counter works
test.AssertEquals(t, test.CountCounter("type", "SHA256-RSA", wfe.csrSignatureAlgs), 4)
test.AssertEquals(t, test.CountCounterVec("type", "SHA256-RSA", wfe.csrSignatureAlgs), 4)
}
func TestGetChallenge(t *testing.T) {

View File

@ -414,7 +414,7 @@ func TestValidPOSTRequest(t *testing.T) {
test.AssertEquals(t, prob.Type, probs.MalformedProblem)
test.AssertEquals(t, prob.HTTPStatus, tc.HTTPStatus)
test.AssertEquals(t, prob.Detail, tc.ProblemDetail)
test.AssertEquals(t, test.CountCounter(
test.AssertEquals(t, test.CountCounterVec(
"type", tc.ErrorStatType, wfe.stats.httpErrorCount), 1)
})
}
@ -512,7 +512,7 @@ func TestEnforceJWSAuthType(t *testing.T) {
test.AssertMarshaledEquals(t, prob, tc.ExpectedResult)
}
if tc.ErrorStatType != "" {
test.AssertEquals(t, test.CountCounter(
test.AssertEquals(t, test.CountCounterVec(
"type", tc.ErrorStatType, wfe.stats.joseErrorCount), 1)
}
})
@ -582,7 +582,7 @@ func TestValidNonce(t *testing.T) {
test.AssertMarshaledEquals(t, prob, tc.ExpectedResult)
}
if tc.ErrorStatType != "" {
test.AssertEquals(t, test.CountCounter(
test.AssertEquals(t, test.CountCounterVec(
"type", tc.ErrorStatType, wfe.stats.joseErrorCount), 1)
}
})
@ -701,7 +701,7 @@ func TestValidPOSTURL(t *testing.T) {
test.AssertMarshaledEquals(t, prob, tc.ExpectedResult)
}
if tc.ErrorStatType != "" {
test.AssertEquals(t, test.CountCounter(
test.AssertEquals(t, test.CountCounterVec(
"type", tc.ErrorStatType, wfe.stats.joseErrorCount), 1)
}
})
@ -857,7 +857,7 @@ func TestParseJWSRequest(t *testing.T) {
test.AssertMarshaledEquals(t, prob, tc.ExpectedProblem)
}
if tc.ErrorStatType != "" {
test.AssertEquals(t, test.CountCounter(
test.AssertEquals(t, test.CountCounterVec(
"type", tc.ErrorStatType, wfe.stats.joseErrorCount), 1)
}
})
@ -1047,7 +1047,7 @@ func TestLookupJWK(t *testing.T) {
test.AssertMarshaledEquals(t, prob, tc.ExpectedProblem)
}
if tc.ErrorStatType != "" {
test.AssertEquals(t, test.CountCounter(
test.AssertEquals(t, test.CountCounterVec(
"type", tc.ErrorStatType, wfe.stats.joseErrorCount), 1)
}
})
@ -1178,7 +1178,7 @@ func TestValidJWSForKey(t *testing.T) {
test.AssertMarshaledEquals(t, prob, tc.ExpectedProblem)
}
if tc.ErrorStatType != "" {
test.AssertEquals(t, test.CountCounter(
test.AssertEquals(t, test.CountCounterVec(
"type", tc.ErrorStatType, wfe.stats.joseErrorCount), 1)
}
})
@ -1274,7 +1274,7 @@ func TestValidPOSTForAccount(t *testing.T) {
test.AssertMarshaledEquals(t, prob, tc.ExpectedProblem)
}
if tc.ErrorStatType != "" {
test.AssertEquals(t, test.CountCounter(
test.AssertEquals(t, test.CountCounterVec(
"type", tc.ErrorStatType, wfe.stats.joseErrorCount), 1)
}
})
@ -1379,7 +1379,7 @@ func TestValidSelfAuthenticatedPOST(t *testing.T) {
test.AssertMarshaledEquals(t, prob, tc.ExpectedProblem)
}
if tc.ErrorStatType != "" {
test.AssertEquals(t, test.CountCounter(
test.AssertEquals(t, test.CountCounterVec(
"type", tc.ErrorStatType, wfe.stats.joseErrorCount), 1)
}
})
@ -1462,7 +1462,7 @@ func TestMatchJWSURLs(t *testing.T) {
test.AssertMarshaledEquals(t, prob, tc.ExpectedProblem)
}
if tc.ErrorStatType != "" {
test.AssertEquals(t, test.CountCounter(
test.AssertEquals(t, test.CountCounterVec(
"type", tc.ErrorStatType, wfe.stats.joseErrorCount), 1)
}
})

View File

@ -1911,7 +1911,7 @@ func TestKeyRollover(t *testing.T) {
wfe.KeyRollover(ctx, newRequestEvent(), responseWriter, makePostRequestWithPath("key-change", outer))
test.AssertUnmarshaledEquals(t, responseWriter.Body.String(), tc.ExpectedResponse)
if tc.ErrorStatType != "" {
test.AssertEquals(t, test.CountCounter(
test.AssertEquals(t, test.CountCounterVec(
"type", tc.ErrorStatType, wfe.stats.joseErrorCount), 1)
}
})