Add authorization webhook duration/count/failopen metrics

Kubernetes-commit: 79b344d85e3e2f8f3192a3dcabb384cfe87136a6
This commit is contained in:
Jordan Liggitt 2024-03-02 01:44:28 -05:00 committed by Kubernetes Publisher
parent 250f19d55f
commit 9adb3ee3c0
6 changed files with 314 additions and 1 deletions

View File

@ -60,6 +60,8 @@ var (
var _ = webhookmetrics.AuthorizerMetrics(delegatingAuthorizerMetrics{}) var _ = webhookmetrics.AuthorizerMetrics(delegatingAuthorizerMetrics{})
type delegatingAuthorizerMetrics struct { type delegatingAuthorizerMetrics struct {
// no-op for webhook metrics for now, delegating authorization reports original total/latency metrics
webhookmetrics.NoopWebhookMetrics
// no-op for matchCondition metrics for now, delegating authorization doesn't configure match conditions // no-op for matchCondition metrics for now, delegating authorization doesn't configure match conditions
celmetrics.NoopMatcherMetrics celmetrics.NoopMatcherMetrics
} }

View File

@ -18,20 +18,26 @@ package metrics
import ( import (
"context" "context"
"sync"
"k8s.io/apiserver/pkg/authorization/cel" "k8s.io/apiserver/pkg/authorization/cel"
compbasemetrics "k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
) )
// AuthorizerMetrics specifies a set of methods that are used to register various metrics for the webhook authorizer // AuthorizerMetrics specifies a set of methods that are used to register various metrics for the webhook authorizer
type AuthorizerMetrics interface { type AuthorizerMetrics interface {
// Request total and latency metrics // Request total and latency metrics
RequestMetrics RequestMetrics
// Webhook count, latency, and fail open metrics
WebhookMetrics
// match condition metrics // match condition metrics
cel.MatcherMetrics cel.MatcherMetrics
} }
type NoopAuthorizerMetrics struct { type NoopAuthorizerMetrics struct {
NoopRequestMetrics NoopRequestMetrics
NoopWebhookMetrics
cel.NoopMatcherMetrics cel.NoopMatcherMetrics
} }
@ -47,3 +53,114 @@ type NoopRequestMetrics struct{}
func (NoopRequestMetrics) RecordRequestTotal(context.Context, string) {} func (NoopRequestMetrics) RecordRequestTotal(context.Context, string) {}
func (NoopRequestMetrics) RecordRequestLatency(context.Context, string, float64) {} func (NoopRequestMetrics) RecordRequestLatency(context.Context, string, float64) {}
// WebhookMetrics specifies the methods used to record webhook count, latency,
// and fail open metrics for the webhook authorizer.
// In each method, name identifies the webhook the observation belongs to and
// result is one of the outcome labels listed on that method.
type WebhookMetrics interface {
// RecordWebhookEvaluation increments with each round-trip of a webhook authorizer.
// result is one of:
// - canceled: the call invoking the webhook request was canceled
// - timeout: the webhook request timed out
// - error: the webhook response completed and was invalid
// - success: the webhook response completed and was well-formed
RecordWebhookEvaluation(ctx context.Context, name, result string)
// RecordWebhookDuration records latency for each round-trip of a webhook authorizer.
// duration is the round-trip latency in seconds.
// result is one of:
// - canceled: the call invoking the webhook request was canceled
// - timeout: the webhook request timed out
// - error: the webhook response completed and was invalid
// - success: the webhook response completed and was well-formed
RecordWebhookDuration(ctx context.Context, name, result string, duration float64)
// RecordWebhookFailOpen increments when a webhook timeout or error results in a fail open
// of a request which has not been canceled.
// result is one of:
// - timeout: the webhook request timed out
// - error: the webhook response completed and was invalid
RecordWebhookFailOpen(ctx context.Context, name, result string)
}
// NoopWebhookMetrics provides the webhook metric recording methods as no-ops,
// for authorizers that do not report webhook metrics.
type NoopWebhookMetrics struct{}

// RecordWebhookEvaluation does nothing.
func (NoopWebhookMetrics) RecordWebhookEvaluation(context.Context, string, string) {}

// RecordWebhookDuration does nothing.
func (NoopWebhookMetrics) RecordWebhookDuration(context.Context, string, string, float64) {}

// RecordWebhookFailOpen does nothing.
func (NoopWebhookMetrics) RecordWebhookFailOpen(context.Context, string, string) {}
// registerWebhookMetrics ensures the webhook metrics are registered only once.
var registerWebhookMetrics sync.Once

// RegisterWebhookMetrics registers the webhook evaluation, duration, and
// fail-open metrics with the legacy registry. Registration runs at most once;
// later calls return without registering again.
func RegisterWebhookMetrics() {
	registerWebhookMetrics.Do(func() {
		legacyregistry.MustRegister(webhookEvaluations, webhookDuration, webhookFailOpen)
	})
}
// ResetMetricsForTest calls Reset on the webhook evaluation, duration, and
// fail-open metrics. As the name indicates, it is meant to be used by tests.
func ResetMetricsForTest() {
	for _, metric := range []interface{ Reset() }{webhookEvaluations, webhookDuration, webhookFailOpen} {
		metric.Reset()
	}
}
// Name components shared by the webhook metrics declared below; the tests
// expect the resulting metric names to start with "apiserver_authorization_".
const (
// namespace is used as the Namespace option of each webhook metric.
namespace = "apiserver"
// subsystem is used as the Subsystem option of each webhook metric.
subsystem = "authorization"
)
// Webhook authorizer metrics. All three are ALPHA stability and carry the
// labels "name" and "result".
var (
// webhookEvaluations counts round-trips to authorization webhooks.
webhookEvaluations = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "webhook_evaluations_total",
Help: "Round-trips to authorization webhooks.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"name", "result"},
)
// webhookDuration is a histogram of webhook round-trip latency in seconds,
// using the default bucket layout.
webhookDuration = compbasemetrics.NewHistogramVec(
&compbasemetrics.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "webhook_duration_seconds",
Help: "Request latency in seconds.",
Buckets: compbasemetrics.DefBuckets,
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"name", "result"},
)
// webhookFailOpen counts NoOpinion results caused by a webhook timeout or error.
webhookFailOpen = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "webhook_evaluations_fail_open_total",
Help: "NoOpinion results due to webhook timeout or error.",
StabilityLevel: compbasemetrics.ALPHA,
},
[]string{"name", "result"},
)
)
// webhookMetrics is the WebhookMetrics implementation that records into the
// package-level webhook metrics.
type webhookMetrics struct{}
// NewWebhookMetrics makes sure the webhook metrics are registered and returns
// a WebhookMetrics that records into them.
func NewWebhookMetrics() WebhookMetrics {
RegisterWebhookMetrics()
return webhookMetrics{}
}
// ResetWebhookMetricsForTest calls Reset on the webhook evaluation, duration,
// and fail-open metrics. As the name indicates, it is meant to be used by tests.
func ResetWebhookMetricsForTest() {
	resets := []func(){
		webhookEvaluations.Reset,
		webhookDuration.Reset,
		webhookFailOpen.Reset,
	}
	for _, reset := range resets {
		reset()
	}
}
// RecordWebhookEvaluation increments the evaluations counter for the given
// webhook name and result.
func (webhookMetrics) RecordWebhookEvaluation(ctx context.Context, name, result string) {
	counter := webhookEvaluations.WithContext(ctx).WithLabelValues(name, result)
	counter.Inc()
}

// RecordWebhookDuration observes duration in the latency histogram for the
// given webhook name and result.
func (webhookMetrics) RecordWebhookDuration(ctx context.Context, name, result string, duration float64) {
	histogram := webhookDuration.WithContext(ctx).WithLabelValues(name, result)
	histogram.Observe(duration)
}

// RecordWebhookFailOpen increments the fail-open counter for the given webhook
// name and result.
func (webhookMetrics) RecordWebhookFailOpen(ctx context.Context, name, result string) {
	counter := webhookFailOpen.WithContext(ctx).WithLabelValues(name, result)
	counter.Inc()
}

View File

@ -0,0 +1,86 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"context"
"strings"
"testing"
"k8s.io/component-base/metrics/legacyregistry"
"k8s.io/component-base/metrics/testutil"
)
// TestRecordWebhookMetrics records one duration, one evaluation, and one
// fail-open observation per test case through NewWebhookMetrics, then compares
// the named metrics gathered from the legacy registry with the expected
// text-format output.
func TestRecordWebhookMetrics(t *testing.T) {
testCases := []struct {
desc string
metrics []string
name string
result string
duration float64
want string
}{
{
desc: "evaluation failure total",
metrics: []string{
"apiserver_authorization_webhook_duration_seconds",
"apiserver_authorization_webhook_evaluations_total",
"apiserver_authorization_webhook_evaluations_fail_open_total",
},
name: "wh1.example.com",
result: "timeout",
duration: 1.5,
want: `
# HELP apiserver_authorization_webhook_duration_seconds [ALPHA] Request latency in seconds.
# TYPE apiserver_authorization_webhook_duration_seconds histogram
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.005"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.01"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.025"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.05"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.1"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.25"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.5"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="1"} 0
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="2.5"} 1
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="5"} 1
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="10"} 1
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="+Inf"} 1
apiserver_authorization_webhook_duration_seconds_sum{name="wh1.example.com",result="timeout"} 1.5
apiserver_authorization_webhook_duration_seconds_count{name="wh1.example.com",result="timeout"} 1
# HELP apiserver_authorization_webhook_evaluations_fail_open_total [ALPHA] NoOpinion results due to webhook timeout or error.
# TYPE apiserver_authorization_webhook_evaluations_fail_open_total counter
apiserver_authorization_webhook_evaluations_fail_open_total{name="wh1.example.com",result="timeout"} 1
# HELP apiserver_authorization_webhook_evaluations_total [ALPHA] Round-trips to authorization webhooks.
# TYPE apiserver_authorization_webhook_evaluations_total counter
apiserver_authorization_webhook_evaluations_total{name="wh1.example.com",result="timeout"} 1
`,
},
}
for _, tt := range testCases {
t.Run(tt.desc, func(t *testing.T) {
// Reset first so values recorded by earlier cases or tests do not leak into this comparison.
ResetWebhookMetricsForTest()
m := NewWebhookMetrics()
m.RecordWebhookDuration(context.Background(), tt.name, tt.result, tt.duration)
m.RecordWebhookEvaluation(context.Background(), tt.name, tt.result)
m.RecordWebhookFailOpen(context.Background(), tt.name, tt.result)
// Only the metrics named in tt.metrics are gathered and compared.
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(tt.want), tt.metrics...); err != nil {
t.Fatal(err)
}
})
}
}

View File

@ -18,8 +18,10 @@ package webhook
import ( import (
"context" "context"
"net/http"
"testing" "testing"
authorizationv1 "k8s.io/api/authorization/v1"
"k8s.io/apiserver/pkg/apis/apiserver" "k8s.io/apiserver/pkg/apis/apiserver"
"k8s.io/apiserver/pkg/authentication/user" "k8s.io/apiserver/pkg/authentication/user"
"k8s.io/apiserver/pkg/authorization/authorizer" "k8s.io/apiserver/pkg/authorization/authorizer"
@ -29,11 +31,15 @@ import (
func TestAuthorizerMetrics(t *testing.T) { func TestAuthorizerMetrics(t *testing.T) {
scenarios := []struct { scenarios := []struct {
name string name string
canceledRequest bool
clientCert, clientKey, clientCA []byte clientCert, clientKey, clientCA []byte
serverCert, serverKey, serverCA []byte serverCert, serverKey, serverCA []byte
authzFakeServiceStatusCode int authzFakeServiceStatusCode int
authFakeServiceDeny bool authFakeServiceDeny bool
expectedRegisteredStatusCode string expectedRegisteredStatusCode string
expectEvalutionResult string
expectDurationResult string
expectFailOpenResult string
wantErr bool wantErr bool
}{ }{
{ {
@ -41,6 +47,31 @@ func TestAuthorizerMetrics(t *testing.T) {
clientCert: clientCert, clientKey: clientKey, clientCA: caCert, clientCert: clientCert, clientKey: clientKey, clientCA: caCert,
serverCert: serverCert, serverKey: serverKey, serverCA: caCert, serverCert: serverCert, serverKey: serverKey, serverCA: caCert,
expectedRegisteredStatusCode: "200", expectedRegisteredStatusCode: "200",
expectEvalutionResult: "success",
expectDurationResult: "success",
expectFailOpenResult: "",
},
{
name: "timed out request",
clientCert: clientCert, clientKey: clientKey, clientCA: caCert,
serverCert: serverCert, serverKey: serverKey, serverCA: caCert,
authzFakeServiceStatusCode: http.StatusGatewayTimeout,
expectedRegisteredStatusCode: "504",
expectEvalutionResult: "timeout",
expectDurationResult: "timeout",
expectFailOpenResult: "timeout",
},
{
name: "canceled request",
clientCert: clientCert, clientKey: clientKey, clientCA: caCert,
serverCert: serverCert, serverKey: serverKey, serverCA: caCert,
canceledRequest: true,
expectedRegisteredStatusCode: "<error>",
expectEvalutionResult: "canceled",
expectDurationResult: "canceled",
expectFailOpenResult: "",
}, },
{ {
@ -49,6 +80,9 @@ func TestAuthorizerMetrics(t *testing.T) {
serverCert: serverCert, serverKey: serverKey, serverCA: caCert, serverCert: serverCert, serverKey: serverKey, serverCA: caCert,
authzFakeServiceStatusCode: 500, authzFakeServiceStatusCode: 500,
expectedRegisteredStatusCode: "500", expectedRegisteredStatusCode: "500",
expectEvalutionResult: "error",
expectDurationResult: "error",
expectFailOpenResult: "error",
}, },
{ {
@ -56,17 +90,28 @@ func TestAuthorizerMetrics(t *testing.T) {
clientCert: clientCert, clientKey: clientKey, clientCA: caCert, clientCert: clientCert, clientKey: clientKey, clientCA: caCert,
serverCert: serverCert, serverKey: serverKey, serverCA: badCACert, serverCert: serverCert, serverKey: serverKey, serverCA: badCACert,
expectedRegisteredStatusCode: "<error>", expectedRegisteredStatusCode: "<error>",
expectEvalutionResult: "error",
expectDurationResult: "error",
expectFailOpenResult: "error",
wantErr: true, wantErr: true,
}, },
} }
for _, scenario := range scenarios { for _, scenario := range scenarios {
t.Run(scenario.name, func(t *testing.T) { t.Run(scenario.name, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
service := new(mockV1Service) service := new(mockV1Service)
service.statusCode = scenario.authzFakeServiceStatusCode service.statusCode = scenario.authzFakeServiceStatusCode
if service.statusCode == 0 { if service.statusCode == 0 {
service.statusCode = 200 service.statusCode = 200
} }
service.reviewHook = func(*authorizationv1.SubjectAccessReview) {
if scenario.canceledRequest {
cancel()
}
}
service.allow = !scenario.authFakeServiceDeny service.allow = !scenario.authFakeServiceDeny
server, err := NewV1TestServer(service, scenario.serverCert, scenario.serverKey, scenario.serverCA) server, err := NewV1TestServer(service, scenario.serverCert, scenario.serverKey, scenario.serverCA)
@ -84,7 +129,7 @@ func TestAuthorizerMetrics(t *testing.T) {
} }
attr := authorizer.AttributesRecord{User: &user.DefaultInfo{}} attr := authorizer.AttributesRecord{User: &user.DefaultInfo{}}
_, _, err = wh.Authorize(context.Background(), attr) _, _, err = wh.Authorize(ctx, attr)
if scenario.wantErr { if scenario.wantErr {
if err == nil { if err == nil {
t.Errorf("expected error making authorization request: %v", err) t.Errorf("expected error making authorization request: %v", err)
@ -98,6 +143,16 @@ func TestAuthorizerMetrics(t *testing.T) {
if fakeAuthzMetrics.latencyCode != scenario.expectedRegisteredStatusCode { if fakeAuthzMetrics.latencyCode != scenario.expectedRegisteredStatusCode {
t.Errorf("incorrect status code recorded for RecordRequestLatency method, expected = %v, got %v", scenario.expectedRegisteredStatusCode, fakeAuthzMetrics.latencyCode) t.Errorf("incorrect status code recorded for RecordRequestLatency method, expected = %v, got %v", scenario.expectedRegisteredStatusCode, fakeAuthzMetrics.latencyCode)
} }
if fakeAuthzMetrics.evaluationsResult != scenario.expectEvalutionResult {
t.Errorf("expected evaluationsResult %q, got %q", scenario.expectEvalutionResult, fakeAuthzMetrics.evaluationsResult)
}
if fakeAuthzMetrics.durationResult != scenario.expectDurationResult {
t.Errorf("expected durationResult %q, got %q", scenario.expectDurationResult, fakeAuthzMetrics.durationResult)
}
if fakeAuthzMetrics.failOpenResult != scenario.expectFailOpenResult {
t.Errorf("expected failOpenResult %q, got %q", scenario.expectFailOpenResult, fakeAuthzMetrics.failOpenResult)
}
}) })
} }
} }
@ -108,6 +163,15 @@ type fakeAuthorizerMetrics struct {
latency float64 latency float64
latencyCode string latencyCode string
evaluations int
evaluationsResult string
duration float64
durationResult string
failOpen int
failOpenResult string
cel.NoopMatcherMetrics cel.NoopMatcherMetrics
} }
@ -119,3 +183,16 @@ func (f *fakeAuthorizerMetrics) RecordRequestLatency(_ context.Context, code str
f.latency = latency f.latency = latency
f.latencyCode = code f.latencyCode = code
} }
// RecordWebhookEvaluation counts the call and remembers the most recent result.
func (f *fakeAuthorizerMetrics) RecordWebhookEvaluation(_ context.Context, _, result string) {
	f.evaluationsResult = result
	f.evaluations++
}

// RecordWebhookDuration remembers the most recent duration and result.
func (f *fakeAuthorizerMetrics) RecordWebhookDuration(_ context.Context, _, result string, duration float64) {
	f.durationResult = result
	f.duration = duration
}

// RecordWebhookFailOpen counts the call and remembers the most recent result.
func (f *fakeAuthorizerMetrics) RecordWebhookFailOpen(_ context.Context, _, result string) {
	f.failOpenResult = result
	f.failOpen++
}

View File

@ -20,12 +20,15 @@ package webhook
import ( import (
"context" "context"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"net/http"
"strconv" "strconv"
"time" "time"
authorizationv1 "k8s.io/api/authorization/v1" authorizationv1 "k8s.io/api/authorization/v1"
authorizationv1beta1 "k8s.io/api/authorization/v1beta1" authorizationv1beta1 "k8s.io/api/authorization/v1beta1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/runtime/schema"
@ -233,6 +236,7 @@ func (w *WebhookAuthorizer) Authorize(ctx context.Context, attr authorizer.Attri
r.Status = entry.(authorizationv1.SubjectAccessReviewStatus) r.Status = entry.(authorizationv1.SubjectAccessReviewStatus)
} else { } else {
var result *authorizationv1.SubjectAccessReview var result *authorizationv1.SubjectAccessReview
var metricsResult string
// WithExponentialBackoff will return SAR create error (sarErr) if any. // WithExponentialBackoff will return SAR create error (sarErr) if any.
if err := webhook.WithExponentialBackoff(ctx, w.retryBackoff, func() error { if err := webhook.WithExponentialBackoff(ctx, w.retryBackoff, func() error {
var sarErr error var sarErr error
@ -242,6 +246,19 @@ func (w *WebhookAuthorizer) Authorize(ctx context.Context, attr authorizer.Attri
result, statusCode, sarErr = w.subjectAccessReview.Create(ctx, r, metav1.CreateOptions{}) result, statusCode, sarErr = w.subjectAccessReview.Create(ctx, r, metav1.CreateOptions{})
latency := time.Since(start) latency := time.Since(start)
switch {
case sarErr == nil:
metricsResult = "success"
case ctx.Err() != nil:
metricsResult = "canceled"
case errors.Is(sarErr, context.DeadlineExceeded) || apierrors.IsTimeout(sarErr) || statusCode == http.StatusGatewayTimeout:
metricsResult = "timeout"
default:
metricsResult = "error"
}
w.metrics.RecordWebhookEvaluation(ctx, w.name, metricsResult)
w.metrics.RecordWebhookDuration(ctx, w.name, metricsResult, latency.Seconds())
if statusCode != 0 { if statusCode != 0 {
w.metrics.RecordRequestTotal(ctx, strconv.Itoa(statusCode)) w.metrics.RecordRequestTotal(ctx, strconv.Itoa(statusCode))
w.metrics.RecordRequestLatency(ctx, strconv.Itoa(statusCode), latency.Seconds()) w.metrics.RecordRequestLatency(ctx, strconv.Itoa(statusCode), latency.Seconds())
@ -256,6 +273,12 @@ func (w *WebhookAuthorizer) Authorize(ctx context.Context, attr authorizer.Attri
return sarErr return sarErr
}, webhook.DefaultShouldRetry); err != nil { }, webhook.DefaultShouldRetry); err != nil {
klog.Errorf("Failed to make webhook authorizer request: %v", err) klog.Errorf("Failed to make webhook authorizer request: %v", err)
// we're returning NoOpinion, and the parent context has not timed out or been canceled
if w.decisionOnError == authorizer.DecisionNoOpinion && ctx.Err() == nil {
w.metrics.RecordWebhookFailOpen(ctx, w.name, metricsResult)
}
return w.decisionOnError, "", err return w.decisionOnError, "", err
} }

View File

@ -315,11 +315,18 @@ type mockV1Service struct {
allow bool allow bool
statusCode int statusCode int
called int called int
// reviewHook is called just before returning from the Review() method
reviewHook func(*authorizationv1.SubjectAccessReview)
} }
func (m *mockV1Service) Review(r *authorizationv1.SubjectAccessReview) { func (m *mockV1Service) Review(r *authorizationv1.SubjectAccessReview) {
m.called++ m.called++
r.Status.Allowed = m.allow r.Status.Allowed = m.allow
if m.reviewHook != nil {
m.reviewHook(r)
}
} }
func (m *mockV1Service) Allow() { m.allow = true } func (m *mockV1Service) Allow() { m.allow = true }
func (m *mockV1Service) Deny() { m.allow = false } func (m *mockV1Service) Deny() { m.allow = false }
@ -1414,5 +1421,6 @@ func celAuthorizerMetrics() metrics.AuthorizerMetrics {
type celAuthorizerMetricsType struct { type celAuthorizerMetricsType struct {
metrics.NoopRequestMetrics metrics.NoopRequestMetrics
metrics.NoopWebhookMetrics
celmetrics.MatcherMetrics celmetrics.MatcherMetrics
} }