Add authorization webhook duration/count/failopen metrics
Kubernetes-commit: 79b344d85e3e2f8f3192a3dcabb384cfe87136a6
This commit is contained in:
parent
250f19d55f
commit
9adb3ee3c0
|
|
@ -60,6 +60,8 @@ var (
|
|||
var _ = webhookmetrics.AuthorizerMetrics(delegatingAuthorizerMetrics{})
|
||||
|
||||
type delegatingAuthorizerMetrics struct {
|
||||
// no-op for webhook metrics for now, delegating authorization reports original total/latency metrics
|
||||
webhookmetrics.NoopWebhookMetrics
|
||||
// no-op for matchCondition metrics for now, delegating authorization doesn't configure match conditions
|
||||
celmetrics.NoopMatcherMetrics
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,20 +18,26 @@ package metrics
|
|||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
|
||||
"k8s.io/apiserver/pkg/authorization/cel"
|
||||
compbasemetrics "k8s.io/component-base/metrics"
|
||||
"k8s.io/component-base/metrics/legacyregistry"
|
||||
)
|
||||
|
||||
// AuthorizerMetrics specifies a set of methods that are used to register various metrics for the webhook authorizer
|
||||
type AuthorizerMetrics interface {
|
||||
// Request total and latency metrics
|
||||
RequestMetrics
|
||||
// Webhook count, latency, and fail open metrics
|
||||
WebhookMetrics
|
||||
// match condition metrics
|
||||
cel.MatcherMetrics
|
||||
}
|
||||
|
||||
type NoopAuthorizerMetrics struct {
|
||||
NoopRequestMetrics
|
||||
NoopWebhookMetrics
|
||||
cel.NoopMatcherMetrics
|
||||
}
|
||||
|
||||
|
|
@ -47,3 +53,114 @@ type NoopRequestMetrics struct{}
|
|||
|
||||
func (NoopRequestMetrics) RecordRequestTotal(context.Context, string) {}
|
||||
func (NoopRequestMetrics) RecordRequestLatency(context.Context, string, float64) {}
|
||||
|
||||
type WebhookMetrics interface {
|
||||
// RecordWebhookEvaluation increments with each round-trip of a webhook authorizer.
|
||||
// result is one of:
|
||||
// - canceled: the call invoking the webhook request was canceled
|
||||
// - timeout: the webhook request timed out
|
||||
// - error: the webhook response completed and was invalid
|
||||
// - success: the webhook response completed and was well-formed
|
||||
RecordWebhookEvaluation(ctx context.Context, name, result string)
|
||||
// RecordWebhookDuration records latency for each round-trip of a webhook authorizer.
|
||||
// result is one of:
|
||||
// - canceled: the call invoking the webhook request was canceled
|
||||
// - timeout: the webhook request timed out
|
||||
// - error: the webhook response completed and was invalid
|
||||
// - success: the webhook response completed and was well-formed
|
||||
RecordWebhookDuration(ctx context.Context, name, result string, duration float64)
|
||||
// RecordWebhookFailOpen increments when a webhook timeout or error results in a fail open
|
||||
// of a request which has not been canceled.
|
||||
// result is one of:
|
||||
// - timeout: the webhook request timed out
|
||||
// - error: the webhook response completed and was invalid
|
||||
RecordWebhookFailOpen(ctx context.Context, name, result string)
|
||||
}
|
||||
|
||||
type NoopWebhookMetrics struct{}
|
||||
|
||||
func (NoopWebhookMetrics) RecordWebhookEvaluation(ctx context.Context, name, result string) {}
|
||||
func (NoopWebhookMetrics) RecordWebhookDuration(ctx context.Context, name, result string, duration float64) {
|
||||
}
|
||||
func (NoopWebhookMetrics) RecordWebhookFailOpen(ctx context.Context, name, result string) {}
|
||||
|
||||
var registerWebhookMetrics sync.Once
|
||||
|
||||
// RegisterMetrics registers authorizer metrics.
|
||||
func RegisterWebhookMetrics() {
|
||||
registerWebhookMetrics.Do(func() {
|
||||
legacyregistry.MustRegister(webhookEvaluations)
|
||||
legacyregistry.MustRegister(webhookDuration)
|
||||
legacyregistry.MustRegister(webhookFailOpen)
|
||||
})
|
||||
}
|
||||
|
||||
func ResetMetricsForTest() {
|
||||
webhookEvaluations.Reset()
|
||||
webhookDuration.Reset()
|
||||
webhookFailOpen.Reset()
|
||||
}
|
||||
|
||||
const (
|
||||
namespace = "apiserver"
|
||||
subsystem = "authorization"
|
||||
)
|
||||
|
||||
var (
|
||||
webhookEvaluations = compbasemetrics.NewCounterVec(
|
||||
&compbasemetrics.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "webhook_evaluations_total",
|
||||
Help: "Round-trips to authorization webhooks.",
|
||||
StabilityLevel: compbasemetrics.ALPHA,
|
||||
},
|
||||
[]string{"name", "result"},
|
||||
)
|
||||
|
||||
webhookDuration = compbasemetrics.NewHistogramVec(
|
||||
&compbasemetrics.HistogramOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "webhook_duration_seconds",
|
||||
Help: "Request latency in seconds.",
|
||||
Buckets: compbasemetrics.DefBuckets,
|
||||
StabilityLevel: compbasemetrics.ALPHA,
|
||||
},
|
||||
[]string{"name", "result"},
|
||||
)
|
||||
|
||||
webhookFailOpen = compbasemetrics.NewCounterVec(
|
||||
&compbasemetrics.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "webhook_evaluations_fail_open_total",
|
||||
Help: "NoOpinion results due to webhook timeout or error.",
|
||||
StabilityLevel: compbasemetrics.ALPHA,
|
||||
},
|
||||
[]string{"name", "result"},
|
||||
)
|
||||
)
|
||||
|
||||
type webhookMetrics struct{}
|
||||
|
||||
func NewWebhookMetrics() WebhookMetrics {
|
||||
RegisterWebhookMetrics()
|
||||
return webhookMetrics{}
|
||||
}
|
||||
|
||||
func ResetWebhookMetricsForTest() {
|
||||
webhookEvaluations.Reset()
|
||||
webhookDuration.Reset()
|
||||
webhookFailOpen.Reset()
|
||||
}
|
||||
|
||||
func (webhookMetrics) RecordWebhookEvaluation(ctx context.Context, name, result string) {
|
||||
webhookEvaluations.WithContext(ctx).WithLabelValues(name, result).Inc()
|
||||
}
|
||||
func (webhookMetrics) RecordWebhookDuration(ctx context.Context, name, result string, duration float64) {
|
||||
webhookDuration.WithContext(ctx).WithLabelValues(name, result).Observe(duration)
|
||||
}
|
||||
func (webhookMetrics) RecordWebhookFailOpen(ctx context.Context, name, result string) {
|
||||
webhookFailOpen.WithContext(ctx).WithLabelValues(name, result).Inc()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,86 @@
|
|||
/*
|
||||
Copyright 2024 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"k8s.io/component-base/metrics/legacyregistry"
|
||||
"k8s.io/component-base/metrics/testutil"
|
||||
)
|
||||
|
||||
func TestRecordWebhookMetrics(t *testing.T) {
|
||||
testCases := []struct {
|
||||
desc string
|
||||
metrics []string
|
||||
name string
|
||||
result string
|
||||
duration float64
|
||||
want string
|
||||
}{
|
||||
{
|
||||
desc: "evaluation failure total",
|
||||
metrics: []string{
|
||||
"apiserver_authorization_webhook_duration_seconds",
|
||||
"apiserver_authorization_webhook_evaluations_total",
|
||||
"apiserver_authorization_webhook_evaluations_fail_open_total",
|
||||
},
|
||||
name: "wh1.example.com",
|
||||
result: "timeout",
|
||||
duration: 1.5,
|
||||
want: `
|
||||
# HELP apiserver_authorization_webhook_duration_seconds [ALPHA] Request latency in seconds.
|
||||
# TYPE apiserver_authorization_webhook_duration_seconds histogram
|
||||
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.005"} 0
|
||||
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.01"} 0
|
||||
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.025"} 0
|
||||
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.05"} 0
|
||||
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.1"} 0
|
||||
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.25"} 0
|
||||
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="0.5"} 0
|
||||
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="1"} 0
|
||||
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="2.5"} 1
|
||||
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="5"} 1
|
||||
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="10"} 1
|
||||
apiserver_authorization_webhook_duration_seconds_bucket{name="wh1.example.com",result="timeout",le="+Inf"} 1
|
||||
apiserver_authorization_webhook_duration_seconds_sum{name="wh1.example.com",result="timeout"} 1.5
|
||||
apiserver_authorization_webhook_duration_seconds_count{name="wh1.example.com",result="timeout"} 1
|
||||
# HELP apiserver_authorization_webhook_evaluations_fail_open_total [ALPHA] NoOpinion results due to webhook timeout or error.
|
||||
# TYPE apiserver_authorization_webhook_evaluations_fail_open_total counter
|
||||
apiserver_authorization_webhook_evaluations_fail_open_total{name="wh1.example.com",result="timeout"} 1
|
||||
# HELP apiserver_authorization_webhook_evaluations_total [ALPHA] Round-trips to authorization webhooks.
|
||||
# TYPE apiserver_authorization_webhook_evaluations_total counter
|
||||
apiserver_authorization_webhook_evaluations_total{name="wh1.example.com",result="timeout"} 1
|
||||
`,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range testCases {
|
||||
t.Run(tt.desc, func(t *testing.T) {
|
||||
ResetWebhookMetricsForTest()
|
||||
m := NewWebhookMetrics()
|
||||
m.RecordWebhookDuration(context.Background(), tt.name, tt.result, tt.duration)
|
||||
m.RecordWebhookEvaluation(context.Background(), tt.name, tt.result)
|
||||
m.RecordWebhookFailOpen(context.Background(), tt.name, tt.result)
|
||||
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(tt.want), tt.metrics...); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
@ -18,8 +18,10 @@ package webhook
|
|||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"testing"
|
||||
|
||||
authorizationv1 "k8s.io/api/authorization/v1"
|
||||
"k8s.io/apiserver/pkg/apis/apiserver"
|
||||
"k8s.io/apiserver/pkg/authentication/user"
|
||||
"k8s.io/apiserver/pkg/authorization/authorizer"
|
||||
|
|
@ -29,11 +31,15 @@ import (
|
|||
func TestAuthorizerMetrics(t *testing.T) {
|
||||
scenarios := []struct {
|
||||
name string
|
||||
canceledRequest bool
|
||||
clientCert, clientKey, clientCA []byte
|
||||
serverCert, serverKey, serverCA []byte
|
||||
authzFakeServiceStatusCode int
|
||||
authFakeServiceDeny bool
|
||||
expectedRegisteredStatusCode string
|
||||
expectEvalutionResult string
|
||||
expectDurationResult string
|
||||
expectFailOpenResult string
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
|
|
@ -41,6 +47,31 @@ func TestAuthorizerMetrics(t *testing.T) {
|
|||
clientCert: clientCert, clientKey: clientKey, clientCA: caCert,
|
||||
serverCert: serverCert, serverKey: serverKey, serverCA: caCert,
|
||||
expectedRegisteredStatusCode: "200",
|
||||
expectEvalutionResult: "success",
|
||||
expectDurationResult: "success",
|
||||
expectFailOpenResult: "",
|
||||
},
|
||||
|
||||
{
|
||||
name: "timed out request",
|
||||
clientCert: clientCert, clientKey: clientKey, clientCA: caCert,
|
||||
serverCert: serverCert, serverKey: serverKey, serverCA: caCert,
|
||||
authzFakeServiceStatusCode: http.StatusGatewayTimeout,
|
||||
expectedRegisteredStatusCode: "504",
|
||||
expectEvalutionResult: "timeout",
|
||||
expectDurationResult: "timeout",
|
||||
expectFailOpenResult: "timeout",
|
||||
},
|
||||
|
||||
{
|
||||
name: "canceled request",
|
||||
clientCert: clientCert, clientKey: clientKey, clientCA: caCert,
|
||||
serverCert: serverCert, serverKey: serverKey, serverCA: caCert,
|
||||
canceledRequest: true,
|
||||
expectedRegisteredStatusCode: "<error>",
|
||||
expectEvalutionResult: "canceled",
|
||||
expectDurationResult: "canceled",
|
||||
expectFailOpenResult: "",
|
||||
},
|
||||
|
||||
{
|
||||
|
|
@ -49,6 +80,9 @@ func TestAuthorizerMetrics(t *testing.T) {
|
|||
serverCert: serverCert, serverKey: serverKey, serverCA: caCert,
|
||||
authzFakeServiceStatusCode: 500,
|
||||
expectedRegisteredStatusCode: "500",
|
||||
expectEvalutionResult: "error",
|
||||
expectDurationResult: "error",
|
||||
expectFailOpenResult: "error",
|
||||
},
|
||||
|
||||
{
|
||||
|
|
@ -56,17 +90,28 @@ func TestAuthorizerMetrics(t *testing.T) {
|
|||
clientCert: clientCert, clientKey: clientKey, clientCA: caCert,
|
||||
serverCert: serverCert, serverKey: serverKey, serverCA: badCACert,
|
||||
expectedRegisteredStatusCode: "<error>",
|
||||
expectEvalutionResult: "error",
|
||||
expectDurationResult: "error",
|
||||
expectFailOpenResult: "error",
|
||||
wantErr: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, scenario := range scenarios {
|
||||
t.Run(scenario.name, func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
service := new(mockV1Service)
|
||||
service.statusCode = scenario.authzFakeServiceStatusCode
|
||||
if service.statusCode == 0 {
|
||||
service.statusCode = 200
|
||||
}
|
||||
service.reviewHook = func(*authorizationv1.SubjectAccessReview) {
|
||||
if scenario.canceledRequest {
|
||||
cancel()
|
||||
}
|
||||
}
|
||||
service.allow = !scenario.authFakeServiceDeny
|
||||
|
||||
server, err := NewV1TestServer(service, scenario.serverCert, scenario.serverKey, scenario.serverCA)
|
||||
|
|
@ -84,7 +129,7 @@ func TestAuthorizerMetrics(t *testing.T) {
|
|||
}
|
||||
|
||||
attr := authorizer.AttributesRecord{User: &user.DefaultInfo{}}
|
||||
_, _, err = wh.Authorize(context.Background(), attr)
|
||||
_, _, err = wh.Authorize(ctx, attr)
|
||||
if scenario.wantErr {
|
||||
if err == nil {
|
||||
t.Errorf("expected error making authorization request: %v", err)
|
||||
|
|
@ -98,6 +143,16 @@ func TestAuthorizerMetrics(t *testing.T) {
|
|||
if fakeAuthzMetrics.latencyCode != scenario.expectedRegisteredStatusCode {
|
||||
t.Errorf("incorrect status code recorded for RecordRequestLatency method, expected = %v, got %v", scenario.expectedRegisteredStatusCode, fakeAuthzMetrics.latencyCode)
|
||||
}
|
||||
|
||||
if fakeAuthzMetrics.evaluationsResult != scenario.expectEvalutionResult {
|
||||
t.Errorf("expected evaluationsResult %q, got %q", scenario.expectEvalutionResult, fakeAuthzMetrics.evaluationsResult)
|
||||
}
|
||||
if fakeAuthzMetrics.durationResult != scenario.expectDurationResult {
|
||||
t.Errorf("expected durationResult %q, got %q", scenario.expectDurationResult, fakeAuthzMetrics.durationResult)
|
||||
}
|
||||
if fakeAuthzMetrics.failOpenResult != scenario.expectFailOpenResult {
|
||||
t.Errorf("expected failOpenResult %q, got %q", scenario.expectFailOpenResult, fakeAuthzMetrics.failOpenResult)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
@ -108,6 +163,15 @@ type fakeAuthorizerMetrics struct {
|
|||
latency float64
|
||||
latencyCode string
|
||||
|
||||
evaluations int
|
||||
evaluationsResult string
|
||||
|
||||
duration float64
|
||||
durationResult string
|
||||
|
||||
failOpen int
|
||||
failOpenResult string
|
||||
|
||||
cel.NoopMatcherMetrics
|
||||
}
|
||||
|
||||
|
|
@ -119,3 +183,16 @@ func (f *fakeAuthorizerMetrics) RecordRequestLatency(_ context.Context, code str
|
|||
f.latency = latency
|
||||
f.latencyCode = code
|
||||
}
|
||||
|
||||
func (f *fakeAuthorizerMetrics) RecordWebhookEvaluation(ctx context.Context, name, result string) {
|
||||
f.evaluations += 1
|
||||
f.evaluationsResult = result
|
||||
}
|
||||
func (f *fakeAuthorizerMetrics) RecordWebhookDuration(ctx context.Context, name, result string, duration float64) {
|
||||
f.duration = duration
|
||||
f.durationResult = result
|
||||
}
|
||||
func (f *fakeAuthorizerMetrics) RecordWebhookFailOpen(ctx context.Context, name, result string) {
|
||||
f.failOpen += 1
|
||||
f.failOpenResult = result
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,12 +20,15 @@ package webhook
|
|||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
authorizationv1 "k8s.io/api/authorization/v1"
|
||||
authorizationv1beta1 "k8s.io/api/authorization/v1beta1"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/runtime/schema"
|
||||
|
|
@ -233,6 +236,7 @@ func (w *WebhookAuthorizer) Authorize(ctx context.Context, attr authorizer.Attri
|
|||
r.Status = entry.(authorizationv1.SubjectAccessReviewStatus)
|
||||
} else {
|
||||
var result *authorizationv1.SubjectAccessReview
|
||||
var metricsResult string
|
||||
// WithExponentialBackoff will return SAR create error (sarErr) if any.
|
||||
if err := webhook.WithExponentialBackoff(ctx, w.retryBackoff, func() error {
|
||||
var sarErr error
|
||||
|
|
@ -242,6 +246,19 @@ func (w *WebhookAuthorizer) Authorize(ctx context.Context, attr authorizer.Attri
|
|||
result, statusCode, sarErr = w.subjectAccessReview.Create(ctx, r, metav1.CreateOptions{})
|
||||
latency := time.Since(start)
|
||||
|
||||
switch {
|
||||
case sarErr == nil:
|
||||
metricsResult = "success"
|
||||
case ctx.Err() != nil:
|
||||
metricsResult = "canceled"
|
||||
case errors.Is(sarErr, context.DeadlineExceeded) || apierrors.IsTimeout(sarErr) || statusCode == http.StatusGatewayTimeout:
|
||||
metricsResult = "timeout"
|
||||
default:
|
||||
metricsResult = "error"
|
||||
}
|
||||
w.metrics.RecordWebhookEvaluation(ctx, w.name, metricsResult)
|
||||
w.metrics.RecordWebhookDuration(ctx, w.name, metricsResult, latency.Seconds())
|
||||
|
||||
if statusCode != 0 {
|
||||
w.metrics.RecordRequestTotal(ctx, strconv.Itoa(statusCode))
|
||||
w.metrics.RecordRequestLatency(ctx, strconv.Itoa(statusCode), latency.Seconds())
|
||||
|
|
@ -256,6 +273,12 @@ func (w *WebhookAuthorizer) Authorize(ctx context.Context, attr authorizer.Attri
|
|||
return sarErr
|
||||
}, webhook.DefaultShouldRetry); err != nil {
|
||||
klog.Errorf("Failed to make webhook authorizer request: %v", err)
|
||||
|
||||
// we're returning NoOpinion, and the parent context has not timed out or been canceled
|
||||
if w.decisionOnError == authorizer.DecisionNoOpinion && ctx.Err() == nil {
|
||||
w.metrics.RecordWebhookFailOpen(ctx, w.name, metricsResult)
|
||||
}
|
||||
|
||||
return w.decisionOnError, "", err
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -315,11 +315,18 @@ type mockV1Service struct {
|
|||
allow bool
|
||||
statusCode int
|
||||
called int
|
||||
|
||||
// reviewHook is called just before returning from the Review() method
|
||||
reviewHook func(*authorizationv1.SubjectAccessReview)
|
||||
}
|
||||
|
||||
func (m *mockV1Service) Review(r *authorizationv1.SubjectAccessReview) {
|
||||
m.called++
|
||||
r.Status.Allowed = m.allow
|
||||
|
||||
if m.reviewHook != nil {
|
||||
m.reviewHook(r)
|
||||
}
|
||||
}
|
||||
func (m *mockV1Service) Allow() { m.allow = true }
|
||||
func (m *mockV1Service) Deny() { m.allow = false }
|
||||
|
|
@ -1414,5 +1421,6 @@ func celAuthorizerMetrics() metrics.AuthorizerMetrics {
|
|||
|
||||
type celAuthorizerMetricsType struct {
|
||||
metrics.NoopRequestMetrics
|
||||
metrics.NoopWebhookMetrics
|
||||
celmetrics.MatcherMetrics
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue