diff --git a/pkg/util/webhook/webhook.go b/pkg/util/webhook/webhook.go index 28eee5469..917d66c92 100644 --- a/pkg/util/webhook/webhook.go +++ b/pkg/util/webhook/webhook.go @@ -36,9 +36,28 @@ import ( // timeout of the HTTP request, including reading the response body. const defaultRequestTimeout = 30 * time.Second +// GenericWebhook defines a generic client for webhooks with commonly used capabilities, +// such as retry requests. type GenericWebhook struct { RestClient *rest.RESTClient InitialBackoff time.Duration + ShouldRetry func(error) bool +} + +// DefaultShouldRetry is a default implementation for the GenericWebhook ShouldRetry function property. +// If the error reason is one of: networking (connection reset) or http (InternalServerError (500), GatewayTimeout (504), TooManyRequests (429)), +// or apierrors.SuggestsClientDelay() returns true, then the function advises a retry. +// Otherwise it returns false for an immediate fail. +func DefaultShouldRetry(err error) bool { + // these errors indicate a transient error that should be retried. + if net.IsConnectionReset(err) || apierrors.IsInternalError(err) || apierrors.IsTimeout(err) || apierrors.IsTooManyRequests(err) { + return true + } + // if the error sends the Retry-After header, we respect it as an explicit confirmation we should retry. + if _, shouldRetry := apierrors.SuggestsClientDelay(err); shouldRetry { + return true + } + return false } // NewGenericWebhook creates a new GenericWebhook from the provided kubeconfig file. @@ -77,23 +96,28 @@ func newGenericWebhook(scheme *runtime.Scheme, codecFactory serializer.CodecFact return nil, err } - return &GenericWebhook{restClient, initialBackoff}, nil + return &GenericWebhook{restClient, initialBackoff, DefaultShouldRetry}, nil } // WithExponentialBackoff will retry webhookFn() up to 5 times with exponentially increasing backoff when -// it returns an error for which apierrors.SuggestsClientDelay() or apierrors.IsInternalError() returns true. +// it returns an error for which this GenericWebhook's ShouldRetry function returns true, confirming it to +// be retriable. If no ShouldRetry has been defined for the webhook, then the default one is used (DefaultShouldRetry). func (g *GenericWebhook) WithExponentialBackoff(ctx context.Context, webhookFn func() rest.Result) rest.Result { var result rest.Result + shouldRetry := g.ShouldRetry + if shouldRetry == nil { + shouldRetry = DefaultShouldRetry + } WithExponentialBackoff(ctx, g.InitialBackoff, func() error { result = webhookFn() return result.Error() - }) + }, shouldRetry) return result } -// WithExponentialBackoff will retry webhookFn() up to 5 times with exponentially increasing backoff when -// it returns an error for which apierrors.SuggestsClientDelay() or apierrors.IsInternalError() returns true. -func WithExponentialBackoff(ctx context.Context, initialBackoff time.Duration, webhookFn func() error) error { +// WithExponentialBackoff will retry webhookFn up to 5 times with exponentially increasing backoff when +// it returns an error for which shouldRetry returns true, confirming it to be retriable. +func WithExponentialBackoff(ctx context.Context, initialBackoff time.Duration, webhookFn func() error, shouldRetry func(error) bool) error { backoff := wait.Backoff{ Duration: initialBackoff, Factor: 1.5, @@ -104,18 +128,11 @@ func WithExponentialBackoff(ctx context.Context, initialBackoff time.Duration, w var err error wait.ExponentialBackoff(backoff, func() (bool, error) { err = webhookFn() - if ctx.Err() != nil { // we timed out or were cancelled, we should not retry return true, err } - - // these errors indicate a transient error that should be retried. - if net.IsConnectionReset(err) || apierrors.IsInternalError(err) || apierrors.IsTimeout(err) || apierrors.IsTooManyRequests(err) { - return false, nil - } - // if the error sends the Retry-After header, we respect it as an explicit confirmation we should retry. - if _, shouldRetry := apierrors.SuggestsClientDelay(err); shouldRetry { + if shouldRetry(err) { return false, nil } if err != nil { diff --git a/plugin/pkg/audit/webhook/webhook.go b/plugin/pkg/audit/webhook/webhook.go index d2606541d..d0f053643 100644 --- a/plugin/pkg/audit/webhook/webhook.go +++ b/plugin/pkg/audit/webhook/webhook.go @@ -44,9 +44,27 @@ func init() { install.Install(audit.Scheme) } +// retryOnError enforces the webhook client to retry requests +// on error regardless of its nature. +// The default implementation considers a very limited set of +// 'retriable' errors, assuming correct use of HTTP codes by +// external webhooks. +// That may easily lead to dropped audit events. In fact, there is +// hardly any error that could be a justified reason NOT to retry +// sending audit events if there is even a slight chance that the +// receiving service gets back to normal at some point. +func retryOnError(err error) bool { + if err != nil { + return true + } + return false +} + func loadWebhook(configFile string, groupVersion schema.GroupVersion, initialBackoff time.Duration) (*webhook.GenericWebhook, error) { - return webhook.NewGenericWebhook(audit.Scheme, audit.Codecs, configFile, + w, err := webhook.NewGenericWebhook(audit.Scheme, audit.Codecs, configFile, []schema.GroupVersion{groupVersion}, initialBackoff) + w.ShouldRetry = retryOnError + return w, err } type backend struct { @@ -61,6 +79,7 @@ func NewDynamicBackend(rc *rest.RESTClient, initialBackoff time.Duration) audit. w: &webhook.GenericWebhook{ RestClient: rc, InitialBackoff: initialBackoff, + ShouldRetry: retryOnError, }, name: fmt.Sprintf("dynamic_%s", PluginName), } diff --git a/plugin/pkg/authenticator/token/webhook/webhook.go b/plugin/pkg/authenticator/token/webhook/webhook.go index bbc8eef51..635992062 100644 --- a/plugin/pkg/authenticator/token/webhook/webhook.go +++ b/plugin/pkg/authenticator/token/webhook/webhook.go @@ -101,7 +101,7 @@ func (w *WebhookTokenAuthenticator) AuthenticateToken(ctx context.Context, token webhook.WithExponentialBackoff(ctx, w.initialBackoff, func() error { result, err = w.tokenReview.CreateContext(ctx, r) return err - }) + }, webhook.DefaultShouldRetry) if err != nil { // An error here indicates bad configuration or an outage. Log for debugging. klog.Errorf("Failed to make webhook authenticator request: %v", err) diff --git a/plugin/pkg/authorizer/webhook/webhook.go b/plugin/pkg/authorizer/webhook/webhook.go index 4006f1ac7..83ac25c39 100644 --- a/plugin/pkg/authorizer/webhook/webhook.go +++ b/plugin/pkg/authorizer/webhook/webhook.go @@ -191,7 +191,7 @@ func (w *WebhookAuthorizer) Authorize(ctx context.Context, attr authorizer.Attri webhook.WithExponentialBackoff(ctx, w.initialBackoff, func() error { result, err = w.subjectAccessReview.CreateContext(ctx, r) return err - }) + }, webhook.DefaultShouldRetry) if err != nil { // An error here indicates bad configuration or an outage. Log for debugging. klog.Errorf("Failed to make webhook authorizer request: %v", err)