Merge pull request #133946 from lalitc375/k8s-dv-metrics

Add fine grained metrics to narrow down DV mismatches and panics

Kubernetes-commit: 09278c12922303db042d2d9af686d4fb3a615d18
This commit is contained in:
Kubernetes Publisher 2025-09-15 19:44:09 -07:00
commit beaf74edfa
4 changed files with 97 additions and 34 deletions

View File

@ -51,6 +51,13 @@ func WithTakeover(takeover bool) ValidationConfig {
}
}
// WithValidationIdentifier sets the validation identifier, which is used to determine the source of a mismatch in metrics.
func WithValidationIdentifier(identifier string) ValidationConfig {
return func(config *validationConfigOption) {
config.validationIdentifier = identifier
}
}
// WithSubresourceMapper sets the subresource mapper for validation.
// This should be used when registering validation for polymorphic subresources like /scale.
//
@ -79,6 +86,7 @@ type validationConfigOption struct {
options []string
takeover bool
subresourceGVKMapper GroupVersionKindProvider
validationIdentifier string
}
// ValidateDeclaratively validates obj against declarative validation tags
@ -99,7 +107,7 @@ func ValidateDeclaratively(ctx context.Context, scheme *runtime.Scheme, obj runt
o(cfg)
}
return panicSafeValidateFunc(validateDeclaratively, cfg.takeover)(ctx, scheme, obj, nil, cfg)
return panicSafeValidateFunc(validateDeclaratively, cfg.takeover, cfg.validationIdentifier)(ctx, scheme, obj, nil, cfg)
}
// ValidateUpdateDeclaratively validates obj and oldObj against declarative
@ -119,7 +127,7 @@ func ValidateUpdateDeclaratively(ctx context.Context, scheme *runtime.Scheme, ob
for _, o := range configOpts {
o(cfg)
}
return panicSafeValidateFunc(validateDeclaratively, cfg.takeover)(ctx, scheme, obj, oldObj, cfg)
return panicSafeValidateFunc(validateDeclaratively, cfg.takeover, cfg.validationIdentifier)(ctx, scheme, obj, oldObj, cfg)
}
func validateDeclaratively(ctx context.Context, scheme *runtime.Scheme, obj, oldObj runtime.Object, o *validationConfigOption) field.ErrorList {
@ -180,7 +188,7 @@ func parseSubresourcePath(subresourcePath string) ([]string, error) {
// CompareDeclarativeErrorsAndEmitMismatches checks for mismatches between imperative and declarative validation
// and logs + emits metrics when inconsistencies are found
func CompareDeclarativeErrorsAndEmitMismatches(ctx context.Context, imperativeErrs, declarativeErrs field.ErrorList, takeover bool) {
func CompareDeclarativeErrorsAndEmitMismatches(ctx context.Context, imperativeErrs, declarativeErrs field.ErrorList, takeover bool, validationIdentifier string) {
logger := klog.FromContext(ctx)
mismatchDetails := gatherDeclarativeValidationMismatches(imperativeErrs, declarativeErrs, takeover)
for _, detail := range mismatchDetails {
@ -188,7 +196,7 @@ func CompareDeclarativeErrorsAndEmitMismatches(ctx context.Context, imperativeEr
logger.Error(nil, detail)
// Increment the metric for the mismatch
validationmetrics.Metrics.IncDeclarativeValidationMismatchMetric()
validationmetrics.Metrics.IncDeclarativeValidationMismatchMetric(validationIdentifier)
}
}
@ -288,12 +296,12 @@ func gatherDeclarativeValidationMismatches(imperativeErrs, declarativeErrs field
// createDeclarativeValidationPanicHandler returns a function with panic recovery logic
// that will increment the panic metric and either log or append errors based on the takeover parameter.
func createDeclarativeValidationPanicHandler(ctx context.Context, errs *field.ErrorList, takeover bool) func() {
func createDeclarativeValidationPanicHandler(ctx context.Context, errs *field.ErrorList, takeover bool, validationIdentifier string) func() {
logger := klog.FromContext(ctx)
return func() {
if r := recover(); r != nil {
// Increment the panic metric counter
validationmetrics.Metrics.IncDeclarativeValidationPanicMetric()
validationmetrics.Metrics.IncDeclarativeValidationPanicMetric(validationIdentifier)
const errorFmt = "panic during declarative validation: %v"
if takeover {
@ -313,10 +321,10 @@ func createDeclarativeValidationPanicHandler(ctx context.Context, errs *field.Er
// if takeover=false, and adding a validation error if takeover=true.
func panicSafeValidateFunc(
validateUpdateFunc func(ctx context.Context, scheme *runtime.Scheme, obj, oldObj runtime.Object, o *validationConfigOption) field.ErrorList,
takeover bool,
takeover bool, validationIdentifier string,
) func(ctx context.Context, scheme *runtime.Scheme, obj, oldObj runtime.Object, o *validationConfigOption) field.ErrorList {
return func(ctx context.Context, scheme *runtime.Scheme, obj, oldObj runtime.Object, o *validationConfigOption) (errs field.ErrorList) {
defer createDeclarativeValidationPanicHandler(ctx, &errs, takeover)()
defer createDeclarativeValidationPanicHandler(ctx, &errs, takeover, validationIdentifier)()
return validateUpdateFunc(ctx, scheme, obj, oldObj, o)
}

View File

@ -425,7 +425,7 @@ func TestCompareDeclarativeErrorsAndEmitMismatches(t *testing.T) {
defer klog.LogToStderr(true)
ctx := context.Background()
CompareDeclarativeErrorsAndEmitMismatches(ctx, tc.imperativeErrs, tc.declarativeErrs, tc.takeover)
CompareDeclarativeErrorsAndEmitMismatches(ctx, tc.imperativeErrs, tc.declarativeErrs, tc.takeover, "test_validationIdentifier")
klog.Flush()
logOutput := buf.String()
@ -511,7 +511,7 @@ func TestWithRecover(t *testing.T) {
defer klog.LogToStderr(true)
// Pass the takeover flag to panicSafeValidateFunc instead of relying on the feature gate
wrapped := panicSafeValidateFunc(tc.validateFn, tc.takeoverEnabled)
wrapped := panicSafeValidateFunc(tc.validateFn, tc.takeoverEnabled, "test_validationIdentifier")
gotErrs := wrapped(ctx, scheme, obj, nil, &validationConfigOption{opType: operation.Create, options: options, takeover: tc.takeoverEnabled})
klog.Flush()
@ -605,7 +605,7 @@ func TestWithRecoverUpdate(t *testing.T) {
defer klog.LogToStderr(true)
// Pass the takeover flag to panicSafeValidateUpdateFunc instead of relying on the feature gate
wrapped := panicSafeValidateFunc(tc.validateFn, tc.takeoverEnabled)
wrapped := panicSafeValidateFunc(tc.validateFn, tc.takeoverEnabled, "test_validationIdentifier")
gotErrs := wrapped(ctx, scheme, obj, oldObj, &validationConfigOption{opType: operation.Update, options: options, takeover: tc.takeoverEnabled})
klog.Flush()

View File

@ -28,8 +28,8 @@ const (
// ValidationMetrics is the interface for validation metrics.
type ValidationMetrics interface {
IncDeclarativeValidationMismatchMetric()
IncDeclarativeValidationPanicMetric()
IncDeclarativeValidationMismatchMetric(validationIdentifier string)
IncDeclarativeValidationPanicMetric(validationIdentifier string)
IncDuplicateValidationErrorMetric()
Reset()
}
@ -44,6 +44,16 @@ var validationMetricsInstance = &validationMetrics{
StabilityLevel: metrics.BETA,
},
),
DeclarativeValidationMismatchCounterVector: metrics.NewCounterVec(
&metrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "declarative_validation_parity_discrepancies_total",
Help: "Number of discrepancies between declarative and handwritten validation, broken down by validation identifier.",
StabilityLevel: metrics.ALPHA,
},
[]string{"validation_identifier"},
),
DeclarativeValidationPanicCounter: metrics.NewCounter(
&metrics.CounterOpts{
Namespace: namespace,
@ -53,6 +63,16 @@ var validationMetricsInstance = &validationMetrics{
StabilityLevel: metrics.BETA,
},
),
DeclarativeValidationPanicCounterVector: metrics.NewCounterVec(
&metrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "declarative_validation_panics_total",
Help: "Number of panics in declarative validation, broken down by validation identifier.",
StabilityLevel: metrics.ALPHA,
},
[]string{"validation_identifier"},
),
DuplicateValidationErrorCounter: metrics.NewCounter(
&metrics.CounterOpts{
Namespace: namespace,
@ -69,31 +89,39 @@ var Metrics ValidationMetrics = validationMetricsInstance
func init() {
legacyregistry.MustRegister(validationMetricsInstance.DeclarativeValidationMismatchCounter)
legacyregistry.MustRegister(validationMetricsInstance.DeclarativeValidationMismatchCounterVector)
legacyregistry.MustRegister(validationMetricsInstance.DeclarativeValidationPanicCounter)
legacyregistry.MustRegister(validationMetricsInstance.DeclarativeValidationPanicCounterVector)
legacyregistry.MustRegister(validationMetricsInstance.DuplicateValidationErrorCounter)
}
type validationMetrics struct {
DeclarativeValidationMismatchCounter *metrics.Counter
DeclarativeValidationPanicCounter *metrics.Counter
DuplicateValidationErrorCounter *metrics.Counter
DeclarativeValidationMismatchCounter *metrics.Counter
DeclarativeValidationMismatchCounterVector *metrics.CounterVec
DeclarativeValidationPanicCounter *metrics.Counter
DeclarativeValidationPanicCounterVector *metrics.CounterVec
DuplicateValidationErrorCounter *metrics.Counter
}
// Reset resets the validation metrics.
func (m *validationMetrics) Reset() {
m.DeclarativeValidationMismatchCounter.Reset()
m.DeclarativeValidationMismatchCounterVector.Reset()
m.DeclarativeValidationPanicCounter.Reset()
m.DeclarativeValidationPanicCounterVector.Reset()
m.DuplicateValidationErrorCounter.Reset()
}
// IncDeclarativeValidationMismatchMetric increments the counter for the declarative_validation_mismatch_total metric.
func (m *validationMetrics) IncDeclarativeValidationMismatchMetric() {
func (m *validationMetrics) IncDeclarativeValidationMismatchMetric(validationIdentifier string) {
m.DeclarativeValidationMismatchCounter.Inc()
m.DeclarativeValidationMismatchCounterVector.WithLabelValues(validationIdentifier).Inc()
}
// IncDeclarativeValidationPanicMetric increments the counter for the declarative_validation_panic_total metric.
func (m *validationMetrics) IncDeclarativeValidationPanicMetric() {
func (m *validationMetrics) IncDeclarativeValidationPanicMetric(validationIdentifier string) {
m.DeclarativeValidationPanicCounter.Inc()
m.DeclarativeValidationPanicCounterVector.WithLabelValues(validationIdentifier).Inc()
}
// IncDuplicateValidationErrorMetric increments the counter for the duplicate_validation_error_total metric.

View File

@ -24,21 +24,28 @@ import (
"k8s.io/component-base/metrics/testutil"
)
const testIdentifier = "test_validation_identifier"
const anotherTestIdentifier = "another_test_validation_identifier"
// TestDeclarativeValidationMismatchMetric tests that the mismatch metric correctly increments once
func TestDeclarativeValidationMismatchMetric(t *testing.T) {
defer legacyregistry.Reset()
defer ResetValidationMetricsInstance()
// Increment the metric once
Metrics.IncDeclarativeValidationMismatchMetric()
Metrics.IncDeclarativeValidationMismatchMetric(testIdentifier)
expected := `
# HELP apiserver_validation_declarative_validation_parity_discrepancies_total [ALPHA] Number of discrepancies between declarative and handwritten validation, broken down by validation identifier.
# TYPE apiserver_validation_declarative_validation_parity_discrepancies_total counter
apiserver_validation_declarative_validation_parity_discrepancies_total{validation_identifier="test_validation_identifier"} 1
# HELP apiserver_validation_declarative_validation_mismatch_total [BETA] Number of times declarative validation results differed from handwritten validation results for core types.
# TYPE apiserver_validation_declarative_validation_mismatch_total counter
apiserver_validation_declarative_validation_mismatch_total 1
`
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expected), "apiserver_validation_declarative_validation_mismatch_total"); err != nil {
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expected), "apiserver_validation_declarative_validation_parity_discrepancies_total", "apiserver_validation_declarative_validation_mismatch_total"); err != nil {
t.Fatal(err)
}
}
@ -49,15 +56,19 @@ func TestDeclarativeValidationPanicMetric(t *testing.T) {
defer ResetValidationMetricsInstance()
// Increment the metric once
Metrics.IncDeclarativeValidationPanicMetric()
Metrics.IncDeclarativeValidationPanicMetric(testIdentifier)
expected := `
# HELP apiserver_validation_declarative_validation_panics_total [ALPHA] Number of panics in declarative validation, broken down by validation identifier.
# TYPE apiserver_validation_declarative_validation_panics_total counter
apiserver_validation_declarative_validation_panics_total{validation_identifier="test_validation_identifier"} 1
# HELP apiserver_validation_declarative_validation_panic_total [BETA] Number of times declarative validation has panicked during validation.
# TYPE apiserver_validation_declarative_validation_panic_total counter
apiserver_validation_declarative_validation_panic_total 1
`
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expected), "apiserver_validation_declarative_validation_panic_total"); err != nil {
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expected),
"apiserver_validation_declarative_validation_panic_total", "apiserver_validation_declarative_validation_panics_total"); err != nil {
t.Fatal(err)
}
}
@ -87,17 +98,21 @@ func TestDeclarativeValidationMismatchMetricMultiple(t *testing.T) {
defer ResetValidationMetricsInstance()
// Increment the metric three times
Metrics.IncDeclarativeValidationMismatchMetric()
Metrics.IncDeclarativeValidationMismatchMetric()
Metrics.IncDeclarativeValidationMismatchMetric()
Metrics.IncDeclarativeValidationMismatchMetric(testIdentifier)
Metrics.IncDeclarativeValidationMismatchMetric(testIdentifier)
Metrics.IncDeclarativeValidationMismatchMetric(anotherTestIdentifier)
expected := `
# HELP apiserver_validation_declarative_validation_parity_discrepancies_total [ALPHA] Number of discrepancies between declarative and handwritten validation, broken down by validation identifier.
# TYPE apiserver_validation_declarative_validation_parity_discrepancies_total counter
apiserver_validation_declarative_validation_parity_discrepancies_total{validation_identifier="test_validation_identifier"} 2
apiserver_validation_declarative_validation_parity_discrepancies_total{validation_identifier="another_test_validation_identifier"} 1
# HELP apiserver_validation_declarative_validation_mismatch_total [BETA] Number of times declarative validation results differed from handwritten validation results for core types.
# TYPE apiserver_validation_declarative_validation_mismatch_total counter
apiserver_validation_declarative_validation_mismatch_total 3
`
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expected), "apiserver_validation_declarative_validation_mismatch_total"); err != nil {
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expected), "apiserver_validation_declarative_validation_parity_discrepancies_total", "apiserver_validation_declarative_validation_mismatch_total"); err != nil {
t.Fatal(err)
}
}
@ -108,17 +123,21 @@ func TestDeclarativeValidationPanicMetricMultiple(t *testing.T) {
defer ResetValidationMetricsInstance()
// Increment the metric three times
Metrics.IncDeclarativeValidationPanicMetric()
Metrics.IncDeclarativeValidationPanicMetric()
Metrics.IncDeclarativeValidationPanicMetric()
Metrics.IncDeclarativeValidationPanicMetric(testIdentifier)
Metrics.IncDeclarativeValidationPanicMetric(testIdentifier)
Metrics.IncDeclarativeValidationPanicMetric(anotherTestIdentifier)
expected := `
# HELP apiserver_validation_declarative_validation_panics_total [ALPHA] Number of panics in declarative validation, broken down by validation identifier.
# TYPE apiserver_validation_declarative_validation_panics_total counter
apiserver_validation_declarative_validation_panics_total{validation_identifier="test_validation_identifier"} 2
apiserver_validation_declarative_validation_panics_total{validation_identifier="another_test_validation_identifier"} 1
# HELP apiserver_validation_declarative_validation_panic_total [BETA] Number of times declarative validation has panicked during validation.
# TYPE apiserver_validation_declarative_validation_panic_total counter
apiserver_validation_declarative_validation_panic_total 3
`
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expected), "apiserver_validation_declarative_validation_panic_total"); err != nil {
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expected), "apiserver_validation_declarative_validation_panic_total", "apiserver_validation_declarative_validation_panics_total"); err != nil {
t.Fatal(err)
}
}
@ -150,8 +169,8 @@ func TestDeclarativeValidationMetricsReset(t *testing.T) {
defer ResetValidationMetricsInstance()
// Increment both metrics
Metrics.IncDeclarativeValidationMismatchMetric()
Metrics.IncDeclarativeValidationPanicMetric()
Metrics.IncDeclarativeValidationMismatchMetric(testIdentifier)
Metrics.IncDeclarativeValidationPanicMetric(testIdentifier)
Metrics.IncDuplicateValidationErrorMetric()
// Reset the metrics
@ -178,8 +197,8 @@ func TestDeclarativeValidationMetricsReset(t *testing.T) {
}
// Increment the metrics again to ensure they're still functional
Metrics.IncDeclarativeValidationMismatchMetric()
Metrics.IncDeclarativeValidationPanicMetric()
Metrics.IncDeclarativeValidationMismatchMetric(testIdentifier)
Metrics.IncDeclarativeValidationPanicMetric(testIdentifier)
Metrics.IncDuplicateValidationErrorMetric()
// Verify they've been incremented correctly
@ -193,11 +212,19 @@ func TestDeclarativeValidationMetricsReset(t *testing.T) {
# HELP apiserver_validation_duplicate_validation_error_total [INTERNAL] Number of duplicate validation errors during validation.
# TYPE apiserver_validation_duplicate_validation_error_total counter
apiserver_validation_duplicate_validation_error_total 1
# HELP apiserver_validation_declarative_validation_parity_discrepancies_total [ALPHA] Number of discrepancies between declarative and handwritten validation, broken down by validation identifier.
# TYPE apiserver_validation_declarative_validation_parity_discrepancies_total counter
apiserver_validation_declarative_validation_parity_discrepancies_total{validation_identifier="test_validation_identifier"} 1
# HELP apiserver_validation_declarative_validation_panics_total [ALPHA] Number of panics in declarative validation, broken down by validation identifier.
# TYPE apiserver_validation_declarative_validation_panics_total counter
apiserver_validation_declarative_validation_panics_total{validation_identifier="test_validation_identifier"} 1
`
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expected),
"apiserver_validation_declarative_validation_mismatch_total",
"apiserver_validation_declarative_validation_parity_discrepancies_total",
"apiserver_validation_declarative_validation_panic_total",
"apiserver_validation_declarative_validation_panics_total",
"apiserver_validation_duplicate_validation_error_total"); err != nil {
t.Fatal(err)
}