KEP-4603: Node specific kubelet config for maximum backoff down to 1 second (#128374)

* Add feature gate, API, and conflict validation tests for enablecrashloopbackoffmax

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* Handle when current base is longer than node max

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* Update pkg/features/kube_features.go

Co-authored-by: Tsubasa Nagasawa <toversus2357@gmail.com>

* Fix indentation

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* Follow convention for success test

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* Normalize casing, and change field to Duration

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* Fix json name and some other casing errors

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* Another one I missed before

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* Don't clobber global max function

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* Change to flat value in defaults.go

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* Streamline validation and defaults

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* Fix typecheck

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* Lint

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* Tighten up validation for subsecond values

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* Rename field from MaxBackOffPeriod to MaxContainerRestartPeriod

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* A few missed references to renames

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* Only compare flags in flags test

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* Don't mess with SetDefault signature

Nobody messes with SetDefault signature

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* Fix stale signature change, and update test data

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* Inspect current feature gates at defaulting time

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* Don't use the global feature gate for temp usage

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* Expose default error, and some comments

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

* Hint fuzzer for less arbitrary values to FeatureGates

Signed-off-by: Laura Lorenz <lauralorenz@google.com>

---------

Signed-off-by: Laura Lorenz <lauralorenz@google.com>
Co-authored-by: Tsubasa Nagasawa <toversus2357@gmail.com>

Kubernetes-commit: 7fe41da5221b215d097e930c6b7aa52e88324f66
This commit is contained in:
lauralorenz 2024-11-08 17:44:43 -08:00 committed by Kubernetes Publisher
parent 353a4bcb20
commit 3b14f64fe5
2 changed files with 36 additions and 0 deletions

View File

@ -775,6 +775,11 @@ type KubeletConfiguration struct {
// +featureGate=GracefulNodeShutdownBasedOnPodPriority
// +optional
ShutdownGracePeriodByPodPriority []ShutdownGracePeriodByPodPriority `json:"shutdownGracePeriodByPodPriority,omitempty"`
// CrashLoopBackOff contains config to modify node-level parameters for
// container restart behavior
// +featureGate=KubeletCrashLoopBackOffMax
// +optional
CrashLoopBackOff CrashLoopBackOffConfig `json:"crashLoopBackOff,omitempty"`
// reservedMemory specifies a comma-separated list of memory reservations for NUMA nodes.
// The parameter makes sense only in the context of the memory manager feature.
// The memory manager will not allocate reserved memory for container workloads.
@ -975,6 +980,15 @@ type MemorySwapConfiguration struct {
SwapBehavior string `json:"swapBehavior,omitempty"`
}
type CrashLoopBackOffConfig struct {
// maxContainerRestartPeriod is the maximum duration the backoff delay can accrue
// to for container restarts, minimum 1 second, maximum 300 seconds. If not set,
// defaults to the internal crashloopbackoff maximum (300s).
// +featureGate=KubeletCrashLoopBackOffMax
// +optional
MaxContainerRestartPeriod *metav1.Duration `json:"maxContainerRestartPeriod,omitempty"`
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// CredentialProviderConfig is the configuration containing information about

View File

@ -28,6 +28,27 @@ import (
apiv1 "k8s.io/component-base/tracing/api/v1"
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *CrashLoopBackOffConfig) DeepCopyInto(out *CrashLoopBackOffConfig) {
*out = *in
if in.MaxContainerRestartPeriod != nil {
in, out := &in.MaxContainerRestartPeriod, &out.MaxContainerRestartPeriod
*out = new(v1.Duration)
**out = **in
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CrashLoopBackOffConfig.
func (in *CrashLoopBackOffConfig) DeepCopy() *CrashLoopBackOffConfig {
if in == nil {
return nil
}
out := new(CrashLoopBackOffConfig)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *CredentialProvider) DeepCopyInto(out *CredentialProvider) {
*out = *in
@ -441,6 +462,7 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) {
*out = make([]ShutdownGracePeriodByPodPriority, len(*in))
copy(*out, *in)
}
in.CrashLoopBackOff.DeepCopyInto(&out.CrashLoopBackOff)
if in.ReservedMemory != nil {
in, out := &in.ReservedMemory, &out.ReservedMemory
*out = make([]MemoryReservation, len(*in))