diff --git a/cluster-autoscaler/simulator/node_info_utils.go b/cluster-autoscaler/simulator/node_info_utils.go
index 438c7747a8..4e3adf11fb 100644
--- a/cluster-autoscaler/simulator/node_info_utils.go
+++ b/cluster-autoscaler/simulator/node_info_utils.go
@@ -148,23 +148,34 @@ func podsExpectedOnFreshNode(sanitizedExampleNodeInfo *framework.NodeInfo, daemo
 	// TODO(DRA): Figure out how to make this work for DS pods using DRA. Currently such pods would get force-added to the
 	// ClusterSnapshot, but the ResourceClaims reflecting their DRA usage on the Node wouldn't. So CA would be overestimating
 	// available DRA resources on the Node.
-	if forceDaemonSets {
-		var pendingDS []*appsv1.DaemonSet
-		for _, ds := range daemonsets {
-			if !runningDS[ds.UID] {
-				pendingDS = append(pendingDS, ds)
-			}
+	var pendingDS []*appsv1.DaemonSet
+	for _, ds := range daemonsets {
+		if !runningDS[ds.UID] {
+			pendingDS = append(pendingDS, ds)
 		}
-		// The provided nodeInfo has to have taints properly sanitized, or this won't work correctly.
-		daemonPods, err := daemonset.GetDaemonSetPodsForNode(sanitizedExampleNodeInfo, pendingDS)
-		if err != nil {
-			return nil, err
-		}
-		for _, pod := range daemonPods {
-			// There's technically no need to sanitize these pods since they're created from scratch, but
-			// it's nice to have the same suffix for all names in one sanitized NodeInfo when debugging.
-			result = append(result, &framework.PodInfo{Pod: createSanitizedPod(pod.Pod, sanitizedExampleNodeInfo.Node().Name, nameSuffix)})
+	}
+	// The provided nodeInfo has to have taints properly sanitized, or this won't work correctly.
+	daemonPods, err := daemonset.GetDaemonSetPodsForNode(sanitizedExampleNodeInfo, pendingDS)
+	if err != nil {
+		return nil, err
+	}
+	for _, pod := range daemonPods {
+		if !forceDaemonSets && !isPreemptingSystemNodeCritical(pod) {
+			continue
 		}
+		// There's technically no need to sanitize these pods since they're created from scratch, but
+		// it's nice to have the same suffix for all names in one sanitized NodeInfo when debugging.
+		result = append(result, &framework.PodInfo{Pod: createSanitizedPod(pod.Pod, sanitizedExampleNodeInfo.Node().Name, nameSuffix)})
 	}
 	return result, nil
 }
+
+func isPreemptingSystemNodeCritical(pod *framework.PodInfo) bool {
+	if pod.Spec.PriorityClassName != labels.SystemNodeCriticalLabel {
+		return false
+	}
+	if pod.Spec.PreemptionPolicy != nil && *pod.Spec.PreemptionPolicy != apiv1.PreemptLowerPriority {
+		return false
+	}
+	return true
+}
diff --git a/cluster-autoscaler/simulator/node_info_utils_test.go b/cluster-autoscaler/simulator/node_info_utils_test.go
index 3c95f32f6b..70013cb927 100644
--- a/cluster-autoscaler/simulator/node_info_utils_test.go
+++ b/cluster-autoscaler/simulator/node_info_utils_test.go
@@ -31,13 +31,15 @@ import (
 	resourceapi "k8s.io/api/resource/v1beta1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/kubernetes/pkg/controller/daemon"
+
 	"k8s.io/autoscaler/cluster-autoscaler/config"
 	drautils "k8s.io/autoscaler/cluster-autoscaler/simulator/dynamicresources/utils"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
+	"k8s.io/autoscaler/cluster-autoscaler/utils/labels"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
 	. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
-	"k8s.io/kubernetes/pkg/controller/daemon"
 )
 
 var (
@@ -69,7 +71,21 @@
 			},
 		},
 	}
-	testDaemonSets = []*appsv1.DaemonSet{ds1, ds2, ds3}
+	ds4 = &appsv1.DaemonSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "ds4",
+			Namespace: "ds4-namespace",
+			UID:       types.UID("ds4"),
+		},
+		Spec: appsv1.DaemonSetSpec{
+			Template: apiv1.PodTemplateSpec{
+				Spec: apiv1.PodSpec{
+					PriorityClassName: labels.SystemNodeCriticalLabel,
+				},
+			},
+		},
+	}
+	testDaemonSets = []*appsv1.DaemonSet{ds1, ds2, ds3, ds4}
 )
 
 func TestSanitizedTemplateNodeInfoFromNodeGroup(t *testing.T) {
@@ -98,6 +114,7 @@ func TestSanitizedTemplateNodeInfoFromNodeGroup(t *testing.T) {
 			wantPods: []*apiv1.Pod{
 				buildDSPod(ds1, "n"),
 				buildDSPod(ds2, "n"),
+				buildDSPod(ds4, "n"),
 			},
 		},
 		{
@@ -116,6 +133,7 @@ func TestSanitizedTemplateNodeInfoFromNodeGroup(t *testing.T) {
 				SetMirrorPodSpec(BuildScheduledTestPod("p3", 100, 1, "n")),
 				buildDSPod(ds1, "n"),
 				buildDSPod(ds2, "n"),
+				buildDSPod(ds4, "n"),
 			},
 		},
 	} {
@@ -208,6 +226,7 @@ func TestSanitizedTemplateNodeInfoFromNodeInfo(t *testing.T) {
 			daemonSets: testDaemonSets,
 			wantPods: []*apiv1.Pod{
 				buildDSPod(ds1, "n"),
+				buildDSPod(ds4, "n"),
 			},
 		},
 		{
@@ -232,6 +251,7 @@ func TestSanitizedTemplateNodeInfoFromNodeInfo(t *testing.T) {
 			wantPods: []*apiv1.Pod{
 				buildDSPod(ds1, "n"),
 				buildDSPod(ds2, "n"),
+				buildDSPod(ds4, "n"),
 			},
 		},
 		{
@@ -248,6 +268,7 @@ func TestSanitizedTemplateNodeInfoFromNodeInfo(t *testing.T) {
 			wantPods: []*apiv1.Pod{
 				SetMirrorPodSpec(BuildScheduledTestPod("p3", 100, 1, "n")),
 				buildDSPod(ds1, "n"),
+				buildDSPod(ds4, "n"),
 			},
 		},
 		{
@@ -266,6 +287,7 @@ func TestSanitizedTemplateNodeInfoFromNodeInfo(t *testing.T) {
 				SetMirrorPodSpec(BuildScheduledTestPod("p3", 100, 1, "n")),
 				buildDSPod(ds1, "n"),
 				buildDSPod(ds2, "n"),
+				buildDSPod(ds4, "n"),
 			},
 		},
 	}
diff --git a/cluster-autoscaler/utils/labels/labels.go b/cluster-autoscaler/utils/labels/labels.go
index e4dcf661d2..081e78b14b 100644
--- a/cluster-autoscaler/utils/labels/labels.go
+++ b/cluster-autoscaler/utils/labels/labels.go
@@ -25,6 +25,11 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 )
 
+const (
+	// SystemNodeCriticalLabel is a label that marks critical pods with the highest priority.
+	SystemNodeCriticalLabel = "system-node-critical"
+)
+
 var (
 	// cpu amount used for account pods that don't specify cpu requests
 	defaultMinCPU = *resource.NewMilliQuantity(50, resource.DecimalSI)