diff --git a/cluster-autoscaler/simulator/cluster.go b/cluster-autoscaler/simulator/cluster.go index 1c3e5bf10a..6bd27ad44c 100644 --- a/cluster-autoscaler/simulator/cluster.go +++ b/cluster-autoscaler/simulator/cluster.go @@ -123,9 +123,6 @@ func (r *RemovalSimulator) FindNodesToRemove( timestamp time.Time, remainingPdbTracker pdb.RemainingPdbTracker, ) (nodesToRemove []NodeToBeRemoved, unremovableNodes []*UnremovableNode) { - result := make([]NodeToBeRemoved, 0) - unremovable := make([]*UnremovableNode, 0) - destinationMap := make(map[string]bool, len(destinations)) for _, destination := range destinations { destinationMap[destination] = true @@ -134,12 +131,12 @@ func (r *RemovalSimulator) FindNodesToRemove( for _, nodeName := range candidates { rn, urn := r.SimulateNodeRemoval(nodeName, destinationMap, timestamp, remainingPdbTracker) if rn != nil { - result = append(result, *rn) + nodesToRemove = append(nodesToRemove, *rn) } else if urn != nil { - unremovable = append(unremovable, urn) + unremovableNodes = append(unremovableNodes, urn) } } - return result, unremovable + return nodesToRemove, unremovableNodes } // SimulateNodeRemoval simulates removing a node from the cluster to check diff --git a/cluster-autoscaler/simulator/cluster_test.go b/cluster-autoscaler/simulator/cluster_test.go index c53aec5cbc..e08c605c7c 100644 --- a/cluster-autoscaler/simulator/cluster_test.go +++ b/cluster-autoscaler/simulator/cluster_test.go @@ -136,14 +136,11 @@ func TestFindNodesToRemove(t *testing.T) { fullNodeInfo.AddPod(pod4) emptyNodeToRemove := NodeToBeRemoved{ - Node: emptyNode, - PodsToReschedule: []*apiv1.Pod{}, - DaemonSetPods: []*apiv1.Pod{}, + Node: emptyNode, } drainableNodeToRemove := NodeToBeRemoved{ Node: drainableNode, PodsToReschedule: []*apiv1.Pod{pod1, pod2}, - DaemonSetPods: []*apiv1.Pod{}, } clusterSnapshot := clustersnapshot.NewBasicClusterSnapshot() @@ -153,19 +150,16 @@ func TestFindNodesToRemove(t *testing.T) { tests := []findNodesToRemoveTestConfig{ { - name: "just an empty node, should be removed", - pods: []*apiv1.Pod{}, - candidates: []string{emptyNode.Name}, - allNodes: []*apiv1.Node{emptyNode}, - toRemove: []NodeToBeRemoved{emptyNodeToRemove}, - unremovable: []*UnremovableNode{}, + name: "just an empty node, should be removed", + candidates: []string{emptyNode.Name}, + allNodes: []*apiv1.Node{emptyNode}, + toRemove: []NodeToBeRemoved{emptyNodeToRemove}, }, { name: "just a drainable node, but nowhere for pods to go to", pods: []*apiv1.Pod{pod1, pod2}, candidates: []string{drainableNode.Name}, allNodes: []*apiv1.Node{drainableNode}, - toRemove: []NodeToBeRemoved{}, unremovable: []*UnremovableNode{{Node: drainableNode, Reason: NoPlaceToMovePods}}, }, { @@ -181,16 +175,14 @@ func TestFindNodesToRemove(t *testing.T) { pods: []*apiv1.Pod{pod1, pod2, pod4}, candidates: []string{drainableNode.Name}, allNodes: []*apiv1.Node{drainableNode, fullNode}, - toRemove: []NodeToBeRemoved{}, unremovable: []*UnremovableNode{{Node: drainableNode, Reason: NoPlaceToMovePods}}, }, { - name: "4 nodes, 1 empty, 1 drainable", - pods: []*apiv1.Pod{pod1, pod2, pod3, pod4}, - candidates: []string{emptyNode.Name, drainableNode.Name}, - allNodes: []*apiv1.Node{emptyNode, drainableNode, fullNode, nonDrainableNode}, - toRemove: []NodeToBeRemoved{emptyNodeToRemove, drainableNodeToRemove}, - unremovable: []*UnremovableNode{}, + name: "4 nodes, 1 empty, 1 drainable", + pods: []*apiv1.Pod{pod1, pod2, pod3, pod4}, + candidates: []string{emptyNode.Name, drainableNode.Name}, + allNodes: []*apiv1.Node{emptyNode, drainableNode, fullNode, nonDrainableNode}, + toRemove: []NodeToBeRemoved{emptyNodeToRemove, drainableNodeToRemove}, }, } diff --git a/cluster-autoscaler/simulator/drain.go b/cluster-autoscaler/simulator/drain.go index 5df8585e1f..8a7ae28b9e 100644 --- a/cluster-autoscaler/simulator/drain.go +++ b/cluster-autoscaler/simulator/drain.go @@ -39,7 +39,6 @@ import ( // If listers is not nil it checks whether RC, DS, Jobs and RS that created // these pods still exist. func GetPodsToMove(nodeInfo *schedulerframework.NodeInfo, deleteOptions options.NodeDeleteOptions, drainabilityRules rules.Rules, listers kube_util.ListerRegistry, remainingPdbTracker pdb.RemainingPdbTracker, timestamp time.Time) (pods []*apiv1.Pod, daemonSetPods []*apiv1.Pod, blockingPod *drain.BlockingPod, err error) { - var drainPods, drainDs []*apiv1.Pod if drainabilityRules == nil { drainabilityRules = rules.Default(deleteOptions) } @@ -55,31 +54,21 @@ func GetPodsToMove(nodeInfo *schedulerframework.NodeInfo, deleteOptions options. pod := podInfo.Pod status := drainabilityRules.Drainable(drainCtx, pod) switch status.Outcome { - case drainability.UndefinedOutcome: - pods = append(pods, podInfo.Pod) - case drainability.DrainOk: + case drainability.UndefinedOutcome, drainability.DrainOk: + if drain.IsPodLongTerminating(pod, timestamp) { + continue + } if pod_util.IsDaemonSetPod(pod) { - drainDs = append(drainDs, pod) + daemonSetPods = append(daemonSetPods, pod) } else { - drainPods = append(drainPods, pod) + pods = append(pods, pod) } case drainability.BlockDrain: - blockingPod = &drain.BlockingPod{ + return nil, nil, &drain.BlockingPod{ Pod: pod, Reason: status.BlockingReason, - } - err = status.Error - return + }, status.Error } } - - pods, daemonSetPods = drain.GetPodsForDeletionOnNodeDrain( - pods, - remainingPdbTracker.GetPdbs(), - deleteOptions.SkipNodesWithSystemPods, - deleteOptions.SkipNodesWithLocalStorage, - deleteOptions.SkipNodesWithCustomControllerPods, - timestamp) - - return append(pods, drainPods...), append(daemonSetPods, drainDs...), nil, nil + return pods, daemonSetPods, nil, nil } diff --git a/cluster-autoscaler/simulator/drain_test.go b/cluster-autoscaler/simulator/drain_test.go index 1a7326be9a..ca0d6a0554 100644 --- a/cluster-autoscaler/simulator/drain_test.go +++ b/cluster-autoscaler/simulator/drain_test.go @@ -21,6 +21,8 @@ import ( "testing" "time" + appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" apiv1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -30,6 +32,8 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules" "k8s.io/autoscaler/cluster-autoscaler/simulator/options" "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" + kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" . "k8s.io/autoscaler/cluster-autoscaler/utils/test" "k8s.io/kubernetes/pkg/kubelet/types" schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" @@ -38,150 +42,466 @@ import ( ) func TestGetPodsToMove(t *testing.T) { - testTime := time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) - unreplicatedPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "unreplicatedPod", - Namespace: "ns", - }, - } - rsPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "rsPod", - Namespace: "ns", - OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), - }, - } - manifestPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "manifestPod", - Namespace: "kube-system", - Annotations: map[string]string{ - types.ConfigMirrorAnnotationKey: "something", + var ( + testTime = time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) + replicas = int32(5) + + unreplicatedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "unreplicatedPod", + Namespace: "ns", }, - }, - } - dsPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "dsPod", - Namespace: "ns", - OwnerReferences: GenerateOwnerReferences("ds", "DaemonSet", "extensions/v1beta1", ""), - }, - } - systemPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "systemPod", - Namespace: "kube-system", - OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), - }, - } - localStoragePod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "localStoragePod", - Namespace: "ns", - OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), - }, - Spec: apiv1.PodSpec{ - Volumes: []apiv1.Volume{ - { - Name: "empty-vol", - VolumeSource: apiv1.VolumeSource{ - EmptyDir: &apiv1.EmptyDirVolumeSource{}, - }, + } + manifestPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "manifestPod", + Namespace: "kube-system", + Annotations: map[string]string{ + types.ConfigMirrorAnnotationKey: "something", }, }, - }, - } - nonLocalStoragePod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "nonLocalStoragePod", - Namespace: "ns", - OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), - }, - Spec: apiv1.PodSpec{ - Volumes: []apiv1.Volume{ - { - Name: "my-repo", - VolumeSource: apiv1.VolumeSource{ - GitRepo: &apiv1.GitRepoVolumeSource{ - Repository: "my-repo", + } + systemPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "systemPod", + Namespace: "kube-system", + OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), + }, + } + localStoragePod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "localStoragePod", + Namespace: "ns", + OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), + }, + Spec: apiv1.PodSpec{ + Volumes: []apiv1.Volume{ + { + Name: "empty-vol", + VolumeSource: apiv1.VolumeSource{ + EmptyDir: &apiv1.EmptyDirVolumeSource{}, }, }, }, }, - }, - } - pdbPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "pdbPod", - Namespace: "ns", - OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), - Labels: map[string]string{ - "critical": "true", + } + nonLocalStoragePod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "nonLocalStoragePod", + Namespace: "ns", + OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), }, - }, - Spec: apiv1.PodSpec{}, - } - one := intstr.FromInt(1) - restrictivePdb := &policyv1.PodDisruptionBudget{ - ObjectMeta: metav1.ObjectMeta{ - Name: "foobar", - Namespace: "ns", - }, - Spec: policyv1.PodDisruptionBudgetSpec{ - MinAvailable: &one, - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ + Spec: apiv1.PodSpec{ + Volumes: []apiv1.Volume{ + { + Name: "my-repo", + VolumeSource: apiv1.VolumeSource{ + GitRepo: &apiv1.GitRepoVolumeSource{ + Repository: "my-repo", + }, + }, + }, + }, + }, + } + pdbPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pdbPod", + Namespace: "ns", + OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), + Labels: map[string]string{ "critical": "true", }, }, - }, - Status: policyv1.PodDisruptionBudgetStatus{ - DisruptionsAllowed: 0, - }, - } - permissivePdb := &policyv1.PodDisruptionBudget{ - ObjectMeta: metav1.ObjectMeta{ - Name: "foobar", - Namespace: "ns", - }, - Spec: policyv1.PodDisruptionBudgetSpec{ - MinAvailable: &one, - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "critical": "true", + Spec: apiv1.PodSpec{}, + } + one = intstr.FromInt(1) + restrictivePdb = &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foobar", + Namespace: "ns", + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + MinAvailable: &one, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "critical": "true", + }, }, }, - }, - Status: policyv1.PodDisruptionBudgetStatus{ - DisruptionsAllowed: 1, - }, - } - terminatedPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "terminatedPod", - Namespace: "ns", - OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), - DeletionTimestamp: &metav1.Time{ - Time: testTime.Add(-1*drain.PodLongTerminatingExtraThreshold - time.Minute), // more than PodLongTerminatingExtraThreshold + Status: policyv1.PodDisruptionBudgetStatus{ + DisruptionsAllowed: 0, }, - }, - } - terminatingPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "terminatingPod", - Namespace: "ns", - OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), - DeletionTimestamp: &metav1.Time{ - Time: testTime.Add(-1*drain.PodLongTerminatingExtraThreshold + time.Minute), // still terminating, below the default TerminatingGracePeriode + } + permissivePdb = &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foobar", + Namespace: "ns", }, - }, - } + Spec: policyv1.PodDisruptionBudgetSpec{ + MinAvailable: &one, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "critical": "true", + }, + }, + }, + Status: policyv1.PodDisruptionBudgetStatus{ + DisruptionsAllowed: 1, + }, + } + terminatedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "terminatedPod", + Namespace: "ns", + OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), + DeletionTimestamp: &metav1.Time{ + Time: testTime.Add(-1*drain.PodLongTerminatingExtraThreshold - time.Minute), // more than PodLongTerminatingExtraThreshold + }, + }, + } + terminatingPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "terminatingPod", + Namespace: "ns", + OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), + DeletionTimestamp: &metav1.Time{ + Time: testTime.Add(-1*drain.PodLongTerminatingExtraThreshold + time.Minute), // still terminating, below the default TerminatingGracePeriod + }, + }, + } + + rc = apiv1.ReplicationController{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rc", + Namespace: "default", + SelfLink: "api/v1/namespaces/default/replicationcontrollers/rc", + }, + Spec: apiv1.ReplicationControllerSpec{ + Replicas: &replicas, + }, + } + rcPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + kubeSystemRc = apiv1.ReplicationController{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rc", + Namespace: "kube-system", + SelfLink: "api/v1/namespaces/kube-system/replicationcontrollers/rc", + }, + Spec: apiv1.ReplicationControllerSpec{ + Replicas: &replicas, + }, + } + kubeSystemRcPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + OwnerReferences: GenerateOwnerReferences(kubeSystemRc.Name, "ReplicationController", "core/v1", ""), + Labels: map[string]string{ + "k8s-app": "bar", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + ds = appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ds", + Namespace: "default", + SelfLink: "/apiv1s/apps/v1/namespaces/default/daemonsets/ds", + }, + } + dsPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(ds.Name, "DaemonSet", "apps/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + cdsPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(ds.Name, "CustomDaemonSet", "crd/v1", ""), + Annotations: map[string]string{ + "cluster-autoscaler.kubernetes.io/daemonset-pod": "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + job = batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job", + Namespace: "default", + SelfLink: "/apiv1s/batch/v1/namespaces/default/jobs/job", + }, + } + jobPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), + }, + } + statefulset = appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ss", + Namespace: "default", + SelfLink: "/apiv1s/apps/v1/namespaces/default/statefulsets/ss", + }, + } + ssPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(statefulset.Name, "StatefulSet", "apps/v1", ""), + }, + } + rs = appsv1.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rs", + Namespace: "default", + SelfLink: "api/v1/namespaces/default/replicasets/rs", + }, + Spec: appsv1.ReplicaSetSpec{ + Replicas: &replicas, + }, + } + rsPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + rsPodDeleted = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-time.Hour)}, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + emptyDirSafeToEvictLocalVolumeMultiValAllMatching = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: "scratch-1,scratch-2,scratch-3", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-2", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-3", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + terminalPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodSucceeded, + }, + } + zeroGracePeriod = int64(0) + longTerminatingPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * drain.PodLongTerminatingExtraThreshold)}, + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &zeroGracePeriod, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + } + extendedGracePeriod = int64(6 * 60) // 6 minutes + longTerminatingPodWithExtendedGracePeriod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-time.Duration(extendedGracePeriod/2) * time.Second)}, + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &extendedGracePeriod, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + } + failedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyNever, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + } + evictedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyAlways, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + } + safePod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + kubeSystemSafePod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + emptydirSafePod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + emptyPDB = &policyv1.PodDisruptionBudget{} + kubeSystemPDB = &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "kube-system", + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + MinAvailable: &one, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "k8s-app": "bar", + }, + }, + }, + Status: policyv1.PodDisruptionBudgetStatus{ + DisruptionsAllowed: 1, + }, + } + kubeSystemFakePDB = &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "kube-system", + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + MinAvailable: &one, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "k8s-app": "foo", + }, + }, + }, + Status: policyv1.PodDisruptionBudgetStatus{ + DisruptionsAllowed: 1, + }, + } + defaultNamespacePDB = &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "default", + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + MinAvailable: &one, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "k8s-app": "PDB-managed pod", + }, + }, + }, + Status: policyv1.PodDisruptionBudgetStatus{ + DisruptionsAllowed: 1, + }, + } + ) testCases := []struct { desc string pods []*apiv1.Pod pdbs []*policyv1.PodDisruptionBudget + rcs []*apiv1.ReplicationController + replicaSets []*appsv1.ReplicaSet rules rules.Rules wantPods []*apiv1.Pod wantDs []*apiv1.Pod @@ -304,9 +624,149 @@ func TestGetPodsToMove(t *testing.T) { rules: []rules.Rule{cantDecide{}}, wantPods: []*apiv1.Pod{rsPod}, }, + + { + desc: "RC-managed pod", + pods: []*apiv1.Pod{rcPod}, + rcs: []*apiv1.ReplicationController{&rc}, + wantPods: []*apiv1.Pod{rcPod}, + }, + { + desc: "DS-managed pod", + pods: []*apiv1.Pod{dsPod}, + wantDs: []*apiv1.Pod{dsPod}, + }, + { + desc: "DS-managed pod by a custom Daemonset", + pods: []*apiv1.Pod{cdsPod}, + wantDs: []*apiv1.Pod{cdsPod}, + }, + { + desc: "Job-managed pod", + pods: []*apiv1.Pod{jobPod}, + rcs: []*apiv1.ReplicationController{&rc}, + wantPods: []*apiv1.Pod{jobPod}, + }, + { + desc: "SS-managed pod", + pods: []*apiv1.Pod{ssPod}, + rcs: []*apiv1.ReplicationController{&rc}, + wantPods: []*apiv1.Pod{ssPod}, + }, + { + desc: "RS-managed pod", + pods: []*apiv1.Pod{rsPod}, + replicaSets: []*appsv1.ReplicaSet{&rs}, + wantPods: []*apiv1.Pod{rsPod}, + }, + { + desc: "RS-managed pod that is being deleted", + pods: []*apiv1.Pod{rsPodDeleted}, + replicaSets: []*appsv1.ReplicaSet{&rs}, + }, + { + desc: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with matching values", + pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValAllMatching}, + rcs: []*apiv1.ReplicationController{&rc}, + wantPods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValAllMatching}, + }, + { + desc: "failed pod", + pods: []*apiv1.Pod{failedPod}, + wantPods: []*apiv1.Pod{failedPod}, + }, + { + desc: "long terminating pod with 0 grace period", + pods: []*apiv1.Pod{longTerminatingPod}, + rcs: []*apiv1.ReplicationController{&rc}, + }, + { + desc: "long terminating pod with extended grace period", + pods: []*apiv1.Pod{longTerminatingPodWithExtendedGracePeriod}, + rcs: []*apiv1.ReplicationController{&rc}, + wantPods: []*apiv1.Pod{longTerminatingPodWithExtendedGracePeriod}, + }, + { + desc: "evicted pod", + pods: []*apiv1.Pod{evictedPod}, + wantPods: []*apiv1.Pod{evictedPod}, + }, + { + desc: "pod in terminal state", + pods: []*apiv1.Pod{terminalPod}, + wantPods: []*apiv1.Pod{terminalPod}, + }, + { + desc: "pod with PodSafeToEvict annotation", + pods: []*apiv1.Pod{safePod}, + wantPods: []*apiv1.Pod{safePod}, + }, + { + desc: "kube-system pod with PodSafeToEvict annotation", + pods: []*apiv1.Pod{kubeSystemSafePod}, + wantPods: []*apiv1.Pod{kubeSystemSafePod}, + }, + { + desc: "pod with EmptyDir and PodSafeToEvict annotation", + pods: []*apiv1.Pod{emptydirSafePod}, + wantPods: []*apiv1.Pod{emptydirSafePod}, + }, + { + desc: "empty PDB with RC-managed pod", + pods: []*apiv1.Pod{rcPod}, + pdbs: []*policyv1.PodDisruptionBudget{emptyPDB}, + rcs: []*apiv1.ReplicationController{&rc}, + wantPods: []*apiv1.Pod{rcPod}, + }, + { + desc: "kube-system PDB with matching kube-system pod", + pods: []*apiv1.Pod{kubeSystemRcPod}, + pdbs: []*policyv1.PodDisruptionBudget{kubeSystemPDB}, + rcs: []*apiv1.ReplicationController{&kubeSystemRc}, + wantPods: []*apiv1.Pod{kubeSystemRcPod}, + }, + { + desc: "kube-system PDB with non-matching kube-system pod", + pods: []*apiv1.Pod{kubeSystemRcPod}, + pdbs: []*policyv1.PodDisruptionBudget{kubeSystemFakePDB}, + rcs: []*apiv1.ReplicationController{&kubeSystemRc}, + wantErr: true, + wantBlocking: &drain.BlockingPod{Pod: kubeSystemRcPod, Reason: drain.UnmovableKubeSystemPod}, + }, + { + desc: "kube-system PDB with default namespace pod", + pods: []*apiv1.Pod{rcPod}, + pdbs: []*policyv1.PodDisruptionBudget{kubeSystemPDB}, + rcs: []*apiv1.ReplicationController{&rc}, + wantPods: []*apiv1.Pod{rcPod}, + }, + { + desc: "default namespace PDB with matching labels kube-system pod", + pods: []*apiv1.Pod{kubeSystemRcPod}, + pdbs: []*policyv1.PodDisruptionBudget{defaultNamespacePDB}, + rcs: []*apiv1.ReplicationController{&kubeSystemRc}, + wantErr: true, + wantBlocking: &drain.BlockingPod{Pod: kubeSystemRcPod, Reason: drain.UnmovableKubeSystemPod}, + }, } for _, tc := range testCases { t.Run(tc.desc, func(t *testing.T) { + var registry kubernetes.ListerRegistry + if tc.rcs != nil || tc.replicaSets != nil { + rcLister, err := kube_util.NewTestReplicationControllerLister(tc.rcs) + assert.NoError(t, err) + rsLister, err := kube_util.NewTestReplicaSetLister(tc.replicaSets) + assert.NoError(t, err) + dsLister, err := kube_util.NewTestDaemonSetLister([]*appsv1.DaemonSet{&ds}) + assert.NoError(t, err) + jobLister, err := kube_util.NewTestJobLister([]*batchv1.Job{&job}) + assert.NoError(t, err) + ssLister, err := kube_util.NewTestStatefulSetLister([]*appsv1.StatefulSet{&statefulset}) + assert.NoError(t, err) + + registry = kube_util.NewListerRegistry(nil, nil, nil, nil, dsLister, rcLister, jobLister, rsLister, ssLister) + } + deleteOptions := options.NodeDeleteOptions{ SkipNodesWithSystemPods: true, SkipNodesWithLocalStorage: true, @@ -315,7 +775,7 @@ func TestGetPodsToMove(t *testing.T) { rules := append(tc.rules, rules.Default(deleteOptions)...) tracker := pdb.NewBasicRemainingPdbTracker() tracker.SetPdbs(tc.pdbs) - p, d, b, err := GetPodsToMove(schedulerframework.NewNodeInfo(tc.pods...), deleteOptions, rules, nil, tracker, testTime) + p, d, b, err := GetPodsToMove(schedulerframework.NewNodeInfo(tc.pods...), deleteOptions, rules, registry, tracker, testTime) if tc.wantErr { assert.Error(t, err) } else { diff --git a/cluster-autoscaler/simulator/drainability/rules/daemonset/rule.go b/cluster-autoscaler/simulator/drainability/rules/daemonset/rule.go index 05f93d288a..894065a3de 100644 --- a/cluster-autoscaler/simulator/drainability/rules/daemonset/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/daemonset/rule.go @@ -33,7 +33,7 @@ func New() *Rule { // Drainable decides what to do with daemon set pods on node drain. func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { if pod_util.IsDaemonSetPod(pod) { - return drainability.NewUndefinedStatus(drainability.Interrupt) + return drainability.NewDrainableStatus() } return drainability.NewUndefinedStatus() } diff --git a/cluster-autoscaler/simulator/drainability/rules/daemonset/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/daemonset/rule_test.go index d444d70799..1bd05e7d35 100644 --- a/cluster-autoscaler/simulator/drainability/rules/daemonset/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/daemonset/rule_test.go @@ -47,7 +47,7 @@ func TestDrainable(t *testing.T) { OwnerReferences: test.GenerateOwnerReferences("ds", "DaemonSet", "apps/v1", ""), }, }, - want: drainability.NewUndefinedStatus(drainability.Interrupt), + want: drainability.NewDrainableStatus(), }, } { t.Run(desc, func(t *testing.T) { diff --git a/cluster-autoscaler/simulator/drainability/rules/longterminating/rule.go b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule.go index 5edc6b70f6..8b7c6aa64c 100644 --- a/cluster-autoscaler/simulator/drainability/rules/longterminating/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule.go @@ -33,7 +33,7 @@ func New() *Rule { // Drainable decides what to do with long terminating pods on node drain. func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { if drain.IsPodLongTerminating(pod, drainCtx.Timestamp) { - return drainability.NewUndefinedStatus(drainability.Interrupt) + return drainability.NewDrainableStatus() } return drainability.NewUndefinedStatus() } diff --git a/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go index 934453429d..e9660aa34f 100644 --- a/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go @@ -61,7 +61,7 @@ func TestDrainable(t *testing.T) { Phase: apiv1.PodUnknown, }, }, - want: drainability.NewUndefinedStatus(drainability.Interrupt), + want: drainability.NewDrainableStatus(), }, "long terminating pod with extended grace period": { pod: &apiv1.Pod{ @@ -78,7 +78,7 @@ func TestDrainable(t *testing.T) { Phase: apiv1.PodUnknown, }, }, - want: drainability.NewUndefinedStatus(drainability.Interrupt), + want: drainability.NewDrainableStatus(), }, } { t.Run(desc, func(t *testing.T) { diff --git a/cluster-autoscaler/simulator/drainability/rules/rules.go b/cluster-autoscaler/simulator/drainability/rules/rules.go index 6bd2f970a4..b02fb9aba0 100644 --- a/cluster-autoscaler/simulator/drainability/rules/rules.go +++ b/cluster-autoscaler/simulator/drainability/rules/rules.go @@ -88,7 +88,7 @@ func (rs Rules) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) d for _, r := range rs { d := r.Drainable(drainCtx, pod) - if d.Interrupted || d.Outcome != drainability.UndefinedOutcome { + if d.Outcome != drainability.UndefinedOutcome { return d } } diff --git a/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule.go b/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule.go index e9fd565d6d..396e982c02 100644 --- a/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule.go @@ -33,7 +33,7 @@ func New() *Rule { // Drainable decides what to do with safe to evict pods on node drain. func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { if drain.HasSafeToEvictAnnotation(pod) { - return drainability.NewUndefinedStatus(drainability.Interrupt) + return drainability.NewDrainableStatus() } return drainability.NewUndefinedStatus() } diff --git a/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule_test.go index d0560f2fea..3052183ffc 100644 --- a/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule_test.go @@ -49,7 +49,7 @@ func TestDrainable(t *testing.T) { }, }, }, - want: drainability.NewUndefinedStatus(drainability.Interrupt), + want: drainability.NewDrainableStatus(), }, } { t.Run(desc, func(t *testing.T) { diff --git a/cluster-autoscaler/simulator/drainability/rules/terminal/rule.go b/cluster-autoscaler/simulator/drainability/rules/terminal/rule.go index 164c61f467..addae17337 100644 --- a/cluster-autoscaler/simulator/drainability/rules/terminal/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/terminal/rule.go @@ -33,7 +33,7 @@ func New() *Rule { // Drainable decides what to do with terminal pods on node drain. func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { if drain.IsPodTerminal(pod) { - return drainability.NewUndefinedStatus(drainability.Interrupt) + return drainability.NewDrainableStatus() } return drainability.NewUndefinedStatus() } diff --git a/cluster-autoscaler/simulator/drainability/rules/terminal/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/terminal/rule_test.go index 9d8cfdc566..f63d9b660b 100644 --- a/cluster-autoscaler/simulator/drainability/rules/terminal/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/terminal/rule_test.go @@ -51,7 +51,7 @@ func TestDrainable(t *testing.T) { Phase: apiv1.PodSucceeded, }, }, - want: drainability.NewUndefinedStatus(drainability.Interrupt), + want: drainability.NewDrainableStatus(), }, "failed pod": { pod: &apiv1.Pod{ @@ -66,7 +66,7 @@ func TestDrainable(t *testing.T) { Phase: apiv1.PodFailed, }, }, - want: drainability.NewUndefinedStatus(drainability.Interrupt), + want: drainability.NewDrainableStatus(), }, } { t.Run(desc, func(t *testing.T) { diff --git a/cluster-autoscaler/simulator/drainability/status.go b/cluster-autoscaler/simulator/drainability/status.go index 281d03c9c5..d73f346bea 100644 --- a/cluster-autoscaler/simulator/drainability/status.go +++ b/cluster-autoscaler/simulator/drainability/status.go @@ -48,23 +48,6 @@ type Status struct { BlockingReason drain.BlockingPodReason // Error contains an optional error message. Error error - // Interrupted means that the Rule returning the status exited early and that - // additional Rules should not be run. - Interrupted bool -} - -// Option is used to modify a Status. -type Option func(*Status) - -// Interrupt implies no additional Rules should be run. -func Interrupt(s *Status) { - s.Interrupted = true -} - -func applyOptions(s *Status, opts []Option) { - for _, opt := range opts { - opt(s) - } } // NewDrainableStatus returns a new Status indicating that a pod can be drained. @@ -91,8 +74,6 @@ func NewSkipStatus() Status { } // NewUndefinedStatus returns a new Status that doesn't contain a decision. -func NewUndefinedStatus(opts ...Option) Status { - s := Status{} - applyOptions(&s, opts) - return s +func NewUndefinedStatus() Status { + return Status{} } diff --git a/cluster-autoscaler/utils/drain/drain.go b/cluster-autoscaler/utils/drain/drain.go index 45a28a3841..81eef9ad85 100644 --- a/cluster-autoscaler/utils/drain/drain.go +++ b/cluster-autoscaler/utils/drain/drain.go @@ -21,9 +21,7 @@ import ( "time" apiv1 "k8s.io/api/core/v1" - policyv1 "k8s.io/api/policy/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" ) const ( @@ -70,37 +68,6 @@ const ( UnexpectedError ) -// GetPodsForDeletionOnNodeDrain returns pods that should be deleted on node -// drain as well as some extra information about possibly problematic pods -// (unreplicated and DaemonSets). -// -// This function assumes that default drainability rules have already been run -// to verify pod drainability. -func GetPodsForDeletionOnNodeDrain( - podList []*apiv1.Pod, - pdbs []*policyv1.PodDisruptionBudget, - skipNodesWithSystemPods bool, - skipNodesWithLocalStorage bool, - skipNodesWithCustomControllerPods bool, - currentTime time.Time) (pods []*apiv1.Pod, daemonSetPods []*apiv1.Pod) { - - pods = []*apiv1.Pod{} - daemonSetPods = []*apiv1.Pod{} - - for _, pod := range podList { - if IsPodLongTerminating(pod, currentTime) { - continue - } - - if pod_util.IsDaemonSetPod(pod) { - daemonSetPods = append(daemonSetPods, pod) - } else { - pods = append(pods, pod) - } - } - return pods, daemonSetPods -} - // ControllerRef returns the OwnerReference to pod's controller. func ControllerRef(pod *apiv1.Pod) *metav1.OwnerReference { return metav1.GetControllerOf(pod) diff --git a/cluster-autoscaler/utils/drain/drain_test.go b/cluster-autoscaler/utils/drain/drain_test.go index 0a20b781a9..95176fdf55 100644 --- a/cluster-autoscaler/utils/drain/drain_test.go +++ b/cluster-autoscaler/utils/drain/drain_test.go @@ -17,544 +17,13 @@ limitations under the License. package drain import ( - "fmt" "testing" "time" - appsv1 "k8s.io/api/apps/v1" - batchv1 "k8s.io/api/batch/v1" apiv1 "k8s.io/api/core/v1" - policyv1 "k8s.io/api/policy/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - . "k8s.io/autoscaler/cluster-autoscaler/utils/test" - - "github.com/stretchr/testify/assert" ) -// testOpts represents parameters required for a single unit test -type testOpts struct { - description string - pods []*apiv1.Pod - pdbs []*policyv1.PodDisruptionBudget - rcs []*apiv1.ReplicationController - replicaSets []*appsv1.ReplicaSet - expectPods []*apiv1.Pod - expectDaemonSetPods []*apiv1.Pod - // TODO(vadasambar): remove this when we get rid of scaleDownNodesWithCustomControllerPods - skipNodesWithCustomControllerPods bool -} - -func TestDrain(t *testing.T) { - testTime := time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) - replicas := int32(5) - - rc := apiv1.ReplicationController{ - ObjectMeta: metav1.ObjectMeta{ - Name: "rc", - Namespace: "default", - SelfLink: "api/v1/namespaces/default/replicationcontrollers/rc", - }, - Spec: apiv1.ReplicationControllerSpec{ - Replicas: &replicas, - }, - } - - rcPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - kubeSystemRc := apiv1.ReplicationController{ - ObjectMeta: metav1.ObjectMeta{ - Name: "rc", - Namespace: "kube-system", - SelfLink: "api/v1/namespaces/kube-system/replicationcontrollers/rc", - }, - Spec: apiv1.ReplicationControllerSpec{ - Replicas: &replicas, - }, - } - - kubeSystemRcPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "kube-system", - OwnerReferences: GenerateOwnerReferences(kubeSystemRc.Name, "ReplicationController", "core/v1", ""), - Labels: map[string]string{ - "k8s-app": "bar", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - ds := appsv1.DaemonSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "ds", - Namespace: "default", - SelfLink: "/apiv1s/apps/v1/namespaces/default/daemonsets/ds", - }, - } - - dsPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(ds.Name, "DaemonSet", "apps/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - cdsPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(ds.Name, "CustomDaemonSet", "crd/v1", ""), - Annotations: map[string]string{ - "cluster-autoscaler.kubernetes.io/daemonset-pod": "true", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - job := batchv1.Job{ - ObjectMeta: metav1.ObjectMeta{ - Name: "job", - Namespace: "default", - SelfLink: "/apiv1s/batch/v1/namespaces/default/jobs/job", - }, - } - - jobPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), - }, - } - - statefulset := appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "ss", - Namespace: "default", - SelfLink: "/apiv1s/apps/v1/namespaces/default/statefulsets/ss", - }, - } - - ssPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(statefulset.Name, "StatefulSet", "apps/v1", ""), - }, - } - - rs := appsv1.ReplicaSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "rs", - Namespace: "default", - SelfLink: "api/v1/namespaces/default/replicasets/rs", - }, - Spec: appsv1.ReplicaSetSpec{ - Replicas: &replicas, - }, - } - - rsPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - rsPodDeleted := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), - DeletionTimestamp: &metav1.Time{Time: testTime.Add(-time.Hour)}, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - customControllerPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - // Using names like FooController is discouraged - // https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#naming-conventions - // vadasambar: I am using it here just because `FooController`` - // is easier to understand than say `FooSet` - OwnerReferences: GenerateOwnerReferences("Foo", "FooController", "apps/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - emptyDirSafeToEvictLocalVolumeMultiValAllMatching := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - SafeToEvictLocalVolumesKey: "scratch-1,scratch-2,scratch-3", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch-1", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-2", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-3", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - } - - terminalPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyOnFailure, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodSucceeded, - }, - } - - zeroGracePeriod := int64(0) - longTerminatingPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * PodLongTerminatingExtraThreshold)}, - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyOnFailure, - TerminationGracePeriodSeconds: &zeroGracePeriod, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodUnknown, - }, - } - extendedGracePeriod := int64(6 * 60) // 6 minutes - longTerminatingPodWithExtendedGracePeriod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - DeletionTimestamp: &metav1.Time{Time: testTime.Add(-time.Duration(extendedGracePeriod/2) * time.Second)}, - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyOnFailure, - TerminationGracePeriodSeconds: &extendedGracePeriod, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodUnknown, - }, - } - - failedPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyNever, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodFailed, - }, - } - - evictedPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyAlways, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodFailed, - }, - } - - safePod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - Annotations: map[string]string{ - PodSafeToEvictKey: "true", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - kubeSystemSafePod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "kube-system", - Annotations: map[string]string{ - PodSafeToEvictKey: "true", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - emptydirSafePod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - Annotations: map[string]string{ - PodSafeToEvictKey: "true", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - } - - emptyPDB := &policyv1.PodDisruptionBudget{} - - kubeSystemPDB := &policyv1.PodDisruptionBudget{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "kube-system", - }, - Spec: policyv1.PodDisruptionBudgetSpec{ - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "k8s-app": "bar", - }, - }, - }, - } - - sharedTests := []testOpts{ - { - description: "RC-managed pod", - pods: []*apiv1.Pod{rcPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectPods: []*apiv1.Pod{rcPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "DS-managed pod", - pods: []*apiv1.Pod{dsPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectPods: []*apiv1.Pod{}, - expectDaemonSetPods: []*apiv1.Pod{dsPod}, - }, - { - description: "DS-managed pod by a custom Daemonset", - pods: []*apiv1.Pod{cdsPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectPods: []*apiv1.Pod{}, - expectDaemonSetPods: []*apiv1.Pod{cdsPod}, - }, - { - description: "Job-managed pod", - pods: []*apiv1.Pod{jobPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectPods: []*apiv1.Pod{jobPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "SS-managed pod", - pods: []*apiv1.Pod{ssPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectPods: []*apiv1.Pod{ssPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "RS-managed pod", - pods: []*apiv1.Pod{rsPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - replicaSets: []*appsv1.ReplicaSet{&rs}, - expectPods: []*apiv1.Pod{rsPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "RS-managed pod that is being deleted", - pods: []*apiv1.Pod{rsPodDeleted}, - pdbs: []*policyv1.PodDisruptionBudget{}, - replicaSets: []*appsv1.ReplicaSet{&rs}, - expectPods: []*apiv1.Pod{}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with matching values", - pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValAllMatching}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectPods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValAllMatching}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "failed pod", - pods: []*apiv1.Pod{failedPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectPods: []*apiv1.Pod{failedPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "long terminating pod with 0 grace period", - pods: []*apiv1.Pod{longTerminatingPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectPods: []*apiv1.Pod{}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "long terminating pod with extended grace period", - pods: []*apiv1.Pod{longTerminatingPodWithExtendedGracePeriod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectPods: []*apiv1.Pod{longTerminatingPodWithExtendedGracePeriod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "evicted pod", - pods: []*apiv1.Pod{evictedPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectPods: []*apiv1.Pod{evictedPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod in terminal state", - pods: []*apiv1.Pod{terminalPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectPods: []*apiv1.Pod{terminalPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod with PodSafeToEvict annotation", - pods: []*apiv1.Pod{safePod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectPods: []*apiv1.Pod{safePod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "kube-system pod with PodSafeToEvict annotation", - pods: []*apiv1.Pod{kubeSystemSafePod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectPods: []*apiv1.Pod{kubeSystemSafePod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod with EmptyDir and PodSafeToEvict annotation", - pods: []*apiv1.Pod{emptydirSafePod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectPods: []*apiv1.Pod{emptydirSafePod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "empty PDB with RC-managed pod", - pods: []*apiv1.Pod{rcPod}, - pdbs: []*policyv1.PodDisruptionBudget{emptyPDB}, - rcs: []*apiv1.ReplicationController{&rc}, - expectPods: []*apiv1.Pod{rcPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "kube-system PDB with matching kube-system pod", - pods: []*apiv1.Pod{kubeSystemRcPod}, - pdbs: []*policyv1.PodDisruptionBudget{kubeSystemPDB}, - rcs: []*apiv1.ReplicationController{&kubeSystemRc}, - expectPods: []*apiv1.Pod{kubeSystemRcPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "kube-system PDB with default namespace pod", - pods: []*apiv1.Pod{rcPod}, - pdbs: []*policyv1.PodDisruptionBudget{kubeSystemPDB}, - rcs: []*apiv1.ReplicationController{&rc}, - expectPods: []*apiv1.Pod{rcPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - } - - allTests := []testOpts{} - // Note: be careful about modifying the underlying reference values for sharedTest - // since they are shared (changing it once will change it for all shallow copies of sharedTest) - for _, sharedTest := range sharedTests { - for _, skipNodesWithCustomControllerPods := range []bool{true, false} { - // Copy test to prevent side effects. - test := sharedTest - test.skipNodesWithCustomControllerPods = skipNodesWithCustomControllerPods - test.description = fmt.Sprintf("%s with skipNodesWithCustomControllerPods:%t", test.description, skipNodesWithCustomControllerPods) - allTests = append(allTests, test) - } - } - - allTests = append(allTests, testOpts{ - description: "Custom-controller-managed non-blocking pod", - pods: []*apiv1.Pod{customControllerPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectPods: []*apiv1.Pod{customControllerPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }) - - for _, test := range allTests { - t.Run(test.description, func(t *testing.T) { - pods, daemonSetPods := GetPodsForDeletionOnNodeDrain(test.pods, test.pdbs, true, true, test.skipNodesWithCustomControllerPods, testTime) - - if len(pods) != len(test.expectPods) { - t.Fatal("wrong pod list content") - } - - assert.ElementsMatch(t, test.expectDaemonSetPods, daemonSetPods) - }) - } -} - func TestIsPodLongTerminating(t *testing.T) { testTime := time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) twoMinGracePeriod := int64(2 * 60)