diff --git a/cluster-autoscaler/core/scale_up.go b/cluster-autoscaler/core/scale_up.go index 4fda164448..5a825bb0a1 100644 --- a/cluster-autoscaler/core/scale_up.go +++ b/cluster-autoscaler/core/scale_up.go @@ -563,6 +563,7 @@ func getPodsPredicatePassingCheckFunctions( podsPassingPredicatesCache := make(map[string][]*apiv1.Pod) podsNotPassingPredicatesCache := make(map[string]map[*apiv1.Pod]status.Reasons) errorsCache := make(map[string]error) + checker := newPodsSchedulableOnNodeChecker(context, unschedulablePods) computeCaches := func(nodeGroupId string) { nodeInfo, found := nodeInfos[nodeGroupId] @@ -573,7 +574,7 @@ func getPodsPredicatePassingCheckFunctions( podsPassing := make([]*apiv1.Pod, 0) podsNotPassing := make(map[*apiv1.Pod]status.Reasons) - schedulableOnNode := checkPodsSchedulableOnNode(context, unschedulablePods, nodeGroupId, nodeInfo) + schedulableOnNode := checker.checkPodsSchedulableOnNode(nodeGroupId, nodeInfo) for pod, err := range schedulableOnNode { if err == nil { podsPassing = append(podsPassing, pod) diff --git a/cluster-autoscaler/core/utils.go b/cluster-autoscaler/core/utils.go index 496bfec5eb..6af1e99951 100644 --- a/cluster-autoscaler/core/utils.go +++ b/cluster-autoscaler/core/utils.go @@ -18,6 +18,7 @@ package core import ( "fmt" + "k8s.io/apimachinery/pkg/types" "math/rand" "reflect" "time" @@ -150,40 +151,89 @@ func filterOutExpendablePods(pods []*apiv1.Pod, expendablePodsPriorityCutoff int return result } -// checkPodsSchedulableOnNode checks if pods can be scheduled on the given node. -func checkPodsSchedulableOnNode(context *context.AutoscalingContext, pods []*apiv1.Pod, nodeGroupId string, nodeInfo *schedulernodeinfo.NodeInfo) map[*apiv1.Pod]*simulator.PredicateError { - schedulingErrors := map[*apiv1.Pod]*simulator.PredicateError{} - loggingQuota := glogx.PodsLoggingQuota() - podSchedulable := make(podSchedulableMap) +type equivalenceGroupId int + +type podsSchedulableOnNodeChecker struct { + context *context.AutoscalingContext + pods []*apiv1.Pod + podsEquivalenceGroups map[types.UID]equivalenceGroupId +} + +func newPodsSchedulableOnNodeChecker(context *context.AutoscalingContext, pods []*apiv1.Pod) *podsSchedulableOnNodeChecker { + checker := podsSchedulableOnNodeChecker{ + context: context, + pods: pods, + podsEquivalenceGroups: make(map[types.UID]equivalenceGroupId), + } + + // compute the podsEquivalenceGroups + var nextGroupId equivalenceGroupId + type equivalanceGroup struct { + id equivalenceGroupId + representant *apiv1.Pod + } + + equivalenceGroupsByController := make(map[types.UID][]equivalanceGroup) for _, pod := range pods { - // Check if pod isn't repeated before overwriting result for it. - if _, repeated := schedulingErrors[pod]; repeated { - // This shouldn't really happen. - klog.Warningf("Pod %v appears multiple time on pods list, will only count it once in scale-up simulation", pod) + controllerRef := drain.ControllerRef(pod) + if controllerRef == nil { + checker.podsEquivalenceGroups[pod.UID] = nextGroupId + nextGroupId++ + continue } - // Try to get result from cache. - err, found := podSchedulable.get(pod) - if found { - schedulingErrors[pod] = err - if err != nil { - glogx.V(2).UpTo(loggingQuota).Infof("Pod %s can't be scheduled on %s. Used cached predicate check results", pod.Name, nodeGroupId) + + matchingFound := false + for _, g := range equivalenceGroupsByController[controllerRef.UID] { + if reflect.DeepEqual(pod.Labels, g.representant.Labels) && apiequality.Semantic.DeepEqual(pod.Spec, g.representant.Spec) { + matchingFound = true + checker.podsEquivalenceGroups[pod.UID] = g.id + break } } + + if !matchingFound { + newGroup := equivalanceGroup{ + id: nextGroupId, + representant: pod, + } + equivalenceGroupsByController[controllerRef.UID] = append(equivalenceGroupsByController[controllerRef.UID], newGroup) + checker.podsEquivalenceGroups[pod.UID] = newGroup.id + nextGroupId++ + } + } + + return &checker +} + +// checkPodsSchedulableOnNode checks if pods can be scheduled on the given node. +func (c *podsSchedulableOnNodeChecker) checkPodsSchedulableOnNode(nodeGroupId string, nodeInfo *schedulernodeinfo.NodeInfo) map[*apiv1.Pod]*simulator.PredicateError { + loggingQuota := glogx.PodsLoggingQuota() + schedulingErrors := make(map[equivalenceGroupId]*simulator.PredicateError) + + for _, pod := range c.pods { + equivalenceGroup := c.podsEquivalenceGroups[pod.UID] + err, found := schedulingErrors[equivalenceGroup] + if found && err != nil { + glogx.V(2).UpTo(loggingQuota).Infof("Pod %s can't be scheduled on %s. Used cached predicate check results", pod.Name, nodeGroupId) + } // Not found in cache, have to run the predicates. if !found { - err = context.PredicateChecker.CheckPredicates(pod, nil, nodeInfo) - podSchedulable.set(pod, err) - schedulingErrors[pod] = err + err = c.context.PredicateChecker.CheckPredicates(pod, nil, nodeInfo) + schedulingErrors[equivalenceGroup] = err if err != nil { // Always log for the first pod in a controller. klog.V(2).Infof("Pod %s can't be scheduled on %s, predicate failed: %v", pod.Name, nodeGroupId, err.VerboseError()) } } } - glogx.V(2).Over(loggingQuota).Infof("%v other pods can't be scheduled on %s.", -loggingQuota.Left(), nodeGroupId) - return schedulingErrors + + schedulingErrorsByPod := make(map[*apiv1.Pod]*simulator.PredicateError) + for _, pod := range c.pods { + schedulingErrorsByPod[pod] = schedulingErrors[c.podsEquivalenceGroups[pod.UID]] + } + return schedulingErrorsByPod } // getNodeInfosForGroups finds NodeInfos for all node groups used to manage the given nodes. It also returns a node group to sample node mapping. diff --git a/cluster-autoscaler/core/utils_test.go b/cluster-autoscaler/core/utils_test.go index ed63f62583..5c958b7183 100644 --- a/cluster-autoscaler/core/utils_test.go +++ b/cluster-autoscaler/core/utils_test.go @@ -193,13 +193,13 @@ func TestFilterSchedulablePodsForNode(t *testing.T) { } p1 := BuildTestPod("p1", 1500, 200000) - p2_1 := BuildTestPod("p2_2", 3000, 200000) + p2_1 := BuildTestPod("p2_1", 3000, 200000) p2_1.OwnerReferences = GenerateOwnerReferences(rc1.Name, "ReplicationController", "extensions/v1beta1", rc1.UID) p2_2 := BuildTestPod("p2_2", 3000, 200000) p2_2.OwnerReferences = GenerateOwnerReferences(rc1.Name, "ReplicationController", "extensions/v1beta1", rc1.UID) - p3_1 := BuildTestPod("p3", 100, 200000) + p3_1 := BuildTestPod("p3_1", 100, 200000) p3_1.OwnerReferences = GenerateOwnerReferences(rc2.Name, "ReplicationController", "extensions/v1beta1", rc2.UID) - p3_2 := BuildTestPod("p3", 100, 200000) + p3_2 := BuildTestPod("p3_2", 100, 200000) p3_2.OwnerReferences = GenerateOwnerReferences(rc2.Name, "ReplicationController", "extensions/v1beta1", rc2.UID) unschedulablePods := []*apiv1.Pod{p1, p2_1, p2_2, p3_1, p3_2} @@ -212,7 +212,8 @@ func TestFilterSchedulablePodsForNode(t *testing.T) { PredicateChecker: simulator.NewTestPredicateChecker(), } - res := checkPodsSchedulableOnNode(context, unschedulablePods, "T1-abc", tni) + checker := newPodsSchedulableOnNodeChecker(context, unschedulablePods) + res := checker.checkPodsSchedulableOnNode("T1-abc", tni) wantedSchedulable := []*apiv1.Pod{p1, p3_1, p3_2} wantedUnschedulable := []*apiv1.Pod{p2_1, p2_2} diff --git a/cluster-autoscaler/utils/test/test_utils.go b/cluster-autoscaler/utils/test/test_utils.go index 37ec3d4081..0d6604d056 100644 --- a/cluster-autoscaler/utils/test/test_utils.go +++ b/cluster-autoscaler/utils/test/test_utils.go @@ -39,6 +39,7 @@ import ( func BuildTestPod(name string, cpu int64, mem int64) *apiv1.Pod { pod := &apiv1.Pod{ ObjectMeta: metav1.ObjectMeta{ + UID: types.UID(name), Namespace: "default", Name: name, SelfLink: fmt.Sprintf("/api/v1/namespaces/default/pods/%s", name),