Merge pull request #2071 from losipiuk/lo/predicate-checker-speedup
Precompute inter pod equivalence groups in checkPodsSchedulableOnNode
commit a0853bcc80
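This change replaces the per-call podSchedulableMap cache with pod equivalence groups that are computed once, up front: newPodsSchedulableOnNodeChecker groups the unschedulable pods by controller UID, equal labels, and semantically equal spec, and checkPodsSchedulableOnNode then evaluates the scheduler predicates once per group for each node group instead of once per pod. A minimal usage sketch of the new API (assuming context, unschedulablePods, and the nodeInfos map are in scope, as in the hunks below):

    // Build the checker once per scale-up iteration; this assigns every
    // unschedulable pod to an equivalence group.
    checker := newPodsSchedulableOnNodeChecker(context, unschedulablePods)

    for nodeGroupId, nodeInfo := range nodeInfos {
        // Predicates run once per equivalence group, and the result is fanned
        // back out to every pod in that group.
        schedulableOnNode := checker.checkPodsSchedulableOnNode(nodeGroupId, nodeInfo)
        podsPassing := make([]*apiv1.Pod, 0)
        for pod, err := range schedulableOnNode {
            if err == nil {
                podsPassing = append(podsPassing, pod)
            }
        }
        // podsPassing now holds the unschedulable pods that fit this node group.
        _ = podsPassing
    }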
@@ -563,6 +563,7 @@ func getPodsPredicatePassingCheckFunctions(
     podsPassingPredicatesCache := make(map[string][]*apiv1.Pod)
     podsNotPassingPredicatesCache := make(map[string]map[*apiv1.Pod]status.Reasons)
     errorsCache := make(map[string]error)
+    checker := newPodsSchedulableOnNodeChecker(context, unschedulablePods)
 
     computeCaches := func(nodeGroupId string) {
         nodeInfo, found := nodeInfos[nodeGroupId]
@@ -573,7 +574,7 @@
         podsPassing := make([]*apiv1.Pod, 0)
         podsNotPassing := make(map[*apiv1.Pod]status.Reasons)
-        schedulableOnNode := checkPodsSchedulableOnNode(context, unschedulablePods, nodeGroupId, nodeInfo)
+        schedulableOnNode := checker.checkPodsSchedulableOnNode(nodeGroupId, nodeInfo)
         for pod, err := range schedulableOnNode {
             if err == nil {
                 podsPassing = append(podsPassing, pod)
@@ -18,6 +18,7 @@ package core
 import (
     "fmt"
+    "k8s.io/apimachinery/pkg/types"
     "math/rand"
     "reflect"
     "time"
@@ -150,40 +151,89 @@ func filterOutExpendablePods(pods []*apiv1.Pod, expendablePodsPriorityCutoff int
     return result
 }
 
-// checkPodsSchedulableOnNode checks if pods can be scheduled on the given node.
-func checkPodsSchedulableOnNode(context *context.AutoscalingContext, pods []*apiv1.Pod, nodeGroupId string, nodeInfo *schedulernodeinfo.NodeInfo) map[*apiv1.Pod]*simulator.PredicateError {
-    schedulingErrors := map[*apiv1.Pod]*simulator.PredicateError{}
-    loggingQuota := glogx.PodsLoggingQuota()
-    podSchedulable := make(podSchedulableMap)
+type equivalenceGroupId int
+
+type podsSchedulableOnNodeChecker struct {
+    context *context.AutoscalingContext
+    pods []*apiv1.Pod
+    podsEquivalenceGroups map[types.UID]equivalenceGroupId
+}
+
+func newPodsSchedulableOnNodeChecker(context *context.AutoscalingContext, pods []*apiv1.Pod) *podsSchedulableOnNodeChecker {
+    checker := podsSchedulableOnNodeChecker{
+        context: context,
+        pods: pods,
+        podsEquivalenceGroups: make(map[types.UID]equivalenceGroupId),
+    }
+
+    // compute the podsEquivalenceGroups
+    var nextGroupId equivalenceGroupId
+    type equivalanceGroup struct {
+        id equivalenceGroupId
+        representant *apiv1.Pod
+    }
+
+    equivalenceGroupsByController := make(map[types.UID][]equivalanceGroup)
+
     for _, pod := range pods {
-        // Check if pod isn't repeated before overwriting result for it.
-        if _, repeated := schedulingErrors[pod]; repeated {
-            // This shouldn't really happen.
-            klog.Warningf("Pod %v appears multiple time on pods list, will only count it once in scale-up simulation", pod)
+        controllerRef := drain.ControllerRef(pod)
+        if controllerRef == nil {
+            checker.podsEquivalenceGroups[pod.UID] = nextGroupId
+            nextGroupId++
+            continue
         }
-        // Try to get result from cache.
-        err, found := podSchedulable.get(pod)
-        if found {
-            schedulingErrors[pod] = err
-            if err != nil {
-                glogx.V(2).UpTo(loggingQuota).Infof("Pod %s can't be scheduled on %s. Used cached predicate check results", pod.Name, nodeGroupId)
+
+        matchingFound := false
+        for _, g := range equivalenceGroupsByController[controllerRef.UID] {
+            if reflect.DeepEqual(pod.Labels, g.representant.Labels) && apiequality.Semantic.DeepEqual(pod.Spec, g.representant.Spec) {
+                matchingFound = true
+                checker.podsEquivalenceGroups[pod.UID] = g.id
+                break
             }
         }
+
+        if !matchingFound {
+            newGroup := equivalanceGroup{
+                id: nextGroupId,
+                representant: pod,
+            }
+            equivalenceGroupsByController[controllerRef.UID] = append(equivalenceGroupsByController[controllerRef.UID], newGroup)
+            checker.podsEquivalenceGroups[pod.UID] = newGroup.id
+            nextGroupId++
+        }
+    }
+
+    return &checker
+}
+
+// checkPodsSchedulableOnNode checks if pods can be scheduled on the given node.
+func (c *podsSchedulableOnNodeChecker) checkPodsSchedulableOnNode(nodeGroupId string, nodeInfo *schedulernodeinfo.NodeInfo) map[*apiv1.Pod]*simulator.PredicateError {
+    loggingQuota := glogx.PodsLoggingQuota()
+    schedulingErrors := make(map[equivalenceGroupId]*simulator.PredicateError)
+
+    for _, pod := range c.pods {
+        equivalenceGroup := c.podsEquivalenceGroups[pod.UID]
+        err, found := schedulingErrors[equivalenceGroup]
+        if found && err != nil {
+            glogx.V(2).UpTo(loggingQuota).Infof("Pod %s can't be scheduled on %s. Used cached predicate check results", pod.Name, nodeGroupId)
+        }
         // Not found in cache, have to run the predicates.
         if !found {
-            err = context.PredicateChecker.CheckPredicates(pod, nil, nodeInfo)
-            podSchedulable.set(pod, err)
-            schedulingErrors[pod] = err
+            err = c.context.PredicateChecker.CheckPredicates(pod, nil, nodeInfo)
+            schedulingErrors[equivalenceGroup] = err
             if err != nil {
                 // Always log for the first pod in a controller.
                 klog.V(2).Infof("Pod %s can't be scheduled on %s, predicate failed: %v", pod.Name, nodeGroupId, err.VerboseError())
             }
         }
     }
+
     glogx.V(2).Over(loggingQuota).Infof("%v other pods can't be scheduled on %s.", -loggingQuota.Left(), nodeGroupId)
-    return schedulingErrors
+
+    schedulingErrorsByPod := make(map[*apiv1.Pod]*simulator.PredicateError)
+    for _, pod := range c.pods {
+        schedulingErrorsByPod[pod] = schedulingErrors[c.podsEquivalenceGroups[pod.UID]]
+    }
+    return schedulingErrorsByPod
 }
 
 // getNodeInfosForGroups finds NodeInfos for all node groups used to manage the given nodes. It also returns a node group to sample node mapping.
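An illustration of the grouping rule above (a standalone sketch, not part of the diff; the pods and the sameGroup helper are hypothetical): pods that share a controller land in the same equivalence group only when their labels are deeply equal and their specs are semantically equal, so identical replicas cost a single CheckPredicates call per node group.

    package main

    import (
        "fmt"
        "reflect"

        apiv1 "k8s.io/api/core/v1"
        apiequality "k8s.io/apimachinery/pkg/api/equality"
    )

    // sameGroup mirrors the comparison newPodsSchedulableOnNodeChecker applies to
    // pods that already share a controller: equal labels and a semantically equal spec.
    func sameGroup(a, b *apiv1.Pod) bool {
        return reflect.DeepEqual(a.Labels, b.Labels) && apiequality.Semantic.DeepEqual(a.Spec, b.Spec)
    }

    func main() {
        replica1 := &apiv1.Pod{}
        replica1.Labels = map[string]string{"app": "web"}
        replica2 := replica1.DeepCopy()

        fmt.Println(sameGroup(replica1, replica2)) // true: one predicate check covers both replicas

        replica2.Labels["app"] = "db"
        fmt.Println(sameGroup(replica1, replica2)) // false: different labels, separate groups
    }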
@@ -193,13 +193,13 @@ func TestFilterSchedulablePodsForNode(t *testing.T) {
     }
 
     p1 := BuildTestPod("p1", 1500, 200000)
-    p2_1 := BuildTestPod("p2_2", 3000, 200000)
+    p2_1 := BuildTestPod("p2_1", 3000, 200000)
     p2_1.OwnerReferences = GenerateOwnerReferences(rc1.Name, "ReplicationController", "extensions/v1beta1", rc1.UID)
     p2_2 := BuildTestPod("p2_2", 3000, 200000)
     p2_2.OwnerReferences = GenerateOwnerReferences(rc1.Name, "ReplicationController", "extensions/v1beta1", rc1.UID)
-    p3_1 := BuildTestPod("p3", 100, 200000)
+    p3_1 := BuildTestPod("p3_1", 100, 200000)
     p3_1.OwnerReferences = GenerateOwnerReferences(rc2.Name, "ReplicationController", "extensions/v1beta1", rc2.UID)
-    p3_2 := BuildTestPod("p3", 100, 200000)
+    p3_2 := BuildTestPod("p3_2", 100, 200000)
     p3_2.OwnerReferences = GenerateOwnerReferences(rc2.Name, "ReplicationController", "extensions/v1beta1", rc2.UID)
     unschedulablePods := []*apiv1.Pod{p1, p2_1, p2_2, p3_1, p3_2}

@@ -212,7 +212,8 @@ func TestFilterSchedulablePodsForNode(t *testing.T) {
         PredicateChecker: simulator.NewTestPredicateChecker(),
     }
 
-    res := checkPodsSchedulableOnNode(context, unschedulablePods, "T1-abc", tni)
+    checker := newPodsSchedulableOnNodeChecker(context, unschedulablePods)
+    res := checker.checkPodsSchedulableOnNode("T1-abc", tni)
     wantedSchedulable := []*apiv1.Pod{p1, p3_1, p3_2}
     wantedUnschedulable := []*apiv1.Pod{p2_1, p2_2}
@@ -39,6 +39,7 @@ import (
 func BuildTestPod(name string, cpu int64, mem int64) *apiv1.Pod {
     pod := &apiv1.Pod{
         ObjectMeta: metav1.ObjectMeta{
+            UID: types.UID(name),
             Namespace: "default",
             Name: name,
             SelfLink: fmt.Sprintf("/api/v1/namespaces/default/pods/%s", name),
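The last hunk gives every test pod a UID: the new checker keys podsEquivalenceGroups by pod.UID, so pods built by BuildTestPod need distinct, non-empty UIDs for the grouping (and for TestFilterSchedulablePodsForNode above) to treat them as separate pods.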