Merge pull request #2071 from losipiuk/lo/predicate-checker-speedup
Precompute inter pod equivalence groups in checkPodsSchedulableOnNode
commit a0853bcc80
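This change replaces the per-call podSchedulableMap cache with pod equivalence groups that are computed once, up front: newPodsSchedulableOnNodeChecker groups the unschedulable pods by controller UID, equal labels, and semantically equal spec, and checkPodsSchedulableOnNode then evaluates the scheduler predicates once per group for each node group instead of once per pod. A minimal usage sketch of the new API (assuming context, unschedulablePods, and the nodeInfos map are in scope, as in the hunks below):

    // Build the checker once per scale-up iteration; this assigns every
    // unschedulable pod to an equivalence group.
    checker := newPodsSchedulableOnNodeChecker(context, unschedulablePods)

    for nodeGroupId, nodeInfo := range nodeInfos {
        // Predicates run once per equivalence group, and the result is fanned
        // back out to every pod in that group.
        schedulableOnNode := checker.checkPodsSchedulableOnNode(nodeGroupId, nodeInfo)
        podsPassing := make([]*apiv1.Pod, 0)
        for pod, err := range schedulableOnNode {
            if err == nil {
                podsPassing = append(podsPassing, pod)
            }
        }
        // podsPassing now holds the unschedulable pods that fit this node group.
        _ = podsPassing
    }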
@@ -563,6 +563,7 @@ func getPodsPredicatePassingCheckFunctions(
     podsPassingPredicatesCache := make(map[string][]*apiv1.Pod)
     podsNotPassingPredicatesCache := make(map[string]map[*apiv1.Pod]status.Reasons)
     errorsCache := make(map[string]error)
+    checker := newPodsSchedulableOnNodeChecker(context, unschedulablePods)
 
     computeCaches := func(nodeGroupId string) {
         nodeInfo, found := nodeInfos[nodeGroupId]
@@ -573,7 +574,7 @@
         podsPassing := make([]*apiv1.Pod, 0)
         podsNotPassing := make(map[*apiv1.Pod]status.Reasons)
-        schedulableOnNode := checkPodsSchedulableOnNode(context, unschedulablePods, nodeGroupId, nodeInfo)
+        schedulableOnNode := checker.checkPodsSchedulableOnNode(nodeGroupId, nodeInfo)
         for pod, err := range schedulableOnNode {
             if err == nil {
                 podsPassing = append(podsPassing, pod)
@@ -18,6 +18,7 @@ package core
 import (
     "fmt"
+    "k8s.io/apimachinery/pkg/types"
     "math/rand"
     "reflect"
     "time"
@@ -150,40 +151,89 @@ func filterOutExpendablePods(pods []*apiv1.Pod, expendablePodsPriorityCutoff int
     return result
 }
 
-// checkPodsSchedulableOnNode checks if pods can be scheduled on the given node.
-func checkPodsSchedulableOnNode(context *context.AutoscalingContext, pods []*apiv1.Pod, nodeGroupId string, nodeInfo *schedulernodeinfo.NodeInfo) map[*apiv1.Pod]*simulator.PredicateError {
-    schedulingErrors := map[*apiv1.Pod]*simulator.PredicateError{}
-    loggingQuota := glogx.PodsLoggingQuota()
-    podSchedulable := make(podSchedulableMap)
+type equivalenceGroupId int
+
+type podsSchedulableOnNodeChecker struct {
+    context *context.AutoscalingContext
+    pods []*apiv1.Pod
+    podsEquivalenceGroups map[types.UID]equivalenceGroupId
+}
+
+func newPodsSchedulableOnNodeChecker(context *context.AutoscalingContext, pods []*apiv1.Pod) *podsSchedulableOnNodeChecker {
+    checker := podsSchedulableOnNodeChecker{
+        context: context,
+        pods: pods,
+        podsEquivalenceGroups: make(map[types.UID]equivalenceGroupId),
+    }
+
+    // compute the podsEquivalenceGroups
+    var nextGroupId equivalenceGroupId
+    type equivalanceGroup struct {
+        id equivalenceGroupId
+        representant *apiv1.Pod
+    }
+
+    equivalenceGroupsByController := make(map[types.UID][]equivalanceGroup)
+
     for _, pod := range pods {
-        // Check if pod isn't repeated before overwriting result for it.
-        if _, repeated := schedulingErrors[pod]; repeated {
-            // This shouldn't really happen.
-            klog.Warningf("Pod %v appears multiple time on pods list, will only count it once in scale-up simulation", pod)
+        controllerRef := drain.ControllerRef(pod)
+        if controllerRef == nil {
+            checker.podsEquivalenceGroups[pod.UID] = nextGroupId
+            nextGroupId++
+            continue
         }
-        // Try to get result from cache.
-        err, found := podSchedulable.get(pod)
-        if found {
-            schedulingErrors[pod] = err
-            if err != nil {
-                glogx.V(2).UpTo(loggingQuota).Infof("Pod %s can't be scheduled on %s. Used cached predicate check results", pod.Name, nodeGroupId)
+
+        matchingFound := false
+        for _, g := range equivalenceGroupsByController[controllerRef.UID] {
+            if reflect.DeepEqual(pod.Labels, g.representant.Labels) && apiequality.Semantic.DeepEqual(pod.Spec, g.representant.Spec) {
+                matchingFound = true
+                checker.podsEquivalenceGroups[pod.UID] = g.id
+                break
             }
         }
+
+        if !matchingFound {
+            newGroup := equivalanceGroup{
+                id: nextGroupId,
+                representant: pod,
+            }
+            equivalenceGroupsByController[controllerRef.UID] = append(equivalenceGroupsByController[controllerRef.UID], newGroup)
+            checker.podsEquivalenceGroups[pod.UID] = newGroup.id
+            nextGroupId++
+        }
+    }
+
+    return &checker
+}
+
+// checkPodsSchedulableOnNode checks if pods can be scheduled on the given node.
+func (c *podsSchedulableOnNodeChecker) checkPodsSchedulableOnNode(nodeGroupId string, nodeInfo *schedulernodeinfo.NodeInfo) map[*apiv1.Pod]*simulator.PredicateError {
+    loggingQuota := glogx.PodsLoggingQuota()
+    schedulingErrors := make(map[equivalenceGroupId]*simulator.PredicateError)
+
+    for _, pod := range c.pods {
+        equivalenceGroup := c.podsEquivalenceGroups[pod.UID]
+        err, found := schedulingErrors[equivalenceGroup]
+        if found && err != nil {
+            glogx.V(2).UpTo(loggingQuota).Infof("Pod %s can't be scheduled on %s. Used cached predicate check results", pod.Name, nodeGroupId)
+        }
         // Not found in cache, have to run the predicates.
         if !found {
-            err = context.PredicateChecker.CheckPredicates(pod, nil, nodeInfo)
-            podSchedulable.set(pod, err)
-            schedulingErrors[pod] = err
+            err = c.context.PredicateChecker.CheckPredicates(pod, nil, nodeInfo)
+            schedulingErrors[equivalenceGroup] = err
             if err != nil {
                 // Always log for the first pod in a controller.
                 klog.V(2).Infof("Pod %s can't be scheduled on %s, predicate failed: %v", pod.Name, nodeGroupId, err.VerboseError())
             }
         }
     }
+
     glogx.V(2).Over(loggingQuota).Infof("%v other pods can't be scheduled on %s.", -loggingQuota.Left(), nodeGroupId)
-    return schedulingErrors
+
+    schedulingErrorsByPod := make(map[*apiv1.Pod]*simulator.PredicateError)
+    for _, pod := range c.pods {
+        schedulingErrorsByPod[pod] = schedulingErrors[c.podsEquivalenceGroups[pod.UID]]
+    }
+    return schedulingErrorsByPod
 }
 
 // getNodeInfosForGroups finds NodeInfos for all node groups used to manage the given nodes. It also returns a node group to sample node mapping.
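An illustration of the grouping rule above (a standalone sketch, not part of the diff; the pods and the sameGroup helper are hypothetical): pods that share a controller land in the same equivalence group only when their labels are deeply equal and their specs are semantically equal, so identical replicas cost a single CheckPredicates call per node group.

    package main

    import (
        "fmt"
        "reflect"

        apiv1 "k8s.io/api/core/v1"
        apiequality "k8s.io/apimachinery/pkg/api/equality"
    )

    // sameGroup mirrors the comparison newPodsSchedulableOnNodeChecker applies to
    // pods that already share a controller: equal labels and a semantically equal spec.
    func sameGroup(a, b *apiv1.Pod) bool {
        return reflect.DeepEqual(a.Labels, b.Labels) && apiequality.Semantic.DeepEqual(a.Spec, b.Spec)
    }

    func main() {
        replica1 := &apiv1.Pod{}
        replica1.Labels = map[string]string{"app": "web"}
        replica2 := replica1.DeepCopy()

        fmt.Println(sameGroup(replica1, replica2)) // true: one predicate check covers both replicas

        replica2.Labels["app"] = "db"
        fmt.Println(sameGroup(replica1, replica2)) // false: different labels, separate groups
    }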
@@ -193,13 +193,13 @@ func TestFilterSchedulablePodsForNode(t *testing.T) {
     }
 
     p1 := BuildTestPod("p1", 1500, 200000)
-    p2_1 := BuildTestPod("p2_2", 3000, 200000)
+    p2_1 := BuildTestPod("p2_1", 3000, 200000)
     p2_1.OwnerReferences = GenerateOwnerReferences(rc1.Name, "ReplicationController", "extensions/v1beta1", rc1.UID)
     p2_2 := BuildTestPod("p2_2", 3000, 200000)
     p2_2.OwnerReferences = GenerateOwnerReferences(rc1.Name, "ReplicationController", "extensions/v1beta1", rc1.UID)
-    p3_1 := BuildTestPod("p3", 100, 200000)
+    p3_1 := BuildTestPod("p3_1", 100, 200000)
     p3_1.OwnerReferences = GenerateOwnerReferences(rc2.Name, "ReplicationController", "extensions/v1beta1", rc2.UID)
-    p3_2 := BuildTestPod("p3", 100, 200000)
+    p3_2 := BuildTestPod("p3_2", 100, 200000)
     p3_2.OwnerReferences = GenerateOwnerReferences(rc2.Name, "ReplicationController", "extensions/v1beta1", rc2.UID)
     unschedulablePods := []*apiv1.Pod{p1, p2_1, p2_2, p3_1, p3_2}

@@ -212,7 +212,8 @@ func TestFilterSchedulablePodsForNode(t *testing.T) {
         PredicateChecker: simulator.NewTestPredicateChecker(),
     }
 
-    res := checkPodsSchedulableOnNode(context, unschedulablePods, "T1-abc", tni)
+    checker := newPodsSchedulableOnNodeChecker(context, unschedulablePods)
+    res := checker.checkPodsSchedulableOnNode("T1-abc", tni)
     wantedSchedulable := []*apiv1.Pod{p1, p3_1, p3_2}
     wantedUnschedulable := []*apiv1.Pod{p2_1, p2_2}
@@ -39,6 +39,7 @@ import (
 func BuildTestPod(name string, cpu int64, mem int64) *apiv1.Pod {
     pod := &apiv1.Pod{
         ObjectMeta: metav1.ObjectMeta{
+            UID: types.UID(name),
             Namespace: "default",
             Name: name,
             SelfLink: fmt.Sprintf("/api/v1/namespaces/default/pods/%s", name),
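The last hunk gives every test pod a UID: the new checker keys podsEquivalenceGroups by pod.UID, so pods built by BuildTestPod need distinct, non-empty UIDs for the grouping (and for TestFilterSchedulablePodsForNode above) to treat them as separate pods.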