schedule based on cluster resource model
Signed-off-by: Poor12 <shentiecheng@huawei.com>
parent f6b7511906
commit 6493a49874
@@ -32,6 +32,7 @@ spec:
         - --cluster-status-update-frequency=10s
         - --bind-address=0.0.0.0
         - --secure-port=10357
+        - --feature-gates=CustomizedClusterResourceModeling=true
         - --v=4
       livenessProbe:
         httpGet:
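For reference, the gate enabled above guards the resource-model scheduling path added in this commit; the models a cluster advertises live in spec.resourceModels on the Cluster object. A minimal sketch of building such models with the clusterv1alpha1 types this diff uses — the two grades and their boundaries are illustrative assumptions, not karmada defaults:

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"

	clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
)

func main() {
	// Illustrative grades: nodes whose allocatable cpu falls in [0C,2C) and [2C,4C).
	models := []clusterv1alpha1.ResourceModel{
		{Grade: 0, Ranges: []clusterv1alpha1.ResourceModelRange{{
			Name: clusterv1alpha1.ResourceName(corev1.ResourceCPU),
			Min:  resource.MustParse("0"),
			Max:  resource.MustParse("2"),
		}}},
		{Grade: 1, Ranges: []clusterv1alpha1.ResourceModelRange{{
			Name: clusterv1alpha1.ResourceName(corev1.ResourceCPU),
			Min:  resource.MustParse("2"),
			Max:  resource.MustParse("4"),
		}}},
	}
	fmt.Println(len(models)) // 2
}
```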
@@ -10,6 +10,7 @@ import (
 	"k8s.io/client-go/tools/leaderelection/resourcelock"
 	componentbaseconfig "k8s.io/component-base/config"
 
+	"github.com/karmada-io/karmada/pkg/features"
 	"github.com/karmada-io/karmada/pkg/sharedcli/profileflag"
 	"github.com/karmada-io/karmada/pkg/sharedcli/ratelimiterflag"
 	"github.com/karmada-io/karmada/pkg/util"
@@ -184,5 +185,6 @@ func (o *Options) AddFlags(fs *pflag.FlagSet, allControllers []string) {
 		"The resource modeling might be used by the scheduler to make scheduling decisions in scenario of dynamic replica assignment based on cluster free resources.\n"+
 		"Disable if it does not fit your cases for better performance.")
 	o.RateLimiterOpts.AddFlags(fs)
+	features.FeatureGate.AddFlag(fs)
 	o.ProfileOpts.AddFlags(fs)
 }
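features.FeatureGate.AddFlag(fs) is what wires the gate into the --feature-gates flag used in the manifest above. A standalone sketch of the same mechanism with k8s.io/component-base/featuregate — the Alpha/disabled FeatureSpec here is an assumption for the example; karmada's pkg/features owns the real registration:

```go
package main

import (
	"fmt"

	"github.com/spf13/pflag"
	"k8s.io/component-base/featuregate"
)

const CustomizedClusterResourceModeling featuregate.Feature = "CustomizedClusterResourceModeling"

func main() {
	gate := featuregate.NewFeatureGate()
	// Assumed spec for illustration only.
	if err := gate.Add(map[featuregate.Feature]featuregate.FeatureSpec{
		CustomizedClusterResourceModeling: {Default: false, PreRelease: featuregate.Alpha},
	}); err != nil {
		panic(err)
	}

	fs := pflag.NewFlagSet("demo", pflag.ExitOnError)
	gate.AddFlag(fs) // the same call the diff adds to Options.AddFlags
	_ = fs.Parse([]string{"--feature-gates=CustomizedClusterResourceModeling=true"})

	fmt.Println(gate.Enabled(CustomizedClusterResourceModeling)) // true
}
```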
@@ -2,11 +2,16 @@ package client
 
 import (
 	"context"
+	"fmt"
+	"math"
 
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/klog/v2"
 
 	clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
 	workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
+	"github.com/karmada-io/karmada/pkg/features"
 )
 
 // GeneralEstimator is the default replica estimator.
@@ -47,6 +52,92 @@ func (ge *GeneralEstimator) maxAvailableReplicas(cluster *clusterv1alpha1.Cluster, replicaRequirements *workv1alpha2.ReplicaRequirements) int32 {
 		return int32(maximumReplicas)
 	}
 
+	// If the AllocatableModelings in the cluster status are empty, possibly because
+	// users have not set the models or the status has not been collected yet,
+	// fall back to calculating the max replicas from the cluster resource summary.
+	if features.FeatureGate.Enabled(features.CustomizedClusterResourceModeling) && len(cluster.Status.ResourceSummary.AllocatableModelings) > 0 {
+		num, err := getMaximumReplicasBasedOnResourceModels(cluster, replicaRequirements)
+		if err == nil {
+			klog.Infof("cluster %s has max available replicas: %d according to cluster resource models", cluster.GetName(), num)
+			if num < maximumReplicas {
+				maximumReplicas = num
+			}
+
+			return int32(maximumReplicas)
+		}
+		klog.Info(err.Error())
+	}
+
+	num := getMaximumReplicasBasedOnClusterSummary(resourceSummary, replicaRequirements)
+	if num < maximumReplicas {
+		maximumReplicas = num
+	}
+
+	return int32(maximumReplicas)
+}
+
+func getAllowedPodNumber(resourceSummary *clusterv1alpha1.ResourceSummary) int64 {
+	var allocatable, allocated, allocating int64
+	if resourceSummary.Allocatable != nil {
+		allocatable = resourceSummary.Allocatable.Pods().Value()
+	}
+	if resourceSummary.Allocated != nil {
+		allocated = resourceSummary.Allocated.Pods().Value()
+	}
+	if resourceSummary.Allocating != nil {
+		allocating = resourceSummary.Allocating.Pods().Value()
+	}
+	allowedPodNumber := allocatable - allocated - allocating
+	// When too many pods have been created, scheduling will fail, so the number of allocating pods may be huge.
+	// If allowedPodNumber is less than or equal to 0, we don't allow more pods to be created.
+	if allowedPodNumber <= 0 {
+		return 0
+	}
+	return allowedPodNumber
+}
+
+func convertToResourceModelsMinMap(models []clusterv1alpha1.ResourceModel) map[clusterv1alpha1.ResourceName][]resource.Quantity {
+	resourceModelsMinMap := make(map[clusterv1alpha1.ResourceName][]resource.Quantity)
+	for _, model := range models {
+		for _, resourceModelRange := range model.Ranges {
+			resourceModelsMinMap[resourceModelRange.Name] = append(resourceModelsMinMap[resourceModelRange.Name], resourceModelRange.Min)
+		}
+	}
+
+	return resourceModelsMinMap
+}
+
+func getNodeAvailableReplicas(modelIndex int, replicaRequirements *workv1alpha2.ReplicaRequirements, resourceModelsMinMap map[clusterv1alpha1.ResourceName][]resource.Quantity) int64 {
+	var maximumReplicasOneNode int64 = math.MaxInt64
+	for key, value := range replicaRequirements.ResourceRequest {
+		requestedQuantity := value.Value()
+		if requestedQuantity <= 0 {
+			continue
+		}
+
+		availableMinBoundary := resourceModelsMinMap[clusterv1alpha1.ResourceName(key)][modelIndex]
+
+		availableQuantity := availableMinBoundary.Value()
+		if key == corev1.ResourceCPU {
+			requestedQuantity = value.MilliValue()
+			availableQuantity = availableMinBoundary.MilliValue()
+		}
+
+		maximumReplicasForResource := availableQuantity / requestedQuantity
+		if maximumReplicasForResource < maximumReplicasOneNode {
+			maximumReplicasOneNode = maximumReplicasForResource
+		}
+	}
+
+	// If this is the first suitable model, we still consider a node in it able to deploy one Pod.
+	if maximumReplicasOneNode == 0 {
+		return 1
+	}
+	return maximumReplicasOneNode
+}
+
+func getMaximumReplicasBasedOnClusterSummary(resourceSummary *clusterv1alpha1.ResourceSummary, replicaRequirements *workv1alpha2.ReplicaRequirements) int64 {
+	var maximumReplicas int64 = math.MaxInt64
 	for key, value := range replicaRequirements.ResourceRequest {
 		requestedQuantity := value.Value()
 		if requestedQuantity <= 0 {
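Before the next hunk, it may help to see the shape convertToResourceModelsMinMap produces: one slice of Min boundaries per resource name, indexed by grade. A tiny sketch with assumed quantities (only resource.Quantity is a real API here; the map is built inline rather than from Cluster spec):

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Assumed grades with cpu mins 0C, 2C, 4C, flattened per resource name.
	minMap := map[string][]resource.Quantity{
		"cpu": {resource.MustParse("0"), resource.MustParse("2"), resource.MustParse("4")},
	}
	// getNodeAvailableReplicas reads minMap["cpu"][modelIndex] as the amount
	// guaranteed free on every node counted in that grade.
	fmt.Println(minMap["cpu"][1].MilliValue()) // 2000
}
```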
@@ -84,25 +175,56 @@ func (ge *GeneralEstimator) maxAvailableReplicas(cluster *clusterv1alpha1.Cluster, replicaRequirements *workv1alpha2.ReplicaRequirements) int32 {
 		}
 	}
 
-	return int32(maximumReplicas)
+	return maximumReplicas
 }
 
-func getAllowedPodNumber(resourceSummary *clusterv1alpha1.ResourceSummary) int64 {
-	var allocatable, allocated, allocating int64
-	if resourceSummary.Allocatable != nil {
-		allocatable = resourceSummary.Allocatable.Pods().Value()
-	}
-	if resourceSummary.Allocated != nil {
-		allocated = resourceSummary.Allocated.Pods().Value()
-	}
-	if resourceSummary.Allocating != nil {
-		allocating = resourceSummary.Allocating.Pods().Value()
-	}
-	allowedPodNumber := allocatable - allocated - allocating
-	// When too many pods have been created, scheduling will fail so that the allocating pods number may be huge.
-	// If allowedPodNumber is less than or equal to 0, we don't allow more pods to be created.
-	if allowedPodNumber <= 0 {
-		return 0
-	}
-	return allowedPodNumber
+func getMaximumReplicasBasedOnResourceModels(cluster *clusterv1alpha1.Cluster, replicaRequirements *workv1alpha2.ReplicaRequirements) (int64, error) {
+	resourceModelsMinMap := convertToResourceModelsMinMap(cluster.Spec.ResourceModels)
+
+	minCompliantModelIndex := 0
+	for key, value := range replicaRequirements.ResourceRequest {
+		requestedQuantity := value.Value()
+		if requestedQuantity <= 0 {
+			continue
+		}
+
+		quantityArray, ok := resourceModelsMinMap[clusterv1alpha1.ResourceName(key)]
+		if !ok {
+			return -1, fmt.Errorf("resource model is inapplicable as missing resource: %s", string(key))
+		}
+
+		for index, minValue := range quantityArray {
+			// Suppose there is the following resource model:
+			// Model1: cpu [1C,2C)
+			// Model2: cpu [2C,3C)
+			// If the pod cpu request is 1.5C, we regard the nodes in model1 as meeting the requirements of the Pod.
+			// Suppose there is the following resource model:
+			// Model1: cpu [1C,2C), memory [1Gi,2Gi)
+			// Model2: cpu [2C,3C), memory [2Gi,3Gi)
+			// If the pod cpu request is 1.5C and the memory request is 2.5Gi,
+			// we regard the nodes of model1 as not meeting the requirements, and the nodes of model2 and later as meeting the requirements.
+			if minValue.Cmp(value) > 0 {
+				// Since the 'min' value of the first model is always 0, when we hit here
+				// the index must be >= 1, so it's safe to use 'index-1'.
+				if index-1 > minCompliantModelIndex {
+					minCompliantModelIndex = index - 1
+				}
+				break
+			}
+
+			if index == len(quantityArray)-1 {
+				minCompliantModelIndex = index
+			}
+		}
+	}
+
+	var maximumReplicasForResource int64
+	for i := minCompliantModelIndex; i < len(cluster.Spec.ResourceModels); i++ {
+		if cluster.Status.ResourceSummary.AllocatableModelings[i].Count == 0 {
+			continue
+		}
+		maximumReplicasForResource += int64(cluster.Status.ResourceSummary.AllocatableModelings[i].Count) * getNodeAvailableReplicas(i, replicaRequirements, resourceModelsMinMap)
+	}
+
+	return maximumReplicasForResource, nil
 }
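To make the estimate concrete, here is a self-contained sketch of the same arithmetic with hypothetical numbers, mirroring getNodeAvailableReplicas and the per-grade summation in getMaximumReplicasBasedOnResourceModels (plain int64 math instead of the karmada types; all grades, boundaries, and node counts below are invented for illustration):

```go
package main

import "fmt"

// Hypothetical cluster with three grades. Grade 0's mins are 0, as the code
// above relies on; counts stand in for the AllocatableModelings node counts.
var (
	cpuMins = []int64{0, 2000, 4000}        // millicores: 0C, 2C, 4C
	memMins = []int64{0, 4 << 30, 16 << 30} // bytes: 0Gi, 4Gi, 16Gi
	counts  = []int64{5, 3, 2}              // nodes per grade
)

// nodeReplicas mirrors getNodeAvailableReplicas: floor-divide each grade's
// min boundary by the request, take the per-resource minimum, and treat 0
// as 1 (a compliant grade's node can still host one Pod).
func nodeReplicas(grade int, cpuReq, memReq int64) int64 {
	n := cpuMins[grade] / cpuReq
	if m := memMins[grade] / memReq; m < n {
		n = m
	}
	if n == 0 {
		return 1
	}
	return n
}

func main() {
	cpuReq, memReq := int64(1500), int64(2<<30) // pod requests 1.5C, 2Gi

	// 1.5C falls in grade 0's [0C,2C) range, so grade 0 is the minimum
	// compliant model index, exactly as the comments in the diff describe.
	minCompliant := 0

	var total int64
	for g := minCompliant; g < len(counts); g++ {
		if counts[g] == 0 {
			continue
		}
		total += counts[g] * nodeReplicas(g, cpuReq, memReq)
	}
	fmt.Println(total) // 12 = 5*1 + 3*1 + 2*2
}
```

The sketch also shows why the fallback to 1 matters: grade 0's min boundaries divide to zero replicas even though its nodes were judged compliant, so without the floor those five nodes would contribute nothing to the estimate.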