schedule based on cluster resource model

Signed-off-by: Poor12 <shentiecheng@huawei.com>
This commit is contained in:
Poor12 2022-08-19 16:03:39 +08:00
parent f6b7511906
commit 6493a49874
3 changed files with 142 additions and 17 deletions

View File

@ -32,6 +32,7 @@ spec:
- --cluster-status-update-frequency=10s
- --bind-address=0.0.0.0
- --secure-port=10357
- --feature-gates=CustomizedClusterResourceModeling=true
- --v=4
livenessProbe:
httpGet:

View File

@ -10,6 +10,7 @@ import (
"k8s.io/client-go/tools/leaderelection/resourcelock"
componentbaseconfig "k8s.io/component-base/config"
"github.com/karmada-io/karmada/pkg/features"
"github.com/karmada-io/karmada/pkg/sharedcli/profileflag"
"github.com/karmada-io/karmada/pkg/sharedcli/ratelimiterflag"
"github.com/karmada-io/karmada/pkg/util"
@ -184,5 +185,6 @@ func (o *Options) AddFlags(fs *pflag.FlagSet, allControllers []string) {
"The resource modeling might be used by the scheduler to make scheduling decisions in scenario of dynamic replica assignment based on cluster free resources.\n"+
"Disable if it does not fit your cases for better performance.")
o.RateLimiterOpts.AddFlags(fs)
features.FeatureGate.AddFlag(fs)
o.ProfileOpts.AddFlags(fs)
}

View File

@ -2,11 +2,16 @@ package client
import (
"context"
"fmt"
"math"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/klog/v2"
clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
"github.com/karmada-io/karmada/pkg/features"
)
// GeneralEstimator is the default replica estimator.
@ -47,6 +52,92 @@ func (ge *GeneralEstimator) maxAvailableReplicas(cluster *clusterv1alpha1.Cluste
return int32(maximumReplicas)
}
// If AllocatableModelings in the cluster status is empty — possibly because
// users have not configured resource models, or the status has not been
// collected yet — fall back to another way of calculating the max replicas.
if features.FeatureGate.Enabled(features.CustomizedClusterResourceModeling) && len(cluster.Status.ResourceSummary.AllocatableModelings) > 0 {
num, err := getMaximumReplicasBasedOnResourceModels(cluster, replicaRequirements)
if err == nil {
klog.Infof("cluster %s has max available replicas: %d according to cluster resource models", cluster.GetName(), num)
if num < maximumReplicas {
maximumReplicas = num
}
return int32(maximumReplicas)
}
klog.Info(err.Error())
}
num := getMaximumReplicasBasedOnClusterSummary(resourceSummary, replicaRequirements)
if num < maximumReplicas {
maximumReplicas = num
}
return int32(maximumReplicas)
}
// getAllowedPodNumber returns how many more pods the cluster can still admit:
// allocatable pods minus allocated and allocating ones, floored at zero.
// A nil field in the summary is treated as contributing zero pods.
func getAllowedPodNumber(resourceSummary *clusterv1alpha1.ResourceSummary) int64 {
	var headroom int64
	if resourceSummary.Allocatable != nil {
		headroom = resourceSummary.Allocatable.Pods().Value()
	}
	if resourceSummary.Allocated != nil {
		headroom -= resourceSummary.Allocated.Pods().Value()
	}
	if resourceSummary.Allocating != nil {
		headroom -= resourceSummary.Allocating.Pods().Value()
	}
	// When too many pods have been created, scheduling keeps failing and the
	// allocating count can grow huge; never report a negative headroom —
	// a non-positive result means no more pods may be created.
	if headroom < 0 {
		return 0
	}
	return headroom
}
// convertToResourceModelsMinMap flattens the cluster's resource models into a
// per-resource list of grade lower bounds: result[name][i] is the Min quantity
// of resource name in the i-th model (models are visited in slice order).
func convertToResourceModelsMinMap(models []clusterv1alpha1.ResourceModel) map[clusterv1alpha1.ResourceName][]resource.Quantity {
	minMap := map[clusterv1alpha1.ResourceName][]resource.Quantity{}
	for i := range models {
		for j := range models[i].Ranges {
			r := models[i].Ranges[j]
			minMap[r.Name] = append(minMap[r.Name], r.Min)
		}
	}
	return minMap
}
// getNodeAvailableReplicas estimates how many replicas matching the given
// requirements fit on one node of the model grade at modelIndex, using that
// grade's Min boundaries (from resourceModelsMinMap) as the node's available
// resources. Requests that are zero or negative are skipped. If the estimate
// comes out as 0, the function returns 1 instead: the caller only asks about
// grades already deemed suitable, so such a node is assumed to be able to
// host at least one pod.
func getNodeAvailableReplicas(modelIndex int, replicaRequirements *workv1alpha2.ReplicaRequirements, resourceModelsMinMap map[clusterv1alpha1.ResourceName][]resource.Quantity) int64 {
	nodeReplicas := int64(math.MaxInt64)
	for name, request := range replicaRequirements.ResourceRequest {
		requested := request.Value()
		if requested <= 0 {
			continue
		}
		minBoundary := resourceModelsMinMap[clusterv1alpha1.ResourceName(name)][modelIndex]
		available := minBoundary.Value()
		if name == corev1.ResourceCPU {
			// Compare CPU in milli-units so sub-core requests are not lost.
			requested = request.MilliValue()
			available = minBoundary.MilliValue()
		}
		if fit := available / requested; fit < nodeReplicas {
			nodeReplicas = fit
		}
	}
	// First suitable model grade: count it as able to run a single pod.
	if nodeReplicas == 0 {
		return 1
	}
	return nodeReplicas
}
func getMaximumReplicasBasedOnClusterSummary(resourceSummary *clusterv1alpha1.ResourceSummary, replicaRequirements *workv1alpha2.ReplicaRequirements) int64 {
var maximumReplicas int64 = math.MaxInt64
for key, value := range replicaRequirements.ResourceRequest {
requestedQuantity := value.Value()
if requestedQuantity <= 0 {
@ -84,25 +175,56 @@ func (ge *GeneralEstimator) maxAvailableReplicas(cluster *clusterv1alpha1.Cluste
}
}
return int32(maximumReplicas)
return maximumReplicas
}
func getAllowedPodNumber(resourceSummary *clusterv1alpha1.ResourceSummary) int64 {
var allocatable, allocated, allocating int64
if resourceSummary.Allocatable != nil {
allocatable = resourceSummary.Allocatable.Pods().Value()
func getMaximumReplicasBasedOnResourceModels(cluster *clusterv1alpha1.Cluster, replicaRequirements *workv1alpha2.ReplicaRequirements) (int64, error) {
resourceModelsMinMap := convertToResourceModelsMinMap(cluster.Spec.ResourceModels)
minCompliantModelIndex := 0
for key, value := range replicaRequirements.ResourceRequest {
requestedQuantity := value.Value()
if requestedQuantity <= 0 {
continue
}
quantityArray, ok := resourceModelsMinMap[clusterv1alpha1.ResourceName(key)]
if !ok {
return -1, fmt.Errorf("resource model is inapplicable as missing resource: %s", string(key))
}
for index, minValue := range quantityArray {
// Suppose there is the following resource model:
// Model1: cpu [1C,2C)
// Model2: cpu [2C,3C)
// if pod cpu request is 1.5C, we regard the nodes in model1 as meeting the requirements of the Pod.
// Suppose there is the following resource model:
// Model1: cpu [1C,2C), memory [1Gi,2Gi)
// Model2: cpu [2C,3C), memory [2Gi,3Gi)
// if pod cpu request is 1.5C and memory request is 2.5Gi
// We regard the node of model1 as not meeting the requirements, and the nodes of model2 and later as meeting the requirements.
if minValue.Cmp(value) > 0 {
// Since the 'min' value of the first model is always 0, hit here
// the index should be >=1, so it's safe to use 'index-1' here.
if index-1 > minCompliantModelIndex {
minCompliantModelIndex = index - 1
}
break
}
if index == len(quantityArray)-1 {
minCompliantModelIndex = index
}
}
}
if resourceSummary.Allocated != nil {
allocated = resourceSummary.Allocated.Pods().Value()
var maximumReplicasForResource int64
for i := minCompliantModelIndex; i < len(cluster.Spec.ResourceModels); i++ {
if cluster.Status.ResourceSummary.AllocatableModelings[i].Count == 0 {
continue
}
maximumReplicasForResource += int64(cluster.Status.ResourceSummary.AllocatableModelings[i].Count) * getNodeAvailableReplicas(i, replicaRequirements, resourceModelsMinMap)
}
if resourceSummary.Allocating != nil {
allocating = resourceSummary.Allocating.Pods().Value()
}
allowedPodNumber := allocatable - allocated - allocating
// When too many pods have been created, scheduling will fail so that the allocating pods number may be huge.
// If allowedPodNumber is less than or equal to 0, we don't allow more pods to be created.
if allowedPodNumber <= 0 {
return 0
}
return allowedPodNumber
return maximumReplicasForResource, nil
}