diff --git a/artifacts/agent/karmada-agent.yaml b/artifacts/agent/karmada-agent.yaml
index b95f1171e..f9cb4b30a 100644
--- a/artifacts/agent/karmada-agent.yaml
+++ b/artifacts/agent/karmada-agent.yaml
@@ -32,6 +32,7 @@ spec:
         - --cluster-status-update-frequency=10s
         - --bind-address=0.0.0.0
         - --secure-port=10357
+        - --feature-gates=CustomizedClusterResourceModeling=true
         - --v=4
         livenessProbe:
           httpGet:
diff --git a/cmd/agent/app/options/options.go b/cmd/agent/app/options/options.go
index b9ffb891a..b88c20e39 100644
--- a/cmd/agent/app/options/options.go
+++ b/cmd/agent/app/options/options.go
@@ -10,6 +10,7 @@ import (
 	"k8s.io/client-go/tools/leaderelection/resourcelock"
 	componentbaseconfig "k8s.io/component-base/config"
 
+	"github.com/karmada-io/karmada/pkg/features"
 	"github.com/karmada-io/karmada/pkg/sharedcli/profileflag"
 	"github.com/karmada-io/karmada/pkg/sharedcli/ratelimiterflag"
 	"github.com/karmada-io/karmada/pkg/util"
@@ -184,5 +185,6 @@ func (o *Options) AddFlags(fs *pflag.FlagSet, allControllers []string) {
 		"The resource modeling might be used by the scheduler to make scheduling decisions in scenario of dynamic replica assignment based on cluster free resources.\n"+
 		"Disable if it does not fit your cases for better performance.")
 	o.RateLimiterOpts.AddFlags(fs)
+	features.FeatureGate.AddFlag(fs)
 	o.ProfileOpts.AddFlags(fs)
 }
diff --git a/pkg/estimator/client/general.go b/pkg/estimator/client/general.go
index e30d0e37b..0aba88595 100644
--- a/pkg/estimator/client/general.go
+++ b/pkg/estimator/client/general.go
@@ -2,11 +2,16 @@ package client
 
 import (
 	"context"
+	"fmt"
+	"math"
 
 	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	"k8s.io/klog/v2"
 
 	clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
 	workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
+	"github.com/karmada-io/karmada/pkg/features"
 )
 
 // GeneralEstimator is the default replica estimator.
@@ -47,6 +52,92 @@ func (ge *GeneralEstimator) maxAvailableReplicas(cluster *clusterv1alpha1.Cluste
 		return int32(maximumReplicas)
 	}
 
+	// Prefer calculating the max replicas from the customized resource models.
+	// If the AllocatableModelings in the cluster status are empty, possibly because
+	// the user has not set the models or the status has not been collected yet,
+	// fall back to the resource-summary based calculation below.
+	if features.FeatureGate.Enabled(features.CustomizedClusterResourceModeling) && len(cluster.Status.ResourceSummary.AllocatableModelings) > 0 {
+		num, err := getMaximumReplicasBasedOnResourceModels(cluster, replicaRequirements)
+		if err == nil {
+			klog.Infof("cluster %s has max available replicas: %d according to cluster resource models", cluster.GetName(), num)
+			if num < maximumReplicas {
+				maximumReplicas = num
+			}
+
+			return int32(maximumReplicas)
+		}
+		klog.Info(err.Error())
+	}
+
+	num := getMaximumReplicasBasedOnClusterSummary(resourceSummary, replicaRequirements)
+	if num < maximumReplicas {
+		maximumReplicas = num
+	}
+
+	return int32(maximumReplicas)
+}
+
+func getAllowedPodNumber(resourceSummary *clusterv1alpha1.ResourceSummary) int64 {
+	var allocatable, allocated, allocating int64
+	if resourceSummary.Allocatable != nil {
+		allocatable = resourceSummary.Allocatable.Pods().Value()
+	}
+	if resourceSummary.Allocated != nil {
+		allocated = resourceSummary.Allocated.Pods().Value()
+	}
+	if resourceSummary.Allocating != nil {
+		allocating = resourceSummary.Allocating.Pods().Value()
+	}
+	allowedPodNumber := allocatable - allocated - allocating
+	// When too many Pods have been created, scheduling fails and the number of
+	// allocating Pods can become huge. If allowedPodNumber is less than or equal
+	// to 0, we don't allow more Pods to be created.
+	if allowedPodNumber <= 0 {
+		return 0
+	}
+	return allowedPodNumber
+}
+
+func convertToResourceModelsMinMap(models []clusterv1alpha1.ResourceModel) map[clusterv1alpha1.ResourceName][]resource.Quantity {
+	resourceModelsMinMap := make(map[clusterv1alpha1.ResourceName][]resource.Quantity)
+	for _, model := range models {
+		for _, resourceModelRange := range model.Ranges {
+			resourceModelsMinMap[resourceModelRange.Name] = append(resourceModelsMinMap[resourceModelRange.Name], resourceModelRange.Min)
+		}
+	}
+
+	return resourceModelsMinMap
+}
+
+func getNodeAvailableReplicas(modelIndex int, replicaRequirements *workv1alpha2.ReplicaRequirements, resourceModelsMinMap map[clusterv1alpha1.ResourceName][]resource.Quantity) int64 {
+	var maximumReplicasOneNode int64 = math.MaxInt64
+	for key, value := range replicaRequirements.ResourceRequest {
+		requestedQuantity := value.Value()
+		if requestedQuantity <= 0 {
+			continue
+		}
+
+		availableMinBoundary := resourceModelsMinMap[clusterv1alpha1.ResourceName(key)][modelIndex]
+
+		availableQuantity := availableMinBoundary.Value()
+		if key == corev1.ResourceCPU {
+			requestedQuantity = value.MilliValue()
+			availableQuantity = availableMinBoundary.MilliValue()
+		}
+
+		maximumReplicasForResource := availableQuantity / requestedQuantity
+		if maximumReplicasForResource < maximumReplicasOneNode {
+			maximumReplicasOneNode = maximumReplicasForResource
+		}
+	}
+
+	// If this is the first suitable model, its min boundary may be below the request
+	// even though the node itself qualifies, so we assume one Pod can be deployed.
+	if maximumReplicasOneNode == 0 {
+		return 1
+	}
+	return maximumReplicasOneNode
+}
+
+func getMaximumReplicasBasedOnClusterSummary(resourceSummary *clusterv1alpha1.ResourceSummary, replicaRequirements *workv1alpha2.ReplicaRequirements) int64 {
+	var maximumReplicas int64 = math.MaxInt64
 	for key, value := range replicaRequirements.ResourceRequest {
 		requestedQuantity := value.Value()
 		if requestedQuantity <= 0 {
 			continue
 		}
@@ -84,25 +175,56 @@ func (ge *GeneralEstimator) maxAvailableReplicas(cluster *clusterv1alpha1.Cluste
 		}
 	}
 
-	return int32(maximumReplicas)
+	return maximumReplicas
 }
 
-func getAllowedPodNumber(resourceSummary *clusterv1alpha1.ResourceSummary) int64 {
-	var allocatable, allocated, allocating int64
-	if resourceSummary.Allocatable != nil {
-		allocatable = resourceSummary.Allocatable.Pods().Value()
+func getMaximumReplicasBasedOnResourceModels(cluster *clusterv1alpha1.Cluster, replicaRequirements *workv1alpha2.ReplicaRequirements) (int64, error) {
+	resourceModelsMinMap := convertToResourceModelsMinMap(cluster.Spec.ResourceModels)
+
+	minCompliantModelIndex := 0
+	for key, value := range replicaRequirements.ResourceRequest {
+		requestedQuantity := value.Value()
+		if requestedQuantity <= 0 {
+			continue
+		}
+
+		quantityArray, ok := resourceModelsMinMap[clusterv1alpha1.ResourceName(key)]
+		if !ok {
+			return -1, fmt.Errorf("resource model is inapplicable as missing resource: %s", string(key))
+		}
+
+		for index, minValue := range quantityArray {
+			// Suppose the cluster has the following resource models:
+			//   Model1: cpu [1C,2C)
+			//   Model2: cpu [2C,3C)
+			// If the Pod requests 1.5C of CPU, the nodes in Model1 are regarded as
+			// meeting the Pod's requirements.
+			// Now suppose the resource models are:
+			//   Model1: cpu [1C,2C), memory [1Gi,2Gi)
+			//   Model2: cpu [2C,3C), memory [2Gi,3Gi)
+			// If the Pod requests 1.5C of CPU and 2.5Gi of memory, the nodes in Model1
+			// are regarded as not meeting the requirements, while the nodes in Model2
+			// and later models are regarded as meeting them.
+			if minValue.Cmp(value) > 0 {
+				// Since the 'min' value of the first model is always 0, the index is
+				// >= 1 whenever this branch is hit, so it is safe to use 'index-1'.
+				if index-1 > minCompliantModelIndex {
+					minCompliantModelIndex = index - 1
+				}
+				break
+			}
+
+			if index == len(quantityArray)-1 {
+				minCompliantModelIndex = index
+			}
+		}
 	}
-	if resourceSummary.Allocated != nil {
-		allocated = resourceSummary.Allocated.Pods().Value()
+
+	var maximumReplicasForResource int64
+	for i := minCompliantModelIndex; i < len(cluster.Spec.ResourceModels); i++ {
+		if cluster.Status.ResourceSummary.AllocatableModelings[i].Count == 0 {
+			continue
+		}
+		maximumReplicasForResource += int64(cluster.Status.ResourceSummary.AllocatableModelings[i].Count) * getNodeAvailableReplicas(i, replicaRequirements, resourceModelsMinMap)
 	}
-	if resourceSummary.Allocating != nil {
-		allocating = resourceSummary.Allocating.Pods().Value()
-	}
-	allowedPodNumber := allocatable - allocated - allocating
-	// When too many pods have been created, scheduling will fail so that the allocating pods number may be huge.
-	// If allowedPodNumber is less than or equal to 0, we don't allow more pods to be created.
-	if allowedPodNumber <= 0 {
-		return 0
-	}
-	return allowedPodNumber
+
+	return maximumReplicasForResource, nil
 }
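
For readers who want to trace the arithmetic, below is a minimal, self-contained sketch (not part of the diff above) that mirrors the model-based estimation in getMaximumReplicasBasedOnResourceModels and getNodeAvailableReplicas, using the two-grade example from the comments in general.go. The model struct, firstCompliantModel, perNodeReplicas, and the node counts are hypothetical names and values introduced only for this illustration; error handling for resources absent from the models is omitted.

package main

import (
	"fmt"
	"math"

	"k8s.io/apimachinery/pkg/api/resource"
)

// model is a simplified stand-in for one resource model grade: the 'min'
// boundary per resource plus the node count reported in AllocatableModelings.
type model struct {
	min   map[string]resource.Quantity
	count int64
}

// firstCompliantModel mirrors the minCompliantModelIndex selection: for every
// requested resource, walk the ascending min boundaries and keep the highest
// index whose grade still satisfies the request.
func firstCompliantModel(models []model, request map[string]resource.Quantity) int {
	minIndex := 0
	for name, req := range request {
		if req.Value() <= 0 {
			continue
		}
		for i := range models {
			boundary := models[i].min[name]
			if boundary.Cmp(req) > 0 {
				if i-1 > minIndex {
					minIndex = i - 1
				}
				break
			}
			if i == len(models)-1 {
				minIndex = i
			}
		}
	}
	return minIndex
}

// perNodeReplicas mirrors getNodeAvailableReplicas: divide each resource's min
// boundary by the Pod's request and keep the most restrictive quotient.
func perNodeReplicas(m model, request map[string]resource.Quantity) int64 {
	replicas := int64(math.MaxInt64)
	for name, req := range request {
		requested := req.Value()
		if requested <= 0 {
			continue
		}
		boundary := m.min[name]
		available := boundary.Value()
		if name == "cpu" { // CPU is compared in milli-units, as in the diff
			requested = req.MilliValue()
			available = boundary.MilliValue()
		}
		if r := available / requested; r < replicas {
			replicas = r
		}
	}
	if replicas == 0 {
		return 1 // floor: a node graded into a compliant model fits at least one Pod
	}
	return replicas
}

func main() {
	// Model1: cpu [1C,2C), memory [1Gi,2Gi) -- assume 3 nodes.
	// Model2: cpu [2C,3C), memory [2Gi,3Gi) -- assume 2 nodes.
	models := []model{
		{min: map[string]resource.Quantity{"cpu": resource.MustParse("1"), "memory": resource.MustParse("1Gi")}, count: 3},
		{min: map[string]resource.Quantity{"cpu": resource.MustParse("2"), "memory": resource.MustParse("2Gi")}, count: 2},
	}
	// The Pod requests 1.5C of CPU and 2.5Gi of memory.
	request := map[string]resource.Quantity{
		"cpu":    resource.MustParse("1500m"),
		"memory": resource.MustParse("2.5Gi"),
	}
	idx := firstCompliantModel(models, request)
	var total int64
	for i := idx; i < len(models); i++ {
		if models[i].count == 0 {
			continue
		}
		total += models[i].count * perNodeReplicas(models[i], request)
	}
	fmt.Printf("first compliant model index: %d, estimated max replicas: %d\n", idx, total)
	// Output: first compliant model index: 1, estimated max replicas: 2
}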
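
In this sketch the memory request pins the first compliant grade to Model2, and because Model2's 2Gi min boundary is below the 2.5Gi request, the per-node quotient is zero and the one-Pod floor from getNodeAvailableReplicas applies, giving 2 replicas across the 2 assumed Model2 nodes. That floor reflects the assumption the diff's comment describes: a node graded into a compliant model has actual capacity above the min boundary, so it is credited with at least one Pod even though the boundary itself falls short of the request.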