/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package legacy

import (
	"math"
	"reflect"
	"time"

	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
	"k8s.io/autoscaler/cluster-autoscaler/clusterstate"
	"k8s.io/autoscaler/cluster-autoscaler/context"
	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/deletiontracker"
	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/eligibility"
	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/resource"
	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/unremovable"
	"k8s.io/autoscaler/cluster-autoscaler/metrics"
	"k8s.io/autoscaler/cluster-autoscaler/processors"
	"k8s.io/autoscaler/cluster-autoscaler/processors/status"
	"k8s.io/autoscaler/cluster-autoscaler/simulator"
	"k8s.io/autoscaler/cluster-autoscaler/simulator/utilization"
	"k8s.io/autoscaler/cluster-autoscaler/utils"
	"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
	kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"

	apiv1 "k8s.io/api/core/v1"
	policyv1 "k8s.io/api/policy/v1"
	"k8s.io/apimachinery/pkg/util/sets"
	klog "k8s.io/klog/v2"
)

// ScaleDown is responsible for maintaining the state needed to perform unneeded node removals.
type ScaleDown struct {
	context              *context.AutoscalingContext
	processors           *processors.AutoscalingProcessors
	clusterStateRegistry *clusterstate.ClusterStateRegistry
	unneededNodes        map[string]time.Time
	unneededNodesList    []*apiv1.Node
	unremovableNodes     *unremovable.Nodes
	podLocationHints     map[string]string
	nodeUtilizationMap   map[string]utilization.Info
	usageTracker         *simulator.UsageTracker
	nodeDeletionTracker  *deletiontracker.NodeDeletionTracker
	removalSimulator     *simulator.RemovalSimulator
	eligibilityChecker   *eligibility.Checker
	resourceLimitsFinder *resource.LimitsFinder
}
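
// The state in ScaleDown is refreshed by UpdateUnneededNodes on every
// autoscaler loop and consumed by NodesToDelete; in particular, unneededNodes
// maps node names to the time they were first classified as unneeded.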

// NewScaleDown builds a new ScaleDown object.
func NewScaleDown(context *context.AutoscalingContext, processors *processors.AutoscalingProcessors, clusterStateRegistry *clusterstate.ClusterStateRegistry, ndt *deletiontracker.NodeDeletionTracker, deleteOptions simulator.NodeDeleteOptions) *ScaleDown {
	usageTracker := simulator.NewUsageTracker()
	removalSimulator := simulator.NewRemovalSimulator(context.ListerRegistry, context.ClusterSnapshot, context.PredicateChecker, usageTracker, deleteOptions, false)
	unremovableNodes := unremovable.NewNodes()
	return &ScaleDown{
		context:              context,
		processors:           processors,
		clusterStateRegistry: clusterStateRegistry,
		unneededNodes:        make(map[string]time.Time),
		unremovableNodes:     unremovableNodes,
		podLocationHints:     make(map[string]string),
		nodeUtilizationMap:   make(map[string]utilization.Info),
		usageTracker:         usageTracker,
		unneededNodesList:    make([]*apiv1.Node, 0),
		nodeDeletionTracker:  ndt,
		removalSimulator:     removalSimulator,
		eligibilityChecker:   eligibility.NewChecker(processors.NodeGroupConfigProcessor),
		resourceLimitsFinder: resource.NewLimitsFinder(processors.CustomResourcesProcessor),
	}
}
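
// A minimal driving loop for this type might look like the sketch below. The
// surrounding plumbing (autoscaling context, processors, cluster state
// registry, deletion tracker and the PDB list) is assumed to be built
// elsewhere:
//
//	sd := NewScaleDown(autoscalingCtx, processors, clusterStateRegistry, ndt, deleteOptions)
//	if err := sd.UpdateUnneededNodes(allNodes, scaleDownCandidates, time.Now(), pdbs); err != nil {
//		klog.Errorf("updating unneeded nodes failed: %v", err)
//	}
//	emptyToDelete, drainToDelete, result, err := sd.NodesToDelete(time.Now(), pdbs)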

// CleanUp cleans up the internal ScaleDown state.
func (sd *ScaleDown) CleanUp(timestamp time.Time) {
	// Use default ScaleDownUnneededTime as in this context the value
	// doesn't apply to any specific NodeGroup.
	sd.usageTracker.CleanUp(timestamp.Add(-sd.context.NodeGroupDefaults.ScaleDownUnneededTime))
}

// CleanUpUnneededNodes clears the list of unneeded nodes.
func (sd *ScaleDown) CleanUpUnneededNodes() {
	sd.unneededNodesList = make([]*apiv1.Node, 0)
	sd.unneededNodes = make(map[string]time.Time)
}

// UnneededNodes returns a list of nodes that can potentially be scaled down.
func (sd *ScaleDown) UnneededNodes() []*apiv1.Node {
	return sd.unneededNodesList
}

// UpdateUnneededNodes calculates which nodes are not needed, i.e. whose pods can all be scheduled
// somewhere else, and updates the unneededNodes map accordingly. It also computes information about
// where pods can be rescheduled, and each node's utilization level. The computations are made only
// for the nodes managed by CA.
// * destinationNodes are the nodes that can potentially take in any pods that are evicted because of a scale down.
// * scaleDownCandidates are the nodes that are being considered for scale down.
// * timestamp is the current timestamp.
// * pdbs is a list of pod disruption budgets.
func (sd *ScaleDown) UpdateUnneededNodes(
	destinationNodes []*apiv1.Node,
	scaleDownCandidates []*apiv1.Node,
	timestamp time.Time,
	pdbs []*policyv1.PodDisruptionBudget,
) errors.AutoscalerError {
	// Only scheduled non-expendable pods and pods waiting for preemption of lower-priority pods
	// can prevent node deletion. Extract cluster state from the snapshot for the initial analysis.
	allNodeInfos, err := sd.context.ClusterSnapshot.NodeInfos().List()
	if err != nil {
		// This should never happen: List() returns err only because the scheduler interface requires it.
		return errors.ToAutoscalerError(errors.InternalError, err)
	}

	// Phase 1 - look at node utilization. Calculate the utilization
	// only for the managed nodes.
	currentlyUnneededNodeNames, utilizationMap := sd.eligibilityChecker.FilterOutUnremovable(sd.context, scaleDownCandidates, timestamp, sd.unremovableNodes)

	emptyNodesToRemove := sd.getEmptyNodesToRemoveNoResourceLimits(currentlyUnneededNodeNames, timestamp)

	emptyNodes := make(map[string]bool)
	for _, empty := range emptyNodesToRemove {
		emptyNodes[empty.Node.Name] = true
	}

	currentlyUnneededNonEmptyNodes := make([]string, 0, len(currentlyUnneededNodeNames))
	for _, node := range currentlyUnneededNodeNames {
		if !emptyNodes[node] {
			currentlyUnneededNonEmptyNodes = append(currentlyUnneededNonEmptyNodes, node)
		}
	}

	// Phase 2 - check which nodes can probably be removed using fast drain.
	currentCandidates, currentNonCandidates := sd.chooseCandidates(currentlyUnneededNonEmptyNodes)

	destinations := make([]string, 0, len(destinationNodes))
	for _, destinationNode := range destinationNodes {
		destinations = append(destinations, destinationNode.Name)
	}

	// Look for nodes to remove among the current candidates.
	nodesToRemove, unremovable, newHints, simulatorErr := sd.removalSimulator.FindNodesToRemove(
		currentCandidates,
		destinations,
		sd.podLocationHints,
		timestamp,
		pdbs)
	if simulatorErr != nil {
		return sd.markSimulationError(simulatorErr, timestamp)
	}
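
	// The pool sizing below is plain arithmetic. As an illustration (the numbers are made up,
	// not defaults): with 1000 nodes in the snapshot and a ScaleDownCandidatesPoolRatio of 0.1,
	// the pool is ceil(1000*0.1) = 100 nodes, raised to ScaleDownCandidatesPoolMinCount if that
	// is larger, and capped at the number of remaining non-candidate nodes.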
	additionalCandidatesCount := sd.context.ScaleDownNonEmptyCandidatesCount - len(nodesToRemove)
	if additionalCandidatesCount > len(currentNonCandidates) {
		additionalCandidatesCount = len(currentNonCandidates)
	}
	// Limit the additional candidates pool size for better performance.
	additionalCandidatesPoolSize := int(math.Ceil(float64(len(allNodeInfos)) * sd.context.ScaleDownCandidatesPoolRatio))
	if additionalCandidatesPoolSize < sd.context.ScaleDownCandidatesPoolMinCount {
		additionalCandidatesPoolSize = sd.context.ScaleDownCandidatesPoolMinCount
	}
	if additionalCandidatesPoolSize > len(currentNonCandidates) {
		additionalCandidatesPoolSize = len(currentNonCandidates)
	}
	if additionalCandidatesCount > 0 {
		// Look for additional nodes to remove among the rest of the nodes.
		klog.V(3).Infof("Finding additional %v candidates for scale down.", additionalCandidatesCount)
		additionalNodesToRemove, additionalUnremovable, additionalNewHints, simulatorErr :=
			sd.removalSimulator.FindNodesToRemove(
				currentNonCandidates[:additionalCandidatesPoolSize],
				destinations,
				sd.podLocationHints,
				timestamp,
				pdbs)
		if simulatorErr != nil {
			return sd.markSimulationError(simulatorErr, timestamp)
		}
		if len(additionalNodesToRemove) > additionalCandidatesCount {
			additionalNodesToRemove = additionalNodesToRemove[:additionalCandidatesCount]
		}
		nodesToRemove = append(nodesToRemove, additionalNodesToRemove...)
		unremovable = append(unremovable, additionalUnremovable...)
		for key, value := range additionalNewHints {
			newHints[key] = value
		}
	}

	for _, empty := range emptyNodesToRemove {
		nodesToRemove = append(nodesToRemove, simulator.NodeToBeRemoved{Node: empty.Node, PodsToReschedule: []*apiv1.Pod{}})
	}

	// Update the timestamp map.
	result := make(map[string]time.Time)
	unneededNodesList := make([]*apiv1.Node, 0, len(nodesToRemove))
	for _, node := range nodesToRemove {
		name := node.Node.Name
		unneededNodesList = append(unneededNodesList, node.Node)
		if val, found := sd.unneededNodes[name]; !found {
			result[name] = timestamp
		} else {
			result[name] = val
		}
	}
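
	// Preserving the previous timestamp for nodes that were already unneeded means the
	// "unneeded since" clock is not reset on every loop; the ScaleDownUnneededTime check
	// in NodesToDelete relies on that.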

	// Add nodes to the unremovable map.
	if len(unremovable) > 0 {
		unremovableTimeout := timestamp.Add(sd.context.AutoscalingOptions.UnremovableNodeRecheckTimeout)
		for _, unremovableNode := range unremovable {
			sd.unremovableNodes.AddTimeout(unremovableNode, unremovableTimeout)
		}
		klog.V(1).Infof("%v nodes found to be unremovable in simulation, will re-check them at %v", len(unremovable), unremovableTimeout)
	}

	// This method won't always check all nodes, so give a generic reason for all the nodes that weren't checked.
	for _, node := range scaleDownCandidates {
		unremovableReasonProvided := sd.unremovableNodes.HasReason(node.Name)
		_, unneeded := result[node.Name]
		if !unneeded && !unremovableReasonProvided {
			sd.unremovableNodes.AddReason(node, simulator.NotUnneededOtherReason)
		}
	}

	// Update state and metrics.
	sd.unneededNodesList = unneededNodesList
	sd.unneededNodes = result
	sd.podLocationHints = newHints
	sd.nodeUtilizationMap = utilizationMap
	sd.clusterStateRegistry.UpdateScaleDownCandidates(sd.unneededNodesList, timestamp)
	metrics.UpdateUnneededNodesCount(len(sd.unneededNodesList))
	if klog.V(4).Enabled() {
		for key, val := range sd.unneededNodes {
			klog.Infof("%s is unneeded since %s duration %s", key, val.String(), timestamp.Sub(val).String())
		}
	}
	return nil
}

// NodeUtilizationMap returns the most recent mapping from node names to utilization info.
func (sd *ScaleDown) NodeUtilizationMap() map[string]utilization.Info {
	return sd.nodeUtilizationMap
}

// UnremovableNodes returns a list of nodes that cannot be removed according to
// the scale down algorithm.
func (sd *ScaleDown) UnremovableNodes() []*simulator.UnremovableNode {
	return sd.unremovableNodes.AsList()
}

// markSimulationError indicates a simulation error by clearing the relevant scale
// down state and returning an appropriate error.
func (sd *ScaleDown) markSimulationError(simulatorErr errors.AutoscalerError,
	timestamp time.Time) errors.AutoscalerError {
	klog.Errorf("Error while simulating node drains: %v", simulatorErr)
	sd.unneededNodesList = make([]*apiv1.Node, 0)
	sd.unneededNodes = make(map[string]time.Time)
	sd.nodeUtilizationMap = make(map[string]utilization.Info)
	sd.clusterStateRegistry.UpdateScaleDownCandidates(sd.unneededNodesList, timestamp)
	return simulatorErr.AddPrefix("error while simulating node drains: ")
}

// chooseCandidates splits nodes into current candidates for scale-down and the
// rest. Current candidates are unneeded nodes from the previous run that are
// still present in the nodes list.
func (sd *ScaleDown) chooseCandidates(nodes []string) (candidates []string, nonCandidates []string) {
	// The number of candidates should not be capped. We will look for nodes to remove
	// from the whole set of nodes.
	if sd.context.ScaleDownNonEmptyCandidatesCount <= 0 {
		return nodes, nil
	}
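
	// For example, if the previous loop recorded {n1, n2} as unneeded and this loop's
	// unneeded set is {n2, n3}, then n2 is returned among the candidates and n3 among
	// the non-candidates.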
	for _, node := range nodes {
		if _, found := sd.unneededNodes[node]; found {
			candidates = append(candidates, node)
		} else {
			nonCandidates = append(nonCandidates, node)
		}
	}
	return candidates, nonCandidates
}

// mapNodesToStatusScaleDownNodes produces the status.ScaleDownNode view of the given nodes.
func (sd *ScaleDown) mapNodesToStatusScaleDownNodes(nodes []*apiv1.Node, nodeGroups map[string]cloudprovider.NodeGroup, evictedPodLists map[string][]*apiv1.Pod) []*status.ScaleDownNode {
	var result []*status.ScaleDownNode
	for _, node := range nodes {
		result = append(result, &status.ScaleDownNode{
			Node:        node,
			NodeGroup:   nodeGroups[node.Name],
			UtilInfo:    sd.nodeUtilizationMap[node.Name],
			EvictedPods: evictedPodLists[node.Name],
		})
	}
	return result
}

// NodesToDelete selects the nodes to delete for scale down.
func (sd *ScaleDown) NodesToDelete(currentTime time.Time, pdbs []*policyv1.PodDisruptionBudget) (empty, drain []*apiv1.Node, res status.ScaleDownResult, err errors.AutoscalerError) {
	_, drained := sd.nodeDeletionTracker.DeletionsInProgress()
	if len(drained) > 0 {
		return nil, nil, status.ScaleDownInProgress, nil
	}

	findNodesToRemoveDuration := time.Duration(0)
	defer updateScaleDownMetrics(time.Now(), &findNodesToRemoveDuration)

	allNodeInfos, errSnapshot := sd.context.ClusterSnapshot.NodeInfos().List()
	if errSnapshot != nil {
		// This should never happen: List() returns err only because the scheduler interface requires it.
		return nil, nil, status.ScaleDownError, errors.ToAutoscalerError(errors.InternalError, errSnapshot)
	}

	allNodes := make([]*apiv1.Node, 0, len(allNodeInfos))
	allNodeNames := make([]string, 0, len(allNodeInfos))
	for _, ni := range allNodeInfos {
		allNodes = append(allNodes, ni.Node())
		allNodeNames = append(allNodeNames, ni.Node().Name)
	}

	candidateNames := make([]string, 0)
	readinessMap := make(map[string]bool)
	candidateNodeGroups := make(map[string]cloudprovider.NodeGroup)

	resourceLimiter, errCP := sd.context.CloudProvider.GetResourceLimiter()
	if errCP != nil {
		return nil, nil, status.ScaleDownError, errors.ToAutoscalerError(errors.CloudProviderError, errCP)
	}

	scaleDownResourcesLeft := sd.resourceLimitsFinder.LimitsLeft(sd.context, allNodes, resourceLimiter, currentTime)

	nodeGroupSize := utils.GetNodeGroupSizeMap(sd.context.CloudProvider)
	resourcesWithLimits := resourceLimiter.GetResources()
	for nodeName, unneededSince := range sd.unneededNodes {
		klog.V(2).Infof("%s was unneeded for %s", nodeName, currentTime.Sub(unneededSince).String())

		nodeInfo, err := sd.context.ClusterSnapshot.NodeInfos().Get(nodeName)
		if err != nil {
			klog.Errorf("Can't retrieve unneeded node %s from snapshot, err: %v", nodeName, err)
			continue
		}

		node := nodeInfo.Node()

		// Check if the node is marked with the "scale down disabled" annotation.
		if eligibility.HasNoScaleDownAnnotation(node) {
			klog.V(4).Infof("Skipping %s - scale down disabled annotation found", node.Name)
			sd.unremovableNodes.AddReason(node, simulator.ScaleDownDisabledAnnotation)
			continue
		}

		ready, _, _ := kube_util.GetReadinessState(node)
		readinessMap[node.Name] = ready

		nodeGroup, err := sd.context.CloudProvider.NodeGroupForNode(node)
		if err != nil {
			klog.Errorf("Error while checking node group for %s: %v", node.Name, err)
			sd.unremovableNodes.AddReason(node, simulator.UnexpectedError)
			continue
		}
		if nodeGroup == nil || reflect.ValueOf(nodeGroup).IsNil() {
			klog.V(4).Infof("Skipping %s - no node group config", node.Name)
			sd.unremovableNodes.AddReason(node, simulator.NotAutoscaled)
			continue
		}

		if ready {
			// Check how long a ready node was underutilized.
			unneededTime, err := sd.processors.NodeGroupConfigProcessor.GetScaleDownUnneededTime(sd.context, nodeGroup)
			if err != nil {
				klog.Errorf("Error trying to get ScaleDownUnneededTime for node %s (in group: %s)", node.Name, nodeGroup.Id())
				continue
			}
			if !unneededSince.Add(unneededTime).Before(currentTime) {
				sd.unremovableNodes.AddReason(node, simulator.NotUnneededLongEnough)
				continue
			}
		} else {
			// Unready nodes may be deleted after a different time than underutilized nodes.
			unreadyTime, err := sd.processors.NodeGroupConfigProcessor.GetScaleDownUnreadyTime(sd.context, nodeGroup)
			if err != nil {
				klog.Errorf("Error trying to get ScaleDownUnreadyTime for node %s (in group: %s)", node.Name, nodeGroup.Id())
				continue
			}
			if !unneededSince.Add(unreadyTime).Before(currentTime) {
				sd.unremovableNodes.AddReason(node, simulator.NotUnreadyLongEnough)
				continue
			}
		}
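
		// Note: the checks above read "the node has not yet been unneeded (or unready) for
		// the configured duration"; the clock starts at the unneededSince timestamp recorded
		// by UpdateUnneededNodes.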

		size, found := nodeGroupSize[nodeGroup.Id()]
		if !found {
			klog.Errorf("Error while checking node group size %s: group size not found in cache", nodeGroup.Id())
			sd.unremovableNodes.AddReason(node, simulator.UnexpectedError)
			continue
		}

		deletionsInProgress := sd.nodeDeletionTracker.DeletionsCount(nodeGroup.Id())
		if size-deletionsInProgress <= nodeGroup.MinSize() {
			klog.V(1).Infof("Skipping %s - node group min size reached", node.Name)
			sd.unremovableNodes.AddReason(node, simulator.NodeGroupMinSizeReached)
			continue
		}

		scaleDownResourcesDelta, err := sd.resourceLimitsFinder.DeltaForNode(sd.context, node, nodeGroup, resourcesWithLimits)
		if err != nil {
			klog.Errorf("Error getting node resources: %v", err)
			sd.unremovableNodes.AddReason(node, simulator.UnexpectedError)
			continue
		}

		checkResult := scaleDownResourcesLeft.CheckDeltaWithinLimits(scaleDownResourcesDelta)
		if checkResult.Exceeded() {
			klog.V(4).Infof("Skipping %s - minimal limit exceeded for %v", node.Name, checkResult.ExceededResources)
			sd.unremovableNodes.AddReason(node, simulator.MinimalResourceLimitExceeded)
			for _, resource := range checkResult.ExceededResources {
				switch resource {
				case cloudprovider.ResourceNameCores:
					metrics.RegisterSkippedScaleDownCPU()
				case cloudprovider.ResourceNameMemory:
					metrics.RegisterSkippedScaleDownMemory()
				default:
					continue
				}
			}
			continue
		}

		candidateNames = append(candidateNames, node.Name)
		candidateNodeGroups[node.Name] = nodeGroup
	}

	if len(candidateNames) == 0 {
		klog.V(1).Infof("No candidates for scale down")
		return nil, nil, status.ScaleDownNoUnneeded, nil
	}

	// Try to delete empty nodes in bulk. If there are no empty nodes then CA will
	// try to delete not-so-empty nodes, possibly killing some pods and allowing them
	// to be recreated on other nodes.
	emptyNodesToRemove := sd.getEmptyNodesToRemove(candidateNames, scaleDownResourcesLeft, currentTime)
	emptyNodesToRemove = sd.processors.ScaleDownSetProcessor.GetNodesToRemove(sd.context, emptyNodesToRemove, sd.context.MaxEmptyBulkDelete)
	if len(emptyNodesToRemove) > 0 {
		var nodes []*apiv1.Node
		for _, node := range emptyNodesToRemove {
			// Nothing super-bad should happen if the node is removed from the tracker prematurely.
			simulator.RemoveNodeFromTracker(sd.usageTracker, node.Node.Name, sd.unneededNodes)
			nodes = append(nodes, node.Node)
		}
		return nodes, nil, status.ScaleDownNodeDeleteStarted, nil
	}

	findNodesToRemoveStart := time.Now()
	// We look for only 1 node, so the new hints may be incomplete.
	nodesToRemove, unremovable, _, err := sd.removalSimulator.FindNodesToRemove(
		candidateNames,
		allNodeNames,
		sd.podLocationHints,
		time.Now(),
		pdbs)
	findNodesToRemoveDuration = time.Since(findNodesToRemoveStart)

	for _, unremovableNode := range unremovable {
		sd.unremovableNodes.Add(unremovableNode)
	}
	if err != nil {
		return nil, nil, status.ScaleDownError, err.AddPrefix("Find node to remove failed: ")
	}

	nodesToRemove = sd.processors.ScaleDownSetProcessor.GetNodesToRemove(sd.context, nodesToRemove, 1)
	if len(nodesToRemove) == 0 {
		klog.V(1).Infof("No node to remove")
		return nil, nil, status.ScaleDownNoNodeDeleted, nil
	}
	toRemove := nodesToRemove[0]
	// Nothing super-bad should happen if the node is removed from the tracker prematurely.
	simulator.RemoveNodeFromTracker(sd.usageTracker, toRemove.Node.Name, sd.unneededNodes)
	return nil, []*apiv1.Node{toRemove.Node}, status.ScaleDownNodeDeleteStarted, nil
}

// updateScaleDownMetrics registers the duration of different parts of scale down.
// It separates time spent on finding nodes to remove, deleting nodes, and other operations.
func updateScaleDownMetrics(scaleDownStart time.Time, findNodesToRemoveDuration *time.Duration) {
	stop := time.Now()
	miscDuration := stop.Sub(scaleDownStart) - *findNodesToRemoveDuration
	metrics.UpdateDuration(metrics.ScaleDownFindNodesToRemove, *findNodesToRemoveDuration)
	metrics.UpdateDuration(metrics.ScaleDownMiscOperations, miscDuration)
}

func (sd *ScaleDown) getEmptyNodesToRemoveNoResourceLimits(candidates []string, timestamp time.Time) []simulator.NodeToBeRemoved {
	return sd.getEmptyNodesToRemove(candidates, resource.NoLimits(), timestamp)
}

// getEmptyNodesToRemove finds empty nodes among the passed candidates and returns a list
// of empty nodes that can be deleted at the same time.
func (sd *ScaleDown) getEmptyNodesToRemove(candidates []string, resourcesLimits resource.Limits,
	timestamp time.Time) []simulator.NodeToBeRemoved {

	emptyNodes := sd.removalSimulator.FindEmptyNodesToRemove(candidates, timestamp)
	availabilityMap := make(map[string]int)
	nodesToRemove := make([]simulator.NodeToBeRemoved, 0)
	resourcesLimitsCopy := resourcesLimits.DeepCopy() // we do not want to modify the input parameter
	resourcesNames := sets.StringKeySet(resourcesLimits).List()
	for _, nodeName := range emptyNodes {
		nodeInfo, err := sd.context.ClusterSnapshot.NodeInfos().Get(nodeName)
		if err != nil {
			klog.Errorf("Can't retrieve node %s from snapshot, err: %v", nodeName, err)
			continue
		}
		node := nodeInfo.Node()
		nodeGroup, err := sd.context.CloudProvider.NodeGroupForNode(node)
		if err != nil {
			klog.Errorf("Failed to get group for %s: %v", nodeName, err)
			continue
		}
		if nodeGroup == nil || reflect.ValueOf(nodeGroup).IsNil() {
			continue
		}
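
		// availabilityMap caps removals per node group: at most
		// targetSize - minSize - deletionsInProgress empty nodes may be taken from a single
		// group in one pass, so a bulk delete cannot push the group below its minimum size.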
		var available int
		var found bool
		if available, found = availabilityMap[nodeGroup.Id()]; !found {
			// Will be cached.
			size, err := nodeGroup.TargetSize()
			if err != nil {
				klog.Errorf("Failed to get size for %s: %v", nodeGroup.Id(), err)
				continue
			}
			deletionsInProgress := sd.nodeDeletionTracker.DeletionsCount(nodeGroup.Id())
			available = size - nodeGroup.MinSize() - deletionsInProgress
			if available < 0 {
				available = 0
			}
			availabilityMap[nodeGroup.Id()] = available
		}
		if available > 0 {
			resourcesDelta, err := sd.resourceLimitsFinder.DeltaForNode(sd.context, node, nodeGroup, resourcesNames)
			if err != nil {
				klog.Errorf("Error getting node resources: %v", err)
				continue
			}
			checkResult := resourcesLimitsCopy.TryDecrementBy(resourcesDelta)
			if checkResult.Exceeded() {
				continue
			}
			available--
			availabilityMap[nodeGroup.Id()] = available
			nodesToRemove = append(nodesToRemove, simulator.NodeToBeRemoved{
				Node: node,
			})
		}
	}

	return nodesToRemove
}