autoscaler/cluster-autoscaler/simulator/cluster.go

281 lines
10 KiB
Go

/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package simulator
import (
"flag"
"fmt"
"time"
"k8s.io/autoscaler/cluster-autoscaler/utils/drain"
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
"k8s.io/autoscaler/cluster-autoscaler/utils/tpu"
apiv1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
klog "k8s.io/klog/v2"
)
var (
skipNodesWithSystemPods = flag.Bool("skip-nodes-with-system-pods", true,
"If true cluster autoscaler will never delete nodes with pods from kube-system (except for DaemonSet "+
"or mirror pods)")
skipNodesWithLocalStorage = flag.Bool("skip-nodes-with-local-storage", true,
"If true cluster autoscaler will never delete nodes with pods with local storage, e.g. EmptyDir or HostPath")
minReplicaCount = flag.Int("min-replica-count", 0,
"Minimum number or replicas that a replica set or replication controller should have to allow their pods deletion in scale down")
)
// NodeToBeRemoved contain information about a node that can be removed.
type NodeToBeRemoved struct {
// Node to be removed.
Node *apiv1.Node
// PodsToReschedule contains pods on the node that should be rescheduled elsewhere.
PodsToReschedule []*apiv1.Pod
DaemonSetPods []*apiv1.Pod
}
// UnremovableNode represents a node that can't be removed by CA.
type UnremovableNode struct {
Node *apiv1.Node
Reason UnremovableReason
BlockingPod *drain.BlockingPod
}
// UnremovableReason represents a reason why a node can't be removed by CA.
type UnremovableReason int
const (
// NoReason - sanity check, this should never be set explicitly. If this is found in the wild, it means that it was
// implicitly initialized and might indicate a bug.
NoReason UnremovableReason = iota
// ScaleDownDisabledAnnotation - node can't be removed because it has a "scale down disabled" annotation.
ScaleDownDisabledAnnotation
// NotAutoscaled - node can't be removed because it doesn't belong to an autoscaled node group.
NotAutoscaled
// NotUnneededLongEnough - node can't be removed because it wasn't unneeded for long enough.
NotUnneededLongEnough
// NotUnreadyLongEnough - node can't be removed because it wasn't unready for long enough.
NotUnreadyLongEnough
// NodeGroupMinSizeReached - node can't be removed because its node group is at its minimal size already.
NodeGroupMinSizeReached
// MinimalResourceLimitExceeded - node can't be removed because it would violate cluster-wide minimal resource limits.
MinimalResourceLimitExceeded
// CurrentlyBeingDeleted - node can't be removed because it's already in the process of being deleted.
CurrentlyBeingDeleted
// NotUnderutilized - node can't be removed because it's not underutilized.
NotUnderutilized
// NotUnneededOtherReason - node can't be removed because it's not marked as unneeded for other reasons (e.g. it wasn't inspected at all in a given autoscaler loop).
NotUnneededOtherReason
// RecentlyUnremovable - node can't be removed because it was recently found to be unremovable.
RecentlyUnremovable
// NoPlaceToMovePods - node can't be removed because there's no place to move its pods to.
NoPlaceToMovePods
// BlockedByPod - node can't be removed because a pod running on it can't be moved. The reason why should be in BlockingPod.
BlockedByPod
// UnexpectedError - node can't be removed because of an unexpected error.
UnexpectedError
)
// RemovalSimulator is a helper object for simulating node removal scenarios.
type RemovalSimulator struct {
listers kube_util.ListerRegistry
clusterSnapshot ClusterSnapshot
predicateChecker PredicateChecker
usageTracker *UsageTracker
}
// NewRemovalSimulator returns a new RemovalSimulator.
func NewRemovalSimulator(listers kube_util.ListerRegistry, clusterSnapshot ClusterSnapshot, predicateChecker PredicateChecker, usageTracker *UsageTracker) *RemovalSimulator {
return &RemovalSimulator{
listers: listers,
clusterSnapshot: clusterSnapshot,
predicateChecker: predicateChecker,
usageTracker: usageTracker,
}
}
// FindNodesToRemove finds nodes that can be removed. Returns also an
// information about good rescheduling location for each of the pods.
func (r *RemovalSimulator) FindNodesToRemove(
candidates []string,
destinations []string,
oldHints map[string]string,
timestamp time.Time,
pdbs []*policyv1.PodDisruptionBudget,
) (nodesToRemove []NodeToBeRemoved, unremovableNodes []*UnremovableNode, podReschedulingHints map[string]string, finalError errors.AutoscalerError) {
result := make([]NodeToBeRemoved, 0)
unremovable := make([]*UnremovableNode, 0)
newHints := make(map[string]string, len(oldHints))
destinationMap := make(map[string]bool, len(destinations))
for _, destination := range destinations {
destinationMap[destination] = true
}
for _, nodeName := range candidates {
rn, urn := r.CheckNodeRemoval(nodeName, destinationMap, oldHints, newHints, timestamp, pdbs)
if rn != nil {
result = append(result, *rn)
} else if urn != nil {
unremovable = append(unremovable, urn)
}
}
return result, unremovable, newHints, nil
}
// CheckNodeRemoval checks whether a specific node can be removed. Depending on
// the outcome, exactly one of (NodeToBeRemoved, UnremovableNode) will be
// populated in the return value, the other will be nil.
func (r *RemovalSimulator) CheckNodeRemoval(
nodeName string,
destinationMap map[string]bool,
oldHints map[string]string,
newHints map[string]string,
timestamp time.Time,
pdbs []*policyv1.PodDisruptionBudget,
) (*NodeToBeRemoved, *UnremovableNode) {
nodeInfo, err := r.clusterSnapshot.NodeInfos().Get(nodeName)
if err != nil {
klog.Errorf("Can't retrieve node %s from snapshot, err: %v", nodeName, err)
}
klog.V(2).Infof("%s for removal", nodeName)
if _, found := destinationMap[nodeName]; !found {
klog.V(2).Infof("nodeInfo for %s not found", nodeName)
return nil, &UnremovableNode{Node: nodeInfo.Node(), Reason: UnexpectedError}
}
podsToRemove, daemonSetPods, blockingPod, err := DetailedGetPodsForMove(nodeInfo, *skipNodesWithSystemPods,
*skipNodesWithLocalStorage, r.listers, int32(*minReplicaCount), pdbs, timestamp)
if err != nil {
klog.V(2).Infof("node %s cannot be removed: %v", nodeName, err)
if blockingPod != nil {
return nil, &UnremovableNode{Node: nodeInfo.Node(), Reason: BlockedByPod, BlockingPod: blockingPod}
}
return nil, &UnremovableNode{Node: nodeInfo.Node(), Reason: UnexpectedError}
}
err = r.findPlaceFor(nodeName, podsToRemove, destinationMap, oldHints, newHints, timestamp)
if err != nil {
klog.V(2).Infof("node %s is not suitable for removal: %v", nodeName, err)
return nil, &UnremovableNode{Node: nodeInfo.Node(), Reason: NoPlaceToMovePods}
}
klog.V(2).Infof("node %s may be removed", nodeName)
return &NodeToBeRemoved{
Node: nodeInfo.Node(),
PodsToReschedule: podsToRemove,
DaemonSetPods: daemonSetPods,
}, nil
}
// FindEmptyNodesToRemove finds empty nodes that can be removed.
func (r *RemovalSimulator) FindEmptyNodesToRemove(candidates []string, timestamp time.Time) []string {
result := make([]string, 0)
for _, node := range candidates {
nodeInfo, err := r.clusterSnapshot.NodeInfos().Get(node)
if err != nil {
klog.Errorf("Can't retrieve node %s from snapshot, err: %v", node, err)
continue
}
// Should block on all pods.
podsToRemove, _, _, err := FastGetPodsToMove(nodeInfo, true, true, nil, timestamp)
if err == nil && len(podsToRemove) == 0 {
result = append(result, node)
}
}
return result
}
func (r *RemovalSimulator) findPlaceFor(removedNode string, pods []*apiv1.Pod, nodes map[string]bool,
oldHints map[string]string, newHints map[string]string, timestamp time.Time) error {
if err := r.clusterSnapshot.Fork(); err != nil {
return err
}
defer func() {
err := r.clusterSnapshot.Revert()
if err != nil {
klog.Fatalf("Got error when calling ClusterSnapshot.Revert(); %v", err)
}
}()
podKey := func(pod *apiv1.Pod) string {
return fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)
}
isCandidateNode := func(nodeName string) bool {
return nodeName != removedNode && nodes[nodeName]
}
pods = tpu.ClearTPURequests(pods)
// remove pods from clusterSnapshot first
for _, pod := range pods {
if err := r.clusterSnapshot.RemovePod(pod.Namespace, pod.Name, removedNode); err != nil {
// just log error
klog.Errorf("Simulating removal of %s/%s return error; %v", pod.Namespace, pod.Name, err)
}
}
for _, podptr := range pods {
newpod := *podptr
newpod.Spec.NodeName = ""
pod := &newpod
foundPlace := false
targetNode := ""
klog.V(5).Infof("Looking for place for %s/%s", pod.Namespace, pod.Name)
if hintedNode, hasHint := oldHints[podKey(pod)]; hasHint && isCandidateNode(hintedNode) {
if err := r.predicateChecker.CheckPredicates(r.clusterSnapshot, pod, hintedNode); err == nil {
klog.V(4).Infof("Pod %s/%s can be moved to %s", pod.Namespace, pod.Name, hintedNode)
if err := r.clusterSnapshot.AddPod(pod, hintedNode); err != nil {
return fmt.Errorf("Simulating scheduling of %s/%s to %s return error; %v", pod.Namespace, pod.Name, hintedNode, err)
}
newHints[podKey(pod)] = hintedNode
foundPlace = true
targetNode = hintedNode
}
}
if !foundPlace {
newNodeName, err := r.predicateChecker.FitsAnyNodeMatching(r.clusterSnapshot, pod, func(nodeInfo *schedulerframework.NodeInfo) bool {
return isCandidateNode(nodeInfo.Node().Name)
})
if err == nil {
klog.V(4).Infof("Pod %s/%s can be moved to %s", pod.Namespace, pod.Name, newNodeName)
if err := r.clusterSnapshot.AddPod(pod, newNodeName); err != nil {
return fmt.Errorf("Simulating scheduling of %s/%s to %s return error; %v", pod.Namespace, pod.Name, newNodeName, err)
}
newHints[podKey(pod)] = newNodeName
targetNode = newNodeName
} else {
return fmt.Errorf("failed to find place for %s", podKey(pod))
}
}
r.usageTracker.RegisterUsage(removedNode, targetNode, timestamp)
}
return nil
}