Support atomic scale-down option for node groups
This commit is contained in: parent 292a517300, commit 7e3e15bd16
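The change is driven by a per-node-group autoscaling option. As a rough sketch (not part of the commit itself), a node group opts into atomic scale-down the same way the sizedNodeGroup test helper in the diff below does, via config.NodeGroupAutoscalingOptions.AtomicScaleDown; the testprovider call signatures are copied from that helper:

package example

import (
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
	testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
	"k8s.io/autoscaler/cluster-autoscaler/config"
)

// atomicNodeGroup mirrors the sizedNodeGroup helper added in this commit's tests:
// a node group whose options mark it for atomic (all-or-nothing) scale-down.
func atomicNodeGroup() cloudprovider.NodeGroup {
	ng := testprovider.NewTestNodeGroup("atomic-2", 10000, 0, 2, true, false, "n1-standard-2", nil, nil)
	ng.SetOptions(&config.NodeGroupAutoscalingOptions{
		AtomicScaleDown: true, // the option this commit introduces
	})
	return ng
}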
@@ -17,7 +17,6 @@ limitations under the License.
package actuation

import (
"reflect"
"strings"
"time"

@@ -25,6 +24,7 @@ import (
policyv1 "k8s.io/api/policy/v1"
"k8s.io/klog/v2"

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/clusterstate"
"k8s.io/autoscaler/cluster-autoscaler/context"
"k8s.io/autoscaler/cluster-autoscaler/core/scaledown"

@@ -38,29 +38,31 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
"k8s.io/kubernetes/pkg/scheduler/framework"
)

// Actuator is responsible for draining and deleting nodes.
type Actuator struct {
ctx *context.AutoscalingContext
clusterState *clusterstate.ClusterStateRegistry
nodeDeletionTracker *deletiontracker.NodeDeletionTracker
nodeDeletionBatcher *NodeDeletionBatcher
evictor Evictor
deleteOptions simulator.NodeDeleteOptions
ctx *context.AutoscalingContext
clusterState *clusterstate.ClusterStateRegistry
nodeDeletionTracker *deletiontracker.NodeDeletionTracker
nodeDeletionScheduler *GroupDeletionScheduler
deleteOptions simulator.NodeDeleteOptions
// TODO: Move budget processor to scaledown planner, potentially merge into PostFilteringScaleDownNodeProcessor
// This is a larger change to the code structure which impacts some existing actuator unit tests
// as well as Cluster Autoscaler implementations that may override ScaleDownSetProcessor
budgetProcessor *ScaleDownBudgetProcessor
}

// NewActuator returns a new instance of Actuator.
func NewActuator(ctx *context.AutoscalingContext, csr *clusterstate.ClusterStateRegistry, ndt *deletiontracker.NodeDeletionTracker, deleteOptions simulator.NodeDeleteOptions) *Actuator {
nbd := NewNodeDeletionBatcher(ctx, csr, ndt, ctx.NodeDeletionBatcherInterval)
ndb := NewNodeDeletionBatcher(ctx, csr, ndt, ctx.NodeDeletionBatcherInterval)
return &Actuator{
ctx: ctx,
clusterState: csr,
nodeDeletionTracker: ndt,
nodeDeletionBatcher: nbd,
evictor: NewDefaultEvictor(deleteOptions, ndt),
deleteOptions: deleteOptions,
ctx: ctx,
clusterState: csr,
nodeDeletionTracker: ndt,
nodeDeletionScheduler: NewGroupDeletionScheduler(ctx, ndt, ndb, deleteOptions, NewDefaultEvictor(deleteOptions, ndt)),
budgetProcessor: NewScaleDownBudgetProcessor(ctx, ndt),
deleteOptions: deleteOptions,
}
}
@@ -82,7 +84,7 @@ func (a *Actuator) StartDeletion(empty, drain []*apiv1.Node) (*status.ScaleDownS
results, ts := a.nodeDeletionTracker.DeletionResults()
scaleDownStatus := &status.ScaleDownStatus{NodeDeleteResults: results, NodeDeleteResultsAsOf: ts}

emptyToDelete, drainToDelete := a.cropNodesToBudgets(empty, drain)
emptyToDelete, drainToDelete := a.budgetProcessor.CropNodes(empty, drain)
if len(emptyToDelete) == 0 && len(drainToDelete) == 0 {
scaleDownStatus.Result = status.ScaleDownNoNodeDeleted
return scaleDownStatus, nil
@@ -95,12 +97,8 @@ func (a *Actuator) StartDeletion(empty, drain []*apiv1.Node) (*status.ScaleDownS
return scaleDownStatus, err
}

emptyScaledDown, err := a.deleteAsyncEmpty(emptyToDelete)
emptyScaledDown := a.deleteAsyncEmpty(emptyToDelete)
scaleDownStatus.ScaledDownNodes = append(scaleDownStatus.ScaledDownNodes, emptyScaledDown...)
if err != nil {
scaleDownStatus.Result = status.ScaleDownError
return scaleDownStatus, err
}
}

if len(drainToDelete) > 0 {
@@ -121,87 +119,76 @@ func (a *Actuator) StartDeletion(empty, drain []*apiv1.Node) (*status.ScaleDownS
}

// deleteAsyncEmpty immediately starts deletions asynchronously.
// scaledDownNodes return value contains all nodes for which deletion successfully started. It's valid and should be consumed
// even if err != nil.
func (a *Actuator) deleteAsyncEmpty(empty []*apiv1.Node) (scaledDownNodes []*status.ScaleDownNode, err errors.AutoscalerError) {
var groupIds []string
var validNodes []*apiv1.Node
for _, emptyNode := range empty {
klog.V(0).Infof("Scale-down: removing empty node %q", emptyNode.Name)
a.ctx.LogRecorder.Eventf(apiv1.EventTypeNormal, "ScaleDownEmpty", "Scale-down: removing empty node %q", emptyNode.Name)
// scaledDownNodes return value contains all nodes for which deletion successfully started.
func (a *Actuator) deleteAsyncEmpty(nodeBuckets []*nodeBucket) (reportedSDNodes []*status.ScaleDownNode) {
for _, bucket := range nodeBuckets {
for _, node := range bucket.nodes {
klog.V(0).Infof("Scale-down: removing empty node %q", node.Name)
a.ctx.LogRecorder.Eventf(apiv1.EventTypeNormal, "ScaleDownEmpty", "Scale-down: removing empty node %q", node.Name)

nodeGroup, err := a.ctx.CloudProvider.NodeGroupForNode(emptyNode)
if err != nil || nodeGroup == nil || reflect.ValueOf(nodeGroup).IsNil() {
klog.Errorf("Failed to find node group for %s: %v", emptyNode.Name, err)
continue
if sdNode, err := a.scaleDownNodeToReport(node, false); err == nil {
reportedSDNodes = append(reportedSDNodes, sdNode)
} else {
klog.Errorf("Scale-down: couldn't report scaled down node, err: %v", err)
}

a.nodeDeletionTracker.StartDeletion(bucket.group.Id(), node.Name)
}

if sdNode, err := a.scaleDownNodeToReport(emptyNode, false); err == nil {
scaledDownNodes = append(scaledDownNodes, sdNode)
} else {
klog.Errorf("Scale-down: couldn't report scaled down node, err: %v", err)
}

a.nodeDeletionTracker.StartDeletion(nodeGroup.Id(), emptyNode.Name)
groupIds = append(groupIds, nodeGroup.Id())
validNodes = append(validNodes, emptyNode)
}

go a.deleteNodesAsync(validNodes, groupIds, false)
for _, bucket := range nodeBuckets {
go a.deleteNodesAsync(bucket.nodes, bucket.group, false)
}

return scaledDownNodes, nil
return reportedSDNodes
}

// taintNodesSync synchronously taints all provided nodes with NoSchedule. If tainting fails for any of the nodes, already
// applied taints are cleaned up.
func (a *Actuator) taintNodesSync(nodes []*apiv1.Node) errors.AutoscalerError {
func (a *Actuator) taintNodesSync(nodeBuckets []*nodeBucket) errors.AutoscalerError {
var taintedNodes []*apiv1.Node
for _, node := range nodes {
err := a.taintNode(node)
if err != nil {
a.ctx.Recorder.Eventf(node, apiv1.EventTypeWarning, "ScaleDownFailed", "failed to mark the node as toBeDeleted/unschedulable: %v", err)
// Clean up already applied taints in case of issues.
for _, taintedNode := range taintedNodes {
_, _ = taints.CleanToBeDeleted(taintedNode, a.ctx.ClientSet, a.ctx.CordonNodeBeforeTerminate)
for _, bucket := range nodeBuckets {
for _, node := range bucket.nodes {
err := a.taintNode(node)
if err != nil {
a.ctx.Recorder.Eventf(node, apiv1.EventTypeWarning, "ScaleDownFailed", "failed to mark the node as toBeDeleted/unschedulable: %v", err)
// Clean up already applied taints in case of issues.
for _, taintedNode := range taintedNodes {
_, _ = taints.CleanToBeDeleted(taintedNode, a.ctx.ClientSet, a.ctx.CordonNodeBeforeTerminate)
}
return errors.NewAutoscalerError(errors.ApiCallError, "couldn't taint node %q with ToBeDeleted", node)
}
return errors.NewAutoscalerError(errors.ApiCallError, "couldn't taint node %q with ToBeDeleted", node)
taintedNodes = append(taintedNodes, node)
}
taintedNodes = append(taintedNodes, node)
}
return nil
}

// deleteAsyncDrain asynchronously starts deletions with drain for all provided nodes. scaledDownNodes return value contains all nodes for which
// deletion successfully started.
func (a *Actuator) deleteAsyncDrain(drain []*apiv1.Node) (scaledDownNodes []*status.ScaleDownNode) {
var groupIds []string
var validNodes []*apiv1.Node
for _, drainNode := range drain {
if sdNode, err := a.scaleDownNodeToReport(drainNode, true); err == nil {
klog.V(0).Infof("Scale-down: removing node %s, utilization: %v, pods to reschedule: %s", drainNode.Name, sdNode.UtilInfo, joinPodNames(sdNode.EvictedPods))
a.ctx.LogRecorder.Eventf(apiv1.EventTypeNormal, "ScaleDown", "Scale-down: removing node %s, utilization: %v, pods to reschedule: %s", drainNode.Name, sdNode.UtilInfo, joinPodNames(sdNode.EvictedPods))
scaledDownNodes = append(scaledDownNodes, sdNode)
} else {
klog.Errorf("Scale-down: couldn't report scaled down node, err: %v", err)
}
func (a *Actuator) deleteAsyncDrain(nodeBuckets []*nodeBucket) (reportedSDNodes []*status.ScaleDownNode) {
for _, bucket := range nodeBuckets {
for _, drainNode := range bucket.nodes {
if sdNode, err := a.scaleDownNodeToReport(drainNode, true); err == nil {
klog.V(0).Infof("Scale-down: removing node %s, utilization: %v, pods to reschedule: %s", drainNode.Name, sdNode.UtilInfo, joinPodNames(sdNode.EvictedPods))
a.ctx.LogRecorder.Eventf(apiv1.EventTypeNormal, "ScaleDown", "Scale-down: removing node %s, utilization: %v, pods to reschedule: %s", drainNode.Name, sdNode.UtilInfo, joinPodNames(sdNode.EvictedPods))
reportedSDNodes = append(reportedSDNodes, sdNode)
} else {
klog.Errorf("Scale-down: couldn't report scaled down node, err: %v", err)
}

nodeGroup, err := a.ctx.CloudProvider.NodeGroupForNode(drainNode)
if err != nil || nodeGroup == nil || reflect.ValueOf(nodeGroup).IsNil() {
klog.Errorf("Failed to find node group for %s: %v", drainNode.Name, err)
continue
a.nodeDeletionTracker.StartDeletionWithDrain(bucket.group.Id(), drainNode.Name)
}

a.nodeDeletionTracker.StartDeletionWithDrain(nodeGroup.Id(), drainNode.Name)
groupIds = append(groupIds, nodeGroup.Id())
validNodes = append(validNodes, drainNode)
}

go a.deleteNodesAsync(validNodes, groupIds, true)
for _, bucket := range nodeBuckets {
go a.deleteNodesAsync(bucket.nodes, bucket.group, true)
}

return scaledDownNodes
return reportedSDNodes
}

func (a *Actuator) deleteNodesAsync(nodes []*apiv1.Node, groupIds []string, drain bool) {
func (a *Actuator) deleteNodesAsync(nodes []*apiv1.Node, nodeGroup cloudprovider.NodeGroup, drain bool) {
var pdbs []*policyv1.PodDisruptionBudget
var registry kube_util.ListerRegistry
@@ -215,12 +202,11 @@ func (a *Actuator) deleteNodesAsync(nodes []*apiv1.Node, groupIds []string, drai
}

clusterSnapshot, err := a.createSnapshot(nodes)

if err != nil {
klog.Errorf("Scale-down: couldn't create delete snapshot, err: %v", err)
nodeDeleteResult := status.NodeDeleteResult{ResultType: status.NodeDeleteErrorInternal, Err: errors.NewAutoscalerError(errors.InternalError, "createSnapshot returned error %v", err)}
for i, node := range nodes {
CleanUpAndRecordFailedScaleDownEvent(a.ctx, node, groupIds[i], drain, a.nodeDeletionTracker, "failed to create delete snapshot", nodeDeleteResult)
for _, node := range nodes {
a.nodeDeletionScheduler.RollbackNodeDeletion(node, nodeGroup.Id(), drain, "failed to create delete snapshot", nodeDeleteResult)
}
return
}
@@ -230,8 +216,8 @@ func (a *Actuator) deleteNodesAsync(nodes []*apiv1.Node, groupIds []string, drai
if err != nil {
klog.Errorf("Scale-down: couldn't fetch pod disruption budgets, err: %v", err)
nodeDeleteResult := status.NodeDeleteResult{ResultType: status.NodeDeleteErrorInternal, Err: errors.NewAutoscalerError(errors.InternalError, "podDisruptionBudgetLister.List returned error %v", err)}
for i, node := range nodes {
CleanUpAndRecordFailedScaleDownEvent(a.ctx, node, groupIds[i], drain, a.nodeDeletionTracker, "failed to fetch pod disruption budgets", nodeDeleteResult)
for _, node := range nodes {
a.nodeDeletionScheduler.RollbackNodeDeletion(node, nodeGroup.Id(), drain, "failed to fetch pod disruption budgets", nodeDeleteResult)
}
return
}
@@ -239,12 +225,12 @@ func (a *Actuator) deleteNodesAsync(nodes []*apiv1.Node, groupIds []string, drai
registry = a.ctx.ListerRegistry
}

for i, node := range nodes {
for _, node := range nodes {
nodeInfo, err := clusterSnapshot.NodeInfos().Get(node.Name)
if err != nil {
klog.Errorf("Scale-down: can't retrieve node %q from snapshot, err: %v", node.Name, err)
nodeDeleteResult := status.NodeDeleteResult{ResultType: status.NodeDeleteErrorInternal, Err: errors.NewAutoscalerError(errors.InternalError, "nodeInfos.Get for %q returned error: %v", node.Name, err)}
CleanUpAndRecordFailedScaleDownEvent(a.ctx, node, groupIds[i], drain, a.nodeDeletionTracker, "failed to get node info", nodeDeleteResult)
a.nodeDeletionScheduler.RollbackNodeDeletion(node, nodeGroup.Id(), drain, "failed to get node info", nodeDeleteResult)
continue
}
@@ -252,18 +238,18 @@ func (a *Actuator) deleteNodesAsync(nodes []*apiv1.Node, groupIds []string, drai
if err != nil {
klog.Errorf("Scale-down: couldn't delete node %q, err: %v", node.Name, err)
nodeDeleteResult := status.NodeDeleteResult{ResultType: status.NodeDeleteErrorInternal, Err: errors.NewAutoscalerError(errors.InternalError, "GetPodsToMove for %q returned error: %v", node.Name, err)}
CleanUpAndRecordFailedScaleDownEvent(a.ctx, node, groupIds[i], drain, a.nodeDeletionTracker, "failed to get pods to move on node", nodeDeleteResult)
a.nodeDeletionScheduler.RollbackNodeDeletion(node, nodeGroup.Id(), drain, "failed to get pods to move on node", nodeDeleteResult)
continue
}

if !drain && len(podsToRemove) != 0 {
klog.Errorf("Scale-down: couldn't delete empty node %q, new pods got scheduled", node.Name)
nodeDeleteResult := status.NodeDeleteResult{ResultType: status.NodeDeleteErrorInternal, Err: errors.NewAutoscalerError(errors.InternalError, "failed to delete empty node %q, new pods scheduled", node.Name)}
CleanUpAndRecordFailedScaleDownEvent(a.ctx, node, groupIds[i], drain, a.nodeDeletionTracker, "node is not empty", nodeDeleteResult)
a.nodeDeletionScheduler.RollbackNodeDeletion(node, nodeGroup.Id(), drain, "node is not empty", nodeDeleteResult)
continue
}

go a.scheduleDeletion(nodeInfo, groupIds[i], drain)
go a.nodeDeletionScheduler.ScheduleDeletion(nodeInfo, nodeGroup, len(nodes), drain)
}
}
@@ -304,40 +290,6 @@ func (a *Actuator) taintNode(node *apiv1.Node) error {
return nil
}

func (a *Actuator) prepareNodeForDeletion(nodeInfo *framework.NodeInfo, drain bool) status.NodeDeleteResult {
node := nodeInfo.Node()
if drain {
if evictionResults, err := a.evictor.DrainNode(a.ctx, nodeInfo); err != nil {
return status.NodeDeleteResult{ResultType: status.NodeDeleteErrorFailedToEvictPods, Err: err, PodEvictionResults: evictionResults}
}
} else {
if err := a.evictor.EvictDaemonSetPods(a.ctx, nodeInfo, time.Now()); err != nil {
// Evicting DS pods is best-effort, so proceed with the deletion even if there are errors.
klog.Warningf("Error while evicting DS pods from an empty node %q: %v", node.Name, err)
}
}
if err := WaitForDelayDeletion(node, a.ctx.ListerRegistry.AllNodeLister(), a.ctx.AutoscalingOptions.NodeDeletionDelayTimeout); err != nil {
return status.NodeDeleteResult{ResultType: status.NodeDeleteErrorFailedToDelete, Err: err}
}
return status.NodeDeleteResult{ResultType: status.NodeDeleteOk}
}

// scheduleDeletion schedule the deletion on of the provided node by adding a node to NodeDeletionBatcher. If drain is true, the node is drained before being deleted.
func (a *Actuator) scheduleDeletion(nodeInfo *framework.NodeInfo, nodeGroupId string, drain bool) {
node := nodeInfo.Node()
nodeDeleteResult := a.prepareNodeForDeletion(nodeInfo, drain)
if nodeDeleteResult.Err != nil {
CleanUpAndRecordFailedScaleDownEvent(a.ctx, node, nodeGroupId, drain, a.nodeDeletionTracker, "prepareNodeForDeletion failed", nodeDeleteResult)
return
}
err := a.nodeDeletionBatcher.AddNode(node, drain)
if err != nil {
klog.Errorf("Couldn't add node to nodeDeletionBatcher, err: %v", err)
nodeDeleteResult := status.NodeDeleteResult{ResultType: status.NodeDeleteErrorInternal, Err: errors.NewAutoscalerError(errors.InternalError, "nodeDeletionBatcher.AddNode for %s returned error: %v", node.Name, err)}
CleanUpAndRecordFailedScaleDownEvent(a.ctx, node, nodeGroupId, drain, a.nodeDeletionTracker, "failed add node to the nodeDeletionBatche", nodeDeleteResult)
}
}

func (a *Actuator) createSnapshot(nodes []*apiv1.Node) (clustersnapshot.ClusterSnapshot, error) {
knownNodes := make(map[string]bool)
snapshot := clustersnapshot.NewBasicClusterSnapshot()
@@ -35,12 +35,14 @@ import (
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
"k8s.io/autoscaler/cluster-autoscaler/clusterstate"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/deletiontracker"
"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/status"
. "k8s.io/autoscaler/cluster-autoscaler/core/test"
"k8s.io/autoscaler/cluster-autoscaler/simulator"
"k8s.io/autoscaler/cluster-autoscaler/simulator/utilization"
kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
@@ -48,12 +50,14 @@ import (
)

func TestStartDeletion(t *testing.T) {
testNg := testprovider.NewTestNodeGroup("test-ng", 0, 100, 3, true, false, "n1-standard-2", nil, nil)
testNg := testprovider.NewTestNodeGroup("test", 0, 100, 3, true, false, "n1-standard-2", nil, nil)
atomic2 := sizedNodeGroup("atomic-2", 2, true)
atomic4 := sizedNodeGroup("atomic-4", 4, true)
toBeDeletedTaint := apiv1.Taint{Key: taints.ToBeDeletedTaint, Effect: apiv1.TaintEffectNoSchedule}

for tn, tc := range map[string]struct {
emptyNodes []*apiv1.Node
drainNodes []*apiv1.Node
emptyNodes []*nodeBucket
drainNodes []*nodeBucket
pods map[string][]*apiv1.Pod
failedPodDrain map[string]bool
failedNodeDeletion map[string]bool
@@ -73,152 +77,208 @@ func TestStartDeletion(t *testing.T) {
},
},
"empty node deletion": {
emptyNodes: generateNodes(2, "empty"),
emptyNodes: generatenodeBucketList(testNg, 0, 2),
wantStatus: &status.ScaleDownStatus{
Result: status.ScaleDownNodeDeleteStarted,
ScaledDownNodes: []*status.ScaleDownNode{
{
Node: generateNode("empty-node-0"),
Node: generateNode("test-node-0"),
NodeGroup: testNg,
EvictedPods: nil,
UtilInfo: generateUtilInfo(0, 0),
},
{
Node: generateNode("empty-node-1"),
Node: generateNode("test-node-1"),
NodeGroup: testNg,
EvictedPods: nil,
UtilInfo: generateUtilInfo(0, 0),
},
},
},
wantDeletedNodes: []string{"empty-node-0", "empty-node-1"},
wantDeletedNodes: []string{"test-node-0", "test-node-1"},
wantTaintUpdates: map[string][][]apiv1.Taint{
"empty-node-0": {
"test-node-0": {
{toBeDeletedTaint},
},
"empty-node-1": {
"test-node-1": {
{toBeDeletedTaint},
},
},
wantNodeDeleteResults: map[string]status.NodeDeleteResult{
"empty-node-0": {ResultType: status.NodeDeleteOk},
"empty-node-1": {ResultType: status.NodeDeleteOk},
"test-node-0": {ResultType: status.NodeDeleteOk},
"test-node-1": {ResultType: status.NodeDeleteOk},
},
},
"empty atomic node deletion": {
emptyNodes: generatenodeBucketList(atomic2, 0, 2),
wantStatus: &status.ScaleDownStatus{
Result: status.ScaleDownNodeDeleteStarted,
ScaledDownNodes: []*status.ScaleDownNode{
{
Node: generateNode("atomic-2-node-0"),
NodeGroup: atomic2,
EvictedPods: nil,
UtilInfo: generateUtilInfo(0, 0),
},
{
Node: generateNode("atomic-2-node-1"),
NodeGroup: atomic2,
EvictedPods: nil,
UtilInfo: generateUtilInfo(0, 0),
},
},
},
wantDeletedNodes: []string{"atomic-2-node-0", "atomic-2-node-1"},
wantTaintUpdates: map[string][][]apiv1.Taint{
"atomic-2-node-0": {
{toBeDeletedTaint},
},
"atomic-2-node-1": {
{toBeDeletedTaint},
},
},
wantNodeDeleteResults: map[string]status.NodeDeleteResult{
"atomic-2-node-0": {ResultType: status.NodeDeleteOk},
"atomic-2-node-1": {ResultType: status.NodeDeleteOk},
},
},
"deletion with drain": {
drainNodes: generateNodes(2, "drain"),
drainNodes: generatenodeBucketList(testNg, 0, 2),
pods: map[string][]*apiv1.Pod{
"drain-node-0": removablePods(2, "drain-node-0"),
"drain-node-1": removablePods(2, "drain-node-1"),
"test-node-0": removablePods(2, "test-node-0"),
"test-node-1": removablePods(2, "test-node-1"),
},
wantStatus: &status.ScaleDownStatus{
Result: status.ScaleDownNodeDeleteStarted,
ScaledDownNodes: []*status.ScaleDownNode{
{
Node: generateNode("drain-node-0"),
Node: generateNode("test-node-0"),
NodeGroup: testNg,
EvictedPods: removablePods(2, "drain-node-0"),
EvictedPods: removablePods(2, "test-node-0"),
UtilInfo: generateUtilInfo(2./8., 2./8.),
},
{
Node: generateNode("drain-node-1"),
Node: generateNode("test-node-1"),
NodeGroup: testNg,
EvictedPods: removablePods(2, "drain-node-1"),
EvictedPods: removablePods(2, "test-node-1"),
UtilInfo: generateUtilInfo(2./8., 2./8.),
},
},
},
wantDeletedNodes: []string{"drain-node-0", "drain-node-1"},
wantDeletedPods: []string{"drain-node-0-pod-0", "drain-node-0-pod-1", "drain-node-1-pod-0", "drain-node-1-pod-1"},
wantDeletedNodes: []string{"test-node-0", "test-node-1"},
wantDeletedPods: []string{"test-node-0-pod-0", "test-node-0-pod-1", "test-node-1-pod-0", "test-node-1-pod-1"},
wantTaintUpdates: map[string][][]apiv1.Taint{
"drain-node-0": {
"test-node-0": {
{toBeDeletedTaint},
},
"drain-node-1": {
"test-node-1": {
{toBeDeletedTaint},
},
},
wantNodeDeleteResults: map[string]status.NodeDeleteResult{
"drain-node-0": {ResultType: status.NodeDeleteOk},
"drain-node-1": {ResultType: status.NodeDeleteOk},
"test-node-0": {ResultType: status.NodeDeleteOk},
"test-node-1": {ResultType: status.NodeDeleteOk},
},
},
"empty and drain deletion work correctly together": {
|
||||
emptyNodes: generateNodes(2, "empty"),
|
||||
drainNodes: generateNodes(2, "drain"),
|
||||
emptyNodes: generatenodeBucketList(testNg, 0, 2),
|
||||
drainNodes: generatenodeBucketList(testNg, 2, 4),
|
||||
pods: map[string][]*apiv1.Pod{
|
||||
"drain-node-0": removablePods(2, "drain-node-0"),
|
||||
"drain-node-1": removablePods(2, "drain-node-1"),
|
||||
"test-node-2": removablePods(2, "test-node-2"),
|
||||
"test-node-3": removablePods(2, "test-node-3"),
|
||||
},
|
||||
wantStatus: &status.ScaleDownStatus{
|
||||
Result: status.ScaleDownNodeDeleteStarted,
|
||||
ScaledDownNodes: []*status.ScaleDownNode{
|
||||
{
|
||||
Node: generateNode("empty-node-0"),
|
||||
Node: generateNode("test-node-0"),
|
||||
NodeGroup: testNg,
|
||||
EvictedPods: nil,
|
||||
UtilInfo: generateUtilInfo(0, 0),
|
||||
},
|
||||
{
|
||||
Node: generateNode("empty-node-1"),
|
||||
Node: generateNode("test-node-1"),
|
||||
NodeGroup: testNg,
|
||||
EvictedPods: nil,
|
||||
UtilInfo: generateUtilInfo(0, 0),
|
||||
},
|
||||
{
|
||||
Node: generateNode("drain-node-0"),
|
||||
Node: generateNode("test-node-2"),
|
||||
NodeGroup: testNg,
|
||||
EvictedPods: removablePods(2, "drain-node-0"),
|
||||
EvictedPods: removablePods(2, "test-node-2"),
|
||||
UtilInfo: generateUtilInfo(2./8., 2./8.),
|
||||
},
|
||||
{
|
||||
Node: generateNode("drain-node-1"),
|
||||
Node: generateNode("test-node-3"),
|
||||
NodeGroup: testNg,
|
||||
EvictedPods: removablePods(2, "drain-node-1"),
|
||||
EvictedPods: removablePods(2, "test-node-3"),
|
||||
UtilInfo: generateUtilInfo(2./8., 2./8.),
|
||||
},
|
||||
},
|
||||
},
|
||||
wantDeletedNodes: []string{"empty-node-0", "empty-node-1", "drain-node-0", "drain-node-1"},
|
||||
wantDeletedPods: []string{"drain-node-0-pod-0", "drain-node-0-pod-1", "drain-node-1-pod-0", "drain-node-1-pod-1"},
|
||||
wantDeletedNodes: []string{"test-node-0", "test-node-1", "test-node-2", "test-node-3"},
|
||||
wantDeletedPods: []string{"test-node-2-pod-0", "test-node-2-pod-1", "test-node-3-pod-0", "test-node-3-pod-1"},
|
||||
wantTaintUpdates: map[string][][]apiv1.Taint{
|
||||
"empty-node-0": {
|
||||
"test-node-0": {
|
||||
{toBeDeletedTaint},
|
||||
},
|
||||
"empty-node-1": {
|
||||
"test-node-1": {
|
||||
{toBeDeletedTaint},
|
||||
},
|
||||
"drain-node-0": {
|
||||
"test-node-2": {
|
||||
{toBeDeletedTaint},
|
||||
},
|
||||
"drain-node-1": {
|
||||
"test-node-3": {
|
||||
{toBeDeletedTaint},
|
||||
},
|
||||
},
|
||||
wantNodeDeleteResults: map[string]status.NodeDeleteResult{
|
||||
"empty-node-0": {ResultType: status.NodeDeleteOk},
|
||||
"empty-node-1": {ResultType: status.NodeDeleteOk},
|
||||
"drain-node-0": {ResultType: status.NodeDeleteOk},
|
||||
"drain-node-1": {ResultType: status.NodeDeleteOk},
|
||||
"test-node-0": {ResultType: status.NodeDeleteOk},
|
||||
"test-node-1": {ResultType: status.NodeDeleteOk},
|
||||
"test-node-2": {ResultType: status.NodeDeleteOk},
|
||||
"test-node-3": {ResultType: status.NodeDeleteOk},
|
||||
},
|
||||
},
|
||||
"failure to taint empty node stops deletion and cleans already applied taints": {
|
||||
emptyNodes: generateNodes(4, "empty"),
|
||||
drainNodes: generateNodes(1, "drain"),
|
||||
emptyNodes: generatenodeBucketList(testNg, 0, 4),
|
||||
drainNodes: generatenodeBucketList(testNg, 4, 5),
|
||||
pods: map[string][]*apiv1.Pod{
|
||||
"drain-node-0": removablePods(2, "drain-node-0"),
|
||||
"test-node-4": removablePods(2, "test-node-4"),
|
||||
},
|
||||
failedNodeTaint: map[string]bool{"empty-node-2": true},
|
||||
failedNodeTaint: map[string]bool{"test-node-2": true},
|
||||
wantStatus: &status.ScaleDownStatus{
|
||||
Result: status.ScaleDownError,
|
||||
ScaledDownNodes: nil,
|
||||
},
|
||||
wantTaintUpdates: map[string][][]apiv1.Taint{
|
||||
"empty-node-0": {
|
||||
"test-node-0": {
|
||||
{toBeDeletedTaint},
|
||||
{},
|
||||
},
|
||||
"empty-node-1": {
|
||||
"test-node-1": {
|
||||
{toBeDeletedTaint},
|
||||
{},
|
||||
},
|
||||
},
|
||||
wantErr: cmpopts.AnyError,
|
||||
},
|
||||
"failure to taint empty atomic node stops deletion and cleans already applied taints": {
|
||||
emptyNodes: generatenodeBucketList(atomic4, 0, 4),
|
||||
drainNodes: generatenodeBucketList(testNg, 4, 5),
|
||||
pods: map[string][]*apiv1.Pod{
|
||||
"test-node-4": removablePods(2, "test-node-4"),
|
||||
},
|
||||
failedNodeTaint: map[string]bool{"atomic-4-node-2": true},
|
||||
wantStatus: &status.ScaleDownStatus{
|
||||
Result: status.ScaleDownError,
|
||||
ScaledDownNodes: nil,
|
||||
},
|
||||
wantTaintUpdates: map[string][][]apiv1.Taint{
|
||||
"atomic-4-node-0": {
|
||||
{toBeDeletedTaint},
|
||||
{},
|
||||
},
|
||||
"atomic-4-node-1": {
|
||||
{toBeDeletedTaint},
|
||||
{},
|
||||
},
|
||||
|
|
@@ -226,259 +286,301 @@ func TestStartDeletion(t *testing.T) {
wantErr: cmpopts.AnyError,
},
"failure to taint drain node stops further deletion and cleans already applied taints": {
emptyNodes: generateNodes(2, "empty"),
drainNodes: generateNodes(4, "drain"),
emptyNodes: generatenodeBucketList(testNg, 0, 2),
drainNodes: generatenodeBucketList(testNg, 2, 6),
pods: map[string][]*apiv1.Pod{
"drain-node-0": removablePods(2, "drain-node-0"),
"drain-node-1": removablePods(2, "drain-node-1"),
"drain-node-2": removablePods(2, "drain-node-2"),
"drain-node-3": removablePods(2, "drain-node-3"),
"test-node-2": removablePods(2, "test-node-2"),
"test-node-3": removablePods(2, "test-node-3"),
"test-node-4": removablePods(2, "test-node-4"),
"test-node-5": removablePods(2, "test-node-5"),
},
failedNodeTaint: map[string]bool{"drain-node-2": true},
failedNodeTaint: map[string]bool{"test-node-2": true},
wantStatus: &status.ScaleDownStatus{
Result: status.ScaleDownError,
ScaledDownNodes: []*status.ScaleDownNode{
{
Node: generateNode("empty-node-0"),
Node: generateNode("test-node-0"),
NodeGroup: testNg,
EvictedPods: nil,
UtilInfo: generateUtilInfo(0, 0),
},
{
Node: generateNode("empty-node-1"),
Node: generateNode("test-node-1"),
NodeGroup: testNg,
EvictedPods: nil,
UtilInfo: generateUtilInfo(0, 0),
},
},
},
wantDeletedNodes: []string{"empty-node-0", "empty-node-1"},
wantDeletedNodes: []string{"test-node-0", "test-node-1"},
wantTaintUpdates: map[string][][]apiv1.Taint{
"empty-node-0": {
"test-node-0": {
{toBeDeletedTaint},
},
"empty-node-1": {
"test-node-1": {
{toBeDeletedTaint},
},
},
wantNodeDeleteResults: map[string]status.NodeDeleteResult{
"empty-node-0": {ResultType: status.NodeDeleteOk},
"empty-node-1": {ResultType: status.NodeDeleteOk},
"test-node-0": {ResultType: status.NodeDeleteOk},
"test-node-1": {ResultType: status.NodeDeleteOk},
},
wantErr: cmpopts.AnyError,
},
"failure to taint drain atomic node stops further deletion and cleans already applied taints": {
emptyNodes: generatenodeBucketList(testNg, 0, 2),
drainNodes: generatenodeBucketList(atomic4, 2, 6),
pods: map[string][]*apiv1.Pod{
"atomic-4-node-2": removablePods(2, "atomic-4-node-2"),
"atomic-4-node-3": removablePods(2, "atomic-4-node-3"),
"atomic-4-node-4": removablePods(2, "atomic-4-node-4"),
"atomic-4-node-5": removablePods(2, "atomic-4-node-5"),
},
failedNodeTaint: map[string]bool{"atomic-4-node-2": true},
wantStatus: &status.ScaleDownStatus{
Result: status.ScaleDownError,
ScaledDownNodes: []*status.ScaleDownNode{
{
Node: generateNode("test-node-0"),
NodeGroup: testNg,
EvictedPods: nil,
UtilInfo: generateUtilInfo(0, 0),
},
{
Node: generateNode("test-node-1"),
NodeGroup: testNg,
EvictedPods: nil,
UtilInfo: generateUtilInfo(0, 0),
},
},
},
wantDeletedNodes: []string{"test-node-0", "test-node-1"},
wantTaintUpdates: map[string][][]apiv1.Taint{
"test-node-0": {
{toBeDeletedTaint},
},
"test-node-1": {
{toBeDeletedTaint},
},
},
wantNodeDeleteResults: map[string]status.NodeDeleteResult{
"test-node-0": {ResultType: status.NodeDeleteOk},
"test-node-1": {ResultType: status.NodeDeleteOk},
},
wantErr: cmpopts.AnyError,
},
"nodes that failed drain are correctly reported in results": {
|
||||
drainNodes: generateNodes(4, "drain"),
|
||||
drainNodes: generatenodeBucketList(testNg, 0, 4),
|
||||
pods: map[string][]*apiv1.Pod{
|
||||
"drain-node-0": removablePods(3, "drain-node-0"),
|
||||
"drain-node-1": removablePods(3, "drain-node-1"),
|
||||
"drain-node-2": removablePods(3, "drain-node-2"),
|
||||
"drain-node-3": removablePods(3, "drain-node-3"),
|
||||
"test-node-0": removablePods(3, "test-node-0"),
|
||||
"test-node-1": removablePods(3, "test-node-1"),
|
||||
"test-node-2": removablePods(3, "test-node-2"),
|
||||
"test-node-3": removablePods(3, "test-node-3"),
|
||||
},
|
||||
failedPodDrain: map[string]bool{
|
||||
"drain-node-0-pod-0": true,
|
||||
"drain-node-0-pod-1": true,
|
||||
"drain-node-2-pod-1": true,
|
||||
"test-node-0-pod-0": true,
|
||||
"test-node-0-pod-1": true,
|
||||
"test-node-2-pod-1": true,
|
||||
},
|
||||
wantStatus: &status.ScaleDownStatus{
|
||||
Result: status.ScaleDownNodeDeleteStarted,
|
||||
ScaledDownNodes: []*status.ScaleDownNode{
|
||||
{
|
||||
Node: generateNode("drain-node-0"),
|
||||
Node: generateNode("test-node-0"),
|
||||
NodeGroup: testNg,
|
||||
EvictedPods: removablePods(3, "drain-node-0"),
|
||||
EvictedPods: removablePods(3, "test-node-0"),
|
||||
UtilInfo: generateUtilInfo(3./8., 3./8.),
|
||||
},
|
||||
{
|
||||
Node: generateNode("drain-node-1"),
|
||||
Node: generateNode("test-node-1"),
|
||||
NodeGroup: testNg,
|
||||
EvictedPods: removablePods(3, "drain-node-1"),
|
||||
EvictedPods: removablePods(3, "test-node-1"),
|
||||
UtilInfo: generateUtilInfo(3./8., 3./8.),
|
||||
},
|
||||
{
|
||||
Node: generateNode("drain-node-2"),
|
||||
Node: generateNode("test-node-2"),
|
||||
NodeGroup: testNg,
|
||||
EvictedPods: removablePods(3, "drain-node-2"),
|
||||
EvictedPods: removablePods(3, "test-node-2"),
|
||||
UtilInfo: generateUtilInfo(3./8., 3./8.),
|
||||
},
|
||||
{
|
||||
Node: generateNode("drain-node-3"),
|
||||
Node: generateNode("test-node-3"),
|
||||
NodeGroup: testNg,
|
||||
EvictedPods: removablePods(3, "drain-node-3"),
|
||||
EvictedPods: removablePods(3, "test-node-3"),
|
||||
UtilInfo: generateUtilInfo(3./8., 3./8.),
|
||||
},
|
||||
},
|
||||
},
|
||||
wantDeletedNodes: []string{"drain-node-1", "drain-node-3"},
|
||||
wantDeletedNodes: []string{"test-node-1", "test-node-3"},
|
||||
wantDeletedPods: []string{
|
||||
"drain-node-0-pod-2",
|
||||
"drain-node-1-pod-0", "drain-node-1-pod-1", "drain-node-1-pod-2",
|
||||
"drain-node-2-pod-0", "drain-node-2-pod-2",
|
||||
"drain-node-3-pod-0", "drain-node-3-pod-1", "drain-node-3-pod-2",
|
||||
"test-node-0-pod-2",
|
||||
"test-node-1-pod-0", "test-node-1-pod-1", "test-node-1-pod-2",
|
||||
"test-node-2-pod-0", "test-node-2-pod-2",
|
||||
"test-node-3-pod-0", "test-node-3-pod-1", "test-node-3-pod-2",
|
||||
},
|
||||
wantTaintUpdates: map[string][][]apiv1.Taint{
|
||||
"drain-node-0": {
|
||||
"test-node-0": {
|
||||
{toBeDeletedTaint},
|
||||
{},
|
||||
},
|
||||
"drain-node-1": {
|
||||
"test-node-1": {
|
||||
{toBeDeletedTaint},
|
||||
},
|
||||
"drain-node-2": {
|
||||
"test-node-2": {
|
||||
{toBeDeletedTaint},
|
||||
{},
|
||||
},
|
||||
"drain-node-3": {
|
||||
"test-node-3": {
|
||||
{toBeDeletedTaint},
|
||||
},
|
||||
},
|
||||
wantNodeDeleteResults: map[string]status.NodeDeleteResult{
|
||||
"drain-node-0": {
|
||||
"test-node-0": {
|
||||
ResultType: status.NodeDeleteErrorFailedToEvictPods,
|
||||
Err: cmpopts.AnyError,
|
||||
PodEvictionResults: map[string]status.PodEvictionResult{
|
||||
"drain-node-0-pod-0": {Pod: removablePod("drain-node-0-pod-0", "drain-node-0"), Err: cmpopts.AnyError, TimedOut: true},
|
||||
"drain-node-0-pod-1": {Pod: removablePod("drain-node-0-pod-1", "drain-node-0"), Err: cmpopts.AnyError, TimedOut: true},
|
||||
"drain-node-0-pod-2": {Pod: removablePod("drain-node-0-pod-2", "drain-node-0")},
|
||||
"test-node-0-pod-0": {Pod: removablePod("test-node-0-pod-0", "test-node-0"), Err: cmpopts.AnyError, TimedOut: true},
|
||||
"test-node-0-pod-1": {Pod: removablePod("test-node-0-pod-1", "test-node-0"), Err: cmpopts.AnyError, TimedOut: true},
|
||||
"test-node-0-pod-2": {Pod: removablePod("test-node-0-pod-2", "test-node-0")},
|
||||
},
|
||||
},
|
||||
"drain-node-1": {ResultType: status.NodeDeleteOk},
|
||||
"drain-node-2": {
|
||||
"test-node-1": {ResultType: status.NodeDeleteOk},
|
||||
"test-node-2": {
|
||||
ResultType: status.NodeDeleteErrorFailedToEvictPods,
|
||||
Err: cmpopts.AnyError,
|
||||
PodEvictionResults: map[string]status.PodEvictionResult{
|
||||
"drain-node-2-pod-0": {Pod: removablePod("drain-node-2-pod-0", "drain-node-2")},
|
||||
"drain-node-2-pod-1": {Pod: removablePod("drain-node-2-pod-1", "drain-node-2"), Err: cmpopts.AnyError, TimedOut: true},
|
||||
"drain-node-2-pod-2": {Pod: removablePod("drain-node-2-pod-2", "drain-node-2")},
|
||||
"test-node-2-pod-0": {Pod: removablePod("test-node-2-pod-0", "test-node-2")},
|
||||
"test-node-2-pod-1": {Pod: removablePod("test-node-2-pod-1", "test-node-2"), Err: cmpopts.AnyError, TimedOut: true},
|
||||
"test-node-2-pod-2": {Pod: removablePod("test-node-2-pod-2", "test-node-2")},
|
||||
},
|
||||
},
|
||||
"drain-node-3": {ResultType: status.NodeDeleteOk},
|
||||
"test-node-3": {ResultType: status.NodeDeleteOk},
|
||||
},
|
||||
},
|
||||
"nodes that failed deletion are correctly reported in results": {
|
||||
emptyNodes: generateNodes(2, "empty"),
|
||||
drainNodes: generateNodes(2, "drain"),
|
||||
emptyNodes: generatenodeBucketList(testNg, 0, 2),
|
||||
drainNodes: generatenodeBucketList(testNg, 2, 4),
|
||||
pods: map[string][]*apiv1.Pod{
|
||||
"drain-node-0": removablePods(2, "drain-node-0"),
|
||||
"drain-node-1": removablePods(2, "drain-node-1"),
|
||||
"test-node-2": removablePods(2, "test-node-2"),
|
||||
"test-node-3": removablePods(2, "test-node-3"),
|
||||
},
|
||||
failedNodeDeletion: map[string]bool{
|
||||
"empty-node-1": true,
|
||||
"drain-node-1": true,
|
||||
"test-node-1": true,
|
||||
"test-node-3": true,
|
||||
},
|
||||
wantStatus: &status.ScaleDownStatus{
|
||||
Result: status.ScaleDownNodeDeleteStarted,
|
||||
ScaledDownNodes: []*status.ScaleDownNode{
|
||||
{
|
||||
Node: generateNode("empty-node-0"),
|
||||
Node: generateNode("test-node-0"),
|
||||
NodeGroup: testNg,
|
||||
EvictedPods: nil,
|
||||
UtilInfo: generateUtilInfo(0, 0),
|
||||
},
|
||||
{
|
||||
Node: generateNode("empty-node-1"),
|
||||
Node: generateNode("test-node-1"),
|
||||
NodeGroup: testNg,
|
||||
EvictedPods: nil,
|
||||
UtilInfo: generateUtilInfo(0, 0),
|
||||
},
|
||||
{
|
||||
Node: generateNode("drain-node-0"),
|
||||
Node: generateNode("test-node-2"),
|
||||
NodeGroup: testNg,
|
||||
EvictedPods: removablePods(2, "drain-node-0"),
|
||||
EvictedPods: removablePods(2, "test-node-2"),
|
||||
UtilInfo: generateUtilInfo(2./8., 2./8.),
|
||||
},
|
||||
{
|
||||
Node: generateNode("drain-node-1"),
|
||||
Node: generateNode("test-node-3"),
|
||||
NodeGroup: testNg,
|
||||
EvictedPods: removablePods(2, "drain-node-1"),
|
||||
EvictedPods: removablePods(2, "test-node-3"),
|
||||
UtilInfo: generateUtilInfo(2./8., 2./8.),
|
||||
},
|
||||
},
|
||||
},
|
||||
wantDeletedNodes: []string{"empty-node-0", "drain-node-0"},
|
||||
wantDeletedNodes: []string{"test-node-0", "test-node-2"},
|
||||
wantDeletedPods: []string{
|
||||
"drain-node-0-pod-0", "drain-node-0-pod-1",
|
||||
"drain-node-1-pod-0", "drain-node-1-pod-1",
|
||||
"test-node-2-pod-0", "test-node-2-pod-1",
|
||||
"test-node-3-pod-0", "test-node-3-pod-1",
|
||||
},
|
||||
wantTaintUpdates: map[string][][]apiv1.Taint{
|
||||
"empty-node-0": {
|
||||
"test-node-0": {
|
||||
{toBeDeletedTaint},
|
||||
},
|
||||
"empty-node-1": {
|
||||
"test-node-1": {
|
||||
{toBeDeletedTaint},
|
||||
{},
|
||||
},
|
||||
"drain-node-0": {
|
||||
"test-node-2": {
|
||||
{toBeDeletedTaint},
|
||||
},
|
||||
"drain-node-1": {
|
||||
"test-node-3": {
|
||||
{toBeDeletedTaint},
|
||||
{},
|
||||
},
|
||||
},
|
||||
wantNodeDeleteResults: map[string]status.NodeDeleteResult{
|
||||
"empty-node-0": {ResultType: status.NodeDeleteOk},
|
||||
"empty-node-1": {ResultType: status.NodeDeleteErrorFailedToDelete, Err: cmpopts.AnyError},
|
||||
"drain-node-0": {ResultType: status.NodeDeleteOk},
|
||||
"drain-node-1": {ResultType: status.NodeDeleteErrorFailedToDelete, Err: cmpopts.AnyError},
|
||||
"test-node-0": {ResultType: status.NodeDeleteOk},
|
||||
"test-node-1": {ResultType: status.NodeDeleteErrorFailedToDelete, Err: cmpopts.AnyError},
|
||||
"test-node-2": {ResultType: status.NodeDeleteOk},
|
||||
"test-node-3": {ResultType: status.NodeDeleteErrorFailedToDelete, Err: cmpopts.AnyError},
|
||||
},
|
||||
},
|
||||
"DS pods are evicted from empty nodes, but don't block deletion on error": {
|
||||
emptyNodes: generateNodes(2, "empty"),
|
||||
emptyNodes: generatenodeBucketList(testNg, 0, 2),
|
||||
pods: map[string][]*apiv1.Pod{
|
||||
"empty-node-0": {generateDsPod("empty-node-0-ds-pod-0", "empty-node-0"), generateDsPod("empty-node-0-ds-pod-1", "empty-node-0")},
|
||||
"empty-node-1": {generateDsPod("empty-node-1-ds-pod-0", "empty-node-1"), generateDsPod("empty-node-1-ds-pod-1", "empty-node-1")},
|
||||
"test-node-0": {generateDsPod("test-node-0-ds-pod-0", "test-node-0"), generateDsPod("test-node-0-ds-pod-1", "test-node-0")},
|
||||
"test-node-1": {generateDsPod("test-node-1-ds-pod-0", "test-node-1"), generateDsPod("test-node-1-ds-pod-1", "test-node-1")},
|
||||
},
|
||||
failedPodDrain: map[string]bool{"empty-node-1-ds-pod-0": true},
|
||||
failedPodDrain: map[string]bool{"test-node-1-ds-pod-0": true},
|
||||
wantStatus: &status.ScaleDownStatus{
|
||||
Result: status.ScaleDownNodeDeleteStarted,
|
||||
ScaledDownNodes: []*status.ScaleDownNode{
|
||||
{
|
||||
Node: generateNode("empty-node-0"),
|
||||
Node: generateNode("test-node-0"),
|
||||
NodeGroup: testNg,
|
||||
EvictedPods: nil,
|
||||
UtilInfo: generateUtilInfo(2./8., 2./8.),
|
||||
},
|
||||
{
|
||||
Node: generateNode("empty-node-1"),
|
||||
Node: generateNode("test-node-1"),
|
||||
NodeGroup: testNg,
|
||||
EvictedPods: nil,
|
||||
UtilInfo: generateUtilInfo(2./8., 2./8.),
|
||||
},
|
||||
},
|
||||
},
|
||||
wantDeletedNodes: []string{"empty-node-0", "empty-node-1"},
|
||||
wantDeletedPods: []string{"empty-node-0-ds-pod-0", "empty-node-0-ds-pod-1", "empty-node-1-ds-pod-1"},
|
||||
wantDeletedNodes: []string{"test-node-0", "test-node-1"},
|
||||
wantDeletedPods: []string{"test-node-0-ds-pod-0", "test-node-0-ds-pod-1", "test-node-1-ds-pod-1"},
|
||||
wantTaintUpdates: map[string][][]apiv1.Taint{
|
||||
"empty-node-0": {
|
||||
"test-node-0": {
|
||||
{toBeDeletedTaint},
|
||||
},
|
||||
"empty-node-1": {
|
||||
"test-node-1": {
|
||||
{toBeDeletedTaint},
|
||||
},
|
||||
},
|
||||
wantNodeDeleteResults: map[string]status.NodeDeleteResult{
|
||||
"empty-node-0": {ResultType: status.NodeDeleteOk},
|
||||
"empty-node-1": {ResultType: status.NodeDeleteOk},
|
||||
"test-node-0": {ResultType: status.NodeDeleteOk},
|
||||
"test-node-1": {ResultType: status.NodeDeleteOk},
|
||||
},
|
||||
},
|
||||
"nodes with pods are not deleted if the node is passed as empty": {
|
||||
emptyNodes: generateNodes(2, "empty-but-with-pods"),
|
||||
emptyNodes: generatenodeBucketList(testNg, 0, 2),
|
||||
pods: map[string][]*apiv1.Pod{
|
||||
"empty-but-with-pods-node-0": removablePods(2, "empty-but-with-pods-node-0"),
|
||||
"empty-but-with-pods-node-1": removablePods(2, "empty-but-with-pods-node-1"),
|
||||
"test-node-0": removablePods(2, "test-node-0"),
|
||||
"test-node-1": removablePods(2, "test-node-1"),
|
||||
},
|
||||
wantStatus: &status.ScaleDownStatus{
|
||||
Result: status.ScaleDownNodeDeleteStarted,
|
||||
ScaledDownNodes: []*status.ScaleDownNode{
|
||||
{
|
||||
Node: generateNode("empty-but-with-pods-node-0"),
|
||||
Node: generateNode("test-node-0"),
|
||||
NodeGroup: testNg,
|
||||
EvictedPods: nil,
|
||||
UtilInfo: generateUtilInfo(2./8., 2./8.),
|
||||
},
|
||||
{
|
||||
Node: generateNode("empty-but-with-pods-node-1"),
|
||||
Node: generateNode("test-node-1"),
|
||||
NodeGroup: testNg,
|
||||
EvictedPods: nil,
|
||||
UtilInfo: generateUtilInfo(2./8., 2./8.),
|
||||
|
|
@ -488,18 +590,82 @@ func TestStartDeletion(t *testing.T) {
|
|||
wantDeletedNodes: nil,
|
||||
wantDeletedPods: nil,
|
||||
wantTaintUpdates: map[string][][]apiv1.Taint{
|
||||
"empty-but-with-pods-node-0": {
|
||||
"test-node-0": {
|
||||
{toBeDeletedTaint},
|
||||
{},
|
||||
},
|
||||
"empty-but-with-pods-node-1": {
|
||||
"test-node-1": {
|
||||
{toBeDeletedTaint},
|
||||
{},
|
||||
},
|
||||
},
|
||||
wantNodeDeleteResults: map[string]status.NodeDeleteResult{
|
||||
"empty-but-with-pods-node-0": {ResultType: status.NodeDeleteErrorInternal, Err: cmpopts.AnyError},
|
||||
"empty-but-with-pods-node-1": {ResultType: status.NodeDeleteErrorInternal, Err: cmpopts.AnyError},
|
||||
"test-node-0": {ResultType: status.NodeDeleteErrorInternal, Err: cmpopts.AnyError},
|
||||
"test-node-1": {ResultType: status.NodeDeleteErrorInternal, Err: cmpopts.AnyError},
|
||||
},
|
||||
},
|
||||
"atomic nodes with pods are not deleted if the node is passed as empty": {
|
||||
emptyNodes: append(
|
||||
generatenodeBucketList(testNg, 0, 2),
|
||||
generatenodeBucketList(atomic2, 0, 2)...,
|
||||
),
|
||||
pods: map[string][]*apiv1.Pod{
|
||||
"test-node-1": removablePods(2, "test-node-1"),
|
||||
"atomic-2-node-1": removablePods(2, "atomic-2-node-1"),
|
||||
},
|
||||
wantStatus: &status.ScaleDownStatus{
|
||||
Result: status.ScaleDownNodeDeleteStarted,
|
||||
ScaledDownNodes: []*status.ScaleDownNode{
|
||||
{
|
||||
Node: generateNode("test-node-0"),
|
||||
NodeGroup: testNg,
|
||||
EvictedPods: nil,
|
||||
UtilInfo: generateUtilInfo(0, 0),
|
||||
},
|
||||
{
|
||||
Node: generateNode("test-node-1"),
|
||||
NodeGroup: testNg,
|
||||
EvictedPods: nil,
|
||||
UtilInfo: generateUtilInfo(2./8., 2./8.),
|
||||
},
|
||||
{
|
||||
Node: generateNode("atomic-2-node-0"),
|
||||
NodeGroup: atomic2,
|
||||
EvictedPods: nil,
|
||||
UtilInfo: generateUtilInfo(0, 0),
|
||||
},
|
||||
{
|
||||
Node: generateNode("atomic-2-node-1"),
|
||||
NodeGroup: atomic2,
|
||||
EvictedPods: nil,
|
||||
UtilInfo: generateUtilInfo(2./8., 2./8.),
|
||||
},
|
||||
},
|
||||
},
|
||||
wantDeletedNodes: []string{"test-node-0"},
|
||||
wantDeletedPods: nil,
|
||||
wantTaintUpdates: map[string][][]apiv1.Taint{
|
||||
"test-node-0": {
|
||||
{toBeDeletedTaint},
|
||||
},
|
||||
"test-node-1": {
|
||||
{toBeDeletedTaint},
|
||||
{},
|
||||
},
|
||||
"atomic-2-node-0": {
|
||||
{toBeDeletedTaint},
|
||||
{},
|
||||
},
|
||||
"atomic-2-node-1": {
|
||||
{toBeDeletedTaint},
|
||||
{},
|
||||
},
|
||||
},
|
||||
wantNodeDeleteResults: map[string]status.NodeDeleteResult{
|
||||
"test-node-0": {ResultType: status.NodeDeleteOk},
|
||||
"test-node-1": {ResultType: status.NodeDeleteErrorInternal, Err: cmpopts.AnyError},
|
||||
"atomic-2-node-0": {ResultType: status.NodeDeleteErrorInternal, Err: cmpopts.AnyError},
|
||||
"atomic-2-node-1": {ResultType: status.NodeDeleteErrorInternal, Err: cmpopts.AnyError},
|
||||
},
|
||||
},
|
||||
} {
|
||||
|
|
@@ -508,13 +674,20 @@ func TestStartDeletion(t *testing.T) {
// of a single test case, and the goroutines eventually access tc in fakeClient hooks below.
tc := tc
// Insert all nodes into a map to support live node updates and GETs.
allEmptyNodes, allDrainNodes := []*apiv1.Node{}, []*apiv1.Node{}
nodesByName := make(map[string]*apiv1.Node)
nodesLock := sync.Mutex{}
for _, node := range tc.emptyNodes {
nodesByName[node.Name] = node
for _, bucket := range tc.emptyNodes {
allEmptyNodes = append(allEmptyNodes, bucket.nodes...)
for _, node := range allEmptyNodes {
nodesByName[node.Name] = node
}
}
for _, node := range tc.drainNodes {
nodesByName[node.Name] = node
for _, bucket := range tc.drainNodes {
allDrainNodes = append(allDrainNodes, bucket.nodes...)
for _, node := range bucket.nodes {
nodesByName[node.Name] = node
}
}

// Set up a fake k8s client to hook and verify certain actions.
@@ -591,10 +764,19 @@ func TestStartDeletion(t *testing.T) {
deletedNodes <- node
return nil
})
testNg.SetCloudProvider(provider)
provider.InsertNodeGroup(testNg)
for _, node := range nodesByName {
provider.AddNode("test-ng", node)
for _, bucket := range tc.emptyNodes {
bucket.group.(*testprovider.TestNodeGroup).SetCloudProvider(provider)
provider.InsertNodeGroup(bucket.group)
for _, node := range bucket.nodes {
provider.AddNode(bucket.group.Id(), node)
}
}
for _, bucket := range tc.drainNodes {
bucket.group.(*testprovider.TestNodeGroup).SetCloudProvider(provider)
provider.InsertNodeGroup(bucket.group)
for _, node := range bucket.nodes {
provider.AddNode(bucket.group.Id(), node)
}
}

// Set up other needed structures and options.
@@ -624,31 +806,37 @@ func TestStartDeletion(t *testing.T) {
t.Fatalf("Couldn't set up autoscaling context: %v", err)
}
csr := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, ctx.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
for _, node := range tc.emptyNodes {
err := ctx.ClusterSnapshot.AddNodeWithPods(node, tc.pods[node.Name])
if err != nil {
t.Fatalf("Couldn't add node %q to snapshot: %v", node.Name, err)
for _, bucket := range tc.emptyNodes {
for _, node := range bucket.nodes {
err := ctx.ClusterSnapshot.AddNodeWithPods(node, tc.pods[node.Name])
if err != nil {
t.Fatalf("Couldn't add node %q to snapshot: %v", node.Name, err)
}
}
}
for _, node := range tc.drainNodes {
pods, found := tc.pods[node.Name]
if !found {
t.Fatalf("Drain node %q doesn't have pods defined in the test case.", node.Name)
}
err := ctx.ClusterSnapshot.AddNodeWithPods(node, pods)
if err != nil {
t.Fatalf("Couldn't add node %q to snapshot: %v", node.Name, err)
for _, bucket := range tc.drainNodes {
for _, node := range bucket.nodes {
pods, found := tc.pods[node.Name]
if !found {
t.Fatalf("Drain node %q doesn't have pods defined in the test case.", node.Name)
}
err := ctx.ClusterSnapshot.AddNodeWithPods(node, pods)
if err != nil {
t.Fatalf("Couldn't add node %q to snapshot: %v", node.Name, err)
}
}
}

// Create Actuator, run StartDeletion, and verify the error.
ndt := deletiontracker.NewNodeDeletionTracker(0)
ndb := NewNodeDeletionBatcher(&ctx, csr, ndt, 0*time.Second)
evictor := Evictor{EvictionRetryTime: 0, DsEvictionRetryTime: 0, DsEvictionEmptyNodeTimeout: 0, PodEvictionHeadroom: DefaultPodEvictionHeadroom}
actuator := Actuator{
ctx: &ctx, clusterState: csr, nodeDeletionTracker: ndt,
nodeDeletionBatcher: NewNodeDeletionBatcher(&ctx, csr, ndt, 0*time.Second),
evictor: Evictor{EvictionRetryTime: 0, DsEvictionRetryTime: 0, DsEvictionEmptyNodeTimeout: 0, PodEvictionHeadroom: DefaultPodEvictionHeadroom},
nodeDeletionScheduler: NewGroupDeletionScheduler(&ctx, ndt, ndb, simulator.NodeDeleteOptions{}, evictor),
budgetProcessor: NewScaleDownBudgetProcessor(&ctx, ndt),
}
gotStatus, gotErr := actuator.StartDeletion(tc.emptyNodes, tc.drainNodes)
gotStatus, gotErr := actuator.StartDeletion(allEmptyNodes, allDrainNodes)
if diff := cmp.Diff(tc.wantErr, gotErr, cmpopts.EquateErrors()); diff != "" {
t.Errorf("StartDeletion error diff (-want +got):\n%s", diff)
}
@@ -851,10 +1039,11 @@ func TestStartDeletionInBatchBasic(t *testing.T) {
provider.InsertNodeGroup(ng)
ng.SetCloudProvider(provider)
for i, num := range numNodes {
nodes := generateNodes(num, ng.Id())
deleteNodes[i] = append(deleteNodes[i], nodes...)
for _, node := range nodes {
provider.AddNode(ng.Id(), node)
singleBucketList := generatenodeBucketList(ng, 0, num)
bucket := singleBucketList[0]
deleteNodes[i] = append(deleteNodes[i], bucket.nodes...)
for _, node := range bucket.nodes {
provider.AddNode(bucket.group.Id(), node)
}
}
}
@@ -874,10 +1063,12 @@ func TestStartDeletionInBatchBasic(t *testing.T) {
}
csr := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, ctx.LogRecorder, NewBackoff(), clusterstate.NewStaticMaxNodeProvisionTimeProvider(15*time.Minute))
ndt := deletiontracker.NewNodeDeletionTracker(0)
ndb := NewNodeDeletionBatcher(&ctx, csr, ndt, deleteInterval)
evictor := Evictor{EvictionRetryTime: 0, DsEvictionRetryTime: 0, DsEvictionEmptyNodeTimeout: 0, PodEvictionHeadroom: DefaultPodEvictionHeadroom}
actuator := Actuator{
ctx: &ctx, clusterState: csr, nodeDeletionTracker: ndt,
nodeDeletionBatcher: NewNodeDeletionBatcher(&ctx, csr, ndt, deleteInterval),
evictor: Evictor{EvictionRetryTime: 0, DsEvictionRetryTime: 0, DsEvictionEmptyNodeTimeout: 0, PodEvictionHeadroom: DefaultPodEvictionHeadroom},
nodeDeletionScheduler: NewGroupDeletionScheduler(&ctx, ndt, ndb, simulator.NodeDeleteOptions{}, evictor),
budgetProcessor: NewScaleDownBudgetProcessor(&ctx, ndt),
}

for _, nodes := range deleteNodes {
@@ -909,9 +1100,17 @@ func TestStartDeletionInBatchBasic(t *testing.T) {
}
}

func generateNodes(count int, prefix string) []*apiv1.Node {
func sizedNodeGroup(id string, size int, atomic bool) cloudprovider.NodeGroup {
ng := testprovider.NewTestNodeGroup(id, 10000, 0, size, true, false, "n1-standard-2", nil, nil)
ng.SetOptions(&config.NodeGroupAutoscalingOptions{
AtomicScaleDown: atomic,
})
return ng
}

func generateNodes(from, to int, prefix string) []*apiv1.Node {
var result []*apiv1.Node
for i := 0; i < count; i++ {
for i := from; i < to; i++ {
name := fmt.Sprintf("node-%d", i)
if prefix != "" {
name = prefix + "-" + name
@@ -921,18 +1120,13 @@ func generateNodes(count int, prefix string) []*apiv1.Node {
return result
}

func generateNodesAndNodeGroupMap(count int, prefix string) map[string]*testprovider.TestNodeGroup {
result := make(map[string]*testprovider.TestNodeGroup)
for i := 0; i < count; i++ {
name := fmt.Sprintf("node-%d", i)
ngName := fmt.Sprintf("test-ng-%v", i)
if prefix != "" {
name = prefix + "-" + name
ngName = prefix + "-" + ngName
}
result[name] = testprovider.NewTestNodeGroup(ngName, 0, 100, 3, true, false, "n1-standard-2", nil, nil)
func generatenodeBucketList(ng cloudprovider.NodeGroup, from, to int) []*nodeBucket {
return []*nodeBucket{
{
group: ng,
nodes: generateNodes(from, to, ng.Id()),
},
}
return result
}

func generateNode(name string) *apiv1.Node {
@@ -1,5 +1,5 @@
/*
Copyright 2022 The Kubernetes Authors.
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -17,33 +17,138 @@ limitations under the License.
package actuation

import (
	"reflect"

	apiv1 "k8s.io/api/core/v1"
	"k8s.io/klog/v2"

	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
	"k8s.io/autoscaler/cluster-autoscaler/context"
	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/deletiontracker"
)

// cropNodesToBudgets crops the provided node lists to respect scale-down max parallelism budgets.
func (a *Actuator) cropNodesToBudgets(empty, needDrain []*apiv1.Node) ([]*apiv1.Node, []*apiv1.Node) {
	emptyInProgress, drainInProgress := a.nodeDeletionTracker.DeletionsInProgress()
	parallelismBudget := a.ctx.MaxScaleDownParallelism - len(emptyInProgress) - len(drainInProgress)
	drainBudget := a.ctx.MaxDrainParallelism - len(drainInProgress)
type nodeBucket struct {
	group cloudprovider.NodeGroup
	nodes []*apiv1.Node
}

	var emptyToDelete []*apiv1.Node
	for _, node := range empty {
		if len(emptyToDelete) >= parallelismBudget {
			break
// ScaleDownBudgetProcessor is responsible for keeping the number of nodes deleted in parallel within defined limits.
type ScaleDownBudgetProcessor struct {
	ctx *context.AutoscalingContext
	nodeDeletionTracker *deletiontracker.NodeDeletionTracker
}

// NewScaleDownBudgetProcessor creates a ScaleDownBudgetProcessor instance.
func NewScaleDownBudgetProcessor(ctx *context.AutoscalingContext, ndt *deletiontracker.NodeDeletionTracker) *ScaleDownBudgetProcessor {
	return &ScaleDownBudgetProcessor{
		ctx: ctx,
		nodeDeletionTracker: ndt,
	}
}

// CropNodes crops the provided node lists to respect scale-down max parallelism budgets.
// The returned nodes are grouped by a node group.
func (bp *ScaleDownBudgetProcessor) CropNodes(empty, drain []*apiv1.Node) (emptyToDelete, drainToDelete []*nodeBucket) {
	emptyIndividual, emptyAtomic := bp.groupByNodeGroup(empty)
	drainIndividual, drainAtomic := bp.groupByNodeGroup(drain)

	emptyInProgress, drainInProgress := bp.nodeDeletionTracker.DeletionsInProgress()
	parallelismBudget := bp.ctx.MaxScaleDownParallelism - len(emptyInProgress) - len(drainInProgress)
	drainBudget := bp.ctx.MaxDrainParallelism - len(drainInProgress)

	emptyToDelete = []*nodeBucket{}
	for _, bucket := range emptyAtomic {
		if parallelismBudget < len(bucket.nodes) {
			// One pod slice can sneak in even if it would exceed parallelism budget.
			// This is to help avoid starvation of pod slices by regular nodes,
			// also larger pod slices will immediately exceed parallelism budget.
			if parallelismBudget == 0 || len(emptyToDelete) > 0 {
				break
			}
		}
		emptyToDelete = append(emptyToDelete, node)
		emptyToDelete = append(emptyToDelete, bucket)
		parallelismBudget -= len(bucket.nodes)
	}

	parallelismBudgetLeft := parallelismBudget - len(emptyToDelete)
	drainBudget = min(parallelismBudgetLeft, drainBudget)
	drainBudget = min(parallelismBudget, drainBudget)

	var drainToDelete []*apiv1.Node
	for _, node := range needDrain {
		if len(drainToDelete) >= drainBudget {
	drainToDelete = []*nodeBucket{}
	for _, bucket := range drainAtomic {
		if drainBudget < len(bucket.nodes) {
			// One pod slice can sneak in even if it would exceed parallelism budget.
			// This is to help avoid starvation of pod slices by regular nodes,
			// also larger pod slices will immediately exceed parallelism budget.
			if drainBudget == 0 || len(emptyToDelete) > 0 || len(drainToDelete) > 0 {
				break
			}
		}
		drainToDelete = append(drainToDelete, bucket)
		drainBudget -= len(bucket.nodes)
		parallelismBudget -= len(bucket.nodes)
	}

	for _, bucket := range emptyIndividual {
		if parallelismBudget < 1 {
			break
		}
		drainToDelete = append(drainToDelete, node)
		if parallelismBudget < len(bucket.nodes) {
			bucket.nodes = bucket.nodes[:parallelismBudget]
		}
		emptyToDelete = append(emptyToDelete, bucket)
		parallelismBudget -= len(bucket.nodes)
	}

	drainBudget = min(parallelismBudget, drainBudget)

	for _, bucket := range drainIndividual {
		if drainBudget < 1 {
			break
		}
		if drainBudget < len(bucket.nodes) {
			bucket.nodes = bucket.nodes[:drainBudget]
		}
		drainToDelete = append(drainToDelete, bucket)
		drainBudget -= 1
	}

	return emptyToDelete, drainToDelete
}

func (bp *ScaleDownBudgetProcessor) groupByNodeGroup(nodes []*apiv1.Node) (individual, atomic []*nodeBucket) {
	individualGroup, atomicGroup := map[cloudprovider.NodeGroup]int{}, map[cloudprovider.NodeGroup]int{}
	individual, atomic = []*nodeBucket{}, []*nodeBucket{}
	for _, node := range nodes {
		nodeGroup, err := bp.ctx.CloudProvider.NodeGroupForNode(node)
		if err != nil || nodeGroup == nil || reflect.ValueOf(nodeGroup).IsNil() {
			klog.Errorf("Failed to find node group for %s: %v", node.Name, err)
			continue
		}
		autoscalingOptions, err := nodeGroup.GetOptions(bp.ctx.NodeGroupDefaults)
		if err != nil {
			klog.Errorf("Failed to get autoscaling options for node group %s: %v", nodeGroup.Id(), err)
			continue
		}
		if autoscalingOptions != nil && autoscalingOptions.AtomicScaleDown {
			if idx, ok := atomicGroup[nodeGroup]; ok {
				atomic[idx].nodes = append(atomic[idx].nodes, node)
			} else {
				atomicGroup[nodeGroup] = len(atomic)
				atomic = append(atomic, &nodeBucket{
					group: nodeGroup,
					nodes: []*apiv1.Node{node},
				})
			}
		} else {
			if idx, ok := individualGroup[nodeGroup]; ok {
				individual[idx].nodes = append(individual[idx].nodes, node)
			} else {
				individualGroup[nodeGroup] = len(individual)
				individual = append(individual, &nodeBucket{
					group: nodeGroup,
					nodes: []*apiv1.Node{node},
				})
			}
		}
	}
	return individual, atomic
}
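Note on the cropping order above: atomic buckets are consumed first, and exactly one of them is allowed to overshoot the remaining parallelism budget, while individual buckets are truncated to whatever budget is left. The following is a minimal, self-contained sketch of that ordering using simplified placeholder types, not the actual cluster-autoscaler API:

package main

import "fmt"

// bucket is a simplified stand-in for nodeBucket: a node group and its node names.
type bucket struct {
	group string
	nodes []string
}

// cropToBudget mimics the ordering above: atomic buckets are taken whole (the first
// one may exceed the remaining budget), individual buckets are truncated to fit.
func cropToBudget(atomic, individual []bucket, budget int) []bucket {
	var out []bucket
	for _, b := range atomic {
		if budget < len(b.nodes) && (budget == 0 || len(out) > 0) {
			break
		}
		out = append(out, b)
		budget -= len(b.nodes)
	}
	for _, b := range individual {
		if budget < 1 {
			break
		}
		if budget < len(b.nodes) {
			b.nodes = b.nodes[:budget]
		}
		out = append(out, b)
		budget -= len(b.nodes)
	}
	return out
}

func main() {
	atomic := []bucket{{group: "atomic-3", nodes: []string{"a1", "a2", "a3"}}}
	individual := []bucket{{group: "regular", nodes: []string{"r1", "r2", "r3", "r4"}}}
	// With a budget of 5, the atomic bucket goes in whole and only 2 regular nodes fit.
	fmt.Println(cropToBudget(atomic, individual, 5))
}

Keeping atomic buckets ahead of individual ones is what prevents a large atomic group from being starved indefinitely by a steady stream of single-node deletions.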
|
|
|
|||
|
|
@ -25,182 +25,284 @@ import (
|
|||
"github.com/google/go-cmp/cmp/cmpopts"
|
||||
apiv1 "k8s.io/api/core/v1"
|
||||
|
||||
testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/config"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/context"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/deletiontracker"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/simulator"
|
||||
)
|
||||
|
||||
func TestCropNodesToBudgets(t *testing.T) {
|
||||
testNg := testprovider.NewTestNodeGroup("test-ng", 0, 100, 3, true, false, "n1-standard-2", nil, nil)
|
||||
atomic3 := sizedNodeGroup("atomic-3", 3, true)
|
||||
atomic4 := sizedNodeGroup("atomic-4", 4, true)
|
||||
atomic8 := sizedNodeGroup("atomic-8", 8, true)
|
||||
atomic11 := sizedNodeGroup("atomic-11", 11, true)
|
||||
for tn, tc := range map[string]struct {
|
||||
emptyDeletionsInProgress int
|
||||
drainDeletionsInProgress int
|
||||
emptyNodes []*apiv1.Node
|
||||
drainNodes []*apiv1.Node
|
||||
wantEmpty []*apiv1.Node
|
||||
wantDrain []*apiv1.Node
|
||||
empty []*nodeBucket
|
||||
drain []*nodeBucket
|
||||
wantEmpty []*nodeBucket
|
||||
wantDrain []*nodeBucket
|
||||
}{
|
||||
"no nodes": {
|
||||
emptyNodes: []*apiv1.Node{},
|
||||
drainNodes: []*apiv1.Node{},
|
||||
wantEmpty: []*apiv1.Node{},
|
||||
wantDrain: []*apiv1.Node{},
|
||||
empty: []*nodeBucket{},
|
||||
drain: []*nodeBucket{},
|
||||
wantEmpty: []*nodeBucket{},
|
||||
wantDrain: []*nodeBucket{},
|
||||
},
|
||||
// Empty nodes only.
|
||||
"empty nodes within max limit, no deletions in progress": {
|
||||
emptyNodes: generateNodes(10, "empty"),
|
||||
wantEmpty: generateNodes(10, "empty"),
|
||||
empty: generatenodeBucketList(testNg, 0, 10),
|
||||
wantEmpty: generatenodeBucketList(testNg, 0, 10),
|
||||
},
|
||||
"empty nodes exceeding max limit, no deletions in progress": {
|
||||
emptyNodes: generateNodes(11, "empty"),
|
||||
wantEmpty: generateNodes(10, "empty"),
|
||||
empty: generatenodeBucketList(testNg, 0, 11),
|
||||
wantEmpty: generatenodeBucketList(testNg, 0, 10),
|
||||
},
|
||||
"empty atomic node group exceeding max limit": {
|
||||
empty: generatenodeBucketList(atomic11, 0, 11),
|
||||
wantEmpty: generatenodeBucketList(atomic11, 0, 11),
|
||||
},
|
||||
"empty regular and atomic": {
|
||||
empty: append(generatenodeBucketList(testNg, 0, 8), generatenodeBucketList(atomic3, 0, 3)...),
|
||||
wantEmpty: append(generatenodeBucketList(atomic3, 0, 3), generatenodeBucketList(testNg, 0, 7)...),
|
||||
},
|
||||
"multiple empty atomic": {
|
||||
empty: append(
|
||||
append(
|
||||
generatenodeBucketList(testNg, 0, 3),
|
||||
generatenodeBucketList(atomic8, 0, 8)...),
|
||||
generatenodeBucketList(atomic3, 0, 3)...),
|
||||
wantEmpty: append(generatenodeBucketList(atomic8, 0, 8), generatenodeBucketList(testNg, 0, 2)...),
|
||||
},
|
||||
"empty nodes with deletions in progress, within budget": {
|
||||
emptyDeletionsInProgress: 1,
|
||||
drainDeletionsInProgress: 1,
|
||||
emptyNodes: generateNodes(8, "empty"),
|
||||
wantEmpty: generateNodes(8, "empty"),
|
||||
empty: generatenodeBucketList(testNg, 0, 8),
|
||||
wantEmpty: generatenodeBucketList(testNg, 0, 8),
|
||||
},
|
||||
"empty nodes with deletions in progress, exceeding budget": {
|
||||
emptyDeletionsInProgress: 1,
|
||||
drainDeletionsInProgress: 1,
|
||||
emptyNodes: generateNodes(10, "empty"),
|
||||
wantEmpty: generateNodes(8, "empty"),
|
||||
empty: generatenodeBucketList(testNg, 0, 10),
|
||||
wantEmpty: generatenodeBucketList(testNg, 0, 8),
|
||||
},
|
||||
"empty atomic nodes with deletions in progress, exceeding budget": {
|
||||
emptyDeletionsInProgress: 3,
|
||||
drainDeletionsInProgress: 3,
|
||||
empty: generatenodeBucketList(atomic8, 0, 8),
|
||||
wantEmpty: generatenodeBucketList(atomic8, 0, 8),
|
||||
},
|
||||
"empty nodes with deletions in progress, 0 budget left": {
|
||||
emptyDeletionsInProgress: 5,
|
||||
drainDeletionsInProgress: 5,
|
||||
emptyNodes: generateNodes(10, "empty"),
|
||||
wantEmpty: []*apiv1.Node{},
|
||||
empty: generatenodeBucketList(testNg, 0, 10),
|
||||
wantEmpty: []*nodeBucket{},
|
||||
},
|
||||
"empty nodes with deletions in progress, budget exceeded (shouldn't happen, just a sanity check)": {
|
||||
"empty atomic nodes with deletions in progress, 0 budget left": {
|
||||
emptyDeletionsInProgress: 5,
|
||||
drainDeletionsInProgress: 5,
|
||||
empty: generatenodeBucketList(atomic3, 0, 3),
|
||||
wantEmpty: []*nodeBucket{},
|
||||
},
|
||||
"empty nodes with deletions in progress, budget exceeded": {
|
||||
emptyDeletionsInProgress: 50,
|
||||
drainDeletionsInProgress: 50,
|
||||
emptyNodes: generateNodes(10, "empty"),
|
||||
wantEmpty: []*apiv1.Node{},
|
||||
empty: generatenodeBucketList(testNg, 0, 10),
|
||||
wantEmpty: []*nodeBucket{},
|
||||
},
|
||||
// Drain nodes only.
|
||||
"drain nodes within max limit, no deletions in progress": {
|
||||
drainNodes: generateNodes(5, "drain"),
|
||||
wantDrain: generateNodes(5, "drain"),
|
||||
drain: generatenodeBucketList(testNg, 0, 5),
|
||||
wantDrain: generatenodeBucketList(testNg, 0, 5),
|
||||
},
|
||||
"drain nodes exceeding max limit, no deletions in progress": {
|
||||
drainNodes: generateNodes(6, "drain"),
|
||||
wantDrain: generateNodes(5, "drain"),
|
||||
drain: generatenodeBucketList(testNg, 0, 6),
|
||||
wantDrain: generatenodeBucketList(testNg, 0, 5),
|
||||
},
|
||||
"drain atomic exceeding limit": {
|
||||
drain: generatenodeBucketList(atomic8, 0, 8),
|
||||
wantDrain: generatenodeBucketList(atomic8, 0, 8),
|
||||
},
|
||||
"drain regular and atomic exceeding limit": {
|
||||
drain: append(generatenodeBucketList(testNg, 0, 3), generatenodeBucketList(atomic3, 0, 3)...),
|
||||
wantDrain: append(generatenodeBucketList(atomic3, 0, 3), generatenodeBucketList(testNg, 0, 2)...),
|
||||
},
|
||||
"multiple drain atomic": {
|
||||
drain: append(
|
||||
append(
|
||||
generatenodeBucketList(testNg, 0, 3),
|
||||
generatenodeBucketList(atomic3, 0, 3)...),
|
||||
generatenodeBucketList(atomic4, 0, 4)...),
|
||||
wantDrain: append(generatenodeBucketList(atomic3, 0, 3), generatenodeBucketList(testNg, 0, 2)...),
|
||||
},
|
||||
"drain nodes with deletions in progress, within budget": {
|
||||
emptyDeletionsInProgress: 1,
|
||||
drainDeletionsInProgress: 2,
|
||||
drainNodes: generateNodes(3, "drain"),
|
||||
wantDrain: generateNodes(3, "drain"),
|
||||
drain: generatenodeBucketList(testNg, 0, 3),
|
||||
wantDrain: generatenodeBucketList(testNg, 0, 3),
|
||||
},
|
||||
"drain nodes with deletions in progress, exceeding drain budget": {
|
||||
emptyDeletionsInProgress: 1,
|
||||
drainDeletionsInProgress: 2,
|
||||
drainNodes: generateNodes(5, "drain"),
|
||||
wantDrain: generateNodes(3, "drain"),
|
||||
drain: generatenodeBucketList(testNg, 0, 5),
|
||||
wantDrain: generatenodeBucketList(testNg, 0, 3),
|
||||
},
|
||||
"drain atomic nodes with deletions in progress, exceeding drain budget": {
|
||||
emptyDeletionsInProgress: 1,
|
||||
drainDeletionsInProgress: 2,
|
||||
drain: generatenodeBucketList(atomic4, 0, 4),
|
||||
wantDrain: generatenodeBucketList(atomic4, 0, 4),
|
||||
},
|
||||
"drain nodes with deletions in progress, 0 drain budget left": {
|
||||
emptyDeletionsInProgress: 1,
|
||||
drainDeletionsInProgress: 5,
|
||||
drainNodes: generateNodes(5, "drain"),
|
||||
wantDrain: []*apiv1.Node{},
|
||||
drain: generatenodeBucketList(testNg, 0, 5),
|
||||
wantDrain: []*nodeBucket{},
|
||||
},
|
||||
"drain nodes with deletions in progress, drain budget exceeded (shouldn't happen, just a sanity check)": {
|
||||
"drain atomic nodes with deletions in progress, 0 drain budget left": {
|
||||
emptyDeletionsInProgress: 1,
|
||||
drainDeletionsInProgress: 5,
|
||||
drain: generatenodeBucketList(atomic4, 0, 4),
|
||||
wantDrain: []*nodeBucket{},
|
||||
},
|
||||
"drain nodes with deletions in progress, drain budget exceeded": {
|
||||
emptyDeletionsInProgress: 1,
|
||||
drainDeletionsInProgress: 50,
|
||||
drainNodes: generateNodes(5, "drain"),
|
||||
wantDrain: []*apiv1.Node{},
|
||||
drain: generatenodeBucketList(testNg, 0, 5),
|
||||
wantDrain: []*nodeBucket{},
|
||||
},
|
||||
"drain nodes with deletions in progress, exceeding overall budget": {
|
||||
emptyDeletionsInProgress: 7,
|
||||
drainDeletionsInProgress: 1,
|
||||
drainNodes: generateNodes(4, "drain"),
|
||||
wantDrain: generateNodes(2, "drain"),
|
||||
drain: generatenodeBucketList(testNg, 0, 4),
|
||||
wantDrain: generatenodeBucketList(testNg, 0, 2),
|
||||
},
|
||||
"drain nodes with deletions in progress, 0 overall budget left": {
|
||||
emptyDeletionsInProgress: 10,
|
||||
drainNodes: generateNodes(4, "drain"),
|
||||
wantDrain: []*apiv1.Node{},
|
||||
drain: generatenodeBucketList(testNg, 0, 4),
|
||||
wantDrain: []*nodeBucket{},
|
||||
},
|
||||
"drain nodes with deletions in progress, overall budget exceeded (shouldn't happen, just a sanity check)": {
|
||||
"drain nodes with deletions in progress, overall budget exceeded": {
|
||||
emptyDeletionsInProgress: 50,
|
||||
drainNodes: generateNodes(4, "drain"),
|
||||
wantDrain: []*apiv1.Node{},
|
||||
drain: generatenodeBucketList(testNg, 0, 4),
|
||||
wantDrain: []*nodeBucket{},
|
||||
},
|
||||
// Empty and drain nodes together.
|
||||
"empty&drain nodes within max limits, no deletions in progress": {
|
||||
emptyNodes: generateNodes(5, "empty"),
|
||||
drainNodes: generateNodes(5, "drain"),
|
||||
wantDrain: generateNodes(5, "drain"),
|
||||
wantEmpty: generateNodes(5, "empty"),
|
||||
empty: generatenodeBucketList(testNg, 0, 5),
|
||||
drain: generatenodeBucketList(testNg, 0, 5),
|
||||
wantDrain: generatenodeBucketList(testNg, 0, 5),
|
||||
wantEmpty: generatenodeBucketList(testNg, 0, 5),
|
||||
},
|
||||
"empty&drain atomic nodes within max limits, no deletions in progress": {
|
||||
empty: generatenodeBucketList(atomic3, 0, 3),
|
||||
drain: generatenodeBucketList(atomic4, 0, 4),
|
||||
wantEmpty: generatenodeBucketList(atomic3, 0, 3),
|
||||
wantDrain: generatenodeBucketList(atomic4, 0, 4),
|
||||
},
|
||||
"empty&drain nodes exceeding overall limit, no deletions in progress": {
|
||||
emptyNodes: generateNodes(8, "empty"),
|
||||
drainNodes: generateNodes(8, "drain"),
|
||||
wantDrain: generateNodes(2, "drain"),
|
||||
wantEmpty: generateNodes(8, "empty"),
|
||||
empty: generatenodeBucketList(testNg, 0, 8),
|
||||
drain: generatenodeBucketList(testNg, 0, 8),
|
||||
wantDrain: generatenodeBucketList(testNg, 0, 2),
|
||||
wantEmpty: generatenodeBucketList(testNg, 0, 8),
|
||||
},
|
||||
"empty&drain atomic nodes exceeding overall limit, no deletions in progress": {
|
||||
empty: generatenodeBucketList(atomic8, 0, 8),
|
||||
drain: generatenodeBucketList(atomic4, 0, 4),
|
||||
wantEmpty: generatenodeBucketList(atomic8, 0, 8),
|
||||
wantDrain: []*nodeBucket{},
|
||||
},
|
||||
"empty&drain atomic nodes exceeding drain limit, no deletions in progress": {
|
||||
empty: generatenodeBucketList(atomic4, 0, 4),
|
||||
drain: generatenodeBucketList(atomic8, 0, 8),
|
||||
wantEmpty: generatenodeBucketList(atomic4, 0, 4),
|
||||
wantDrain: []*nodeBucket{},
|
||||
},
|
||||
"empty&drain atomic and regular nodes exceeding drain limit, no deletions in progress": {
|
||||
empty: append(generatenodeBucketList(testNg, 0, 5), generatenodeBucketList(atomic3, 0, 3)...),
|
||||
drain: generatenodeBucketList(atomic8, 0, 8),
|
||||
wantEmpty: append(generatenodeBucketList(atomic3, 0, 3), generatenodeBucketList(testNg, 0, 5)...),
|
||||
wantDrain: []*nodeBucket{},
|
||||
},
|
||||
"empty regular and drain atomic nodes exceeding overall limit, no deletions in progress": {
|
||||
drain: generatenodeBucketList(atomic8, 0, 8),
|
||||
empty: generatenodeBucketList(testNg, 0, 5),
|
||||
wantDrain: generatenodeBucketList(atomic8, 0, 8),
|
||||
wantEmpty: generatenodeBucketList(testNg, 0, 2),
|
||||
},
|
||||
"empty&drain nodes exceeding drain limit, no deletions in progress": {
|
||||
emptyNodes: generateNodes(2, "empty"),
|
||||
drainNodes: generateNodes(8, "drain"),
|
||||
wantDrain: generateNodes(5, "drain"),
|
||||
wantEmpty: generateNodes(2, "empty"),
|
||||
empty: generatenodeBucketList(testNg, 0, 2),
|
||||
drain: generatenodeBucketList(testNg, 0, 8),
|
||||
wantDrain: generatenodeBucketList(testNg, 0, 5),
|
||||
wantEmpty: generatenodeBucketList(testNg, 0, 2),
|
||||
},
|
||||
"empty&drain nodes with deletions in progress, 0 overall budget left": {
|
||||
emptyDeletionsInProgress: 10,
|
||||
emptyNodes: generateNodes(5, "empty"),
|
||||
drainNodes: generateNodes(5, "drain"),
|
||||
wantEmpty: []*apiv1.Node{},
|
||||
wantDrain: []*apiv1.Node{},
|
||||
empty: generatenodeBucketList(testNg, 0, 5),
|
||||
drain: generatenodeBucketList(testNg, 0, 5),
|
||||
wantEmpty: []*nodeBucket{},
|
||||
wantDrain: []*nodeBucket{},
|
||||
},
|
||||
"empty&drain nodes with deletions in progress, overall budget exceeded (shouldn't happen, just a sanity check)": {
|
||||
emptyDeletionsInProgress: 50,
|
||||
emptyNodes: generateNodes(5, "empty"),
|
||||
drainNodes: generateNodes(5, "drain"),
|
||||
wantEmpty: []*apiv1.Node{},
|
||||
wantDrain: []*apiv1.Node{},
|
||||
empty: generatenodeBucketList(testNg, 0, 5),
|
||||
drain: generatenodeBucketList(testNg, 0, 5),
|
||||
wantEmpty: []*nodeBucket{},
|
||||
wantDrain: []*nodeBucket{},
|
||||
},
|
||||
"empty&drain nodes with deletions in progress, 0 drain budget left": {
|
||||
drainDeletionsInProgress: 5,
|
||||
emptyNodes: generateNodes(5, "empty"),
|
||||
drainNodes: generateNodes(5, "drain"),
|
||||
wantEmpty: generateNodes(5, "empty"),
|
||||
wantDrain: []*apiv1.Node{},
|
||||
empty: generatenodeBucketList(testNg, 0, 5),
|
||||
drain: generatenodeBucketList(testNg, 0, 5),
|
||||
wantEmpty: generatenodeBucketList(testNg, 0, 5),
|
||||
wantDrain: []*nodeBucket{},
|
||||
},
|
||||
"empty&drain nodes with deletions in progress, drain budget exceeded (shouldn't happen, just a sanity check)": {
|
||||
drainDeletionsInProgress: 9,
|
||||
emptyNodes: generateNodes(5, "empty"),
|
||||
drainNodes: generateNodes(5, "drain"),
|
||||
wantEmpty: generateNodes(1, "empty"),
|
||||
wantDrain: []*apiv1.Node{},
|
||||
empty: generatenodeBucketList(testNg, 0, 5),
|
||||
drain: generatenodeBucketList(testNg, 0, 5),
|
||||
wantEmpty: generatenodeBucketList(testNg, 0, 1),
|
||||
wantDrain: []*nodeBucket{},
|
||||
},
|
||||
"empty&drain nodes with deletions in progress, overall budget exceeded, only empty nodes fit": {
|
||||
emptyDeletionsInProgress: 5,
|
||||
drainDeletionsInProgress: 3,
|
||||
emptyNodes: generateNodes(5, "empty"),
|
||||
drainNodes: generateNodes(2, "drain"),
|
||||
wantEmpty: generateNodes(2, "empty"),
|
||||
wantDrain: []*apiv1.Node{},
|
||||
empty: generatenodeBucketList(testNg, 0, 5),
|
||||
drain: generatenodeBucketList(testNg, 0, 2),
|
||||
wantEmpty: generatenodeBucketList(testNg, 0, 2),
|
||||
wantDrain: []*nodeBucket{},
|
||||
},
|
||||
"empty&drain nodes with deletions in progress, overall budget exceeded, both empty&drain nodes fit": {
|
||||
emptyDeletionsInProgress: 5,
|
||||
drainDeletionsInProgress: 3,
|
||||
emptyNodes: generateNodes(1, "empty"),
|
||||
drainNodes: generateNodes(2, "drain"),
|
||||
wantEmpty: generateNodes(1, "empty"),
|
||||
wantDrain: generateNodes(1, "drain"),
|
||||
empty: generatenodeBucketList(testNg, 0, 1),
|
||||
drain: generatenodeBucketList(testNg, 0, 2),
|
||||
wantEmpty: generatenodeBucketList(testNg, 0, 1),
|
||||
wantDrain: generatenodeBucketList(testNg, 0, 1),
|
||||
},
|
||||
"empty&drain nodes with deletions in progress, drain budget exceeded": {
|
||||
emptyDeletionsInProgress: 1,
|
||||
drainDeletionsInProgress: 3,
|
||||
emptyNodes: generateNodes(4, "empty"),
|
||||
drainNodes: generateNodes(5, "drain"),
|
||||
wantEmpty: generateNodes(4, "empty"),
|
||||
wantDrain: generateNodes(2, "drain"),
|
||||
empty: generatenodeBucketList(testNg, 0, 4),
|
||||
drain: generatenodeBucketList(testNg, 0, 5),
|
||||
wantEmpty: generatenodeBucketList(testNg, 0, 4),
|
||||
wantDrain: generatenodeBucketList(testNg, 0, 2),
|
||||
},
|
||||
} {
|
||||
t.Run(tn, func(t *testing.T) {
|
||||
provider := testprovider.NewTestCloudProvider(nil, func(nodeGroup string, node string) error {
|
||||
return nil
|
||||
})
|
||||
for _, bucket := range append(tc.empty, tc.drain...) {
|
||||
bucket.group.(*testprovider.TestNodeGroup).SetCloudProvider(provider)
|
||||
provider.InsertNodeGroup(bucket.group)
|
||||
for _, node := range bucket.nodes {
|
||||
provider.AddNode(bucket.group.Id(), node)
|
||||
}
|
||||
}
|
||||
|
||||
ctx := &context.AutoscalingContext{
|
||||
AutoscalingOptions: config.AutoscalingOptions{
|
||||
MaxScaleDownParallelism: 10,
|
||||
|
|
@@ -208,29 +310,43 @@ func TestCropNodesToBudgets(t *testing.T) {
					NodeDeletionBatcherInterval: 0 * time.Second,
					NodeDeleteDelayAfterTaint: 1 * time.Second,
				},
				CloudProvider: provider,
			}
			deleteOptions := simulator.NodeDeleteOptions{
				SkipNodesWithSystemPods: true,
				SkipNodesWithLocalStorage: true,
				MinReplicaCount: 0,
				SkipNodesWithCustomControllerPods: true,
			}
			ndr := deletiontracker.NewNodeDeletionTracker(1 * time.Hour)
			ndt := deletiontracker.NewNodeDeletionTracker(1 * time.Hour)
			for i := 0; i < tc.emptyDeletionsInProgress; i++ {
				ndr.StartDeletion("ng1", fmt.Sprintf("empty-node-%d", i))
				ndt.StartDeletion("ng1", fmt.Sprintf("empty-node-%d", i))
			}
			for i := 0; i < tc.drainDeletionsInProgress; i++ {
				ndr.StartDeletionWithDrain("ng2", fmt.Sprintf("drain-node-%d", i))
				ndt.StartDeletionWithDrain("ng2", fmt.Sprintf("drain-node-%d", i))
			}
			emptyList, drainList := []*apiv1.Node{}, []*apiv1.Node{}
			for _, bucket := range tc.empty {
				emptyList = append(emptyList, bucket.nodes...)
			}
			for _, bucket := range tc.drain {
				drainList = append(drainList, bucket.nodes...)
			}

			actuator := NewActuator(ctx, nil, ndr, deleteOptions)
			gotEmpty, gotDrain := actuator.cropNodesToBudgets(tc.emptyNodes, tc.drainNodes)
			if diff := cmp.Diff(tc.wantEmpty, gotEmpty, cmpopts.EquateEmpty()); diff != "" {
			budgeter := NewScaleDownBudgetProcessor(ctx, ndt)
			gotEmpty, gotDrain := budgeter.CropNodes(emptyList, drainList)
			// a
			if diff := cmp.Diff(tc.wantEmpty, gotEmpty, cmpopts.EquateEmpty(), transformnodeBucket); diff != "" {
				t.Errorf("cropNodesToBudgets empty nodes diff (-want +got):\n%s", diff)
			}
			if diff := cmp.Diff(tc.wantDrain, gotDrain, cmpopts.EquateEmpty()); diff != "" {
			if diff := cmp.Diff(tc.wantDrain, gotDrain, cmpopts.EquateEmpty(), transformnodeBucket); diff != "" {
				t.Errorf("cropNodesToBudgets drain nodes diff (-want +got):\n%s", diff)
			}
		})
	}
}

// transformnodeBucket transforms a nodeBucket to a structure that can be directly compared with other node bucket.
var transformnodeBucket = cmp.Transformer("transformnodeBucket", func(b nodeBucket) interface{} {
	return struct {
		Group string
		Nodes []*apiv1.Node
	}{
		Group: b.group.Id(),
		Nodes: b.nodes,
	}
})
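The transformnodeBucket transformer above is the standard go-cmp way of diffing values with unexported fields: map them to an exported shape before comparing. A tiny standalone illustration of the same pattern, with a hypothetical point type that is not part of this repository:

package main

import (
	"fmt"

	"github.com/google/go-cmp/cmp"
)

// point has unexported fields, so cmp.Diff cannot compare it directly
// without a transformer, comparer, or exporter option.
type point struct{ x, y int }

// asPublic follows the same pattern as transformnodeBucket: map the value
// to an exported, comparable shape before diffing.
var asPublic = cmp.Transformer("asPublic", func(p point) interface{} {
	return struct{ X, Y int }{p.x, p.y}
})

func main() {
	got, want := point{1, 2}, point{1, 3}
	fmt.Println(cmp.Diff(want, got, asPublic))
}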
@@ -18,7 +18,6 @@ package actuation

import (
	"fmt"
	"reflect"
	"sync"
	"time"

@@ -68,59 +67,55 @@ func NewNodeDeletionBatcher(ctx *context.AutoscalingContext, csr *clusterstate.C
	}
}

// AddNode adds node to delete candidates and schedule deletion.
func (d *NodeDeletionBatcher) AddNode(node *apiv1.Node, drain bool) error {
// AddNodes adds node list to delete candidates and schedules deletion.
func (d *NodeDeletionBatcher) AddNodes(nodes []*apiv1.Node, nodeGroup cloudprovider.NodeGroup, drain bool) {
	// If delete interval is 0, than instantly start node deletion.
	if d.deleteInterval == 0 {
		nodeGroup, err := deleteNodesFromCloudProvider(d.ctx, []*apiv1.Node{node})
		if err != nil {
			result := status.NodeDeleteResult{ResultType: status.NodeDeleteErrorFailedToDelete, Err: err}
			CleanUpAndRecordFailedScaleDownEvent(d.ctx, node, nodeGroup.Id(), drain, d.nodeDeletionTracker, "", result)
		} else {
			RegisterAndRecordSuccessfulScaleDownEvent(d.ctx, d.clusterState, node, nodeGroup, drain, d.nodeDeletionTracker)
		err := deleteNodesFromCloudProvider(d.ctx, nodes, nodeGroup)
		for _, node := range nodes {
			if err != nil {
				result := status.NodeDeleteResult{ResultType: status.NodeDeleteErrorFailedToDelete, Err: err}
				CleanUpAndRecordFailedScaleDownEvent(d.ctx, node, nodeGroup.Id(), drain, d.nodeDeletionTracker, "", result)
			} else {
				RegisterAndRecordSuccessfulScaleDownEvent(d.ctx, d.clusterState, node, nodeGroup, drain, d.nodeDeletionTracker)
			}
		}
		return nil
	}
	nodeGroupId, first, err := d.addNodeToBucket(node, drain)
	if err != nil {
		return err
		return
	}
	first := d.addNodesToBucket(nodes, nodeGroup, drain)
	if first {
		go func(nodeGroupId string) {
		go func(nodeGroup cloudprovider.NodeGroup) {
			time.Sleep(d.deleteInterval)
			d.remove(nodeGroupId)
		}(nodeGroupId)
			d.executeForBucket(nodeGroup)
		}(nodeGroup)
	}
	return nil
}

// AddToBucket adds node to delete candidates and return if it's a first node in the group.
func (d *NodeDeletionBatcher) addNodeToBucket(node *apiv1.Node, drain bool) (string, bool, error) {
func (d *NodeDeletionBatcher) addNodesToBucket(nodes []*apiv1.Node, nodeGroup cloudprovider.NodeGroup, drain bool) bool {
	d.Lock()
	defer d.Unlock()
	nodeGroup, err := d.ctx.CloudProvider.NodeGroupForNode(node)
	if err != nil {
		return "", false, err
	for _, node := range nodes {
		d.drainedNodeDeletions[node.Name] = drain
	}
	d.drainedNodeDeletions[node.Name] = drain
	val, ok := d.deletionsPerNodeGroup[nodeGroup.Id()]
	if !ok || len(val) == 0 {
		d.deletionsPerNodeGroup[nodeGroup.Id()] = []*apiv1.Node{node}
		return nodeGroup.Id(), true, nil
		d.deletionsPerNodeGroup[nodeGroup.Id()] = nodes
		return true
	}
	d.deletionsPerNodeGroup[nodeGroup.Id()] = append(d.deletionsPerNodeGroup[nodeGroup.Id()], node)
	return nodeGroup.Id(), false, nil
	d.deletionsPerNodeGroup[nodeGroup.Id()] = append(d.deletionsPerNodeGroup[nodeGroup.Id()], nodes...)
	return false
}

// remove delete nodes of a given nodeGroup, if successful, the deletion is recorded in CSR, and an event is emitted on the node.
func (d *NodeDeletionBatcher) remove(nodeGroupId string) error {
// executeForBucket deletes nodes of a given nodeGroup, if successful, the deletion is recorded in CSR, and an event is emitted on the node.
func (d *NodeDeletionBatcher) executeForBucket(nodeGroup cloudprovider.NodeGroup) error {
	d.Lock()
	defer d.Unlock()
	nodes, ok := d.deletionsPerNodeGroup[nodeGroupId]
	nodes, ok := d.deletionsPerNodeGroup[nodeGroup.Id()]
	if !ok {
		return fmt.Errorf("Node Group %s is not present in the batch deleter", nodeGroupId)
		return fmt.Errorf("Node Group %s is not present in the batch deleter", nodeGroup.Id())
	}
	delete(d.deletionsPerNodeGroup, nodeGroupId)
	delete(d.deletionsPerNodeGroup, nodeGroup.Id())
	drainedNodeDeletions := make(map[string]bool)
	for _, node := range nodes {
		drainedNodeDeletions[node.Name] = d.drainedNodeDeletions[node.Name]
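The batching contract above is that only the first AddNodes call for a node group arms the delayed executeForBucket; later calls just extend the group's bucket. A stripped-down sketch of that pattern with plain strings instead of nodes, illustrative only and not the NodeDeletionBatcher API:

package main

import (
	"fmt"
	"sync"
	"time"
)

// batcher groups items per key and flushes a key's batch once, a fixed
// interval after the first item for that key arrived.
type batcher struct {
	sync.Mutex
	interval time.Duration
	pending  map[string][]string
}

func (b *batcher) add(key, item string) {
	b.Lock()
	first := len(b.pending[key]) == 0
	b.pending[key] = append(b.pending[key], item)
	b.Unlock()
	if first {
		// Only the first item for a key arms the flush, mirroring AddNodes above.
		time.AfterFunc(b.interval, func() { b.flush(key) })
	}
}

func (b *batcher) flush(key string) {
	b.Lock()
	items := b.pending[key]
	delete(b.pending, key)
	b.Unlock()
	fmt.Printf("deleting %v from %s in one call\n", items, key)
}

func main() {
	b := &batcher{interval: 50 * time.Millisecond, pending: map[string][]string{}}
	b.add("ng-1", "node-1")
	b.add("ng-1", "node-2")
	time.Sleep(100 * time.Millisecond)
}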
@@ -129,7 +124,7 @@ func (d *NodeDeletionBatcher) remove(nodeGroupId string) error {

	go func(nodes []*apiv1.Node, drainedNodeDeletions map[string]bool) {
		var result status.NodeDeleteResult
		nodeGroup, err := deleteNodesFromCloudProvider(d.ctx, nodes)
		err := deleteNodesFromCloudProvider(d.ctx, nodes, nodeGroup)
		for _, node := range nodes {
			drain := drainedNodeDeletions[node.Name]
			if err != nil {
@@ -138,7 +133,6 @@ func (d *NodeDeletionBatcher) remove(nodeGroupId string) error {
			} else {
				RegisterAndRecordSuccessfulScaleDownEvent(d.ctx, d.clusterState, node, nodeGroup, drain, d.nodeDeletionTracker)
			}

		}
	}(nodes, drainedNodeDeletions)
	return nil
@@ -146,18 +140,11 @@ func (d *NodeDeletionBatcher) remove(nodeGroupId string) error {

// deleteNodeFromCloudProvider removes the given nodes from cloud provider. No extra pre-deletion actions are executed on
// the Kubernetes side.
func deleteNodesFromCloudProvider(ctx *context.AutoscalingContext, nodes []*apiv1.Node) (cloudprovider.NodeGroup, error) {
	nodeGroup, err := ctx.CloudProvider.NodeGroupForNode(nodes[0])
	if err != nil {
		return nodeGroup, errors.NewAutoscalerError(errors.CloudProviderError, "failed to find node group for %s: %v", nodes[0].Name, err)
func deleteNodesFromCloudProvider(ctx *context.AutoscalingContext, nodes []*apiv1.Node, nodeGroup cloudprovider.NodeGroup) error {
	if err := nodeGroup.DeleteNodes(nodes); err != nil {
		return errors.NewAutoscalerError(errors.CloudProviderError, "failed to delete nodes from group %s: %v", nodeGroup.Id(), err)
	}
	if nodeGroup == nil || reflect.ValueOf(nodeGroup).IsNil() {
		return nodeGroup, errors.NewAutoscalerError(errors.InternalError, "picked node that doesn't belong to a node group: %s", nodes[0].Name)
	}
	if err = nodeGroup.DeleteNodes(nodes); err != nil {
		return nodeGroup, errors.NewAutoscalerError(errors.CloudProviderError, "failed to delete %s: %v", nodes[0].Name, err)
	}
	return nodeGroup, nil
	return nil
}

func nodeScaleDownReason(node *apiv1.Node, drain bool) metrics.NodeScaleDownReason {
@@ -43,8 +43,8 @@ func TestAddNodeToBucket(t *testing.T) {
	}
	nodeGroup1 := "ng-1"
	nodeGroup2 := "ng-2"
	nodes1 := generateNodes(5, "ng-1")
	nodes2 := generateNodes(5, "ng-2")
	nodes1 := generateNodes(0, 5, "ng-1")
	nodes2 := generateNodes(0, 5, "ng-2")
	provider.AddNodeGroup(nodeGroup1, 1, 10, 5)
	provider.AddNodeGroup(nodeGroup2, 1, 10, 5)
	for _, node := range nodes1 {
@@ -91,10 +91,11 @@ func TestAddNodeToBucket(t *testing.T) {
		}
		batchCount := 0
		for _, node := range test.nodes {
			_, first, err := d.addNodeToBucket(node, test.drained)
			nodeGroup, err := provider.NodeGroupForNode(node)
			if err != nil {
				t.Errorf("addNodeToBucket return error %q when addidng node %v", err, node)
				t.Errorf("couldn't get node info for node %s: %s", node.Name, err)
			}
			first := d.addNodesToBucket([]*apiv1.Node{node}, nodeGroup, test.drained)
			if first {
				batchCount += 1
			}
@@ -168,6 +169,7 @@ func TestRemove(t *testing.T) {

			ng := "ng"
			provider.AddNodeGroup(ng, 1, 10, test.numNodes)
			nodeGroup := provider.GetNodeGroup(ng)

			d := NodeDeletionBatcher{
				ctx: &ctx,
@@ -176,7 +178,7 @@ func TestRemove(t *testing.T) {
				deletionsPerNodeGroup: make(map[string][]*apiv1.Node),
				drainedNodeDeletions: make(map[string]bool),
			}
			nodes := generateNodes(test.numNodes, ng)
			nodes := generateNodes(0, test.numNodes, ng)
			failedDeletion := test.failedDeletion
			for _, node := range nodes {
				if failedDeletion > 0 {
@@ -191,14 +193,11 @@ func TestRemove(t *testing.T) {
						Key: taints.ToBeDeletedTaint,
						Effect: apiv1.TaintEffectNoSchedule,
					})
					_, _, err := d.addNodeToBucket(node, true)
					if err != nil {
						t.Errorf("addNodeToBucket return error %q when addidng node %v", err, node)
					}
					d.addNodesToBucket([]*apiv1.Node{node}, nodeGroup, true)
				}
			}

			err = d.remove(ng)
			err = d.executeForBucket(nodeGroup)
			if test.err {
				if err == nil {
					t.Errorf("remove() should return error, but return nil")
@@ -0,0 +1,134 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package actuation

import (
	"sync"
	"time"

	apiv1 "k8s.io/api/core/v1"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/scheduler/framework"

	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
	"k8s.io/autoscaler/cluster-autoscaler/config"
	"k8s.io/autoscaler/cluster-autoscaler/context"
	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/deletiontracker"
	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/status"
	"k8s.io/autoscaler/cluster-autoscaler/simulator"
	"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
)

// GroupDeletionScheduler is a wrapper over NodeDeletionBatcher responsible for grouping nodes for deletion
// and rolling back deletion of all nodes from a group in case deletion fails for any of the other nodes.
type GroupDeletionScheduler struct {
	sync.Mutex
	ctx *context.AutoscalingContext
	nodeDeletionTracker *deletiontracker.NodeDeletionTracker
	nodeDeletionBatcher *NodeDeletionBatcher
	evictor Evictor
	nodeQueue map[string][]*apiv1.Node
	failuresForGroup map[string]bool
}

// NewGroupDeletionScheduler creates an instance of GroupDeletionScheduler.
func NewGroupDeletionScheduler(ctx *context.AutoscalingContext, ndt *deletiontracker.NodeDeletionTracker, ndb *NodeDeletionBatcher, deleteOptions simulator.NodeDeleteOptions, evictor Evictor) *GroupDeletionScheduler {
	return &GroupDeletionScheduler{
		ctx: ctx,
		nodeDeletionTracker: ndt,
		nodeDeletionBatcher: ndb,
		evictor: evictor,
		nodeQueue: map[string][]*apiv1.Node{},
		failuresForGroup: map[string]bool{},
	}
}

// ScheduleDeletion schedules deletion of the node. Nodes that should be deleted in groups are queued until whole group is scheduled for deletion,
// other nodes are passed over to NodeDeletionBatcher immediately.
func (ds *GroupDeletionScheduler) ScheduleDeletion(nodeInfo *framework.NodeInfo, nodeGroup cloudprovider.NodeGroup, batchSize int, drain bool) {
	opts, err := nodeGroup.GetOptions(ds.ctx.NodeGroupDefaults)
	if err != nil {
		nodeDeleteResult := status.NodeDeleteResult{ResultType: status.NodeDeleteErrorInternal, Err: errors.NewAutoscalerError(errors.InternalError, "GetOptions returned error %v", err)}
		ds.RollbackNodeDeletion(nodeInfo.Node(), nodeGroup.Id(), drain, "failed to get autoscaling options for a node group", nodeDeleteResult)
		return
	}
	if opts == nil {
		opts = &config.NodeGroupAutoscalingOptions{}
	}

	nodeDeleteResult := ds.prepareNodeForDeletion(nodeInfo, drain)
	if nodeDeleteResult.Err != nil {
		ds.RollbackNodeDeletion(nodeInfo.Node(), nodeGroup.Id(), drain, "prepareNodeForDeletion failed", nodeDeleteResult)
		return
	}

	ds.addToBatcher(nodeInfo, nodeGroup, batchSize, drain, opts.AtomicScaleDown)
}

// prepareNodeForDeletion is a long-running operation, so it needs to avoid locking the AtomicDeletionScheduler object
func (ds *GroupDeletionScheduler) prepareNodeForDeletion(nodeInfo *framework.NodeInfo, drain bool) status.NodeDeleteResult {
	node := nodeInfo.Node()
	if drain {
		if evictionResults, err := ds.evictor.DrainNode(ds.nodeDeletionBatcher.ctx, nodeInfo); err != nil {
			return status.NodeDeleteResult{ResultType: status.NodeDeleteErrorFailedToEvictPods, Err: err, PodEvictionResults: evictionResults}
		}
	} else {
		if err := ds.evictor.EvictDaemonSetPods(ds.ctx, nodeInfo, time.Now()); err != nil {
			// Evicting DS pods is best-effort, so proceed with the deletion even if there are errors.
			klog.Warningf("Error while evicting DS pods from an empty node %q: %v", node.Name, err)
		}
	}
	if err := WaitForDelayDeletion(node, ds.ctx.ListerRegistry.AllNodeLister(), ds.ctx.AutoscalingOptions.NodeDeletionDelayTimeout); err != nil {
		return status.NodeDeleteResult{ResultType: status.NodeDeleteErrorFailedToDelete, Err: err}
	}
	return status.NodeDeleteResult{ResultType: status.NodeDeleteOk}
}

func (ds *GroupDeletionScheduler) addToBatcher(nodeInfo *framework.NodeInfo, nodeGroup cloudprovider.NodeGroup, batchSize int, drain, atomic bool) {
	ds.Lock()
	defer ds.Unlock()
	ds.nodeQueue[nodeGroup.Id()] = append(ds.nodeQueue[nodeGroup.Id()], nodeInfo.Node())
	if atomic {
		if ds.failuresForGroup[nodeGroup.Id()] {
			nodeDeleteResult := status.NodeDeleteResult{ResultType: status.NodeDeleteErrorInternal, Err: errors.NewAutoscalerError(errors.InternalError, "couldn't scale down other nodes in this node group")}
			CleanUpAndRecordFailedScaleDownEvent(ds.ctx, nodeInfo.Node(), nodeGroup.Id(), drain, ds.nodeDeletionTracker, "scale down failed for node group as a whole", nodeDeleteResult)
			delete(ds.nodeQueue, nodeGroup.Id())
		}
		if len(ds.nodeQueue[nodeGroup.Id()]) < batchSize {
			// node group should be scaled down atomically, but not all nodes are ready yet
			return
		}
	}
	ds.nodeDeletionBatcher.AddNodes(ds.nodeQueue[nodeGroup.Id()], nodeGroup, drain)
	ds.nodeQueue[nodeGroup.Id()] = []*apiv1.Node{}
}

// RollbackNodeDeletion frees up a node that couldn't be deleted successfully. If it was a part of a group, the same is applied for other nodes queued for deletion.
func (ds *GroupDeletionScheduler) RollbackNodeDeletion(node *apiv1.Node, nodeGroupId string, drain bool, errMsg string, result status.NodeDeleteResult) {
	ds.Lock()
	defer ds.Unlock()
	ds.failuresForGroup[nodeGroupId] = true
	CleanUpAndRecordFailedScaleDownEvent(ds.ctx, node, nodeGroupId, drain, ds.nodeDeletionTracker, errMsg, result)
	for _, otherNode := range ds.nodeQueue[nodeGroupId] {
		if otherNode == node {
			continue
		}
		nodeDeleteResult := status.NodeDeleteResult{ResultType: status.NodeDeleteErrorInternal, Err: errors.NewAutoscalerError(errors.InternalError, "couldn't scale down other nodes in this node group")}
		CleanUpAndRecordFailedScaleDownEvent(ds.ctx, otherNode, nodeGroupId, drain, ds.nodeDeletionTracker, "scale down failed for node group as a whole", nodeDeleteResult)
	}
	delete(ds.nodeQueue, nodeGroupId)
}
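The scheduler above keeps an atomic group's nodes in nodeQueue until batchSize of them have been prepared, and forwards non-atomic nodes to the batcher straight away. A condensed sketch of just that queueing decision, with simplified types in place of the real scheduler:

package main

import "fmt"

// queueUntilComplete keeps queued names per group and reports, for each new name,
// which names should be flushed now: immediately for non-atomic groups, and only
// once batchSize names have been queued for atomic ones.
type queueUntilComplete struct {
	queue map[string][]string
}

func (q *queueUntilComplete) schedule(group, node string, batchSize int, atomic bool) (flush []string) {
	q.queue[group] = append(q.queue[group], node)
	if atomic && len(q.queue[group]) < batchSize {
		// Atomic group not complete yet: keep waiting, delete nothing.
		return nil
	}
	flush = q.queue[group]
	q.queue[group] = nil
	return flush
}

func main() {
	q := &queueUntilComplete{queue: map[string][]string{}}
	fmt.Println(q.schedule("atomic-3", "n1", 3, true)) // []
	fmt.Println(q.schedule("atomic-3", "n2", 3, true)) // []
	fmt.Println(q.schedule("atomic-3", "n3", 3, true)) // [n1 n2 n3]
	fmt.Println(q.schedule("regular", "r1", 1, false)) // [r1]
}

RollbackNodeDeletion relies on the same per-group queue: if one node of an atomic group fails, the remaining queued nodes are failed together instead of being deleted.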
@@ -18,11 +18,13 @@ package planner

import (
	"fmt"
	"math"
	"time"

	apiv1 "k8s.io/api/core/v1"
	policyv1 "k8s.io/api/policy/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
	"k8s.io/autoscaler/cluster-autoscaler/context"
	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown"
	"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/eligibility"
@@ -132,6 +134,7 @@ func (p *Planner) CleanUpUnneededNodes() {
// NodesToDelete returns all Nodes that could be removed right now, according
// to the Planner.
func (p *Planner) NodesToDelete(_ time.Time) (empty, needDrain []*apiv1.Node) {
	empty, needDrain = []*apiv1.Node{}, []*apiv1.Node{}
	nodes, err := allNodes(p.context.ClusterSnapshot)
	if err != nil {
		klog.Errorf("Nothing will scale down, failed to list nodes from ClusterSnapshot: %v", err)
@@ -154,7 +157,10 @@ func (p *Planner) NodesToDelete(_ time.Time) (empty, needDrain []*apiv1.Node) {
		// downs already in progress. If we pass the empty nodes first, they will be first
		// to get deleted, thus we decrease chances of hitting the limit on non-empty scale down.
		append(emptyRemovable, needDrainRemovable...),
		p.context.AutoscalingOptions.MaxScaleDownParallelism)
		// No need to limit the number of nodes, since it will happen later, in the actuation stage.
		// It will make a more appropriate decision by using additional information about deletions
		// in progress.
		math.MaxInt)
	for _, nodeToRemove := range nodesToRemove {
		if len(nodeToRemove.PodsToReschedule) > 0 {
			needDrain = append(needDrain, nodeToRemove.Node)
@@ -162,9 +168,52 @@ func (p *Planner) NodesToDelete(_ time.Time) (empty, needDrain []*apiv1.Node) {
			empty = append(empty, nodeToRemove.Node)
		}
	}

	empty, filteredOut := p.filterOutIncompleteAtomicNodeGroups(empty)
	needDrain, _ = p.filterOutIncompleteAtomicNodeGroups(append(needDrain, filteredOut...))
	return empty, needDrain
}

func (p *Planner) filterOutIncompleteAtomicNodeGroups(nodes []*apiv1.Node) ([]*apiv1.Node, []*apiv1.Node) {
	nodesByGroup := map[cloudprovider.NodeGroup][]*apiv1.Node{}
	result := []*apiv1.Node{}
	filteredOut := []*apiv1.Node{}
	for _, node := range nodes {
		nodeGroup, err := p.context.CloudProvider.NodeGroupForNode(node)
		if err != nil {
			klog.Errorf("Node %v will not scale down, failed to get node info: %s", node.Name, err)
			continue
		}
		autoscalingOptions, err := nodeGroup.GetOptions(p.context.NodeGroupDefaults)
		if err != nil {
			klog.Errorf("Failed to get autoscaling options for node group %s: %v", nodeGroup.Id(), err)
			continue
		}
		if autoscalingOptions != nil && autoscalingOptions.AtomicScaleDown {
			klog.V(2).Infof("Considering node %s for atomic scale down", node.Name)
			nodesByGroup[nodeGroup] = append(nodesByGroup[nodeGroup], node)
		} else {
			klog.V(2).Infof("Considering node %s for standard scale down", node.Name)
			result = append(result, node)
		}
	}
	for nodeGroup, nodes := range nodesByGroup {
		ngSize, err := nodeGroup.TargetSize()
		if err != nil {
			klog.Errorf("Nodes from group %s will not scale down, failed to get target size: %s", nodeGroup.Id(), err)
			continue
		}
		if ngSize == len(nodes) {
			klog.V(2).Infof("Scheduling atomic scale down for all %v nodes from node group %s", len(nodes), nodeGroup.Id())
			result = append(result, nodes...)
		} else {
			klog.V(2).Infof("Skipping scale down for %v nodes from node group %s, all %v nodes have to be scaled down atomically", len(nodes), nodeGroup.Id(), ngSize)
			filteredOut = append(filteredOut, nodes...)
		}
	}
	return result, filteredOut
}

func allNodes(s clustersnapshot.ClusterSnapshot) ([]*apiv1.Node, error) {
	nodeInfos, err := s.NodeInfos().List()
	if err != nil {
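filterOutIncompleteAtomicNodeGroups makes the planner all-or-nothing for atomic groups: their nodes pass through only when the number of removable nodes equals the group's target size. A small sketch of that filter under the same assumption, using plain maps instead of the planner's types:

package main

import "fmt"

// keepOnlyCompleteGroups returns candidates whose group is fully represented:
// a group's nodes pass only if the candidate count equals the group's target size.
func keepOnlyCompleteGroups(candidates map[string][]string, targetSize map[string]int) []string {
	var result []string
	for group, nodes := range candidates {
		if len(nodes) == targetSize[group] {
			result = append(result, nodes...)
		}
	}
	return result
}

func main() {
	candidates := map[string][]string{
		"atomic-3": {"a1", "a2", "a3"}, // all 3 of 3 removable: scaled down together
		"atomic-4": {"b1", "b2"},       // only 2 of 4 removable: skipped entirely
	}
	fmt.Println(keepOnlyCompleteGroups(candidates, map[string]int{"atomic-3": 3, "atomic-4": 4}))
}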
|
|||
|
|
@ -27,9 +27,11 @@ import (
|
|||
policyv1 "k8s.io/api/policy/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
|
||||
testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/config"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/context"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/deletiontracker"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/status"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/unremovable"
|
||||
. "k8s.io/autoscaler/cluster-autoscaler/core/test"
|
||||
|
|
@ -620,6 +622,195 @@ func TestUpdateClusterStatUnneededNodesLimit(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestNodesToDelete(t *testing.T) {
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
nodes map[cloudprovider.NodeGroup][]simulator.NodeToBeRemoved
|
||||
wantEmpty []*apiv1.Node
|
||||
wantDrain []*apiv1.Node
|
||||
}{
|
||||
{
|
||||
name: "empty",
|
||||
nodes: map[cloudprovider.NodeGroup][]simulator.NodeToBeRemoved{},
|
||||
wantEmpty: []*apiv1.Node{},
|
||||
wantDrain: []*apiv1.Node{},
|
||||
},
|
||||
{
|
||||
name: "single empty",
|
||||
nodes: map[cloudprovider.NodeGroup][]simulator.NodeToBeRemoved{
|
||||
sizedNodeGroup("test-ng", 3, false): {
|
||||
buildRemovableNode("test-node", 0),
|
||||
},
|
||||
},
|
||||
wantEmpty: []*apiv1.Node{
|
||||
buildRemovableNode("test-node", 0).Node,
|
||||
},
|
||||
wantDrain: []*apiv1.Node{},
|
||||
},
|
||||
{
|
||||
name: "single drain",
|
||||
nodes: map[cloudprovider.NodeGroup][]simulator.NodeToBeRemoved{
|
||||
sizedNodeGroup("test-ng", 3, false): {
|
||||
buildRemovableNode("test-node", 1),
|
||||
},
|
||||
},
|
||||
wantEmpty: []*apiv1.Node{},
|
||||
wantDrain: []*apiv1.Node{
|
||||
buildRemovableNode("test-node", 1).Node,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "single empty atomic",
|
||||
nodes: map[cloudprovider.NodeGroup][]simulator.NodeToBeRemoved{
|
||||
sizedNodeGroup("atomic-ng", 3, true): {
|
||||
buildRemovableNode("node-1", 0),
|
||||
},
|
||||
},
|
||||
wantEmpty: []*apiv1.Node{},
|
||||
wantDrain: []*apiv1.Node{},
|
||||
},
|
||||
{
|
||||
name: "all empty atomic",
|
||||
nodes: map[cloudprovider.NodeGroup][]simulator.NodeToBeRemoved{
|
||||
sizedNodeGroup("atomic-ng", 3, true): {
|
||||
buildRemovableNode("node-1", 0),
|
||||
buildRemovableNode("node-2", 0),
|
||||
buildRemovableNode("node-3", 0),
|
||||
},
|
||||
},
|
||||
wantEmpty: []*apiv1.Node{
|
||||
buildRemovableNode("node-1", 0).Node,
|
||||
buildRemovableNode("node-2", 0).Node,
|
||||
buildRemovableNode("node-3", 0).Node,
|
||||
},
|
||||
wantDrain: []*apiv1.Node{},
|
||||
},
|
||||
{
|
||||
name: "some drain atomic",
|
||||
nodes: map[cloudprovider.NodeGroup][]simulator.NodeToBeRemoved{
|
||||
sizedNodeGroup("atomic-ng", 3, true): {
|
||||
buildRemovableNode("node-1", 0),
|
||||
buildRemovableNode("node-2", 0),
|
||||
buildRemovableNode("node-3", 1),
|
||||
},
|
||||
},
|
||||
wantEmpty: []*apiv1.Node{},
|
||||
wantDrain: []*apiv1.Node{
|
||||
buildRemovableNode("node-1", 0).Node,
|
||||
buildRemovableNode("node-2", 0).Node,
|
||||
buildRemovableNode("node-3", 1).Node,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "different groups",
|
||||
nodes: map[cloudprovider.NodeGroup][]simulator.NodeToBeRemoved{
|
||||
sizedNodeGroup("standard-empty-ng", 3, false): {
|
||||
buildRemovableNode("node-1", 0),
|
||||
buildRemovableNode("node-2", 0),
|
||||
buildRemovableNode("node-3", 0),
|
||||
},
|
||||
sizedNodeGroup("standard-drain-ng", 3, false): {
|
||||
buildRemovableNode("node-4", 1),
|
||||
buildRemovableNode("node-5", 2),
|
||||
buildRemovableNode("node-6", 3),
|
||||
},
|
||||
sizedNodeGroup("standard-mixed-ng", 3, false): {
|
||||
buildRemovableNode("node-7", 0),
|
||||
buildRemovableNode("node-8", 1),
|
||||
buildRemovableNode("node-9", 2),
|
||||
},
|
||||
sizedNodeGroup("atomic-empty-ng", 3, true): {
|
||||
buildRemovableNode("node-10", 0),
|
||||
buildRemovableNode("node-11", 0),
|
||||
buildRemovableNode("node-12", 0),
|
||||
},
|
||||
sizedNodeGroup("atomic-mixed-ng", 3, true): {
|
||||
buildRemovableNode("node-13", 0),
|
||||
buildRemovableNode("node-14", 1),
|
||||
buildRemovableNode("node-15", 2),
|
||||
},
|
||||
sizedNodeGroup("atomic-partial-ng", 3, true): {
|
||||
buildRemovableNode("node-16", 0),
|
||||
buildRemovableNode("node-17", 1),
|
||||
},
|
||||
},
|
||||
wantEmpty: []*apiv1.Node{
|
||||
buildRemovableNode("node-1", 0).Node,
|
||||
buildRemovableNode("node-2", 0).Node,
|
||||
buildRemovableNode("node-3", 0).Node,
|
||||
buildRemovableNode("node-7", 0).Node,
|
||||
buildRemovableNode("node-10", 0).Node,
|
||||
buildRemovableNode("node-11", 0).Node,
|
||||
buildRemovableNode("node-12", 0).Node,
|
||||
},
|
||||
wantDrain: []*apiv1.Node{
|
||||
buildRemovableNode("node-4", 0).Node,
|
||||
buildRemovableNode("node-5", 0).Node,
|
||||
buildRemovableNode("node-6", 0).Node,
|
||||
buildRemovableNode("node-8", 0).Node,
|
||||
buildRemovableNode("node-9", 0).Node,
|
||||
buildRemovableNode("node-13", 0).Node,
|
||||
buildRemovableNode("node-14", 0).Node,
|
||||
buildRemovableNode("node-15", 0).Node,
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
tc := tc
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
provider := testprovider.NewTestCloudProvider(nil, nil)
|
||||
allNodes := []*apiv1.Node{}
|
||||
allRemovables := []simulator.NodeToBeRemoved{}
|
||||
for ng, nodes := range tc.nodes {
|
||||
provider.InsertNodeGroup(ng)
|
||||
for _, removable := range nodes {
|
||||
allNodes = append(allNodes, removable.Node)
|
||||
allRemovables = append(allRemovables, removable)
|
||||
provider.AddNode(ng.Id(), removable.Node)
|
||||
}
|
||||
}
|
||||
context, err := NewScaleTestAutoscalingContext(config.AutoscalingOptions{
|
||||
NodeGroupDefaults: config.NodeGroupAutoscalingOptions{
|
||||
ScaleDownUnneededTime: 10 * time.Minute,
|
||||
ScaleDownUnreadyTime: 0 * time.Minute,
|
||||
},
|
||||
}, &fake.Clientset{}, nil, provider, nil, nil)
|
||||
assert.NoError(t, err)
|
||||
clustersnapshot.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, allNodes, nil)
|
||||
deleteOptions := simulator.NodeDeleteOptions{}
|
||||
p := New(&context, NewTestProcessors(&context), deleteOptions)
|
||||
p.latestUpdate = time.Now()
|
||||
p.actuationStatus = deletiontracker.NewNodeDeletionTracker(0 * time.Second)
|
||||
p.unneededNodes.Update(allRemovables, time.Now().Add(-1*time.Hour))
|
||||
p.eligibilityChecker = &fakeEligibilityChecker{eligible: asMap(nodeNames(allNodes))}
|
||||
empty, drain := p.NodesToDelete(time.Now())
|
||||
assert.ElementsMatch(t, tc.wantEmpty, empty)
|
||||
assert.ElementsMatch(t, tc.wantDrain, drain)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func sizedNodeGroup(id string, size int, atomic bool) cloudprovider.NodeGroup {
|
||||
ng := testprovider.NewTestNodeGroup(id, 10000, 0, size, true, false, "n1-standard-2", nil, nil)
|
||||
ng.SetOptions(&config.NodeGroupAutoscalingOptions{
|
||||
AtomicScaleDown: atomic,
|
||||
})
|
||||
return ng
|
||||
}
|
||||
|
||||
func buildRemovableNode(name string, podCount int) simulator.NodeToBeRemoved {
|
||||
podsToReschedule := []*apiv1.Pod{}
|
||||
for i := 0; i < podCount; i++ {
|
||||
podsToReschedule = append(podsToReschedule, &apiv1.Pod{})
|
||||
}
|
||||
return simulator.NodeToBeRemoved{
|
||||
Node: BuildTestNode(name, 1000, 10),
|
||||
PodsToReschedule: podsToReschedule,
|
||||
}
|
||||
}
|
||||
|
||||
func generateReplicaSets(name string, replicas int32) []*appsv1.ReplicaSet {
|
||||
return []*appsv1.ReplicaSet{
|
||||
{
|
||||
|
|
|
|||