Move soft tainting logic to a separate package
This commit is contained in:
parent 7686a1f326
commit 5a78f49bc2
@ -0,0 +1,97 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package actuation

import (
    "time"

    "k8s.io/autoscaler/cluster-autoscaler/context"
    "k8s.io/autoscaler/cluster-autoscaler/metrics"
    "k8s.io/autoscaler/cluster-autoscaler/utils/deletetaint"

    apiv1 "k8s.io/api/core/v1"
    klog "k8s.io/klog/v2"
)

// UpdateSoftDeletionTaints manages soft taints of unneeded nodes.
func UpdateSoftDeletionTaints(context *context.AutoscalingContext, unneededNodes, neededNodes []*apiv1.Node) (errors []error) {
    defer metrics.UpdateDurationFromStart(metrics.ScaleDownSoftTaintUnneeded, time.Now())
    b := &budgetTracker{
        apiCallBudget: context.AutoscalingOptions.MaxBulkSoftTaintCount,
        timeBudget:    context.AutoscalingOptions.MaxBulkSoftTaintTime,
        startTime:     now(),
    }
    for _, node := range neededNodes {
        if deletetaint.HasToBeDeletedTaint(node) {
            // Do not consider nodes that are scheduled to be deleted
            continue
        }
        if !deletetaint.HasDeletionCandidateTaint(node) {
            continue
        }
        b.processWithinBudget(func() {
            _, err := deletetaint.CleanDeletionCandidate(node, context.ClientSet)
            if err != nil {
                errors = append(errors, err)
                klog.Warningf("Soft taint on %s removal error %v", node.Name, err)
            }
        })
    }
    for _, node := range unneededNodes {
        if deletetaint.HasToBeDeletedTaint(node) {
            // Do not consider nodes that are scheduled to be deleted
            continue
        }
        if deletetaint.HasDeletionCandidateTaint(node) {
            continue
        }
        b.processWithinBudget(func() {
            err := deletetaint.MarkDeletionCandidate(node, context.ClientSet)
            if err != nil {
                errors = append(errors, err)
                klog.Warningf("Soft taint on %s adding error %v", node.Name, err)
            }
        })
    }
    b.reportExceededLimits()
    return
}

// Get current time. Proxy for unit tests.
var now func() time.Time = time.Now

type budgetTracker struct {
    apiCallBudget int
    startTime     time.Time
    timeBudget    time.Duration
    skippedNodes  int
}

func (b *budgetTracker) processWithinBudget(f func()) {
    if b.apiCallBudget <= 0 || now().Sub(b.startTime) >= b.timeBudget {
        b.skippedNodes++
        return
    }
    b.apiCallBudget--
    f()
}

func (b *budgetTracker) reportExceededLimits() {
    if b.skippedNodes > 0 {
        klog.V(4).Infof("Skipped adding/removing soft taints on %v nodes - API call or time limit exceeded", b.skippedNodes)
    }
}

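The helper above funnels every taint add/remove through budgetTracker, so a single pass never issues more than MaxBulkSoftTaintCount API calls or runs longer than MaxBulkSoftTaintTime; anything over the budget is merely counted and logged. The standalone Go sketch below illustrates the same count-plus-time budgeting pattern outside the autoscaler; the names (budget, run) and the limits are illustrative, not part of this commit.

package main

import (
    "fmt"
    "time"
)

// budget mirrors the budgetTracker idea: a fixed number of calls and a
// fixed wall-clock window, after which further work is skipped.
type budget struct {
    callsLeft int
    deadline  time.Time
    skipped   int
}

func (b *budget) run(f func()) {
    if b.callsLeft <= 0 || time.Now().After(b.deadline) {
        b.skipped++ // over budget: record the skip instead of doing the work
        return
    }
    b.callsLeft--
    f()
}

func main() {
    b := &budget{callsLeft: 3, deadline: time.Now().Add(50 * time.Millisecond)}
    for i := 0; i < 10; i++ {
        i := i
        b.run(func() { fmt.Println("processed item", i) })
    }
    fmt.Println("skipped:", b.skipped) // with these limits, 7 of the 10 items are skipped by the call budget
}
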
@ -0,0 +1,241 @@
/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package actuation

import (
    "context"
    "testing"
    "time"

    testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
    "k8s.io/autoscaler/cluster-autoscaler/config"
    "k8s.io/autoscaler/cluster-autoscaler/core/test"
    "k8s.io/autoscaler/cluster-autoscaler/utils/deletetaint"
    kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
    . "k8s.io/autoscaler/cluster-autoscaler/utils/test"

    "github.com/stretchr/testify/assert"
    apiv1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/runtime"
    "k8s.io/client-go/kubernetes"
    "k8s.io/client-go/kubernetes/fake"
    k8stesting "k8s.io/client-go/testing"
    klog "k8s.io/klog/v2"
)

func TestSoftTaintUpdate(t *testing.T) {
    n1000 := BuildTestNode("n1000", 1000, 1000)
    SetNodeReadyState(n1000, true, time.Time{})
    n2000 := BuildTestNode("n2000", 2000, 1000)
    SetNodeReadyState(n2000, true, time.Time{})

    fakeClient := fake.NewSimpleClientset()
    ctx := context.Background()
    _, err := fakeClient.CoreV1().Nodes().Create(ctx, n1000, metav1.CreateOptions{})
    assert.NoError(t, err)
    _, err = fakeClient.CoreV1().Nodes().Create(ctx, n2000, metav1.CreateOptions{})
    assert.NoError(t, err)

    provider := testprovider.NewTestCloudProvider(nil, func(nodeGroup string, node string) error {
        t.Fatalf("Unexpected deletion of %s", node)
        return nil
    })
    provider.AddNodeGroup("ng1", 1, 10, 2)
    provider.AddNode("ng1", n1000)
    provider.AddNode("ng1", n2000)
    assert.NotNil(t, provider)

    options := config.AutoscalingOptions{
        MaxBulkSoftTaintCount: 1,
        MaxBulkSoftTaintTime:  3 * time.Second,
    }
    registry := kube_util.NewListerRegistry(nil, nil, nil, nil, nil, nil, nil, nil, nil, nil)

    actx, err := test.NewScaleTestAutoscalingContext(options, fakeClient, registry, provider, nil, nil)
    assert.NoError(t, err)

    // Test no superfluous nodes
    nodes := getAllNodes(t, fakeClient)
    errs := UpdateSoftDeletionTaints(&actx, nil, nodes)
    assert.Empty(t, errs)
    assert.False(t, hasDeletionCandidateTaint(t, fakeClient, n1000.Name))
    assert.False(t, hasDeletionCandidateTaint(t, fakeClient, n2000.Name))

    // Test one unneeded node
    nodes = getAllNodes(t, fakeClient)
    errs = UpdateSoftDeletionTaints(&actx, []*apiv1.Node{n1000}, []*apiv1.Node{n2000})
    assert.Empty(t, errs)
    assert.True(t, hasDeletionCandidateTaint(t, fakeClient, n1000.Name))
    assert.False(t, hasDeletionCandidateTaint(t, fakeClient, n2000.Name))

    // Test remove soft taint
    nodes = getAllNodes(t, fakeClient)
    errs = UpdateSoftDeletionTaints(&actx, nil, nodes)
    assert.Empty(t, errs)
    assert.False(t, hasDeletionCandidateTaint(t, fakeClient, n1000.Name))
    assert.False(t, hasDeletionCandidateTaint(t, fakeClient, n2000.Name))

    // Test bulk update taint limit
    nodes = getAllNodes(t, fakeClient)
    errs = UpdateSoftDeletionTaints(&actx, nodes, nil)
    assert.Empty(t, errs)
    assert.Equal(t, 1, countDeletionCandidateTaints(t, fakeClient))
    nodes = getAllNodes(t, fakeClient)
    errs = UpdateSoftDeletionTaints(&actx, nodes, nil)
    assert.Empty(t, errs)
    assert.Equal(t, 2, countDeletionCandidateTaints(t, fakeClient))

    // Test bulk update untaint limit
    nodes = getAllNodes(t, fakeClient)
    errs = UpdateSoftDeletionTaints(&actx, nil, nodes)
    assert.Empty(t, errs)
    assert.Equal(t, 1, countDeletionCandidateTaints(t, fakeClient))
    nodes = getAllNodes(t, fakeClient)
    errs = UpdateSoftDeletionTaints(&actx, nil, nodes)
    assert.Empty(t, errs)
    assert.Equal(t, 0, countDeletionCandidateTaints(t, fakeClient))
}

func TestSoftTaintTimeLimit(t *testing.T) {
    n1 := BuildTestNode("n1", 1000, 1000)
    SetNodeReadyState(n1, true, time.Time{})
    n2 := BuildTestNode("n2", 1000, 1000)
    SetNodeReadyState(n2, true, time.Time{})

    currentTime := time.Now()
    updateTime := time.Millisecond
    maxSoftTaintDuration := 1 * time.Second

    unfreeze := freezeTime(&currentTime)
    defer unfreeze()

    fakeClient := fake.NewSimpleClientset()
    ctx := context.Background()
    _, err := fakeClient.CoreV1().Nodes().Create(ctx, n1, metav1.CreateOptions{})
    assert.NoError(t, err)
    _, err = fakeClient.CoreV1().Nodes().Create(ctx, n2, metav1.CreateOptions{})
    assert.NoError(t, err)

    // Move time forward when updating
    fakeClient.Fake.PrependReactor("update", "nodes", func(action k8stesting.Action) (bool, runtime.Object, error) {
        currentTime = currentTime.Add(updateTime)
        klog.Infof("currentTime after update by %v is %v", updateTime, currentTime)
        return false, nil, nil
    })

    provider := testprovider.NewTestCloudProvider(nil, nil)
    provider.AddNodeGroup("ng1", 1, 10, 2)
    provider.AddNode("ng1", n1)
    provider.AddNode("ng1", n2)
    assert.NotNil(t, provider)

    options := config.AutoscalingOptions{
        MaxBulkSoftTaintCount: 10,
        MaxBulkSoftTaintTime:  maxSoftTaintDuration,
    }
    registry := kube_util.NewListerRegistry(nil, nil, nil, nil, nil, nil, nil, nil, nil, nil)

    actx, err := test.NewScaleTestAutoscalingContext(options, fakeClient, registry, provider, nil, nil)
    assert.NoError(t, err)

    // Test bulk taint
    nodes := getAllNodes(t, fakeClient)
    errs := UpdateSoftDeletionTaints(&actx, nodes, nil)
    assert.Empty(t, errs)
    assert.Equal(t, 2, countDeletionCandidateTaints(t, fakeClient))
    assert.True(t, hasDeletionCandidateTaint(t, fakeClient, n1.Name))
    assert.True(t, hasDeletionCandidateTaint(t, fakeClient, n2.Name))

    // Test bulk untaint
    nodes = getAllNodes(t, fakeClient)
    errs = UpdateSoftDeletionTaints(&actx, nil, nodes)
    assert.Empty(t, errs)
    assert.Equal(t, 0, countDeletionCandidateTaints(t, fakeClient))
    assert.False(t, hasDeletionCandidateTaint(t, fakeClient, n1.Name))
    assert.False(t, hasDeletionCandidateTaint(t, fakeClient, n2.Name))

    updateTime = maxSoftTaintDuration

    // Test duration limit of bulk taint
    nodes = getAllNodes(t, fakeClient)
    errs = UpdateSoftDeletionTaints(&actx, nodes, nil)
    assert.Empty(t, errs)
    assert.Equal(t, 1, countDeletionCandidateTaints(t, fakeClient))
    nodes = getAllNodes(t, fakeClient)
    errs = UpdateSoftDeletionTaints(&actx, nodes, nil)
    assert.Empty(t, errs)
    assert.Equal(t, 2, countDeletionCandidateTaints(t, fakeClient))

    // Test duration limit of bulk untaint
    nodes = getAllNodes(t, fakeClient)
    errs = UpdateSoftDeletionTaints(&actx, nil, nodes)
    assert.Empty(t, errs)
    assert.Equal(t, 1, countDeletionCandidateTaints(t, fakeClient))
    nodes = getAllNodes(t, fakeClient)
    errs = UpdateSoftDeletionTaints(&actx, nil, nodes)
    assert.Empty(t, errs)
    assert.Equal(t, 0, countDeletionCandidateTaints(t, fakeClient))
}

func countDeletionCandidateTaints(t *testing.T, client kubernetes.Interface) (total int) {
    t.Helper()
    for _, node := range getAllNodes(t, client) {
        if deletetaint.HasDeletionCandidateTaint(node) {
            total++
        }
    }
    return total
}

func hasDeletionCandidateTaint(t *testing.T, client kubernetes.Interface, name string) bool {
    t.Helper()
    return deletetaint.HasDeletionCandidateTaint(getNode(t, client, name))
}

func getNode(t *testing.T, client kubernetes.Interface, name string) *apiv1.Node {
    t.Helper()
    node, err := client.CoreV1().Nodes().Get(context.Background(), name, metav1.GetOptions{})
    if err != nil {
        t.Fatalf("Failed to retrieve node %v: %v", name, err)
    }
    return node
}

func getAllNodes(t *testing.T, client kubernetes.Interface) []*apiv1.Node {
    t.Helper()
    nodeList, err := client.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{})
    if err != nil {
        t.Fatalf("Failed to retrieve list of nodes: %v", err)
    }
    result := make([]*apiv1.Node, 0, nodeList.Size())
    for _, node := range nodeList.Items {
        result = append(result, node.DeepCopy())
    }
    return result
}

func freezeTime(at *time.Time) (unfreeze func()) {
    // Replace time tracking function
    now = func() time.Time {
        return *at
    }
    return func() { now = time.Now }
}

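The time-limit test drives the budget deterministically: freezeTime swaps the package-level now variable for a closure over a test-owned timestamp, and the fake client's update reactor advances that timestamp on every node update. A minimal, self-contained sketch of the same swappable-clock pattern follows; the package and function names are illustrative and not taken from the commit.

package clock

import (
    "testing"
    "time"
)

// now is a package-level clock so tests can substitute a fake one,
// mirroring `var now func() time.Time = time.Now` in the new package.
var now = time.Now

// Expired reports whether the budget window starting at start has elapsed.
func Expired(start time.Time, budget time.Duration) bool {
    return now().Sub(start) >= budget
}

func TestExpired(t *testing.T) {
    current := time.Now()
    now = func() time.Time { return current } // freeze the clock
    defer func() { now = time.Now }()         // restore the real clock

    start := now()
    if Expired(start, time.Second) {
        t.Fatal("budget should not be expired yet")
    }
    current = current.Add(2 * time.Second) // advance the fake clock
    if !Expired(start, time.Second) {
        t.Fatal("budget should be expired after advancing the clock")
    }
}
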
@ -81,9 +81,6 @@ const (
    DeamonSetTimeBetweenEvictionRetries = 3 * time.Second
)

// Get current time. Proxy for unit tests.
var now func() time.Time = time.Now

type scaleDownResourcesLimits map[string]int64
type scaleDownResourcesDelta map[string]int64

@ -317,6 +314,11 @@ func (sd *ScaleDown) CleanUpUnneededNodes() {
    sd.unneededNodes = make(map[string]time.Time)
}

// UnneededNodes returns a list of nodes that can potentially be scaled down.
func (sd *ScaleDown) UnneededNodes() []*apiv1.Node {
    return sd.unneededNodesList
}

func (sd *ScaleDown) checkNodeUtilization(timestamp time.Time, node *apiv1.Node, nodeInfo *schedulerframework.NodeInfo) (simulator.UnremovableReason, *utilization.Info) {
    // Skip nodes that were recently checked.
    if _, found := sd.unremovableNodes[node.Name]; found {

@ -684,50 +686,6 @@ func (sd *ScaleDown) mapNodesToStatusScaleDownNodes(nodes []*apiv1.Node, nodeGro
    return result
}

// SoftTaintUnneededNodes manage soft taints of unneeded nodes.
func (sd *ScaleDown) SoftTaintUnneededNodes(allNodes []*apiv1.Node) (errors []error) {
    defer metrics.UpdateDurationFromStart(metrics.ScaleDownSoftTaintUnneeded, time.Now())
    apiCallBudget := sd.context.AutoscalingOptions.MaxBulkSoftTaintCount
    timeBudget := sd.context.AutoscalingOptions.MaxBulkSoftTaintTime
    skippedNodes := 0
    startTime := now()
    for _, node := range allNodes {
        if deletetaint.HasToBeDeletedTaint(node) {
            // Do not consider nodes that are scheduled to be deleted
            continue
        }
        alreadyTainted := deletetaint.HasDeletionCandidateTaint(node)
        _, unneeded := sd.unneededNodes[node.Name]

        // Check if expected taints match existing taints
        if unneeded != alreadyTainted {
            if apiCallBudget <= 0 || now().Sub(startTime) >= timeBudget {
                skippedNodes++
                continue
            }
            apiCallBudget--
            if unneeded && !alreadyTainted {
                err := deletetaint.MarkDeletionCandidate(node, sd.context.ClientSet)
                if err != nil {
                    errors = append(errors, err)
                    klog.Warningf("Soft taint on %s adding error %v", node.Name, err)
                }
            }
            if !unneeded && alreadyTainted {
                _, err := deletetaint.CleanDeletionCandidate(node, sd.context.ClientSet)
                if err != nil {
                    errors = append(errors, err)
                    klog.Warningf("Soft taint on %s removal error %v", node.Name, err)
                }
            }
        }
    }
    if skippedNodes > 0 {
        klog.V(4).Infof("Skipped adding/removing soft taints on %v nodes - API call limit exceeded", skippedNodes)
    }
    return
}

// TryToScaleDown tries to scale down the cluster. It returns a result inside a ScaleDownStatus indicating if any node was
// removed and error if such occurred.
func (sd *ScaleDown) TryToScaleDown(

@ -17,7 +17,6 @@ limitations under the License.
package legacy

import (
    ctx "context"
    "fmt"
    "sort"
    "strconv"

@ -47,7 +46,6 @@ import (
    kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
    . "k8s.io/autoscaler/cluster-autoscaler/utils/test"
    "k8s.io/autoscaler/cluster-autoscaler/utils/units"
    kube_client "k8s.io/client-go/kubernetes"
    "k8s.io/client-go/kubernetes/fake"
    core "k8s.io/client-go/testing"
    klog "k8s.io/klog/v2"

@ -1975,267 +1973,6 @@ func TestCheckScaleDownDeltaWithinLimits(t *testing.T) {
    }
}

func getNode(t *testing.T, client kube_client.Interface, name string) *apiv1.Node {
    t.Helper()
    node, err := client.CoreV1().Nodes().Get(ctx.TODO(), name, metav1.GetOptions{})
    if err != nil {
        t.Fatalf("Failed to retrieve node %v: %v", name, err)
    }
    return node
}

func hasDeletionCandidateTaint(t *testing.T, client kube_client.Interface, name string) bool {
    t.Helper()
    return deletetaint.HasDeletionCandidateTaint(getNode(t, client, name))
}

func getAllNodes(t *testing.T, client kube_client.Interface) []*apiv1.Node {
    t.Helper()
    nodeList, err := client.CoreV1().Nodes().List(ctx.TODO(), metav1.ListOptions{})
    if err != nil {
        t.Fatalf("Failed to retrieve list of nodes: %v", err)
    }
    result := make([]*apiv1.Node, 0, nodeList.Size())
    for _, node := range nodeList.Items {
        result = append(result, node.DeepCopy())
    }
    return result
}

func countDeletionCandidateTaints(t *testing.T, client kube_client.Interface) (total int) {
    t.Helper()
    for _, node := range getAllNodes(t, client) {
        if deletetaint.HasDeletionCandidateTaint(node) {
            total++
        }
    }
    return total
}

func TestSoftTaint(t *testing.T) {
    var err error
    var autoscalererr autoscaler_errors.AutoscalerError

    job := batchv1.Job{
        ObjectMeta: metav1.ObjectMeta{
            Name:      "job",
            Namespace: "default",
            SelfLink:  "/apivs/batch/v1/namespaces/default/jobs/job",
        },
    }
    n1000 := BuildTestNode("n1000", 1000, 1000)
    SetNodeReadyState(n1000, true, time.Time{})
    n2000 := BuildTestNode("n2000", 2000, 1000)
    SetNodeReadyState(n2000, true, time.Time{})

    p500 := BuildTestPod("p500", 500, 0)
    p700 := BuildTestPod("p700", 700, 0)
    p1200 := BuildTestPod("p1200", 1200, 0)
    p500.Spec.NodeName = "n2000"
    p700.Spec.NodeName = "n1000"
    p1200.Spec.NodeName = "n2000"

    fakeClient := fake.NewSimpleClientset()
    _, err = fakeClient.CoreV1().Nodes().Create(ctx.TODO(), n1000, metav1.CreateOptions{})
    assert.NoError(t, err)
    _, err = fakeClient.CoreV1().Nodes().Create(ctx.TODO(), n2000, metav1.CreateOptions{})
    assert.NoError(t, err)

    provider := testprovider.NewTestCloudProvider(nil, func(nodeGroup string, node string) error {
        t.Fatalf("Unexpected deletion of %s", node)
        return nil
    })
    provider.AddNodeGroup("ng1", 1, 10, 2)
    provider.AddNode("ng1", n1000)
    provider.AddNode("ng1", n2000)
    assert.NotNil(t, provider)

    options := config.AutoscalingOptions{
        NodeGroupDefaults: config.NodeGroupAutoscalingOptions{
            ScaleDownUnneededTime:         10 * time.Minute,
            ScaleDownUtilizationThreshold: 0.5,
        },
        MaxGracefulTerminationSec: 60,
        MaxBulkSoftTaintCount:     1,
        MaxBulkSoftTaintTime:      3 * time.Second,
    }
    jobLister, err := kube_util.NewTestJobLister([]*batchv1.Job{&job})
    assert.NoError(t, err)
    registry := kube_util.NewListerRegistry(nil, nil, nil, nil, nil, nil, nil, jobLister, nil, nil)

    context, err := NewScaleTestAutoscalingContext(options, fakeClient, registry, provider, nil, nil)
    assert.NoError(t, err)

    clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
    scaleDown := newScaleDownForTesting(&context, clusterStateRegistry)

    // Test no superfluous nodes
    nodes := []*apiv1.Node{n1000, n2000}
    simulator.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, nodes, []*apiv1.Pod{p500, p700, p1200})
    autoscalererr = scaleDown.UpdateUnneededNodes(nodes, nodes, time.Now().Add(-5*time.Minute), nil)
    assert.NoError(t, autoscalererr)
    errs := scaleDown.SoftTaintUnneededNodes(getAllNodes(t, fakeClient))
    assert.Empty(t, errs)
    assert.False(t, hasDeletionCandidateTaint(t, fakeClient, n1000.Name))
    assert.False(t, hasDeletionCandidateTaint(t, fakeClient, n2000.Name))

    // Test one unneeded node
    simulator.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, nodes, []*apiv1.Pod{p500, p1200})
    autoscalererr = scaleDown.UpdateUnneededNodes(nodes, nodes, time.Now().Add(-5*time.Minute), nil)
    assert.NoError(t, autoscalererr)
    errs = scaleDown.SoftTaintUnneededNodes(getAllNodes(t, fakeClient))
    assert.Empty(t, errs)
    assert.True(t, hasDeletionCandidateTaint(t, fakeClient, n1000.Name))
    assert.False(t, hasDeletionCandidateTaint(t, fakeClient, n2000.Name))

    // Test remove soft taint
    simulator.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, nodes, []*apiv1.Pod{p500, p700, p1200})
    autoscalererr = scaleDown.UpdateUnneededNodes(nodes, nodes, time.Now().Add(-5*time.Minute), nil)
    assert.NoError(t, autoscalererr)
    errs = scaleDown.SoftTaintUnneededNodes(getAllNodes(t, fakeClient))
    assert.Empty(t, errs)
    assert.False(t, hasDeletionCandidateTaint(t, fakeClient, n1000.Name))
    assert.False(t, hasDeletionCandidateTaint(t, fakeClient, n2000.Name))

    // Test bulk update taint limit
    simulator.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, nodes, []*apiv1.Pod{})
    autoscalererr = scaleDown.UpdateUnneededNodes(nodes, nodes, time.Now().Add(-5*time.Minute), nil)
    assert.NoError(t, autoscalererr)
    errs = scaleDown.SoftTaintUnneededNodes(getAllNodes(t, fakeClient))
    assert.Empty(t, errs)
    assert.Equal(t, 1, countDeletionCandidateTaints(t, fakeClient))
    errs = scaleDown.SoftTaintUnneededNodes(getAllNodes(t, fakeClient))
    assert.Empty(t, errs)
    assert.Equal(t, 2, countDeletionCandidateTaints(t, fakeClient))

    // Test bulk update untaint limit
    simulator.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, nodes, []*apiv1.Pod{p500, p700, p1200})
    autoscalererr = scaleDown.UpdateUnneededNodes(nodes, nodes, time.Now().Add(-5*time.Minute), nil)
    assert.NoError(t, autoscalererr)
    errs = scaleDown.SoftTaintUnneededNodes(getAllNodes(t, fakeClient))
    assert.Empty(t, errs)
    assert.Equal(t, 1, countDeletionCandidateTaints(t, fakeClient))
    errs = scaleDown.SoftTaintUnneededNodes(getAllNodes(t, fakeClient))
    assert.Empty(t, errs)
    assert.Equal(t, 0, countDeletionCandidateTaints(t, fakeClient))
}

func TestSoftTaintTimeLimit(t *testing.T) {
    var autoscalererr autoscaler_errors.AutoscalerError

    job := batchv1.Job{
        ObjectMeta: metav1.ObjectMeta{
            Name:      "job",
            Namespace: "default",
            SelfLink:  "/apivs/batch/v1/namespaces/default/jobs/job",
        },
    }
    n1 := BuildTestNode("n1", 1000, 1000)
    SetNodeReadyState(n1, true, time.Time{})
    n2 := BuildTestNode("n2", 1000, 1000)
    SetNodeReadyState(n2, true, time.Time{})

    p1 := BuildTestPod("p1", 1000, 0)
    p2 := BuildTestPod("p2", 1000, 0)
    p1.Spec.NodeName = "n1"
    p2.Spec.NodeName = "n2"

    currentTime := time.Now()
    updateTime := time.Millisecond
    maxSoftTaintDuration := 1 * time.Second

    // Replace time tracking function
    now = func() time.Time {
        return currentTime
    }
    defer func() {
        now = time.Now
        return
    }()

    fakeClient := fake.NewSimpleClientset()
    _, err := fakeClient.CoreV1().Nodes().Create(ctx.TODO(), n1, metav1.CreateOptions{})
    assert.NoError(t, err)
    _, err = fakeClient.CoreV1().Nodes().Create(ctx.TODO(), n2, metav1.CreateOptions{})
    assert.NoError(t, err)

    // Move time forward when updating
    fakeClient.Fake.PrependReactor("update", "nodes", func(action core.Action) (bool, runtime.Object, error) {
        currentTime = currentTime.Add(updateTime)
        return false, nil, nil
    })

    provider := testprovider.NewTestCloudProvider(nil, nil)
    provider.AddNodeGroup("ng1", 1, 10, 2)
    provider.AddNode("ng1", n1)
    provider.AddNode("ng1", n2)
    assert.NotNil(t, provider)

    options := config.AutoscalingOptions{
        NodeGroupDefaults: config.NodeGroupAutoscalingOptions{
            ScaleDownUnneededTime:         10 * time.Minute,
            ScaleDownUtilizationThreshold: 0.5,
        },
        MaxGracefulTerminationSec: 60,
        MaxBulkSoftTaintCount:     10,
        MaxBulkSoftTaintTime:      maxSoftTaintDuration,
    }
    jobLister, err := kube_util.NewTestJobLister([]*batchv1.Job{&job})
    assert.NoError(t, err)
    registry := kube_util.NewListerRegistry(nil, nil, nil, nil, nil, nil, nil, jobLister, nil, nil)

    context, err := NewScaleTestAutoscalingContext(options, fakeClient, registry, provider, nil, nil)
    assert.NoError(t, err)

    clusterStateRegistry := clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, NewBackoff())
    scaleDown := newScaleDownForTesting(&context, clusterStateRegistry)

    // Test bulk taint
    nodes := []*apiv1.Node{n1, n2}
    simulator.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, nodes, []*apiv1.Pod{})
    autoscalererr = scaleDown.UpdateUnneededNodes(nodes, nodes, time.Now().Add(-5*time.Minute), nil)
    assert.NoError(t, autoscalererr)
    errs := scaleDown.SoftTaintUnneededNodes(getAllNodes(t, fakeClient))
    assert.Empty(t, errs)
    assert.Equal(t, 2, countDeletionCandidateTaints(t, fakeClient))
    assert.True(t, hasDeletionCandidateTaint(t, fakeClient, n1.Name))
    assert.True(t, hasDeletionCandidateTaint(t, fakeClient, n2.Name))

    // Test bulk untaint
    simulator.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, nodes, []*apiv1.Pod{p1, p2})
    autoscalererr = scaleDown.UpdateUnneededNodes(nodes, nodes, time.Now().Add(-5*time.Minute), nil)
    assert.NoError(t, autoscalererr)
    errs = scaleDown.SoftTaintUnneededNodes(getAllNodes(t, fakeClient))
    assert.Empty(t, errs)
    assert.Equal(t, 0, countDeletionCandidateTaints(t, fakeClient))
    assert.False(t, hasDeletionCandidateTaint(t, fakeClient, n1.Name))
    assert.False(t, hasDeletionCandidateTaint(t, fakeClient, n2.Name))

    updateTime = maxSoftTaintDuration

    // Test duration limit of bulk taint
    simulator.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, nodes, []*apiv1.Pod{})
    autoscalererr = scaleDown.UpdateUnneededNodes(nodes, nodes, time.Now().Add(-5*time.Minute), nil)
    assert.NoError(t, autoscalererr)
    errs = scaleDown.SoftTaintUnneededNodes(getAllNodes(t, fakeClient))
    assert.Empty(t, errs)
    assert.Equal(t, 1, countDeletionCandidateTaints(t, fakeClient))
    errs = scaleDown.SoftTaintUnneededNodes(getAllNodes(t, fakeClient))
    assert.Empty(t, errs)
    assert.Equal(t, 2, countDeletionCandidateTaints(t, fakeClient))

    // Test duration limit of bulk untaint
    simulator.InitializeClusterSnapshotOrDie(t, context.ClusterSnapshot, nodes, []*apiv1.Pod{p1, p2})
    autoscalererr = scaleDown.UpdateUnneededNodes(nodes, nodes, time.Now().Add(-5*time.Minute), nil)
    assert.NoError(t, autoscalererr)
    errs = scaleDown.SoftTaintUnneededNodes(getAllNodes(t, fakeClient))
    assert.Empty(t, errs)
    assert.Equal(t, 1, countDeletionCandidateTaints(t, fakeClient))
    errs = scaleDown.SoftTaintUnneededNodes(getAllNodes(t, fakeClient))
    assert.Empty(t, errs)
    assert.Equal(t, 0, countDeletionCandidateTaints(t, fakeClient))
}

func TestWaitForDelayDeletion(t *testing.T) {
    type testcase struct {
        name string

@ -31,6 +31,7 @@ import (
    "k8s.io/autoscaler/cluster-autoscaler/clusterstate/utils"
    "k8s.io/autoscaler/cluster-autoscaler/config"
    "k8s.io/autoscaler/cluster-autoscaler/context"
    "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/actuation"
    "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/legacy"
    core_utils "k8s.io/autoscaler/cluster-autoscaler/core/utils"
    "k8s.io/autoscaler/cluster-autoscaler/estimator"

@ -563,7 +564,9 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
    if (scaleDownStatus.Result == status.ScaleDownNoNodeDeleted ||
        scaleDownStatus.Result == status.ScaleDownNoUnneeded) &&
        a.AutoscalingContext.AutoscalingOptions.MaxBulkSoftTaintCount != 0 {
        scaleDown.SoftTaintUnneededNodes(allNodes)
        taintableNodes := a.scaleDown.UnneededNodes()
        untaintableNodes := subtractNodes(allNodes, taintableNodes)
        actuation.UpdateSoftDeletionTaints(a.AutoscalingContext, taintableNodes, untaintableNodes)
    }

    if a.processors != nil && a.processors.ScaleDownStatusProcessor != nil {

@ -816,3 +819,18 @@ func calculateCoresMemoryTotal(nodes []*apiv1.Node, timestamp time.Time) (int64,
    return coresTotal, memoryTotal
}

func subtractNodes(a []*apiv1.Node, b []*apiv1.Node) []*apiv1.Node {
    var c []*apiv1.Node
    namesToDrop := make(map[string]bool)
    for _, n := range b {
        namesToDrop[n.Name] = true
    }
    for _, n := range a {
        if namesToDrop[n.Name] {
            continue
        }
        c = append(c, n)
    }
    return c
}
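
subtractNodes keeps the order of a and drops every node whose name also appears in b; RunOnce uses it to split allNodes into the unneeded (taintable) set and everything else. A tiny standalone sketch of the same name-set subtraction on plain strings, purely illustrative and not part of the commit:

package main

import "fmt"

// subtract returns the elements of a that are not present in b,
// preserving the order of a — the same shape as subtractNodes above.
func subtract(a, b []string) []string {
    drop := make(map[string]bool, len(b))
    for _, name := range b {
        drop[name] = true
    }
    var out []string
    for _, name := range a {
        if !drop[name] {
            out = append(out, name)
        }
    }
    return out
}

func main() {
    all := []string{"n0", "n1", "n2", "n3", "n4"}
    unneeded := []string{"n1", "n3"}
    fmt.Println(subtract(all, unneeded)) // [n0 n2 n4]
}
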
@ -1271,6 +1271,56 @@ func TestRemoveOldUnregisteredNodes(t *testing.T) {
    assert.Equal(t, "ng1/ng1-2", deletedNode)
}

func TestSubtractNodes(t *testing.T) {
    ns := make([]*apiv1.Node, 5)
    for i := 0; i < len(ns); i++ {
        ns[i] = BuildTestNode(fmt.Sprintf("n%d", i), 1000, 1000)
    }
    testCases := []struct {
        a []*apiv1.Node
        b []*apiv1.Node
        c []*apiv1.Node
    }{
        {
            a: ns,
            b: nil,
            c: ns,
        },
        {
            a: nil,
            b: ns,
            c: nil,
        },
        {
            a: ns,
            b: []*apiv1.Node{ns[3]},
            c: []*apiv1.Node{ns[0], ns[1], ns[2], ns[4]},
        },
        {
            a: ns,
            b: []*apiv1.Node{ns[0], ns[1], ns[2], ns[4]},
            c: []*apiv1.Node{ns[3]},
        },
        {
            a: []*apiv1.Node{ns[3]},
            b: []*apiv1.Node{ns[0], ns[1], ns[2], ns[4]},
            c: []*apiv1.Node{ns[3]},
        },
    }
    for _, tc := range testCases {
        got := subtractNodes(tc.a, tc.b)
        assert.Equal(t, nodeNames(got), nodeNames(tc.c))
    }
}

func nodeNames(ns []*apiv1.Node) []string {
    names := make([]string, len(ns))
    for i, node := range ns {
        names[i] = node.Name
    }
    return names
}

func waitForDeleteToFinish(t *testing.T, sd *legacy.ScaleDown) {
    for start := time.Now(); time.Since(start) < 20*time.Second; time.Sleep(100 * time.Millisecond) {
        if !sd.IsNonEmptyNodeDeleteInProgress() {
