Fix bug where a node that becomes ready after 2 mins can be treated as unready. Deprecated LongNotStarted.

In cases where node n1 would:
1) be created at t=0min
2) have its Ready condition become true at t=2.5min
3) have its not-ready taint removed at t=3min
the ready node was counted as unready.

Tested cases after the fix:
1) the case described above
2) nodes not starting even after 15 mins are still treated as unready
3) nodes created long ago that suddenly become unready are counted as unready
parent bb3b24c1ba
commit 8c592f0c04
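For reference, the timeline above can be checked against the new counting rule with a minimal standalone sketch. This is illustrative Go, not the autoscaler's own types or helpers; the simplified `node` struct and `classify` function are invented here. A node counts as Ready only once its Ready condition is true and the not-ready taint is gone; any other node created less than 15 minutes ago counts as NotStarted rather than Unready.

```go
package main

import (
	"fmt"
	"time"
)

// Simplified stand-in for the information the autoscaler looks at.
type node struct {
	created        time.Time
	readyCondition bool // NodeReady condition is True
	notReadyTaint  bool // not-ready taint still present
}

const maxNodeStartupTime = 15 * time.Minute

// classify mirrors the order used after the fix: Ready first, then a single
// "NotStarted" bucket for unready nodes still inside the startup window.
// Before the fix, a separate 2-minute window keyed off the Ready condition's
// transition time let the n1 scenario below fall through to "Unready".
func classify(n node, now time.Time) string {
	ready := n.readyCondition && !n.notReadyTaint
	switch {
	case ready:
		return "Ready"
	case n.created.Add(maxNodeStartupTime).After(now):
		return "NotStarted"
	default:
		return "Unready"
	}
}

func main() {
	created := time.Now()
	n1 := node{created: created, notReadyTaint: true}

	// t=2.5min: Ready condition becomes true, taint not yet removed.
	n1.readyCondition = true
	fmt.Println(classify(n1, created.Add(150*time.Second))) // NotStarted

	// t=3min: not-ready taint removed.
	n1.notReadyTaint = false
	fmt.Println(classify(n1, created.Add(3*time.Minute))) // Ready
}
```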
@@ -43,10 +43,6 @@ const (
 	// MaxNodeStartupTime is the maximum time from the moment the node is registered to the time the node is ready.
 	MaxNodeStartupTime = 15 * time.Minute
 
-	// MaxStatusSettingDelayAfterCreation is the maximum time for node to set its initial status after the
-	// node is registered.
-	MaxStatusSettingDelayAfterCreation = 2 * time.Minute
-
 	// MaxNodeGroupBackoffDuration is the maximum backoff duration for a NodeGroup after new nodes failed to start.
 	MaxNodeGroupBackoffDuration = 30 * time.Minute
 
@@ -399,7 +395,7 @@ func (csr *ClusterStateRegistry) IsNodeGroupHealthy(nodeGroupName string) bool {
 
 	if unjustifiedUnready > csr.config.OkTotalUnreadyCount &&
 		float64(unjustifiedUnready) > csr.config.MaxTotalUnreadyPercentage/100.0*
-			float64(readiness.Ready+readiness.Unready+readiness.NotStarted+readiness.LongNotStarted) {
+			float64(readiness.Ready+readiness.Unready+readiness.NotStarted) {
 		return false
 	}
 
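In words, the hunk above keeps a node group healthy unless the unjustified-unready count exceeds both the absolute OkTotalUnreadyCount and MaxTotalUnreadyPercentage of the node total, where the total no longer includes the dropped LongNotStarted bucket. A toy calculation with invented numbers (not autoscaler code):

```go
package main

import "fmt"

func main() {
	okTotalUnreadyCount := 3
	maxTotalUnreadyPercentage := 45.0
	ready, unready, notStarted := 10, 5, 2
	unjustifiedUnready := 5

	unhealthy := unjustifiedUnready > okTotalUnreadyCount &&
		float64(unjustifiedUnready) > maxTotalUnreadyPercentage/100.0*
			float64(ready+unready+notStarted)

	// 5 > 3, but 5 <= 0.45*17 = 7.65, so the group still counts as healthy.
	fmt.Println("unhealthy:", unhealthy) // false
}
```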
@@ -452,7 +448,7 @@ func (csr *ClusterStateRegistry) getProvisionedAndTargetSizesForNodeGroup(nodeGr
 		}
 		return 0, target, true
 	}
-	provisioned = readiness.Registered - readiness.NotStarted - readiness.LongNotStarted
+	provisioned = readiness.Registered - readiness.NotStarted
 
 	return provisioned, target, true
 }
@@ -531,8 +527,6 @@ type Readiness struct {
 	// Number of nodes that are being currently deleted. They exist in K8S but
 	// are not included in NodeGroup.TargetSize().
 	Deleted int
-	// Number of nodes that failed to start within a reasonable limit.
-	LongNotStarted int
 	// Number of nodes that are not yet fully started.
 	NotStarted int
 	// Number of all registered nodes in the group (ready/unready/deleted/etc).
@@ -554,12 +548,10 @@ func (csr *ClusterStateRegistry) updateReadinessStats(currentTime time.Time) {
 		current.Registered++
 		if deletetaint.HasToBeDeletedTaint(node) {
 			current.Deleted++
-		} else if stillStarting := isNodeStillStarting(node); stillStarting && node.CreationTimestamp.Time.Add(MaxNodeStartupTime).Before(currentTime) {
-			current.LongNotStarted++
-		} else if stillStarting {
-			current.NotStarted++
 		} else if ready {
 			current.Ready++
+		} else if node.CreationTimestamp.Time.Add(MaxNodeStartupTime).After(currentTime) {
+			current.NotStarted++
 		} else {
 			current.Unready++
 		}
@@ -743,11 +735,10 @@ func (csr *ClusterStateRegistry) GetClusterReadiness() Readiness {
 func buildHealthStatusNodeGroup(isReady bool, readiness Readiness, acceptable AcceptableRange, minSize, maxSize int) api.ClusterAutoscalerCondition {
 	condition := api.ClusterAutoscalerCondition{
 		Type: api.ClusterAutoscalerHealth,
-		Message: fmt.Sprintf("ready=%d unready=%d notStarted=%d longNotStarted=%d registered=%d longUnregistered=%d cloudProviderTarget=%d (minSize=%d, maxSize=%d)",
+		Message: fmt.Sprintf("ready=%d unready=%d notStarted=%d longNotStarted=0 registered=%d longUnregistered=%d cloudProviderTarget=%d (minSize=%d, maxSize=%d)",
 			readiness.Ready,
 			readiness.Unready,
 			readiness.NotStarted,
-			readiness.LongNotStarted,
 			readiness.Registered,
 			readiness.LongUnregistered,
 			acceptable.CurrentTarget,
@@ -798,11 +789,10 @@ func buildScaleDownStatusNodeGroup(candidates []string, lastProbed time.Time) ap
 func buildHealthStatusClusterwide(isReady bool, readiness Readiness) api.ClusterAutoscalerCondition {
 	condition := api.ClusterAutoscalerCondition{
 		Type: api.ClusterAutoscalerHealth,
-		Message: fmt.Sprintf("ready=%d unready=%d notStarted=%d longNotStarted=%d registered=%d longUnregistered=%d",
+		Message: fmt.Sprintf("ready=%d unready=%d notStarted=%d longNotStarted=0 registered=%d longUnregistered=%d",
 			readiness.Ready,
 			readiness.Unready,
 			readiness.NotStarted,
-			readiness.LongNotStarted,
 			readiness.Registered,
 			readiness.LongUnregistered,
 		),
@@ -860,39 +850,6 @@ func buildScaleDownStatusClusterwide(candidates map[string][]string, lastProbed
 	return condition
 }
 
-func hasTaint(node *apiv1.Node, taintKey string) bool {
-	for _, taint := range node.Spec.Taints {
-		if taint.Key == taintKey {
-			return true
-		}
-	}
-	return false
-}
-
-func isNodeStillStarting(node *apiv1.Node) bool {
-	for _, condition := range node.Status.Conditions {
-		if condition.Type == apiv1.NodeReady {
-			notReady := condition.Status != apiv1.ConditionTrue || hasTaint(node, apiv1.TaintNodeNotReady)
-			if notReady && condition.LastTransitionTime.Time.Sub(node.CreationTimestamp.Time) < MaxStatusSettingDelayAfterCreation {
-				return true
-			}
-		}
-		if condition.Type == apiv1.NodeDiskPressure {
-			notReady := condition.Status == apiv1.ConditionTrue || hasTaint(node, apiv1.TaintNodeDiskPressure)
-			if notReady && condition.LastTransitionTime.Time.Sub(node.CreationTimestamp.Time) < MaxStatusSettingDelayAfterCreation {
-				return true
-			}
-		}
-		if condition.Type == apiv1.NodeNetworkUnavailable {
-			notReady := condition.Status == apiv1.ConditionTrue || hasTaint(node, apiv1.TaintNodeNetworkUnavailable)
-			if notReady && condition.LastTransitionTime.Time.Sub(node.CreationTimestamp.Time) < MaxStatusSettingDelayAfterCreation {
-				return true
-			}
-		}
-	}
-	return false
-}
-
 func updateLastTransition(oldStatus, newStatus *api.ClusterAutoscalerStatus) {
 	newStatus.ClusterwideConditions = updateLastTransitionSingleList(
 		oldStatus.ClusterwideConditions, newStatus.ClusterwideConditions)
@@ -955,7 +912,7 @@ func (csr *ClusterStateRegistry) GetUpcomingNodes() map[string]int {
 		readiness := csr.perNodeGroupReadiness[id]
 		ar := csr.acceptableRanges[id]
 		// newNodes is the number of nodes that
-		newNodes := ar.CurrentTarget - (readiness.Ready + readiness.Unready + readiness.LongNotStarted + readiness.LongUnregistered)
+		newNodes := ar.CurrentTarget - (readiness.Ready + readiness.Unready + readiness.LongUnregistered)
 		if newNodes <= 0 {
 			// Negative value is unlikely but theoretically possible.
 			continue
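The upcoming-node count above is now the target size minus nodes already accounted for (ready, unready, or long-unregistered); nodes still inside their startup window are deliberately left in the upcoming total. A toy calculation with invented numbers:

```go
package main

import "fmt"

func main() {
	currentTarget := 10
	ready, unready, longUnregistered := 6, 1, 1
	// NotStarted nodes are not subtracted: they are expected to become
	// ready soon, so they still count toward the upcoming capacity.
	upcoming := currentTarget - (ready + unready + longUnregistered)
	if upcoming < 0 {
		upcoming = 0 // negative values are unlikely but theoretically possible
	}
	fmt.Println("upcoming:", upcoming) // 2
}
```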
@@ -1006,7 +963,7 @@ func (csr *ClusterStateRegistry) GetAutoscaledNodesCount() (currentSize, targetS
 		targetSize += accRange.CurrentTarget
 	}
 	for _, readiness := range csr.perNodeGroupReadiness {
-		currentSize += readiness.Registered - readiness.NotStarted - readiness.LongNotStarted
+		currentSize += readiness.Registered - readiness.NotStarted
 	}
 	return currentSize, targetSize
 }
@@ -319,6 +319,79 @@ func TestTooManyUnready(t *testing.T) {
 	assert.True(t, clusterstate.IsNodeGroupHealthy("ng1"))
 }
 
+func TestUnreadyLongAfterCreation(t *testing.T) {
+	now := time.Now()
+
+	ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
+	SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
+	ng2_1 := BuildTestNode("ng2-1", 1000, 1000)
+	SetNodeReadyState(ng2_1, false, now.Add(-time.Minute))
+	ng2_1.CreationTimestamp = metav1.Time{Time: now.Add(-30 * time.Minute)}
+
+	provider := testprovider.NewTestCloudProvider(nil, nil)
+	provider.AddNodeGroup("ng1", 1, 10, 1)
+	provider.AddNodeGroup("ng2", 1, 10, 1)
+	provider.AddNode("ng1", ng1_1)
+	provider.AddNode("ng2", ng2_1)
+
+	assert.NotNil(t, provider)
+	fakeClient := &fake.Clientset{}
+	fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "some-map")
+	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
+		MaxTotalUnreadyPercentage: 10,
+		OkTotalUnreadyCount:       1,
+	}, fakeLogRecorder, newBackoff())
+	err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
+	assert.NoError(t, err)
+	assert.Equal(t, 1, clusterstate.GetClusterReadiness().Unready)
+	assert.Equal(t, 0, clusterstate.GetClusterReadiness().NotStarted)
+	upcoming := clusterstate.GetUpcomingNodes()
+	assert.Equal(t, 0, upcoming["ng1"])
+}
+
+func TestNotStarted(t *testing.T) {
+	now := time.Now()
+
+	ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
+	SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
+	ng2_1 := BuildTestNode("ng2-1", 1000, 1000)
+	SetNodeReadyState(ng2_1, false, now.Add(-4*time.Minute))
+	SetNodeNotReadyTaint(ng2_1)
+	ng2_1.CreationTimestamp = metav1.Time{Time: now.Add(-10 * time.Minute)}
+
+	provider := testprovider.NewTestCloudProvider(nil, nil)
+	provider.AddNodeGroup("ng1", 1, 10, 1)
+	provider.AddNodeGroup("ng2", 1, 10, 1)
+	provider.AddNode("ng1", ng1_1)
+	provider.AddNode("ng2", ng2_1)
+
+	assert.NotNil(t, provider)
+	fakeClient := &fake.Clientset{}
+	fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "some-map")
+	clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
+		MaxTotalUnreadyPercentage: 10,
+		OkTotalUnreadyCount:       1,
+	}, fakeLogRecorder, newBackoff())
+	err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
+	assert.NoError(t, err)
+	assert.Equal(t, 1, clusterstate.GetClusterReadiness().NotStarted)
+	assert.Equal(t, 1, clusterstate.GetClusterReadiness().Ready)
+
+	// node ng2_1 moves condition to ready
+	SetNodeReadyState(ng2_1, true, now.Add(-4*time.Minute))
+	err = clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
+	assert.NoError(t, err)
+	assert.Equal(t, 1, clusterstate.GetClusterReadiness().NotStarted)
+	assert.Equal(t, 1, clusterstate.GetClusterReadiness().Ready)
+
+	// node ng2_1 no longer has the taint
+	RemoveNodeNotReadyTaint(ng2_1)
+	err = clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1}, nil, now)
+	assert.NoError(t, err)
+	assert.Equal(t, 0, clusterstate.GetClusterReadiness().NotStarted)
+	assert.Equal(t, 2, clusterstate.GetClusterReadiness().Ready)
+}
+
 func TestExpiredScaleUp(t *testing.T) {
 	now := time.Now()
 
@@ -768,57 +841,6 @@ func TestUpdateScaleUp(t *testing.T) {
 	assert.Nil(t, clusterstate.scaleUpRequests["ng1"])
 }
 
-func TestIsNodeStillStarting(t *testing.T) {
-	testCases := []struct {
-		desc           string
-		condition      apiv1.NodeConditionType
-		status         apiv1.ConditionStatus
-		taintKey       string
-		expectedResult bool
-	}{
-		{"unready", apiv1.NodeReady, apiv1.ConditionFalse, "", true},
-		{"readiness unknown", apiv1.NodeReady, apiv1.ConditionUnknown, "", true},
-		{"out of disk", apiv1.NodeDiskPressure, apiv1.ConditionTrue, "", true},
-		{"network unavailable", apiv1.NodeNetworkUnavailable, apiv1.ConditionTrue, "", true},
-		{"started", apiv1.NodeReady, apiv1.ConditionTrue, "", false},
-		{"unready and unready taint", apiv1.NodeReady, apiv1.ConditionFalse, apiv1.TaintNodeNotReady, true},
-		{"readiness unknown and unready taint", apiv1.NodeReady, apiv1.ConditionUnknown, apiv1.TaintNodeNotReady, true},
-		{"disk pressure and disk pressure taint", apiv1.NodeDiskPressure, apiv1.ConditionTrue, apiv1.TaintNodeDiskPressure, true},
-		{"network unavailable and network unavailable taint", apiv1.NodeNetworkUnavailable, apiv1.ConditionTrue, apiv1.TaintNodeNetworkUnavailable, true},
-		{"ready but unready taint", apiv1.NodeReady, apiv1.ConditionTrue, apiv1.TaintNodeNotReady, true},
-		{"no disk pressure but disk pressure taint", apiv1.NodeDiskPressure, apiv1.ConditionFalse, apiv1.TaintNodeDiskPressure, true},
-		{"network available but network unavailable taint", apiv1.NodeNetworkUnavailable, apiv1.ConditionFalse, apiv1.TaintNodeNetworkUnavailable, true},
-	}
-	for _, tc := range testCases {
-		createTestNode := func(timeSinceCreation time.Duration) *apiv1.Node {
-			node := BuildTestNode("n1", 1000, 1000)
-			node.CreationTimestamp.Time = time.Time{}
-			testedTime := node.CreationTimestamp.Time.Add(timeSinceCreation)
-
-			SetNodeCondition(node, tc.condition, tc.status, testedTime)
-
-			if tc.taintKey != "" {
-				node.Spec.Taints = []apiv1.Taint{{
-					Key:       tc.taintKey,
-					Effect:    apiv1.TaintEffectNoSchedule,
-					TimeAdded: &metav1.Time{Time: testedTime},
-				}}
-			}
-
-			return node
-		}
-		t.Run("recent "+tc.desc, func(t *testing.T) {
-			node := createTestNode(1 * time.Minute)
-			assert.Equal(t, tc.expectedResult, isNodeStillStarting(node))
-		})
-		t.Run("long "+tc.desc, func(t *testing.T) {
-			node := createTestNode(30 * time.Minute)
-			// No matter what are the node's conditions, stop considering it not started after long enough.
-			assert.False(t, isNodeStillStarting(node))
-		})
-	}
-}
-
 func TestScaleUpFailures(t *testing.T) {
 	now := time.Now()
 
@@ -312,7 +312,7 @@ func UpdateClusterStateMetrics(csr *clusterstate.ClusterStateRegistry) {
 	}
 	metrics.UpdateClusterSafeToAutoscale(csr.IsClusterHealthy())
 	readiness := csr.GetClusterReadiness()
-	metrics.UpdateNodesCount(readiness.Ready, readiness.Unready+readiness.LongNotStarted, readiness.NotStarted, readiness.LongUnregistered, readiness.Unregistered)
+	metrics.UpdateNodesCount(readiness.Ready, readiness.Unready, readiness.NotStarted, readiness.LongUnregistered, readiness.Unregistered)
 }
 
 // GetOldestCreateTime returns oldest creation time out of the pods in the set
@@ -0,0 +1,75 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package kubernetes
+
+import (
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	apiv1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
+)
+
+func TestGetReadiness(t *testing.T) {
+	testCases := []struct {
+		desc           string
+		condition      apiv1.NodeConditionType
+		status         apiv1.ConditionStatus
+		taintKey       string
+		expectedResult bool
+	}{
+		{"ready", apiv1.NodeReady, apiv1.ConditionTrue, "", true},
+		{"unready and unready taint", apiv1.NodeReady, apiv1.ConditionFalse, apiv1.TaintNodeNotReady, false},
+		{"readiness unknown and unready taint", apiv1.NodeReady, apiv1.ConditionUnknown, apiv1.TaintNodeNotReady, false},
+		{"disk pressure and disk pressure taint", apiv1.NodeDiskPressure, apiv1.ConditionTrue, apiv1.TaintNodeDiskPressure, false},
+		{"network unavailable and network unavailable taint", apiv1.NodeNetworkUnavailable, apiv1.ConditionTrue, apiv1.TaintNodeNetworkUnavailable, false},
+		{"ready but unready taint", apiv1.NodeReady, apiv1.ConditionTrue, apiv1.TaintNodeNotReady, false},
+		{"no disk pressure but disk pressure taint", apiv1.NodeDiskPressure, apiv1.ConditionFalse, apiv1.TaintNodeDiskPressure, false},
+		{"network available but network unavailable taint", apiv1.NodeNetworkUnavailable, apiv1.ConditionFalse, apiv1.TaintNodeNetworkUnavailable, false},
+	}
+	for _, tc := range testCases {
+		createTestNode := func(timeSinceCreation time.Duration) *apiv1.Node {
+			node := BuildTestNode("n1", 1000, 1000)
+			node.CreationTimestamp.Time = time.Time{}
+			testedTime := node.CreationTimestamp.Time.Add(timeSinceCreation)
+
+			SetNodeCondition(node, tc.condition, tc.status, testedTime)
+			if tc.condition != apiv1.NodeReady {
+				SetNodeCondition(node, apiv1.NodeReady, apiv1.ConditionTrue, testedTime)
+			}
+
+			if tc.taintKey != "" {
+				node.Spec.Taints = []apiv1.Taint{{
+					Key:       tc.taintKey,
+					Effect:    apiv1.TaintEffectNoSchedule,
+					TimeAdded: &metav1.Time{Time: testedTime},
+				}}
+			}
+
+			return node
+		}
+		t.Run(tc.desc, func(t *testing.T) {
+			node := createTestNode(1 * time.Minute)
+			isReady, _, err := GetReadinessState(node)
+			assert.NoError(t, err)
+			assert.Equal(t, tc.expectedResult, isReady)
+		})
+	}
+}
@@ -162,6 +162,23 @@ func SetNodeReadyState(node *apiv1.Node, ready bool, lastTransition time.Time) {
 	}
 }
 
+// SetNodeNotReadyTaint sets the not ready taint on node.
+func SetNodeNotReadyTaint(node *apiv1.Node) {
+	node.Spec.Taints = append(node.Spec.Taints, apiv1.Taint{Key: apiv1.TaintNodeNotReady, Effect: apiv1.TaintEffectNoSchedule})
+}
+
+// RemoveNodeNotReadyTaint removes the not ready taint.
+func RemoveNodeNotReadyTaint(node *apiv1.Node) {
+	var final []apiv1.Taint
+	for i := range node.Spec.Taints {
+		if node.Spec.Taints[i].Key == apiv1.TaintNodeNotReady {
+			continue
+		}
+		final = append(final, node.Spec.Taints[i])
+	}
+	node.Spec.Taints = final
+}
+
 // SetNodeCondition sets node condition.
 func SetNodeCondition(node *apiv1.Node, conditionType apiv1.NodeConditionType, status apiv1.ConditionStatus, lastTransition time.Time) {
 	for i := range node.Status.Conditions {