Expose backoff time parameters
This commit is contained in:
parent
52a2adebf1
commit
03a0475502
|
|
@ -42,15 +42,6 @@ import (
|
|||
const (
|
||||
// MaxNodeStartupTime is the maximum time from the moment the node is registered to the time the node is ready.
|
||||
MaxNodeStartupTime = 15 * time.Minute
|
||||
|
||||
// MaxNodeGroupBackoffDuration is the maximum backoff duration for a NodeGroup after new nodes failed to start.
|
||||
MaxNodeGroupBackoffDuration = 30 * time.Minute
|
||||
|
||||
// InitialNodeGroupBackoffDuration is the duration of the first backoff after a new node failed to start.
|
||||
InitialNodeGroupBackoffDuration = 5 * time.Minute
|
||||
|
||||
// NodeGroupBackoffResetTimeout is the time after last failed scale-up when the backoff duration is reset.
|
||||
NodeGroupBackoffResetTimeout = 3 * time.Hour
|
||||
)
|
||||
|
||||
// ScaleUpRequest contains information about the requested node group scale up.
|
||||
|
|
|
|||
|
|
@ -653,7 +653,7 @@ func TestUpdateLastTransitionTimes(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
expectedNgTimestamps := make(map[string](map[api.ClusterAutoscalerConditionType]metav1.Time), 0)
|
||||
expectedNgTimestamps := make(map[string]map[api.ClusterAutoscalerConditionType]metav1.Time, 0)
|
||||
// Same as cluster-wide
|
||||
expectedNgTimestamps["ng1"] = map[api.ClusterAutoscalerConditionType]metav1.Time{
|
||||
api.ClusterAutoscalerHealth: now,
|
||||
|
|
@ -710,7 +710,7 @@ func TestScaleUpBackoff(t *testing.T) {
|
|||
assert.False(t, clusterstate.IsNodeGroupSafeToScaleUp(ng1, now))
|
||||
|
||||
// Backoff should expire after timeout
|
||||
now = now.Add(InitialNodeGroupBackoffDuration).Add(time.Second)
|
||||
now = now.Add(5 * time.Minute /*InitialNodeGroupBackoffDuration*/).Add(time.Second)
|
||||
assert.True(t, clusterstate.IsClusterHealthy())
|
||||
assert.True(t, clusterstate.IsNodeGroupHealthy("ng1"))
|
||||
assert.True(t, clusterstate.IsNodeGroupSafeToScaleUp(ng1, now))
|
||||
|
|
@ -724,7 +724,7 @@ func TestScaleUpBackoff(t *testing.T) {
|
|||
assert.True(t, clusterstate.IsNodeGroupHealthy("ng1"))
|
||||
assert.False(t, clusterstate.IsNodeGroupSafeToScaleUp(ng1, now))
|
||||
|
||||
now = now.Add(InitialNodeGroupBackoffDuration).Add(time.Second)
|
||||
now = now.Add(5 * time.Minute /*InitialNodeGroupBackoffDuration*/).Add(time.Second)
|
||||
assert.False(t, clusterstate.IsNodeGroupSafeToScaleUp(ng1, now))
|
||||
|
||||
// The backoff should be cleared after a successful scale-up
|
||||
|
|
@ -873,5 +873,6 @@ func TestScaleUpFailures(t *testing.T) {
|
|||
}
|
||||
|
||||
func newBackoff() backoff.Backoff {
|
||||
return backoff.NewIdBasedExponentialBackoff(InitialNodeGroupBackoffDuration, MaxNodeGroupBackoffDuration, NodeGroupBackoffResetTimeout)
|
||||
return backoff.NewIdBasedExponentialBackoff(5*time.Minute, /*InitialNodeGroupBackoffDuration*/
|
||||
30*time.Minute /*MaxNodeGroupBackoffDuration*/, 3*time.Hour /*NodeGroupBackoffResetTimeout*/)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -171,4 +171,10 @@ type AutoscalingOptions struct {
|
|||
DaemonSetEvictionForOccupiedNodes bool
|
||||
// User agent to use for HTTP calls.
|
||||
UserAgent string
|
||||
// InitialNodeGroupBackoffDuration is the duration of the first backoff after a new node failed to start.
|
||||
InitialNodeGroupBackoffDuration time.Duration
|
||||
// MaxNodeGroupBackoffDuration is the maximum backoff duration for a NodeGroup after new nodes failed to start.
|
||||
MaxNodeGroupBackoffDuration time.Duration
|
||||
// NodeGroupBackoffResetTimeout is the time after last failed scale-up when the backoff duration is reset.
|
||||
NodeGroupBackoffResetTimeout time.Duration
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,7 +22,6 @@ import (
|
|||
|
||||
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
|
||||
cloudBuilder "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/builder"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/clusterstate"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/config"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/context"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/debuggingsnapshot"
|
||||
|
|
@ -121,7 +120,7 @@ func initializeDefaultOptions(opts *AutoscalerOptions) error {
|
|||
}
|
||||
if opts.Backoff == nil {
|
||||
opts.Backoff =
|
||||
backoff.NewIdBasedExponentialBackoff(clusterstate.InitialNodeGroupBackoffDuration, clusterstate.MaxNodeGroupBackoffDuration, clusterstate.NodeGroupBackoffResetTimeout)
|
||||
backoff.NewIdBasedExponentialBackoff(opts.InitialNodeGroupBackoffDuration, opts.MaxNodeGroupBackoffDuration, opts.NodeGroupBackoffResetTimeout)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ import (
|
|||
"fmt"
|
||||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"k8s.io/autoscaler/cluster-autoscaler/debuggingsnapshot"
|
||||
|
||||
|
|
@ -53,7 +54,6 @@ import (
|
|||
"github.com/stretchr/testify/assert"
|
||||
|
||||
apiv1 "k8s.io/api/core/v1"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/clusterstate"
|
||||
"k8s.io/autoscaler/cluster-autoscaler/utils/backoff"
|
||||
kube_client "k8s.io/client-go/kubernetes"
|
||||
kube_record "k8s.io/client-go/tools/record"
|
||||
|
|
@ -296,5 +296,6 @@ func (p *MockAutoprovisioningNodeGroupListProcessor) CleanUp() {
|
|||
|
||||
// NewBackoff creates a new backoff object.
|
||||
func NewBackoff() backoff.Backoff {
|
||||
return backoff.NewIdBasedExponentialBackoff(clusterstate.InitialNodeGroupBackoffDuration, clusterstate.MaxNodeGroupBackoffDuration, clusterstate.NodeGroupBackoffResetTimeout)
|
||||
return backoff.NewIdBasedExponentialBackoff(5*time.Minute, /*InitialNodeGroupBackoffDuration*/
|
||||
30*time.Minute /*MaxNodeGroupBackoffDuration*/, 3*time.Hour /*NodeGroupBackoffResetTimeout*/)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ import (
|
|||
componentbaseconfig "k8s.io/component-base/config"
|
||||
"k8s.io/component-base/config/options"
|
||||
"k8s.io/component-base/metrics/legacyregistry"
|
||||
klog "k8s.io/klog/v2"
|
||||
"k8s.io/klog/v2"
|
||||
)
|
||||
|
||||
// MultiStringFlag is a flag for passing multiple parameters using same flag
|
||||
|
|
@ -188,10 +188,16 @@ var (
|
|||
daemonSetEvictionForEmptyNodes = flag.Bool("daemonset-eviction-for-empty-nodes", false, "DaemonSet pods will be gracefully terminated from empty nodes")
|
||||
daemonSetEvictionForOccupiedNodes = flag.Bool("daemonset-eviction-for-occupied-nodes", true, "DaemonSet pods will be gracefully terminated from non-empty nodes")
|
||||
userAgent = flag.String("user-agent", "cluster-autoscaler", "User agent used for HTTP calls.")
|
||||
emitPerNodeGroupMetrics = flag.Bool("emit-per-nodegroup-metrics", false, "If true, emit per node group metrics.")
|
||||
debuggingSnapshotEnabled = flag.Bool("debugging-snapshot-enabled", false, "Whether the debugging snapshot of cluster autoscaler feature is enabled")
|
||||
nodeInfoCacheExpireTime = flag.Duration("node-info-cache-expire-time", 87600*time.Hour, "Node Info cache expire time for each item. Default value is 10 years.")
|
||||
|
||||
emitPerNodeGroupMetrics = flag.Bool("emit-per-nodegroup-metrics", false, "If true, emit per node group metrics.")
|
||||
debuggingSnapshotEnabled = flag.Bool("debugging-snapshot-enabled", false, "Whether the debugging snapshot of cluster autoscaler feature is enabled")
|
||||
nodeInfoCacheExpireTime = flag.Duration("node-info-cache-expire-time", 87600*time.Hour, "Node Info cache expire time for each item. Default value is 10 years.")
|
||||
initialNodeGroupBackoffDuration = flag.Duration("initial-node-group-backoff-duration", 5*time.Minute,
|
||||
"initialNodeGroupBackoffDuration is the duration of first backoff after a new node failed to start.")
|
||||
maxNodeGroupBackoffDuration = flag.Duration("max-node-group-backoff-duration", 30*time.Minute,
|
||||
"maxNodeGroupBackoffDuration is the maximum backoff duration for a NodeGroup after new nodes failed to start.")
|
||||
nodeGroupBackoffResetTimeout = flag.Duration("node-group-backoff-reset-timeout", 3*time.Hour,
|
||||
"nodeGroupBackoffResetTimeout is the time after last failed scale-up when the backoff duration is reset.")
|
||||
)
|
||||
|
||||
func createAutoscalingOptions() config.AutoscalingOptions {
|
||||
|
|
@ -272,6 +278,9 @@ func createAutoscalingOptions() config.AutoscalingOptions {
|
|||
DaemonSetEvictionForEmptyNodes: *daemonSetEvictionForEmptyNodes,
|
||||
DaemonSetEvictionForOccupiedNodes: *daemonSetEvictionForOccupiedNodes,
|
||||
UserAgent: *userAgent,
|
||||
InitialNodeGroupBackoffDuration: *initialNodeGroupBackoffDuration,
|
||||
MaxNodeGroupBackoffDuration: *maxNodeGroupBackoffDuration,
|
||||
NodeGroupBackoffResetTimeout: *nodeGroupBackoffResetTimeout,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue