Expose backoff time parameters
This commit is contained in:
		
							parent
							
								
									52a2adebf1
								
							
						
					
					
						commit
						03a0475502
					
				| 
						 | 
				
			
			@ -42,15 +42,6 @@ import (
 | 
			
		|||
const (
 | 
			
		||||
	// MaxNodeStartupTime is the maximum time from the moment the node is registered to the time the node is ready.
 | 
			
		||||
	MaxNodeStartupTime = 15 * time.Minute
 | 
			
		||||
 | 
			
		||||
	// MaxNodeGroupBackoffDuration is the maximum backoff duration for a NodeGroup after new nodes failed to start.
 | 
			
		||||
	MaxNodeGroupBackoffDuration = 30 * time.Minute
 | 
			
		||||
 | 
			
		||||
	// InitialNodeGroupBackoffDuration is the duration of first backoff after a new node failed to start.
 | 
			
		||||
	InitialNodeGroupBackoffDuration = 5 * time.Minute
 | 
			
		||||
 | 
			
		||||
	// NodeGroupBackoffResetTimeout is the time after last failed scale-up when the backoff duration is reset.
 | 
			
		||||
	NodeGroupBackoffResetTimeout = 3 * time.Hour
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// ScaleUpRequest contains information about the requested node group scale up.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -653,7 +653,7 @@ func TestUpdateLastTransitionTimes(t *testing.T) {
 | 
			
		|||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	expectedNgTimestamps := make(map[string](map[api.ClusterAutoscalerConditionType]metav1.Time), 0)
 | 
			
		||||
	expectedNgTimestamps := make(map[string]map[api.ClusterAutoscalerConditionType]metav1.Time, 0)
 | 
			
		||||
	// Same as cluster-wide
 | 
			
		||||
	expectedNgTimestamps["ng1"] = map[api.ClusterAutoscalerConditionType]metav1.Time{
 | 
			
		||||
		api.ClusterAutoscalerHealth:    now,
 | 
			
		||||
| 
						 | 
				
			
			@ -710,7 +710,7 @@ func TestScaleUpBackoff(t *testing.T) {
 | 
			
		|||
	assert.False(t, clusterstate.IsNodeGroupSafeToScaleUp(ng1, now))
 | 
			
		||||
 | 
			
		||||
	// Backoff should expire after timeout
 | 
			
		||||
	now = now.Add(InitialNodeGroupBackoffDuration).Add(time.Second)
 | 
			
		||||
	now = now.Add(5 * time.Minute /*InitialNodeGroupBackoffDuration*/).Add(time.Second)
 | 
			
		||||
	assert.True(t, clusterstate.IsClusterHealthy())
 | 
			
		||||
	assert.True(t, clusterstate.IsNodeGroupHealthy("ng1"))
 | 
			
		||||
	assert.True(t, clusterstate.IsNodeGroupSafeToScaleUp(ng1, now))
 | 
			
		||||
| 
						 | 
				
			
			@ -724,7 +724,7 @@ func TestScaleUpBackoff(t *testing.T) {
 | 
			
		|||
	assert.True(t, clusterstate.IsNodeGroupHealthy("ng1"))
 | 
			
		||||
	assert.False(t, clusterstate.IsNodeGroupSafeToScaleUp(ng1, now))
 | 
			
		||||
 | 
			
		||||
	now = now.Add(InitialNodeGroupBackoffDuration).Add(time.Second)
 | 
			
		||||
	now = now.Add(5 * time.Minute /*InitialNodeGroupBackoffDuration*/).Add(time.Second)
 | 
			
		||||
	assert.False(t, clusterstate.IsNodeGroupSafeToScaleUp(ng1, now))
 | 
			
		||||
 | 
			
		||||
	// The backoff should be cleared after a successful scale-up
 | 
			
		||||
| 
						 | 
				
			
			@ -873,5 +873,6 @@ func TestScaleUpFailures(t *testing.T) {
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
func newBackoff() backoff.Backoff {
 | 
			
		||||
	return backoff.NewIdBasedExponentialBackoff(InitialNodeGroupBackoffDuration, MaxNodeGroupBackoffDuration, NodeGroupBackoffResetTimeout)
 | 
			
		||||
	return backoff.NewIdBasedExponentialBackoff(5*time.Minute, /*InitialNodeGroupBackoffDuration*/
 | 
			
		||||
		30*time.Minute /*MaxNodeGroupBackoffDuration*/, 3*time.Hour /*NodeGroupBackoffResetTimeout*/)
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -171,4 +171,10 @@ type AutoscalingOptions struct {
 | 
			
		|||
	DaemonSetEvictionForOccupiedNodes bool
 | 
			
		||||
	// User agent to use for HTTP calls.
 | 
			
		||||
	UserAgent string
 | 
			
		||||
	// InitialNodeGroupBackoffDuration is the duration of first backoff after a new node failed to start.
 | 
			
		||||
	InitialNodeGroupBackoffDuration time.Duration
 | 
			
		||||
	// MaxNodeGroupBackoffDuration is the maximum backoff duration for a NodeGroup after new nodes failed to start.
 | 
			
		||||
	MaxNodeGroupBackoffDuration time.Duration
 | 
			
		||||
	// NodeGroupBackoffResetTimeout is the time after last failed scale-up when the backoff duration is reset.
 | 
			
		||||
	NodeGroupBackoffResetTimeout time.Duration
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -22,7 +22,6 @@ import (
 | 
			
		|||
 | 
			
		||||
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
 | 
			
		||||
	cloudBuilder "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/builder"
 | 
			
		||||
	"k8s.io/autoscaler/cluster-autoscaler/clusterstate"
 | 
			
		||||
	"k8s.io/autoscaler/cluster-autoscaler/config"
 | 
			
		||||
	"k8s.io/autoscaler/cluster-autoscaler/context"
 | 
			
		||||
	"k8s.io/autoscaler/cluster-autoscaler/debuggingsnapshot"
 | 
			
		||||
| 
						 | 
				
			
			@ -121,7 +120,7 @@ func initializeDefaultOptions(opts *AutoscalerOptions) error {
 | 
			
		|||
	}
 | 
			
		||||
	if opts.Backoff == nil {
 | 
			
		||||
		opts.Backoff =
 | 
			
		||||
			backoff.NewIdBasedExponentialBackoff(clusterstate.InitialNodeGroupBackoffDuration, clusterstate.MaxNodeGroupBackoffDuration, clusterstate.NodeGroupBackoffResetTimeout)
 | 
			
		||||
			backoff.NewIdBasedExponentialBackoff(opts.InitialNodeGroupBackoffDuration, opts.MaxNodeGroupBackoffDuration, opts.NodeGroupBackoffResetTimeout)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return nil
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -20,6 +20,7 @@ import (
 | 
			
		|||
	"fmt"
 | 
			
		||||
	"reflect"
 | 
			
		||||
	"testing"
 | 
			
		||||
	"time"
 | 
			
		||||
 | 
			
		||||
	"k8s.io/autoscaler/cluster-autoscaler/debuggingsnapshot"
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -53,7 +54,6 @@ import (
 | 
			
		|||
	"github.com/stretchr/testify/assert"
 | 
			
		||||
 | 
			
		||||
	apiv1 "k8s.io/api/core/v1"
 | 
			
		||||
	"k8s.io/autoscaler/cluster-autoscaler/clusterstate"
 | 
			
		||||
	"k8s.io/autoscaler/cluster-autoscaler/utils/backoff"
 | 
			
		||||
	kube_client "k8s.io/client-go/kubernetes"
 | 
			
		||||
	kube_record "k8s.io/client-go/tools/record"
 | 
			
		||||
| 
						 | 
				
			
			@ -296,5 +296,6 @@ func (p *MockAutoprovisioningNodeGroupListProcessor) CleanUp() {
 | 
			
		|||
 | 
			
		||||
// NewBackoff creates a new backoff object
 | 
			
		||||
func NewBackoff() backoff.Backoff {
 | 
			
		||||
	return backoff.NewIdBasedExponentialBackoff(clusterstate.InitialNodeGroupBackoffDuration, clusterstate.MaxNodeGroupBackoffDuration, clusterstate.NodeGroupBackoffResetTimeout)
 | 
			
		||||
	return backoff.NewIdBasedExponentialBackoff(5*time.Minute, /*InitialNodeGroupBackoffDuration*/
 | 
			
		||||
		30*time.Minute /*MaxNodeGroupBackoffDuration*/, 3*time.Hour /*NodeGroupBackoffResetTimeout*/)
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -62,7 +62,7 @@ import (
 | 
			
		|||
	componentbaseconfig "k8s.io/component-base/config"
 | 
			
		||||
	"k8s.io/component-base/config/options"
 | 
			
		||||
	"k8s.io/component-base/metrics/legacyregistry"
 | 
			
		||||
	klog "k8s.io/klog/v2"
 | 
			
		||||
	"k8s.io/klog/v2"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// MultiStringFlag is a flag for passing multiple parameters using same flag
 | 
			
		||||
| 
						 | 
				
			
			@ -188,10 +188,16 @@ var (
 | 
			
		|||
	daemonSetEvictionForEmptyNodes     = flag.Bool("daemonset-eviction-for-empty-nodes", false, "DaemonSet pods will be gracefully terminated from empty nodes")
 | 
			
		||||
	daemonSetEvictionForOccupiedNodes  = flag.Bool("daemonset-eviction-for-occupied-nodes", true, "DaemonSet pods will be gracefully terminated from non-empty nodes")
 | 
			
		||||
	userAgent                          = flag.String("user-agent", "cluster-autoscaler", "User agent used for HTTP calls.")
 | 
			
		||||
	emitPerNodeGroupMetrics            = flag.Bool("emit-per-nodegroup-metrics", false, "If true, emit per node group metrics.")
 | 
			
		||||
	debuggingSnapshotEnabled           = flag.Bool("debugging-snapshot-enabled", false, "Whether the debugging snapshot of cluster autoscaler feature is enabled")
 | 
			
		||||
	nodeInfoCacheExpireTime            = flag.Duration("node-info-cache-expire-time", 87600*time.Hour, "Node Info cache expire time for each item. Default value is 10 years.")
 | 
			
		||||
 | 
			
		||||
	emitPerNodeGroupMetrics  = flag.Bool("emit-per-nodegroup-metrics", false, "If true, emit per node group metrics.")
 | 
			
		||||
	debuggingSnapshotEnabled = flag.Bool("debugging-snapshot-enabled", false, "Whether the debugging snapshot of cluster autoscaler feature is enabled")
 | 
			
		||||
	nodeInfoCacheExpireTime  = flag.Duration("node-info-cache-expire-time", 87600*time.Hour, "Node Info cache expire time for each item. Default value is 10 years.")
 | 
			
		||||
	initialNodeGroupBackoffDuration = flag.Duration("initial-node-group-backoff-duration", 5*time.Minute,
 | 
			
		||||
		"initialNodeGroupBackoffDuration is the duration of first backoff after a new node failed to start.")
 | 
			
		||||
	maxNodeGroupBackoffDuration = flag.Duration("max-node-group-backoff-duration", 30*time.Minute,
 | 
			
		||||
		"maxNodeGroupBackoffDuration is the maximum backoff duration for a NodeGroup after new nodes failed to start.")
 | 
			
		||||
	nodeGroupBackoffResetTimeout = flag.Duration("node-group-backoff-reset-timeout", 3*time.Hour,
 | 
			
		||||
		"nodeGroupBackoffResetTimeout is the time after last failed scale-up when the backoff duration is reset.")
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func createAutoscalingOptions() config.AutoscalingOptions {
 | 
			
		||||
| 
						 | 
				
			
			@ -272,6 +278,9 @@ func createAutoscalingOptions() config.AutoscalingOptions {
 | 
			
		|||
		DaemonSetEvictionForEmptyNodes:     *daemonSetEvictionForEmptyNodes,
 | 
			
		||||
		DaemonSetEvictionForOccupiedNodes:  *daemonSetEvictionForOccupiedNodes,
 | 
			
		||||
		UserAgent:                          *userAgent,
 | 
			
		||||
		InitialNodeGroupBackoffDuration:    *initialNodeGroupBackoffDuration,
 | 
			
		||||
		MaxNodeGroupBackoffDuration:        *maxNodeGroupBackoffDuration,
 | 
			
		||||
		NodeGroupBackoffResetTimeout:       *nodeGroupBackoffResetTimeout,
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue