Update max unready nodes to 45%
This commit is contained in:
parent
444201d1e7
commit
820f688d2a
|
|
@ -512,9 +512,11 @@ CA, from version 1.0, gives pods at most 10 minutes graceful termination time. I
|
|||
|
||||
### How does CA deal with unready nodes?
|
||||
|
||||
From 0.5 CA (K8S 1.6) continues to work even if some (up to 33% or not greater than 3,
|
||||
configurable by `--max-total-unready-percentage` and `--ok-total-unready-count` flags)
|
||||
percentage of nodes is unavailable. Once there are more unready nodes in the cluster,
|
||||
Since version 0.5 (corresponding to Kubernetes 1.6), CA continues to work even if some nodes are unavailable.
|
||||
The default number of tolerated unready nodes in CA 1.2.1 or earlier is 33% of total nodes in the cluster or up to 3 nodes, whichever is higher.
|
||||
For CA 1.2.2 and later, it's 45% of total nodes or up to 3 nodes, whichever is higher.
|
||||
This is configurable by `--max-total-unready-percentage` and `--ok-total-unready-count` flags.
|
||||
Once there are more unready nodes in the cluster,
|
||||
CA stops all operations until the situation improves. If there are fewer unready nodes,
|
||||
but they are concentrated in a particular node group,
|
||||
then this node group may be excluded from future scale-ups.
|
||||
|
|
|
|||
|
|
@ -80,9 +80,10 @@ type ScaleDownRequest struct {
|
|||
|
||||
// ClusterStateRegistryConfig contains configuration information for ClusterStateRegistry.
|
||||
type ClusterStateRegistryConfig struct {
|
||||
// Maximum percentage of unready nodes in total in, if the number is higher than OkTotalUnreadyCount
|
||||
// Maximum percentage of unready nodes in total, if the number of unready nodes is higher than OkTotalUnreadyCount.
|
||||
MaxTotalUnreadyPercentage float64
|
||||
// Number of nodes that can be unready in total. If the number is higher than that then MaxTotalUnreadyPercentage applies.
|
||||
// Minimum number of nodes that must be unready for MaxTotalUnreadyPercentage to apply.
|
||||
// This is to ensure that in very small clusters (e.g. 2 nodes) a single node's failure doesn't disable autoscaling.
|
||||
OkTotalUnreadyCount int
|
||||
// Maximum time CA waits for node to be provisioned
|
||||
MaxNodeProvisionTime time.Duration
|
||||
|
|
|
|||
|
|
@ -113,7 +113,7 @@ var (
|
|||
"Cloud provider type. Available values: ["+strings.Join(cloudBuilder.AvailableCloudProviders, ",")+"]")
|
||||
maxEmptyBulkDeleteFlag = flag.Int("max-empty-bulk-delete", 10, "Maximum number of empty nodes that can be deleted at the same time.")
|
||||
maxGracefulTerminationFlag = flag.Int("max-graceful-termination-sec", 10*60, "Maximum number of seconds CA waits for pod termination when trying to scale down a node.")
|
||||
maxTotalUnreadyPercentage = flag.Float64("max-total-unready-percentage", 33, "Maximum percentage of unready nodes after which CA halts operations")
|
||||
maxTotalUnreadyPercentage = flag.Float64("max-total-unready-percentage", 45, "Maximum percentage of unready nodes in the cluster. After this is exceeded, CA halts operations")
|
||||
okTotalUnreadyCount = flag.Int("ok-total-unready-count", 3, "Number of allowed unready nodes, irrespective of max-total-unready-percentage")
|
||||
maxNodeProvisionTime = flag.Duration("max-node-provision-time", 15*time.Minute, "Maximum time CA waits for node to be provisioned")
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue