Update max unready nodes to 45%

This commit is contained in:
Aleksandra Malinowska 2018-05-17 12:51:45 +02:00
parent 444201d1e7
commit 820f688d2a
3 changed files with 9 additions and 6 deletions

View File

@ -512,9 +512,11 @@ CA, from version 1.0, gives pods at most 10 minutes graceful termination time. I
### How does CA deal with unready nodes?
From 0.5 CA (K8S 1.6) continues to work even if some (up to 33% or not greater than 3,
configurable by `--max-total-unready-percentage` and `--ok-total-unready-count` flags)
percentage of nodes is unavailable. Once there are more unready nodes in the cluster,
From 0.5 CA (K8S 1.6) continues to work even if some nodes are unavailable.
The default number of tolerated unready nodes in CA 1.2.1 or earlier is 33% of the total nodes in the cluster or 3 nodes, whichever is higher.
For CA 1.2.2 and later, it's 45% or 3 nodes, whichever is higher.
This is configurable by `--max-total-unready-percentage` and `--ok-total-unready-count` flags.
Once the number of unready nodes in the cluster exceeds this limit,
CA stops all operations until the situation improves. If there are fewer unready nodes,
but they are concentrated in a particular node group,
then this node group may be excluded from future scale-ups.

View File

@ -80,9 +80,10 @@ type ScaleDownRequest struct {
// ClusterStateRegistryConfig contains configuration information for ClusterStateRegistry.
type ClusterStateRegistryConfig struct {
// Maximum percentage of unready nodes in total in, if the number is higher than OkTotalUnreadyCount
// Maximum percentage of unready nodes in total, if the number of unready nodes is higher than OkTotalUnreadyCount.
MaxTotalUnreadyPercentage float64
// Number of nodes that can be unready in total. If the number is higher than that then MaxTotalUnreadyPercentage applies.
// Minimum number of nodes that must be unready for MaxTotalUnreadyPercentage to apply.
// This is to ensure that in very small clusters (e.g. 2 nodes) a single node's failure doesn't disable autoscaling.
OkTotalUnreadyCount int
// Maximum time CA waits for node to be provisioned
MaxNodeProvisionTime time.Duration

View File

@ -113,7 +113,7 @@ var (
"Cloud provider type. Available values: ["+strings.Join(cloudBuilder.AvailableCloudProviders, ",")+"]")
maxEmptyBulkDeleteFlag = flag.Int("max-empty-bulk-delete", 10, "Maximum number of empty nodes that can be deleted at the same time.")
maxGracefulTerminationFlag = flag.Int("max-graceful-termination-sec", 10*60, "Maximum number of seconds CA waits for pod termination when trying to scale down a node.")
maxTotalUnreadyPercentage = flag.Float64("max-total-unready-percentage", 33, "Maximum percentage of unready nodes after which CA halts operations")
maxTotalUnreadyPercentage = flag.Float64("max-total-unready-percentage", 45, "Maximum percentage of unready nodes in the cluster. After this is exceeded, CA halts operations")
okTotalUnreadyCount = flag.Int("ok-total-unready-count", 3, "Number of allowed unready nodes, irrespective of max-total-unready-percentage")
maxNodeProvisionTime = flag.Duration("max-node-provision-time", 15*time.Minute, "Maximum time CA waits for node to be provisioned")