Merge pull request #297 from bskiba/additional-k
Only consider up to 10% of the nodes as additional candidates for scale down
This commit is contained in:
commit
f9cabf3a1a
|
|
@ -99,6 +99,16 @@ type AutoscalingOptions struct {
|
||||||
// ScaleDownNonEmptyCandidatesCount is the maximum number of non empty nodes
|
// ScaleDownNonEmptyCandidatesCount is the maximum number of non empty nodes
|
||||||
// considered at once as candidates for scale down.
|
// considered at once as candidates for scale down.
|
||||||
ScaleDownNonEmptyCandidatesCount int
|
ScaleDownNonEmptyCandidatesCount int
|
||||||
|
// ScaleDownCandidatesPoolRatio is a ratio of nodes that are considered
|
||||||
|
// as additional non empty candidates for scale down when some candidates from
|
||||||
|
// previous iteration are no longer valid.
|
||||||
|
ScaleDownCandidatesPoolRatio float64
|
||||||
|
// ScaleDownCandidatesPoolMinCount is the minimum number of nodes that are
|
||||||
|
// considered as additional non empty candidates for scale down when some
|
||||||
|
// candidates from previous iteration are no longer valid.
|
||||||
|
// The size of the additional candidates pool is calculated as follows:
|
||||||
|
// max(#nodes * ScaleDownCandidatesPoolRatio, ScaleDownCandidatesPoolMinCount)
|
||||||
|
ScaleDownCandidatesPoolMinCount int
|
||||||
// WriteStatusConfigMap tells if the status information should be written to a ConfigMap
|
// WriteStatusConfigMap tells if the status information should be written to a ConfigMap
|
||||||
WriteStatusConfigMap bool
|
WriteStatusConfigMap bool
|
||||||
// BalanceSimilarNodeGroups enables logic that identifies node groups with similar machines and tries to balance node count between them.
|
// BalanceSimilarNodeGroups enables logic that identifies node groups with similar machines and tries to balance node count between them.
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,7 @@ package core
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math"
|
||||||
"reflect"
|
"reflect"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
@ -219,15 +220,23 @@ func (sd *ScaleDown) UpdateUnneededNodes(
|
||||||
return sd.markSimulationError(simulatorErr, timestamp)
|
return sd.markSimulationError(simulatorErr, timestamp)
|
||||||
}
|
}
|
||||||
|
|
||||||
additionalCandidatesCount := sd.context.AutoscalingOptions.ScaleDownNonEmptyCandidatesCount - len(nodesToRemove)
|
additionalCandidatesCount := sd.context.ScaleDownNonEmptyCandidatesCount - len(nodesToRemove)
|
||||||
if additionalCandidatesCount > len(currentNonCandidates) {
|
if additionalCandidatesCount > len(currentNonCandidates) {
|
||||||
additionalCandidatesCount = len(currentNonCandidates)
|
additionalCandidatesCount = len(currentNonCandidates)
|
||||||
}
|
}
|
||||||
|
// Limit the additional candidates pool size for better performance.
|
||||||
|
additionalCandidatesPoolSize := int(math.Ceil(float64(len(nodes)) * sd.context.ScaleDownCandidatesPoolRatio))
|
||||||
|
if additionalCandidatesPoolSize < sd.context.ScaleDownCandidatesPoolMinCount {
|
||||||
|
additionalCandidatesPoolSize = sd.context.ScaleDownCandidatesPoolMinCount
|
||||||
|
}
|
||||||
|
if additionalCandidatesPoolSize > len(currentNonCandidates) {
|
||||||
|
additionalCandidatesPoolSize = len(currentNonCandidates)
|
||||||
|
}
|
||||||
if additionalCandidatesCount > 0 {
|
if additionalCandidatesCount > 0 {
|
||||||
// Look for additional nodes to remove among the rest of nodes
|
// Look for additional nodes to remove among the rest of nodes
|
||||||
glog.V(3).Infof("Finding additional %v candidates for scale down.", additionalCandidatesCount)
|
glog.V(3).Infof("Finding additional %v candidates for scale down.", additionalCandidatesCount)
|
||||||
additionalNodesToRemove, additionalUnremovable, additionalNewHints, simulatorErr :=
|
additionalNodesToRemove, additionalUnremovable, additionalNewHints, simulatorErr :=
|
||||||
simulator.FindNodesToRemove(currentNonCandidates, nodes, pods, nil,
|
simulator.FindNodesToRemove(currentNonCandidates[:additionalCandidatesPoolSize], nodes, pods, nil,
|
||||||
sd.context.PredicateChecker, additionalCandidatesCount, true,
|
sd.context.PredicateChecker, additionalCandidatesCount, true,
|
||||||
sd.podLocationHints, sd.usageTracker, timestamp, pdbs)
|
sd.podLocationHints, sd.usageTracker, timestamp, pdbs)
|
||||||
if simulatorErr != nil {
|
if simulatorErr != nil {
|
||||||
|
|
@ -290,7 +299,7 @@ func (sd *ScaleDown) markSimulationError(simulatorErr errors.AutoscalerError,
|
||||||
func (sd *ScaleDown) chooseCandidates(nodes []*apiv1.Node) ([]*apiv1.Node, []*apiv1.Node) {
|
func (sd *ScaleDown) chooseCandidates(nodes []*apiv1.Node) ([]*apiv1.Node, []*apiv1.Node) {
|
||||||
// Number of candidates should not be capped. We will look for nodes to remove
|
// Number of candidates should not be capped. We will look for nodes to remove
|
||||||
// from the whole set of nodes.
|
// from the whole set of nodes.
|
||||||
if sd.context.AutoscalingOptions.ScaleDownNonEmptyCandidatesCount <= 0 {
|
if sd.context.ScaleDownNonEmptyCandidatesCount <= 0 {
|
||||||
return nodes, []*apiv1.Node{}
|
return nodes, []*apiv1.Node{}
|
||||||
}
|
}
|
||||||
currentCandidates := make([]*apiv1.Node, 0, len(sd.unneededNodesList))
|
currentCandidates := make([]*apiv1.Node, 0, len(sd.unneededNodesList))
|
||||||
|
|
|
||||||
|
|
@ -165,6 +165,8 @@ func TestFindUnneededMaxCandidates(t *testing.T) {
|
||||||
AutoscalingOptions: AutoscalingOptions{
|
AutoscalingOptions: AutoscalingOptions{
|
||||||
ScaleDownUtilizationThreshold: 0.35,
|
ScaleDownUtilizationThreshold: 0.35,
|
||||||
ScaleDownNonEmptyCandidatesCount: numCandidates,
|
ScaleDownNonEmptyCandidatesCount: numCandidates,
|
||||||
|
ScaleDownCandidatesPoolRatio: 1,
|
||||||
|
ScaleDownCandidatesPoolMinCount: 1000,
|
||||||
},
|
},
|
||||||
ClusterStateRegistry: clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, fakeLogRecorder),
|
ClusterStateRegistry: clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, fakeLogRecorder),
|
||||||
PredicateChecker: simulator.NewTestPredicateChecker(),
|
PredicateChecker: simulator.NewTestPredicateChecker(),
|
||||||
|
|
@ -199,6 +201,53 @@ func TestFindUnneededMaxCandidates(t *testing.T) {
|
||||||
assert.NotContains(t, sd.unneededNodes, deleted)
|
assert.NotContains(t, sd.unneededNodes, deleted)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestFindUnneededAdditionalNodePool(t *testing.T) {
|
||||||
|
provider := testprovider.NewTestCloudProvider(nil, nil)
|
||||||
|
provider.AddNodeGroup("ng1", 1, 100, 2)
|
||||||
|
|
||||||
|
numNodes := 100
|
||||||
|
nodes := make([]*apiv1.Node, 0, numNodes)
|
||||||
|
for i := 0; i < numNodes; i++ {
|
||||||
|
n := BuildTestNode(fmt.Sprintf("n%v", i), 1000, 10)
|
||||||
|
SetNodeReadyState(n, true, time.Time{})
|
||||||
|
provider.AddNode("ng1", n)
|
||||||
|
nodes = append(nodes, n)
|
||||||
|
}
|
||||||
|
|
||||||
|
// shared owner reference
|
||||||
|
ownerRef := GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", "")
|
||||||
|
|
||||||
|
pods := make([]*apiv1.Pod, 0, numNodes)
|
||||||
|
for i := 0; i < numNodes; i++ {
|
||||||
|
p := BuildTestPod(fmt.Sprintf("p%v", i), 100, 0)
|
||||||
|
p.Spec.NodeName = fmt.Sprintf("n%v", i)
|
||||||
|
p.OwnerReferences = ownerRef
|
||||||
|
pods = append(pods, p)
|
||||||
|
}
|
||||||
|
|
||||||
|
fakeClient := &fake.Clientset{}
|
||||||
|
fakeRecorder := kube_util.CreateEventRecorder(fakeClient)
|
||||||
|
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", fakeRecorder, false)
|
||||||
|
|
||||||
|
numCandidates := 30
|
||||||
|
|
||||||
|
context := AutoscalingContext{
|
||||||
|
AutoscalingOptions: AutoscalingOptions{
|
||||||
|
ScaleDownUtilizationThreshold: 0.35,
|
||||||
|
ScaleDownNonEmptyCandidatesCount: numCandidates,
|
||||||
|
ScaleDownCandidatesPoolRatio: 0.1,
|
||||||
|
ScaleDownCandidatesPoolMinCount: 10,
|
||||||
|
},
|
||||||
|
ClusterStateRegistry: clusterstate.NewClusterStateRegistry(provider, clusterstate.ClusterStateRegistryConfig{}, fakeLogRecorder),
|
||||||
|
PredicateChecker: simulator.NewTestPredicateChecker(),
|
||||||
|
LogRecorder: fakeLogRecorder,
|
||||||
|
}
|
||||||
|
sd := NewScaleDown(&context)
|
||||||
|
|
||||||
|
sd.UpdateUnneededNodes(nodes, nodes, pods, time.Now(), nil)
|
||||||
|
assert.NotEmpty(t, sd.unneededNodes)
|
||||||
|
}
|
||||||
|
|
||||||
func TestDrainNode(t *testing.T) {
|
func TestDrainNode(t *testing.T) {
|
||||||
deletedPods := make(chan string, 10)
|
deletedPods := make(chan string, 10)
|
||||||
updatedNodes := make(chan string, 10)
|
updatedNodes := make(chan string, 10)
|
||||||
|
|
|
||||||
|
|
@ -88,6 +88,17 @@ var (
|
||||||
"Lower value means better CA responsiveness but possible slower scale down latency."+
|
"Lower value means better CA responsiveness but possible slower scale down latency."+
|
||||||
"Higher value can affect CA performance with big clusters (hundreds of nodes)."+
|
"Higher value can affect CA performance with big clusters (hundreds of nodes)."+
|
||||||
"Set to non posistive value to turn this heuristic off - CA will not limit the number of nodes it considers.")
|
"Set to non posistive value to turn this heuristic off - CA will not limit the number of nodes it considers.")
|
||||||
|
// scaleDownCandidatesPoolRatio registers the scale-down-candidates-pool-ratio
// flag (default 0.1). Every help-text fragment ends with a space so the
// concatenated message does not run words and sentences together.
scaleDownCandidatesPoolRatio = flag.Float64("scale-down-candidates-pool-ratio", 0.1,
	"A ratio of nodes that are considered as additional non empty candidates for "+
		"scale down when some candidates from previous iteration are no longer valid. "+
		"Lower value means better CA responsiveness but possible slower scale down latency. "+
		"Higher value can affect CA performance with big clusters (hundreds of nodes). "+
		"Set to 1.0 to turn this heuristic off - CA will take all nodes as additional candidates.")
|
||||||
|
// scaleDownCandidatesPoolMinCount registers the
// scale-down-candidates-pool-min-count flag (default 50). Every help-text
// fragment ends with a space so the concatenated message does not run words
// and sentences together.
scaleDownCandidatesPoolMinCount = flag.Int("scale-down-candidates-pool-min-count", 50,
	"Minimum number of nodes that are considered as additional non empty candidates "+
		"for scale down when some candidates from previous iteration are no longer valid. "+
		"When calculating the pool size for additional candidates we take "+
		"max(#nodes * scale-down-candidates-pool-ratio, scale-down-candidates-pool-min-count).")
|
||||||
scanInterval = flag.Duration("scan-interval", 10*time.Second, "How often cluster is reevaluated for scale up or down")
|
scanInterval = flag.Duration("scan-interval", 10*time.Second, "How often cluster is reevaluated for scale up or down")
|
||||||
maxNodesTotal = flag.Int("max-nodes-total", 0, "Maximum number of nodes in all node groups. Cluster autoscaler will not grow the cluster beyond this number.")
|
maxNodesTotal = flag.Int("max-nodes-total", 0, "Maximum number of nodes in all node groups. Cluster autoscaler will not grow the cluster beyond this number.")
|
||||||
cloudProviderFlag = flag.String("cloud-provider", "gce", "Cloud provider type. Allowed values: gce, aws, kubemark")
|
cloudProviderFlag = flag.String("cloud-provider", "gce", "Cloud provider type. Allowed values: gce, aws, kubemark")
|
||||||
|
|
@ -134,6 +145,8 @@ func createAutoscalerOptions() core.AutoscalerOptions {
|
||||||
ScaleDownUnreadyTime: *scaleDownUnreadyTime,
|
ScaleDownUnreadyTime: *scaleDownUnreadyTime,
|
||||||
ScaleDownUtilizationThreshold: *scaleDownUtilizationThreshold,
|
ScaleDownUtilizationThreshold: *scaleDownUtilizationThreshold,
|
||||||
ScaleDownNonEmptyCandidatesCount: *scaleDownNonEmptyCandidatesCount,
|
ScaleDownNonEmptyCandidatesCount: *scaleDownNonEmptyCandidatesCount,
|
||||||
|
ScaleDownCandidatesPoolRatio: *scaleDownCandidatesPoolRatio,
|
||||||
|
ScaleDownCandidatesPoolMinCount: *scaleDownCandidatesPoolMinCount,
|
||||||
WriteStatusConfigMap: *writeStatusConfigMapFlag,
|
WriteStatusConfigMap: *writeStatusConfigMapFlag,
|
||||||
BalanceSimilarNodeGroups: *balanceSimilarNodeGroupsFlag,
|
BalanceSimilarNodeGroups: *balanceSimilarNodeGroupsFlag,
|
||||||
ConfigNamespace: *namespace,
|
ConfigNamespace: *namespace,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue