Add flags to ignore DaemonSet and mirror pods when calculating resource utilization of a node
Adds the flags --ignore-daemonsets-utilization and --ignore-mirror-pods-utilization (both defaulting to false). When enabled, these flags factor DaemonSet and mirror pods, respectively, out of the resource utilization calculated for a node during scale-down.
This commit is contained in:
parent d4a6664b38
commit 4ae7acbacc
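For operators, enabling the new behavior is just a matter of passing the flags to the cluster-autoscaler binary; both default to false, so existing deployments are unaffected. A minimal invocation sketch (all other flags omitted):

	cluster-autoscaler \
		--ignore-daemonsets-utilization=true \
		--ignore-mirror-pods-utilization=true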
@@ -60,6 +60,10 @@ type AutoscalingOptions struct {
 	EstimatorName string
 	// ExpanderName sets the type of node group expander to be used in scale up
 	ExpanderName string
+	// IgnoreDaemonSetsUtilization is whether CA will ignore DaemonSet pods when calculating resource utilization for scaling down
+	IgnoreDaemonSetsUtilization bool
+	// IgnoreMirrorPodsUtilization is whether CA will ignore Mirror pods when calculating resource utilization for scaling down
+	IgnoreMirrorPodsUtilization bool
 	// MaxGracefulTerminationSec is maximum number of seconds scale down waits for pods to terminate before
 	// removing the node from cloud provider.
 	MaxGracefulTerminationSec int
@@ -399,7 +399,7 @@ func (sd *ScaleDown) UpdateUnneededNodes(
 			glog.Errorf("Node info for %s not found", node.Name)
 			continue
 		}
-		utilInfo, err := simulator.CalculateUtilization(node, nodeInfo)
+		utilInfo, err := simulator.CalculateUtilization(node, nodeInfo, sd.context.IgnoreDaemonSetsUtilization, sd.context.IgnoreMirrorPodsUtilization)
 
 		if err != nil {
 			glog.Warningf("Failed to calculate utilization for %s: %v", node.Name, err)
@@ -140,6 +140,11 @@ var (
 	expanderFlag = flag.String("expander", expander.RandomExpanderName,
 		"Type of node group expander to be used in scale up. Available values: ["+strings.Join(expander.AvailableExpanders, ",")+"]")
 
+	ignoreDaemonSetsUtilization = flag.Bool("ignore-daemonsets-utilization", false,
+		"Should CA ignore DaemonSet pods when calculating resource utilization for scaling down")
+	ignoreMirrorPodsUtilization = flag.Bool("ignore-mirror-pods-utilization", false,
+		"Should CA ignore Mirror pods when calculating resource utilization for scaling down")
+
 	writeStatusConfigMapFlag = flag.Bool("write-status-configmap", true, "Should CA write status information to a configmap")
 	maxInactivityTimeFlag    = flag.Duration("max-inactivity", 10*time.Minute, "Maximum time from last recorded autoscaler activity before automatic restart")
 	maxFailingTimeFlag       = flag.Duration("max-failing-time", 15*time.Minute, "Maximum time from last recorded successful autoscaler run before automatic restart")
@@ -179,6 +184,8 @@ func createAutoscalingOptions() config.AutoscalingOptions {
 		OkTotalUnreadyCount:         *okTotalUnreadyCount,
 		EstimatorName:               *estimatorFlag,
 		ExpanderName:                *expanderFlag,
+		IgnoreDaemonSetsUtilization: *ignoreDaemonSetsUtilization,
+		IgnoreMirrorPodsUtilization: *ignoreMirrorPodsUtilization,
 		MaxEmptyBulkDelete:          *maxEmptyBulkDeleteFlag,
 		MaxGracefulTerminationSec:   *maxGracefulTerminationFlag,
 		MaxNodeProvisionTime:        *maxNodeProvisionTime,
@@ -23,6 +23,7 @@ import (
 	"math/rand"
 	"time"
 
+	"k8s.io/autoscaler/cluster-autoscaler/utils/drain"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/glogx"
 	scheduler_util "k8s.io/autoscaler/cluster-autoscaler/utils/scheduler"
@@ -151,19 +152,19 @@ func FindEmptyNodesToRemove(candidates []*apiv1.Node, pods []*apiv1.Pod) []*apiv
 // CalculateUtilization calculates utilization of a node, defined as maximum of (cpu, memory) utilization.
 // Per resource utilization is the sum of requests for it divided by allocatable. It also returns the individual
 // cpu and memory utilization.
-func CalculateUtilization(node *apiv1.Node, nodeInfo *schedulercache.NodeInfo) (utilInfo UtilizationInfo, err error) {
-	cpu, err := calculateUtilizationOfResource(node, nodeInfo, apiv1.ResourceCPU)
+func CalculateUtilization(node *apiv1.Node, nodeInfo *schedulercache.NodeInfo, skipDaemonSetPods, skipMirrorPods bool) (utilInfo UtilizationInfo, err error) {
+	cpu, err := calculateUtilizationOfResource(node, nodeInfo, apiv1.ResourceCPU, skipDaemonSetPods, skipMirrorPods)
 	if err != nil {
 		return UtilizationInfo{}, err
 	}
-	mem, err := calculateUtilizationOfResource(node, nodeInfo, apiv1.ResourceMemory)
+	mem, err := calculateUtilizationOfResource(node, nodeInfo, apiv1.ResourceMemory, skipDaemonSetPods, skipMirrorPods)
 	if err != nil {
 		return UtilizationInfo{}, err
 	}
 	return UtilizationInfo{CpuUtil: cpu, MemUtil: mem, Utilization: math.Max(cpu, mem)}, nil
 }
 
-func calculateUtilizationOfResource(node *apiv1.Node, nodeInfo *schedulercache.NodeInfo, resourceName apiv1.ResourceName) (float64, error) {
+func calculateUtilizationOfResource(node *apiv1.Node, nodeInfo *schedulercache.NodeInfo, resourceName apiv1.ResourceName, skipDaemonSetPods, skipMirrorPods bool) (float64, error) {
 	nodeAllocatable, found := node.Status.Allocatable[resourceName]
 	if !found {
 		return 0, fmt.Errorf("Failed to get %v from %s", resourceName, node.Name)
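As the doc comment above states, per-resource utilization is the sum of pod requests divided by the node's allocatable, and a node's overall utilization is the maximum over CPU and memory. A worked example with illustrative numbers (not taken from the commit):

	cpuUtil     = 400m requested / 2000m allocatable     = 0.20
	memUtil     = 300000 requested / 2000000 allocatable = 0.15
	Utilization = max(0.20, 0.15)                        = 0.20

With --ignore-daemonsets-utilization enabled, a DaemonSet pod requesting 100m of that CPU would be dropped from the numerator, lowering cpuUtil to 300m / 2000m = 0.15.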
@@ -173,6 +174,14 @@ func calculateUtilizationOfResource(node *apiv1.Node, nodeInfo *schedulercache.N
 	}
 	podsRequest := resource.MustParse("0")
 	for _, pod := range nodeInfo.Pods() {
+		// factor daemonset pods out of the utilization calculations
+		if skipDaemonSetPods && isDaemonSet(pod) {
+			continue
+		}
+		// factor mirror pods out of the utilization calculations
+		if skipMirrorPods && drain.IsMirrorPod(pod) {
+			continue
+		}
 		for _, container := range pod.Spec.Containers {
 			if resourceValue, found := container.Resources.Requests[resourceName]; found {
 				podsRequest.Add(resourceValue)
@@ -283,3 +292,12 @@ func shuffleNodes(nodes []*apiv1.Node) []*apiv1.Node {
 	}
 	return result
 }
+
+func isDaemonSet(pod *apiv1.Pod) bool {
+	for _, ownerReference := range pod.ObjectMeta.OwnerReferences {
+		if ownerReference.Kind == "DaemonSet" {
+			return true
+		}
+	}
+	return false
+}
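The two exclusion checks classify pods differently: isDaemonSet (above) walks the pod's owner references, while drain.IsMirrorPod keys off the kubelet's mirror-pod annotation, which is also how the test below builds its fixture. A self-contained sketch of both checks, assuming the k8s.io client libraries vendored by cluster-autoscaler (the pod fixtures and the isMirrorPod helper are illustrative, not part of the commit):

	package main

	import (
		"fmt"

		apiv1 "k8s.io/api/core/v1"
		metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
		kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
	)

	// isDaemonSet repeats the helper added in this commit: any owner
	// reference of Kind "DaemonSet" marks the pod as a DaemonSet pod.
	func isDaemonSet(pod *apiv1.Pod) bool {
		for _, ownerReference := range pod.ObjectMeta.OwnerReferences {
			if ownerReference.Kind == "DaemonSet" {
				return true
			}
		}
		return false
	}

	// isMirrorPod approximates drain.IsMirrorPod: mirror pods carry the
	// kubelet's config-mirror annotation.
	func isMirrorPod(pod *apiv1.Pod) bool {
		_, found := pod.ObjectMeta.Annotations[kubetypes.ConfigMirrorAnnotationKey]
		return found
	}

	func main() {
		dsPod := &apiv1.Pod{ObjectMeta: metav1.ObjectMeta{
			OwnerReferences: []metav1.OwnerReference{{Kind: "DaemonSet", Name: "ds"}},
		}}
		mirrorPod := &apiv1.Pod{ObjectMeta: metav1.ObjectMeta{
			Annotations: map[string]string{kubetypes.ConfigMirrorAnnotationKey: ""},
		}}
		fmt.Println(isDaemonSet(dsPod), isMirrorPod(dsPod))         // true false
		fmt.Println(isDaemonSet(mirrorPod), isMirrorPod(mirrorPod)) // false true
	}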
@@ -38,14 +38,42 @@ func TestUtilization(t *testing.T) {
 	node := BuildTestNode("node1", 2000, 2000000)
 	SetNodeReadyState(node, true, time.Time{})
 
-	utilInfo, err := CalculateUtilization(node, nodeInfo)
+	utilInfo, err := CalculateUtilization(node, nodeInfo, false, false)
 	assert.NoError(t, err)
 	assert.InEpsilon(t, 2.0/10, utilInfo.Utilization, 0.01)
 
 	node2 := BuildTestNode("node1", 2000, -1)
 
-	_, err = CalculateUtilization(node2, nodeInfo)
+	_, err = CalculateUtilization(node2, nodeInfo, false, false)
 	assert.Error(t, err)
+
+	daemonSetPod3 := BuildTestPod("p3", 100, 200000)
+	daemonSetPod3.OwnerReferences = GenerateOwnerReferences("ds", "DaemonSet", "apps/v1", "")
+
+	nodeInfo = schedulercache.NewNodeInfo(pod, pod, pod2, daemonSetPod3)
+	utilInfo, err = CalculateUtilization(node, nodeInfo, true, false)
+	assert.NoError(t, err)
+	assert.InEpsilon(t, 2.0/10, utilInfo.Utilization, 0.01)
+
+	nodeInfo = schedulercache.NewNodeInfo(pod, pod2, daemonSetPod3)
+	utilInfo, err = CalculateUtilization(node, nodeInfo, false, false)
+	assert.NoError(t, err)
+	assert.InEpsilon(t, 2.0/10, utilInfo.Utilization, 0.01)
+
+	mirrorPod4 := BuildTestPod("p4", 100, 200000)
+	mirrorPod4.Annotations = map[string]string{
+		types.ConfigMirrorAnnotationKey: "",
+	}
+
+	nodeInfo = schedulercache.NewNodeInfo(pod, pod, pod2, mirrorPod4)
+	utilInfo, err = CalculateUtilization(node, nodeInfo, false, true)
+	assert.NoError(t, err)
+	assert.InEpsilon(t, 2.0/10, utilInfo.Utilization, 0.01)
+
+	nodeInfo = schedulercache.NewNodeInfo(pod, pod2, mirrorPod4)
+	utilInfo, err = CalculateUtilization(node, nodeInfo, false, false)
+	assert.NoError(t, err)
+	assert.InEpsilon(t, 2.0/10, utilInfo.Utilization, 0.01)
 }
 
 func TestFindPlaceAllOk(t *testing.T) {
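Note that every new assertion in TestUtilization is still 2.0/10: the fixtures appear chosen so the request totals match. Assuming the elided pod and pod2 request 100m and 200m of CPU respectively, (pod, pod, pod2) with the 100m daemonSetPod3 skipped and (pod, pod2, daemonSetPod3) with nothing skipped both sum to 400m of the node's 2000m, and the two mirror-pod cases repeat the same arithmetic.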