Delay scale-up including GPU request
Nodes with GPUs are expensive, and it's likely that a batch of pods using them will be created together. In this case we can wait a bit for all the pods to be created, so we can make a more efficient scale-up decision.
parent d876d74912
commit abbc45da2e
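To make the intended behavior concrete, here is a simplified, standalone sketch of the gating this commit adds. The constants mirror the buffers introduced in the diff below, but the helper and the example timings are illustrative only, not the autoscaler's actual code:

package main

import (
    "fmt"
    "time"
)

const (
    podBuffer    = 2 * time.Second  // mirrors unschedulablePodTimeBuffer
    gpuPodBuffer = 30 * time.Second // mirrors unschedulablePodWithGpuTimeBuffer
)

// shouldDelayScaleUp mimics the check introduced below: hold off while every
// pending pod is younger than the short buffer, and, if any pending pod
// requests a GPU, while the oldest such pod is younger than the longer buffer.
func shouldDelayScaleUp(oldestPodAge, oldestGpuPodAge time.Duration, anyGpuPod bool) bool {
    if oldestPodAge < podBuffer {
        return true // every pending pod is still very new; more may be coming
    }
    return anyGpuPod && oldestGpuPodAge < gpuPodBuffer
}

func main() {
    // Oldest pending pod is 10s old and requests a GPU: the 2s buffer has
    // passed, but the 30s GPU buffer has not, so scale-up is still delayed.
    fmt.Println(shouldDelayScaleUp(10*time.Second, 10*time.Second, true)) // true
    // Oldest pending pod is 45s old: both buffers have passed, scale up now.
    fmt.Println(shouldDelayScaleUp(45*time.Second, 45*time.Second, true)) // false
}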
@@ -37,6 +37,10 @@ import (
 const (
     // How old the oldest unschedulable pod should be before starting scale up.
     unschedulablePodTimeBuffer = 2 * time.Second
+    // How old the oldest unschedulable pod with GPU should be before starting scale up.
+    // The idea is that nodes with GPU are very expensive and we're ready to sacrifice
+    // a bit more latency to wait for more pods and make a more informed scale-up decision.
+    unschedulablePodWithGpuTimeBuffer = 30 * time.Second
     // How long should Cluster Autoscaler wait for nodes to become ready after start.
     nodesNotReadyAfterStartTimeout = 10 * time.Minute
 )
@@ -274,7 +278,7 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
         glog.V(1).Info("No unschedulable pods")
     } else if a.MaxNodesTotal > 0 && len(readyNodes) >= a.MaxNodesTotal {
         glog.V(1).Info("Max total nodes in cluster reached")
-    } else if getOldestCreateTime(unschedulablePodsToHelp).Add(unschedulablePodTimeBuffer).After(currentTime) {
+    } else if allPodsAreNew(unschedulablePodsToHelp, currentTime) {
         // The assumption here is that these pods have been created very recently and probably there
         // is more pods to come. In theory we could check the newest pod time but then if pod were created
         // slowly but at the pace of 1 every 2 seconds then no scale up would be triggered for long time.
@@ -384,3 +388,11 @@ func (a *StaticAutoscaler) ExitCleanUp() {
     }
     utils.DeleteStatusConfigMap(a.AutoscalingContext.ClientSet, a.AutoscalingContext.ConfigNamespace)
 }
+
+func allPodsAreNew(pods []*apiv1.Pod, currentTime time.Time) bool {
+    if getOldestCreateTime(pods).Add(unschedulablePodTimeBuffer).After(currentTime) {
+        return true
+    }
+    found, oldest := getOldestCreateTimeWithGpu(pods)
+    return found && oldest.Add(unschedulablePodWithGpuTimeBuffer).After(currentTime)
+}
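The commit itself does not add a test for the new allPodsAreNew helper. A hypothetical test sketch is shown here for illustration; the TestAllPodsAreNew name, the package placement alongside static_autoscaler.go, and the import paths are assumptions layered on top of the helpers that do appear in the diffs below:

package core // assumed package of static_autoscaler.go

import (
    "testing"
    "time"

    "k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
    "k8s.io/autoscaler/cluster-autoscaler/utils/test"

    apiv1 "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/api/resource"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

    "github.com/stretchr/testify/assert"
)

// Hypothetical test for the new helper; not part of this commit.
func TestAllPodsAreNew(t *testing.T) {
    now := time.Now()

    oldPod := test.BuildTestPod("old", 500, 1000)
    oldPod.CreationTimestamp = metav1.NewTime(now.Add(-time.Minute))

    freshPod := test.BuildTestPod("fresh", 500, 1000)
    freshPod.CreationTimestamp = metav1.NewTime(now.Add(-time.Second))

    gpuPod := test.BuildTestPod("gpu", 500, 1000)
    gpuPod.CreationTimestamp = metav1.NewTime(now.Add(-10 * time.Second))
    gpuPod.Spec.Containers[0].Resources.Requests[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(1, resource.DecimalSI)

    // Oldest pod is a minute old and nothing requests a GPU: both buffers have passed.
    assert.False(t, allPodsAreNew([]*apiv1.Pod{oldPod}, now))
    // Only a one-second-old pod: still inside the 2s buffer.
    assert.True(t, allPodsAreNew([]*apiv1.Pod{freshPod}, now))
    // A 10s-old GPU pod: past the 2s buffer but inside the 30s GPU buffer.
    assert.True(t, allPodsAreNew([]*apiv1.Pod{oldPod, gpuPod}, now))
}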
@@ -32,6 +32,7 @@ import (
     "k8s.io/autoscaler/cluster-autoscaler/utils/deletetaint"
     "k8s.io/autoscaler/cluster-autoscaler/utils/drain"
     "k8s.io/autoscaler/cluster-autoscaler/utils/errors"
+    "k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
     kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
     scheduler_util "k8s.io/autoscaler/cluster-autoscaler/utils/scheduler"
@@ -517,6 +518,20 @@ func getOldestCreateTime(pods []*apiv1.Pod) time.Time {
     return oldest
 }
 
+func getOldestCreateTimeWithGpu(pods []*apiv1.Pod) (bool, time.Time) {
+    oldest := time.Now()
+    gpuFound := false
+    for _, pod := range pods {
+        if gpu.PodRequestsGpu(pod) {
+            gpuFound = true
+            if oldest.After(pod.CreationTimestamp.Time) {
+                oldest = pod.CreationTimestamp.Time
+            }
+        }
+    }
+    return gpuFound, oldest
+}
+
 // UpdateEmptyClusterStateMetrics updates metrics related to empty cluster's state.
 // TODO(aleksandra-malinowska): use long unregistered value from ClusterStateRegistry.
 func UpdateEmptyClusterStateMetrics() {
@@ -101,6 +101,19 @@ func NodeHasGpu(node *apiv1.Node) bool {
     return hasGpuLabel || (hasGpuAllocatable && !gpuAllocatable.IsZero())
 }
 
+// PodRequestsGpu returns true if a given pod has GPU request.
+func PodRequestsGpu(pod *apiv1.Pod) bool {
+    for _, container := range pod.Spec.Containers {
+        if container.Resources.Requests != nil {
+            _, gpuFound := container.Resources.Requests[ResourceNvidiaGPU]
+            if gpuFound {
+                return true
+            }
+        }
+    }
+    return false
+}
+
 // GpuRequestInfo contains an information about a set of pods requesting a GPU.
 type GpuRequestInfo struct {
     // MaxRequest is maximum GPU request among pods
@@ -194,6 +194,15 @@ func TestNodeHasGpu(t *testing.T) {
     assert.False(t, NodeHasGpu(nodeNoGpu))
 }
 
+func TestPodRequestsGpu(t *testing.T) {
+    podNoGpu := test.BuildTestPod("podNoGpu", 0, 1000)
+    podWithGpu := test.BuildTestPod("pod1AnyGpu", 0, 1000)
+    podWithGpu.Spec.Containers[0].Resources.Requests[ResourceNvidiaGPU] = *resource.NewQuantity(1, resource.DecimalSI)
+
+    assert.False(t, PodRequestsGpu(podNoGpu))
+    assert.True(t, PodRequestsGpu(podWithGpu))
+}
+
 func TestGetGpuRequests(t *testing.T) {
     podNoGpu := test.BuildTestPod("podNoGpu", 0, 1000)
     podNoGpu.Spec.NodeSelector = map[string]string{}