Set PodsTriggeredScaleUp field for failed scale ups.

2021-10-14 15:39:24 +02:00 · 2021-10-14 15:39:24 +02:00 · a7660c1f5a
parent c7c14381f5
commit a7660c1f5a
1 changed files with 19 additions and 11 deletions
--- a/cluster-autoscaler/core/scale_up.go
+++ b/cluster-autoscaler/core/scale_up.go
@ -478,9 +478,8 @@ func ScaleUp(context *context.AutoscalingContext, processors *ca_processors.Auto
 			newNodes = context.MaxNodesTotal - len(nodes) - len(upcomingNodes)
 			context.LogRecorder.Eventf(apiv1.EventTypeWarning, "MaxNodesTotalReached", "Max total nodes in cluster reached: %v", context.MaxNodesTotal)
 			if newNodes < 1 {
-				return scaleUpError(&status.ScaleUpStatus{}, errors.NewAutoscalerError(
+				return scaleUpError(&status.ScaleUpStatus{PodsTriggeredScaleUp: bestOption.Pods},
-					errors.TransientError,
+					errors.NewAutoscalerError(errors.TransientError, "max node total count already reached"))
 					"max node total count already reached"))
 			}
 		}
@ -490,7 +489,7 @@ func ScaleUp(context *context.AutoscalingContext, processors *ca_processors.Auto
 			createNodeGroupResult, err := processors.NodeGroupManager.CreateNodeGroup(context, bestOption.NodeGroup)
 			if err != nil {
 				return scaleUpError(
-					&status.ScaleUpStatus{FailedCreationNodeGroups: []cloudprovider.NodeGroup{bestOption.NodeGroup}},
+					&status.ScaleUpStatus{FailedCreationNodeGroups: []cloudprovider.NodeGroup{bestOption.NodeGroup}, PodsTriggeredScaleUp: bestOption.Pods},
 					err)
 			}
 			createNodeGroupResults = append(createNodeGroupResults, createNodeGroupResult)
@ -522,7 +521,7 @@ func ScaleUp(context *context.AutoscalingContext, processors *ca_processors.Auto
 				option, err2 := computeExpansionOption(context, podEquivalenceGroups, nodeGroup, nodeInfo, upcomingNodes)
 				if err2 != nil {
-					return scaleUpError(&status.ScaleUpStatus{}, errors.ToAutoscalerError(errors.InternalError, err))
+					return scaleUpError(&status.ScaleUpStatus{PodsTriggeredScaleUp: bestOption.Pods}, errors.ToAutoscalerError(errors.InternalError, err))
 				}
 				if len(option.Pods) > 0 && option.NodeCount > 0 {
@ -541,7 +540,9 @@ func ScaleUp(context *context.AutoscalingContext, processors *ca_processors.Auto
 			// This should never happen, as we already should have retrieved
 			// nodeInfo for any considered nodegroup.
 			klog.Errorf("No node info for: %s", bestOption.NodeGroup.Id())
-			return scaleUpError(&status.ScaleUpStatus{CreateNodeGroupResults: createNodeGroupResults}, errors.NewAutoscalerError(
+			return scaleUpError(
 				&status.ScaleUpStatus{CreateNodeGroupResults: createNodeGroupResults, PodsTriggeredScaleUp: bestOption.Pods},
 				errors.NewAutoscalerError(
 					errors.CloudProviderError,
 					"No node info for best expansion option!"))
 		}
@ -549,14 +550,18 @@ func ScaleUp(context *context.AutoscalingContext, processors *ca_processors.Auto
 		// apply upper limits for CPU and memory
 		newNodes, err = applyScaleUpResourcesLimits(context, processors, newNodes, scaleUpResourcesLeft, nodeInfo, bestOption.NodeGroup, resourceLimiter)
 		if err != nil {
-			return scaleUpError(&status.ScaleUpStatus{CreateNodeGroupResults: createNodeGroupResults}, err)
+			return scaleUpError(
 				&status.ScaleUpStatus{CreateNodeGroupResults: createNodeGroupResults, PodsTriggeredScaleUp: bestOption.Pods},
 				err)
 		}
 		targetNodeGroups := []cloudprovider.NodeGroup{bestOption.NodeGroup}
 		if context.BalanceSimilarNodeGroups {
 			similarNodeGroups, typedErr := processors.NodeGroupSetProcessor.FindSimilarNodeGroups(context, bestOption.NodeGroup, nodeInfos)
 			if typedErr != nil {
-				return scaleUpError(&status.ScaleUpStatus{CreateNodeGroupResults: createNodeGroupResults}, typedErr.AddPrefix("Failed to find matching node groups: "))
+				return scaleUpError(
 					&status.ScaleUpStatus{CreateNodeGroupResults: createNodeGroupResults, PodsTriggeredScaleUp: bestOption.Pods},
 					typedErr.AddPrefix("Failed to find matching node groups: "))
 			}
 			similarNodeGroups = filterNodeGroupsByPods(similarNodeGroups, bestOption.Pods, expansionOptions)
 			for _, ng := range similarNodeGroups {
@ -583,7 +588,9 @@ func ScaleUp(context *context.AutoscalingContext, processors *ca_processors.Auto
 		scaleUpInfos, typedErr := processors.NodeGroupSetProcessor.BalanceScaleUpBetweenGroups(
 			context, targetNodeGroups, newNodes)
 		if typedErr != nil {
-			return scaleUpError(&status.ScaleUpStatus{CreateNodeGroupResults: createNodeGroupResults}, typedErr)
+			return scaleUpError(
 				&status.ScaleUpStatus{CreateNodeGroupResults: createNodeGroupResults, PodsTriggeredScaleUp: bestOption.Pods},
 				typedErr)
 		}
 		klog.V(1).Infof("Final scale-up plan: %v", scaleUpInfos)
 		for _, info := range scaleUpInfos {
@ -593,6 +600,7 @@ func ScaleUp(context *context.AutoscalingContext, processors *ca_processors.Auto
 					&status.ScaleUpStatus{
 						CreateNodeGroupResults: createNodeGroupResults,
 						FailedResizeNodeGroups: []cloudprovider.NodeGroup{info.Group},
 						PodsTriggeredScaleUp:   bestOption.Pods,
 					},
 					typedErr,
 				)