diff --git a/cluster-autoscaler/core/dynamic_autoscaler.go b/cluster-autoscaler/core/dynamic_autoscaler.go
index 5dd4445855..3537a650c5 100644
--- a/cluster-autoscaler/core/dynamic_autoscaler.go
+++ b/cluster-autoscaler/core/dynamic_autoscaler.go
@@ -65,11 +65,11 @@ func (a *DynamicAutoscaler) ExitCleanUp() {
 // RunOnce represents a single iteration of a dynamic autoscaler inside the CA's control-loop
 func (a *DynamicAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError {
 	reconfigureStart := time.Now()
-	metrics.UpdateLastTime("reconfigure", reconfigureStart)
+	metrics.UpdateLastTime(metrics.Reconfigure, reconfigureStart)
 	if err := a.Reconfigure(); err != nil {
 		glog.Errorf("Failed to reconfigure : %v", err)
 	}
-	metrics.UpdateDuration("reconfigure", reconfigureStart)
+	metrics.UpdateDurationFromStart(metrics.Reconfigure, reconfigureStart)
 	return a.autoscaler.RunOnce(currentTime)
 }
 
diff --git a/cluster-autoscaler/core/polling_autoscaler.go b/cluster-autoscaler/core/polling_autoscaler.go
index 81b8f72dbb..b13ae00572 100644
--- a/cluster-autoscaler/core/polling_autoscaler.go
+++ b/cluster-autoscaler/core/polling_autoscaler.go
@@ -64,11 +64,11 @@ func (a *PollingAutoscaler) ExitCleanUp() {
 // RunOnce represents a single iteration of a polling autoscaler inside the CA's control-loop
 func (a *PollingAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError {
 	reconfigureStart := time.Now()
-	metrics.UpdateLastTime("poll", reconfigureStart)
+	metrics.UpdateLastTime(metrics.Poll, reconfigureStart)
 	if err := a.Poll(); err != nil {
 		glog.Errorf("Failed to poll : %v", err)
 	}
-	metrics.UpdateDuration("poll", reconfigureStart)
+	metrics.UpdateDurationFromStart(metrics.Poll, reconfigureStart)
 	return a.autoscaler.RunOnce(currentTime)
 }
 
diff --git a/cluster-autoscaler/core/scale_down.go b/cluster-autoscaler/core/scale_down.go
index e98ada71b9..766d423579 100644
--- a/cluster-autoscaler/core/scale_down.go
+++ b/cluster-autoscaler/core/scale_down.go
@@ -203,8 +203,11 @@ func (sd *ScaleDown) UpdateUnneededNodes(
 // TryToScaleDown tries to scale down the cluster. It returns ScaleDownResult indicating if any node was
 // removed and error if such occurred.
 func (sd *ScaleDown) TryToScaleDown(nodes []*apiv1.Node, pods []*apiv1.Pod, pdbs []*policyv1.PodDisruptionBudget) (ScaleDownResult, errors.AutoscalerError) {
 	now := time.Now()
+	nodeDeletionDuration := time.Duration(0)
+	findNodesToRemoveDuration := time.Duration(0)
+	defer updateScaleDownMetrics(now, &findNodesToRemoveDuration, &nodeDeletionDuration)
 
 	candidates := make([]*apiv1.Node, 0)
 	readinessMap := make(map[string]bool)
 
@@ -266,55 +268,23 @@ func (sd *ScaleDown) TryToScaleDown(nodes []*apiv1.Node, pods []*apiv1.Pod, pdbs
 	// to recreate on other nodes.
 	emptyNodes := getEmptyNodes(candidates, pods, sd.context.MaxEmptyBulkDelete, sd.context.CloudProvider)
 	if len(emptyNodes) > 0 {
+		nodeDeletionStart := time.Now()
 		confirmation := make(chan errors.AutoscalerError, len(emptyNodes))
-		for _, node := range emptyNodes {
-			glog.V(0).Infof("Scale-down: removing empty node %s", node.Name)
-			sd.context.LogRecorder.Eventf(apiv1.EventTypeNormal, "ScaleDownEmpty", "Scale-down: removing empty node %s", node.Name)
-			simulator.RemoveNodeFromTracker(sd.usageTracker, node.Name, sd.unneededNodes)
-			go func(nodeToDelete *apiv1.Node) {
-
-				deleteErr := deleteNodeFromCloudProvider(nodeToDelete, sd.context.CloudProvider,
-					sd.context.Recorder, sd.context.ClusterStateRegistry)
-				if deleteErr == nil {
-					if readinessMap[nodeToDelete.Name] {
-						metrics.RegisterScaleDown(1, metrics.Empty)
-					} else {
-						metrics.RegisterScaleDown(1, metrics.Unready)
-					}
-				}
-				confirmation <- deleteErr
-			}(node)
-		}
-		var finalError errors.AutoscalerError
-
-		startTime := time.Now()
-		for range emptyNodes {
-			timeElapsed := time.Now().Sub(startTime)
-			timeLeft := MaxCloudProviderNodeDeletionTime - timeElapsed
-			if timeLeft < 0 {
-				finalError = errors.NewAutoscalerError(errors.TransientError, "Failed to delete nodes in time")
-				break
-			}
-			select {
-			case err := <-confirmation:
-				if err != nil {
-					glog.Errorf("Problem with empty node deletion: %v", err)
-					finalError = err
-				}
-			case <-time.After(timeLeft):
-				finalError = errors.NewAutoscalerError(errors.TransientError, "Failed to delete nodes in time")
-			}
-		}
-		if finalError == nil {
+		sd.scheduleDeleteEmptyNodes(emptyNodes, readinessMap, confirmation)
+		err := sd.waitForEmptyNodesDeleted(emptyNodes, confirmation)
+		nodeDeletionDuration = time.Now().Sub(nodeDeletionStart)
+		if err == nil {
 			return ScaleDownNodeDeleted, nil
 		}
-		return ScaleDownError, finalError.AddPrefix("failed to delete at least one empty node: ")
+		return ScaleDownError, err.AddPrefix("failed to delete at least one empty node: ")
 	}
 
+	findNodesToRemoveStart := time.Now()
 	// We look for only 1 node so new hints may be incomplete.
 	nodesToRemove, _, err := simulator.FindNodesToRemove(candidates, nodes, pods, sd.context.ClientSet,
 		sd.context.PredicateChecker, 1, false,
 		sd.podLocationHints, sd.usageTracker, time.Now(), pdbs)
+	findNodesToRemoveDuration = time.Now().Sub(findNodesToRemoveStart)
 
 	if err != nil {
 		return ScaleDownError, err.AddPrefix("Find node to remove failed: ")
@@ -336,7 +306,9 @@ func (sd *ScaleDown) TryToScaleDown(nodes []*apiv1.Node, pods []*apiv1.Pod, pdbs
 
 	// Nothing super-bad should happen if the node is removed from tracker prematurely.
 	simulator.RemoveNodeFromTracker(sd.usageTracker, toRemove.Node.Name, sd.unneededNodes)
+	nodeDeletionStart := time.Now()
 	err = deleteNode(sd.context, toRemove.Node, toRemove.PodsToReschedule)
+	nodeDeletionDuration = time.Now().Sub(nodeDeletionStart)
 	if err != nil {
 		return ScaleDownError, err.AddPrefix("Failed to delete %s: ", toRemove.Node.Name)
 	}
@@ -349,6 +321,16 @@ func (sd *ScaleDown) TryToScaleDown(nodes []*apiv1.Node, pods []*apiv1.Pod, pdbs
 	return ScaleDownNodeDeleted, nil
 }
 
+// updateScaleDownMetrics registers duration of different parts of scale down.
+// Separates time spent on finding nodes to remove, deleting nodes and other operations.
+func updateScaleDownMetrics(scaleDownStart time.Time, findNodesToRemoveDuration *time.Duration, nodeDeletionDuration *time.Duration) {
+	stop := time.Now()
+	miscDuration := stop.Sub(scaleDownStart) - *nodeDeletionDuration - *findNodesToRemoveDuration
+	metrics.UpdateDuration(metrics.ScaleDownNodeDeletion, *nodeDeletionDuration)
+	metrics.UpdateDuration(metrics.ScaleDownFindNodesToRemove, *findNodesToRemoveDuration)
+	metrics.UpdateDuration(metrics.ScaleDownMiscOperations, miscDuration)
+}
+
 // This functions finds empty nodes among passed candidates and returns a list of empty nodes
 // that can be deleted at the same time.
 func getEmptyNodes(candidates []*apiv1.Node, pods []*apiv1.Pod, maxEmptyBulkDelete int, cloudProvider cloudprovider.CloudProvider) []*apiv1.Node {
@@ -391,6 +373,51 @@ func getEmptyNodes(candidates []*apiv1.Node, pods []*apiv1.Pod, maxEmptyBulkDele
 	return result[:limit]
 }
 
+func (sd *ScaleDown) scheduleDeleteEmptyNodes(emptyNodes []*apiv1.Node,
+	readinessMap map[string]bool, confirmation chan errors.AutoscalerError) {
+	for _, node := range emptyNodes {
+		glog.V(0).Infof("Scale-down: removing empty node %s", node.Name)
+		sd.context.LogRecorder.Eventf(apiv1.EventTypeNormal, "ScaleDownEmpty", "Scale-down: removing empty node %s", node.Name)
+		simulator.RemoveNodeFromTracker(sd.usageTracker, node.Name, sd.unneededNodes)
+		go func(nodeToDelete *apiv1.Node) {
+
+			deleteErr := deleteNodeFromCloudProvider(nodeToDelete, sd.context.CloudProvider,
+				sd.context.Recorder, sd.context.ClusterStateRegistry)
+			if deleteErr == nil {
+				if readinessMap[nodeToDelete.Name] {
+					metrics.RegisterScaleDown(1, metrics.Empty)
+				} else {
+					metrics.RegisterScaleDown(1, metrics.Unready)
+				}
+			}
+			confirmation <- deleteErr
+		}(node)
+	}
+}
+
+func (sd *ScaleDown) waitForEmptyNodesDeleted(emptyNodes []*apiv1.Node, confirmation chan errors.AutoscalerError) errors.AutoscalerError {
+	var finalError errors.AutoscalerError
+
+	startTime := time.Now()
+	for range emptyNodes {
+		timeElapsed := time.Now().Sub(startTime)
+		timeLeft := MaxCloudProviderNodeDeletionTime - timeElapsed
+		if timeLeft < 0 {
+			return errors.NewAutoscalerError(errors.TransientError, "Failed to delete nodes in time")
+		}
+		select {
+		case err := <-confirmation:
+			if err != nil {
+				glog.Errorf("Problem with empty node deletion: %v", err)
+				finalError = err
+			}
+		case <-time.After(timeLeft):
+			finalError = errors.NewAutoscalerError(errors.TransientError, "Failed to delete nodes in time")
+		}
+	}
+	return finalError
+}
+
 func deleteNode(context *AutoscalingContext, node *apiv1.Node, pods []*apiv1.Pod) errors.AutoscalerError {
 	if err := drainNode(node, pods, context.ClientSet, context.Recorder, context.MaxGracefulTerminationSec,
 		MaxPodEvictionTime, EvictionRetryTime); err != nil {
diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go
index 01ec594e6d..4b44138731 100644
--- a/cluster-autoscaler/core/static_autoscaler.go
+++ b/cluster-autoscaler/core/static_autoscaler.go
@@ -135,8 +135,8 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
 		return nil
 	}
 
-	metrics.UpdateDuration("updateClusterState", runStart)
-	metrics.UpdateLastTime("autoscaling", time.Now())
+	metrics.UpdateDurationFromStart(metrics.UpdateState, runStart)
+	metrics.UpdateLastTime(metrics.Autoscaling, time.Now())
 
 	// Check if there are any nodes that failed to register in Kubernetes
 	// master.
@@ -227,11 +227,11 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
 		}
 
 		scaleUpStart := time.Now()
-		metrics.UpdateLastTime("scaleUp", scaleUpStart)
+		metrics.UpdateLastTime(metrics.ScaleUp, scaleUpStart)
 
 		scaledUp, typedErr := ScaleUp(autoscalingContext, unschedulablePodsToHelp, readyNodes, daemonsets)
 
-		metrics.UpdateDuration("scaleUp", scaleUpStart)
+		metrics.UpdateDurationFromStart(metrics.ScaleUp, scaleUpStart)
 
 		if typedErr != nil {
 			glog.Errorf("Failed to scale up: %v", typedErr)
@@ -271,7 +271,7 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
 			return typedErr
 		}
 
-		metrics.UpdateDuration("findUnneeded", unneededStart)
+		metrics.UpdateDurationFromStart(metrics.FindUnneeded, unneededStart)
 
 		for key, val := range scaleDown.unneededNodes {
 			if glog.V(4) {
@@ -283,9 +283,9 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
 			glog.V(4).Infof("Starting scale down")
 
 			scaleDownStart := time.Now()
-			metrics.UpdateLastTime("scaleDown", scaleDownStart)
+			metrics.UpdateLastTime(metrics.ScaleDown, scaleDownStart)
 			result, typedErr := scaleDown.TryToScaleDown(allNodes, allScheduled, pdbs)
-			metrics.UpdateDuration("scaleDown", scaleDownStart)
+			metrics.UpdateDurationFromStart(metrics.ScaleDown, scaleDownStart)
 
 			// TODO: revisit result handling
 			if typedErr != nil {
diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go
index 574e4bfb6d..27b9145627 100644
--- a/cluster-autoscaler/main.go
+++ b/cluster-autoscaler/main.go
@@ -192,7 +192,7 @@ func run(healthCheck *metrics.HealthCheck) {
 		case <-time.After(*scanInterval):
 			{
 				loopStart := time.Now()
-				metrics.UpdateLastTime("main", loopStart)
+				metrics.UpdateLastTime(metrics.Main, loopStart)
 				healthCheck.UpdateLastActivity(loopStart)
 
 				err := autoscaler.RunOnce(loopStart)
@@ -202,7 +202,7 @@ func run(healthCheck *metrics.HealthCheck) {
 					healthCheck.UpdateLastSuccessfulRun(time.Now())
 				}
 
-				metrics.UpdateDuration("main", loopStart)
+				metrics.UpdateDurationFromStart(metrics.Main, loopStart)
 			}
 		}
 	}
diff --git a/cluster-autoscaler/metrics/metrics.go b/cluster-autoscaler/metrics/metrics.go
index 57ab2df5bc..dc8f61d707 100644
--- a/cluster-autoscaler/metrics/metrics.go
+++ b/cluster-autoscaler/metrics/metrics.go
@@ -30,6 +30,10 @@ import (
 // NodeScaleDownReason describes reason for removing node
 type NodeScaleDownReason string
 
+// FunctionLabel is a name of Cluster Autoscaler operation for which
+// we measure duration
+type FunctionLabel string
+
 const (
 	caNamespace = "cluster_autoscaler"
 	readyLabel  = "ready"
@@ -50,6 +54,21 @@ const (
 	LogLongDurationThreshold = 5 * time.Second
 )
 
+// Names of Cluster Autoscaler operations
+const (
+	ScaleDown                  FunctionLabel = "scaleDown"
+	ScaleDownNodeDeletion      FunctionLabel = "scaleDown:nodeDeletion"
+	ScaleDownFindNodesToRemove FunctionLabel = "scaleDown:findNodesToRemove"
+	ScaleDownMiscOperations    FunctionLabel = "scaleDown:miscOperations"
+	ScaleUp                    FunctionLabel = "scaleUp"
+	FindUnneeded               FunctionLabel = "findUnneeded"
+	UpdateState                FunctionLabel = "updateClusterState"
+	Main                       FunctionLabel = "main"
+	Poll                       FunctionLabel = "poll"
+	Reconfigure                FunctionLabel = "reconfigure"
+	Autoscaling                FunctionLabel = "autoscaling"
+)
+
 var (
 	/**** Metrics related to cluster state ****/
 	clusterSafeToAutoscale = prometheus.NewGauge(
@@ -149,20 +168,26 @@ func init() {
 	prometheus.MustRegister(unneededNodesCount)
 }
 
-// UpdateDuration records the duration of the step identified by the label
-func UpdateDuration(label string, start time.Time) {
+// UpdateDurationFromStart records the duration of the step identified by the
+// label using start time
+func UpdateDurationFromStart(label FunctionLabel, start time.Time) {
 	duration := time.Now().Sub(start)
+	UpdateDuration(label, duration)
+}
+
+// UpdateDuration records the duration of the step identified by the label
+func UpdateDuration(label FunctionLabel, duration time.Duration) {
 	// TODO(maciekpytel): remove second condition if we manage to get
 	// asynchronous node drain
-	if duration > LogLongDurationThreshold && label != "scaleDown" {
+	if duration > LogLongDurationThreshold && label != ScaleDown {
 		glog.Infof("Function %s took %v to complete", label, duration)
 	}
-	functionDuration.WithLabelValues(label).Observe(duration.Seconds())
+	functionDuration.WithLabelValues(string(label)).Observe(duration.Seconds())
 }
 
 // UpdateLastTime records the time the step identified by the label was started
-func UpdateLastTime(label string, now time.Time) {
-	lastActivity.WithLabelValues(label).Set(float64(now.Unix()))
+func UpdateLastTime(label FunctionLabel, now time.Time) {
+	lastActivity.WithLabelValues(string(label)).Set(float64(now.Unix()))
 }
 
 // UpdateClusterState updates metrics related to cluster state
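
The pattern worth noting in this patch is how TryToScaleDown attributes time to its sub-phases: it zeroes the duration variables up front, defers updateScaleDownMetrics with pointers to them, fills them in as each phase finishes, and lets the deferred call charge whatever is left of the total elapsed time to scaleDown:miscOperations. The standalone sketch below (not part of the patch) illustrates that pattern under simplified assumptions; recordDuration, reportPhases, and tryToScaleDown are hypothetical stand-ins for metrics.UpdateDuration, updateScaleDownMetrics, and the real TryToScaleDown, with time.Sleep in place of the actual work.

package main

import (
	"fmt"
	"time"
)

// recordDuration stands in for metrics.UpdateDuration(label, duration).
func recordDuration(label string, d time.Duration) {
	fmt.Printf("%-30s %v\n", label, d)
}

// reportPhases mirrors updateScaleDownMetrics: it is deferred with pointers so
// it reads the final sub-phase durations written by the caller, and attributes
// the remainder of the total elapsed time to "miscOperations".
func reportPhases(start time.Time, findDuration *time.Duration, deleteDuration *time.Duration) {
	misc := time.Since(start) - *findDuration - *deleteDuration
	recordDuration("scaleDown:findNodesToRemove", *findDuration)
	recordDuration("scaleDown:nodeDeletion", *deleteDuration)
	recordDuration("scaleDown:miscOperations", misc)
}

// tryToScaleDown sketches the shape of ScaleDown.TryToScaleDown after the patch.
func tryToScaleDown() {
	start := time.Now()
	findDuration := time.Duration(0)
	deleteDuration := time.Duration(0)
	defer reportPhases(start, &findDuration, &deleteDuration)

	findStart := time.Now()
	time.Sleep(10 * time.Millisecond) // placeholder for simulator.FindNodesToRemove
	findDuration = time.Since(findStart)

	deleteStart := time.Now()
	time.Sleep(20 * time.Millisecond) // placeholder for deleteNode
	deleteDuration = time.Since(deleteStart)

	time.Sleep(5 * time.Millisecond) // everything else lands in miscOperations
}

func main() {
	tryToScaleDown()
}

The pointer arguments are the design choice that makes this work: defer evaluates its arguments immediately, so passing *time.Duration rather than the values is what lets the deferred call observe durations that are only assigned later in the function body.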