fix incomplete startup of informers

Previously, SharedInformerFactory.Start was called before core.NewAutoscaler.
As a result, any informer created as part of core.NewAutoscaler, in particular
in kubernetes.NewListerRegistryWithDefaultListers, never got started.

One of them was the DaemonSet informer. Because that informer never ran, the
DaemonSet lister had an empty cache and scale down failed with:

    I0920 11:06:36.046889   31805 cluster.go:164] node gke-cluster-pohly-default-pool-c9f60a43-5rvz cannot be removed: daemonset for kube-system/pdcsi-node-7hnmc is not present, err: daemonset.apps "pdcsi-node" not found

This was on a GKE cluster with cluster-autoscaler running outside of the
cluster on a development machine.
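
A minimal, self-contained sketch of the ordering problem (not part of this
commit; it assumes client-go's informers and clientcmd packages and uses the
default kubeconfig path and resync period as placeholders):
SharedInformerFactory.Start only launches informers that already exist in the
factory, so an informer obtained after Start never syncs its cache unless
Start is called again.

    package main

    import (
    	"fmt"
    	"time"

    	"k8s.io/apimachinery/pkg/labels"
    	"k8s.io/client-go/informers"
    	"k8s.io/client-go/kubernetes"
    	"k8s.io/client-go/tools/clientcmd"
    )

    func main() {
    	// Placeholder client setup; any reachable kubeconfig will do.
    	config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
    	if err != nil {
    		panic(err)
    	}
    	client := kubernetes.NewForConfigOrDie(config)
    	factory := informers.NewSharedInformerFactory(client, time.Hour)

    	stop := make(chan struct{})
    	defer close(stop)

    	// Start is called before any DaemonSet informer exists, so it starts
    	// nothing for DaemonSets (mirroring the old buildAutoscaler order).
    	factory.Start(stop)

    	// The informer backing this lister is created now but never runs:
    	// its cache stays empty, which is what produced the
    	// "daemonset.apps ... not found" error during scale down.
    	dsLister := factory.Apps().V1().DaemonSets().Lister()

    	// Calling Start again after all informers have been requested fixes
    	// it: Start only launches informers that have not been started yet.
    	factory.Start(stop)
    	factory.WaitForCacheSync(stop)

    	daemonSets, err := dsLister.List(labels.Everything())
    	fmt.Println(len(daemonSets), err)
    }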
Patrick Ohly 2023-09-20 11:20:35 +02:00
parent f9a7c7f73f
commit ade5e0814e
1 changed file with 11 additions and 4 deletions


@@ -498,16 +498,23 @@ func buildAutoscaler(debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter
 		Comparator: nodeInfoComparator,
 	}
 
-	stop := make(chan struct{})
-	informerFactory.Start(stop)
-
 	// These metrics should be published only once.
 	metrics.UpdateNapEnabled(autoscalingOptions.NodeAutoprovisioningEnabled)
 	metrics.UpdateCPULimitsCores(autoscalingOptions.MinCoresTotal, autoscalingOptions.MaxCoresTotal)
 	metrics.UpdateMemoryLimitsBytes(autoscalingOptions.MinMemoryTotal, autoscalingOptions.MaxMemoryTotal)
 
 	// Create autoscaler.
-	return core.NewAutoscaler(opts)
+	autoscaler, err := core.NewAutoscaler(opts)
+	if err != nil {
+		return nil, err
+	}
+
+	// Start informers. This must come after fully constructing the autoscaler because
+	// additional informers might have been registered in the factory during NewAutoscaler.
+	stop := make(chan struct{})
+	informerFactory.Start(stop)
+
+	return autoscaler, nil
 }
 
 func run(healthCheck *metrics.HealthCheck, debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter) {