fix incomplete startup of informers

Previously, SharedInformerFactory.Start was called before core.NewAutoscaler.
As a result, any informer created as part of core.NewAutoscaler, in particular
in kubernetes.NewListerRegistryWithDefaultListers, never got started.

One of them was the DaemonSet informer. Because that informer never ran, the
DaemonSet lister had an empty cache and scale down failed with:

    I0920 11:06:36.046889   31805 cluster.go:164] node gke-cluster-pohly-default-pool-c9f60a43-5rvz cannot be removed: daemonset for kube-system/pdcsi-node-7hnmc is not present, err: daemonset.apps "pdcsi-node" not found

This was on a GKE cluster with cluster-autoscaler running outside of the
cluster on a development machine.
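
A minimal, self-contained sketch of the ordering problem (not part of this
commit; it assumes client-go's informers and clientcmd packages and uses the
default kubeconfig path and resync period as placeholders):
SharedInformerFactory.Start only launches informers that already exist in the
factory, so an informer obtained after Start never syncs its cache unless
Start is called again.

    package main

    import (
    	"fmt"
    	"time"

    	"k8s.io/apimachinery/pkg/labels"
    	"k8s.io/client-go/informers"
    	"k8s.io/client-go/kubernetes"
    	"k8s.io/client-go/tools/clientcmd"
    )

    func main() {
    	// Placeholder client setup; any reachable kubeconfig will do.
    	config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
    	if err != nil {
    		panic(err)
    	}
    	client := kubernetes.NewForConfigOrDie(config)
    	factory := informers.NewSharedInformerFactory(client, time.Hour)

    	stop := make(chan struct{})
    	defer close(stop)

    	// Start is called before any DaemonSet informer exists, so it starts
    	// nothing for DaemonSets (mirroring the old buildAutoscaler order).
    	factory.Start(stop)

    	// The informer backing this lister is created now but never runs:
    	// its cache stays empty, which is what produced the
    	// "daemonset.apps ... not found" error during scale down.
    	dsLister := factory.Apps().V1().DaemonSets().Lister()

    	// Calling Start again after all informers have been requested fixes
    	// it: Start only launches informers that have not been started yet.
    	factory.Start(stop)
    	factory.WaitForCacheSync(stop)

    	daemonSets, err := dsLister.List(labels.Everything())
    	fmt.Println(len(daemonSets), err)
    }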
Patrick Ohly 2023-09-20 11:20:35 +02:00
parent f9a7c7f73f
commit ade5e0814e
1 changed file with 11 additions and 4 deletions


@@ -498,16 +498,23 @@ func buildAutoscaler(debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter
 		Comparator: nodeInfoComparator,
 	}
 
-	stop := make(chan struct{})
-	informerFactory.Start(stop)
-
 	// These metrics should be published only once.
 	metrics.UpdateNapEnabled(autoscalingOptions.NodeAutoprovisioningEnabled)
 	metrics.UpdateCPULimitsCores(autoscalingOptions.MinCoresTotal, autoscalingOptions.MaxCoresTotal)
 	metrics.UpdateMemoryLimitsBytes(autoscalingOptions.MinMemoryTotal, autoscalingOptions.MaxMemoryTotal)
 
 	// Create autoscaler.
-	return core.NewAutoscaler(opts)
+	autoscaler, err := core.NewAutoscaler(opts)
+	if err != nil {
+		return nil, err
+	}
+
+	// Start informers. This must come after fully constructing the autoscaler because
+	// additional informers might have been registered in the factory during NewAutoscaler.
+	stop := make(chan struct{})
+	informerFactory.Start(stop)
+
+	return autoscaler, nil
 }
 
 func run(healthCheck *metrics.HealthCheck, debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter) {