Allow name of cluster-autoscaler status ConfigMap to be specificed

This allows us to run two instances of cluster-autoscaler in our
cluster, targeting two different types of autoscaling groups that
require different command-line settings to be passed.
This commit is contained in:
Eric Mrak and Brett Kochendorfer 2021-02-17 21:52:54 +00:00
parent f9751f18eb
commit 43dd34074e
6 changed files with 17 additions and 15 deletions

View File

@ -33,8 +33,6 @@ import (
)
const (
// StatusConfigMapName is the name of ConfigMap with status.
StatusConfigMapName = "cluster-autoscaler-status"
// ConfigMapLastUpdatedKey is the name of annotation informing about status ConfigMap last update.
ConfigMapLastUpdatedKey = "cluster-autoscaler.kubernetes.io/last-updated"
// ConfigMapLastUpdateFormat it the timestamp format used for last update annotation in status ConfigMap
@ -65,11 +63,11 @@ func (ler *LogEventRecorder) Eventf(eventtype, reason, message string, args ...i
// NewStatusMapRecorder creates a LogEventRecorder creating events on status configmap.
// If the configmap doesn't exist it will be created (with 'Initializing' status).
// If active == false the map will not be created and no events will be recorded.
func NewStatusMapRecorder(kubeClient kube_client.Interface, namespace string, recorder record.EventRecorder, active bool) (*LogEventRecorder, error) {
func NewStatusMapRecorder(kubeClient kube_client.Interface, namespace string, recorder record.EventRecorder, active bool, statusConfigMapName string) (*LogEventRecorder, error) {
var mapObj runtime.Object
var err error
if active {
mapObj, err = WriteStatusConfigMap(kubeClient, namespace, "Initializing", nil)
mapObj, err = WriteStatusConfigMap(kubeClient, namespace, "Initializing", nil, statusConfigMapName)
if err != nil {
return nil, errors.New("Failed to init status ConfigMap")
}
@ -84,14 +82,14 @@ func NewStatusMapRecorder(kubeClient kube_client.Interface, namespace string, re
// WriteStatusConfigMap writes updates status ConfigMap with a given message or creates a new
// ConfigMap if it doesn't exist. If logRecorder is passed and configmap update is successful
// logRecorder's internal reference will be updated.
func WriteStatusConfigMap(kubeClient kube_client.Interface, namespace string, msg string, logRecorder *LogEventRecorder) (*apiv1.ConfigMap, error) {
func WriteStatusConfigMap(kubeClient kube_client.Interface, namespace string, msg string, logRecorder *LogEventRecorder, statusConfigMapName string) (*apiv1.ConfigMap, error) {
statusUpdateTime := time.Now().Format(ConfigMapLastUpdateFormat)
statusMsg := fmt.Sprintf("Cluster-autoscaler status at %s:\n%v", statusUpdateTime, msg)
var configMap *apiv1.ConfigMap
var getStatusError, writeStatusError error
var errMsg string
maps := kubeClient.CoreV1().ConfigMaps(namespace)
configMap, getStatusError = maps.Get(context.TODO(), StatusConfigMapName, metav1.GetOptions{})
configMap, getStatusError = maps.Get(context.TODO(), statusConfigMapName, metav1.GetOptions{})
if getStatusError == nil {
configMap.Data["status"] = statusMsg
if configMap.ObjectMeta.Annotations == nil {
@ -103,7 +101,7 @@ func WriteStatusConfigMap(kubeClient kube_client.Interface, namespace string, ms
configMap = &apiv1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
Name: StatusConfigMapName,
Name: statusConfigMapName,
Annotations: map[string]string{
ConfigMapLastUpdatedKey: statusUpdateTime,
},
@ -133,9 +131,9 @@ func WriteStatusConfigMap(kubeClient kube_client.Interface, namespace string, ms
}
// DeleteStatusConfigMap deletes status configmap
func DeleteStatusConfigMap(kubeClient kube_client.Interface, namespace string) error {
func DeleteStatusConfigMap(kubeClient kube_client.Interface, namespace string, statusConfigMapName string) error {
maps := kubeClient.CoreV1().ConfigMaps(namespace)
err := maps.Delete(context.TODO(), StatusConfigMapName, metav1.DeleteOptions{})
err := maps.Delete(context.TODO(), statusConfigMapName, metav1.DeleteOptions{})
if err != nil {
klog.Error("Failed to delete status configmap")
}

View File

@ -117,6 +117,8 @@ type AutoscalingOptions struct {
NodeDeletionDelayTimeout time.Duration
// WriteStatusConfigMap tells if the status information should be written to a ConfigMap
WriteStatusConfigMap bool
// StaticConfigMapName
StatusConfigMapName string
// BalanceSimilarNodeGroups enables logic that identifies node groups with similar machines and tries to balance node count between them.
BalanceSimilarNodeGroups bool
// ConfigNamespace is the namespace cluster-autoscaler is running in and all related configmaps live in

View File

@ -111,12 +111,12 @@ func NewAutoscalingKubeClients(opts config.AutoscalingOptions, kubeClient, event
listerRegistryStopChannel := make(chan struct{})
listerRegistry := kube_util.NewListerRegistryWithDefaultListers(kubeClient, listerRegistryStopChannel)
kubeEventRecorder := kube_util.CreateEventRecorder(eventsKubeClient)
logRecorder, err := utils.NewStatusMapRecorder(kubeClient, opts.ConfigNamespace, kubeEventRecorder, opts.WriteStatusConfigMap)
logRecorder, err := utils.NewStatusMapRecorder(kubeClient, opts.ConfigNamespace, kubeEventRecorder, opts.WriteStatusConfigMap, opts.StatusConfigMapName)
if err != nil {
klog.Error("Failed to initialize status configmap, unable to write status events")
// Get a dummy, so we can at least safely call the methods
// TODO(maciekpytel): recover from this after successful status configmap update?
logRecorder, _ = utils.NewStatusMapRecorder(eventsKubeClient, opts.ConfigNamespace, kubeEventRecorder, false)
logRecorder, _ = utils.NewStatusMapRecorder(eventsKubeClient, opts.ConfigNamespace, kubeEventRecorder, false, opts.StatusConfigMapName)
}
return &AutoscalingKubeClients{

View File

@ -153,7 +153,7 @@ func NewScaleTestAutoscalingContext(
// Not enough buffer space causes the test to hang without printing any logs.
// This is not useful.
fakeRecorder := kube_record.NewFakeRecorder(100)
fakeLogRecorder, err := utils.NewStatusMapRecorder(fakeClient, "kube-system", fakeRecorder, false)
fakeLogRecorder, err := utils.NewStatusMapRecorder(fakeClient, "kube-system", fakeRecorder, false, "my-cool-configmap")
if err != nil {
return context.AutoscalingContext{}, err
}

View File

@ -294,7 +294,7 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
if autoscalingContext.WriteStatusConfigMap {
status := a.clusterStateRegistry.GetStatus(currentTime)
utils.WriteStatusConfigMap(autoscalingContext.ClientSet, autoscalingContext.ConfigNamespace,
status.GetReadableString(), a.AutoscalingContext.LogRecorder)
status.GetReadableString(), a.AutoscalingContext.LogRecorder, a.AutoscalingContext.StatusConfigMapName)
}
// This deferred processor execution allows the processors to handle a situation when a scale-(up|down)
@ -696,7 +696,7 @@ func (a *StaticAutoscaler) ExitCleanUp() {
if !a.AutoscalingContext.WriteStatusConfigMap {
return
}
utils.DeleteStatusConfigMap(a.AutoscalingContext.ClientSet, a.AutoscalingContext.ConfigNamespace)
utils.DeleteStatusConfigMap(a.AutoscalingContext.ClientSet, a.AutoscalingContext.ConfigNamespace, a.AutoscalingContext.StatusConfigMapName)
a.clusterStateRegistry.Stop()
}
@ -761,7 +761,7 @@ func (a *StaticAutoscaler) onEmptyCluster(status string, emitEvent bool) {
metrics.UpdateClusterSafeToAutoscale(false)
metrics.UpdateNodesCount(0, 0, 0, 0, 0)
if a.AutoscalingContext.WriteStatusConfigMap {
utils.WriteStatusConfigMap(a.AutoscalingContext.ClientSet, a.AutoscalingContext.ConfigNamespace, status, a.AutoscalingContext.LogRecorder)
utils.WriteStatusConfigMap(a.AutoscalingContext.ClientSet, a.AutoscalingContext.ConfigNamespace, status, a.AutoscalingContext.LogRecorder, a.AutoscalingContext.StatusConfigMapName)
}
if emitEvent {
a.AutoscalingContext.LogRecorder.Eventf(apiv1.EventTypeWarning, "ClusterUnhealthy", status)

View File

@ -158,6 +158,7 @@ var (
"Should CA ignore Mirror pods when calculating resource utilization for scaling down")
writeStatusConfigMapFlag = flag.Bool("write-status-configmap", true, "Should CA write status information to a configmap")
statusConfigMapName = flag.String("status-config-map-name", "cluster-autoscaler-status", "Status configmap name")
maxInactivityTimeFlag = flag.Duration("max-inactivity", 10*time.Minute, "Maximum time from last recorded autoscaler activity before automatic restart")
maxFailingTimeFlag = flag.Duration("max-failing-time", 15*time.Minute, "Maximum time from last recorded successful autoscaler run before automatic restart")
balanceSimilarNodeGroupsFlag = flag.Bool("balance-similar-node-groups", false, "Detect similar node groups and balance the number of nodes between them")
@ -233,6 +234,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
ScaleDownCandidatesPoolRatio: *scaleDownCandidatesPoolRatio,
ScaleDownCandidatesPoolMinCount: *scaleDownCandidatesPoolMinCount,
WriteStatusConfigMap: *writeStatusConfigMapFlag,
StatusConfigMapName: *statusConfigMapName,
BalanceSimilarNodeGroups: *balanceSimilarNodeGroupsFlag,
ConfigNamespace: *namespace,
ClusterName: *clusterName,