add a flag to allow event duplication
This change introduces a new command line flag, `--record-duplicated-events`, which lets a user record every event by bypassing the 5 minute event de-duplication window.
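Mechanically, the change boils down to choosing between the two event broadcaster constructors in client-go's `record` package. The snippet below is a condensed sketch of that idea, not the literal code (the real implementation is in the `CreateEventRecorder` hunk further down):

```go
package main

import (
	kube_record "k8s.io/client-go/tools/record"
)

// newBroadcaster sketches the core of the change: with the flag set, events are
// fed through a plain broadcaster, so nothing is aggregated or rate limited;
// without it, a correlating broadcaster de-duplicates similar events.
func newBroadcaster(recordDuplicatedEvents bool) kube_record.EventBroadcaster {
	if recordDuplicatedEvents {
		return kube_record.NewBroadcaster()
	}
	// The real code passes the autoscaler's own correlator options here; an
	// empty CorrelatorOptions struct simply falls back to client-go defaults.
	return kube_record.NewBroadcasterWithCorrelatorOptions(kube_record.CorrelatorOptions{})
}

func main() {
	_ = newBroadcaster(true)
}
```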
This commit is contained in:
parent 4a97d16399
commit 8c27f76933
@@ -28,6 +28,7 @@ this document:
 * [How to?](#how-to)
   * [I'm running cluster with nodes in multiple zones for HA purposes. Is that supported by Cluster Autoscaler?](#im-running-cluster-with-nodes-in-multiple-zones-for-ha-purposes-is-that-supported-by-cluster-autoscaler)
   * [How can I monitor Cluster Autoscaler?](#how-can-i-monitor-cluster-autoscaler)
+  * [How can I see all the events from Cluster Autoscaler?](#how-can-i-see-all-events-from-cluster-autoscaler)
   * [How can I scale my cluster to just 1 node?](#how-can-i-scale-my-cluster-to-just-1-node)
   * [How can I scale a node group to 0?](#how-can-i-scale-a-node-group-to-0)
   * [How can I prevent Cluster Autoscaler from scaling down a particular node?](#how-can-i-prevent-cluster-autoscaler-from-scaling-down-a-particular-node)
@@ -267,6 +268,16 @@ respectively under `/metrics` and `/health-check`.
 Metrics are provided in Prometheus format and their detailed description is
 available [here](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/proposals/metrics.md).

+### How can I see all events from Cluster Autoscaler?
+
+By default, the Cluster Autoscaler will deduplicate similar events that occur within a 5 minute
+window. This is done to improve scalability performance where many similar events might be
+triggered in a short timespan, such as when there are too many unscheduled pods.
+
+In some cases, such as for debugging or when scalability of events is not an issue, you might
+want to see all the events coming from the Cluster Autoscaler. In these scenarios you should
+use the `--record-duplicated-events` command line flag.
+
 ### How can I scale my cluster to just 1 node?

 Prior to version 0.6, Cluster Autoscaler was not touching nodes that were running important
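To make the new FAQ entry concrete, here is a hedged illustration of what repeated events look like with and without the flag. Names such as `recorder`, the node name, and the event reason are hypothetical and not taken from this commit, and the sketch does not start a sink, so nothing is actually sent to a cluster:

```go
package main

import (
	"time"

	apiv1 "k8s.io/api/core/v1"
	"k8s.io/client-go/kubernetes/scheme"
	kube_record "k8s.io/client-go/tools/record"
)

func main() {
	// This sketch uses the plain broadcaster, i.e. the --record-duplicated-events
	// behaviour: every Event() call below would surface as its own recorded event.
	// With the default correlating broadcaster, client-go's EventCorrelator would
	// instead fold or drop the repeats that land inside the de-duplication window.
	// (In the real autoscaler the broadcaster is also started against the
	// cluster's event sink; that wiring is omitted here.)
	broadcaster := kube_record.NewBroadcaster()
	recorder := broadcaster.NewRecorder(scheme.Scheme, apiv1.EventSource{Component: "cluster-autoscaler"})

	node := &apiv1.Node{}
	node.Name = "example-node"
	for i := 0; i < 3; i++ {
		recorder.Event(node, apiv1.EventTypeNormal, "ScaleDownFailed", "example message repeated in a short timespan")
		time.Sleep(time.Second)
	}
}
```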
@@ -760,6 +771,7 @@ The following startup parameters are supported for cluster autoscaler:
 | `daemonset-eviction-for-occupied-nodes` | Whether DaemonSet pods will be gracefully terminated from non-empty nodes | true
 | `feature-gates` | A set of key=value pairs that describe feature gates for alpha/experimental features. | ""
 | `cordon-node-before-terminating` | Should CA cordon nodes before terminating during downscale process | false
+| `record-duplicated-events` | Enable the autoscaler to print duplicated events within a 5 minute window. | false

 # Troubleshooting:

@@ -183,4 +183,6 @@ type AutoscalingOptions struct {
 	MaxDrainParallelism int
 	// GceExpanderEphemeralStorageSupport is whether scale-up takes ephemeral storage resources into account.
 	GceExpanderEphemeralStorageSupport bool
+	// RecordDuplicatedEvents controls whether events should be duplicated within a 5 minute window.
+	RecordDuplicatedEvents bool
 }
@@ -115,7 +115,7 @@ func NewAutoscalingContext(
 func NewAutoscalingKubeClients(opts config.AutoscalingOptions, kubeClient, eventsKubeClient kube_client.Interface) *AutoscalingKubeClients {
 	listerRegistryStopChannel := make(chan struct{})
 	listerRegistry := kube_util.NewListerRegistryWithDefaultListers(kubeClient, listerRegistryStopChannel)
-	kubeEventRecorder := kube_util.CreateEventRecorder(eventsKubeClient)
+	kubeEventRecorder := kube_util.CreateEventRecorder(eventsKubeClient, opts.RecordDuplicatedEvents)
 	logRecorder, err := utils.NewStatusMapRecorder(kubeClient, opts.ConfigNamespace, kubeEventRecorder, opts.WriteStatusConfigMap, opts.StatusConfigMapName)
 	if err != nil {
 		klog.Error("Failed to initialize status configmap, unable to write status events")
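With the wiring above, callers only need to set the option on `AutoscalingOptions`. A minimal, hedged usage sketch follows; the import paths are assumed from the repository layout, and fake clientsets are used purely to keep the example self-contained (a real deployment builds its clients from a kubeconfig):

```go
package main

import (
	"k8s.io/autoscaler/cluster-autoscaler/config"
	ca_context "k8s.io/autoscaler/cluster-autoscaler/context"
	"k8s.io/client-go/kubernetes/fake"
)

func main() {
	// Hedged sketch: the new option travels inside AutoscalingOptions and flips
	// the event recorder that NewAutoscalingKubeClients builds.
	kubeClient := fake.NewSimpleClientset()
	eventsKubeClient := fake.NewSimpleClientset()

	opts := config.AutoscalingOptions{
		RecordDuplicatedEvents: true, // what --record-duplicated-events sets
	}
	clients := ca_context.NewAutoscalingKubeClients(opts, kubeClient, eventsKubeClient)
	_ = clients
}
```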
@@ -198,10 +198,10 @@ var (
 		"maxNodeGroupBackoffDuration is the maximum backoff duration for a NodeGroup after new nodes failed to start.")
 	nodeGroupBackoffResetTimeout = flag.Duration("node-group-backoff-reset-timeout", 3*time.Hour,
 		"nodeGroupBackoffResetTimeout is the time after last failed scale-up when the backoff duration is reset.")

 	maxScaleDownParallelismFlag = flag.Int("max-scale-down-parallelism", 10, "Maximum number of nodes (both empty and needing drain) that can be deleted in parallel.")
 	maxDrainParallelismFlag = flag.Int("max-drain-parallelism", 1, "Maximum number of nodes needing drain, that can be drained and deleted in parallel.")
 	gceExpanderEphemeralStorageSupport = flag.Bool("gce-expander-ephemeral-storage-support", false, "Whether scale-up takes ephemeral storage resources into account for GCE cloud provider")
+	recordDuplicatedEvents = flag.Bool("record-duplicated-events", false, "enable duplication of similar events within a 5 minute window.")
 )

 func createAutoscalingOptions() config.AutoscalingOptions {
@@ -288,6 +288,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
 		MaxScaleDownParallelism: *maxScaleDownParallelismFlag,
 		MaxDrainParallelism: *maxDrainParallelismFlag,
 		GceExpanderEphemeralStorageSupport: *gceExpanderEphemeralStorageSupport,
+		RecordDuplicatedEvents: *recordDuplicatedEvents,
 	}
 }

@@ -475,7 +476,7 @@ func main() {
 		kubeClient.CoordinationV1(),
 		resourcelock.ResourceLockConfig{
 			Identity: id,
-			EventRecorder: kube_util.CreateEventRecorder(kubeClient),
+			EventRecorder: kube_util.CreateEventRecorder(kubeClient, *recordDuplicatedEvents),
 		},
 	)
 	if err != nil {
@@ -200,7 +200,7 @@ func TestCleanAllToBeDeleted(t *testing.T) {
 	n2.Spec.Taints = []apiv1.Taint{{Key: ToBeDeletedTaint, Value: strconv.FormatInt(time.Now().Unix()-301, 10)}}

 	fakeClient := buildFakeClient(t, n1, n2)
-	fakeRecorder := kube_util.CreateEventRecorder(fakeClient)
+	fakeRecorder := kube_util.CreateEventRecorder(fakeClient, false)

 	assert.Equal(t, 1, len(getNode(t, fakeClient, "n2").Spec.Taints))

@@ -216,7 +216,7 @@ func TestCleanAllDeletionCandidates(t *testing.T) {
 	n2.Spec.Taints = []apiv1.Taint{{Key: DeletionCandidateTaint, Value: strconv.FormatInt(time.Now().Unix()-301, 10)}}

 	fakeClient := buildFakeClient(t, n1, n2)
-	fakeRecorder := kube_util.CreateEventRecorder(fakeClient)
+	fakeRecorder := kube_util.CreateEventRecorder(fakeClient, false)

 	assert.Equal(t, 1, len(getNode(t, fakeClient, "n2").Spec.Taints))

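The call-site updates above keep the existing tests on the default, de-duplicating path. A hedged sketch of an additional test that exercises both values of the new argument is shown below; the test name and assertions are illustrative and not part of this commit, and it assumes the test lives in the same package as `CreateEventRecorder`:

```go
// Hypothetical test sketch: CreateEventRecorder should return a usable recorder
// for both values of the new flag when given a fake clientset.
package kubernetes

import (
	"testing"

	"k8s.io/client-go/kubernetes/fake"
)

func TestCreateEventRecorderWithDuplicatedEvents(t *testing.T) {
	fakeClient := fake.NewSimpleClientset()

	for _, recordDuplicatedEvents := range []bool{false, true} {
		recorder := CreateEventRecorder(fakeClient, recordDuplicatedEvents)
		if recorder == nil {
			t.Errorf("expected a non-nil recorder for recordDuplicatedEvents=%v", recordDuplicatedEvents)
		}
	}
}
```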
@@ -39,8 +39,13 @@ const (
 )

 // CreateEventRecorder creates an event recorder to send custom events to Kubernetes to be recorded for targeted Kubernetes objects
-func CreateEventRecorder(kubeClient clientset.Interface) kube_record.EventRecorder {
-	eventBroadcaster := kube_record.NewBroadcasterWithCorrelatorOptions(getCorrelationOptions())
+func CreateEventRecorder(kubeClient clientset.Interface, recordDuplicatedEvents bool) kube_record.EventRecorder {
+	var eventBroadcaster kube_record.EventBroadcaster
+	if recordDuplicatedEvents {
+		eventBroadcaster = kube_record.NewBroadcaster()
+	} else {
+		eventBroadcaster = kube_record.NewBroadcasterWithCorrelatorOptions(getCorrelationOptions())
+	}
 	if _, isfake := kubeClient.(*fake.Clientset); !isfake {
 		actualSink := &v1core.EventSinkImpl{Interface: v1core.New(kubeClient.CoreV1().RESTClient()).Events("")}
 		// EventBroadcaster has a StartLogging() method but the throttling options from getCorrelationOptions() get applied only to
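For reference, the roughly 5 minute window quoted in the FAQ comes from the correlator options that the non-duplicating path keeps using. `getCorrelationOptions()` is not part of this diff; the sketch below is an assumption about the kind of configuration it returns (a spam-filter refill rate of about one event per key per 5 minutes), not its literal contents:

```go
package main

import (
	kube_record "k8s.io/client-go/tools/record"
)

// Assumed sketch of correlator options that yield a ~5 minute de-duplication
// window: the spam filter refills at one token per 300 seconds per event key,
// with a burst of 1, so repeats of the same event inside the window are dropped.
func exampleCorrelationOptions() kube_record.CorrelatorOptions {
	return kube_record.CorrelatorOptions{
		QPS:       1.0 / 300.0, // assumption: one event per key per 5 minutes
		BurstSize: 1,           // assumption: no extra burst before throttling
	}
}

func main() {
	_ = kube_record.NewBroadcasterWithCorrelatorOptions(exampleCorrelationOptions())
}
```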