add a flag to allow event duplication
This change introduces a new command-line flag, `--record-duplicated-events`, which lets a user record duplicated events, bypassing the 5 minute de-duplication window.
parent 4a97d16399
commit 8c27f76933
@@ -28,6 +28,7 @@ this document:
 * [How to?](#how-to)
 * [I'm running cluster with nodes in multiple zones for HA purposes. Is that supported by Cluster Autoscaler?](#im-running-cluster-with-nodes-in-multiple-zones-for-ha-purposes-is-that-supported-by-cluster-autoscaler)
 * [How can I monitor Cluster Autoscaler?](#how-can-i-monitor-cluster-autoscaler)
+* [How can I see all the events from Cluster Autoscaler?](#how-can-i-see-all-events-from-cluster-autoscaler)
 * [How can I scale my cluster to just 1 node?](#how-can-i-scale-my-cluster-to-just-1-node)
 * [How can I scale a node group to 0?](#how-can-i-scale-a-node-group-to-0)
 * [How can I prevent Cluster Autoscaler from scaling down a particular node?](#how-can-i-prevent-cluster-autoscaler-from-scaling-down-a-particular-node)
@@ -267,6 +268,16 @@ respectively under `/metrics` and `/health-check`.
 Metrics are provided in Prometheus format and their detailed description is
 available [here](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/proposals/metrics.md).
 
+### How can I see all events from Cluster Autoscaler?
+
+By default, the Cluster Autoscaler will deduplicate similar events that occur within a 5 minute
+window. This is done to improve scalability performance where many similar events might be
+triggered in a short timespan, such as when there are too many unscheduled pods.
+
+In some cases, such as for debugging or when scalability of events is not an issue, you might
+want to see all the events coming from the Cluster Autoscaler. In these scenarios you should
+use the `--record-duplicated-events` command line flag.
+
 ### How can I scale my cluster to just 1 node?
 
 Prior to version 0.6, Cluster Autoscaler was not touching nodes that were running important
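For completeness, one way to inspect the events the autoscaler has emitted, for example to compare their volume with and without `--record-duplicated-events`, is to list them through client-go. This is a minimal sketch, not part of this change; the namespace and the field selector below are illustrative assumptions.

```go
// Hedged sketch: list events and print their aggregation count. When
// de-duplication is active, repeated similar events tend to show up as a
// single entry with Count > 1 rather than as separate events.
package main

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	cfg, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}
	client := kubernetes.NewForConfigOrDie(cfg)

	// "kube-system" and the field selector are assumptions; adjust them to
	// where your cluster-autoscaler runs and how its events are attributed.
	events, err := client.CoreV1().Events("kube-system").List(context.TODO(),
		metav1.ListOptions{FieldSelector: "source=cluster-autoscaler"})
	if err != nil {
		panic(err)
	}
	for _, e := range events.Items {
		fmt.Printf("%s %s x%d: %s\n", e.Reason, e.InvolvedObject.Name, e.Count, e.Message)
	}
}
```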
@@ -760,6 +771,7 @@ The following startup parameters are supported for cluster autoscaler:
 | `daemonset-eviction-for-occupied-nodes` | Whether DaemonSet pods will be gracefully terminated from non-empty nodes | true
 | `feature-gates` | A set of key=value pairs that describe feature gates for alpha/experimental features. | ""
 | `cordon-node-before-terminating` | Should CA cordon nodes before terminating during downscale process | false
+| `record-duplicated-events` | Enable the autoscaler to print duplicated events within a 5 minute window. | false
 
 # Troubleshooting:
 
@@ -183,4 +183,6 @@ type AutoscalingOptions struct {
     MaxDrainParallelism int
     // GceExpanderEphemeralStorageSupport is whether scale-up takes ephemeral storage resources into account.
     GceExpanderEphemeralStorageSupport bool
+    // RecordDuplicatedEvents controls whether events should be duplicated within a 5 minute window.
+    RecordDuplicatedEvents bool
 }
@@ -115,7 +115,7 @@ func NewAutoscalingContext(
 func NewAutoscalingKubeClients(opts config.AutoscalingOptions, kubeClient, eventsKubeClient kube_client.Interface) *AutoscalingKubeClients {
     listerRegistryStopChannel := make(chan struct{})
     listerRegistry := kube_util.NewListerRegistryWithDefaultListers(kubeClient, listerRegistryStopChannel)
-    kubeEventRecorder := kube_util.CreateEventRecorder(eventsKubeClient)
+    kubeEventRecorder := kube_util.CreateEventRecorder(eventsKubeClient, opts.RecordDuplicatedEvents)
     logRecorder, err := utils.NewStatusMapRecorder(kubeClient, opts.ConfigNamespace, kubeEventRecorder, opts.WriteStatusConfigMap, opts.StatusConfigMapName)
     if err != nil {
         klog.Error("Failed to initialize status configmap, unable to write status events")
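Callers of the recorder created above do not change with this flag; only the broadcaster behind it decides whether similar events are correlated or recorded verbatim. A minimal sketch for illustration (the function name and event reason are hypothetical, not taken from this commit):

```go
// Illustrative only: code holding the recorder emits events the same way in
// both modes; whether two identical calls surface as two events or as one
// aggregated event now depends on the RecordDuplicatedEvents option.
package example

import (
	apiv1 "k8s.io/api/core/v1"
	kube_record "k8s.io/client-go/tools/record"
)

func emitScaleDownEvent(recorder kube_record.EventRecorder, node *apiv1.Node) {
	recorder.Eventf(node, apiv1.EventTypeNormal, "ScaleDown",
		"marking node %s as unneeded", node.Name)
}
```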
@@ -198,10 +198,10 @@ var (
         "maxNodeGroupBackoffDuration is the maximum backoff duration for a NodeGroup after new nodes failed to start.")
     nodeGroupBackoffResetTimeout = flag.Duration("node-group-backoff-reset-timeout", 3*time.Hour,
         "nodeGroupBackoffResetTimeout is the time after last failed scale-up when the backoff duration is reset.")
 
     maxScaleDownParallelismFlag = flag.Int("max-scale-down-parallelism", 10, "Maximum number of nodes (both empty and needing drain) that can be deleted in parallel.")
     maxDrainParallelismFlag = flag.Int("max-drain-parallelism", 1, "Maximum number of nodes needing drain, that can be drained and deleted in parallel.")
     gceExpanderEphemeralStorageSupport = flag.Bool("gce-expander-ephemeral-storage-support", false, "Whether scale-up takes ephemeral storage resources into account for GCE cloud provider")
+    recordDuplicatedEvents = flag.Bool("record-duplicated-events", false, "enable duplication of similar events within a 5 minute window.")
 )
 
 func createAutoscalingOptions() config.AutoscalingOptions {
@@ -288,6 +288,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
         MaxScaleDownParallelism: *maxScaleDownParallelismFlag,
         MaxDrainParallelism: *maxDrainParallelismFlag,
         GceExpanderEphemeralStorageSupport: *gceExpanderEphemeralStorageSupport,
+        RecordDuplicatedEvents: *recordDuplicatedEvents,
     }
 }
 
@@ -475,7 +476,7 @@ func main() {
         kubeClient.CoordinationV1(),
         resourcelock.ResourceLockConfig{
             Identity: id,
-            EventRecorder: kube_util.CreateEventRecorder(kubeClient),
+            EventRecorder: kube_util.CreateEventRecorder(kubeClient, *recordDuplicatedEvents),
         },
     )
     if err != nil {
@@ -200,7 +200,7 @@ func TestCleanAllToBeDeleted(t *testing.T) {
     n2.Spec.Taints = []apiv1.Taint{{Key: ToBeDeletedTaint, Value: strconv.FormatInt(time.Now().Unix()-301, 10)}}
 
     fakeClient := buildFakeClient(t, n1, n2)
-    fakeRecorder := kube_util.CreateEventRecorder(fakeClient)
+    fakeRecorder := kube_util.CreateEventRecorder(fakeClient, false)
 
     assert.Equal(t, 1, len(getNode(t, fakeClient, "n2").Spec.Taints))
@@ -216,7 +216,7 @@ func TestCleanAllDeletionCandidates(t *testing.T) {
     n2.Spec.Taints = []apiv1.Taint{{Key: DeletionCandidateTaint, Value: strconv.FormatInt(time.Now().Unix()-301, 10)}}
 
     fakeClient := buildFakeClient(t, n1, n2)
-    fakeRecorder := kube_util.CreateEventRecorder(fakeClient)
+    fakeRecorder := kube_util.CreateEventRecorder(fakeClient, false)
 
     assert.Equal(t, 1, len(getNode(t, fakeClient, "n2").Spec.Taints))
@@ -39,8 +39,13 @@ const (
 )
 
 // CreateEventRecorder creates an event recorder to send custom events to Kubernetes to be recorded for targeted Kubernetes objects
-func CreateEventRecorder(kubeClient clientset.Interface) kube_record.EventRecorder {
-    eventBroadcaster := kube_record.NewBroadcasterWithCorrelatorOptions(getCorrelationOptions())
+func CreateEventRecorder(kubeClient clientset.Interface, recordDuplicatedEvents bool) kube_record.EventRecorder {
+    var eventBroadcaster kube_record.EventBroadcaster
+    if recordDuplicatedEvents {
+        eventBroadcaster = kube_record.NewBroadcaster()
+    } else {
+        eventBroadcaster = kube_record.NewBroadcasterWithCorrelatorOptions(getCorrelationOptions())
+    }
     if _, isfake := kubeClient.(*fake.Clientset); !isfake {
         actualSink := &v1core.EventSinkImpl{Interface: v1core.New(kubeClient.CoreV1().RESTClient()).Events("")}
         // EventBroadcaster has a StartLogging() method but the throttling options from getCorrelationOptions() get applied only to
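The body of `getCorrelationOptions()` is not part of this diff. For context, a correlator that collapses similar events inside a 5 minute window could be configured roughly as below; the field values are assumptions, not the repository's actual settings.

```go
// Assumed illustration of a de-duplicating correlator over a 5 minute window;
// the real getCorrelationOptions() in the autoscaler may use different values.
package example

import (
	kube_record "k8s.io/client-go/tools/record"
)

func exampleCorrelationOptions() kube_record.CorrelatorOptions {
	return kube_record.CorrelatorOptions{
		// Start aggregating once more than one similar event arrives...
		MaxEvents: 1,
		// ...within a 300 second (5 minute) interval.
		MaxIntervalInSeconds: 300,
	}
}
```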