Merge pull request #3972 from zhy76/metric
feat: add performance metrics for FederatedHPA
This commit is contained in:
commit
758a6ceaf0
|
@ -35,6 +35,7 @@ import (
|
||||||
policyv1alpha1 "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1"
|
policyv1alpha1 "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1"
|
||||||
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
|
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
|
||||||
"github.com/karmada-io/karmada/pkg/controllers/federatedhpa/monitor"
|
"github.com/karmada-io/karmada/pkg/controllers/federatedhpa/monitor"
|
||||||
|
"github.com/karmada-io/karmada/pkg/metrics"
|
||||||
"github.com/karmada-io/karmada/pkg/sharedcli/ratelimiterflag"
|
"github.com/karmada-io/karmada/pkg/sharedcli/ratelimiterflag"
|
||||||
"github.com/karmada-io/karmada/pkg/util"
|
"github.com/karmada-io/karmada/pkg/util"
|
||||||
"github.com/karmada-io/karmada/pkg/util/fedinformer/typedmanager"
|
"github.com/karmada-io/karmada/pkg/util/fedinformer/typedmanager"
|
||||||
|
@ -155,7 +156,12 @@ func (c *FederatedHPAController) Reconcile(ctx context.Context, req controllerru
|
||||||
}
|
}
|
||||||
c.hpaSelectorsMux.Unlock()
|
c.hpaSelectorsMux.Unlock()
|
||||||
|
|
||||||
err := c.reconcileAutoscaler(ctx, hpa)
|
// observe process FederatedHPA latency
|
||||||
|
var err error
|
||||||
|
startTime := time.Now()
|
||||||
|
// Defer a closure instead of the call itself: arguments of a deferred call
// are evaluated when the defer statement executes, which would freeze err at
// its initial nil value and hide every later reconcile failure from the metric.
defer func() { metrics.ObserveProcessFederatedHPALatency(err, startTime) }()
|
||||||
|
|
||||||
|
err = c.reconcileAutoscaler(ctx, hpa)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return controllerruntime.Result{}, err
|
return controllerruntime.Result{}, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,6 +32,8 @@ import (
|
||||||
resourceclient "k8s.io/metrics/pkg/client/clientset/versioned/typed/metrics/v1beta1"
|
resourceclient "k8s.io/metrics/pkg/client/clientset/versioned/typed/metrics/v1beta1"
|
||||||
customclient "k8s.io/metrics/pkg/client/custom_metrics"
|
customclient "k8s.io/metrics/pkg/client/custom_metrics"
|
||||||
externalclient "k8s.io/metrics/pkg/client/external_metrics"
|
externalclient "k8s.io/metrics/pkg/client/external_metrics"
|
||||||
|
|
||||||
|
"github.com/karmada-io/karmada/pkg/metrics"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -64,6 +66,11 @@ type resourceMetricsClient struct {
|
||||||
// GetResourceMetric gets the given resource metric (and an associated oldest timestamp)
|
// GetResourceMetric gets the given resource metric (and an associated oldest timestamp)
|
||||||
// for all pods matching the specified selector in the given namespace
|
// for all pods matching the specified selector in the given namespace
|
||||||
func (c *resourceMetricsClient) GetResourceMetric(ctx context.Context, resource corev1.ResourceName, namespace string, selector labels.Selector, container string) (PodMetricsInfo, time.Time, error) {
|
func (c *resourceMetricsClient) GetResourceMetric(ctx context.Context, resource corev1.ResourceName, namespace string, selector labels.Selector, container string) (PodMetricsInfo, time.Time, error) {
|
||||||
|
// observe pull ResourceMetric latency
|
||||||
|
var err error
|
||||||
|
startTime := time.Now()
|
||||||
|
// Defer a closure instead of the call itself: arguments of a deferred call
// are evaluated when the defer statement executes, so passing err directly
// would always report nil. The closure reads err when the function returns.
defer func() { metrics.ObserveFederatedHPAPullMetricsLatency(err, "ResourceMetric", startTime) }()
|
||||||
|
|
||||||
metrics, err := c.client.PodMetricses(namespace).List(ctx, metav1.ListOptions{LabelSelector: selector.String()})
|
metrics, err := c.client.PodMetricses(namespace).List(ctx, metav1.ListOptions{LabelSelector: selector.String()})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, time.Time{}, fmt.Errorf("unable to fetch metrics from resource metrics API: %v", err)
|
return nil, time.Time{}, fmt.Errorf("unable to fetch metrics from resource metrics API: %v", err)
|
||||||
|
@ -143,6 +150,11 @@ type customMetricsClient struct {
|
||||||
// GetRawMetric gets the given metric (and an associated oldest timestamp)
|
// GetRawMetric gets the given metric (and an associated oldest timestamp)
|
||||||
// for all pods matching the specified selector in the given namespace
|
// for all pods matching the specified selector in the given namespace
|
||||||
func (c *customMetricsClient) GetRawMetric(metricName string, namespace string, selector labels.Selector, metricSelector labels.Selector) (PodMetricsInfo, time.Time, error) {
|
func (c *customMetricsClient) GetRawMetric(metricName string, namespace string, selector labels.Selector, metricSelector labels.Selector) (PodMetricsInfo, time.Time, error) {
|
||||||
|
// observe pull RawMetric latency
|
||||||
|
var err error
|
||||||
|
startTime := time.Now()
|
||||||
|
// Defer a closure instead of the call itself: arguments of a deferred call
// are evaluated when the defer statement executes, so passing err directly
// would always report nil. The closure reads err when the function returns.
defer func() { metrics.ObserveFederatedHPAPullMetricsLatency(err, "RawMetric", startTime) }()
|
||||||
|
|
||||||
metrics, err := c.client.NamespacedMetrics(namespace).GetForObjects(schema.GroupKind{Kind: "Pod"}, selector, metricName, metricSelector)
|
metrics, err := c.client.NamespacedMetrics(namespace).GetForObjects(schema.GroupKind{Kind: "Pod"}, selector, metricName, metricSelector)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, time.Time{}, fmt.Errorf("unable to fetch metrics from custom metrics API: %v", err)
|
return nil, time.Time{}, fmt.Errorf("unable to fetch metrics from custom metrics API: %v", err)
|
||||||
|
@ -175,9 +187,13 @@ func (c *customMetricsClient) GetRawMetric(metricName string, namespace string,
|
||||||
// GetObjectMetric gets the given metric (and an associated timestamp) for the given
|
// GetObjectMetric gets the given metric (and an associated timestamp) for the given
|
||||||
// object in the given namespace
|
// object in the given namespace
|
||||||
func (c *customMetricsClient) GetObjectMetric(metricName string, namespace string, objectRef *autoscalingv2.CrossVersionObjectReference, metricSelector labels.Selector) (int64, time.Time, error) {
|
func (c *customMetricsClient) GetObjectMetric(metricName string, namespace string, objectRef *autoscalingv2.CrossVersionObjectReference, metricSelector labels.Selector) (int64, time.Time, error) {
|
||||||
|
// observe pull ObjectMetric latency
|
||||||
|
var err error
|
||||||
|
startTime := time.Now()
|
||||||
|
// Defer a closure instead of the call itself: arguments of a deferred call
// are evaluated when the defer statement executes, so passing err directly
// would always report nil. The closure reads err when the function returns.
defer func() { metrics.ObserveFederatedHPAPullMetricsLatency(err, "ObjectMetric", startTime) }()
|
||||||
|
|
||||||
gvk := schema.FromAPIVersionAndKind(objectRef.APIVersion, objectRef.Kind)
|
gvk := schema.FromAPIVersionAndKind(objectRef.APIVersion, objectRef.Kind)
|
||||||
var metricValue *customapi.MetricValue
|
var metricValue *customapi.MetricValue
|
||||||
var err error
|
|
||||||
if gvk.Kind == "Namespace" && gvk.Group == "" {
|
if gvk.Kind == "Namespace" && gvk.Group == "" {
|
||||||
// handle namespace separately
|
// handle namespace separately
|
||||||
// NB: we ignore namespace name here, since CrossVersionObjectReference isn't
|
// NB: we ignore namespace name here, since CrossVersionObjectReference isn't
|
||||||
|
@ -203,6 +219,11 @@ type externalMetricsClient struct {
|
||||||
// GetExternalMetric gets all the values of a given external metric
|
// GetExternalMetric gets all the values of a given external metric
|
||||||
// that match the specified selector.
|
// that match the specified selector.
|
||||||
func (c *externalMetricsClient) GetExternalMetric(metricName, namespace string, selector labels.Selector) ([]int64, time.Time, error) {
|
func (c *externalMetricsClient) GetExternalMetric(metricName, namespace string, selector labels.Selector) ([]int64, time.Time, error) {
|
||||||
|
// observe pull ExternalMetric latency
|
||||||
|
var err error
|
||||||
|
startTime := time.Now()
|
||||||
|
// Defer a closure instead of the call itself: arguments of a deferred call
// are evaluated when the defer statement executes, so passing err directly
// would always report nil. The closure reads err when the function returns.
defer func() { metrics.ObserveFederatedHPAPullMetricsLatency(err, "ExternalMetric", startTime) }()
|
||||||
|
|
||||||
metrics, err := c.client.NamespacedMetrics(namespace).List(metricName, selector)
|
metrics, err := c.client.NamespacedMetrics(namespace).List(metricName, selector)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return []int64{}, time.Time{}, fmt.Errorf("unable to fetch metrics from external metrics API: %v", err)
|
return []int64{}, time.Time{}, fmt.Errorf("unable to fetch metrics from external metrics API: %v", err)
|
||||||
|
|
|
@ -17,6 +17,8 @@ const (
|
||||||
policyPreemptionMetricsName = "policy_preemption_total"
|
policyPreemptionMetricsName = "policy_preemption_total"
|
||||||
cronFederatedHPADurationMetricsName = "cronfederatedhpa_process_duration_seconds"
|
cronFederatedHPADurationMetricsName = "cronfederatedhpa_process_duration_seconds"
|
||||||
cronFederatedHPARuleDurationMetricsName = "cronfederatedhpa_rule_process_duration_seconds"
|
cronFederatedHPARuleDurationMetricsName = "cronfederatedhpa_rule_process_duration_seconds"
|
||||||
|
federatedHPADurationMetricsName = "federatedhpa_process_duration_seconds"
|
||||||
|
federatedHPAPullMetricsDurationMetricsName = "federatedhpa_pull_metrics_duration_seconds"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -65,6 +67,18 @@ var (
|
||||||
Help: "Duration in seconds to process a CronFederatedHPA rule. By the result, 'error' means a CronFederatedHPA rule failed to be processed. Otherwise 'success'.",
|
Help: "Duration in seconds to process a CronFederatedHPA rule. By the result, 'error' means a CronFederatedHPA rule failed to be processed. Otherwise 'success'.",
|
||||||
Buckets: prometheus.ExponentialBuckets(0.001, 2, 12),
|
Buckets: prometheus.ExponentialBuckets(0.001, 2, 12),
|
||||||
}, []string{"result"})
|
}, []string{"result"})
|
||||||
|
|
||||||
|
federatedHPADurationHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||||
|
Name: federatedHPADurationMetricsName,
|
||||||
|
Help: "Duration in seconds to process a FederatedHPA. By the result, 'error' means a FederatedHPA failed to be processed. Otherwise 'success'.",
|
||||||
|
Buckets: prometheus.ExponentialBuckets(0.01, 2, 12),
|
||||||
|
}, []string{"result"})
|
||||||
|
|
||||||
|
federatedHPAPullMetricsDurationHistogram = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||||
|
Name: federatedHPAPullMetricsDurationMetricsName,
|
||||||
|
Help: "Duration in seconds taken by the FederatedHPA to pull metrics. By the result, 'error' means the FederatedHPA failed to pull the metrics. Otherwise 'success'.",
|
||||||
|
Buckets: prometheus.ExponentialBuckets(0.01, 2, 12),
|
||||||
|
}, []string{"result", "metricType"})
|
||||||
)
|
)
|
||||||
|
|
||||||
// ObserveFindMatchedPolicyLatency records the duration for the resource finding a matched policy.
|
// ObserveFindMatchedPolicyLatency records the duration for the resource finding a matched policy.
|
||||||
|
@ -103,6 +117,16 @@ func ObserveProcessCronFederatedHPARuleLatency(err error, start time.Time) {
|
||||||
cronFederatedHPARuleDurationHistogram.WithLabelValues(utilmetrics.GetResultByError(err)).Observe(utilmetrics.DurationInSeconds(start))
|
cronFederatedHPARuleDurationHistogram.WithLabelValues(utilmetrics.GetResultByError(err)).Observe(utilmetrics.DurationInSeconds(start))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ObserveProcessFederatedHPALatency records the duration to process a FederatedHPA.
|
||||||
|
func ObserveProcessFederatedHPALatency(err error, start time.Time) {
|
||||||
|
federatedHPADurationHistogram.WithLabelValues(utilmetrics.GetResultByError(err)).Observe(utilmetrics.DurationInSeconds(start))
|
||||||
|
}
|
||||||
|
|
||||||
|
// ObserveFederatedHPAPullMetricsLatency records the duration it takes for the FederatedHPA to pull metrics.
|
||||||
|
func ObserveFederatedHPAPullMetricsLatency(err error, metricType string, start time.Time) {
|
||||||
|
federatedHPAPullMetricsDurationHistogram.WithLabelValues(utilmetrics.GetResultByError(err), metricType).Observe(utilmetrics.DurationInSeconds(start))
|
||||||
|
}
|
||||||
|
|
||||||
// ResourceCollectors returns the collectors about resources.
|
// ResourceCollectors returns the collectors about resources.
|
||||||
func ResourceCollectors() []prometheus.Collector {
|
func ResourceCollectors() []prometheus.Collector {
|
||||||
return []prometheus.Collector{
|
return []prometheus.Collector{
|
||||||
|
@ -114,6 +138,8 @@ func ResourceCollectors() []prometheus.Collector {
|
||||||
policyPreemptionCounter,
|
policyPreemptionCounter,
|
||||||
cronFederatedHPADurationHistogram,
|
cronFederatedHPADurationHistogram,
|
||||||
cronFederatedHPARuleDurationHistogram,
|
cronFederatedHPARuleDurationHistogram,
|
||||||
|
federatedHPADurationHistogram,
|
||||||
|
federatedHPAPullMetricsDurationHistogram,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue