introduce tests to check whether workqueue metrics exist.

Signed-off-by: chaosi-zju <chaosi@zju.edu.cn>
This commit is contained in:
chaosi-zju 2024-12-31 17:23:25 +08:00
parent d80b7d4864
commit 5db980daa5
6 changed files with 335 additions and 5 deletions

View File

@ -44,7 +44,7 @@ jobs:
hack/cli-testing-environment.sh hack/cli-testing-environment.sh
# run a single e2e # run a single e2e
export KUBECONFIG=${HOME}/karmada/karmada-apiserver.config export KUBECONFIG=${HOME}/.kube/karmada-host.config:${HOME}/karmada/karmada-apiserver.config
GO111MODULE=on go install github.com/onsi/ginkgo/v2/ginkgo GO111MODULE=on go install github.com/onsi/ginkgo/v2/ginkgo
ginkgo -v --race --trace -p --focus="[BasicPropagation] propagation testing deployment propagation testing" ./test/e2e/ ginkgo -v --race --trace -p --focus="[BasicPropagation] propagation testing deployment propagation testing" ./test/e2e/
- name: export logs - name: export logs
@ -87,7 +87,7 @@ jobs:
hack/cli-testing-init-with-config.sh hack/cli-testing-init-with-config.sh
# run a single e2e # run a single e2e
export KUBECONFIG=${HOME}/karmada/karmada-apiserver.config export KUBECONFIG=${HOME}/.kube/karmada-host.config:${HOME}/karmada/karmada-apiserver.config
GO111MODULE=on go install github.com/onsi/ginkgo/v2/ginkgo GO111MODULE=on go install github.com/onsi/ginkgo/v2/ginkgo
ginkgo -v --race --trace -p --focus="[BasicPropagation] propagation testing deployment propagation testing" ./test/e2e/ ginkgo -v --race --trace -p --focus="[BasicPropagation] propagation testing deployment propagation testing" ./test/e2e/
- name: export logs for config test - name: export logs for config test

2
go.mod
View File

@ -19,6 +19,7 @@ require (
github.com/onsi/gomega v1.34.1 github.com/onsi/gomega v1.34.1
github.com/opensearch-project/opensearch-go v1.1.0 github.com/opensearch-project/opensearch-go v1.1.0
github.com/prometheus/client_golang v1.19.1 github.com/prometheus/client_golang v1.19.1
github.com/prometheus/common v0.55.0
github.com/spf13/cobra v1.8.1 github.com/spf13/cobra v1.8.1
github.com/spf13/pflag v1.0.5 github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.9.0 github.com/stretchr/testify v1.9.0
@ -134,7 +135,6 @@ require (
github.com/pkg/errors v0.9.1 // indirect github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.55.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect github.com/prometheus/procfs v0.15.1 // indirect
github.com/rivo/uniseg v0.4.2 // indirect github.com/rivo/uniseg v0.4.2 // indirect
github.com/robfig/cron/v3 v3.0.1 // indirect github.com/robfig/cron/v3 v3.0.1 // indirect

View File

@ -20,6 +20,7 @@ import (
"context" "context"
"fmt" "fmt"
"os" "os"
"path/filepath"
"strings" "strings"
"time" "time"
@ -310,7 +311,7 @@ func WaitClusterFitWith(c client.Client, clusterName string, fit func(cluster *c
// LoadRESTClientConfig creates a rest.Config using the passed kubeconfig. If context is empty, current context in kubeconfig will be used. // LoadRESTClientConfig creates a rest.Config using the passed kubeconfig. If context is empty, current context in kubeconfig will be used.
func LoadRESTClientConfig(kubeconfig string, context string) (*rest.Config, error) { func LoadRESTClientConfig(kubeconfig string, context string) (*rest.Config, error) {
loader := &clientcmd.ClientConfigLoadingRules{ExplicitPath: kubeconfig} loader := &clientcmd.ClientConfigLoadingRules{Precedence: filepath.SplitList(kubeconfig)}
loadedConfig, err := loader.Load() loadedConfig, err := loader.Load()
if err != nil { if err != nil {
return nil, err return nil, err

View File

@ -0,0 +1,193 @@
/*
Copyright 2024 The Karmada Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package framework
import (
"context"
"fmt"
"regexp"
"time"
"github.com/prometheus/common/model"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/component-base/metrics/testutil"
"k8s.io/klog/v2"
"github.com/karmada-io/karmada/pkg/util/names"
)
const (
	// karmadaNamespace is the namespace the Karmada control-plane components run in.
	karmadaNamespace = "karmada-system"
	// metricsBindPort is the port on which each component pod serves /metrics.
	metricsBindPort = 8080
	// leaderPodMetric is the metric whose positive value marks the elected leader pod.
	leaderPodMetric = "leader_election_master_status"
	// queryTimeout bounds how long we poll a single pod for its metrics endpoint.
	queryTimeout = 10 * time.Second
)

// following refers to https://github.com/kubernetes/kubernetes/blob/master/test/e2e/framework/metrics/metrics_grabber.go
// Grabber is used to grab metrics from karmada components
type Grabber struct {
	// hostKubeClient talks to the host cluster where the component pods live.
	hostKubeClient clientset.Interface
	// Pod names discovered for each component, grouped per component.
	controllerManagerPods  []string
	schedulerPods          []string
	deschedulerPods        []string
	metricsAdapterPods     []string
	schedulerEstimatorPods []string
	webhookPods            []string
}
// NewMetricsGrabber creates a new metrics grabber by listing the pods in the
// karmada-system namespace of the host cluster and bucketing them per
// component based on their name prefix.
func NewMetricsGrabber(ctx context.Context, c clientset.Interface) (*Grabber, error) {
	grabber := Grabber{hostKubeClient: c}
	// Anchor every pattern at the start of the pod name: MatchString matches
	// anywhere in the string, so an unanchored pattern would also match a pod
	// that merely embeds a component name in the middle of its own name.
	regKarmadaControllerManagerPods := regexp.MustCompile("^" + names.KarmadaControllerManagerComponentName + "-.*")
	regKarmadaSchedulerPods := regexp.MustCompile("^" + names.KarmadaSchedulerComponentName + "-.*")
	regKarmadaDeschedulerPods := regexp.MustCompile("^" + names.KarmadaDeschedulerComponentName + "-.*")
	regKarmadaMetricsAdapterPods := regexp.MustCompile("^" + names.KarmadaMetricsAdapterComponentName + "-.*")
	// NOTE(review): assumes at least one member cluster is registered;
	// ClusterNames()[0] panics otherwise — confirm the suite guarantees this.
	regKarmadaSchedulerEstimatorPods := regexp.MustCompile("^" + names.KarmadaSchedulerEstimatorComponentName + "-" + ClusterNames()[0] + "-.*")
	regKarmadaWebhookPods := regexp.MustCompile("^" + names.KarmadaWebhookComponentName + "-.*")

	podList, err := c.CoreV1().Pods(karmadaNamespace).List(ctx, metav1.ListOptions{})
	if err != nil {
		return nil, err
	}
	if len(podList.Items) < 1 {
		klog.Warningf("Can't find any pods in namespace %s to grab metrics from", karmadaNamespace)
	}
	for _, pod := range podList.Items {
		if regKarmadaControllerManagerPods.MatchString(pod.Name) {
			grabber.controllerManagerPods = append(grabber.controllerManagerPods, pod.Name)
			continue
		}
		if regKarmadaDeschedulerPods.MatchString(pod.Name) {
			grabber.deschedulerPods = append(grabber.deschedulerPods, pod.Name)
			continue
		}
		if regKarmadaMetricsAdapterPods.MatchString(pod.Name) {
			grabber.metricsAdapterPods = append(grabber.metricsAdapterPods, pod.Name)
			continue
		}
		// The estimator pattern must be tested before the scheduler pattern:
		// estimator pod names appear to share the scheduler name prefix, so
		// the scheduler pattern would claim them first.
		if regKarmadaSchedulerEstimatorPods.MatchString(pod.Name) {
			grabber.schedulerEstimatorPods = append(grabber.schedulerEstimatorPods, pod.Name)
			continue
		}
		if regKarmadaSchedulerPods.MatchString(pod.Name) {
			grabber.schedulerPods = append(grabber.schedulerPods, pod.Name)
			continue
		}
		if regKarmadaWebhookPods.MatchString(pod.Name) {
			grabber.webhookPods = append(grabber.webhookPods, pod.Name)
		}
	}
	return &grabber, nil
}
// GrabMetricsFromComponent fetches metrics from the pods of the specified
// Karmada component. For components that use leader election (controller
// manager, scheduler, descheduler) only the leader pod's metrics are kept.
// It returns a map keyed by pod name, or an error when the component is
// unknown or no pod yielded metrics.
func (g *Grabber) GrabMetricsFromComponent(ctx context.Context, component string) (map[string]testutil.Metrics, error) {
	pods, fromLeader := make([]string, 0), false
	switch component {
	case names.KarmadaControllerManagerComponentName:
		pods, fromLeader = g.controllerManagerPods, true
	case names.KarmadaSchedulerComponentName:
		pods, fromLeader = g.schedulerPods, true
	case names.KarmadaDeschedulerComponentName:
		pods, fromLeader = g.deschedulerPods, true
	case names.KarmadaMetricsAdapterComponentName:
		pods = g.metricsAdapterPods
	case names.KarmadaSchedulerEstimatorComponentName:
		pods = g.schedulerEstimatorPods
	case names.KarmadaWebhookComponentName:
		pods = g.webhookPods
	default:
		// Fail fast with an accurate message; previously an unknown component
		// fell through with an empty pod list and was reported as a generic
		// "failed to fetch metrics" error.
		return nil, fmt.Errorf("unknown component %s", component)
	}
	return g.grabMetricsFromPod(ctx, component, pods, fromLeader)
}
// grabMetricsFromPod fetches metrics from every pod in the given list; when
// fromLeader is true, only metrics scraped from the elected leader pod are
// kept. Pods that cannot be scraped or parsed are logged and skipped, and an
// error is returned only when no pod yielded metrics at all.
func (g *Grabber) grabMetricsFromPod(ctx context.Context, component string, pods []string, fromLeader bool) (map[string]testutil.Metrics, error) {
	// output/lastMetricsFetchErr are captured by the poll closure below and
	// read after the poll completes — keep them declared outside the loop.
	var output string
	var lastMetricsFetchErr error
	result := make(map[string]testutil.Metrics)
	for _, podName := range pods {
		// Poll once per second until the pod serves metrics or queryTimeout
		// elapses; the closure always returns a nil error so every fetch
		// failure is retried instead of aborting the poll.
		if metricsWaitErr := wait.PollUntilContextTimeout(ctx, time.Second, queryTimeout, true, func(ctx context.Context) (bool, error) {
			output, lastMetricsFetchErr = GetMetricsFromPod(ctx, g.hostKubeClient, podName, karmadaNamespace, metricsBindPort)
			return lastMetricsFetchErr == nil, nil
		}); metricsWaitErr != nil {
			klog.Errorf("error waiting for %s to expose metrics: %v; %v", podName, metricsWaitErr, lastMetricsFetchErr)
			continue
		}
		podMetrics := testutil.Metrics{}
		metricsParseErr := testutil.ParseMetrics(output, &podMetrics)
		if metricsParseErr != nil {
			klog.Errorf("failed to parse metrics for %s: %v", podName, metricsParseErr)
			continue
		}
		// judge which pod is the leader pod
		if fromLeader && !isLeaderPod(podMetrics[leaderPodMetric]) {
			klog.Infof("skip fetch %s since it is not the leader pod", podName)
			continue
		}
		result[podName] = podMetrics
		klog.Infof("successfully grabbed metrics of %s", podName)
	}
	if len(result) == 0 {
		return nil, fmt.Errorf("failed to fetch metrics from the pod of %s", component)
	}
	return result, nil
}
// GetMetricsFromPod scrapes the /metrics endpoint of the named pod through
// the API server's pod-proxy subresource and returns the raw text payload.
func GetMetricsFromPod(ctx context.Context, client clientset.Interface, podName string, namespace string, port int) (string, error) {
	req := client.CoreV1().RESTClient().Get().
		Namespace(namespace).
		Resource("pods").
		SubResource("proxy").
		Name(fmt.Sprintf("%s:%d", podName, port)).
		Suffix("metrics")
	raw, err := req.Do(ctx).Raw()
	if err != nil {
		return "", err
	}
	return string(raw), nil
}
// isLeaderPod reports whether any sample in the leader-election metric has a
// positive value, which identifies the pod as the current leader.
func isLeaderPod(samples model.Samples) bool {
	for i := range samples {
		if samples[i].Value > 0 {
			return true
		}
	}
	return false
}
// PrintMetricSample logs every sample of a metric. When podName is non-empty
// it is logged first so the samples can be attributed to their pod; an empty
// sample set produces no output at all.
func PrintMetricSample(podName string, sample model.Samples) {
	if sample.Len() == 0 {
		return
	}
	if len(podName) > 0 {
		klog.Infof("metrics from pod: %s", podName)
	}
	for i := range sample {
		klog.Infof("metric: %v, value: %v, timestamp: %v", sample[i].Metric, sample[i].Value, sample[i].Timestamp)
	}
}

126
test/e2e/metrics_test.go Normal file
View File

@ -0,0 +1,126 @@
/*
Copyright 2024 The Karmada Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2e
import (
"context"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
appsv1 "k8s.io/api/apps/v1"
"k8s.io/apimachinery/pkg/util/rand"
"k8s.io/klog/v2"
policyv1alpha1 "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1"
"github.com/karmada-io/karmada/pkg/util/names"
"github.com/karmada-io/karmada/test/e2e/framework"
testhelper "github.com/karmada-io/karmada/test/helper"
)
var _ = ginkgo.Describe("metrics testing", func() {
	var grabber *framework.Grabber
	// componentMetrics maps each Karmada component to the metric names that
	// are expected to show up on its /metrics endpoint.
	var componentMetrics = map[string][]string{
		names.KarmadaControllerManagerComponentName: {
			"workqueue_queue_duration_seconds_sum",    // workqueue metrics
			"cluster_ready_state",                     // custom ClusterCollectors metrics
			"work_sync_workload_duration_seconds_sum", // custom ResourceCollectors metrics
		},
		names.KarmadaSchedulerComponentName: {
			"workqueue_queue_duration_seconds_sum",      // workqueue metrics
			"karmada_scheduler_schedule_attempts_total", // scheduler custom metrics
		},
		names.KarmadaDeschedulerComponentName: {
			"workqueue_queue_duration_seconds_sum", // workqueue metrics
		},
		names.KarmadaMetricsAdapterComponentName: {
			"workqueue_queue_duration_seconds_sum", // workqueue metrics
		},
		names.KarmadaSchedulerEstimatorComponentName: {
			"karmada_scheduler_estimator_estimating_request_total", // scheduler estimator custom metrics
		},
		names.KarmadaWebhookComponentName: {
			"controller_runtime_webhook_requests_total", // controller runtime hook server metrics
		},
	}

	ginkgo.BeforeEach(func() {
		var err error
		grabber, err = framework.NewMetricsGrabber(context.TODO(), hostKubeClient)
		gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
	})

	ginkgo.Context("metrics presence testing", func() {
		ginkgo.It("metrics presence testing for each component", func() {
			ginkgo.By("do a simple scheduling to ensure above metrics exist", func() {
				name := deploymentNamePrefix + rand.String(RandomStrLength)
				deployment := testhelper.NewDeployment(testNamespace, name)
				policy := testhelper.NewPropagationPolicy(testNamespace, name, []policyv1alpha1.ResourceSelector{
					{
						APIVersion: deployment.APIVersion,
						Kind:       deployment.Kind,
						Name:       deployment.Name,
					},
				}, policyv1alpha1.Placement{
					ClusterAffinity: &policyv1alpha1.ClusterAffinity{
						ClusterNames: framework.ClusterNames(),
					},
				})
				framework.CreateDeployment(kubeClient, deployment)
				framework.CreatePropagationPolicy(karmadaClient, policy)
				ginkgo.DeferCleanup(func() {
					framework.RemoveDeployment(kubeClient, deployment.Namespace, deployment.Name)
					framework.RemovePropagationPolicy(karmadaClient, policy.Namespace, policy.Name)
				})
				framework.WaitDeploymentPresentOnClustersFitWith(framework.ClusterNames(), deployment.Namespace, deployment.Name, func(_ *appsv1.Deployment) bool { return true })
			})
			for component, metricNameList := range componentMetrics {
				ginkgo.By("judge metrics presence of component: "+component, func() {
					podsMetrics, err := grabber.GrabMetricsFromComponent(context.TODO(), component)
					gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
					for _, metricName := range metricNameList {
						metricExist := false
						for podName, metrics := range podsMetrics {
							// the output format of `metrics` is like:
							// {
							//    "workqueue_queue_duration_seconds_sum": [{
							//       "metric": {
							//          "__name__": "workqueue_queue_duration_seconds_sum",
							//          "controller": "work-status-controller",
							//          "name": "work-status-controller"
							//       },
							//       "value": [0, "0.12403110800000001"]
							//    }]
							// }
							// Look up the metric once and reuse the samples.
							samples := metrics[metricName]
							framework.PrintMetricSample(podName, samples)
							if samples.Len() > 0 {
								metricExist = true
								break
							}
						}
						if !metricExist {
							klog.Errorf("metric %s not found in component %s", metricName, component)
						}
						// Assert the positive condition directly with a descriptive
						// message instead of the former double negative
						// (Expect(false).ShouldNot(BeFalse())) on a known-false value.
						gomega.Expect(metricExist).Should(gomega.BeTrue(), "metric %s not found in component %s", metricName, component)
					}
				})
			}
		})
	})
})

View File

@ -103,11 +103,13 @@ var (
) )
var ( var (
hostContext string
karmadaContext string karmadaContext string
kubeconfig string kubeconfig string
karmadactlPath string karmadactlPath string
restConfig *rest.Config restConfig *rest.Config
karmadaHost string karmadaHost string
hostKubeClient kubernetes.Interface
kubeClient kubernetes.Interface kubeClient kubernetes.Interface
karmadaClient karmada.Interface karmadaClient karmada.Interface
dynamicClient dynamic.Interface dynamicClient dynamic.Interface
@ -125,7 +127,8 @@ func init() {
// eg. ginkgo -v --race --trace --fail-fast -p --randomize-all ./test/e2e/ -- --poll-interval=5s --poll-timeout=5m // eg. ginkgo -v --race --trace --fail-fast -p --randomize-all ./test/e2e/ -- --poll-interval=5s --poll-timeout=5m
flag.DurationVar(&pollInterval, "poll-interval", 5*time.Second, "poll-interval defines the interval time for a poll operation") flag.DurationVar(&pollInterval, "poll-interval", 5*time.Second, "poll-interval defines the interval time for a poll operation")
flag.DurationVar(&pollTimeout, "poll-timeout", 300*time.Second, "poll-timeout defines the time which the poll operation times out") flag.DurationVar(&pollTimeout, "poll-timeout", 300*time.Second, "poll-timeout defines the time which the poll operation times out")
flag.StringVar(&karmadaContext, "karmada-context", karmadaContext, "Name of the cluster context in control plane kubeconfig file.") flag.StringVar(&hostContext, "host-context", "karmada-host", "Name of the host cluster context in control plane kubeconfig file.")
flag.StringVar(&karmadaContext, "karmada-context", "karmada-apiserver", "Name of the karmada cluster context in control plane kubeconfig file.")
} }
func TestE2E(t *testing.T) { func TestE2E(t *testing.T) {
@ -148,6 +151,13 @@ var _ = ginkgo.SynchronizedBeforeSuite(func() []byte {
gomega.Expect(karmadactlPath).ShouldNot(gomega.BeEmpty()) gomega.Expect(karmadactlPath).ShouldNot(gomega.BeEmpty())
clusterProvider = cluster.NewProvider() clusterProvider = cluster.NewProvider()
restConfig, err = framework.LoadRESTClientConfig(kubeconfig, hostContext)
gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
hostKubeClient, err = kubernetes.NewForConfig(restConfig)
gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
restConfig, err = framework.LoadRESTClientConfig(kubeconfig, karmadaContext) restConfig, err = framework.LoadRESTClientConfig(kubeconfig, karmadaContext)
gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) gomega.Expect(err).ShouldNot(gomega.HaveOccurred())