/* Copyright 2023 The Karmada Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package provider import ( "context" "fmt" "sync" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/cache" "k8s.io/klog/v2" "k8s.io/metrics/pkg/apis/metrics" metricsv1beta1 "k8s.io/metrics/pkg/apis/metrics/v1beta1" autoscalingv1alpha1 "github.com/karmada-io/karmada/pkg/apis/autoscaling/v1alpha1" clusterlister "github.com/karmada-io/karmada/pkg/generated/listers/cluster/v1alpha1" "github.com/karmada-io/karmada/pkg/util" "github.com/karmada-io/karmada/pkg/util/fedinformer/genericmanager" "github.com/karmada-io/karmada/pkg/util/helper" ) const ( // labelSelectorAnnotationInternal is the annotation used internal in karmada-metrics-adapter, // to record the selector specified by the user labelSelectorAnnotationInternal = "internal.karmada.io/selector" // namespaceSpecifiedAnnotation is the annotation used in karmada-metrics-adapter, // to record the namespace specified by the user namespaceSpecifiedAnnotation = "internal.karmada.io/namespace" ) var ( // podMetricsGVR is the gvr of pod metrics(v1beta1 version) podMetricsGVR = metricsv1beta1.SchemeGroupVersion.WithResource("pods") // nodeMetricsGVR is the gvr of node metrics(v1beta1 version) nodeMetricsGVR = metricsv1beta1.SchemeGroupVersion.WithResource("nodes") // PodsGVR is the gvr of pods PodsGVR = corev1.SchemeGroupVersion.WithResource("pods") // NodesGVR is the gvr of nodes NodesGVR = corev1.SchemeGroupVersion.WithResource("nodes") ) type queryResourceFromClustersFunc func(sci genericmanager.SingleClusterInformerManager, clusterName string) error type queryMetricsFromClustersFunc func(sci genericmanager.SingleClusterInformerManager, clusterName string) (interface{}, error) // ResourceMetricsProvider is a resource metrics provider, to provide cpu/memory metrics type ResourceMetricsProvider struct { PodLister *PodLister NodeLister *NodeLister clusterLister clusterlister.ClusterLister informerManager genericmanager.MultiClusterInformerManager } // NewResourceMetricsProvider creates a new resource metrics provider func NewResourceMetricsProvider(clusterLister clusterlister.ClusterLister, informerManager genericmanager.MultiClusterInformerManager) *ResourceMetricsProvider { return &ResourceMetricsProvider{ clusterLister: clusterLister, informerManager: informerManager, PodLister: NewPodLister(clusterLister, informerManager), NodeLister: NewNodeLister(clusterLister, informerManager), } } // getMetricsParallel is a parallel func to query metrics from member clusters func (r *ResourceMetricsProvider) getMetricsParallel(resourceFunc queryResourceFromClustersFunc, metricsFunc queryMetricsFromClustersFunc) ([]interface{}, error) { clusters, err := r.clusterLister.List(labels.Everything()) if err != nil { klog.Errorf("Failed to list clusters: %v", err) return nil, err } // step 1. Find out the target clusters in lister cache var targetClusters []string for _, cluster := range clusters { sci := r.informerManager.GetSingleClusterManager(cluster.Name) if sci == nil { klog.Errorf("Failed to get cluster(%s) manager", cluster.Name) continue } err := resourceFunc(sci, cluster.Name) if err != nil { if !errors.IsNotFound(err) { klog.Errorf("Failed to query resource in cluster(%s): %v", cluster.Name, err) } continue } targetClusters = append(targetClusters, cluster.Name) } var metrics []interface{} if len(targetClusters) == 0 { return metrics, nil } // step 2. Query metrics from the filtered target clusters metricsChannel := make(chan interface{}) var wg sync.WaitGroup for _, clusterName := range targetClusters { wg.Add(1) go func(cluster string) { defer wg.Done() sci := r.informerManager.GetSingleClusterManager(cluster) if sci == nil { klog.Errorf("Failed to get cluster(%s) manager", cluster) return } metrics, err := metricsFunc(sci, cluster) if err != nil { if !errors.IsNotFound(err) { klog.Errorf("Failed to query metrics in cluster(%s): %v", cluster, err) } return } // If there are multiple metrics with same name, it's ok because it's an array instead of a map. // The HPA controller will calculate the average utilization with the array. metricsChannel <- metrics }(clusterName) } go func() { wg.Wait() close(metricsChannel) }() for { data, ok := <-metricsChannel if !ok { break } metrics = append(metrics, data) } return metrics, nil } // queryPodMetricsByName queries metrics by pod name from target clusters func (r *ResourceMetricsProvider) queryPodMetricsByName(name, namespace string) ([]metrics.PodMetrics, error) { resourceQueryFunc := func(sci genericmanager.SingleClusterInformerManager, _ string) error { _, err := sci.Lister(PodsGVR).ByNamespace(namespace).Get(name) return err } metricsQueryFunc := func(sci genericmanager.SingleClusterInformerManager, clusterName string) (interface{}, error) { metricsValue, err := sci.GetClient().Resource(podMetricsGVR). Namespace(namespace).Get(context.Background(), name, metav1.GetOptions{}) if err != nil { return nil, err } util.MergeAnnotation(metricsValue, autoscalingv1alpha1.QuerySourceAnnotationKey, clusterName) return metricsValue, err } metricsQuery, err := r.getMetricsParallel(resourceQueryFunc, metricsQueryFunc) if err != nil { return nil, err } var podMetrics []metrics.PodMetrics for index := range metricsQuery { internalMetrics, err := metricsConvertV1beta1PodToInternalPod(*metricsQuery[index].(*unstructured.Unstructured)) if err != nil { continue } podMetrics = append(podMetrics, internalMetrics...) } return podMetrics, nil } // queryPodMetricsBySelector queries metrics by pod selector from target clusters func (r *ResourceMetricsProvider) queryPodMetricsBySelector(selector, namespace string) ([]metrics.PodMetrics, error) { labelSelector, err := labels.Parse(selector) if err != nil { klog.Errorf("Failed to parse label selector: %v", err) return nil, err } resourceQueryFunc := func(sci genericmanager.SingleClusterInformerManager, clusterName string) error { pods, err := sci.Lister(PodsGVR).ByNamespace(namespace).List(labelSelector) if err != nil { klog.Errorf("Failed to list pods in cluster(%s): %v", clusterName, err) return err } if len(pods) == 0 { return errors.NewNotFound(PodsGVR.GroupResource(), "") } return nil } metricsQueryFunc := func(sci genericmanager.SingleClusterInformerManager, clusterName string) (interface{}, error) { metricsList, err := sci.GetClient().Resource(podMetricsGVR). Namespace(namespace).List(context.Background(), metav1.ListOptions{ LabelSelector: selector, }) if err != nil { return nil, err } for i := range metricsList.Items { util.MergeAnnotation(&metricsList.Items[i], autoscalingv1alpha1.QuerySourceAnnotationKey, clusterName) } return metricsList, err } metricsQuery, err := r.getMetricsParallel(resourceQueryFunc, metricsQueryFunc) if err != nil { return nil, err } var podMetrics []metrics.PodMetrics for index := range metricsQuery { metricsData := metricsQuery[index].(*unstructured.UnstructuredList) internalMetrics, err := metricsConvertV1beta1PodToInternalPod(metricsData.Items...) if err != nil { continue } podMetrics = append(podMetrics, internalMetrics...) } return podMetrics, nil } // queryNodeMetricsByName queries metrics by node name from target clusters func (r *ResourceMetricsProvider) queryNodeMetricsByName(name string) ([]metrics.NodeMetrics, error) { resourceQueryFunc := func(sci genericmanager.SingleClusterInformerManager, _ string) error { _, err := sci.Lister(NodesGVR).Get(name) return err } metricsQueryFunc := func(sci genericmanager.SingleClusterInformerManager, clusterName string) (interface{}, error) { metricsValue, err := sci.GetClient().Resource(nodeMetricsGVR).Get(context.Background(), name, metav1.GetOptions{}) if err != nil { return nil, err } util.MergeAnnotation(metricsValue, autoscalingv1alpha1.QuerySourceAnnotationKey, clusterName) return metricsValue, err } metricsQuery, err := r.getMetricsParallel(resourceQueryFunc, metricsQueryFunc) if err != nil { return nil, err } var nodeMetrics []metrics.NodeMetrics for index := range metricsQuery { internalMetrics, err := metricsConvertV1beta1NodeToInternalNode(*metricsQuery[index].(*unstructured.Unstructured)) if err != nil { continue } nodeMetrics = append(nodeMetrics, internalMetrics...) } return nodeMetrics, nil } // queryNodeMetricsBySelector queries metrics by node selector from target clusters func (r *ResourceMetricsProvider) queryNodeMetricsBySelector(selector string) ([]metrics.NodeMetrics, error) { labelSelector, err := labels.Parse(selector) if err != nil { klog.Errorf("Failed to parse label selector: %v", err) return nil, err } resourceQueryFunc := func(sci genericmanager.SingleClusterInformerManager, clusterName string) error { nodes, err := sci.Lister(NodesGVR).List(labelSelector) if err != nil { klog.Errorf("Failed to list pods in cluster(%s): %v", clusterName, err) return err } if len(nodes) == 0 { return errors.NewNotFound(PodsGVR.GroupResource(), "") } return nil } metricsQueryFunc := func(sci genericmanager.SingleClusterInformerManager, clusterName string) (interface{}, error) { metricsList, err := sci.GetClient().Resource(nodeMetricsGVR).List(context.Background(), metav1.ListOptions{ LabelSelector: selector, }) if err != nil { return nil, err } for i := range metricsList.Items { util.MergeAnnotation(&metricsList.Items[i], autoscalingv1alpha1.QuerySourceAnnotationKey, clusterName) } return metricsList, err } metricsQuery, err := r.getMetricsParallel(resourceQueryFunc, metricsQueryFunc) if err != nil { return nil, err } var nodeMetrics []metrics.NodeMetrics for index := range metricsQuery { metricsData := metricsQuery[index].(*unstructured.UnstructuredList) internalMetrics, err := metricsConvertV1beta1NodeToInternalNode(metricsData.Items...) if err != nil { continue } nodeMetrics = append(nodeMetrics, internalMetrics...) } return nodeMetrics, nil } // GetPodMetrics queries metrics by the internal constructed pod func (r *ResourceMetricsProvider) GetPodMetrics(pods ...*metav1.PartialObjectMetadata) ([]metrics.PodMetrics, error) { var queryData []metrics.PodMetrics if len(pods) == 0 { return queryData, nil } var err error // In the previous step, we construct the annotations, so it couldn't be nil if _, ok := pods[0].Annotations[labelSelectorAnnotationInternal]; ok { namespace := pods[0].Annotations[namespaceSpecifiedAnnotation] selectorStr := pods[0].Annotations[labelSelectorAnnotationInternal] queryData, err = r.queryPodMetricsBySelector(selectorStr, namespace) } else { queryData, err = r.queryPodMetricsByName(pods[0].Name, pods[0].Namespace) } if err != nil { return nil, err } return queryData, nil } // GetNodeMetrics queries metrics by the internal constructed node func (r *ResourceMetricsProvider) GetNodeMetrics(nodes ...*corev1.Node) ([]metrics.NodeMetrics, error) { var queryData []metrics.NodeMetrics if len(nodes) == 0 { return queryData, nil } var err error // In the previous step, we construct the annotations, so it couldn't be nil if _, ok := nodes[0].Annotations[labelSelectorAnnotationInternal]; ok { selectorStr := nodes[0].Annotations[labelSelectorAnnotationInternal] queryData, err = r.queryNodeMetricsBySelector(selectorStr) } else { queryData, err = r.queryNodeMetricsByName(nodes[0].Name) } if err != nil { return nil, err } return queryData, nil } // PodLister is an internal lister for pods type PodLister struct { namespaceSpecified string clusterLister clusterlister.ClusterLister informerManager genericmanager.MultiClusterInformerManager } // NewPodLister creates an internal new PodLister func NewPodLister(clusterLister clusterlister.ClusterLister, informerManager genericmanager.MultiClusterInformerManager) *PodLister { return &PodLister{ clusterLister: clusterLister, informerManager: informerManager, } } // List returns the internal constructed pod with label selector info func (p *PodLister) List(selector labels.Selector) (ret []runtime.Object, err error) { klog.V(4).Infof("List query pods with selector: %s", selector.String()) clusters, err := p.clusterLister.List(labels.Everything()) if err != nil { return nil, err } for _, cluster := range clusters { sci := p.informerManager.GetSingleClusterManager(cluster.Name) if sci == nil { klog.Errorf("Failed to get SingleClusterInformerManager for cluster(%s)", cluster.Name) continue } pods, err := sci.Lister(PodsGVR).ByNamespace(p.namespaceSpecified).List(selector) if err != nil { klog.Errorf("Failed to list pods from cluster(%s) in namespace(%s): %v", cluster.Name, p.namespaceSpecified, err) return nil, err } for _, pod := range pods { podTyped := &corev1.Pod{} err = helper.ConvertToTypedObject(pod, podTyped) if err != nil { klog.Errorf("Failed to convert to typed object: %v", err) return nil, err } podPartial := p.convertToPodPartialData(podTyped, selector.String(), true) ret = append(ret, podPartial) } } return ret, nil } // convertToPodPartialData converts pod to partial data func (p *PodLister) convertToPodPartialData(pod *corev1.Pod, selector string, labelSelector bool) *metav1.PartialObjectMetadata { ret := &metav1.PartialObjectMetadata{ TypeMeta: pod.TypeMeta, ObjectMeta: pod.ObjectMeta, } if ret.Annotations == nil { ret.Annotations = map[string]string{} } // If user sets this annotation, we need to remove it to avoid parsing wrong next. if !labelSelector { delete(ret.Annotations, namespaceSpecifiedAnnotation) delete(ret.Annotations, labelSelectorAnnotationInternal) return ret } ret.Annotations[labelSelectorAnnotationInternal] = selector ret.Annotations[namespaceSpecifiedAnnotation] = p.namespaceSpecified return ret } // Get returns the internal constructed pod with name info func (p *PodLister) Get(name string) (runtime.Object, error) { klog.V(4).Infof("Query pod in namespace(%s) with name:%s", p.namespaceSpecified, name) clusters, err := p.clusterLister.List(labels.Everything()) if err != nil { return nil, err } var podPartial *metav1.PartialObjectMetadata for _, cluster := range clusters { sci := p.informerManager.GetSingleClusterManager(cluster.Name) if sci == nil { klog.Errorf("Failed to get SingleClusterInformerManager for cluster(%s)", cluster.Name) continue } pod, err := sci.Lister(PodsGVR).ByNamespace(p.namespaceSpecified).Get(name) if err != nil { if !errors.IsNotFound(err) { klog.Errorf("Failed to get pod from clsuster(%s) in namespace(%s): %v", cluster.Name, p.namespaceSpecified, err) } continue } if podPartial != nil { err := fmt.Errorf("the pod(%s) found in more than one clusters", name) return nil, errors.NewConflict(PodsGVR.GroupResource(), name, err) } podTyped := &corev1.Pod{} err = helper.ConvertToTypedObject(pod, podTyped) if err != nil { klog.Errorf("Failed to convert to typed object: %v", err) return nil, err } podPartial = p.convertToPodPartialData(podTyped, "", false) } if podPartial != nil { return podPartial, nil } return nil, errors.NewNotFound(PodsGVR.GroupResource(), name) } // ByNamespace returns the pod lister with namespace info func (p *PodLister) ByNamespace(namespace string) cache.GenericNamespaceLister { klog.V(4).Infof("Query Pods in namespace: %s", namespace) listerCopy := &PodLister{ clusterLister: p.clusterLister, informerManager: p.informerManager, } listerCopy.namespaceSpecified = namespace return listerCopy } // NodeLister is an internal lister for nodes type NodeLister struct { clusterLister clusterlister.ClusterLister informerManager genericmanager.MultiClusterInformerManager } // NewNodeLister creates an internal new NodeLister func NewNodeLister(clusterLister clusterlister.ClusterLister, informerManager genericmanager.MultiClusterInformerManager) *NodeLister { return &NodeLister{ clusterLister: clusterLister, informerManager: informerManager, } } // List returns the internal constructed node with label selector info func (n *NodeLister) List(selector labels.Selector) (ret []*corev1.Node, err error) { klog.V(4).Infof("Query node metrics with selector: %s", selector.String()) clusters, err := n.clusterLister.List(labels.Everything()) if err != nil { return nil, err } for _, cluster := range clusters { sci := n.informerManager.GetSingleClusterManager(cluster.Name) if sci == nil { klog.Errorf("Failed to get SingleClusterInformerManager for cluster(%s)", cluster.Name) continue } nodes, err := sci.Lister(NodesGVR).List(selector) if err != nil { klog.Errorf("Failed to list nodes from cluster(%s): %v", cluster.Name, err) return nil, err } for index := range nodes { nodeTyped := &corev1.Node{} err = helper.ConvertToTypedObject(nodes[index], nodeTyped) if err != nil { klog.Errorf("Failed to convert to typed object: %v", err) return nil, err } if nodeTyped.Annotations == nil { nodeTyped.Annotations = map[string]string{} } // If user sets this annotation, we need to reset it. nodeTyped.Annotations[labelSelectorAnnotationInternal] = selector.String() ret = append(ret, nodeTyped) } } return ret, nil } // Get returns the internal constructed node with name info func (n *NodeLister) Get(name string) (*corev1.Node, error) { klog.V(4).Infof("Query node metrics with name:%s", name) clusters, err := n.clusterLister.List(labels.Everything()) if err != nil { return nil, err } var nodeTyped *corev1.Node for _, cluster := range clusters { sci := n.informerManager.GetSingleClusterManager(cluster.Name) if sci == nil { klog.Errorf("Failed to get SingleClusterInformerManager for cluster(%s)", cluster.Name) continue } node, err := sci.Lister(NodesGVR).Get(name) if err != nil { if !errors.IsNotFound(err) { klog.Errorf("Failed to get node from cluster(%s):%v", cluster.Name, err) } continue } if nodeTyped != nil { err := fmt.Errorf("the node(%s) found in more than one clusters", name) return nil, errors.NewConflict(NodesGVR.GroupResource(), name, err) } nodeTyped = &corev1.Node{} err = helper.ConvertToTypedObject(node, nodeTyped) if err != nil { klog.Errorf("Failed to convert to typed object: %v", err) return nil, err } if nodeTyped.Annotations == nil { nodeTyped.Annotations = map[string]string{} } // If user sets this annotation, we need to remove it to avoid parsing wrong next. delete(nodeTyped.Annotations, labelSelectorAnnotationInternal) } if nodeTyped != nil { return nodeTyped, nil } return nil, errors.NewNotFound(NodesGVR.GroupResource(), name) } // metricsConvertV1beta1PodToInternalPod converts metricsv1beta1.PodMetrics to metrics.PodMetrics func metricsConvertV1beta1PodToInternalPod(objs ...unstructured.Unstructured) ([]metrics.PodMetrics, error) { var podMetricsV1beta1 []metricsv1beta1.PodMetrics for index := range objs { single := metricsv1beta1.PodMetrics{} if err := helper.ConvertToTypedObject(&objs[index], &single); err != nil { klog.Errorf("Failed to convert to typed object: %v", err) return nil, err } podMetricsV1beta1 = append(podMetricsV1beta1, single) } var podMetricsInternal []metrics.PodMetrics for index := range podMetricsV1beta1 { single := metrics.PodMetrics{} if err := metricsv1beta1.Convert_v1beta1_PodMetrics_To_metrics_PodMetrics(&podMetricsV1beta1[index], &single, nil); err != nil { klog.Errorf("Failed to convert to typed object: %v", err) return nil, err } podMetricsInternal = append(podMetricsInternal, single) } return podMetricsInternal, nil } // metricsConvertV1beta1NodeToInternalNode converts metricsv1beta1.NodeMetrics to metrics.NodeMetrics func metricsConvertV1beta1NodeToInternalNode(objs ...unstructured.Unstructured) ([]metrics.NodeMetrics, error) { var nodeMetricsV1beta1 []metricsv1beta1.NodeMetrics for index := range objs { single := metricsv1beta1.NodeMetrics{} if err := helper.ConvertToTypedObject(&objs[index], &single); err != nil { klog.Errorf("Failed to convert to typed object: %v", err) return nil, err } nodeMetricsV1beta1 = append(nodeMetricsV1beta1, single) } var nodeMetricsInternal []metrics.NodeMetrics for index := range nodeMetricsV1beta1 { single := metrics.NodeMetrics{} if err := metricsv1beta1.Convert_v1beta1_NodeMetrics_To_metrics_NodeMetrics(&nodeMetricsV1beta1[index], &single, nil); err != nil { klog.Errorf("Failed to convert to typed object: %v", err) return nil, err } nodeMetricsInternal = append(nodeMetricsInternal, single) } return nodeMetricsInternal, nil }