karmada/pkg/metricsadapter/provider/resourcemetrics.go

674 lines
22 KiB
Go
Executable File

/*
Copyright 2023 The Karmada Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package provider
import (
"context"
"fmt"
"sync"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
listv1 "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"
"k8s.io/metrics/pkg/apis/metrics"
metricsv1beta1 "k8s.io/metrics/pkg/apis/metrics/v1beta1"
autoscalingv1alpha1 "github.com/karmada-io/karmada/pkg/apis/autoscaling/v1alpha1"
clusterlister "github.com/karmada-io/karmada/pkg/generated/listers/cluster/v1alpha1"
"github.com/karmada-io/karmada/pkg/util"
"github.com/karmada-io/karmada/pkg/util/fedinformer/genericmanager"
"github.com/karmada-io/karmada/pkg/util/fedinformer/typedmanager"
"github.com/karmada-io/karmada/pkg/util/helper"
)
const (
// labelSelectorAnnotationInternal is the annotation key used internally by karmada-metrics-adapter
// to record the label selector specified by the user.
labelSelectorAnnotationInternal = "internal.karmada.io/selector"
// namespaceSpecifiedAnnotation is the annotation key used internally by karmada-metrics-adapter
// to record the namespace specified by the user.
namespaceSpecifiedAnnotation = "internal.karmada.io/namespace"
)
var (
// podMetricsGVR is the GroupVersionResource of pod metrics (v1beta1 version).
podMetricsGVR = metricsv1beta1.SchemeGroupVersion.WithResource("pods")
// nodeMetricsGVR is the GroupVersionResource of node metrics (v1beta1 version).
nodeMetricsGVR = metricsv1beta1.SchemeGroupVersion.WithResource("nodes")
// PodsGVR is the GroupVersionResource of core/v1 pods; it is the key used to fetch
// the pod lister from a single-cluster informer manager.
PodsGVR = corev1.SchemeGroupVersion.WithResource("pods")
// NodesGVR is the GroupVersionResource of core/v1 nodes; it is the key used to fetch
// the node lister from a single-cluster informer manager.
NodesGVR = corev1.SchemeGroupVersion.WithResource("nodes")
)
// queryResourceFromClustersFunc looks up the target resource in one member cluster
// through that cluster's typed informer manager. getMetricsParallel treats a nil
// return as "this cluster holds the resource"; a NotFound error skips the cluster
// silently and any other error skips it with an error log.
type queryResourceFromClustersFunc func(sci typedmanager.SingleClusterInformerManager, clusterName string) error
// queryMetricsFromClustersFunc queries metrics from one member cluster through that
// cluster's generic informer manager and returns the raw result, or an error when
// the query fails.
type queryMetricsFromClustersFunc func(sci genericmanager.SingleClusterInformerManager, clusterName string) (interface{}, error)
// ResourceMetricsProvider is a resource metrics provider, to provide cpu/memory metrics
// of pods and nodes gathered from member clusters.
type ResourceMetricsProvider struct {
// PodLister is the multi-cluster pod lister created in NewResourceMetricsProvider.
PodLister *PodLister
// NodeLister is the multi-cluster node lister created in NewResourceMetricsProvider.
NodeLister *NodeLister
// clusterLister lists the member clusters to query.
clusterLister clusterlister.ClusterLister
// informerManager supplies the per-cluster generic managers whose clients are
// used to fetch the metrics objects.
informerManager genericmanager.MultiClusterInformerManager
// typedInformerManager supplies the per-cluster typed managers whose listers are
// used to find out which clusters hold the requested pods/nodes.
typedInformerManager typedmanager.MultiClusterInformerManager
}
// NewResourceMetricsProvider builds a ResourceMetricsProvider from the given cluster
// lister and informer managers. The pod and node listers it exposes are created here
// and share clusterLister and typedInformerManager with the provider.
func NewResourceMetricsProvider(clusterLister clusterlister.ClusterLister, typedInformerManager typedmanager.MultiClusterInformerManager, informerManager genericmanager.MultiClusterInformerManager) *ResourceMetricsProvider {
	provider := &ResourceMetricsProvider{
		clusterLister:        clusterLister,
		informerManager:      informerManager,
		typedInformerManager: typedInformerManager,
	}
	provider.PodLister = NewPodLister(clusterLister, typedInformerManager)
	provider.NodeLister = NewNodeLister(clusterLister, typedInformerManager)
	return provider
}
// getMetricsParallel collects metrics from member clusters in two steps.
//
// Step 1 runs resourceFunc sequentially against the typed informer manager of every
// cluster returned by the cluster lister; a cluster becomes a target only when
// resourceFunc returns nil. Step 2 runs metricsFunc concurrently, one goroutine per
// target cluster, and gathers every successful result.
//
// Clusters without an informer manager and clusters whose query fails are skipped
// (NotFound errors silently, other errors with an error log), so the only error
// returned is a failure to list clusters. The results arrive from concurrent
// goroutines, so their order is not defined.
func (r *ResourceMetricsProvider) getMetricsParallel(resourceFunc queryResourceFromClustersFunc,
	metricsFunc queryMetricsFromClustersFunc) ([]interface{}, error) {
	clusters, err := r.clusterLister.List(labels.Everything())
	if err != nil {
		klog.Errorf("Failed to list clusters: %v", err)
		return nil, err
	}

	// step 1. Find out the target clusters in lister cache
	var targetClusters []string
	for _, cluster := range clusters {
		manager := r.typedInformerManager.GetSingleClusterManager(cluster.Name)
		if manager == nil {
			klog.Errorf("Failed to get cluster(%s) manager", cluster.Name)
			continue
		}

		switch queryErr := resourceFunc(manager, cluster.Name); {
		case queryErr == nil:
			targetClusters = append(targetClusters, cluster.Name)
		case !errors.IsNotFound(queryErr):
			klog.Errorf("Failed to query resource in cluster(%s): %v", cluster.Name, queryErr)
		}
	}

	var collected []interface{}
	if len(targetClusters) == 0 {
		return collected, nil
	}

	// step 2. Query metrics from the filtered target clusters
	results := make(chan interface{})
	var wg sync.WaitGroup
	wg.Add(len(targetClusters))
	for _, clusterName := range targetClusters {
		go func(cluster string) {
			defer wg.Done()

			manager := r.informerManager.GetSingleClusterManager(cluster)
			if manager == nil {
				klog.Errorf("Failed to get cluster(%s) manager", cluster)
				return
			}

			result, queryErr := metricsFunc(manager, cluster)
			if queryErr == nil {
				// If there are multiple metrics with same name, it's ok because it's an array instead of a map.
				// The HPA controller will calculate the average utilization with the array.
				results <- result
				return
			}
			if !errors.IsNotFound(queryErr) {
				klog.Errorf("Failed to query metrics in cluster(%s): %v", cluster, queryErr)
			}
		}(clusterName)
	}

	// Close the channel once every worker has finished so the drain loop below ends.
	go func() {
		wg.Wait()
		close(results)
	}()

	for item := range results {
		collected = append(collected, item)
	}
	return collected, nil
}
// queryPodMetricsByName returns the metrics of the pod namespace/name from every
// member cluster whose pod lister cache contains that pod. Each metrics object is
// annotated with the name of the cluster it was fetched from. Results that cannot
// be converted to the internal metrics type are dropped.
func (r *ResourceMetricsProvider) queryPodMetricsByName(name, namespace string) ([]metrics.PodMetrics, error) {
	// podExists reports (via a nil error) whether the pod is present in the cluster's cache.
	podExists := func(sci typedmanager.SingleClusterInformerManager, _ string) error {
		genericLister, err := sci.Lister(PodsGVR)
		if err != nil {
			return err
		}
		_, err = genericLister.(listv1.PodLister).Pods(namespace).Get(name)
		return err
	}

	// fetchMetrics reads the pod metrics object from the cluster and tags it with its source.
	fetchMetrics := func(sci genericmanager.SingleClusterInformerManager, clusterName string) (interface{}, error) {
		podMetricsClient := sci.GetClient().Resource(podMetricsGVR).Namespace(namespace)
		obj, err := podMetricsClient.Get(context.Background(), name, metav1.GetOptions{})
		if err != nil {
			return nil, err
		}
		util.MergeAnnotation(obj, autoscalingv1alpha1.QuerySourceAnnotationKey, clusterName)
		return obj, nil
	}

	rawResults, err := r.getMetricsParallel(podExists, fetchMetrics)
	if err != nil {
		return nil, err
	}

	var podMetrics []metrics.PodMetrics
	for _, raw := range rawResults {
		obj := raw.(*unstructured.Unstructured)
		converted, convertErr := metricsConvertV1beta1PodToInternalPod(*obj)
		if convertErr != nil {
			continue
		}
		podMetrics = append(podMetrics, converted...)
	}
	return podMetrics, nil
}
// queryPodMetricsBySelector returns the metrics of all pods in namespace matching
// selector, gathered from every member cluster whose pod lister cache holds at
// least one matching pod. Each metrics item is annotated with the name of the
// cluster it was fetched from. It fails only when the selector cannot be parsed or
// the clusters cannot be listed; per-cluster results that fail conversion are dropped.
func (r *ResourceMetricsProvider) queryPodMetricsBySelector(selector, namespace string) ([]metrics.PodMetrics, error) {
	parsedSelector, err := labels.Parse(selector)
	if err != nil {
		klog.Errorf("Failed to parse label selector: %v", err)
		return nil, err
	}

	// hasMatchingPods returns nil when the cluster caches at least one matching pod,
	// and a NotFound error when it caches none.
	hasMatchingPods := func(sci typedmanager.SingleClusterInformerManager, clusterName string) error {
		genericLister, err := sci.Lister(PodsGVR)
		if err != nil {
			return err
		}
		matched, err := genericLister.(listv1.PodLister).Pods(namespace).List(parsedSelector)
		switch {
		case err != nil:
			klog.Errorf("Failed to list pods in cluster(%s): %v", clusterName, err)
			return err
		case len(matched) == 0:
			return errors.NewNotFound(PodsGVR.GroupResource(), "")
		default:
			return nil
		}
	}

	// fetchMetrics lists the pod metrics with the raw selector and tags every item with its source.
	fetchMetrics := func(sci genericmanager.SingleClusterInformerManager, clusterName string) (interface{}, error) {
		listOptions := metav1.ListOptions{LabelSelector: selector}
		list, err := sci.GetClient().Resource(podMetricsGVR).Namespace(namespace).List(context.Background(), listOptions)
		if err != nil {
			return nil, err
		}
		for i := range list.Items {
			util.MergeAnnotation(&list.Items[i], autoscalingv1alpha1.QuerySourceAnnotationKey, clusterName)
		}
		return list, nil
	}

	rawResults, err := r.getMetricsParallel(hasMatchingPods, fetchMetrics)
	if err != nil {
		return nil, err
	}

	var podMetrics []metrics.PodMetrics
	for _, raw := range rawResults {
		list := raw.(*unstructured.UnstructuredList)
		converted, convertErr := metricsConvertV1beta1PodToInternalPod(list.Items...)
		if convertErr != nil {
			continue
		}
		podMetrics = append(podMetrics, converted...)
	}
	return podMetrics, nil
}
// queryNodeMetricsByName queries metrics by node name from target clusters.
//
// A cluster is a target when its node lister cache contains a node called name.
// Each metrics object is annotated with the name of the cluster it was fetched
// from. Results that cannot be converted to the internal metrics type are dropped.
// The returned error is non-nil only when the member clusters cannot be listed.
func (r *ResourceMetricsProvider) queryNodeMetricsByName(name string) ([]metrics.NodeMetrics, error) {
	resourceQueryFunc := func(sci typedmanager.SingleClusterInformerManager, _ string) error {
		// The lister must be fetched with NodesGVR: the value is asserted to a
		// NodeLister right below, which does not hold for the pod lister.
		nodeInterface, err := sci.Lister(NodesGVR)
		if err != nil {
			return err
		}
		lister := nodeInterface.(listv1.NodeLister)
		_, err = lister.Get(name)
		return err
	}

	metricsQueryFunc := func(sci genericmanager.SingleClusterInformerManager, clusterName string) (interface{}, error) {
		metricsValue, err := sci.GetClient().Resource(nodeMetricsGVR).Get(context.Background(), name, metav1.GetOptions{})
		if err != nil {
			return nil, err
		}
		// Record which cluster the metrics came from.
		util.MergeAnnotation(metricsValue, autoscalingv1alpha1.QuerySourceAnnotationKey, clusterName)
		return metricsValue, nil
	}

	metricsQuery, err := r.getMetricsParallel(resourceQueryFunc, metricsQueryFunc)
	if err != nil {
		return nil, err
	}

	var nodeMetrics []metrics.NodeMetrics
	for index := range metricsQuery {
		internalMetrics, err := metricsConvertV1beta1NodeToInternalNode(*metricsQuery[index].(*unstructured.Unstructured))
		if err != nil {
			// The conversion helper already logged the failure; skip this result.
			continue
		}
		nodeMetrics = append(nodeMetrics, internalMetrics...)
	}
	return nodeMetrics, nil
}
// queryNodeMetricsBySelector queries metrics by node selector from target clusters.
//
// A cluster is a target when its node lister cache holds at least one node
// matching selector. Each metrics item is annotated with the name of the cluster
// it was fetched from. Per-cluster results that cannot be converted to the
// internal metrics type are dropped. The returned error is non-nil only when the
// selector cannot be parsed or the member clusters cannot be listed.
func (r *ResourceMetricsProvider) queryNodeMetricsBySelector(selector string) ([]metrics.NodeMetrics, error) {
	labelSelector, err := labels.Parse(selector)
	if err != nil {
		klog.Errorf("Failed to parse label selector: %v", err)
		return nil, err
	}

	resourceQueryFunc := func(sci typedmanager.SingleClusterInformerManager, clusterName string) error {
		nodeInterface, err := sci.Lister(NodesGVR)
		if err != nil {
			return err
		}
		lister := nodeInterface.(listv1.NodeLister)
		nodes, err := lister.List(labelSelector)
		if err != nil {
			klog.Errorf("Failed to list nodes in cluster(%s): %v", clusterName, err)
			return err
		}
		if len(nodes) == 0 {
			// NotFound makes getMetricsParallel skip this cluster without logging.
			return errors.NewNotFound(NodesGVR.GroupResource(), "")
		}
		return nil
	}

	metricsQueryFunc := func(sci genericmanager.SingleClusterInformerManager, clusterName string) (interface{}, error) {
		metricsList, err := sci.GetClient().Resource(nodeMetricsGVR).List(context.Background(), metav1.ListOptions{
			LabelSelector: selector,
		})
		if err != nil {
			return nil, err
		}
		// Record which cluster every item came from.
		for i := range metricsList.Items {
			util.MergeAnnotation(&metricsList.Items[i], autoscalingv1alpha1.QuerySourceAnnotationKey, clusterName)
		}
		return metricsList, nil
	}

	metricsQuery, err := r.getMetricsParallel(resourceQueryFunc, metricsQueryFunc)
	if err != nil {
		return nil, err
	}

	var nodeMetrics []metrics.NodeMetrics
	for index := range metricsQuery {
		metricsData := metricsQuery[index].(*unstructured.UnstructuredList)
		internalMetrics, err := metricsConvertV1beta1NodeToInternalNode(metricsData.Items...)
		if err != nil {
			// The conversion helper already logged the failure; skip this cluster's result.
			continue
		}
		nodeMetrics = append(nodeMetrics, internalMetrics...)
	}
	return nodeMetrics, nil
}
// GetPodMetrics returns pod metrics for the internally constructed pod metadata.
// Only the first element of pods is inspected: when it carries the internal
// selector annotation the query is done by selector (in the namespace recorded in
// the namespace annotation), otherwise it is done by the pod's name and namespace.
// An empty argument list yields a nil result and no error.
func (r *ResourceMetricsProvider) GetPodMetrics(pods ...*metav1.PartialObjectMetadata) ([]metrics.PodMetrics, error) {
	if len(pods) == 0 {
		return nil, nil
	}

	var (
		result []metrics.PodMetrics
		err    error
	)
	// In the previous step, we construct the annotations, so it couldn't be nil
	target := pods[0]
	if selectorStr, bySelector := target.Annotations[labelSelectorAnnotationInternal]; bySelector {
		result, err = r.queryPodMetricsBySelector(selectorStr, target.Annotations[namespaceSpecifiedAnnotation])
	} else {
		result, err = r.queryPodMetricsByName(target.Name, target.Namespace)
	}

	if err != nil {
		return nil, err
	}
	return result, nil
}
// GetNodeMetrics returns node metrics for the internally constructed node.
// Only the first element of nodes is inspected: when it carries the internal
// selector annotation the query is done by selector, otherwise by the node's name.
// An empty argument list yields a nil result and no error.
func (r *ResourceMetricsProvider) GetNodeMetrics(nodes ...*corev1.Node) ([]metrics.NodeMetrics, error) {
	if len(nodes) == 0 {
		return nil, nil
	}

	var (
		result []metrics.NodeMetrics
		err    error
	)
	// In the previous step, we construct the annotations, so it couldn't be nil
	target := nodes[0]
	if selectorStr, bySelector := target.Annotations[labelSelectorAnnotationInternal]; bySelector {
		result, err = r.queryNodeMetricsBySelector(selectorStr)
	} else {
		result, err = r.queryNodeMetricsByName(target.Name)
	}

	if err != nil {
		return nil, err
	}
	return result, nil
}
// PodLister is an internal lister for pods that looks pods up across all member
// clusters returned by clusterLister.
type PodLister struct {
// namespaceSpecified is the namespace queries are restricted to; it is set on the
// copy returned by ByNamespace and is empty on a lister built by NewPodLister.
namespaceSpecified string
// clusterLister lists the member clusters to search.
clusterLister clusterlister.ClusterLister
// informerManager supplies the per-cluster typed managers that hold the pod listers.
informerManager typedmanager.MultiClusterInformerManager
}
// NewPodLister returns a PodLister backed by the given cluster lister and typed
// informer manager. No namespace is set on the returned lister; use ByNamespace
// to obtain a namespaced copy.
func NewPodLister(clusterLister clusterlister.ClusterLister, informerManager typedmanager.MultiClusterInformerManager) *PodLister {
	lister := new(PodLister)
	lister.clusterLister = clusterLister
	lister.informerManager = informerManager
	return lister
}
// List gathers, from every member cluster, the pods in the lister's namespace that
// match selector and returns them as partial metadata objects annotated with the
// selector and namespace used for the query. Clusters whose informer manager or
// pod lister is unavailable are skipped with an error log; a failure to list pods
// in any cluster aborts the call and returns that error.
func (p *PodLister) List(selector labels.Selector) (ret []runtime.Object, err error) {
	selectorStr := selector.String()
	klog.V(4).Infof("List query pods with selector: %s", selectorStr)

	clusters, listErr := p.clusterLister.List(labels.Everything())
	if listErr != nil {
		return nil, listErr
	}

	for _, cluster := range clusters {
		manager := p.informerManager.GetSingleClusterManager(cluster.Name)
		if manager == nil {
			klog.Errorf("Failed to get SingleClusterInformerManager for cluster(%s)", cluster.Name)
			continue
		}

		genericLister, listerErr := manager.Lister(PodsGVR)
		if listerErr != nil {
			klog.Errorf("Failed to get lister for cluster(%s): %v", cluster.Name, listerErr)
			continue
		}

		namespacedPods := genericLister.(listv1.PodLister).Pods(p.namespaceSpecified)
		pods, podsErr := namespacedPods.List(selector)
		if podsErr != nil {
			klog.Errorf("Failed to list pods from cluster(%s) in namespace(%s): %v", cluster.Name, p.namespaceSpecified, podsErr)
			return nil, podsErr
		}

		for _, pod := range pods {
			ret = append(ret, p.convertToPodPartialData(pod, selectorStr, true))
		}
	}
	return ret, nil
}
// convertToPodPartialData converts pod to partial data.
//
// The result carries the pod's TypeMeta and ObjectMeta. When labelSelector is true
// the internal selector and namespace annotations are set to selector and the
// lister's namespace; when it is false both internal annotations are removed so a
// user-supplied value cannot be mistaken for a selector query later on.
//
// The annotations are written to a private copy of the map, so the pod passed in
// (which callers obtain from an informer lister) is never modified.
func (p *PodLister) convertToPodPartialData(pod *corev1.Pod, selector string, labelSelector bool) *metav1.PartialObjectMetadata {
	ret := &metav1.PartialObjectMetadata{
		TypeMeta:   pod.TypeMeta,
		ObjectMeta: pod.ObjectMeta,
	}

	// ObjectMeta was copied by value, which leaves ret.Annotations pointing at the
	// very same map as pod.Annotations. Replace it with a copy before mutating.
	// The +2 leaves room for the two internal annotations.
	annotations := make(map[string]string, len(pod.Annotations)+2)
	for key, value := range pod.Annotations {
		annotations[key] = value
	}
	ret.Annotations = annotations

	// If user sets this annotation, we need to remove it to avoid parsing wrong next.
	if !labelSelector {
		delete(ret.Annotations, namespaceSpecifiedAnnotation)
		delete(ret.Annotations, labelSelectorAnnotationInternal)
		return ret
	}

	ret.Annotations[labelSelectorAnnotationInternal] = selector
	ret.Annotations[namespaceSpecifiedAnnotation] = p.namespaceSpecified
	return ret
}
// Get returns the internal constructed pod with name info.
//
// It searches the lister's namespace in every member cluster for a pod called
// name. Clusters whose informer manager or pod lister is unavailable are skipped
// with an error log, as are clusters where the lookup fails (NotFound silently).
// It returns a Conflict error when the pod exists in more than one cluster and a
// NotFound error when no cluster has it.
func (p *PodLister) Get(name string) (runtime.Object, error) {
	klog.V(4).Infof("Query pod in namespace(%s) with name:%s", p.namespaceSpecified, name)
	clusters, err := p.clusterLister.List(labels.Everything())
	if err != nil {
		return nil, err
	}

	var podPartial *metav1.PartialObjectMetadata
	for _, cluster := range clusters {
		sci := p.informerManager.GetSingleClusterManager(cluster.Name)
		if sci == nil {
			klog.Errorf("Failed to get SingleClusterInformerManager for cluster(%s)", cluster.Name)
			continue
		}
		sciLister, err := sci.Lister(PodsGVR)
		if err != nil {
			klog.Errorf("Failed to get lister for cluster(%s): %v", cluster.Name, err)
			continue
		}
		podLister := sciLister.(listv1.PodLister)
		pod, err := podLister.Pods(p.namespaceSpecified).Get(name)
		if err != nil {
			if !errors.IsNotFound(err) {
				klog.Errorf("Failed to get pod from cluster(%s) in namespace(%s): %v", cluster.Name, p.namespaceSpecified, err)
			}
			continue
		}
		// A second hit means the name is ambiguous across clusters.
		if podPartial != nil {
			err := fmt.Errorf("the pod(%s) found in more than one clusters", name)
			return nil, errors.NewConflict(PodsGVR.GroupResource(), name, err)
		}
		podPartial = p.convertToPodPartialData(pod, "", false)
	}

	if podPartial != nil {
		return podPartial, nil
	}
	return nil, errors.NewNotFound(PodsGVR.GroupResource(), name)
}
// ByNamespace returns a new PodLister that shares this lister's cluster lister and
// informer manager but restricts its queries to namespace. The receiver itself is
// left unchanged.
func (p *PodLister) ByNamespace(namespace string) cache.GenericNamespaceLister {
	klog.V(4).Infof("Query Pods in namespace: %s", namespace)
	return &PodLister{
		namespaceSpecified: namespace,
		clusterLister:      p.clusterLister,
		informerManager:    p.informerManager,
	}
}
// NodeLister is an internal lister for nodes that looks nodes up across all member
// clusters returned by clusterLister.
type NodeLister struct {
// clusterLister lists the member clusters to search.
clusterLister clusterlister.ClusterLister
// informerManager supplies the per-cluster typed managers that hold the node listers.
informerManager typedmanager.MultiClusterInformerManager
}
// NewNodeLister returns a NodeLister backed by the given cluster lister and typed
// informer manager.
func NewNodeLister(clusterLister clusterlister.ClusterLister, informerManager typedmanager.MultiClusterInformerManager) *NodeLister {
	lister := new(NodeLister)
	lister.clusterLister = clusterLister
	lister.informerManager = informerManager
	return lister
}
// List returns the internal constructed node with label selector info.
//
// It gathers the nodes matching selector from every member cluster and returns
// copies of them with the internal selector annotation set to selector.String().
// Clusters whose informer manager or node lister is unavailable are skipped with
// an error log; a failure to list nodes in any cluster aborts the call and returns
// that error.
func (n *NodeLister) List(selector labels.Selector) (ret []*corev1.Node, err error) {
	klog.V(4).Infof("Query node metrics with selector: %s", selector.String())
	clusters, err := n.clusterLister.List(labels.Everything())
	if err != nil {
		return nil, err
	}

	for _, cluster := range clusters {
		sci := n.informerManager.GetSingleClusterManager(cluster.Name)
		if sci == nil {
			klog.Errorf("Failed to get SingleClusterInformerManager for cluster(%s)", cluster.Name)
			continue
		}
		nodeInterface, err := sci.Lister(NodesGVR)
		if err != nil {
			klog.Errorf("Failed to get lister for cluster(%s): %v", cluster.Name, err)
			continue
		}
		nodes, err := nodeInterface.(listv1.NodeLister).List(selector)
		if err != nil {
			klog.Errorf("Failed to list nodes from cluster(%s): %v", cluster.Name, err)
			return nil, err
		}

		for index := range nodes {
			// The lister hands out pointers into the shared informer cache, so the
			// node must be copied before its annotations are changed.
			nodeTyped := nodes[index].DeepCopy()
			if nodeTyped.Annotations == nil {
				nodeTyped.Annotations = map[string]string{}
			}
			// If user sets this annotation, we need to reset it.
			nodeTyped.Annotations[labelSelectorAnnotationInternal] = selector.String()
			ret = append(ret, nodeTyped)
		}
	}
	return ret, nil
}
// Get returns the internal constructed node with name info.
//
// It searches every member cluster for a node called name and returns a copy of
// it with the internal selector annotation removed. Clusters whose informer
// manager or node lister is unavailable are skipped with an error log, as are
// clusters where the lookup fails (NotFound silently). It returns a Conflict error
// when the node exists in more than one cluster and a NotFound error when no
// cluster has it.
func (n *NodeLister) Get(name string) (*corev1.Node, error) {
	klog.V(4).Infof("Query node metrics with name:%s", name)
	clusters, err := n.clusterLister.List(labels.Everything())
	if err != nil {
		return nil, err
	}

	var nodeTyped *corev1.Node
	for _, cluster := range clusters {
		sci := n.informerManager.GetSingleClusterManager(cluster.Name)
		if sci == nil {
			klog.Errorf("Failed to get SingleClusterInformerManager for cluster(%s)", cluster.Name)
			continue
		}
		sciLister, err := sci.Lister(NodesGVR)
		if err != nil {
			klog.Errorf("Failed to get lister for cluster(%s): %v", cluster.Name, err)
			continue
		}
		node, err := sciLister.(listv1.NodeLister).Get(name)
		if err != nil {
			if !errors.IsNotFound(err) {
				klog.Errorf("Failed to get node from cluster(%s):%v", cluster.Name, err)
			}
			continue
		}
		// A second hit means the name is ambiguous across clusters.
		if nodeTyped != nil {
			err := fmt.Errorf("the node(%s) found in more than one clusters", name)
			return nil, errors.NewConflict(NodesGVR.GroupResource(), name, err)
		}

		// The lister hands out a pointer into the shared informer cache, so the
		// node must be copied before its annotations are changed.
		nodeTyped = node.DeepCopy()
		if nodeTyped.Annotations == nil {
			nodeTyped.Annotations = map[string]string{}
		}
		// If user sets this annotation, we need to remove it to avoid parsing wrong next.
		delete(nodeTyped.Annotations, labelSelectorAnnotationInternal)
	}

	if nodeTyped != nil {
		return nodeTyped, nil
	}
	return nil, errors.NewNotFound(NodesGVR.GroupResource(), name)
}
// metricsConvertV1beta1PodToInternalPod turns unstructured pod metrics objects into
// internal metrics.PodMetrics values. The conversion runs in two passes: every
// object is first decoded into a typed v1beta1 PodMetrics, then each typed value is
// converted to the internal type. The first failure in either pass is logged and
// returned with a nil slice.
func metricsConvertV1beta1PodToInternalPod(objs ...unstructured.Unstructured) ([]metrics.PodMetrics, error) {
	// Pass 1: unstructured -> v1beta1.
	var typedList []metricsv1beta1.PodMetrics
	for i := range objs {
		var typed metricsv1beta1.PodMetrics
		err := helper.ConvertToTypedObject(&objs[i], &typed)
		if err != nil {
			klog.Errorf("Failed to convert to typed object: %v", err)
			return nil, err
		}
		typedList = append(typedList, typed)
	}

	// Pass 2: v1beta1 -> internal.
	var internalList []metrics.PodMetrics
	for i := range typedList {
		var internal metrics.PodMetrics
		err := metricsv1beta1.Convert_v1beta1_PodMetrics_To_metrics_PodMetrics(&typedList[i], &internal, nil)
		if err != nil {
			klog.Errorf("Failed to convert to typed object: %v", err)
			return nil, err
		}
		internalList = append(internalList, internal)
	}
	return internalList, nil
}
// metricsConvertV1beta1NodeToInternalNode turns unstructured node metrics objects
// into internal metrics.NodeMetrics values. The conversion runs in two passes:
// every object is first decoded into a typed v1beta1 NodeMetrics, then each typed
// value is converted to the internal type. The first failure in either pass is
// logged and returned with a nil slice.
func metricsConvertV1beta1NodeToInternalNode(objs ...unstructured.Unstructured) ([]metrics.NodeMetrics, error) {
	// Pass 1: unstructured -> v1beta1.
	var typedList []metricsv1beta1.NodeMetrics
	for i := range objs {
		var typed metricsv1beta1.NodeMetrics
		err := helper.ConvertToTypedObject(&objs[i], &typed)
		if err != nil {
			klog.Errorf("Failed to convert to typed object: %v", err)
			return nil, err
		}
		typedList = append(typedList, typed)
	}

	// Pass 2: v1beta1 -> internal.
	var internalList []metrics.NodeMetrics
	for i := range typedList {
		var internal metrics.NodeMetrics
		err := metricsv1beta1.Convert_v1beta1_NodeMetrics_To_metrics_NodeMetrics(&typedList[i], &internal, nil)
		if err != nil {
			klog.Errorf("Failed to convert to typed object: %v", err)
			return nil, err
		}
		internalList = append(internalList, internal)
	}
	return internalList, nil
}