package search import ( "fmt" "reflect" "strings" "sync" "time" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" "k8s.io/klog/v2" clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1" policyv1alpha1 "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1" searchv1alpha1 "github.com/karmada-io/karmada/pkg/apis/search/v1alpha1" informerfactory "github.com/karmada-io/karmada/pkg/generated/informers/externalversions" clusterlister "github.com/karmada-io/karmada/pkg/generated/listers/cluster/v1alpha1" "github.com/karmada-io/karmada/pkg/search/backendstore" "github.com/karmada-io/karmada/pkg/util" "github.com/karmada-io/karmada/pkg/util/fedinformer" "github.com/karmada-io/karmada/pkg/util/fedinformer/genericmanager" "github.com/karmada-io/karmada/pkg/util/gclient" "github.com/karmada-io/karmada/pkg/util/restmapper" ) type registrySet map[string]struct{} type clusterRegistry struct { registries registrySet resources map[schema.GroupVersionResource]registrySet } func (c *clusterRegistry) unregistry() bool { return len(c.registries) == 0 } // Controller ResourceRegistry controller type Controller struct { restConfig *rest.Config restMapper meta.RESTMapper informerFactory informerfactory.SharedInformerFactory clusterLister clusterlister.ClusterLister queue workqueue.RateLimitingInterface clusterRegistry sync.Map InformerManager genericmanager.MultiClusterInformerManager } // NewController returns a new ResourceRegistry controller func NewController(restConfig *rest.Config, factory informerfactory.SharedInformerFactory, restMapper meta.RESTMapper) (*Controller, error) { clusterLister := factory.Cluster().V1alpha1().Clusters().Lister() queue := workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()) c := &Controller{ restConfig: restConfig, informerFactory: factory, clusterLister: clusterLister, queue: queue, restMapper: restMapper, InformerManager: genericmanager.GetInstance(), } c.addAllEventHandlers() // TODO: leader election and full sync cs, err := kubernetes.NewForConfig(restConfig) if err != nil { return nil, err } backendstore.Init(cs) return c, nil } // addAllEventHandlers adds all event handlers to the informer func (c *Controller) addAllEventHandlers() { clusterInformer := c.informerFactory.Cluster().V1alpha1().Clusters().Informer() _, err := clusterInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addCluster, UpdateFunc: c.updateCluster, DeleteFunc: c.deleteCluster, }) if err != nil { klog.Errorf("Failed to add handlers for Clusters: %v", err) } resourceRegistryInformer := c.informerFactory.Search().V1alpha1().ResourceRegistries().Informer() _, err = resourceRegistryInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: c.addResourceRegistry, UpdateFunc: c.updateResourceRegistry, DeleteFunc: c.deleteResourceRegistry, }) if err != nil { klog.Errorf("Failed to add handlers for Clusters: %v", err) } // ignore the error here because the informers haven't been started _ = clusterInformer.SetTransform(fedinformer.StripUnusedFields) _ = resourceRegistryInformer.SetTransform(fedinformer.StripUnusedFields) } // Start the controller func (c *Controller) Start(stopCh <-chan struct{}) { klog.Infof("Starting karmada search controller") defer runtime.HandleCrash() c.informerFactory.WaitForCacheSync(stopCh) go wait.Until(c.worker, time.Second, stopCh) go func() { <-stopCh genericmanager.StopInstance() klog.Infof("Shutting down karmada search controller") }() } // worker processes the queue of resourceRegistry objects. func (c *Controller) worker() { for c.cacheNext() { } } // cacheNext processes the next cluster object in the queue. func (c *Controller) cacheNext() bool { // Wait until there is a new item in the working queue key, shutdown := c.queue.Get() if shutdown { klog.Errorf("Fail to pop item from queue") return false } // Tell the queue that we are done with processing this key. This unblocks the key for other workers // This allows safe parallel processing because two pods with the same key are never processed in // parallel. defer c.queue.Done(key) err := c.doCacheCluster(key.(string)) // Handle the error if something went wrong during the execution of the business logic c.handleErr(err, key) return true } // handleErr checks if an error happened and makes sure we will retry later. func (c *Controller) handleErr(err error, key interface{}) { if err == nil { c.queue.Forget(key) return } klog.Errorf("Error cache member cluster %v, %v", key, err) c.queue.AddRateLimited(key) } func (c *Controller) getClusterMatchedRegistries(cluster *clusterv1alpha1.Cluster) (indexedByName, matched map[string]*searchv1alpha1.ResourceRegistry, err error) { client := c.informerFactory.Search().V1alpha1().ResourceRegistries().Lister() var registries []*searchv1alpha1.ResourceRegistry if registries, err = client.List(labels.Everything()); err != nil { klog.Errorf("List resource registries for reconciling cluster failed, err: %s", err) return } if len(registries) == 0 { klog.Infof("No resource registries, no need to reconcile cluster") return } indexedByName = make(map[string]*searchv1alpha1.ResourceRegistry, len(registries)) matched = make(map[string]*searchv1alpha1.ResourceRegistry, len(registries)) for i, registry := range registries { indexedByName[registry.Name] = registries[i] if util.ClusterMatches(cluster, registry.Spec.TargetCluster) { matched[registry.Name] = registries[i] } } return } func (c *Controller) getRegistryAddedResources(registry *searchv1alpha1.ResourceRegistry, cr *clusterRegistry, added []schema.GroupVersionResource) []schema.GroupVersionResource { resourcesToWatch := c.getResources(registry.Spec.ResourceSelectors) for _, resource := range resourcesToWatch { if resourcesOnWatching, exist := cr.resources[resource]; exist { resourcesOnWatching[registry.Name] = struct{}{} } else { added = append(added, resource) cr.resources[resource] = registrySet{registry.Name: struct{}{}} } } return added } func (c *Controller) getClusterRegistriesModification(registries map[string]*searchv1alpha1.ResourceRegistry, matched map[string]*searchv1alpha1.ResourceRegistry, cr *clusterRegistry, added []string, removed []string) (updated *clusterRegistry, addedResources []schema.GroupVersionResource, removedResources []schema.GroupVersionResource) { defer func() { updated = cr }() if cr == nil { cr = &clusterRegistry{ resources: make(map[schema.GroupVersionResource]registrySet), registries: make(registrySet)} } for _, name := range removed { delete(cr.registries, name) for resource, previousRegistries := range cr.resources { delete(previousRegistries, name) if len(previousRegistries) == 0 { removedResources = append(removedResources, resource) } } for _, resource := range removedResources { delete(cr.resources, resource) } } for _, name := range added { cr.registries[name] = struct{}{} addedResources = c.getRegistryAddedResources(registries[name], cr, addedResources) } for _, registry := range matched { addedResources = c.getRegistryAddedResources(registry, cr, addedResources) } return } func (c *Controller) findOpenSearchBackend(matchedRegistries map[string]*searchv1alpha1.ResourceRegistry) *searchv1alpha1.BackendStoreConfig { for _, registry := range matchedRegistries { if backend := registry.Spec.BackendStore; backend != nil && backend.OpenSearch != nil { // one cluster may be related to multi registries, // however only one backend could keep in memory with one cluster return backend } } return nil } func (c *Controller) clusterAbleToCache(cluster string) (cls *clusterv1alpha1.Cluster, able bool, err error) { cls, err = c.clusterLister.Get(cluster) if err != nil { if apierrors.IsNotFound(err) { klog.Infof("Try to stop cluster informer %s", cluster) c.InformerManager.Stop(cluster) return } return } if !cls.DeletionTimestamp.IsZero() { klog.Infof("Try to stop cluster informer %s", cluster) c.InformerManager.Stop(cluster) return } if !util.IsClusterReady(&cls.Status) { klog.Warningf("cluster %s is notReady try to stop this cluster informer", cluster) c.InformerManager.Stop(cluster) return } able = true return } func (c *Controller) reconcileClusterWithRegistries(cls *clusterv1alpha1.Cluster) (matchedRegistries map[string]*searchv1alpha1.ResourceRegistry, cr *clusterRegistry, resourcesChanged, newRegistry bool, err error) { cluster := cls.Name var allRegistries map[string]*searchv1alpha1.ResourceRegistry allRegistries, matchedRegistries, err = c.getClusterMatchedRegistries(cls) if err != nil { klog.Errorf("Get cluster matched resource registries failed, cluster: %s, error: %s", cluster, err) return } klog.V(4).Infof("Registries matched with cluster, cluster: %s, registries: %s", cluster, strings.Join(util.Keys(matchedRegistries), ", ")) v, hasExistingRegistries := c.clusterRegistry.Load(cluster) var addedRegistries, removedRegistries []string if !hasExistingRegistries { klog.Infof("Cluster %s has no existing registries", cluster) addedRegistries, removedRegistries = util.DiffKey(registrySet(nil), matchedRegistries) } else { crv := v.(clusterRegistry) cr = &crv addedRegistries, removedRegistries = util.DiffKey(cr.registries, matchedRegistries) } if len(addedRegistries) > 0 { klog.V(4).Infof("New registries added to cluster, cluster: %s, registries: %s", cluster, strings.Join(addedRegistries, ", ")) } else { klog.V(4).Infof("No registries added to cluster, cluster: %s", cluster) } if len(removedRegistries) > 0 { klog.V(4).Infof("Old registries removed from cluster, cluster: %s, registries: %s", cluster, strings.Join(removedRegistries, ", ")) } else { klog.V(4).Infof("No registries removed from cluster, cluster: %s", cluster) } var addedResources, removedResources []schema.GroupVersionResource cr, addedResources, removedResources = c.getClusterRegistriesModification(allRegistries, matchedRegistries, cr, addedRegistries, removedRegistries) currentWatchingResources := util.Keys(cr.resources) resourcesString := util.StringerJoin(currentWatchingResources, ", ") if len(addedResources) > 0 { klog.V(4).Infof("New watching resources added to cluster, cluster: %s, resources: %s", cluster, util.StringerJoin(addedResources, ", ")) } else { klog.V(4).Infof("No watching resources added to cluster, cluster: %s, resources: %s", cluster, resourcesString) } if len(removedResources) > 0 { klog.V(4).Infof("Resources watching removed from cluster, cluster: %s, resources: %s", cluster, util.StringerJoin(removedResources, ", ")) } else { klog.V(4).Infof("No resources watching removed from cluster, cluster: %s, resources: %s", cluster, resourcesString) } resourcesChanged = len(addedResources) > 0 || len(removedResources) > 0 newRegistry = !hasExistingRegistries return } func (c *Controller) getRegistryBackendHandler(cluster string, matchedRegistries map[string]*searchv1alpha1.ResourceRegistry) (cache.ResourceEventHandler, error) { backend := backendstore.GetBackend(cluster) if backend == nil { backendConfig := c.findOpenSearchBackend(matchedRegistries) backendstore.AddBackend(cluster, backendConfig) backend = backendstore.GetBackend(cluster) } if backend == nil { return nil, fmt.Errorf("failed to get backend store for cluster %s", cluster) } handler := backend.ResourceEventHandlerFuncs() if handler == nil { return nil, fmt.Errorf("failed to get resource event handler for cluster %s", cluster) } return handler, nil } // doCacheCluster processes the resourceRegistry object // TODO: update status func (c *Controller) doCacheCluster(cluster string) error { // STEP0: stop informer manager for the cluster which does not exist anymore or is not ready. cls, able, err := c.clusterAbleToCache(cluster) if err != nil || !able { return err } matchedRegistries, cr, resourcesChanged, newRegistry, err := c.reconcileClusterWithRegistries(cls) if err != nil { return err } // STEP1: stop informer manager for the cluster which is not referenced by any `SearchRegistry` object. if cr.unregistry() { klog.Infof("Try to stop cluster informer %s", cluster) c.InformerManager.Stop(cluster) return nil } if resourcesChanged { klog.Infof("Create new informer for cluster %s", cluster) c.InformerManager.Stop(cluster) } if newRegistry { c.clusterRegistry.Store(cluster, *cr) } handler, err := c.getRegistryBackendHandler(cluster, matchedRegistries) if err != nil { return err } var newInformerCreated bool // STEP2: added/updated cluster, builds an informer manager for a specific cluster. if !c.InformerManager.IsManagerExist(cluster) { klog.Info("Try to build informer manager for cluster ", cluster) controlPlaneClient := gclient.NewForConfigOrDie(c.restConfig) clusterDynamicClient, err := util.NewClusterDynamicClientSet(cluster, controlPlaneClient) if err != nil { return err } _ = c.InformerManager.ForCluster(cluster, clusterDynamicClient.DynamicClientSet, 0) newInformerCreated = true } if !newInformerCreated && !resourcesChanged { return nil } sci := c.InformerManager.GetSingleClusterManager(cluster) for gvr := range cr.resources { klog.Infof("Add informer for %s, %v", cluster, gvr) sci.ForResource(gvr, handler) } klog.Infof("Start informer for %s", cluster) sci.Start() _ = sci.WaitForCacheSync() klog.Infof("Start informer for %s done", cluster) return nil } // addResourceRegistry parse the resourceRegistry object and add Cluster to the queue func (c *Controller) addResourceRegistry(obj interface{}) { rr := obj.(*searchv1alpha1.ResourceRegistry) for _, cluster := range c.getClusters(rr.Spec.TargetCluster) { c.queue.Add(cluster) } } // updateResourceRegistry parse the resourceRegistry object and add (added/deleted) Cluster to the queue func (c *Controller) updateResourceRegistry(oldObj, newObj interface{}) { oldRR := oldObj.(*searchv1alpha1.ResourceRegistry) newRR := newObj.(*searchv1alpha1.ResourceRegistry) if reflect.DeepEqual(oldRR.Spec, newRR.Spec) { klog.V(4).Infof("Ignore ResourceRegistry(%s) update event as spec not changed", newRR.Name) return } clusterSet := sets.New[string]() for _, registry := range []*searchv1alpha1.ResourceRegistry{newRR, oldRR} { clusters := c.getClusters(registry.Spec.TargetCluster) clusterSet.Insert(clusters...) } for _, cluster := range clusterSet.UnsortedList() { c.queue.Add(cluster) } } // deleteResourceRegistry parse the resourceRegistry object and add deleted Cluster to the queue func (c *Controller) deleteResourceRegistry(obj interface{}) { rr, isRR := obj.(*searchv1alpha1.ResourceRegistry) // We can get DeletedFinalStateUnknown instead of *searchv1alpha1.ResourceRegistry here and // we need to handle that correctly. if !isRR { deletedState, ok := obj.(cache.DeletedFinalStateUnknown) if !ok { klog.ErrorS(nil, "Received unpexpected object", "object", obj) return } rr, ok = deletedState.Obj.(*searchv1alpha1.ResourceRegistry) if !ok { klog.ErrorS(nil, "DeletedFinalStateUnknown contained non-ResourceRegistry object", "object", deletedState.Obj) return } } for _, cluster := range c.getClusters(rr.Spec.TargetCluster) { c.queue.Add(cluster) } } // addCluster adds a cluster object to the queue if needed func (c *Controller) addCluster(obj interface{}) { cluster := obj.(*clusterv1alpha1.Cluster) c.queue.Add(cluster.GetName()) } // updateCluster rebuild informer if Cluster.Spec is changed func (c *Controller) updateCluster(oldObj, curObj interface{}) { curCluster := curObj.(*clusterv1alpha1.Cluster) oldCluster := oldObj.(*clusterv1alpha1.Cluster) if curCluster.ResourceVersion == oldCluster.ResourceVersion { // no change, do nothing. return } if curCluster.DeletionTimestamp != nil { // cluster is being deleted. c.queue.Add(curCluster.GetName()) } if !reflect.DeepEqual(curCluster.Spec, oldCluster.Spec) || !reflect.DeepEqual(curCluster.Labels, oldCluster.Labels) { c.queue.Add(curCluster.GetName()) } } // deleteCluster set cluster to not exists func (c *Controller) deleteCluster(obj interface{}) { cluster, isCluster := obj.(*clusterv1alpha1.Cluster) // We can get DeletedFinalStateUnknown instead of *clusterV1alpha1.Cluster here and // we need to handle that correctly. if !isCluster { deletedState, ok := obj.(cache.DeletedFinalStateUnknown) if !ok { klog.ErrorS(nil, "Received unexpected object", "object", obj) return } cluster, ok = deletedState.Obj.(*clusterv1alpha1.Cluster) if !ok { klog.ErrorS(nil, "DeletedFinalStateUnknown contained non-Cluster object", "object", deletedState.Obj) return } } _, ok := c.clusterRegistry.Load(cluster.GetName()) if !ok { // unregistered cluster, do nothing. return } // remove backend store backendstore.DeleteBackend(cluster.GetName()) c.queue.Add(cluster.GetName()) } // getClusters returns the cluster from the resourceRegistry object func (c *Controller) getClusters(affinity policyv1alpha1.ClusterAffinity) []string { clusters := make([]string, 0) lst, err := c.clusterLister.List(labels.Everything()) if err != nil { klog.Errorf("Failed to list clusters: %v", err) return clusters } for _, cls := range lst { if util.ClusterMatches(cls, affinity) { clusters = append(clusters, cls.GetName()) } } return clusters } // getResources returns the resources from the resourceRegistry object func (c *Controller) getResources(selectors []searchv1alpha1.ResourceSelector) []schema.GroupVersionResource { resources := make([]schema.GroupVersionResource, 0) for _, rs := range selectors { gvr, err := restmapper.GetGroupVersionResource( c.restMapper, schema.FromAPIVersionAndKind(rs.APIVersion, rs.Kind), ) if err != nil { klog.Errorf("Failed to get gvr: %v", err) continue } resources = append(resources, gvr) } return resources }