Merge pull request #4737 from XiShanYongYe-Chang/fix-eps-remain-when-controller-restart

Fix eps remain when karmada-controller-manager restart
This commit is contained in:
karmada-bot 2024-03-27 16:00:55 +08:00 committed by GitHub
commit db15f17b8e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 161 additions and 38 deletions

View File

@ -29,6 +29,7 @@ import (
controllerruntime "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/predicate"
@ -63,12 +64,18 @@ func (c *EndpointSliceController) Reconcile(ctx context.Context, req controllerr
return controllerruntime.Result{}, nil
}
}
return controllerruntime.Result{Requeue: true}, err
return controllerruntime.Result{}, err
}
if !work.DeletionTimestamp.IsZero() {
return controllerruntime.Result{}, nil
err := helper.DeleteEndpointSlice(c.Client, labels.Set{
workv1alpha1.WorkNamespaceLabel: req.Namespace,
workv1alpha1.WorkNameLabel: req.Name,
})
if err != nil {
return controllerruntime.Result{}, err
}
return controllerruntime.Result{}, c.removeFinalizer(work.DeepCopy())
}
// TBD: The work is managed by service-export-controller and endpointslice-collect-controller now,
@ -79,10 +86,20 @@ func (c *EndpointSliceController) Reconcile(ctx context.Context, req controllerr
workv1alpha1.WorkNamespaceLabel: req.Namespace,
workv1alpha1.WorkNameLabel: req.Name,
})
return controllerruntime.Result{}, err
if err != nil {
return controllerruntime.Result{}, err
}
return controllerruntime.Result{}, c.removeFinalizer(work.DeepCopy())
}
return c.collectEndpointSliceFromWork(work)
return controllerruntime.Result{}, c.collectEndpointSliceFromWork(work)
}
// removeFinalizer removes util.EndpointSliceControllerFinalizer from the given
// Work and persists the change via the API server. It is a no-op (returns nil)
// when the finalizer is not present on the Work.
func (c *EndpointSliceController) removeFinalizer(work *workv1alpha1.Work) error {
// RemoveFinalizer reports whether it actually mutated the finalizer list;
// skip the Update call entirely when nothing changed.
if !controllerutil.RemoveFinalizer(work, util.EndpointSliceControllerFinalizer) {
return nil
}
return c.Client.Update(context.TODO(), work)
}
// SetupWithManager creates a controller and register to controller manager.
@ -94,8 +111,8 @@ func (c *EndpointSliceController) SetupWithManager(mgr controllerruntime.Manager
UpdateFunc: func(updateEvent event.UpdateEvent) bool {
// TBD: We care about the work with label util.MultiClusterServiceNameLabel because the work is
// managed by service-export-controller and endpointslice-collect-controller now, We should delete this after the conflict is fixed.
return (util.GetLabelValue(updateEvent.ObjectNew.GetLabels(), util.ServiceNameLabel) != "" ||
util.GetLabelValue(updateEvent.ObjectNew.GetLabels(), util.MultiClusterServiceNameLabel) != "")
return util.GetLabelValue(updateEvent.ObjectNew.GetLabels(), util.ServiceNameLabel) != "" ||
util.GetLabelValue(updateEvent.ObjectNew.GetLabels(), util.MultiClusterServiceNameLabel) != ""
},
DeleteFunc: func(deleteEvent event.DeleteEvent) bool {
return util.GetLabelValue(deleteEvent.Object.GetLabels(), util.ServiceNameLabel) != ""
@ -107,25 +124,25 @@ func (c *EndpointSliceController) SetupWithManager(mgr controllerruntime.Manager
return controllerruntime.NewControllerManagedBy(mgr).For(&workv1alpha1.Work{}, builder.WithPredicates(serviceImportPredicateFun)).Complete(c)
}
func (c *EndpointSliceController) collectEndpointSliceFromWork(work *workv1alpha1.Work) (controllerruntime.Result, error) {
func (c *EndpointSliceController) collectEndpointSliceFromWork(work *workv1alpha1.Work) error {
clusterName, err := names.GetClusterName(work.Namespace)
if err != nil {
klog.Errorf("Failed to get cluster name for work %s/%s", work.Namespace, work.Name)
return controllerruntime.Result{Requeue: true}, err
return err
}
for _, manifest := range work.Spec.Workload.Manifests {
unstructObj := &unstructured.Unstructured{}
if err := unstructObj.UnmarshalJSON(manifest.Raw); err != nil {
klog.Errorf("Failed to unmarshal workload, error is: %v", err)
return controllerruntime.Result{Requeue: true}, err
return err
}
endpointSlice := &discoveryv1.EndpointSlice{}
err = helper.ConvertToTypedObject(unstructObj, endpointSlice)
if err != nil {
klog.Errorf("Failed to convert unstructured to typed object: %v", err)
return controllerruntime.Result{Requeue: true}, err
return err
}
desiredEndpointSlice := deriveEndpointSlice(endpointSlice, clusterName)
@ -137,11 +154,11 @@ func (c *EndpointSliceController) collectEndpointSliceFromWork(work *workv1alpha
}
if err = helper.CreateOrUpdateEndpointSlice(c.Client, desiredEndpointSlice); err != nil {
return controllerruntime.Result{Requeue: true}, err
return err
}
}
return controllerruntime.Result{}, nil
return nil
}
func deriveEndpointSlice(original *discoveryv1.EndpointSlice, migratedFrom string) *discoveryv1.EndpointSlice {

View File

@ -22,6 +22,7 @@ import (
"reflect"
"strings"
"sync"
"time"
discoveryv1 "k8s.io/api/discovery/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
@ -34,6 +35,7 @@ import (
"k8s.io/apimachinery/pkg/types"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
@ -66,19 +68,21 @@ type ServiceExportController struct {
WorkerNumber int // WorkerNumber is the number of worker goroutines
PredicateFunc predicate.Predicate // PredicateFunc is the function that filters events before enqueuing the keys.
ClusterDynamicClientSetFunc func(clusterName string, client client.Client) (*util.DynamicClusterClient, error)
ClusterCacheSyncTimeout metav1.Duration
// eventHandlers holds the handlers which used to handle events reported from member clusters.
// Each handler takes the cluster name as key and takes the handler function as the value, e.g.
// "member1": instance of ResourceEventHandler
eventHandlers sync.Map
worker util.AsyncWorker // worker process resources periodic from rateLimitingQueue.
ClusterCacheSyncTimeout metav1.Duration
// worker process resources periodic from rateLimitingQueue.
worker util.AsyncWorker
}
var (
serviceExportGVR = mcsv1alpha1.SchemeGroupVersion.WithResource("serviceexports")
serviceExportGVK = mcsv1alpha1.SchemeGroupVersion.WithKind("ServiceExport")
serviceExportGVK = mcsv1alpha1.SchemeGroupVersion.WithKind(util.ServiceExportKind)
endpointSliceGVR = discoveryv1.SchemeGroupVersion.WithResource("endpointslices")
endpointSliceGVK = discoveryv1.SchemeGroupVersion.WithKind(util.EndpointSliceKind)
)
// Reconcile performs a full reconciliation for the object referred to by the Request.
@ -90,7 +94,7 @@ func (c *ServiceExportController) Reconcile(ctx context.Context, req controllerr
if apierrors.IsNotFound(err) {
return controllerruntime.Result{}, nil
}
return controllerruntime.Result{Requeue: true}, err
return controllerruntime.Result{}, err
}
if !work.DeletionTimestamp.IsZero() {
@ -108,21 +112,21 @@ func (c *ServiceExportController) Reconcile(ctx context.Context, req controllerr
clusterName, err := names.GetClusterName(work.Namespace)
if err != nil {
klog.Errorf("Failed to get member cluster name for work %s/%s", work.Namespace, work.Name)
return controllerruntime.Result{Requeue: true}, err
return controllerruntime.Result{}, err
}
cluster, err := util.GetCluster(c.Client, clusterName)
if err != nil {
klog.Errorf("Failed to get the given member cluster %s", clusterName)
return controllerruntime.Result{Requeue: true}, err
return controllerruntime.Result{}, err
}
if !util.IsClusterReady(&cluster.Status) {
klog.Errorf("Stop sync work(%s/%s) for cluster(%s) as cluster not ready.", work.Namespace, work.Name, cluster.Name)
return controllerruntime.Result{Requeue: true}, fmt.Errorf("cluster(%s) not ready", cluster.Name)
return controllerruntime.Result{}, fmt.Errorf("cluster(%s) not ready", cluster.Name)
}
return c.buildResourceInformers(cluster)
return controllerruntime.Result{}, c.buildResourceInformers(cluster)
}
// SetupWithManager creates a controller and register to controller manager.
@ -139,6 +143,52 @@ func (c *ServiceExportController) RunWorkQueue() {
}
c.worker = util.NewAsyncWorker(workerOptions)
c.worker.Run(c.WorkerNumber, c.StopChan)
go c.enqueueReportedEpsServiceExport()
}
// enqueueReportedEpsServiceExport re-enqueues ServiceExport keys for EndpointSlice
// Works that were already collected from member clusters, so that after a
// karmada-controller-manager restart the previously reported EndpointSlices are
// reconciled again instead of being left behind.
func (c *ServiceExportController) enqueueReportedEpsServiceExport() {
workList := &workv1alpha1.WorkList{}
// Retry the list every second until it succeeds; transient API errors are
// logged and retried (returning false, nil) rather than aborting startup.
err := wait.PollUntil(1*time.Second, func() (done bool, err error) {
err = c.List(context.TODO(), workList, client.MatchingLabels{util.PropagationInstruction: util.PropagationInstructionSuppressed})
if err != nil {
klog.Errorf("Failed to list collected EndpointSlices Work from member clusters: %v", err)
return false, nil
}
return true, nil
}, context.TODO().Done())
if err != nil {
// PollUntil only returns an error when its stop channel closes; give up.
return
}
for index := range workList.Items {
work := workList.Items[index]
// Only Works whose manifests actually contain EndpointSlices are relevant.
if !helper.IsWorkContains(work.Spec.Workload.Manifests, endpointSliceGVK) {
continue
}
// Skip Works not managed by the ServiceExport controller (the managed-by
// label holds a "."-joined list of controller kinds).
managedByStr := work.Labels[util.EndpointSliceWorkManagedByLabel]
if !strings.Contains(managedByStr, serviceExportGVK.Kind) {
continue
}
clusterName, err := names.GetClusterName(work.GetNamespace())
if err != nil {
// Namespace is not a valid execution space name; ignore this Work.
continue
}
// Rebuild the federated ServiceExport key from the Work's service labels
// and hand it to the async worker for resynchronization.
key := keys.FederatedKey{
Cluster: clusterName,
ClusterWideKey: keys.ClusterWideKey{
Group: serviceExportGVK.Group,
Version: serviceExportGVK.Version,
Kind: serviceExportGVK.Kind,
Namespace: work.Labels[util.ServiceNamespaceLabel],
Name: work.Labels[util.ServiceNameLabel],
},
}
c.worker.Add(key)
}
}
func (c *ServiceExportController) syncServiceExportOrEndpointSlice(key util.QueueKey) error {
@ -148,7 +198,7 @@ func (c *ServiceExportController) syncServiceExportOrEndpointSlice(key util.Queu
return fmt.Errorf("invalid key")
}
klog.V(4).Infof("Begin to sync %s %s.", fedKey.Kind, fedKey.NamespaceKey())
klog.V(4).Infof("Begin to sync %s", fedKey)
switch fedKey.Kind {
case util.ServiceExportKind:
@ -168,13 +218,13 @@ func (c *ServiceExportController) syncServiceExportOrEndpointSlice(key util.Queu
return nil
}
func (c *ServiceExportController) buildResourceInformers(cluster *clusterv1alpha1.Cluster) (controllerruntime.Result, error) {
func (c *ServiceExportController) buildResourceInformers(cluster *clusterv1alpha1.Cluster) error {
err := c.registerInformersAndStart(cluster)
if err != nil {
klog.Errorf("Failed to register informer for Cluster %s. Error: %v.", cluster.Name, err)
return controllerruntime.Result{Requeue: true}, err
return err
}
return controllerruntime.Result{}, nil
return nil
}
// registerInformersAndStart builds informer manager for cluster if it doesn't exist, then constructs informers for gvr
@ -351,6 +401,11 @@ func (c *ServiceExportController) reportEndpointSliceWithServiceExportCreate(ser
return err
}
err = c.removeOrphanWork(endpointSliceObjects, serviceExportKey.Cluster)
if err != nil {
return err
}
for index := range endpointSliceObjects {
if err = reportEndpointSlice(c.Client, endpointSliceObjects[index].(*unstructured.Unstructured), serviceExportKey.Cluster); err != nil {
errs = append(errs, err)
@ -359,6 +414,49 @@ func (c *ServiceExportController) reportEndpointSliceWithServiceExportCreate(ser
return utilerrors.NewAggregate(errs)
}
// removeOrphanWork deletes collected-EndpointSlice Works in the target cluster's
// execution namespace that no longer correspond to any EndpointSlice about to be
// reported, so stale Works do not remain after the set of EndpointSlices shrinks.
// Deletion failures are aggregated and returned together.
func (c *ServiceExportController) removeOrphanWork(endpointSliceObjects []runtime.Object, targetCluster string) error {
// Build the set of Work names the current report pass will (re)create.
willReportWorks := sets.NewString()
for index := range endpointSliceObjects {
endpointSlice := endpointSliceObjects[index].(*unstructured.Unstructured)
workName := names.GenerateWorkName(endpointSlice.GetKind(), endpointSlice.GetName(), endpointSlice.GetNamespace())
willReportWorks.Insert(workName)
}
// List all suppressed (collected, non-propagated) Works in the cluster's
// execution namespace; these are the orphan candidates.
collectedEpsWorkList := &workv1alpha1.WorkList{}
if err := c.List(context.TODO(), collectedEpsWorkList, &client.ListOptions{
Namespace: names.GenerateExecutionSpaceName(targetCluster),
LabelSelector: labels.SelectorFromSet(labels.Set{
util.PropagationInstruction: util.PropagationInstructionSuppressed,
}),
}); err != nil {
klog.Errorf("Failed to list suppressed work list under namespace %s: %v", names.GenerateExecutionSpaceName(targetCluster), err)
return err
}
var errs []error
for index := range collectedEpsWorkList.Items {
work := collectedEpsWorkList.Items[index]
// Only EndpointSlice-carrying Works are candidates.
if !helper.IsWorkContains(work.Spec.Workload.Manifests, endpointSliceGVK) {
continue
}
// Only Works managed by the ServiceExport controller are ours to clean.
managedByStr := work.Labels[util.EndpointSliceWorkManagedByLabel]
if !strings.Contains(managedByStr, serviceExportGVK.Kind) {
continue
}
// Still referenced by an EndpointSlice being reported — not an orphan.
if willReportWorks.Has(work.Name) {
continue
}
err := cleanEndpointSliceWork(c.Client, &work)
if err != nil {
errs = append(errs, err)
}
}
// Aggregate so one failed deletion does not mask the remaining cleanups.
return utilerrors.NewAggregate(errs)
}
// reportEndpointSliceWithEndpointSliceCreateOrUpdate reports the EndpointSlice when referencing service has been exported.
func (c *ServiceExportController) reportEndpointSliceWithEndpointSliceCreateOrUpdate(clusterName string, endpointSlice *unstructured.Unstructured) error {
relatedServiceName := endpointSlice.GetLabels()[discoveryv1.LabelServiceName]
@ -410,27 +508,31 @@ func getEndpointSliceWorkMeta(c client.Client, ns string, workName string, endpo
return metav1.ObjectMeta{}, err
}
labels := map[string]string{
util.ServiceNamespaceLabel: endpointSlice.GetNamespace(),
util.ServiceNameLabel: endpointSlice.GetLabels()[discoveryv1.LabelServiceName],
// indicate the Work should be not propagated since it's collected resource.
util.PropagationInstruction: util.PropagationInstructionSuppressed,
util.ManagedByKarmadaLabel: util.ManagedByKarmadaLabelValue,
util.EndpointSliceWorkManagedByLabel: util.ServiceExportKind,
workMeta := metav1.ObjectMeta{
Name: workName,
Namespace: ns,
Finalizers: []string{util.EndpointSliceControllerFinalizer},
Labels: map[string]string{
util.ServiceNamespaceLabel: endpointSlice.GetNamespace(),
util.ServiceNameLabel: endpointSlice.GetLabels()[discoveryv1.LabelServiceName],
// indicate the Work should be not propagated since it's collected resource.
util.PropagationInstruction: util.PropagationInstructionSuppressed,
util.ManagedByKarmadaLabel: util.ManagedByKarmadaLabelValue,
util.EndpointSliceWorkManagedByLabel: util.ServiceExportKind,
},
}
if existWork.Labels == nil || (err != nil && apierrors.IsNotFound(err)) {
workMeta := metav1.ObjectMeta{Name: workName, Namespace: ns, Labels: labels}
return workMeta, nil
}
labels = util.DedupeAndMergeLabels(labels, existWork.Labels)
workMeta.Labels = util.DedupeAndMergeLabels(workMeta.Labels, existWork.Labels)
if value, ok := existWork.Labels[util.EndpointSliceWorkManagedByLabel]; ok {
controllerSet := sets.New[string]()
controllerSet.Insert(strings.Split(value, ".")...)
controllerSet.Insert(util.ServiceExportKind)
labels[util.EndpointSliceWorkManagedByLabel] = strings.Join(controllerSet.UnsortedList(), ".")
workMeta.Labels[util.EndpointSliceWorkManagedByLabel] = strings.Join(controllerSet.UnsortedList(), ".")
}
workMeta := metav1.ObjectMeta{Name: workName, Namespace: ns, Labels: labels}
return workMeta, nil
}

View File

@ -98,7 +98,7 @@ const (
// The overrides items should be sorted alphabetically in ascending order by ClusterOverridePolicy's name.
AppliedClusterOverrides = "policy.karmada.io/applied-cluster-overrides"
// EndPointSliceProvisionClusterAnnotation is added to work of the dispatch EndpointSlice in consumption clusters's namespace.
// EndpointSliceProvisionClusterAnnotation is added to work of the dispatch EndpointSlice in consumption clusters' namespace.
EndpointSliceProvisionClusterAnnotation = "endpointslice.karmada.io/provision-cluster"
)
@ -116,6 +116,10 @@ const (
// before ResourceBinding itself is deleted.
BindingControllerFinalizer = "karmada.io/binding-controller"
// EndpointSliceControllerFinalizer is added to Work, which holds EndpointSlice collected from member clusters,
// to ensure related EndpointSlices are deleted before Work itself is deleted.
EndpointSliceControllerFinalizer = "karmada.io/endpointslice-controller"
// MCSEndpointSliceCollectControllerFinalizer is added to mcs to ensure related Works in provider clusters are deleted
MCSEndpointSliceCollectControllerFinalizer = "karmada.io/mcs-endpointslice-collect-controller"