From d06fe2b5b371d519419ba40ccda25c0db9892d73 Mon Sep 17 00:00:00 2001 From: Poor12 Date: Mon, 24 Apr 2023 19:35:11 +0800 Subject: [PATCH] add application failover controller Signed-off-by: Poor12 --- .../app/controllermanager.go | 71 +++-- .../work/v1alpha2/binding_types_helper.go | 85 +++++- .../v1alpha2/binding_types_helper_test.go | 51 +++- .../work/v1alpha2/well_known_constants.go | 4 + .../work/v1alpha2/zz_generated.deepcopy.go | 26 ++ pkg/controllers/applicationfailover/common.go | 95 +++++++ .../applicationfailover/common_test.go | 92 +++++++ .../crb_application_failover_controller.go | 255 +++++++++++++++++ .../rb_application_failover_controller.go | 260 ++++++++++++++++++ pkg/controllers/cluster/taint_manager.go | 4 +- pkg/generated/openapi/zz_generated.openapi.go | 48 ++++ .../clustereviction/cluster_eviction.go | 3 +- pkg/util/helper/cluster.go | 12 - pkg/util/helper/cluster_test.go | 46 ---- 14 files changed, 957 insertions(+), 95 deletions(-) create mode 100644 pkg/controllers/applicationfailover/common.go create mode 100644 pkg/controllers/applicationfailover/common_test.go create mode 100644 pkg/controllers/applicationfailover/crb_application_failover_controller.go create mode 100644 pkg/controllers/applicationfailover/rb_application_failover_controller.go diff --git a/cmd/controller-manager/app/controllermanager.go b/cmd/controller-manager/app/controllermanager.go index 05bb209ff..d7d2b2848 100644 --- a/cmd/controller-manager/app/controllermanager.go +++ b/cmd/controller-manager/app/controllermanager.go @@ -30,6 +30,7 @@ import ( workv1alpha1 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha1" workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2" "github.com/karmada-io/karmada/pkg/clusterdiscovery/clusterapi" + "github.com/karmada-io/karmada/pkg/controllers/applicationfailover" "github.com/karmada-io/karmada/pkg/controllers/binding" "github.com/karmada-io/karmada/pkg/controllers/cluster" controllerscontext "github.com/karmada-io/karmada/pkg/controllers/context" @@ -196,6 +197,7 @@ func init() { controllers["federatedResourceQuotaSync"] = startFederatedResourceQuotaSyncController controllers["federatedResourceQuotaStatus"] = startFederatedResourceQuotaStatusController controllers["gracefulEviction"] = startGracefulEvictionController + controllers["applicationFailover"] = startApplicationFailoverController } func startClusterController(ctx controllerscontext.Context) (enabled bool, err error) { @@ -500,31 +502,54 @@ func startFederatedResourceQuotaStatusController(ctx controllerscontext.Context) } func startGracefulEvictionController(ctx controllerscontext.Context) (enabled bool, err error) { - if features.FeatureGate.Enabled(features.GracefulEviction) { - rbGracefulEvictionController := &gracefuleviction.RBGracefulEvictionController{ - Client: ctx.Mgr.GetClient(), - EventRecorder: ctx.Mgr.GetEventRecorderFor(gracefuleviction.RBGracefulEvictionControllerName), - RateLimiterOptions: ctx.Opts.RateLimiterOptions, - GracefulEvictionTimeout: ctx.Opts.GracefulEvictionTimeout.Duration, - } - if err := rbGracefulEvictionController.SetupWithManager(ctx.Mgr); err != nil { - return false, err - } - - crbGracefulEvictionController := &gracefuleviction.CRBGracefulEvictionController{ - Client: ctx.Mgr.GetClient(), - EventRecorder: ctx.Mgr.GetEventRecorderFor(gracefuleviction.CRBGracefulEvictionControllerName), - RateLimiterOptions: ctx.Opts.RateLimiterOptions, - GracefulEvictionTimeout: ctx.Opts.GracefulEvictionTimeout.Duration, - } - if err := crbGracefulEvictionController.SetupWithManager(ctx.Mgr); err != nil { - return false, err - } - - return true, nil + if !features.FeatureGate.Enabled(features.GracefulEviction) { + return false, nil + } + rbGracefulEvictionController := &gracefuleviction.RBGracefulEvictionController{ + Client: ctx.Mgr.GetClient(), + EventRecorder: ctx.Mgr.GetEventRecorderFor(gracefuleviction.RBGracefulEvictionControllerName), + RateLimiterOptions: ctx.Opts.RateLimiterOptions, + GracefulEvictionTimeout: ctx.Opts.GracefulEvictionTimeout.Duration, + } + if err := rbGracefulEvictionController.SetupWithManager(ctx.Mgr); err != nil { + return false, err } - return false, nil + crbGracefulEvictionController := &gracefuleviction.CRBGracefulEvictionController{ + Client: ctx.Mgr.GetClient(), + EventRecorder: ctx.Mgr.GetEventRecorderFor(gracefuleviction.CRBGracefulEvictionControllerName), + RateLimiterOptions: ctx.Opts.RateLimiterOptions, + GracefulEvictionTimeout: ctx.Opts.GracefulEvictionTimeout.Duration, + } + if err := crbGracefulEvictionController.SetupWithManager(ctx.Mgr); err != nil { + return false, err + } + + return true, nil +} + +func startApplicationFailoverController(ctx controllerscontext.Context) (enabled bool, err error) { + if !features.FeatureGate.Enabled(features.Failover) { + return false, nil + } + rbApplicationFailoverController := applicationfailover.RBApplicationFailoverController{ + Client: ctx.Mgr.GetClient(), + EventRecorder: ctx.Mgr.GetEventRecorderFor(applicationfailover.RBApplicationFailoverControllerName), + ResourceInterpreter: ctx.ResourceInterpreter, + } + if err = rbApplicationFailoverController.SetupWithManager(ctx.Mgr); err != nil { + return false, err + } + + crbApplicationFailoverController := applicationfailover.CRBApplicationFailoverController{ + Client: ctx.Mgr.GetClient(), + EventRecorder: ctx.Mgr.GetEventRecorderFor(applicationfailover.CRBApplicationFailoverControllerName), + ResourceInterpreter: ctx.ResourceInterpreter, + } + if err = crbApplicationFailoverController.SetupWithManager(ctx.Mgr); err != nil { + return false, err + } + return true, nil } // setupControllers initialize controllers and setup one by one. diff --git a/pkg/apis/work/v1alpha2/binding_types_helper.go b/pkg/apis/work/v1alpha2/binding_types_helper.go index c8777a000..80c2d4feb 100644 --- a/pkg/apis/work/v1alpha2/binding_types_helper.go +++ b/pkg/apis/work/v1alpha2/binding_types_helper.go @@ -1,5 +1,62 @@ package v1alpha2 +// TaskOptions represents options for GracefulEvictionTasks. +type TaskOptions struct { + producer string + reason string + message string + gracePeriodSeconds *int32 + suppressDeletion *bool +} + +// Option configures a TaskOptions +type Option func(*TaskOptions) + +// NewTaskOptions builds a TaskOptions +func NewTaskOptions(opts ...Option) *TaskOptions { + options := TaskOptions{} + for _, opt := range opts { + opt(&options) + } + + return &options +} + +// WithProducer sets the producer for TaskOptions +func WithProducer(producer string) Option { + return func(o *TaskOptions) { + o.producer = producer + } +} + +// WithReason sets the reason for TaskOptions +func WithReason(reason string) Option { + return func(o *TaskOptions) { + o.reason = reason + } +} + +// WithMessage sets the message for TaskOptions +func WithMessage(message string) Option { + return func(o *TaskOptions) { + o.message = message + } +} + +// WithGracePeriodSeconds sets the gracePeriodSeconds for TaskOptions +func WithGracePeriodSeconds(gracePeriodSeconds *int32) Option { + return func(o *TaskOptions) { + o.gracePeriodSeconds = gracePeriodSeconds + } +} + +// WithSuppressDeletion sets the suppressDeletion for TaskOptions +func WithSuppressDeletion(suppressDeletion *bool) Option { + return func(o *TaskOptions) { + o.suppressDeletion = suppressDeletion + } +} + // TargetContains checks if specific cluster present on the target list. func (s *ResourceBindingSpec) TargetContains(name string) bool { for i := range s.Clusters { @@ -11,6 +68,16 @@ func (s *ResourceBindingSpec) TargetContains(name string) bool { return false } +// ClusterInGracefulEvictionTasks checks if the target cluster is in the GracefulEvictionTasks which means it is in the process of eviction. +func (s *ResourceBindingSpec) ClusterInGracefulEvictionTasks(name string) bool { + for _, task := range s.GracefulEvictionTasks { + if task.FromCluster == name { + return true + } + } + return false +} + // AssignedReplicasForCluster returns assigned replicas for specific cluster. func (s *ResourceBindingSpec) AssignedReplicasForCluster(targetCluster string) int32 { for i := range s.Clusters { @@ -44,7 +111,7 @@ func (s *ResourceBindingSpec) RemoveCluster(name string) { // GracefulEvictCluster removes specific cluster from the target list in a graceful way by // building a graceful eviction task. // This function no-opts if the cluster does not exist. -func (s *ResourceBindingSpec) GracefulEvictCluster(name, producer, reason, message string) { +func (s *ResourceBindingSpec) GracefulEvictCluster(name string, options *TaskOptions) { // find the cluster index var i int for i = 0; i < len(s.Clusters); i++ { @@ -63,19 +130,19 @@ func (s *ResourceBindingSpec) GracefulEvictCluster(name, producer, reason, messa s.Clusters = append(s.Clusters[:i], s.Clusters[i+1:]...) // skip if the target cluster already in the task list - for _, task := range s.GracefulEvictionTasks { - if task.FromCluster == evictCluster.Name { - return - } + if s.ClusterInGracefulEvictionTasks(evictCluster.Name) { + return } // build eviction task evictingCluster := evictCluster.DeepCopy() evictionTask := GracefulEvictionTask{ - FromCluster: evictingCluster.Name, - Reason: reason, - Message: message, - Producer: producer, + FromCluster: evictingCluster.Name, + Reason: options.reason, + Message: options.message, + Producer: options.producer, + GracePeriodSeconds: options.gracePeriodSeconds, + SuppressDeletion: options.suppressDeletion, } if evictingCluster.Replicas > 0 { evictionTask.Replicas = &evictingCluster.Replicas diff --git a/pkg/apis/work/v1alpha2/binding_types_helper_test.go b/pkg/apis/work/v1alpha2/binding_types_helper_test.go index bdd6984c8..77db73e4c 100644 --- a/pkg/apis/work/v1alpha2/binding_types_helper_test.go +++ b/pkg/apis/work/v1alpha2/binding_types_helper_test.go @@ -293,7 +293,7 @@ func TestResourceBindingSpec_GracefulEvictCluster(t *testing.T) { for _, test := range tests { tc := test t.Run(tc.Name, func(t *testing.T) { - tc.InputSpec.GracefulEvictCluster(tc.EvictEvent.FromCluster, tc.EvictEvent.Producer, tc.EvictEvent.Reason, tc.EvictEvent.Message) + tc.InputSpec.GracefulEvictCluster(tc.EvictEvent.FromCluster, NewTaskOptions(WithProducer(tc.EvictEvent.Producer), WithReason(tc.EvictEvent.Reason), WithMessage(tc.EvictEvent.Message))) if !reflect.DeepEqual(tc.InputSpec.Clusters, tc.ExpectSpec.Clusters) { t.Fatalf("expect clusters: %v, but got: %v", tc.ExpectSpec.Clusters, tc.InputSpec.Clusters) @@ -304,3 +304,52 @@ func TestResourceBindingSpec_GracefulEvictCluster(t *testing.T) { }) } } + +func TestResourceBindingSpec_ClusterInGracefulEvictionTasks(t *testing.T) { + gracefulEvictionTasks := []GracefulEvictionTask{ + { + FromCluster: "member1", + Producer: EvictionProducerTaintManager, + Reason: EvictionReasonTaintUntolerated, + }, + { + FromCluster: "member2", + Producer: EvictionProducerTaintManager, + Reason: EvictionReasonTaintUntolerated, + }, + } + + tests := []struct { + name string + InputSpec ResourceBindingSpec + targetCluster string + expect bool + }{ + { + name: "targetCluster is in the process of eviction", + InputSpec: ResourceBindingSpec{ + GracefulEvictionTasks: gracefulEvictionTasks, + }, + targetCluster: "member1", + expect: true, + }, + { + name: "targetCluster is not in the process of eviction", + InputSpec: ResourceBindingSpec{ + GracefulEvictionTasks: gracefulEvictionTasks, + }, + targetCluster: "member3", + expect: false, + }, + } + + for _, test := range tests { + tc := test + t.Run(tc.name, func(t *testing.T) { + result := tc.InputSpec.ClusterInGracefulEvictionTasks(tc.targetCluster) + if result != tc.expect { + t.Errorf("expected: %v, but got: %v", tc.expect, result) + } + }) + } +} diff --git a/pkg/apis/work/v1alpha2/well_known_constants.go b/pkg/apis/work/v1alpha2/well_known_constants.go index ffbec1175..ebe6130cc 100644 --- a/pkg/apis/work/v1alpha2/well_known_constants.go +++ b/pkg/apis/work/v1alpha2/well_known_constants.go @@ -82,6 +82,10 @@ const ( // EvictionReasonTaintUntolerated describes the eviction is triggered // because can not tolerate taint or exceed toleration period of time. EvictionReasonTaintUntolerated = "TaintUntolerated" + + // EvictionReasonApplicationFailure describes the eviction is triggered + // because the application fails and reaches the condition of ApplicationFailoverBehavior. + EvictionReasonApplicationFailure = "ApplicationFailure" ) // Define eviction producers. diff --git a/pkg/apis/work/v1alpha2/zz_generated.deepcopy.go b/pkg/apis/work/v1alpha2/zz_generated.deepcopy.go index f47eefef0..bf5ebcbab 100644 --- a/pkg/apis/work/v1alpha2/zz_generated.deepcopy.go +++ b/pkg/apis/work/v1alpha2/zz_generated.deepcopy.go @@ -383,3 +383,29 @@ func (in *TargetCluster) DeepCopy() *TargetCluster { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *TaskOptions) DeepCopyInto(out *TaskOptions) { + *out = *in + if in.gracePeriodSeconds != nil { + in, out := &in.gracePeriodSeconds, &out.gracePeriodSeconds + *out = new(int32) + **out = **in + } + if in.suppressDeletion != nil { + in, out := &in.suppressDeletion, &out.suppressDeletion + *out = new(bool) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TaskOptions. +func (in *TaskOptions) DeepCopy() *TaskOptions { + if in == nil { + return nil + } + out := new(TaskOptions) + in.DeepCopyInto(out) + return out +} diff --git a/pkg/controllers/applicationfailover/common.go b/pkg/controllers/applicationfailover/common.go new file mode 100644 index 000000000..809dbbf6f --- /dev/null +++ b/pkg/controllers/applicationfailover/common.go @@ -0,0 +1,95 @@ +package applicationfailover + +import ( + "sync" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" + + workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2" + "github.com/karmada-io/karmada/pkg/util/fedinformer/keys" +) + +type workloadUnhealthyMap struct { + sync.RWMutex + // key is the resource type + // value is also a map. Its key is the cluster where the unhealthy workload resides. + // Its value is the time when the unhealthy state was first observed. + workloadUnhealthy map[keys.ClusterWideKey]map[string]metav1.Time +} + +func newWorkloadUnhealthyMap() *workloadUnhealthyMap { + return &workloadUnhealthyMap{ + workloadUnhealthy: make(map[keys.ClusterWideKey]map[string]metav1.Time), + } +} + +func (m *workloadUnhealthyMap) delete(key keys.ClusterWideKey) { + m.Lock() + defer m.Unlock() + delete(m.workloadUnhealthy, key) +} + +func (m *workloadUnhealthyMap) hasWorkloadBeenUnhealthy(resource keys.ClusterWideKey, cluster string) bool { + m.RLock() + defer m.RUnlock() + + unhealthyClusters := m.workloadUnhealthy[resource] + if unhealthyClusters == nil { + return false + } + + _, exist := unhealthyClusters[cluster] + return exist +} + +func (m *workloadUnhealthyMap) setTimeStamp(resource keys.ClusterWideKey, cluster string) { + m.Lock() + defer m.Unlock() + + unhealthyClusters := m.workloadUnhealthy[resource] + if unhealthyClusters == nil { + unhealthyClusters = make(map[string]metav1.Time) + } + + unhealthyClusters[cluster] = metav1.Now() + m.workloadUnhealthy[resource] = unhealthyClusters +} + +func (m *workloadUnhealthyMap) getTimeStamp(resource keys.ClusterWideKey, cluster string) metav1.Time { + m.RLock() + defer m.RUnlock() + + unhealthyClusters := m.workloadUnhealthy[resource] + return unhealthyClusters[cluster] +} + +func (m *workloadUnhealthyMap) deleteIrrelevantClusters(resource keys.ClusterWideKey, allClusters sets.Set[string]) { + m.Lock() + defer m.Unlock() + + unhealthyClusters := m.workloadUnhealthy[resource] + if unhealthyClusters == nil { + return + } + for cluster := range unhealthyClusters { + if !allClusters.Has(cluster) { + delete(unhealthyClusters, cluster) + } + } + m.workloadUnhealthy[resource] = unhealthyClusters +} + +// filterIrrelevantClusters filters clusters which is in the process of eviction or in the Healthy/Unknown state. +func filterIrrelevantClusters(aggregatedStatusItems []workv1alpha2.AggregatedStatusItem, resourceBindingSpec workv1alpha2.ResourceBindingSpec) []string { + var filteredClusters []string + for _, aggregatedStatusItem := range aggregatedStatusItems { + cluster := aggregatedStatusItem.ClusterName + + if aggregatedStatusItem.Health == workv1alpha2.ResourceUnhealthy && !resourceBindingSpec.ClusterInGracefulEvictionTasks(cluster) { + filteredClusters = append(filteredClusters, cluster) + } + } + + return filteredClusters +} diff --git a/pkg/controllers/applicationfailover/common_test.go b/pkg/controllers/applicationfailover/common_test.go new file mode 100644 index 000000000..d8b9efe27 --- /dev/null +++ b/pkg/controllers/applicationfailover/common_test.go @@ -0,0 +1,92 @@ +package applicationfailover + +import ( + "reflect" + "testing" + + workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2" +) + +func TestFilterIrrelevantClusters(t *testing.T) { + tests := []struct { + name string + aggregatedStatusItems []workv1alpha2.AggregatedStatusItem + resourceBindingSpec workv1alpha2.ResourceBindingSpec + expectedClusters []string + }{ + { + name: "all applications are healthy", + aggregatedStatusItems: []workv1alpha2.AggregatedStatusItem{ + { + ClusterName: "member1", + Health: workv1alpha2.ResourceHealthy, + }, + { + ClusterName: "member2", + Health: workv1alpha2.ResourceHealthy, + }, + }, + resourceBindingSpec: workv1alpha2.ResourceBindingSpec{}, + expectedClusters: nil, + }, + { + name: "all applications are unknown", + aggregatedStatusItems: []workv1alpha2.AggregatedStatusItem{ + { + ClusterName: "member1", + Health: workv1alpha2.ResourceUnknown, + }, + { + ClusterName: "member2", + Health: workv1alpha2.ResourceUnknown, + }, + }, + resourceBindingSpec: workv1alpha2.ResourceBindingSpec{}, + expectedClusters: nil, + }, + { + name: "one application is unhealthy and not in gracefulEvictionTasks", + aggregatedStatusItems: []workv1alpha2.AggregatedStatusItem{ + { + ClusterName: "member1", + Health: workv1alpha2.ResourceHealthy, + }, + { + ClusterName: "member2", + Health: workv1alpha2.ResourceUnhealthy, + }, + }, + resourceBindingSpec: workv1alpha2.ResourceBindingSpec{}, + expectedClusters: []string{"member2"}, + }, + { + name: "one application is unhealthy and in gracefulEvictionTasks", + aggregatedStatusItems: []workv1alpha2.AggregatedStatusItem{ + { + ClusterName: "member1", + Health: workv1alpha2.ResourceHealthy, + }, + { + ClusterName: "member2", + Health: workv1alpha2.ResourceUnhealthy, + }, + }, + resourceBindingSpec: workv1alpha2.ResourceBindingSpec{ + GracefulEvictionTasks: []workv1alpha2.GracefulEvictionTask{ + { + FromCluster: "member2", + }, + }, + }, + expectedClusters: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := filterIrrelevantClusters(tt.aggregatedStatusItems, tt.resourceBindingSpec); !reflect.DeepEqual(got, tt.expectedClusters) { + t.Errorf("filterIrrelevantClusters() = %v, want %v", got, tt.expectedClusters) + } + }) + } +} diff --git a/pkg/controllers/applicationfailover/crb_application_failover_controller.go b/pkg/controllers/applicationfailover/crb_application_failover_controller.go new file mode 100644 index 000000000..2ac7f31a4 --- /dev/null +++ b/pkg/controllers/applicationfailover/crb_application_failover_controller.go @@ -0,0 +1,255 @@ +package applicationfailover + +import ( + "context" + "fmt" + "math" + "time" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/tools/record" + "k8s.io/klog/v2" + "k8s.io/utils/pointer" + controllerruntime "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/predicate" + + configv1alpha1 "github.com/karmada-io/karmada/pkg/apis/config/v1alpha1" + policyv1alpha1 "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1" + workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2" + "github.com/karmada-io/karmada/pkg/features" + "github.com/karmada-io/karmada/pkg/resourceinterpreter" + "github.com/karmada-io/karmada/pkg/sharedcli/ratelimiterflag" + "github.com/karmada-io/karmada/pkg/util/fedinformer/keys" + "github.com/karmada-io/karmada/pkg/util/helper" +) + +// CRBApplicationFailoverControllerName is the controller name that will be used when reporting events. +const CRBApplicationFailoverControllerName = "cluster-resource-binding-application-failover-controller" + +// CRBApplicationFailoverController is to sync ClusterResourceBinding's application failover behavior. +type CRBApplicationFailoverController struct { + client.Client + EventRecorder record.EventRecorder + RateLimiterOptions ratelimiterflag.Options + + // workloadUnhealthyMap records which clusters the specific resource is in an unhealthy state + workloadUnhealthyMap *workloadUnhealthyMap + ResourceInterpreter resourceinterpreter.ResourceInterpreter +} + +// Reconcile performs a full reconciliation for the object referred to by the Request. +// The Controller will requeue the Request to be processed again if an error is non-nil or +// Result.Requeue is true, otherwise upon completion it will remove the work from the queue. +func (c *CRBApplicationFailoverController) Reconcile(ctx context.Context, req controllerruntime.Request) (controllerruntime.Result, error) { + klog.V(4).Infof("Reconciling ClusterResourceBinding %s.", req.Name) + + binding := &workv1alpha2.ClusterResourceBinding{} + if err := c.Client.Get(ctx, req.NamespacedName, binding); err != nil { + if apierrors.IsNotFound(err) { + resource, err := helper.ConstructClusterWideKey(binding.Spec.Resource) + if err != nil { + return controllerruntime.Result{Requeue: true}, err + } + c.workloadUnhealthyMap.delete(resource) + return controllerruntime.Result{}, nil + } + return controllerruntime.Result{Requeue: true}, err + } + + if !c.clusterResourceBindingFilter(binding) { + resource, err := helper.ConstructClusterWideKey(binding.Spec.Resource) + if err != nil { + return controllerruntime.Result{Requeue: true}, err + } + c.workloadUnhealthyMap.delete(resource) + return controllerruntime.Result{}, nil + } + + retryDuration, err := c.syncBinding(binding) + if err != nil { + return controllerruntime.Result{Requeue: true}, err + } + if retryDuration > 0 { + klog.V(4).Infof("Retry to check health status of the workload after %v minutes.", retryDuration.Minutes()) + return controllerruntime.Result{RequeueAfter: retryDuration}, nil + } + return controllerruntime.Result{}, nil +} + +func (c *CRBApplicationFailoverController) detectFailure(clusters []string, tolerationSeconds *int32, resource keys.ClusterWideKey) (int32, []string) { + var needEvictClusters []string + duration := int32(math.MaxInt32) + + for _, cluster := range clusters { + if !c.workloadUnhealthyMap.hasWorkloadBeenUnhealthy(resource, cluster) { + c.workloadUnhealthyMap.setTimeStamp(resource, cluster) + if duration > *tolerationSeconds { + duration = *tolerationSeconds + } + continue + } + // When the workload in a cluster is in an unhealthy state for more than the tolerance time, + // and the cluster has not been in the GracefulEvictionTasks, + // the cluster will be added to the list that needs to be evicted. + unHealthyTimeStamp := c.workloadUnhealthyMap.getTimeStamp(resource, cluster) + timeNow := metav1.Now() + if timeNow.After(unHealthyTimeStamp.Add(time.Duration(*tolerationSeconds) * time.Second)) { + needEvictClusters = append(needEvictClusters, cluster) + } else { + if duration > *tolerationSeconds-int32(timeNow.Sub(unHealthyTimeStamp.Time).Seconds()) { + duration = *tolerationSeconds - int32(timeNow.Sub(unHealthyTimeStamp.Time).Seconds()) + } + } + } + + if duration == int32(math.MaxInt32) { + duration = 0 + } + return duration, needEvictClusters +} + +func (c *CRBApplicationFailoverController) syncBinding(binding *workv1alpha2.ClusterResourceBinding) (time.Duration, error) { + tolerationSeconds := binding.Spec.Failover.Application.DecisionConditions.TolerationSeconds + resource, err := helper.ConstructClusterWideKey(binding.Spec.Resource) + if err != nil { + klog.Errorf("failed to get key from binding(%s)'s resource", binding.Name) + return 0, err + } + + allClusters := sets.New[string]() + for _, cluster := range binding.Spec.Clusters { + allClusters.Insert(cluster.Name) + } + + unhealthyClusters := filterIrrelevantClusters(binding.Status.AggregatedStatus, binding.Spec) + duration, needEvictClusters := c.detectFailure(unhealthyClusters, tolerationSeconds, resource) + + err = c.evictBinding(binding, needEvictClusters) + if err != nil { + klog.Errorf("Failed to evict binding(%s), err: %v.", binding.Name, err) + return 0, err + } + + if len(needEvictClusters) != 0 { + if err = c.updateBinding(binding, allClusters, needEvictClusters); err != nil { + return 0, err + } + } + + // Cleanup clusters that have been evicted or removed in the workloadUnhealthyMap + c.workloadUnhealthyMap.deleteIrrelevantClusters(resource, allClusters) + + return time.Duration(duration) * time.Second, nil +} + +func (c *CRBApplicationFailoverController) evictBinding(binding *workv1alpha2.ClusterResourceBinding, clusters []string) error { + for _, cluster := range clusters { + switch binding.Spec.Failover.Application.PurgeMode { + case policyv1alpha1.Graciously: + if features.FeatureGate.Enabled(features.GracefulEviction) { + binding.Spec.GracefulEvictCluster(cluster, workv1alpha2.NewTaskOptions(workv1alpha2.WithProducer(CRBApplicationFailoverControllerName), workv1alpha2.WithReason(workv1alpha2.EvictionReasonApplicationFailure), workv1alpha2.WithGracePeriodSeconds(binding.Spec.Failover.Application.GracePeriodSeconds))) + } else { + err := fmt.Errorf("GracefulEviction featureGate must be enabled when purgeMode is %s", policyv1alpha1.Graciously) + klog.Error(err) + return err + } + case policyv1alpha1.Never: + if features.FeatureGate.Enabled(features.GracefulEviction) { + binding.Spec.GracefulEvictCluster(cluster, workv1alpha2.NewTaskOptions(workv1alpha2.WithProducer(CRBApplicationFailoverControllerName), workv1alpha2.WithReason(workv1alpha2.EvictionReasonApplicationFailure), workv1alpha2.WithSuppressDeletion(pointer.Bool(true)))) + } else { + err := fmt.Errorf("GracefulEviction featureGate must be enabled when purgeMode is %s", policyv1alpha1.Never) + klog.Error(err) + return err + } + case policyv1alpha1.Immediately: + binding.Spec.RemoveCluster(cluster) + } + } + + return nil +} + +func (c *CRBApplicationFailoverController) updateBinding(binding *workv1alpha2.ClusterResourceBinding, allClusters sets.Set[string], needEvictClusters []string) error { + if err := c.Update(context.TODO(), binding); err != nil { + for _, cluster := range needEvictClusters { + helper.EmitClusterEvictionEventForClusterResourceBinding(binding, cluster, c.EventRecorder, err) + } + klog.ErrorS(err, "Failed to update binding", "binding", klog.KObj(binding)) + return err + } + for _, cluster := range needEvictClusters { + allClusters.Delete(cluster) + } + if !features.FeatureGate.Enabled(features.GracefulEviction) { + for _, cluster := range needEvictClusters { + helper.EmitClusterEvictionEventForClusterResourceBinding(binding, cluster, c.EventRecorder, nil) + } + } + + return nil +} + +// SetupWithManager creates a controller and register to controller manager. +func (c *CRBApplicationFailoverController) SetupWithManager(mgr controllerruntime.Manager) error { + c.workloadUnhealthyMap = newWorkloadUnhealthyMap() + clusterResourceBindingPredicateFn := predicate.Funcs{ + CreateFunc: func(createEvent event.CreateEvent) bool { + obj := createEvent.Object.(*workv1alpha2.ClusterResourceBinding) + if obj.Spec.Failover == nil || obj.Spec.Failover.Application == nil { + return false + } + return true + }, + UpdateFunc: func(updateEvent event.UpdateEvent) bool { + oldObj := updateEvent.ObjectOld.(*workv1alpha2.ClusterResourceBinding) + newObj := updateEvent.ObjectNew.(*workv1alpha2.ClusterResourceBinding) + if (oldObj.Spec.Failover == nil || oldObj.Spec.Failover.Application == nil) && + (newObj.Spec.Failover == nil || newObj.Spec.Failover.Application == nil) { + return false + } + return true + }, + DeleteFunc: func(deleteEvent event.DeleteEvent) bool { + obj := deleteEvent.Object.(*workv1alpha2.ClusterResourceBinding) + if obj.Spec.Failover == nil || obj.Spec.Failover.Application == nil { + return false + } + return true + }, + GenericFunc: func(genericEvent event.GenericEvent) bool { return false }, + } + + return controllerruntime.NewControllerManagedBy(mgr). + For(&workv1alpha2.ClusterResourceBinding{}, builder.WithPredicates(clusterResourceBindingPredicateFn)). + WithOptions(controller.Options{RateLimiter: ratelimiterflag.DefaultControllerRateLimiter(c.RateLimiterOptions)}). + Complete(c) +} + +func (c *CRBApplicationFailoverController) clusterResourceBindingFilter(crb *workv1alpha2.ClusterResourceBinding) bool { + if crb.Spec.Failover == nil || crb.Spec.Failover.Application == nil { + return false + } + + if len(crb.Status.AggregatedStatus) == 0 { + return false + } + + resourceKey, err := helper.ConstructClusterWideKey(crb.Spec.Resource) + if err != nil { + // Never reach + klog.Errorf("Failed to construct clusterWideKey from clusterResourceBinding(%s)", crb.Name) + return false + } + + if !c.ResourceInterpreter.HookEnabled(resourceKey.GroupVersionKind(), configv1alpha1.InterpreterOperationInterpretHealth) { + return false + } + + return true +} diff --git a/pkg/controllers/applicationfailover/rb_application_failover_controller.go b/pkg/controllers/applicationfailover/rb_application_failover_controller.go new file mode 100644 index 000000000..04f43ad5d --- /dev/null +++ b/pkg/controllers/applicationfailover/rb_application_failover_controller.go @@ -0,0 +1,260 @@ +package applicationfailover + +import ( + "context" + "fmt" + "math" + "time" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/tools/record" + "k8s.io/klog/v2" + "k8s.io/utils/pointer" + controllerruntime "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/predicate" + + configv1alpha1 "github.com/karmada-io/karmada/pkg/apis/config/v1alpha1" + policyv1alpha1 "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1" + workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2" + "github.com/karmada-io/karmada/pkg/features" + "github.com/karmada-io/karmada/pkg/resourceinterpreter" + "github.com/karmada-io/karmada/pkg/sharedcli/ratelimiterflag" + "github.com/karmada-io/karmada/pkg/util/fedinformer/keys" + "github.com/karmada-io/karmada/pkg/util/helper" +) + +// RBApplicationFailoverControllerName is the controller name that will be used when reporting events. +const RBApplicationFailoverControllerName = "resource-binding-application-failover-controller" + +// RBApplicationFailoverController is to sync ResourceBinding's application failover behavior. +type RBApplicationFailoverController struct { + client.Client + EventRecorder record.EventRecorder + RateLimiterOptions ratelimiterflag.Options + + // workloadUnhealthyMap records which clusters the specific resource is in an unhealthy state + workloadUnhealthyMap *workloadUnhealthyMap + ResourceInterpreter resourceinterpreter.ResourceInterpreter +} + +// Reconcile performs a full reconciliation for the object referred to by the Request. +// The Controller will requeue the Request to be processed again if an error is non-nil or +// Result.Requeue is true, otherwise upon completion it will remove the work from the queue. +func (c *RBApplicationFailoverController) Reconcile(ctx context.Context, req controllerruntime.Request) (controllerruntime.Result, error) { + klog.V(4).Infof("Reconciling ResourceBinding %s.", req.NamespacedName.String()) + + binding := &workv1alpha2.ResourceBinding{} + if err := c.Client.Get(ctx, req.NamespacedName, binding); err != nil { + if apierrors.IsNotFound(err) { + resource, err := helper.ConstructClusterWideKey(binding.Spec.Resource) + if err != nil { + return controllerruntime.Result{Requeue: true}, err + } + c.workloadUnhealthyMap.delete(resource) + return controllerruntime.Result{}, nil + } + return controllerruntime.Result{Requeue: true}, err + } + + if !c.bindingFilter(binding) { + resource, err := helper.ConstructClusterWideKey(binding.Spec.Resource) + if err != nil { + return controllerruntime.Result{Requeue: true}, err + } + c.workloadUnhealthyMap.delete(resource) + return controllerruntime.Result{}, nil + } + + retryDuration, err := c.syncBinding(binding) + if err != nil { + return controllerruntime.Result{Requeue: true}, err + } + if retryDuration > 0 { + klog.V(4).Infof("Retry to check health status of the workload after %v minutes.", retryDuration.Minutes()) + return controllerruntime.Result{RequeueAfter: retryDuration}, nil + } + return controllerruntime.Result{}, nil +} + +func (c *RBApplicationFailoverController) detectFailure(clusters []string, tolerationSeconds *int32, resource keys.ClusterWideKey) (int32, []string) { + var needEvictClusters []string + duration := int32(math.MaxInt32) + + for _, cluster := range clusters { + if !c.workloadUnhealthyMap.hasWorkloadBeenUnhealthy(resource, cluster) { + c.workloadUnhealthyMap.setTimeStamp(resource, cluster) + if duration > *tolerationSeconds { + duration = *tolerationSeconds + } + continue + } + // When the workload in a cluster is in an unhealthy state for more than the tolerance time, + // and the cluster has not been in the GracefulEvictionTasks, + // the cluster will be added to the list that needs to be evicted. + unHealthyTimeStamp := c.workloadUnhealthyMap.getTimeStamp(resource, cluster) + timeNow := metav1.Now() + if timeNow.After(unHealthyTimeStamp.Add(time.Duration(*tolerationSeconds) * time.Second)) { + needEvictClusters = append(needEvictClusters, cluster) + } else { + if duration > *tolerationSeconds-int32(timeNow.Sub(unHealthyTimeStamp.Time).Seconds()) { + duration = *tolerationSeconds - int32(timeNow.Sub(unHealthyTimeStamp.Time).Seconds()) + } + } + } + + if duration == int32(math.MaxInt32) { + duration = 0 + } + return duration, needEvictClusters +} + +func (c *RBApplicationFailoverController) syncBinding(binding *workv1alpha2.ResourceBinding) (time.Duration, error) { + tolerationSeconds := binding.Spec.Failover.Application.DecisionConditions.TolerationSeconds + resource, err := helper.ConstructClusterWideKey(binding.Spec.Resource) + if err != nil { + klog.Errorf("failed to get key from binding(%s)'s resource", binding.Name) + return 0, err + } + + allClusters := sets.New[string]() + for _, cluster := range binding.Spec.Clusters { + allClusters.Insert(cluster.Name) + } + + unhealthyClusters := filterIrrelevantClusters(binding.Status.AggregatedStatus, binding.Spec) + duration, needEvictClusters := c.detectFailure(unhealthyClusters, tolerationSeconds, resource) + + err = c.evictBinding(binding, needEvictClusters) + if err != nil { + klog.Errorf("Failed to evict binding(%s/%s), err: %v.", binding.Namespace, binding.Name, err) + return 0, err + } + + if len(needEvictClusters) != 0 { + if err = c.updateBinding(binding, allClusters, needEvictClusters); err != nil { + return 0, err + } + } + + // Cleanup clusters that have been evicted or removed in the workloadUnhealthyMap + c.workloadUnhealthyMap.deleteIrrelevantClusters(resource, allClusters) + + return time.Duration(duration) * time.Second, nil +} + +func (c *RBApplicationFailoverController) evictBinding(binding *workv1alpha2.ResourceBinding, clusters []string) error { + for _, cluster := range clusters { + switch binding.Spec.Failover.Application.PurgeMode { + case policyv1alpha1.Graciously: + if features.FeatureGate.Enabled(features.GracefulEviction) { + binding.Spec.GracefulEvictCluster(cluster, workv1alpha2.NewTaskOptions(workv1alpha2.WithProducer(RBApplicationFailoverControllerName), + workv1alpha2.WithReason(workv1alpha2.EvictionReasonApplicationFailure), workv1alpha2.WithGracePeriodSeconds(binding.Spec.Failover.Application.GracePeriodSeconds))) + } else { + err := fmt.Errorf("GracefulEviction featureGate must be enabled when purgeMode is %s", policyv1alpha1.Graciously) + klog.Error(err) + return err + } + case policyv1alpha1.Never: + if features.FeatureGate.Enabled(features.GracefulEviction) { + binding.Spec.GracefulEvictCluster(cluster, workv1alpha2.NewTaskOptions(workv1alpha2.WithProducer(RBApplicationFailoverControllerName), + workv1alpha2.WithReason(workv1alpha2.EvictionReasonApplicationFailure), workv1alpha2.WithSuppressDeletion(pointer.Bool(true)))) + } else { + err := fmt.Errorf("GracefulEviction featureGate must be enabled when purgeMode is %s", policyv1alpha1.Never) + klog.Error(err) + return err + } + case policyv1alpha1.Immediately: + binding.Spec.RemoveCluster(cluster) + } + } + + return nil +} + +func (c *RBApplicationFailoverController) updateBinding(binding *workv1alpha2.ResourceBinding, allClusters sets.Set[string], needEvictClusters []string) error { + if err := c.Update(context.TODO(), binding); err != nil { + for _, cluster := range needEvictClusters { + helper.EmitClusterEvictionEventForResourceBinding(binding, cluster, c.EventRecorder, err) + } + klog.ErrorS(err, "Failed to update binding", "binding", klog.KObj(binding)) + return err + } + for _, cluster := range needEvictClusters { + allClusters.Delete(cluster) + } + if !features.FeatureGate.Enabled(features.GracefulEviction) { + for _, cluster := range needEvictClusters { + helper.EmitClusterEvictionEventForResourceBinding(binding, cluster, c.EventRecorder, nil) + } + } + + return nil +} + +// SetupWithManager creates a controller and register to controller manager. +func (c *RBApplicationFailoverController) SetupWithManager(mgr controllerruntime.Manager) error { + c.workloadUnhealthyMap = newWorkloadUnhealthyMap() + resourceBindingPredicateFn := predicate.Funcs{ + CreateFunc: func(createEvent event.CreateEvent) bool { + obj := createEvent.Object.(*workv1alpha2.ResourceBinding) + if obj.Spec.Failover == nil || obj.Spec.Failover.Application == nil { + return false + } + return true + }, + UpdateFunc: func(updateEvent event.UpdateEvent) bool { + oldObj := updateEvent.ObjectOld.(*workv1alpha2.ResourceBinding) + newObj := updateEvent.ObjectNew.(*workv1alpha2.ResourceBinding) + if (oldObj.Spec.Failover == nil || oldObj.Spec.Failover.Application == nil) && + (newObj.Spec.Failover == nil || newObj.Spec.Failover.Application == nil) { + return false + } + return true + }, + DeleteFunc: func(deleteEvent event.DeleteEvent) bool { + obj := deleteEvent.Object.(*workv1alpha2.ResourceBinding) + if obj.Spec.Failover == nil || obj.Spec.Failover.Application == nil { + return false + } + return true + }, + GenericFunc: func(genericEvent event.GenericEvent) bool { return false }, + } + + return controllerruntime.NewControllerManagedBy(mgr). + For(&workv1alpha2.ResourceBinding{}, builder.WithPredicates(resourceBindingPredicateFn)). + WithOptions(controller.Options{RateLimiter: ratelimiterflag.DefaultControllerRateLimiter(c.RateLimiterOptions)}). + Complete(c) +} + +func (c *RBApplicationFailoverController) bindingFilter(rb *workv1alpha2.ResourceBinding) bool { + if rb.Spec.Failover == nil || rb.Spec.Failover.Application == nil { + return false + } + + if len(rb.Status.AggregatedStatus) == 0 { + return false + } + + resourceKey, err := helper.ConstructClusterWideKey(rb.Spec.Resource) + if err != nil { + // Never reach + klog.Errorf("Failed to construct clusterWideKey from binding(%s/%s)", rb.Namespace, rb.Name) + return false + } + + if !c.ResourceInterpreter.HookEnabled(resourceKey.GroupVersionKind(), configv1alpha1.InterpreterOperationInterpretHealth) { + return false + } + + if !rb.Spec.PropagateDeps { + return false + } + return true +} diff --git a/pkg/controllers/cluster/taint_manager.go b/pkg/controllers/cluster/taint_manager.go index a083f7216..1bef8954a 100644 --- a/pkg/controllers/cluster/taint_manager.go +++ b/pkg/controllers/cluster/taint_manager.go @@ -154,7 +154,7 @@ func (tc *NoExecuteTaintManager) syncBindingEviction(key util.QueueKey) error { if needEviction || tolerationTime == 0 { // update final result to evict the target cluster if features.FeatureGate.Enabled(features.GracefulEviction) { - binding.Spec.GracefulEvictCluster(cluster, workv1alpha2.EvictionProducerTaintManager, workv1alpha2.EvictionReasonTaintUntolerated, "") + binding.Spec.GracefulEvictCluster(cluster, workv1alpha2.NewTaskOptions(workv1alpha2.WithProducer(workv1alpha2.EvictionProducerTaintManager), workv1alpha2.WithReason(workv1alpha2.EvictionReasonTaintUntolerated))) } else { binding.Spec.RemoveCluster(cluster) } @@ -206,7 +206,7 @@ func (tc *NoExecuteTaintManager) syncClusterBindingEviction(key util.QueueKey) e if needEviction || tolerationTime == 0 { // update final result to evict the target cluster if features.FeatureGate.Enabled(features.GracefulEviction) { - binding.Spec.GracefulEvictCluster(cluster, workv1alpha2.EvictionProducerTaintManager, workv1alpha2.EvictionReasonTaintUntolerated, "") + binding.Spec.GracefulEvictCluster(cluster, workv1alpha2.NewTaskOptions(workv1alpha2.WithProducer(workv1alpha2.EvictionProducerTaintManager), workv1alpha2.WithReason(workv1alpha2.EvictionReasonTaintUntolerated))) } else { binding.Spec.RemoveCluster(cluster) } diff --git a/pkg/generated/openapi/zz_generated.openapi.go b/pkg/generated/openapi/zz_generated.openapi.go index d9139cd13..f3d10b11a 100644 --- a/pkg/generated/openapi/zz_generated.openapi.go +++ b/pkg/generated/openapi/zz_generated.openapi.go @@ -129,6 +129,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2.ResourceBindingSpec": schema_pkg_apis_work_v1alpha2_ResourceBindingSpec(ref), "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2.ResourceBindingStatus": schema_pkg_apis_work_v1alpha2_ResourceBindingStatus(ref), "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2.TargetCluster": schema_pkg_apis_work_v1alpha2_TargetCluster(ref), + "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2.TaskOptions": schema_pkg_apis_work_v1alpha2_TaskOptions(ref), "k8s.io/api/admissionregistration/v1.MutatingWebhook": schema_k8sio_api_admissionregistration_v1_MutatingWebhook(ref), "k8s.io/api/admissionregistration/v1.MutatingWebhookConfiguration": schema_k8sio_api_admissionregistration_v1_MutatingWebhookConfiguration(ref), "k8s.io/api/admissionregistration/v1.MutatingWebhookConfigurationList": schema_k8sio_api_admissionregistration_v1_MutatingWebhookConfigurationList(ref), @@ -5574,6 +5575,53 @@ func schema_pkg_apis_work_v1alpha2_TargetCluster(ref common.ReferenceCallback) c } } +func schema_pkg_apis_work_v1alpha2_TaskOptions(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Description: "TaskOptions represents options for GracefulEvictionTasks.", + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "producer": { + SchemaProps: spec.SchemaProps{ + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + "reason": { + SchemaProps: spec.SchemaProps{ + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + "message": { + SchemaProps: spec.SchemaProps{ + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + "gracePeriodSeconds": { + SchemaProps: spec.SchemaProps{ + Type: []string{"integer"}, + Format: "int32", + }, + }, + "suppressDeletion": { + SchemaProps: spec.SchemaProps{ + Type: []string{"boolean"}, + Format: "", + }, + }, + }, + Required: []string{"producer", "reason", "message", "gracePeriodSeconds", "suppressDeletion"}, + }, + }, + } +} + func schema_k8sio_api_admissionregistration_v1_MutatingWebhook(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ diff --git a/pkg/scheduler/framework/plugins/clustereviction/cluster_eviction.go b/pkg/scheduler/framework/plugins/clustereviction/cluster_eviction.go index b46977443..9629c8e93 100644 --- a/pkg/scheduler/framework/plugins/clustereviction/cluster_eviction.go +++ b/pkg/scheduler/framework/plugins/clustereviction/cluster_eviction.go @@ -8,7 +8,6 @@ import ( clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1" workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2" "github.com/karmada-io/karmada/pkg/scheduler/framework" - "github.com/karmada-io/karmada/pkg/util/helper" ) const ( @@ -33,7 +32,7 @@ func (p *ClusterEviction) Name() string { // Filter checks if the target cluster is in the GracefulEvictionTasks which means it is in the process of eviction. func (p *ClusterEviction) Filter(_ context.Context, bindingSpec *workv1alpha2.ResourceBindingSpec, _ *workv1alpha2.ResourceBindingStatus, cluster *clusterv1alpha1.Cluster) *framework.Result { - if helper.ClusterInGracefulEvictionTasks(bindingSpec.GracefulEvictionTasks, cluster.Name) { + if bindingSpec.ClusterInGracefulEvictionTasks(cluster.Name) { klog.V(2).Infof("Cluster(%s) is in the process of eviction.", cluster.Name) return framework.NewResult(framework.Unschedulable, "cluster(s) is in the process of eviction") } diff --git a/pkg/util/helper/cluster.go b/pkg/util/helper/cluster.go index a095c13cc..229c140c1 100644 --- a/pkg/util/helper/cluster.go +++ b/pkg/util/helper/cluster.go @@ -2,7 +2,6 @@ package helper import ( clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1" - workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2" ) // IsAPIEnabled checks if target API (or CRD) referencing by groupVersion and kind has been installed. @@ -22,14 +21,3 @@ func IsAPIEnabled(APIEnablements []clusterv1alpha1.APIEnablement, groupVersion s return false } - -// ClusterInGracefulEvictionTasks checks if the target cluster is in the GracefulEvictionTasks which means it is in the process of eviction. -func ClusterInGracefulEvictionTasks(gracefulEvictionTasks []workv1alpha2.GracefulEvictionTask, clusterName string) bool { - for _, task := range gracefulEvictionTasks { - if task.FromCluster == clusterName { - return true - } - } - - return false -} diff --git a/pkg/util/helper/cluster_test.go b/pkg/util/helper/cluster_test.go index 68f6adfba..badef95a3 100644 --- a/pkg/util/helper/cluster_test.go +++ b/pkg/util/helper/cluster_test.go @@ -4,7 +4,6 @@ import ( "testing" clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1" - workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2" ) func TestIsAPIEnabled(t *testing.T) { @@ -73,48 +72,3 @@ func TestIsAPIEnabled(t *testing.T) { }) } } - -func TestClusterInGracefulEvictionTasks(t *testing.T) { - gracefulEvictionTasks := []workv1alpha2.GracefulEvictionTask{ - { - FromCluster: ClusterMember1, - Producer: workv1alpha2.EvictionProducerTaintManager, - Reason: workv1alpha2.EvictionReasonTaintUntolerated, - }, - { - FromCluster: ClusterMember2, - Producer: workv1alpha2.EvictionProducerTaintManager, - Reason: workv1alpha2.EvictionReasonTaintUntolerated, - }, - } - - tests := []struct { - name string - gracefulEvictionTasks []workv1alpha2.GracefulEvictionTask - targetCluster string - expect bool - }{ - { - name: "targetCluster is in the process of eviction", - gracefulEvictionTasks: gracefulEvictionTasks, - targetCluster: ClusterMember1, - expect: true, - }, - { - name: "targetCluster is not in the process of eviction", - gracefulEvictionTasks: gracefulEvictionTasks, - targetCluster: ClusterMember3, - expect: false, - }, - } - - for _, test := range tests { - tc := test - t.Run(tc.name, func(t *testing.T) { - result := ClusterInGracefulEvictionTasks(tc.gracefulEvictionTasks, tc.targetCluster) - if result != tc.expect { - t.Errorf("expected: %v, but got: %v", tc.expect, result) - } - }) - } -}