Merge pull request #3461 from Poor12/add-health-controller-draft
Add application failover controller
This commit is contained in:
commit
99f3c4f1f4
|
@ -30,6 +30,7 @@ import (
|
|||
workv1alpha1 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha1"
|
||||
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
|
||||
"github.com/karmada-io/karmada/pkg/clusterdiscovery/clusterapi"
|
||||
"github.com/karmada-io/karmada/pkg/controllers/applicationfailover"
|
||||
"github.com/karmada-io/karmada/pkg/controllers/binding"
|
||||
"github.com/karmada-io/karmada/pkg/controllers/cluster"
|
||||
controllerscontext "github.com/karmada-io/karmada/pkg/controllers/context"
|
||||
|
@ -196,6 +197,7 @@ func init() {
|
|||
controllers["federatedResourceQuotaSync"] = startFederatedResourceQuotaSyncController
|
||||
controllers["federatedResourceQuotaStatus"] = startFederatedResourceQuotaStatusController
|
||||
controllers["gracefulEviction"] = startGracefulEvictionController
|
||||
controllers["applicationFailover"] = startApplicationFailoverController
|
||||
}
|
||||
|
||||
func startClusterController(ctx controllerscontext.Context) (enabled bool, err error) {
|
||||
|
@ -500,31 +502,54 @@ func startFederatedResourceQuotaStatusController(ctx controllerscontext.Context)
|
|||
}
|
||||
|
||||
func startGracefulEvictionController(ctx controllerscontext.Context) (enabled bool, err error) {
|
||||
if features.FeatureGate.Enabled(features.GracefulEviction) {
|
||||
rbGracefulEvictionController := &gracefuleviction.RBGracefulEvictionController{
|
||||
Client: ctx.Mgr.GetClient(),
|
||||
EventRecorder: ctx.Mgr.GetEventRecorderFor(gracefuleviction.RBGracefulEvictionControllerName),
|
||||
RateLimiterOptions: ctx.Opts.RateLimiterOptions,
|
||||
GracefulEvictionTimeout: ctx.Opts.GracefulEvictionTimeout.Duration,
|
||||
}
|
||||
if err := rbGracefulEvictionController.SetupWithManager(ctx.Mgr); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
crbGracefulEvictionController := &gracefuleviction.CRBGracefulEvictionController{
|
||||
Client: ctx.Mgr.GetClient(),
|
||||
EventRecorder: ctx.Mgr.GetEventRecorderFor(gracefuleviction.CRBGracefulEvictionControllerName),
|
||||
RateLimiterOptions: ctx.Opts.RateLimiterOptions,
|
||||
GracefulEvictionTimeout: ctx.Opts.GracefulEvictionTimeout.Duration,
|
||||
}
|
||||
if err := crbGracefulEvictionController.SetupWithManager(ctx.Mgr); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return true, nil
|
||||
if !features.FeatureGate.Enabled(features.GracefulEviction) {
|
||||
return false, nil
|
||||
}
|
||||
rbGracefulEvictionController := &gracefuleviction.RBGracefulEvictionController{
|
||||
Client: ctx.Mgr.GetClient(),
|
||||
EventRecorder: ctx.Mgr.GetEventRecorderFor(gracefuleviction.RBGracefulEvictionControllerName),
|
||||
RateLimiterOptions: ctx.Opts.RateLimiterOptions,
|
||||
GracefulEvictionTimeout: ctx.Opts.GracefulEvictionTimeout.Duration,
|
||||
}
|
||||
if err := rbGracefulEvictionController.SetupWithManager(ctx.Mgr); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return false, nil
|
||||
crbGracefulEvictionController := &gracefuleviction.CRBGracefulEvictionController{
|
||||
Client: ctx.Mgr.GetClient(),
|
||||
EventRecorder: ctx.Mgr.GetEventRecorderFor(gracefuleviction.CRBGracefulEvictionControllerName),
|
||||
RateLimiterOptions: ctx.Opts.RateLimiterOptions,
|
||||
GracefulEvictionTimeout: ctx.Opts.GracefulEvictionTimeout.Duration,
|
||||
}
|
||||
if err := crbGracefulEvictionController.SetupWithManager(ctx.Mgr); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func startApplicationFailoverController(ctx controllerscontext.Context) (enabled bool, err error) {
|
||||
if !features.FeatureGate.Enabled(features.Failover) {
|
||||
return false, nil
|
||||
}
|
||||
rbApplicationFailoverController := applicationfailover.RBApplicationFailoverController{
|
||||
Client: ctx.Mgr.GetClient(),
|
||||
EventRecorder: ctx.Mgr.GetEventRecorderFor(applicationfailover.RBApplicationFailoverControllerName),
|
||||
ResourceInterpreter: ctx.ResourceInterpreter,
|
||||
}
|
||||
if err = rbApplicationFailoverController.SetupWithManager(ctx.Mgr); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
crbApplicationFailoverController := applicationfailover.CRBApplicationFailoverController{
|
||||
Client: ctx.Mgr.GetClient(),
|
||||
EventRecorder: ctx.Mgr.GetEventRecorderFor(applicationfailover.CRBApplicationFailoverControllerName),
|
||||
ResourceInterpreter: ctx.ResourceInterpreter,
|
||||
}
|
||||
if err = crbApplicationFailoverController.SetupWithManager(ctx.Mgr); err != nil {
|
||||
return false, err
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// setupControllers initialize controllers and setup one by one.
|
||||
|
|
|
@ -1,5 +1,62 @@
|
|||
package v1alpha2
|
||||
|
||||
// TaskOptions holds the parameters used to build a GracefulEvictionTask.
// All fields are unexported; populate them via NewTaskOptions and the With*
// functional options below.
type TaskOptions struct {
	producer           string
	reason             string
	message            string
	gracePeriodSeconds *int32
	suppressDeletion   *bool
}

// Option mutates a TaskOptions in place.
type Option func(*TaskOptions)

// NewTaskOptions applies the given options, in order, to a fresh TaskOptions
// and returns it.
func NewTaskOptions(opts ...Option) *TaskOptions {
	result := &TaskOptions{}
	for _, apply := range opts {
		apply(result)
	}
	return result
}

// WithProducer sets the producer for TaskOptions
func WithProducer(producer string) Option {
	return func(o *TaskOptions) { o.producer = producer }
}

// WithReason sets the reason for TaskOptions
func WithReason(reason string) Option {
	return func(o *TaskOptions) { o.reason = reason }
}

// WithMessage sets the message for TaskOptions
func WithMessage(message string) Option {
	return func(o *TaskOptions) { o.message = message }
}

// WithGracePeriodSeconds sets the gracePeriodSeconds for TaskOptions
func WithGracePeriodSeconds(gracePeriodSeconds *int32) Option {
	return func(o *TaskOptions) { o.gracePeriodSeconds = gracePeriodSeconds }
}

// WithSuppressDeletion sets the suppressDeletion for TaskOptions
func WithSuppressDeletion(suppressDeletion *bool) Option {
	return func(o *TaskOptions) { o.suppressDeletion = suppressDeletion }
}
|
||||
|
||||
// TargetContains checks if specific cluster present on the target list.
|
||||
func (s *ResourceBindingSpec) TargetContains(name string) bool {
|
||||
for i := range s.Clusters {
|
||||
|
@ -11,6 +68,16 @@ func (s *ResourceBindingSpec) TargetContains(name string) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// ClusterInGracefulEvictionTasks checks if the target cluster is in the GracefulEvictionTasks which means it is in the process of eviction.
|
||||
func (s *ResourceBindingSpec) ClusterInGracefulEvictionTasks(name string) bool {
|
||||
for _, task := range s.GracefulEvictionTasks {
|
||||
if task.FromCluster == name {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// AssignedReplicasForCluster returns assigned replicas for specific cluster.
|
||||
func (s *ResourceBindingSpec) AssignedReplicasForCluster(targetCluster string) int32 {
|
||||
for i := range s.Clusters {
|
||||
|
@ -44,7 +111,7 @@ func (s *ResourceBindingSpec) RemoveCluster(name string) {
|
|||
// GracefulEvictCluster removes specific cluster from the target list in a graceful way by
|
||||
// building a graceful eviction task.
|
||||
// This function no-opts if the cluster does not exist.
|
||||
func (s *ResourceBindingSpec) GracefulEvictCluster(name, producer, reason, message string) {
|
||||
func (s *ResourceBindingSpec) GracefulEvictCluster(name string, options *TaskOptions) {
|
||||
// find the cluster index
|
||||
var i int
|
||||
for i = 0; i < len(s.Clusters); i++ {
|
||||
|
@ -63,19 +130,19 @@ func (s *ResourceBindingSpec) GracefulEvictCluster(name, producer, reason, messa
|
|||
s.Clusters = append(s.Clusters[:i], s.Clusters[i+1:]...)
|
||||
|
||||
// skip if the target cluster already in the task list
|
||||
for _, task := range s.GracefulEvictionTasks {
|
||||
if task.FromCluster == evictCluster.Name {
|
||||
return
|
||||
}
|
||||
if s.ClusterInGracefulEvictionTasks(evictCluster.Name) {
|
||||
return
|
||||
}
|
||||
|
||||
// build eviction task
|
||||
evictingCluster := evictCluster.DeepCopy()
|
||||
evictionTask := GracefulEvictionTask{
|
||||
FromCluster: evictingCluster.Name,
|
||||
Reason: reason,
|
||||
Message: message,
|
||||
Producer: producer,
|
||||
FromCluster: evictingCluster.Name,
|
||||
Reason: options.reason,
|
||||
Message: options.message,
|
||||
Producer: options.producer,
|
||||
GracePeriodSeconds: options.gracePeriodSeconds,
|
||||
SuppressDeletion: options.suppressDeletion,
|
||||
}
|
||||
if evictingCluster.Replicas > 0 {
|
||||
evictionTask.Replicas = &evictingCluster.Replicas
|
||||
|
|
|
@ -293,7 +293,7 @@ func TestResourceBindingSpec_GracefulEvictCluster(t *testing.T) {
|
|||
for _, test := range tests {
|
||||
tc := test
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
tc.InputSpec.GracefulEvictCluster(tc.EvictEvent.FromCluster, tc.EvictEvent.Producer, tc.EvictEvent.Reason, tc.EvictEvent.Message)
|
||||
tc.InputSpec.GracefulEvictCluster(tc.EvictEvent.FromCluster, NewTaskOptions(WithProducer(tc.EvictEvent.Producer), WithReason(tc.EvictEvent.Reason), WithMessage(tc.EvictEvent.Message)))
|
||||
|
||||
if !reflect.DeepEqual(tc.InputSpec.Clusters, tc.ExpectSpec.Clusters) {
|
||||
t.Fatalf("expect clusters: %v, but got: %v", tc.ExpectSpec.Clusters, tc.InputSpec.Clusters)
|
||||
|
@ -304,3 +304,52 @@ func TestResourceBindingSpec_GracefulEvictCluster(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestResourceBindingSpec_ClusterInGracefulEvictionTasks verifies that a
// cluster is reported as "in eviction" exactly when it appears in the spec's
// GracefulEvictionTasks list.
func TestResourceBindingSpec_ClusterInGracefulEvictionTasks(t *testing.T) {
	// Shared fixture: member1 and member2 are currently being evicted.
	gracefulEvictionTasks := []GracefulEvictionTask{
		{
			FromCluster: "member1",
			Producer:    EvictionProducerTaintManager,
			Reason:      EvictionReasonTaintUntolerated,
		},
		{
			FromCluster: "member2",
			Producer:    EvictionProducerTaintManager,
			Reason:      EvictionReasonTaintUntolerated,
		},
	}

	tests := []struct {
		name          string
		InputSpec     ResourceBindingSpec
		targetCluster string
		expect        bool
	}{
		{
			name: "targetCluster is in the process of eviction",
			InputSpec: ResourceBindingSpec{
				GracefulEvictionTasks: gracefulEvictionTasks,
			},
			targetCluster: "member1",
			expect:        true,
		},
		{
			name: "targetCluster is not in the process of eviction",
			InputSpec: ResourceBindingSpec{
				GracefulEvictionTasks: gracefulEvictionTasks,
			},
			targetCluster: "member3",
			expect:        false,
		},
	}

	for _, test := range tests {
		// Re-bind the range variable so each subtest captures its own copy
		// (required before Go 1.22 loop-variable semantics).
		tc := test
		t.Run(tc.name, func(t *testing.T) {
			result := tc.InputSpec.ClusterInGracefulEvictionTasks(tc.targetCluster)
			if result != tc.expect {
				t.Errorf("expected: %v, but got: %v", tc.expect, result)
			}
		})
	}
}
|
||||
|
|
|
@ -82,6 +82,10 @@ const (
|
|||
// EvictionReasonTaintUntolerated describes the eviction is triggered
|
||||
// because can not tolerate taint or exceed toleration period of time.
|
||||
EvictionReasonTaintUntolerated = "TaintUntolerated"
|
||||
|
||||
// EvictionReasonApplicationFailure describes the eviction is triggered
|
||||
// because the application fails and reaches the condition of ApplicationFailoverBehavior.
|
||||
EvictionReasonApplicationFailure = "ApplicationFailure"
|
||||
)
|
||||
|
||||
// Define eviction producers.
|
||||
|
|
|
@ -383,3 +383,29 @@ func (in *TargetCluster) DeepCopy() *TargetCluster {
|
|||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// NOTE(review): the comments below say these are autogenerated (deepcopy-gen
// style); if so, they should be regenerated rather than hand-edited.

// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *TaskOptions) DeepCopyInto(out *TaskOptions) {
	*out = *in
	// Pointer fields are deep-copied so the clone never aliases the source.
	if in.gracePeriodSeconds != nil {
		in, out := &in.gracePeriodSeconds, &out.gracePeriodSeconds
		*out = new(int32)
		**out = **in
	}
	if in.suppressDeletion != nil {
		in, out := &in.suppressDeletion, &out.suppressDeletion
		*out = new(bool)
		**out = **in
	}
	return
}

// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TaskOptions.
func (in *TaskOptions) DeepCopy() *TaskOptions {
	if in == nil {
		return nil
	}
	out := new(TaskOptions)
	in.DeepCopyInto(out)
	return out
}
|
||||
|
|
|
@ -0,0 +1,95 @@
|
|||
package applicationfailover
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
|
||||
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
|
||||
"github.com/karmada-io/karmada/pkg/util/fedinformer/keys"
|
||||
)
|
||||
|
||||
// workloadUnhealthyMap tracks, per workload, when each cluster's copy of that
// workload was first observed to be unhealthy. The embedded RWMutex guards
// workloadUnhealthy for concurrent use by the failover controllers.
type workloadUnhealthyMap struct {
	sync.RWMutex
	// workloadUnhealthy is keyed by the cluster-wide key of the resource.
	// value is also a map. Its key is the cluster where the unhealthy workload resides.
	// Its value is the time when the unhealthy state was first observed.
	workloadUnhealthy map[keys.ClusterWideKey]map[string]metav1.Time
}
||||
|
||||
func newWorkloadUnhealthyMap() *workloadUnhealthyMap {
|
||||
return &workloadUnhealthyMap{
|
||||
workloadUnhealthy: make(map[keys.ClusterWideKey]map[string]metav1.Time),
|
||||
}
|
||||
}
|
||||
|
||||
// delete drops all per-cluster unhealthy records for the given resource.
// Safe to call for a key that is not present.
func (m *workloadUnhealthyMap) delete(key keys.ClusterWideKey) {
	m.Lock()
	defer m.Unlock()
	delete(m.workloadUnhealthy, key)
}
|
||||
|
||||
func (m *workloadUnhealthyMap) hasWorkloadBeenUnhealthy(resource keys.ClusterWideKey, cluster string) bool {
|
||||
m.RLock()
|
||||
defer m.RUnlock()
|
||||
|
||||
unhealthyClusters := m.workloadUnhealthy[resource]
|
||||
if unhealthyClusters == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
_, exist := unhealthyClusters[cluster]
|
||||
return exist
|
||||
}
|
||||
|
||||
func (m *workloadUnhealthyMap) setTimeStamp(resource keys.ClusterWideKey, cluster string) {
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
|
||||
unhealthyClusters := m.workloadUnhealthy[resource]
|
||||
if unhealthyClusters == nil {
|
||||
unhealthyClusters = make(map[string]metav1.Time)
|
||||
}
|
||||
|
||||
unhealthyClusters[cluster] = metav1.Now()
|
||||
m.workloadUnhealthy[resource] = unhealthyClusters
|
||||
}
|
||||
|
||||
func (m *workloadUnhealthyMap) getTimeStamp(resource keys.ClusterWideKey, cluster string) metav1.Time {
|
||||
m.RLock()
|
||||
defer m.RUnlock()
|
||||
|
||||
unhealthyClusters := m.workloadUnhealthy[resource]
|
||||
return unhealthyClusters[cluster]
|
||||
}
|
||||
|
||||
func (m *workloadUnhealthyMap) deleteIrrelevantClusters(resource keys.ClusterWideKey, allClusters sets.Set[string]) {
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
|
||||
unhealthyClusters := m.workloadUnhealthy[resource]
|
||||
if unhealthyClusters == nil {
|
||||
return
|
||||
}
|
||||
for cluster := range unhealthyClusters {
|
||||
if !allClusters.Has(cluster) {
|
||||
delete(unhealthyClusters, cluster)
|
||||
}
|
||||
}
|
||||
m.workloadUnhealthy[resource] = unhealthyClusters
|
||||
}
|
||||
|
||||
// filterIrrelevantClusters filters clusters which is in the process of eviction or in the Healthy/Unknown state.
|
||||
func filterIrrelevantClusters(aggregatedStatusItems []workv1alpha2.AggregatedStatusItem, resourceBindingSpec workv1alpha2.ResourceBindingSpec) []string {
|
||||
var filteredClusters []string
|
||||
for _, aggregatedStatusItem := range aggregatedStatusItems {
|
||||
cluster := aggregatedStatusItem.ClusterName
|
||||
|
||||
if aggregatedStatusItem.Health == workv1alpha2.ResourceUnhealthy && !resourceBindingSpec.ClusterInGracefulEvictionTasks(cluster) {
|
||||
filteredClusters = append(filteredClusters, cluster)
|
||||
}
|
||||
}
|
||||
|
||||
return filteredClusters
|
||||
}
|
|
@ -0,0 +1,92 @@
|
|||
package applicationfailover
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
|
||||
)
|
||||
|
||||
// TestFilterIrrelevantClusters covers the four interesting combinations:
// all-healthy, all-unknown, unhealthy-and-not-evicting (the only case that
// yields a result), and unhealthy-but-already-evicting.
func TestFilterIrrelevantClusters(t *testing.T) {
	tests := []struct {
		name                  string
		aggregatedStatusItems []workv1alpha2.AggregatedStatusItem
		resourceBindingSpec   workv1alpha2.ResourceBindingSpec
		expectedClusters      []string
	}{
		{
			name: "all applications are healthy",
			aggregatedStatusItems: []workv1alpha2.AggregatedStatusItem{
				{
					ClusterName: "member1",
					Health:      workv1alpha2.ResourceHealthy,
				},
				{
					ClusterName: "member2",
					Health:      workv1alpha2.ResourceHealthy,
				},
			},
			resourceBindingSpec: workv1alpha2.ResourceBindingSpec{},
			expectedClusters:    nil,
		},
		{
			name: "all applications are unknown",
			aggregatedStatusItems: []workv1alpha2.AggregatedStatusItem{
				{
					ClusterName: "member1",
					Health:      workv1alpha2.ResourceUnknown,
				},
				{
					ClusterName: "member2",
					Health:      workv1alpha2.ResourceUnknown,
				},
			},
			resourceBindingSpec: workv1alpha2.ResourceBindingSpec{},
			expectedClusters:    nil,
		},
		{
			name: "one application is unhealthy and not in gracefulEvictionTasks",
			aggregatedStatusItems: []workv1alpha2.AggregatedStatusItem{
				{
					ClusterName: "member1",
					Health:      workv1alpha2.ResourceHealthy,
				},
				{
					ClusterName: "member2",
					Health:      workv1alpha2.ResourceUnhealthy,
				},
			},
			resourceBindingSpec: workv1alpha2.ResourceBindingSpec{},
			expectedClusters:    []string{"member2"},
		},
		{
			// Unhealthy but already queued for eviction: must be filtered out.
			name: "one application is unhealthy and in gracefulEvictionTasks",
			aggregatedStatusItems: []workv1alpha2.AggregatedStatusItem{
				{
					ClusterName: "member1",
					Health:      workv1alpha2.ResourceHealthy,
				},
				{
					ClusterName: "member2",
					Health:      workv1alpha2.ResourceUnhealthy,
				},
			},
			resourceBindingSpec: workv1alpha2.ResourceBindingSpec{
				GracefulEvictionTasks: []workv1alpha2.GracefulEvictionTask{
					{
						FromCluster: "member2",
					},
				},
			},
			expectedClusters: nil,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if got := filterIrrelevantClusters(tt.aggregatedStatusItems, tt.resourceBindingSpec); !reflect.DeepEqual(got, tt.expectedClusters) {
				t.Errorf("filterIrrelevantClusters() = %v, want %v", got, tt.expectedClusters)
			}
		})
	}
}
|
|
@ -0,0 +1,255 @@
|
|||
package applicationfailover
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"time"
|
||||
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/client-go/tools/record"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/utils/pointer"
|
||||
controllerruntime "sigs.k8s.io/controller-runtime"
|
||||
"sigs.k8s.io/controller-runtime/pkg/builder"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
"sigs.k8s.io/controller-runtime/pkg/controller"
|
||||
"sigs.k8s.io/controller-runtime/pkg/event"
|
||||
"sigs.k8s.io/controller-runtime/pkg/predicate"
|
||||
|
||||
configv1alpha1 "github.com/karmada-io/karmada/pkg/apis/config/v1alpha1"
|
||||
policyv1alpha1 "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1"
|
||||
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
|
||||
"github.com/karmada-io/karmada/pkg/features"
|
||||
"github.com/karmada-io/karmada/pkg/resourceinterpreter"
|
||||
"github.com/karmada-io/karmada/pkg/sharedcli/ratelimiterflag"
|
||||
"github.com/karmada-io/karmada/pkg/util/fedinformer/keys"
|
||||
"github.com/karmada-io/karmada/pkg/util/helper"
|
||||
)
|
||||
|
||||
// CRBApplicationFailoverControllerName is the controller name that will be used when reporting events.
const CRBApplicationFailoverControllerName = "cluster-resource-binding-application-failover-controller"

// CRBApplicationFailoverController is to sync ClusterResourceBinding's application failover behavior.
type CRBApplicationFailoverController struct {
	client.Client // client for reading/updating ClusterResourceBindings
	// EventRecorder emits Kubernetes events attributed to this controller.
	EventRecorder record.EventRecorder
	// RateLimiterOptions tunes the reconcile work-queue rate limiter.
	RateLimiterOptions ratelimiterflag.Options

	// workloadUnhealthyMap records which clusters the specific resource is in an unhealthy state
	workloadUnhealthyMap *workloadUnhealthyMap
	// ResourceInterpreter is consulted to check whether health interpretation
	// is enabled for the bound resource kind.
	ResourceInterpreter resourceinterpreter.ResourceInterpreter
}
|
||||
|
||||
// Reconcile performs a full reconciliation for the object referred to by the Request.
// The Controller will requeue the Request to be processed again if an error is non-nil or
// Result.Requeue is true, otherwise upon completion it will remove the work from the queue.
func (c *CRBApplicationFailoverController) Reconcile(ctx context.Context, req controllerruntime.Request) (controllerruntime.Result, error) {
	klog.V(4).Infof("Reconciling ClusterResourceBinding %s.", req.Name)

	binding := &workv1alpha2.ClusterResourceBinding{}
	if err := c.Client.Get(ctx, req.NamespacedName, binding); err != nil {
		if apierrors.IsNotFound(err) {
			// Binding is gone: drop its unhealthy-state bookkeeping.
			// NOTE(review): Get failed, so `binding` is still the zero value
			// and the key is built from an empty Resource reference — confirm
			// this actually matches (and clears) the entry that was recorded
			// while the binding existed.
			resource, err := helper.ConstructClusterWideKey(binding.Spec.Resource)
			if err != nil {
				return controllerruntime.Result{Requeue: true}, err
			}
			c.workloadUnhealthyMap.delete(resource)
			return controllerruntime.Result{}, nil
		}
		return controllerruntime.Result{Requeue: true}, err
	}

	// Bindings without an application-failover policy (or without health data)
	// are out of scope; clear any stale bookkeeping and stop.
	if !c.clusterResourceBindingFilter(binding) {
		resource, err := helper.ConstructClusterWideKey(binding.Spec.Resource)
		if err != nil {
			return controllerruntime.Result{Requeue: true}, err
		}
		c.workloadUnhealthyMap.delete(resource)
		return controllerruntime.Result{}, nil
	}

	retryDuration, err := c.syncBinding(binding)
	if err != nil {
		return controllerruntime.Result{Requeue: true}, err
	}
	// A positive duration means some cluster is unhealthy but still within its
	// toleration window; re-check after that window elapses.
	if retryDuration > 0 {
		klog.V(4).Infof("Retry to check health status of the workload after %v minutes.", retryDuration.Minutes())
		return controllerruntime.Result{RequeueAfter: retryDuration}, nil
	}
	return controllerruntime.Result{}, nil
}
|
||||
|
||||
// detectFailure classifies the given unhealthy clusters for one workload.
// Clusters unhealthy for longer than tolerationSeconds are returned as the
// eviction candidates; for the rest it returns the minimum remaining wait (in
// seconds) before the next health check. A returned duration of 0 means no
// re-check is pending.
// NOTE(review): tolerationSeconds is dereferenced unconditionally — presumably
// it is defaulted by the API; confirm it can never be nil here.
func (c *CRBApplicationFailoverController) detectFailure(clusters []string, tolerationSeconds *int32, resource keys.ClusterWideKey) (int32, []string) {
	var needEvictClusters []string
	// Sentinel: MaxInt32 means "no pending re-check recorded yet".
	duration := int32(math.MaxInt32)

	for _, cluster := range clusters {
		// First observation of this cluster being unhealthy: start its clock
		// and schedule a re-check after the full toleration window.
		if !c.workloadUnhealthyMap.hasWorkloadBeenUnhealthy(resource, cluster) {
			c.workloadUnhealthyMap.setTimeStamp(resource, cluster)
			if duration > *tolerationSeconds {
				duration = *tolerationSeconds
			}
			continue
		}
		// When the workload in a cluster is in an unhealthy state for more than the tolerance time,
		// and the cluster has not been in the GracefulEvictionTasks,
		// the cluster will be added to the list that needs to be evicted.
		unHealthyTimeStamp := c.workloadUnhealthyMap.getTimeStamp(resource, cluster)
		timeNow := metav1.Now()
		if timeNow.After(unHealthyTimeStamp.Add(time.Duration(*tolerationSeconds) * time.Second)) {
			needEvictClusters = append(needEvictClusters, cluster)
		} else {
			// Still within the toleration window: re-check after the time
			// remaining for this cluster, keeping the minimum across clusters.
			if duration > *tolerationSeconds-int32(timeNow.Sub(unHealthyTimeStamp.Time).Seconds()) {
				duration = *tolerationSeconds - int32(timeNow.Sub(unHealthyTimeStamp.Time).Seconds())
			}
		}
	}

	if duration == int32(math.MaxInt32) {
		duration = 0
	}
	return duration, needEvictClusters
}
|
||||
|
||||
// syncBinding detects failed workloads for this binding, evicts the affected
// clusters, persists the change, and prunes stale bookkeeping. It returns how
// long to wait before the health state should be re-checked (0 = no re-check).
func (c *CRBApplicationFailoverController) syncBinding(binding *workv1alpha2.ClusterResourceBinding) (time.Duration, error) {
	tolerationSeconds := binding.Spec.Failover.Application.DecisionConditions.TolerationSeconds
	resource, err := helper.ConstructClusterWideKey(binding.Spec.Resource)
	if err != nil {
		klog.Errorf("failed to get key from binding(%s)'s resource", binding.Name)
		return 0, err
	}

	// Snapshot the currently scheduled clusters.
	allClusters := sets.New[string]()
	for _, cluster := range binding.Spec.Clusters {
		allClusters.Insert(cluster.Name)
	}

	// Only unhealthy clusters not already queued for eviction are considered.
	unhealthyClusters := filterIrrelevantClusters(binding.Status.AggregatedStatus, binding.Spec)
	duration, needEvictClusters := c.detectFailure(unhealthyClusters, tolerationSeconds, resource)

	// evictBinding only mutates the in-memory binding; updateBinding persists.
	err = c.evictBinding(binding, needEvictClusters)
	if err != nil {
		klog.Errorf("Failed to evict binding(%s), err: %v.", binding.Name, err)
		return 0, err
	}

	if len(needEvictClusters) != 0 {
		// updateBinding also removes the evicted clusters from allClusters.
		if err = c.updateBinding(binding, allClusters, needEvictClusters); err != nil {
			return 0, err
		}
	}

	// Cleanup clusters that have been evicted or removed in the workloadUnhealthyMap
	c.workloadUnhealthyMap.deleteIrrelevantClusters(resource, allClusters)

	return time.Duration(duration) * time.Second, nil
}
|
||||
|
||||
// evictBinding removes the given clusters from the binding's scheduling result
// (in memory only; the caller persists the change) according to the configured
// PurgeMode:
//   - Graciously: queue a graceful-eviction task;
//   - Never: queue a graceful-eviction task that also suppresses deletion of
//     the evicted workload;
//   - Immediately: drop the cluster from the target list outright.
//
// Graciously and Never both require the GracefulEviction feature gate;
// otherwise an error is returned.
func (c *CRBApplicationFailoverController) evictBinding(binding *workv1alpha2.ClusterResourceBinding, clusters []string) error {
	for _, cluster := range clusters {
		switch binding.Spec.Failover.Application.PurgeMode {
		case policyv1alpha1.Graciously:
			if features.FeatureGate.Enabled(features.GracefulEviction) {
				binding.Spec.GracefulEvictCluster(cluster, workv1alpha2.NewTaskOptions(workv1alpha2.WithProducer(CRBApplicationFailoverControllerName), workv1alpha2.WithReason(workv1alpha2.EvictionReasonApplicationFailure), workv1alpha2.WithGracePeriodSeconds(binding.Spec.Failover.Application.GracePeriodSeconds)))
			} else {
				err := fmt.Errorf("GracefulEviction featureGate must be enabled when purgeMode is %s", policyv1alpha1.Graciously)
				klog.Error(err)
				return err
			}
		case policyv1alpha1.Never:
			if features.FeatureGate.Enabled(features.GracefulEviction) {
				binding.Spec.GracefulEvictCluster(cluster, workv1alpha2.NewTaskOptions(workv1alpha2.WithProducer(CRBApplicationFailoverControllerName), workv1alpha2.WithReason(workv1alpha2.EvictionReasonApplicationFailure), workv1alpha2.WithSuppressDeletion(pointer.Bool(true))))
			} else {
				err := fmt.Errorf("GracefulEviction featureGate must be enabled when purgeMode is %s", policyv1alpha1.Never)
				klog.Error(err)
				return err
			}
		case policyv1alpha1.Immediately:
			binding.Spec.RemoveCluster(cluster)
		}
	}

	return nil
}
|
||||
|
||||
// updateBinding persists the mutated binding. On update failure an eviction
// event carrying the error is emitted per evicted cluster. On success the
// evicted clusters are removed from allClusters — the caller's set is mutated
// in place — and, when graceful eviction is disabled (i.e. the clusters were
// removed immediately), a success eviction event is emitted per cluster.
func (c *CRBApplicationFailoverController) updateBinding(binding *workv1alpha2.ClusterResourceBinding, allClusters sets.Set[string], needEvictClusters []string) error {
	if err := c.Update(context.TODO(), binding); err != nil {
		for _, cluster := range needEvictClusters {
			helper.EmitClusterEvictionEventForClusterResourceBinding(binding, cluster, c.EventRecorder, err)
		}
		klog.ErrorS(err, "Failed to update binding", "binding", klog.KObj(binding))
		return err
	}
	for _, cluster := range needEvictClusters {
		allClusters.Delete(cluster)
	}
	if !features.FeatureGate.Enabled(features.GracefulEviction) {
		for _, cluster := range needEvictClusters {
			helper.EmitClusterEvictionEventForClusterResourceBinding(binding, cluster, c.EventRecorder, nil)
		}
	}

	return nil
}
|
||||
|
||||
// SetupWithManager creates a controller and register to controller manager.
// Event predicates admit only ClusterResourceBindings that have (or, for
// updates, had) spec.failover.application configured; generic events are
// dropped entirely.
func (c *CRBApplicationFailoverController) SetupWithManager(mgr controllerruntime.Manager) error {
	// Fresh bookkeeping per manager setup.
	c.workloadUnhealthyMap = newWorkloadUnhealthyMap()
	clusterResourceBindingPredicateFn := predicate.Funcs{
		CreateFunc: func(createEvent event.CreateEvent) bool {
			obj := createEvent.Object.(*workv1alpha2.ClusterResourceBinding)
			if obj.Spec.Failover == nil || obj.Spec.Failover.Application == nil {
				return false
			}
			return true
		},
		UpdateFunc: func(updateEvent event.UpdateEvent) bool {
			oldObj := updateEvent.ObjectOld.(*workv1alpha2.ClusterResourceBinding)
			newObj := updateEvent.ObjectNew.(*workv1alpha2.ClusterResourceBinding)
			// Skip only when NEITHER the old nor the new object carries an
			// application failover policy (so removal of the policy still
			// triggers a reconcile to clean up state).
			if (oldObj.Spec.Failover == nil || oldObj.Spec.Failover.Application == nil) &&
				(newObj.Spec.Failover == nil || newObj.Spec.Failover.Application == nil) {
				return false
			}
			return true
		},
		DeleteFunc: func(deleteEvent event.DeleteEvent) bool {
			obj := deleteEvent.Object.(*workv1alpha2.ClusterResourceBinding)
			if obj.Spec.Failover == nil || obj.Spec.Failover.Application == nil {
				return false
			}
			return true
		},
		GenericFunc: func(genericEvent event.GenericEvent) bool { return false },
	}

	return controllerruntime.NewControllerManagedBy(mgr).
		For(&workv1alpha2.ClusterResourceBinding{}, builder.WithPredicates(clusterResourceBindingPredicateFn)).
		WithOptions(controller.Options{RateLimiter: ratelimiterflag.DefaultControllerRateLimiter(c.RateLimiterOptions)}).
		Complete(c)
}
|
||||
|
||||
func (c *CRBApplicationFailoverController) clusterResourceBindingFilter(crb *workv1alpha2.ClusterResourceBinding) bool {
|
||||
if crb.Spec.Failover == nil || crb.Spec.Failover.Application == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if len(crb.Status.AggregatedStatus) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
resourceKey, err := helper.ConstructClusterWideKey(crb.Spec.Resource)
|
||||
if err != nil {
|
||||
// Never reach
|
||||
klog.Errorf("Failed to construct clusterWideKey from clusterResourceBinding(%s)", crb.Name)
|
||||
return false
|
||||
}
|
||||
|
||||
if !c.ResourceInterpreter.HookEnabled(resourceKey.GroupVersionKind(), configv1alpha1.InterpreterOperationInterpretHealth) {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
|
@ -0,0 +1,260 @@
|
|||
package applicationfailover
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"time"
|
||||
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/client-go/tools/record"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/utils/pointer"
|
||||
controllerruntime "sigs.k8s.io/controller-runtime"
|
||||
"sigs.k8s.io/controller-runtime/pkg/builder"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
"sigs.k8s.io/controller-runtime/pkg/controller"
|
||||
"sigs.k8s.io/controller-runtime/pkg/event"
|
||||
"sigs.k8s.io/controller-runtime/pkg/predicate"
|
||||
|
||||
configv1alpha1 "github.com/karmada-io/karmada/pkg/apis/config/v1alpha1"
|
||||
policyv1alpha1 "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1"
|
||||
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
|
||||
"github.com/karmada-io/karmada/pkg/features"
|
||||
"github.com/karmada-io/karmada/pkg/resourceinterpreter"
|
||||
"github.com/karmada-io/karmada/pkg/sharedcli/ratelimiterflag"
|
||||
"github.com/karmada-io/karmada/pkg/util/fedinformer/keys"
|
||||
"github.com/karmada-io/karmada/pkg/util/helper"
|
||||
)
|
||||
|
||||
// RBApplicationFailoverControllerName is the controller name that will be used when reporting events.
const RBApplicationFailoverControllerName = "resource-binding-application-failover-controller"

// RBApplicationFailoverController is to sync ResourceBinding's application failover behavior.
type RBApplicationFailoverController struct {
	client.Client // client for reading/updating ResourceBindings
	// EventRecorder emits Kubernetes events attributed to this controller.
	EventRecorder record.EventRecorder
	// RateLimiterOptions tunes the reconcile work-queue rate limiter.
	RateLimiterOptions ratelimiterflag.Options

	// workloadUnhealthyMap records which clusters the specific resource is in an unhealthy state
	workloadUnhealthyMap *workloadUnhealthyMap
	// ResourceInterpreter is consulted to check whether health interpretation
	// is enabled for the bound resource kind.
	ResourceInterpreter resourceinterpreter.ResourceInterpreter
}
|
||||
|
||||
// Reconcile performs a full reconciliation for the object referred to by the Request.
// The Controller will requeue the Request to be processed again if an error is non-nil or
// Result.Requeue is true, otherwise upon completion it will remove the work from the queue.
func (c *RBApplicationFailoverController) Reconcile(ctx context.Context, req controllerruntime.Request) (controllerruntime.Result, error) {
	klog.V(4).Infof("Reconciling ResourceBinding %s.", req.NamespacedName.String())

	binding := &workv1alpha2.ResourceBinding{}
	if err := c.Client.Get(ctx, req.NamespacedName, binding); err != nil {
		if apierrors.IsNotFound(err) {
			// The binding is gone; drop its unhealthy-state bookkeeping.
			// NOTE(review): on NotFound `binding` was never populated, so
			// binding.Spec.Resource is the zero value here and the key built
			// from it cannot match the entry recorded while the binding
			// existed — verify this actually cleans up workloadUnhealthyMap.
			resource, err := helper.ConstructClusterWideKey(binding.Spec.Resource)
			if err != nil {
				return controllerruntime.Result{Requeue: true}, err
			}
			c.workloadUnhealthyMap.delete(resource)
			return controllerruntime.Result{}, nil
		}
		return controllerruntime.Result{Requeue: true}, err
	}

	// Bindings that no longer qualify for application failover also have
	// their bookkeeping removed (see bindingFilter for the criteria).
	if !c.bindingFilter(binding) {
		resource, err := helper.ConstructClusterWideKey(binding.Spec.Resource)
		if err != nil {
			return controllerruntime.Result{Requeue: true}, err
		}
		c.workloadUnhealthyMap.delete(resource)
		return controllerruntime.Result{}, nil
	}

	retryDuration, err := c.syncBinding(binding)
	if err != nil {
		return controllerruntime.Result{Requeue: true}, err
	}
	if retryDuration > 0 {
		// Re-check workload health once the remaining toleration window elapses.
		klog.V(4).Infof("Retry to check health status of the workload after %v minutes.", retryDuration.Minutes())
		return controllerruntime.Result{RequeueAfter: retryDuration}, nil
	}
	return controllerruntime.Result{}, nil
}
|
||||
|
||||
func (c *RBApplicationFailoverController) detectFailure(clusters []string, tolerationSeconds *int32, resource keys.ClusterWideKey) (int32, []string) {
|
||||
var needEvictClusters []string
|
||||
duration := int32(math.MaxInt32)
|
||||
|
||||
for _, cluster := range clusters {
|
||||
if !c.workloadUnhealthyMap.hasWorkloadBeenUnhealthy(resource, cluster) {
|
||||
c.workloadUnhealthyMap.setTimeStamp(resource, cluster)
|
||||
if duration > *tolerationSeconds {
|
||||
duration = *tolerationSeconds
|
||||
}
|
||||
continue
|
||||
}
|
||||
// When the workload in a cluster is in an unhealthy state for more than the tolerance time,
|
||||
// and the cluster has not been in the GracefulEvictionTasks,
|
||||
// the cluster will be added to the list that needs to be evicted.
|
||||
unHealthyTimeStamp := c.workloadUnhealthyMap.getTimeStamp(resource, cluster)
|
||||
timeNow := metav1.Now()
|
||||
if timeNow.After(unHealthyTimeStamp.Add(time.Duration(*tolerationSeconds) * time.Second)) {
|
||||
needEvictClusters = append(needEvictClusters, cluster)
|
||||
} else {
|
||||
if duration > *tolerationSeconds-int32(timeNow.Sub(unHealthyTimeStamp.Time).Seconds()) {
|
||||
duration = *tolerationSeconds - int32(timeNow.Sub(unHealthyTimeStamp.Time).Seconds())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if duration == int32(math.MaxInt32) {
|
||||
duration = 0
|
||||
}
|
||||
return duration, needEvictClusters
|
||||
}
|
||||
|
||||
// syncBinding checks the health of the binding's workload across its scheduled
// clusters, evicts clusters that have stayed unhealthy beyond the configured
// toleration, and returns how long to wait before the next health re-check
// (0 means no re-check is needed).
func (c *RBApplicationFailoverController) syncBinding(binding *workv1alpha2.ResourceBinding) (time.Duration, error) {
	tolerationSeconds := binding.Spec.Failover.Application.DecisionConditions.TolerationSeconds
	resource, err := helper.ConstructClusterWideKey(binding.Spec.Resource)
	if err != nil {
		klog.Errorf("failed to get key from binding(%s)'s resource", binding.Name)
		return 0, err
	}

	// All clusters the binding is currently scheduled to.
	allClusters := sets.New[string]()
	for _, cluster := range binding.Spec.Clusters {
		allClusters.Insert(cluster.Name)
	}

	// presumably filterIrrelevantClusters returns the clusters whose aggregated
	// status reports the application as unhealthy — TODO confirm against its
	// definition (not visible in this file chunk).
	unhealthyClusters := filterIrrelevantClusters(binding.Status.AggregatedStatus, binding.Spec)
	duration, needEvictClusters := c.detectFailure(unhealthyClusters, tolerationSeconds, resource)

	err = c.evictBinding(binding, needEvictClusters)
	if err != nil {
		klog.Errorf("Failed to evict binding(%s/%s), err: %v.", binding.Namespace, binding.Name, err)
		return 0, err
	}

	// Persist the spec mutations only when something was actually evicted.
	if len(needEvictClusters) != 0 {
		if err = c.updateBinding(binding, allClusters, needEvictClusters); err != nil {
			return 0, err
		}
	}

	// Cleanup clusters that have been evicted or removed in the workloadUnhealthyMap
	c.workloadUnhealthyMap.deleteIrrelevantClusters(resource, allClusters)

	return time.Duration(duration) * time.Second, nil
}
|
||||
|
||||
func (c *RBApplicationFailoverController) evictBinding(binding *workv1alpha2.ResourceBinding, clusters []string) error {
|
||||
for _, cluster := range clusters {
|
||||
switch binding.Spec.Failover.Application.PurgeMode {
|
||||
case policyv1alpha1.Graciously:
|
||||
if features.FeatureGate.Enabled(features.GracefulEviction) {
|
||||
binding.Spec.GracefulEvictCluster(cluster, workv1alpha2.NewTaskOptions(workv1alpha2.WithProducer(RBApplicationFailoverControllerName),
|
||||
workv1alpha2.WithReason(workv1alpha2.EvictionReasonApplicationFailure), workv1alpha2.WithGracePeriodSeconds(binding.Spec.Failover.Application.GracePeriodSeconds)))
|
||||
} else {
|
||||
err := fmt.Errorf("GracefulEviction featureGate must be enabled when purgeMode is %s", policyv1alpha1.Graciously)
|
||||
klog.Error(err)
|
||||
return err
|
||||
}
|
||||
case policyv1alpha1.Never:
|
||||
if features.FeatureGate.Enabled(features.GracefulEviction) {
|
||||
binding.Spec.GracefulEvictCluster(cluster, workv1alpha2.NewTaskOptions(workv1alpha2.WithProducer(RBApplicationFailoverControllerName),
|
||||
workv1alpha2.WithReason(workv1alpha2.EvictionReasonApplicationFailure), workv1alpha2.WithSuppressDeletion(pointer.Bool(true))))
|
||||
} else {
|
||||
err := fmt.Errorf("GracefulEviction featureGate must be enabled when purgeMode is %s", policyv1alpha1.Never)
|
||||
klog.Error(err)
|
||||
return err
|
||||
}
|
||||
case policyv1alpha1.Immediately:
|
||||
binding.Spec.RemoveCluster(cluster)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *RBApplicationFailoverController) updateBinding(binding *workv1alpha2.ResourceBinding, allClusters sets.Set[string], needEvictClusters []string) error {
|
||||
if err := c.Update(context.TODO(), binding); err != nil {
|
||||
for _, cluster := range needEvictClusters {
|
||||
helper.EmitClusterEvictionEventForResourceBinding(binding, cluster, c.EventRecorder, err)
|
||||
}
|
||||
klog.ErrorS(err, "Failed to update binding", "binding", klog.KObj(binding))
|
||||
return err
|
||||
}
|
||||
for _, cluster := range needEvictClusters {
|
||||
allClusters.Delete(cluster)
|
||||
}
|
||||
if !features.FeatureGate.Enabled(features.GracefulEviction) {
|
||||
for _, cluster := range needEvictClusters {
|
||||
helper.EmitClusterEvictionEventForResourceBinding(binding, cluster, c.EventRecorder, nil)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetupWithManager creates a controller and register to controller manager.
|
||||
func (c *RBApplicationFailoverController) SetupWithManager(mgr controllerruntime.Manager) error {
|
||||
c.workloadUnhealthyMap = newWorkloadUnhealthyMap()
|
||||
resourceBindingPredicateFn := predicate.Funcs{
|
||||
CreateFunc: func(createEvent event.CreateEvent) bool {
|
||||
obj := createEvent.Object.(*workv1alpha2.ResourceBinding)
|
||||
if obj.Spec.Failover == nil || obj.Spec.Failover.Application == nil {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
},
|
||||
UpdateFunc: func(updateEvent event.UpdateEvent) bool {
|
||||
oldObj := updateEvent.ObjectOld.(*workv1alpha2.ResourceBinding)
|
||||
newObj := updateEvent.ObjectNew.(*workv1alpha2.ResourceBinding)
|
||||
if (oldObj.Spec.Failover == nil || oldObj.Spec.Failover.Application == nil) &&
|
||||
(newObj.Spec.Failover == nil || newObj.Spec.Failover.Application == nil) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
},
|
||||
DeleteFunc: func(deleteEvent event.DeleteEvent) bool {
|
||||
obj := deleteEvent.Object.(*workv1alpha2.ResourceBinding)
|
||||
if obj.Spec.Failover == nil || obj.Spec.Failover.Application == nil {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
},
|
||||
GenericFunc: func(genericEvent event.GenericEvent) bool { return false },
|
||||
}
|
||||
|
||||
return controllerruntime.NewControllerManagedBy(mgr).
|
||||
For(&workv1alpha2.ResourceBinding{}, builder.WithPredicates(resourceBindingPredicateFn)).
|
||||
WithOptions(controller.Options{RateLimiter: ratelimiterflag.DefaultControllerRateLimiter(c.RateLimiterOptions)}).
|
||||
Complete(c)
|
||||
}
|
||||
|
||||
func (c *RBApplicationFailoverController) bindingFilter(rb *workv1alpha2.ResourceBinding) bool {
|
||||
if rb.Spec.Failover == nil || rb.Spec.Failover.Application == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
if len(rb.Status.AggregatedStatus) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
resourceKey, err := helper.ConstructClusterWideKey(rb.Spec.Resource)
|
||||
if err != nil {
|
||||
// Never reach
|
||||
klog.Errorf("Failed to construct clusterWideKey from binding(%s/%s)", rb.Namespace, rb.Name)
|
||||
return false
|
||||
}
|
||||
|
||||
if !c.ResourceInterpreter.HookEnabled(resourceKey.GroupVersionKind(), configv1alpha1.InterpreterOperationInterpretHealth) {
|
||||
return false
|
||||
}
|
||||
|
||||
if !rb.Spec.PropagateDeps {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
|
@ -154,7 +154,7 @@ func (tc *NoExecuteTaintManager) syncBindingEviction(key util.QueueKey) error {
|
|||
if needEviction || tolerationTime == 0 {
|
||||
// update final result to evict the target cluster
|
||||
if features.FeatureGate.Enabled(features.GracefulEviction) {
|
||||
binding.Spec.GracefulEvictCluster(cluster, workv1alpha2.EvictionProducerTaintManager, workv1alpha2.EvictionReasonTaintUntolerated, "")
|
||||
binding.Spec.GracefulEvictCluster(cluster, workv1alpha2.NewTaskOptions(workv1alpha2.WithProducer(workv1alpha2.EvictionProducerTaintManager), workv1alpha2.WithReason(workv1alpha2.EvictionReasonTaintUntolerated)))
|
||||
} else {
|
||||
binding.Spec.RemoveCluster(cluster)
|
||||
}
|
||||
|
@ -206,7 +206,7 @@ func (tc *NoExecuteTaintManager) syncClusterBindingEviction(key util.QueueKey) e
|
|||
if needEviction || tolerationTime == 0 {
|
||||
// update final result to evict the target cluster
|
||||
if features.FeatureGate.Enabled(features.GracefulEviction) {
|
||||
binding.Spec.GracefulEvictCluster(cluster, workv1alpha2.EvictionProducerTaintManager, workv1alpha2.EvictionReasonTaintUntolerated, "")
|
||||
binding.Spec.GracefulEvictCluster(cluster, workv1alpha2.NewTaskOptions(workv1alpha2.WithProducer(workv1alpha2.EvictionProducerTaintManager), workv1alpha2.WithReason(workv1alpha2.EvictionReasonTaintUntolerated)))
|
||||
} else {
|
||||
binding.Spec.RemoveCluster(cluster)
|
||||
}
|
||||
|
|
|
@ -129,6 +129,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA
|
|||
"github.com/karmada-io/karmada/pkg/apis/work/v1alpha2.ResourceBindingSpec": schema_pkg_apis_work_v1alpha2_ResourceBindingSpec(ref),
|
||||
"github.com/karmada-io/karmada/pkg/apis/work/v1alpha2.ResourceBindingStatus": schema_pkg_apis_work_v1alpha2_ResourceBindingStatus(ref),
|
||||
"github.com/karmada-io/karmada/pkg/apis/work/v1alpha2.TargetCluster": schema_pkg_apis_work_v1alpha2_TargetCluster(ref),
|
||||
"github.com/karmada-io/karmada/pkg/apis/work/v1alpha2.TaskOptions": schema_pkg_apis_work_v1alpha2_TaskOptions(ref),
|
||||
"k8s.io/api/admissionregistration/v1.MutatingWebhook": schema_k8sio_api_admissionregistration_v1_MutatingWebhook(ref),
|
||||
"k8s.io/api/admissionregistration/v1.MutatingWebhookConfiguration": schema_k8sio_api_admissionregistration_v1_MutatingWebhookConfiguration(ref),
|
||||
"k8s.io/api/admissionregistration/v1.MutatingWebhookConfigurationList": schema_k8sio_api_admissionregistration_v1_MutatingWebhookConfigurationList(ref),
|
||||
|
@ -5574,6 +5575,53 @@ func schema_pkg_apis_work_v1alpha2_TargetCluster(ref common.ReferenceCallback) c
|
|||
}
|
||||
}
|
||||
|
||||
// schema_pkg_apis_work_v1alpha2_TaskOptions returns the OpenAPI definition for
// workv1alpha2.TaskOptions (the options attached to GracefulEvictionTasks).
// NOTE(review): this looks like openapi-gen output — confirm, and regenerate
// rather than hand-editing if the API type changes.
func schema_pkg_apis_work_v1alpha2_TaskOptions(ref common.ReferenceCallback) common.OpenAPIDefinition {
	return common.OpenAPIDefinition{
		Schema: spec.Schema{
			SchemaProps: spec.SchemaProps{
				Description: "TaskOptions represents options for GracefulEvictionTasks.",
				Type:        []string{"object"},
				Properties: map[string]spec.Schema{
					"producer": {
						SchemaProps: spec.SchemaProps{
							Default: "",
							Type:    []string{"string"},
							Format:  "",
						},
					},
					"reason": {
						SchemaProps: spec.SchemaProps{
							Default: "",
							Type:    []string{"string"},
							Format:  "",
						},
					},
					"message": {
						SchemaProps: spec.SchemaProps{
							Default: "",
							Type:    []string{"string"},
							Format:  "",
						},
					},
					"gracePeriodSeconds": {
						SchemaProps: spec.SchemaProps{
							Type:   []string{"integer"},
							Format: "int32",
						},
					},
					"suppressDeletion": {
						SchemaProps: spec.SchemaProps{
							Type:   []string{"boolean"},
							Format: "",
						},
					},
				},
				Required: []string{"producer", "reason", "message", "gracePeriodSeconds", "suppressDeletion"},
			},
		},
	}
}
|
||||
|
||||
func schema_k8sio_api_admissionregistration_v1_MutatingWebhook(ref common.ReferenceCallback) common.OpenAPIDefinition {
|
||||
return common.OpenAPIDefinition{
|
||||
Schema: spec.Schema{
|
||||
|
|
|
@ -8,7 +8,6 @@ import (
|
|||
clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
|
||||
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
|
||||
"github.com/karmada-io/karmada/pkg/scheduler/framework"
|
||||
"github.com/karmada-io/karmada/pkg/util/helper"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -33,7 +32,7 @@ func (p *ClusterEviction) Name() string {
|
|||
|
||||
// Filter checks if the target cluster is in the GracefulEvictionTasks which means it is in the process of eviction.
|
||||
func (p *ClusterEviction) Filter(_ context.Context, bindingSpec *workv1alpha2.ResourceBindingSpec, _ *workv1alpha2.ResourceBindingStatus, cluster *clusterv1alpha1.Cluster) *framework.Result {
|
||||
if helper.ClusterInGracefulEvictionTasks(bindingSpec.GracefulEvictionTasks, cluster.Name) {
|
||||
if bindingSpec.ClusterInGracefulEvictionTasks(cluster.Name) {
|
||||
klog.V(2).Infof("Cluster(%s) is in the process of eviction.", cluster.Name)
|
||||
return framework.NewResult(framework.Unschedulable, "cluster(s) is in the process of eviction")
|
||||
}
|
||||
|
|
|
@ -2,7 +2,6 @@ package helper
|
|||
|
||||
import (
|
||||
clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
|
||||
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
|
||||
)
|
||||
|
||||
// IsAPIEnabled checks if target API (or CRD) referencing by groupVersion and kind has been installed.
|
||||
|
@ -22,14 +21,3 @@ func IsAPIEnabled(APIEnablements []clusterv1alpha1.APIEnablement, groupVersion s
|
|||
|
||||
return false
|
||||
}
|
||||
|
||||
// ClusterInGracefulEvictionTasks checks if the target cluster is in the GracefulEvictionTasks which means it is in the process of eviction.
|
||||
func ClusterInGracefulEvictionTasks(gracefulEvictionTasks []workv1alpha2.GracefulEvictionTask, clusterName string) bool {
|
||||
for _, task := range gracefulEvictionTasks {
|
||||
if task.FromCluster == clusterName {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
|
|
@ -4,7 +4,6 @@ import (
|
|||
"testing"
|
||||
|
||||
clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
|
||||
workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
|
||||
)
|
||||
|
||||
func TestIsAPIEnabled(t *testing.T) {
|
||||
|
@ -73,48 +72,3 @@ func TestIsAPIEnabled(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestClusterInGracefulEvictionTasks(t *testing.T) {
|
||||
gracefulEvictionTasks := []workv1alpha2.GracefulEvictionTask{
|
||||
{
|
||||
FromCluster: ClusterMember1,
|
||||
Producer: workv1alpha2.EvictionProducerTaintManager,
|
||||
Reason: workv1alpha2.EvictionReasonTaintUntolerated,
|
||||
},
|
||||
{
|
||||
FromCluster: ClusterMember2,
|
||||
Producer: workv1alpha2.EvictionProducerTaintManager,
|
||||
Reason: workv1alpha2.EvictionReasonTaintUntolerated,
|
||||
},
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
gracefulEvictionTasks []workv1alpha2.GracefulEvictionTask
|
||||
targetCluster string
|
||||
expect bool
|
||||
}{
|
||||
{
|
||||
name: "targetCluster is in the process of eviction",
|
||||
gracefulEvictionTasks: gracefulEvictionTasks,
|
||||
targetCluster: ClusterMember1,
|
||||
expect: true,
|
||||
},
|
||||
{
|
||||
name: "targetCluster is not in the process of eviction",
|
||||
gracefulEvictionTasks: gracefulEvictionTasks,
|
||||
targetCluster: ClusterMember3,
|
||||
expect: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
tc := test
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
result := ClusterInGracefulEvictionTasks(tc.gracefulEvictionTasks, tc.targetCluster)
|
||||
if result != tc.expect {
|
||||
t.Errorf("expected: %v, but got: %v", tc.expect, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue