package applicationfailover

import (
	"context"
	"fmt"
	"math"
	"time"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/client-go/tools/record"
	"k8s.io/klog/v2"
	"k8s.io/utils/pointer"
	controllerruntime "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/builder"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller"
	"sigs.k8s.io/controller-runtime/pkg/event"
	"sigs.k8s.io/controller-runtime/pkg/predicate"

	configv1alpha1 "github.com/karmada-io/karmada/pkg/apis/config/v1alpha1"
	policyv1alpha1 "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1"
	workv1alpha2 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha2"
	"github.com/karmada-io/karmada/pkg/features"
	"github.com/karmada-io/karmada/pkg/resourceinterpreter"
	"github.com/karmada-io/karmada/pkg/sharedcli/ratelimiterflag"
	"github.com/karmada-io/karmada/pkg/util/helper"
)

// RBApplicationFailoverControllerName is the controller name that will be used when reporting events.
const RBApplicationFailoverControllerName = "resource-binding-application-failover-controller"

// RBApplicationFailoverController is to sync ResourceBinding's application failover behavior.
type RBApplicationFailoverController struct {
	client.Client
	EventRecorder      record.EventRecorder
	RateLimiterOptions ratelimiterflag.Options

	// workloadUnhealthyMap records the clusters in which a specific resource is currently in an unhealthy state
	workloadUnhealthyMap *workloadUnhealthyMap
	ResourceInterpreter  resourceinterpreter.ResourceInterpreter
}

// Reconcile performs a full reconciliation for the object referred to by the Request.
// The Controller will requeue the Request to be processed again if an error is non-nil or
// Result.Requeue is true, otherwise upon completion it will remove the work from the queue.
func (c *RBApplicationFailoverController) Reconcile(ctx context.Context, req controllerruntime.Request) (controllerruntime.Result, error) {
	klog.V(4).Infof("Reconciling ResourceBinding %s.", req.NamespacedName.String())

	binding := &workv1alpha2.ResourceBinding{}
	if err := c.Client.Get(ctx, req.NamespacedName, binding); err != nil {
		if apierrors.IsNotFound(err) {
			c.workloadUnhealthyMap.delete(req.NamespacedName)
			return controllerruntime.Result{}, nil
		}
		return controllerruntime.Result{Requeue: true}, err
	}

	if !c.bindingFilter(binding) {
		c.workloadUnhealthyMap.delete(req.NamespacedName)
		return controllerruntime.Result{}, nil
	}

	retryDuration, err := c.syncBinding(binding)
	if err != nil {
		return controllerruntime.Result{Requeue: true}, err
	}
	if retryDuration > 0 {
		klog.V(4).Infof("Retry to check health status of the workload after %v minutes.", retryDuration.Minutes())
		return controllerruntime.Result{RequeueAfter: retryDuration}, nil
	}
	return controllerruntime.Result{}, nil
}
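
// detectFailure checks each cluster in which the workload is currently unhealthy. Clusters whose
// unhealthy duration has exceeded the toleration are returned for eviction; otherwise the returned
// duration is the shortest remaining time before some cluster exceeds its toleration
// (0 means no further check is needed).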
func (c *RBApplicationFailoverController) detectFailure(clusters []string, tolerationSeconds *int32, key types.NamespacedName) (int32, []string) {
	var needEvictClusters []string
	duration := int32(math.MaxInt32)

	for _, cluster := range clusters {
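		// The workload has just been observed as unhealthy in this cluster: record the
		// timestamp and wait for the full toleration period before considering eviction.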
		if !c.workloadUnhealthyMap.hasWorkloadBeenUnhealthy(key, cluster) {
			c.workloadUnhealthyMap.setTimeStamp(key, cluster)
			if duration > *tolerationSeconds {
				duration = *tolerationSeconds
			}
			continue
		}

		// When the workload in a cluster has been unhealthy for longer than the toleration time,
		// and the cluster is not yet in the GracefulEvictionTasks, the cluster is added to the
		// list of clusters that need to be evicted.
		unHealthyTimeStamp := c.workloadUnhealthyMap.getTimeStamp(key, cluster)
		timeNow := metav1.Now()
		if timeNow.After(unHealthyTimeStamp.Add(time.Duration(*tolerationSeconds) * time.Second)) {
			needEvictClusters = append(needEvictClusters, cluster)
		} else {
			if duration > *tolerationSeconds-int32(timeNow.Sub(unHealthyTimeStamp.Time).Seconds()) {
				duration = *tolerationSeconds - int32(timeNow.Sub(unHealthyTimeStamp.Time).Seconds())
			}
		}
	}

	if duration == int32(math.MaxInt32) {
		duration = 0
	}

	return duration, needEvictClusters
}
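
// syncBinding inspects the binding's aggregated status, evicts the clusters where the application
// has been unhealthy beyond the tolerated time, and returns how long to wait before the health
// status should be checked again (0 means no further check is needed).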
func (c *RBApplicationFailoverController) syncBinding(binding *workv1alpha2.ResourceBinding) (time.Duration, error) {
	key := types.NamespacedName{Name: binding.Name, Namespace: binding.Namespace}
	tolerationSeconds := binding.Spec.Failover.Application.DecisionConditions.TolerationSeconds

	allClusters := sets.New[string]()
	for _, cluster := range binding.Spec.Clusters {
		allClusters.Insert(cluster.Name)
	}

	unhealthyClusters, others := distinguishUnhealthyClustersWithOthers(binding.Status.AggregatedStatus, binding.Spec)
	duration, needEvictClusters := c.detectFailure(unhealthyClusters, tolerationSeconds, key)

	err := c.evictBinding(binding, needEvictClusters)
	if err != nil {
		klog.Errorf("Failed to evict binding(%s/%s), err: %v.", binding.Namespace, binding.Name, err)
		return 0, err
	}

	if len(needEvictClusters) != 0 {
		if err = c.updateBinding(binding, allClusters, needEvictClusters); err != nil {
			return 0, err
		}
	}

	// Clean up the workloadUnhealthyMap: drop the clusters where the application is no longer
	// unhealthy, as well as the clusters that have been evicted or removed from the binding.
	c.workloadUnhealthyMap.deleteIrrelevantClusters(key, allClusters, others)

	return time.Duration(duration) * time.Second, nil
}
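
// evictBinding marks the given clusters for eviction in the binding spec according to the
// configured PurgeMode. It only mutates the binding in memory; the caller is responsible for
// writing the change back via updateBinding.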
func (c *RBApplicationFailoverController) evictBinding(binding *workv1alpha2.ResourceBinding, clusters []string) error {
	for _, cluster := range clusters {
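		// How the failed application is purged depends on the PurgeMode configured in the
		// binding's application failover behavior: Graciously and Never both rely on graceful
		// eviction tasks (and therefore on the GracefulEviction feature gate), while Immediately
		// simply drops the cluster from the scheduling result.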
		switch binding.Spec.Failover.Application.PurgeMode {
		case policyv1alpha1.Graciously:
			if features.FeatureGate.Enabled(features.GracefulEviction) {
				binding.Spec.GracefulEvictCluster(cluster, workv1alpha2.NewTaskOptions(workv1alpha2.WithProducer(RBApplicationFailoverControllerName),
					workv1alpha2.WithReason(workv1alpha2.EvictionReasonApplicationFailure), workv1alpha2.WithGracePeriodSeconds(binding.Spec.Failover.Application.GracePeriodSeconds)))
			} else {
				err := fmt.Errorf("GracefulEviction featureGate must be enabled when purgeMode is %s", policyv1alpha1.Graciously)
				klog.Error(err)
				return err
			}
		case policyv1alpha1.Never:
			if features.FeatureGate.Enabled(features.GracefulEviction) {
				binding.Spec.GracefulEvictCluster(cluster, workv1alpha2.NewTaskOptions(workv1alpha2.WithProducer(RBApplicationFailoverControllerName),
					workv1alpha2.WithReason(workv1alpha2.EvictionReasonApplicationFailure), workv1alpha2.WithSuppressDeletion(pointer.Bool(true))))
			} else {
				err := fmt.Errorf("GracefulEviction featureGate must be enabled when purgeMode is %s", policyv1alpha1.Never)
				klog.Error(err)
				return err
			}
		case policyv1alpha1.Immediately:
			binding.Spec.RemoveCluster(cluster)
		}
	}

	return nil
}
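
// updateBinding writes the mutated binding back to the API server and removes the evicted
// clusters from allClusters. Cluster eviction events are recorded on update failure, and on
// success only when the GracefulEviction feature gate is disabled.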
func (c *RBApplicationFailoverController) updateBinding(binding *workv1alpha2.ResourceBinding, allClusters sets.Set[string], needEvictClusters []string) error {
	if err := c.Update(context.TODO(), binding); err != nil {
		for _, cluster := range needEvictClusters {
			helper.EmitClusterEvictionEventForResourceBinding(binding, cluster, c.EventRecorder, err)
		}
		klog.ErrorS(err, "Failed to update binding", "binding", klog.KObj(binding))
		return err
	}

	for _, cluster := range needEvictClusters {
		allClusters.Delete(cluster)
	}
	if !features.FeatureGate.Enabled(features.GracefulEviction) {
		for _, cluster := range needEvictClusters {
			helper.EmitClusterEvictionEventForResourceBinding(binding, cluster, c.EventRecorder, nil)
		}
	}

	return nil
}

// SetupWithManager creates a controller and registers it with the controller manager.
func (c *RBApplicationFailoverController) SetupWithManager(mgr controllerruntime.Manager) error {
	c.workloadUnhealthyMap = newWorkloadUnhealthyMap()
	resourceBindingPredicateFn := predicate.Funcs{
		CreateFunc: func(createEvent event.CreateEvent) bool {
			obj := createEvent.Object.(*workv1alpha2.ResourceBinding)
			if obj.Spec.Failover == nil || obj.Spec.Failover.Application == nil {
				return false
			}
			return true
		},
		UpdateFunc: func(updateEvent event.UpdateEvent) bool {
			oldObj := updateEvent.ObjectOld.(*workv1alpha2.ResourceBinding)
			newObj := updateEvent.ObjectNew.(*workv1alpha2.ResourceBinding)
			if (oldObj.Spec.Failover == nil || oldObj.Spec.Failover.Application == nil) &&
				(newObj.Spec.Failover == nil || newObj.Spec.Failover.Application == nil) {
				return false
			}
			return true
		},
		DeleteFunc: func(deleteEvent event.DeleteEvent) bool {
			obj := deleteEvent.Object.(*workv1alpha2.ResourceBinding)
			if obj.Spec.Failover == nil || obj.Spec.Failover.Application == nil {
				return false
			}
			return true
		},
		GenericFunc: func(genericEvent event.GenericEvent) bool { return false },
	}

	return controllerruntime.NewControllerManagedBy(mgr).
		For(&workv1alpha2.ResourceBinding{}, builder.WithPredicates(resourceBindingPredicateFn)).
		WithOptions(controller.Options{RateLimiter: ratelimiterflag.DefaultControllerRateLimiter(c.RateLimiterOptions)}).
		Complete(c)
}
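
// bindingFilter reports whether the binding should be handled by this controller: application
// failover must be configured, an aggregated status must be present, the resource interpreter
// must support the InterpretHealth operation for the referenced resource, and the binding must
// propagate its dependencies.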
func (c *RBApplicationFailoverController) bindingFilter(rb *workv1alpha2.ResourceBinding) bool {
	if rb.Spec.Failover == nil || rb.Spec.Failover.Application == nil {
		return false
	}

	if len(rb.Status.AggregatedStatus) == 0 {
		return false
	}

	resourceKey, err := helper.ConstructClusterWideKey(rb.Spec.Resource)
	if err != nil {
		// Never reach
		klog.Errorf("Failed to construct clusterWideKey from binding(%s/%s)", rb.Namespace, rb.Name)
		return false
	}

	if !c.ResourceInterpreter.HookEnabled(resourceKey.GroupVersionKind(), configv1alpha1.InterpreterOperationInterpretHealth) {
		return false
	}

	if !rb.Spec.PropagateDeps {
		return false
	}
	return true
}