Merge pull request #1854 from likakuli/feature_notreadytaint

feat: use taint instead of condition to filter cluster
This commit is contained in:
karmada-bot 2022-07-22 16:57:48 +08:00 committed by GitHub
commit 02836f928d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 55 additions and 5 deletions

View File

@ -6,4 +6,6 @@ const (
EventReasonCreateExecutionSpaceFailed = "CreateExecutionSpaceFailed"
// EventReasonRemoveExecutionSpaceFailed indicates that remove execution space failed.
EventReasonRemoveExecutionSpaceFailed = "RemoveExecutionSpaceFailed"
// EventReasonTaintClusterByConditionFailed indicates that taint cluster by condition failed.
EventReasonTaintClusterByConditionFailed = "TaintClusterByCondition"
)

View File

@ -40,17 +40,32 @@ const (
var (
// UnreachableTaintTemplate is the taint for when a cluster becomes unreachable.
// Used for taint based eviction (effect NoExecute).
UnreachableTaintTemplate = &corev1.Taint{
Key: clusterv1alpha1.TaintClusterUnreachable,
Effect: corev1.TaintEffectNoExecute,
}
// UnreachableTaintTemplateForSched is the taint for when a cluster becomes unreachable.
// Used for taint based schedule (effect NoSchedule). It uses the same key as
// UnreachableTaintTemplate and differs only in its effect.
UnreachableTaintTemplateForSched = &corev1.Taint{
Key: clusterv1alpha1.TaintClusterUnreachable,
Effect: corev1.TaintEffectNoSchedule,
}
// NotReadyTaintTemplate is the taint for when a cluster is not ready for executing resources.
// Used for taint based eviction (effect NoExecute).
NotReadyTaintTemplate = &corev1.Taint{
Key: clusterv1alpha1.TaintClusterNotReady,
Effect: corev1.TaintEffectNoExecute,
}
// NotReadyTaintTemplateForSched is the taint for when a cluster is not ready for executing resources.
// Used for taint based schedule (effect NoSchedule). It uses the same key as
// NotReadyTaintTemplate and differs only in its effect.
NotReadyTaintTemplateForSched = &corev1.Taint{
Key: clusterv1alpha1.TaintClusterNotReady,
Effect: corev1.TaintEffectNoSchedule,
}
)
// Controller is to sync Cluster.
@ -145,7 +160,7 @@ func (c *Controller) Reconcile(ctx context.Context, req controllerruntime.Reques
return c.removeCluster(cluster)
}
return c.syncCluster(cluster)
return c.syncCluster(ctx, cluster)
}
// Start starts an asynchronous loop that monitors the status of cluster.
@ -173,7 +188,7 @@ func (c *Controller) SetupWithManager(mgr controllerruntime.Manager) error {
})
}
func (c *Controller) syncCluster(cluster *clusterv1alpha1.Cluster) (controllerruntime.Result, error) {
func (c *Controller) syncCluster(ctx context.Context, cluster *clusterv1alpha1.Cluster) (controllerruntime.Result, error) {
// create execution space
err := c.createExecutionSpace(cluster)
if err != nil {
@ -181,6 +196,13 @@ func (c *Controller) syncCluster(cluster *clusterv1alpha1.Cluster) (controllerru
return controllerruntime.Result{Requeue: true}, err
}
// taint cluster by condition
err = c.taintClusterByCondition(ctx, cluster)
if err != nil {
c.EventRecorder.Event(cluster, corev1.EventTypeWarning, fmt.Sprintf("Failed %s", clusterv1alpha1.EventReasonTaintClusterByConditionFailed), err.Error())
return controllerruntime.Result{Requeue: true}, err
}
// ensure finalizer
return c.ensureFinalizer(cluster)
}
@ -516,3 +538,28 @@ func (c *Controller) processTaintBaseEviction(ctx context.Context, cluster *clus
}
return nil
}
// taintClusterByCondition keeps the cluster's schedule taints in line with its
// Ready condition:
//
//   - Ready is False: add NotReadyTaintTemplateForSched and remove
//     UnreachableTaintTemplateForSched.
//   - Ready is Unknown: add UnreachableTaintTemplateForSched and remove
//     NotReadyTaintTemplateForSched.
//   - Ready is True: remove both schedule taints.
//
// A cluster that has no Ready condition yet is left untouched and nil is
// returned. Any error from UpdateClusterControllerTaint is logged and returned
// to the caller instead of being dropped, so the caller's error handling
// actually takes effect and the update is retried.
func (c *Controller) taintClusterByCondition(ctx context.Context, cluster *clusterv1alpha1.Cluster) error {
	currentReadyCondition := meta.FindStatusCondition(cluster.Status.Conditions, clusterv1alpha1.ClusterConditionReady)
	if currentReadyCondition == nil {
		// Nothing to derive the taints from.
		return nil
	}

	var err error
	switch currentReadyCondition.Status {
	case metav1.ConditionFalse:
		// Add NotReadyTaintTemplateForSched taint immediately.
		if err = utilhelper.UpdateClusterControllerTaint(ctx, c.Client, []*corev1.Taint{NotReadyTaintTemplateForSched}, []*corev1.Taint{UnreachableTaintTemplateForSched}, cluster); err != nil {
			klog.ErrorS(err, "Failed to instantly update UnreachableTaintForSched to NotReadyTaintForSched, will try again in the next cycle.", "cluster", cluster.Name)
		}
	case metav1.ConditionUnknown:
		// Add UnreachableTaintTemplateForSched taint immediately.
		if err = utilhelper.UpdateClusterControllerTaint(ctx, c.Client, []*corev1.Taint{UnreachableTaintTemplateForSched}, []*corev1.Taint{NotReadyTaintTemplateForSched}, cluster); err != nil {
			klog.ErrorS(err, "Failed to instantly swap NotReadyTaintForSched to UnreachableTaintForSched, will try again in the next cycle.", "cluster", cluster.Name)
		}
	case metav1.ConditionTrue:
		// The cluster is ready again: drop both schedule taints.
		if err = utilhelper.UpdateClusterControllerTaint(ctx, c.Client, nil, []*corev1.Taint{NotReadyTaintTemplateForSched, UnreachableTaintTemplateForSched}, cluster); err != nil {
			klog.ErrorS(err, "Failed to remove schedule taints from cluster, will retry in next iteration.", "cluster", cluster.Name)
		}
	}

	// Propagate the failure (if any) so the reconcile loop requeues the cluster.
	return err
}

View File

@ -101,7 +101,8 @@ func (g *genericScheduler) findClustersThatFit(
defer metrics.ScheduleStep(metrics.ScheduleStepFilter, time.Now())
var out []*clusterv1alpha1.Cluster
clusters := clusterInfo.GetReadyClusters()
// DO NOT filter out unhealthy clusters here; let users make decisions by using ClusterTolerations of Placement.
clusters := clusterInfo.GetClusters()
for _, c := range clusters {
if result := fwk.RunFilterPlugins(ctx, placement, resource, c.Cluster()); !result.IsSuccess() {
klog.V(4).Infof("cluster %q is not fit, reason: %v", c.Cluster().Name, result.AsError())