feat: taint cluster by condition and filter cluster by taint in scheduler
Signed-off-by: likakuli <1154584512@qq.com>
This commit is contained in:
parent
7e6c38a6e7
commit
a42c819c10
|
@ -6,4 +6,6 @@ const (
|
|||
EventReasonCreateExecutionSpaceFailed = "CreateExecutionSpaceFailed"
|
||||
// EventReasonRemoveExecutionSpaceFailed indicates that remove execution space failed.
|
||||
EventReasonRemoveExecutionSpaceFailed = "RemoveExecutionSpaceFailed"
|
||||
// EventReasonTaintClusterByConditionFailed indicates that taint cluster by condition failed.
|
||||
EventReasonTaintClusterByConditionFailed = "TaintClusterByCondition"
|
||||
)
|
||||
|
|
|
@ -39,17 +39,32 @@ const (
|
|||
|
||||
var (
|
||||
// UnreachableTaintTemplate is the taint for when a cluster becomes unreachable.
|
||||
// Used for taint based eviction.
|
||||
UnreachableTaintTemplate = &corev1.Taint{
|
||||
Key: clusterv1alpha1.TaintClusterUnreachable,
|
||||
Effect: corev1.TaintEffectNoExecute,
|
||||
}
|
||||
|
||||
// NotReadyTaintTemplate is the taint for when a cluster is not ready for
|
||||
// executing resources.
|
||||
// UnreachableTaintTemplateForSched is the taint for when a cluster becomes unreachable.
|
||||
// Used for taint based schedule.
|
||||
UnreachableTaintTemplateForSched = &corev1.Taint{
|
||||
Key: clusterv1alpha1.TaintClusterUnreachable,
|
||||
Effect: corev1.TaintEffectNoSchedule,
|
||||
}
|
||||
|
||||
// NotReadyTaintTemplate is the taint for when a cluster is not ready for executing resources.
|
||||
// Used for taint based eviction.
|
||||
NotReadyTaintTemplate = &corev1.Taint{
|
||||
Key: clusterv1alpha1.TaintClusterNotReady,
|
||||
Effect: corev1.TaintEffectNoExecute,
|
||||
}
|
||||
|
||||
// NotReadyTaintTemplateForSched is the taint for when a cluster is not ready for executing resources.
|
||||
// Used for taint based schedule.
|
||||
NotReadyTaintTemplateForSched = &corev1.Taint{
|
||||
Key: clusterv1alpha1.TaintClusterNotReady,
|
||||
Effect: corev1.TaintEffectNoSchedule,
|
||||
}
|
||||
)
|
||||
|
||||
// Controller is to sync Cluster.
|
||||
|
@ -144,7 +159,7 @@ func (c *Controller) Reconcile(ctx context.Context, req controllerruntime.Reques
|
|||
return c.removeCluster(cluster)
|
||||
}
|
||||
|
||||
return c.syncCluster(cluster)
|
||||
return c.syncCluster(ctx, cluster)
|
||||
}
|
||||
|
||||
// Start starts an asynchronous loop that monitors the status of cluster.
|
||||
|
@ -172,7 +187,7 @@ func (c *Controller) SetupWithManager(mgr controllerruntime.Manager) error {
|
|||
})
|
||||
}
|
||||
|
||||
func (c *Controller) syncCluster(cluster *clusterv1alpha1.Cluster) (controllerruntime.Result, error) {
|
||||
func (c *Controller) syncCluster(ctx context.Context, cluster *clusterv1alpha1.Cluster) (controllerruntime.Result, error) {
|
||||
// create execution space
|
||||
err := c.createExecutionSpace(cluster)
|
||||
if err != nil {
|
||||
|
@ -180,6 +195,13 @@ func (c *Controller) syncCluster(cluster *clusterv1alpha1.Cluster) (controllerru
|
|||
return controllerruntime.Result{Requeue: true}, err
|
||||
}
|
||||
|
||||
// taint cluster by condition
|
||||
err = c.taintClusterByCondition(ctx, cluster)
|
||||
if err != nil {
|
||||
c.EventRecorder.Event(cluster, corev1.EventTypeWarning, fmt.Sprintf("Failed %s", clusterv1alpha1.EventReasonTaintClusterByConditionFailed), err.Error())
|
||||
return controllerruntime.Result{Requeue: true}, err
|
||||
}
|
||||
|
||||
// ensure finalizer
|
||||
return c.ensureFinalizer(cluster)
|
||||
}
|
||||
|
@ -511,3 +533,28 @@ func (c *Controller) processTaintBaseEviction(ctx context.Context, cluster *clus
|
|||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Controller) taintClusterByCondition(ctx context.Context, cluster *clusterv1alpha1.Cluster) error {
|
||||
currentReadyCondition := meta.FindStatusCondition(cluster.Status.Conditions, clusterv1alpha1.ClusterConditionReady)
|
||||
|
||||
if currentReadyCondition != nil {
|
||||
switch currentReadyCondition.Status {
|
||||
case metav1.ConditionFalse:
|
||||
// Add NotReadyTaintTemplateForSched taint immediately.
|
||||
if err := utilhelper.UpdateClusterControllerTaint(ctx, c.Client, []*corev1.Taint{NotReadyTaintTemplateForSched}, []*corev1.Taint{UnreachableTaintTemplateForSched}, cluster); err != nil {
|
||||
klog.ErrorS(err, "Failed to instantly update UnreachableTaintForSched to NotReadyTaintForSched, will try again in the next cycle.", "cluster", cluster.Name)
|
||||
}
|
||||
case metav1.ConditionUnknown:
|
||||
// Add UnreachableTaintTemplateForSched taint immediately.
|
||||
if err := utilhelper.UpdateClusterControllerTaint(ctx, c.Client, []*corev1.Taint{UnreachableTaintTemplateForSched}, []*corev1.Taint{NotReadyTaintTemplateForSched}, cluster); err != nil {
|
||||
klog.ErrorS(err, "Failed to instantly swap NotReadyTaintForSched to UnreachableTaintForSched, will try again in the next cycle.", "cluster", cluster.Name)
|
||||
}
|
||||
case metav1.ConditionTrue:
|
||||
if err := utilhelper.UpdateClusterControllerTaint(ctx, c.Client, nil, []*corev1.Taint{NotReadyTaintTemplateForSched, UnreachableTaintTemplateForSched}, cluster); err != nil {
|
||||
klog.ErrorS(err, "Failed to remove schedule taints from cluster, will retry in next iteration.", "cluster", cluster.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -93,7 +93,8 @@ func (g *genericScheduler) findClustersThatFit(
|
|||
defer metrics.ScheduleStep(metrics.ScheduleStepFilter, time.Now())
|
||||
|
||||
var out []*clusterv1alpha1.Cluster
|
||||
clusters := clusterInfo.GetReadyClusters()
|
||||
// Do NOT filter out unhealthy clusters here; let users decide by using the ClusterTolerations of the Placement.
|
||||
clusters := clusterInfo.GetClusters()
|
||||
for _, c := range clusters {
|
||||
if result := fwk.RunFilterPlugins(ctx, placement, resource, c.Cluster()); !result.IsSuccess() {
|
||||
klog.V(4).Infof("cluster %q is not fit, reason: %v", c.Cluster().Name, result.AsError())
|
||||
|
|
Loading…
Reference in New Issue