Merge pull request #6061 from chaosi-zju/ensureWork

enhance ensureWork to continue processing clusters after individual failures
This commit is contained in:
karmada-bot 2025-01-24 11:50:52 +08:00 committed by GitHub
commit d24b2b9d08
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 33 additions and 20 deletions

View File

@ -23,6 +23,7 @@ import (
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/klog/v2"
"k8s.io/utils/ptr"
@ -45,19 +46,8 @@ func ensureWork(
ctx context.Context, c client.Client, resourceInterpreter resourceinterpreter.ResourceInterpreter, workload *unstructured.Unstructured,
overrideManager overridemanager.OverrideManager, binding metav1.Object, scope apiextensionsv1.ResourceScope,
) error {
var targetClusters []workv1alpha2.TargetCluster
var bindingSpec workv1alpha2.ResourceBindingSpec
switch scope {
case apiextensionsv1.NamespaceScoped:
bindingObj := binding.(*workv1alpha2.ResourceBinding)
bindingSpec = bindingObj.Spec
case apiextensionsv1.ClusterScoped:
bindingObj := binding.(*workv1alpha2.ClusterResourceBinding)
bindingSpec = bindingObj.Spec
}
targetClusters = bindingSpec.Clusters
targetClusters = mergeTargetClusters(targetClusters, bindingSpec.RequiredBy)
bindingSpec := getBindingSpec(binding, scope)
targetClusters := mergeTargetClusters(bindingSpec.Clusters, bindingSpec.RequiredBy)
var jobCompletions []workv1alpha2.TargetCluster
var err error
@ -68,6 +58,7 @@ func ensureWork(
}
}
var errs []error
for i := range targetClusters {
targetCluster := targetClusters[i]
clonedWorkload := workload.DeepCopy()
@ -82,7 +73,8 @@ func ensureWork(
if err != nil {
klog.Errorf("Failed to revise replica for %s/%s/%s in cluster %s, err is: %v",
workload.GetKind(), workload.GetNamespace(), workload.GetName(), targetCluster.Name, err)
return err
errs = append(errs, err)
continue
}
}
@ -94,7 +86,8 @@ func ensureWork(
if err = helper.ApplyReplica(clonedWorkload, int64(jobCompletions[i].Replicas), util.CompletionsField); err != nil {
klog.Errorf("Failed to apply Completions for %s/%s/%s in cluster %s, err is: %v",
clonedWorkload.GetKind(), clonedWorkload.GetNamespace(), clonedWorkload.GetName(), targetCluster.Name, err)
return err
errs = append(errs, err)
continue
}
}
}
@ -102,8 +95,10 @@ func ensureWork(
// We should call ApplyOverridePolicies last, as override rules have the highest priority
cops, ops, err := overrideManager.ApplyOverridePolicies(clonedWorkload, targetCluster.Name)
if err != nil {
klog.Errorf("Failed to apply overrides for %s/%s/%s, err is: %v", clonedWorkload.GetKind(), clonedWorkload.GetNamespace(), clonedWorkload.GetName(), err)
return err
klog.Errorf("Failed to apply overrides for %s/%s/%s in cluster %s, err is: %v",
clonedWorkload.GetKind(), clonedWorkload.GetNamespace(), clonedWorkload.GetName(), targetCluster.Name, err)
errs = append(errs, err)
continue
}
workLabel := mergeLabel(clonedWorkload, binding, scope)
@ -111,8 +106,9 @@ func ensureWork(
annotations = mergeConflictResolution(clonedWorkload, bindingSpec.ConflictResolution, annotations)
annotations, err = RecordAppliedOverrides(cops, ops, annotations)
if err != nil {
klog.Errorf("Failed to record appliedOverrides, Error: %v", err)
return err
klog.Errorf("Failed to record appliedOverrides in cluster %s, Error: %v", targetCluster.Name, err)
errs = append(errs, err)
continue
}
if features.FeatureGate.Enabled(features.StatefulFailoverInjection) {
@ -137,12 +133,29 @@ func ensureWork(
ctrlutil.WithSuspendDispatching(shouldSuspendDispatching(bindingSpec.Suspension, targetCluster)),
ctrlutil.WithPreserveResourcesOnDeletion(ptr.Deref(bindingSpec.PreserveResourcesOnDeletion, false)),
); err != nil {
return err
errs = append(errs, err)
continue
}
}
if len(errs) > 0 {
return errors.NewAggregate(errs)
}
return nil
}
func getBindingSpec(binding metav1.Object, scope apiextensionsv1.ResourceScope) workv1alpha2.ResourceBindingSpec {
var bindingSpec workv1alpha2.ResourceBindingSpec
switch scope {
case apiextensionsv1.NamespaceScoped:
bindingObj := binding.(*workv1alpha2.ResourceBinding)
bindingSpec = bindingObj.Spec
case apiextensionsv1.ClusterScoped:
bindingObj := binding.(*workv1alpha2.ClusterResourceBinding)
bindingSpec = bindingObj.Spec
}
return bindingSpec
}
// injectReservedLabelState injects the reservedLabelState in to the failover to cluster.
// We have the following restrictions on whether to perform injection operations:
// 1. Only the scenario where an application is deployed in one cluster and migrated to