add scheduling diagnosis

Signed-off-by: Garrybest <garrybest@foxmail.com>
This commit is contained in:
Garrybest 2022-08-01 23:12:53 +08:00
parent 25af090405
commit b19cdb1031
7 changed files with 69 additions and 11 deletions

View File

@ -58,12 +58,16 @@ func (g *genericScheduler) Schedule(ctx context.Context, placement *policyv1alph
return result, fmt.Errorf("no clusters available to schedule")
}
feasibleClusters, err := g.findClustersThatFit(ctx, g.scheduleFramework, placement, spec, clusterInfoSnapshot)
feasibleClusters, diagnosis, err := g.findClustersThatFit(ctx, g.scheduleFramework, placement, spec, clusterInfoSnapshot)
if err != nil {
return result, fmt.Errorf("failed to findClustersThatFit: %v", err)
}
if len(feasibleClusters) == 0 {
return result, fmt.Errorf("no clusters fit")
return result, &framework.FitError{
NumAllClusters: clusterInfoSnapshot.NumOfClusters(),
Diagnosis: diagnosis,
}
}
klog.V(4).Infof("feasible clusters found: %v", feasibleClusters)
@ -97,21 +101,27 @@ func (g *genericScheduler) findClustersThatFit(
fwk framework.Framework,
placement *policyv1alpha1.Placement,
bindingSpec *workv1alpha2.ResourceBindingSpec,
clusterInfo *cache.Snapshot) ([]*clusterv1alpha1.Cluster, error) {
clusterInfo *cache.Snapshot,
) ([]*clusterv1alpha1.Cluster, framework.Diagnosis, error) {
defer metrics.ScheduleStep(metrics.ScheduleStepFilter, time.Now())
diagnosis := framework.Diagnosis{
ClusterToResultMap: make(framework.ClusterToResultMap),
}
var out []*clusterv1alpha1.Cluster
// DO NOT filter unhealthy cluster, let users make decisions by using ClusterTolerations of Placement.
clusters := clusterInfo.GetClusters()
for _, c := range clusters {
if result := fwk.RunFilterPlugins(ctx, placement, bindingSpec, c.Cluster()); !result.IsSuccess() {
klog.V(4).Infof("cluster %q is not fit, reason: %v", c.Cluster().Name, result.AsError())
diagnosis.ClusterToResultMap[c.Cluster().Name] = result
} else {
out = append(out, c.Cluster())
}
}
return out, nil
return out, diagnosis, nil
}
// prioritizeClusters prioritizes the clusters by running the score plugins.

View File

@ -126,6 +126,11 @@ func (s *Result) AsError() error {
return errors.New(strings.Join(s.reasons, ", "))
}
// Reasons returns the reasons recorded on the Result.
//
// It is safe to call on a nil *Result, in which case it returns nil instead of
// panicking. The returned slice is the Result's own slice, not a copy, so
// callers should treat it as read-only.
func (s *Result) Reasons() []string {
	if s == nil {
		return nil
	}
	return s.reasons
}
// ScorePlugin is an interface that must be implemented by "Score" plugins to rank
// clusters that passed the filtering phase.
type ScorePlugin interface {

View File

@ -37,7 +37,7 @@ func (p *APIEnablement) Filter(ctx context.Context, placement *policyv1alpha1.Pl
bindingSpec *workv1alpha2.ResourceBindingSpec, cluster *clusterv1alpha1.Cluster) *framework.Result {
if !helper.IsAPIEnabled(cluster.Status.APIEnablements, bindingSpec.Resource.APIVersion, bindingSpec.Resource.Kind) {
klog.V(2).Infof("Cluster(%s) not fit as missing API(%s, kind=%s)", cluster.Name, bindingSpec.Resource.APIVersion, bindingSpec.Resource.Kind)
return framework.NewResult(framework.Unschedulable, "no such API resource")
return framework.NewResult(framework.Unschedulable, "cluster(s) didn't have the API resource")
}
return framework.NewResult(framework.Success)

View File

@ -39,7 +39,7 @@ func (p *ClusterAffinity) Filter(ctx context.Context, placement *policyv1alpha1.
if util.ClusterMatches(cluster, *affinity) {
return framework.NewResult(framework.Success)
}
return framework.NewResult(framework.Unschedulable, "cluster is not matched the placement cluster affinity constraint")
return framework.NewResult(framework.Unschedulable, "cluster(s) didn't match the placement cluster affinity constraint")
}
// If no clusters specified and it is not excluded, mark it matched

View File

@ -34,11 +34,11 @@ func (p *SpreadConstraint) Filter(ctx context.Context, placement *policyv1alpha1
bindingSpec *workv1alpha2.ResourceBindingSpec, cluster *clusterv1alpha1.Cluster) *framework.Result {
for _, spreadConstraint := range placement.SpreadConstraints {
if spreadConstraint.SpreadByField == policyv1alpha1.SpreadByFieldProvider && cluster.Spec.Provider == "" {
return framework.NewResult(framework.Unschedulable, "No Provider Property in the Cluster.Spec")
return framework.NewResult(framework.Unschedulable, "cluster(s) didn't have provider property")
} else if spreadConstraint.SpreadByField == policyv1alpha1.SpreadByFieldRegion && cluster.Spec.Region == "" {
return framework.NewResult(framework.Unschedulable, "No Region Property in the Cluster.Spec")
return framework.NewResult(framework.Unschedulable, "cluster(s) didn't have region property")
} else if spreadConstraint.SpreadByField == policyv1alpha1.SpreadByFieldZone && cluster.Spec.Zone == "" {
return framework.NewResult(framework.Unschedulable, "No Zone Property in the Cluster.Spec")
return framework.NewResult(framework.Unschedulable, "cluster(s) didn't have zone property")
}
}

View File

@ -52,6 +52,5 @@ func (p *TaintToleration) Filter(ctx context.Context, placement *policyv1alpha1.
return framework.NewResult(framework.Success)
}
return framework.NewResult(framework.Unschedulable, fmt.Sprintf("cluster had taint {%s: %s}, that the propagation policy didn't tolerate",
taint.Key, taint.Value))
return framework.NewResult(framework.Unschedulable, fmt.Sprintf("cluster(s) had untolerated taint {%s: %s}", taint.Key, taint.Value))
}

View File

@ -1,9 +1,21 @@
package framework
import (
"fmt"
"sort"
"strings"
clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
)
const (
// NoClusterAvailableMsg is the format string used when no clusters are
// available. Its single %v verb is filled with the total number of clusters
// (FitError.Error passes NumAllClusters).
NoClusterAvailableMsg = "0/%v clusters are available"
)
// ClusterToResultMap maps a cluster name to the Result recorded for that
// cluster. Values are pointers, so an entry may be nil.
type ClusterToResultMap map[string]*Result
// ClusterInfo is cluster level aggregated information.
type ClusterInfo struct {
// Overall cluster information.
@ -24,3 +36,35 @@ func (n *ClusterInfo) Cluster() *clusterv1alpha1.Cluster {
}
return n.cluster
}
// Diagnosis records the details to diagnose a scheduling failure.
type Diagnosis struct {
// ClusterToResultMap holds, keyed by cluster name, the Result explaining why
// that cluster did not fit.
ClusterToResultMap ClusterToResultMap
}
// FitError describes why an object could not be fit onto any cluster.
type FitError struct {
// NumAllClusters is the total number of clusters; it fills the "0/N" part of
// the message produced by Error.
NumAllClusters int
// Diagnosis carries the per-cluster results whose reasons Error aggregates.
Diagnosis Diagnosis
}
// Error implements the error interface and explains why the object failed to
// fit on any cluster.
//
// Every reason found in f.Diagnosis.ClusterToResultMap is tallied, rendered as
// "<count> <reason>", and the entries are sorted so the message is deterministic
// despite Go's randomized map iteration order. The list is appended to the
// "0/<NumAllClusters> clusters are available" prefix.
//
// Nil results in the map are skipped rather than dereferenced. When no reason
// was recorded at all, the message is just the prefix followed by a period
// instead of ending in a dangling ": .".
func (f *FitError) Error() string {
	// Histogram: reason text -> number of times it was reported.
	histogram := make(map[string]int)
	for _, result := range f.Diagnosis.ClusterToResultMap {
		if result == nil {
			continue
		}
		for _, reason := range result.Reasons() {
			histogram[reason]++
		}
	}

	prefix := fmt.Sprintf(NoClusterAvailableMsg, f.NumAllClusters)
	if len(histogram) == 0 {
		return prefix + "."
	}

	entries := make([]string, 0, len(histogram))
	for reason, count := range histogram {
		entries = append(entries, fmt.Sprintf("%v %v", count, reason))
	}
	// Sort the rendered "<count> <reason>" strings, as the message always has.
	sort.Strings(entries)

	return prefix + ": " + strings.Join(entries, ", ") + "."
}