add scheduling diagnosis

Signed-off-by: Garrybest <garrybest@foxmail.com>

commit b19cdb1031
parent 25af090405

@@ -58,12 +58,16 @@ func (g *genericScheduler) Schedule(ctx context.Context, placement *policyv1alph
 		return result, fmt.Errorf("no clusters available to schedule")
 	}
 
-	feasibleClusters, err := g.findClustersThatFit(ctx, g.scheduleFramework, placement, spec, clusterInfoSnapshot)
+	feasibleClusters, diagnosis, err := g.findClustersThatFit(ctx, g.scheduleFramework, placement, spec, clusterInfoSnapshot)
 	if err != nil {
 		return result, fmt.Errorf("failed to findClustersThatFit: %v", err)
 	}
 
 	if len(feasibleClusters) == 0 {
-		return result, fmt.Errorf("no clusters fit")
+		return result, &framework.FitError{
+			NumAllClusters: clusterInfoSnapshot.NumOfClusters(),
+			Diagnosis:      diagnosis,
+		}
 	}
 	klog.V(4).Infof("feasible clusters found: %v", feasibleClusters)
 
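With this change, a hypothetical caller of Schedule can tell "nothing fit" apart from infrastructure errors by unwrapping the returned error. A minimal sketch (not part of this commit; assumes the standard errors package and the FitError type introduced later in this diff):

    // Sketch: detect a fit failure and surface the aggregated diagnosis.
    var fitErr *framework.FitError
    if errors.As(err, &fitErr) {
    	// fitErr.Error() renders "0/N clusters are available: ..." (see the types change below).
    	klog.V(2).Infof("scheduling failed on all %d clusters: %v", fitErr.NumAllClusters, fitErr)
    }
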
@@ -97,21 +101,27 @@ func (g *genericScheduler) findClustersThatFit(
 	fwk framework.Framework,
 	placement *policyv1alpha1.Placement,
 	bindingSpec *workv1alpha2.ResourceBindingSpec,
-	clusterInfo *cache.Snapshot) ([]*clusterv1alpha1.Cluster, error) {
+	clusterInfo *cache.Snapshot,
+) ([]*clusterv1alpha1.Cluster, framework.Diagnosis, error) {
 	defer metrics.ScheduleStep(metrics.ScheduleStepFilter, time.Now())
 
+	diagnosis := framework.Diagnosis{
+		ClusterToResultMap: make(framework.ClusterToResultMap),
+	}
+
 	var out []*clusterv1alpha1.Cluster
 	// DO NOT filter unhealthy cluster, let users make decisions by using ClusterTolerations of Placement.
 	clusters := clusterInfo.GetClusters()
 	for _, c := range clusters {
 		if result := fwk.RunFilterPlugins(ctx, placement, bindingSpec, c.Cluster()); !result.IsSuccess() {
 			klog.V(4).Infof("cluster %q is not fit, reason: %v", c.Cluster().Name, result.AsError())
+			diagnosis.ClusterToResultMap[c.Cluster().Name] = result
 		} else {
 			out = append(out, c.Cluster())
 		}
 	}
 
-	return out, nil
+	return out, diagnosis, nil
 }
 
 // prioritizeClusters prioritize the clusters by running the score plugins.

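The diagnosis now records one Result per rejected cluster, keyed by cluster name; feasible clusters never appear in the map. For debugging it can be walked directly; a small helper sketch (not in the commit; klog/strings imports assumed, and Reasons() is the accessor added below):

    // dumpDiagnosis logs every rejected cluster with its filter reasons.
    func dumpDiagnosis(d framework.Diagnosis) {
    	for name, result := range d.ClusterToResultMap {
    		klog.V(4).Infof("cluster %q rejected: %s", name, strings.Join(result.Reasons(), ", "))
    	}
    }
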
@@ -126,6 +126,11 @@ func (s *Result) AsError() error {
 	return errors.New(strings.Join(s.reasons, ", "))
 }
 
+// Reasons returns reasons of the Result.
+func (s *Result) Reasons() []string {
+	return s.reasons
+}
+
 // ScorePlugin is an interface that must be implemented by "Score" plugins to rank
 // clusters that passed the filtering phase.
 type ScorePlugin interface {

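Reasons exposes the same strings that AsError joins; FitError.Error below counts them into a histogram. An illustration, assuming NewResult accepts variadic reason strings as the call sites in this diff suggest:

    r := framework.NewResult(framework.Unschedulable, "reason A", "reason B")
    _ = r.Reasons() // []string{"reason A", "reason B"}
    _ = r.AsError() // error with message "reason A, reason B"
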
@@ -37,7 +37,7 @@ func (p *APIEnablement) Filter(ctx context.Context, placement *policyv1alpha1.Pl
 	bindingSpec *workv1alpha2.ResourceBindingSpec, cluster *clusterv1alpha1.Cluster) *framework.Result {
 	if !helper.IsAPIEnabled(cluster.Status.APIEnablements, bindingSpec.Resource.APIVersion, bindingSpec.Resource.Kind) {
 		klog.V(2).Infof("Cluster(%s) not fit as missing API(%s, kind=%s)", cluster.Name, bindingSpec.Resource.APIVersion, bindingSpec.Resource.Kind)
-		return framework.NewResult(framework.Unschedulable, "no such API resource")
+		return framework.NewResult(framework.Unschedulable, "cluster(s) didn't have the API resource")
 	}
 
 	return framework.NewResult(framework.Success)

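This and the following message rewrites drop per-cluster specifics from the reason string (the klog lines keep them) and settle on a shared "cluster(s) ..." phrasing, so identical reasons from different clusters collapse into a single histogram bucket in FitError.Error. For example, with two clusters missing the same API (counts illustrative):

    0/3 clusters are available: 2 cluster(s) didn't have the API resource.
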
@@ -39,7 +39,7 @@ func (p *ClusterAffinity) Filter(ctx context.Context, placement *policyv1alpha1.
 		if util.ClusterMatches(cluster, *affinity) {
 			return framework.NewResult(framework.Success)
 		}
-		return framework.NewResult(framework.Unschedulable, "cluster is not matched the placement cluster affinity constraint")
+		return framework.NewResult(framework.Unschedulable, "cluster(s) didn't match the placement cluster affinity constraint")
 	}
 
 	// If no clusters specified and it is not excluded, mark it matched

@@ -34,11 +34,11 @@ func (p *SpreadConstraint) Filter(ctx context.Context, placement *policyv1alpha1
 	bindingSpec *workv1alpha2.ResourceBindingSpec, cluster *clusterv1alpha1.Cluster) *framework.Result {
 	for _, spreadConstraint := range placement.SpreadConstraints {
 		if spreadConstraint.SpreadByField == policyv1alpha1.SpreadByFieldProvider && cluster.Spec.Provider == "" {
-			return framework.NewResult(framework.Unschedulable, "No Provider Property in the Cluster.Spec")
+			return framework.NewResult(framework.Unschedulable, "cluster(s) didn't have provider property")
 		} else if spreadConstraint.SpreadByField == policyv1alpha1.SpreadByFieldRegion && cluster.Spec.Region == "" {
-			return framework.NewResult(framework.Unschedulable, "No Region Property in the Cluster.Spec")
+			return framework.NewResult(framework.Unschedulable, "cluster(s) didn't have region property")
 		} else if spreadConstraint.SpreadByField == policyv1alpha1.SpreadByFieldZone && cluster.Spec.Zone == "" {
-			return framework.NewResult(framework.Unschedulable, "No Zone Property in the Cluster.Spec")
+			return framework.NewResult(framework.Unschedulable, "cluster(s) didn't have zone property")
 		}
 	}
 

@@ -52,6 +52,5 @@ func (p *TaintToleration) Filter(ctx context.Context, placement *policyv1alpha1.
 			return framework.NewResult(framework.Success)
 		}
 
-		return framework.NewResult(framework.Unschedulable, fmt.Sprintf("cluster had taint {%s: %s}, that the propagation policy didn't tolerate",
-			taint.Key, taint.Value))
+		return framework.NewResult(framework.Unschedulable, fmt.Sprintf("cluster(s) had untolerated taint {%s: %s}", taint.Key, taint.Value))
 	}

@@ -1,9 +1,21 @@
 package framework
 
 import (
+	"fmt"
+	"sort"
+	"strings"
+
 	clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
 )
 
+const (
+	// NoClusterAvailableMsg is used to format the message when no clusters are available.
+	NoClusterAvailableMsg = "0/%v clusters are available"
+)
+
+// ClusterToResultMap declares a map from cluster name to its Result.
+type ClusterToResultMap map[string]*Result
+
 // ClusterInfo is cluster level aggregated information.
 type ClusterInfo struct {
 	// Overall cluster information.

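NoClusterAvailableMsg is a printf pattern consumed by FitError.Error below, e.g.:

    fmt.Sprintf(NoClusterAvailableMsg, 5) // "0/5 clusters are available"
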
@@ -24,3 +36,35 @@ func (n *ClusterInfo) Cluster() *clusterv1alpha1.Cluster {
 	}
 	return n.cluster
 }
+
+// Diagnosis records the details to diagnose a scheduling failure.
+type Diagnosis struct {
+	ClusterToResultMap ClusterToResultMap
+}
+
+// FitError describes a fit error of an object.
+type FitError struct {
+	NumAllClusters int
+	Diagnosis      Diagnosis
+}
+
+// Error returns detailed information of why the object failed to fit on each cluster.
+func (f *FitError) Error() string {
+	reasons := make(map[string]int)
+	for _, result := range f.Diagnosis.ClusterToResultMap {
+		for _, reason := range result.Reasons() {
+			reasons[reason]++
+		}
+	}
+
+	sortReasonsHistogram := func() []string {
+		var reasonStrings []string
+		for k, v := range reasons {
+			reasonStrings = append(reasonStrings, fmt.Sprintf("%v %v", v, k))
+		}
+		sort.Strings(reasonStrings)
+		return reasonStrings
+	}
+	reasonMsg := fmt.Sprintf(NoClusterAvailableMsg+": %v.", f.NumAllClusters, strings.Join(sortReasonsHistogram(), ", "))
+	return reasonMsg
+}

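Putting the pieces together, a self-contained sketch (hypothetical cluster names and reasons; assumes this framework package is importable at its usual path) showing the aggregated message a FitError renders:

    package main

    import (
    	"fmt"

    	"github.com/karmada-io/karmada/pkg/scheduler/framework"
    )

    func main() {
    	diagnosis := framework.Diagnosis{ClusterToResultMap: framework.ClusterToResultMap{
    		"member1": framework.NewResult(framework.Unschedulable, "cluster(s) didn't have region property"),
    		"member2": framework.NewResult(framework.Unschedulable, "cluster(s) didn't have region property"),
    		"member3": framework.NewResult(framework.Unschedulable, "cluster(s) had untolerated taint {env: test}"),
    	}}
    	err := &framework.FitError{NumAllClusters: 3, Diagnosis: diagnosis}
    	// Identical reasons are counted, sorted, and joined:
    	fmt.Println(err)
    	// Output: 0/3 clusters are available: 1 cluster(s) had untolerated taint {env: test}, 2 cluster(s) didn't have region property.
    }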