Merge pull request #2302 from Garrybest/pr_diagnosis
add scheduling diagnosis
This commit is contained in:
commit
c11fb634bc
|
@ -58,12 +58,16 @@ func (g *genericScheduler) Schedule(ctx context.Context, placement *policyv1alph
|
||||||
return result, fmt.Errorf("no clusters available to schedule")
|
return result, fmt.Errorf("no clusters available to schedule")
|
||||||
}
|
}
|
||||||
|
|
||||||
feasibleClusters, err := g.findClustersThatFit(ctx, g.scheduleFramework, placement, spec, clusterInfoSnapshot)
|
feasibleClusters, diagnosis, err := g.findClustersThatFit(ctx, g.scheduleFramework, placement, spec, clusterInfoSnapshot)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return result, fmt.Errorf("failed to findClustersThatFit: %v", err)
|
return result, fmt.Errorf("failed to findClustersThatFit: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(feasibleClusters) == 0 {
|
if len(feasibleClusters) == 0 {
|
||||||
return result, fmt.Errorf("no clusters fit")
|
return result, &framework.FitError{
|
||||||
|
NumAllClusters: clusterInfoSnapshot.NumOfClusters(),
|
||||||
|
Diagnosis: diagnosis,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
klog.V(4).Infof("feasible clusters found: %v", feasibleClusters)
|
klog.V(4).Infof("feasible clusters found: %v", feasibleClusters)
|
||||||
|
|
||||||
|
@ -97,21 +101,27 @@ func (g *genericScheduler) findClustersThatFit(
|
||||||
fwk framework.Framework,
|
fwk framework.Framework,
|
||||||
placement *policyv1alpha1.Placement,
|
placement *policyv1alpha1.Placement,
|
||||||
bindingSpec *workv1alpha2.ResourceBindingSpec,
|
bindingSpec *workv1alpha2.ResourceBindingSpec,
|
||||||
clusterInfo *cache.Snapshot) ([]*clusterv1alpha1.Cluster, error) {
|
clusterInfo *cache.Snapshot,
|
||||||
|
) ([]*clusterv1alpha1.Cluster, framework.Diagnosis, error) {
|
||||||
defer metrics.ScheduleStep(metrics.ScheduleStepFilter, time.Now())
|
defer metrics.ScheduleStep(metrics.ScheduleStepFilter, time.Now())
|
||||||
|
|
||||||
|
diagnosis := framework.Diagnosis{
|
||||||
|
ClusterToResultMap: make(framework.ClusterToResultMap),
|
||||||
|
}
|
||||||
|
|
||||||
var out []*clusterv1alpha1.Cluster
|
var out []*clusterv1alpha1.Cluster
|
||||||
// DO NOT filter unhealthy cluster, let users make decisions by using ClusterTolerations of Placement.
|
// DO NOT filter unhealthy cluster, let users make decisions by using ClusterTolerations of Placement.
|
||||||
clusters := clusterInfo.GetClusters()
|
clusters := clusterInfo.GetClusters()
|
||||||
for _, c := range clusters {
|
for _, c := range clusters {
|
||||||
if result := fwk.RunFilterPlugins(ctx, placement, bindingSpec, c.Cluster()); !result.IsSuccess() {
|
if result := fwk.RunFilterPlugins(ctx, placement, bindingSpec, c.Cluster()); !result.IsSuccess() {
|
||||||
klog.V(4).Infof("cluster %q is not fit, reason: %v", c.Cluster().Name, result.AsError())
|
klog.V(4).Infof("cluster %q is not fit, reason: %v", c.Cluster().Name, result.AsError())
|
||||||
|
diagnosis.ClusterToResultMap[c.Cluster().Name] = result
|
||||||
} else {
|
} else {
|
||||||
out = append(out, c.Cluster())
|
out = append(out, c.Cluster())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return out, nil
|
return out, diagnosis, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// prioritizeClusters prioritizes the clusters by running the score plugins.
|
// prioritizeClusters prioritizes the clusters by running the score plugins.
|
||||||
|
|
|
@ -126,6 +126,11 @@ func (s *Result) AsError() error {
|
||||||
return errors.New(strings.Join(s.reasons, ", "))
|
return errors.New(strings.Join(s.reasons, ", "))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reasons returns reasons of the Result.
|
||||||
|
func (s *Result) Reasons() []string {
|
||||||
|
return s.reasons
|
||||||
|
}
|
||||||
|
|
||||||
// ScorePlugin is an interface that must be implemented by "Score" plugins to rank
|
// ScorePlugin is an interface that must be implemented by "Score" plugins to rank
|
||||||
// clusters that passed the filtering phase.
|
// clusters that passed the filtering phase.
|
||||||
type ScorePlugin interface {
|
type ScorePlugin interface {
|
||||||
|
|
|
@ -37,7 +37,7 @@ func (p *APIEnablement) Filter(ctx context.Context, placement *policyv1alpha1.Pl
|
||||||
bindingSpec *workv1alpha2.ResourceBindingSpec, cluster *clusterv1alpha1.Cluster) *framework.Result {
|
bindingSpec *workv1alpha2.ResourceBindingSpec, cluster *clusterv1alpha1.Cluster) *framework.Result {
|
||||||
if !helper.IsAPIEnabled(cluster.Status.APIEnablements, bindingSpec.Resource.APIVersion, bindingSpec.Resource.Kind) {
|
if !helper.IsAPIEnabled(cluster.Status.APIEnablements, bindingSpec.Resource.APIVersion, bindingSpec.Resource.Kind) {
|
||||||
klog.V(2).Infof("Cluster(%s) not fit as missing API(%s, kind=%s)", cluster.Name, bindingSpec.Resource.APIVersion, bindingSpec.Resource.Kind)
|
klog.V(2).Infof("Cluster(%s) not fit as missing API(%s, kind=%s)", cluster.Name, bindingSpec.Resource.APIVersion, bindingSpec.Resource.Kind)
|
||||||
return framework.NewResult(framework.Unschedulable, "no such API resource")
|
return framework.NewResult(framework.Unschedulable, "cluster(s) didn't have the API resource")
|
||||||
}
|
}
|
||||||
|
|
||||||
return framework.NewResult(framework.Success)
|
return framework.NewResult(framework.Success)
|
||||||
|
|
|
@ -39,7 +39,7 @@ func (p *ClusterAffinity) Filter(ctx context.Context, placement *policyv1alpha1.
|
||||||
if util.ClusterMatches(cluster, *affinity) {
|
if util.ClusterMatches(cluster, *affinity) {
|
||||||
return framework.NewResult(framework.Success)
|
return framework.NewResult(framework.Success)
|
||||||
}
|
}
|
||||||
return framework.NewResult(framework.Unschedulable, "cluster is not matched the placement cluster affinity constraint")
|
return framework.NewResult(framework.Unschedulable, "cluster(s) didn't match the placement cluster affinity constraint")
|
||||||
}
|
}
|
||||||
|
|
||||||
// If no clusters specified and it is not excluded, mark it matched
|
// If no clusters specified and it is not excluded, mark it matched
|
||||||
|
|
|
@ -34,11 +34,11 @@ func (p *SpreadConstraint) Filter(ctx context.Context, placement *policyv1alpha1
|
||||||
bindingSpec *workv1alpha2.ResourceBindingSpec, cluster *clusterv1alpha1.Cluster) *framework.Result {
|
bindingSpec *workv1alpha2.ResourceBindingSpec, cluster *clusterv1alpha1.Cluster) *framework.Result {
|
||||||
for _, spreadConstraint := range placement.SpreadConstraints {
|
for _, spreadConstraint := range placement.SpreadConstraints {
|
||||||
if spreadConstraint.SpreadByField == policyv1alpha1.SpreadByFieldProvider && cluster.Spec.Provider == "" {
|
if spreadConstraint.SpreadByField == policyv1alpha1.SpreadByFieldProvider && cluster.Spec.Provider == "" {
|
||||||
return framework.NewResult(framework.Unschedulable, "No Provider Property in the Cluster.Spec")
|
return framework.NewResult(framework.Unschedulable, "cluster(s) didn't have provider property")
|
||||||
} else if spreadConstraint.SpreadByField == policyv1alpha1.SpreadByFieldRegion && cluster.Spec.Region == "" {
|
} else if spreadConstraint.SpreadByField == policyv1alpha1.SpreadByFieldRegion && cluster.Spec.Region == "" {
|
||||||
return framework.NewResult(framework.Unschedulable, "No Region Property in the Cluster.Spec")
|
return framework.NewResult(framework.Unschedulable, "cluster(s) didn't have region property")
|
||||||
} else if spreadConstraint.SpreadByField == policyv1alpha1.SpreadByFieldZone && cluster.Spec.Zone == "" {
|
} else if spreadConstraint.SpreadByField == policyv1alpha1.SpreadByFieldZone && cluster.Spec.Zone == "" {
|
||||||
return framework.NewResult(framework.Unschedulable, "No Zone Property in the Cluster.Spec")
|
return framework.NewResult(framework.Unschedulable, "cluster(s) didn't have zone property")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -52,6 +52,5 @@ func (p *TaintToleration) Filter(ctx context.Context, placement *policyv1alpha1.
|
||||||
return framework.NewResult(framework.Success)
|
return framework.NewResult(framework.Success)
|
||||||
}
|
}
|
||||||
|
|
||||||
return framework.NewResult(framework.Unschedulable, fmt.Sprintf("cluster had taint {%s: %s}, that the propagation policy didn't tolerate",
|
return framework.NewResult(framework.Unschedulable, fmt.Sprintf("cluster(s) had untolerated taint {%s: %s}", taint.Key, taint.Value))
|
||||||
taint.Key, taint.Value))
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,21 @@
|
||||||
package framework
|
package framework
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
|
||||||
clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
|
clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
// NoClusterAvailableMsg is the format string of the message reported when no
// cluster is available; its single %v verb receives the total number of clusters.
|
||||||
|
NoClusterAvailableMsg = "0/%v clusters are available"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ClusterToResultMap maps a cluster name to the Result recorded for that cluster.
|
||||||
|
type ClusterToResultMap map[string]*Result
|
||||||
|
|
||||||
// ClusterInfo is cluster level aggregated information.
|
// ClusterInfo is cluster level aggregated information.
|
||||||
type ClusterInfo struct {
|
type ClusterInfo struct {
|
||||||
// Overall cluster information.
|
// Overall cluster information.
|
||||||
|
@ -24,3 +36,35 @@ func (n *ClusterInfo) Cluster() *clusterv1alpha1.Cluster {
|
||||||
}
|
}
|
||||||
return n.cluster
|
return n.cluster
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Diagnosis records the details needed to diagnose a scheduling failure.
|
||||||
|
type Diagnosis struct {
|
||||||
|
ClusterToResultMap ClusterToResultMap // filter result of each cluster that did not fit, keyed by cluster name
|
||||||
|
}
|
||||||
|
|
||||||
|
// FitError describes a fit error of an object, i.e. the failure to find any
// cluster the object fits on. Its Error method renders the fields below into a
// single summary message.
|
||||||
|
type FitError struct {
|
||||||
|
NumAllClusters int // total number of clusters; rendered as N in "0/N clusters are available"
|
||||||
|
Diagnosis Diagnosis // per-cluster results whose reasons are aggregated into the message
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error returns detailed information of why the object failed to fit on each cluster
|
||||||
|
func (f *FitError) Error() string {
|
||||||
|
reasons := make(map[string]int)
|
||||||
|
for _, result := range f.Diagnosis.ClusterToResultMap {
|
||||||
|
for _, reason := range result.Reasons() {
|
||||||
|
reasons[reason]++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sortReasonsHistogram := func() []string {
|
||||||
|
var reasonStrings []string
|
||||||
|
for k, v := range reasons {
|
||||||
|
reasonStrings = append(reasonStrings, fmt.Sprintf("%v %v", v, k))
|
||||||
|
}
|
||||||
|
sort.Strings(reasonStrings)
|
||||||
|
return reasonStrings
|
||||||
|
}
|
||||||
|
reasonMsg := fmt.Sprintf(NoClusterAvailableMsg+": %v.", f.NumAllClusters, strings.Join(sortReasonsHistogram(), ", "))
|
||||||
|
return reasonMsg
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue