[MVP] add resourcequota plugin in scheduler-estimator: create framework for scheulder-estimator (#4534)

* [MVP] add resourcequota plugin in scheudler-estimator

Signed-off-by: yweng14 <yweng14@bloomberg.net>

* framework only

Signed-off-by: yweng14 <yweng14@bloomberg.net>

* fix lint error

Signed-off-by: yweng14 <yweng14@bloomberg.net>

* address comments

Signed-off-by: yweng14 <yweng14@bloomberg.net>

* add KubeClient in the Handle

Signed-off-by: yweng14 <yweng14@bloomberg.net>

* - add snapshot as input argument of RunEstimateReplicasPlugins
- add Result to give clearer message

Signed-off-by: yweng14 <yweng14@bloomberg.net>

* fix unitest name due

Signed-off-by: yweng14 <yweng14@bloomberg.net>

---------

Signed-off-by: yweng14 <yweng14@bloomberg.net>
This commit is contained in:
Yao Weng 2024-01-19 09:14:41 -05:00 committed by GitHub
parent 0e501b607a
commit c3458310d4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 1008 additions and 21 deletions

View File

@ -200,7 +200,7 @@ helm install karmada-scheduler-estimator -n karmada-system ./charts/karmada
| Name | Description | Value | | Name | Description | Value |
| ---------------------------------------- |-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | |------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `installMode` | InstallMode "host", "agent" and "component" are provided, "host" means install karmada in the control-cluster, "agent" means install agent client in the member cluster, "component" means install selected components in the control-cluster | `"host"` | | `installMode` | InstallMode "host", "agent" and "component" are provided, "host" means install karmada in the control-cluster, "agent" means install agent client in the member cluster, "component" means install selected components in the control-cluster | `"host"` |
| `clusterDomain` | Default cluster domain for karmada | `"cluster.local"` | | `clusterDomain` | Default cluster domain for karmada | `"cluster.local"` |
| `components` | Selected components list, selectable values: "schedulerEstimator" | `[]` | | `components` | Selected components list, selectable values: "schedulerEstimator" | `[]` |
@ -300,13 +300,13 @@ helm install karmada-scheduler-estimator -n karmada-system ./charts/karmada
| `controllerManager.controllers` | Controllers of the karmada-controller-manager | `""` | | `controllerManager.controllers` | Controllers of the karmada-controller-manager | `""` |
| `controllerManager.extraCommandArgs` | extra command args of the karmada-controller-manager | `{}` | | `controllerManager.extraCommandArgs` | extra command args of the karmada-controller-manager | `{}` |
| `apiServer.labels` | Labels of the karmada-apiserver deployment | `{"app": "karmada-apiserver"}` | | `apiServer.labels` | Labels of the karmada-apiserver deployment | `{"app": "karmada-apiserver"}` |
| `apiServer.serviceAnnotations` | Annotations of the karmada-apiserver service | `{}` | | `apiServer.serviceAnnotations` | Annotations of the karmada-apiserver service | `{}` |
| `apiServer.replicaCount` | Target replicas of the karmada-apiserver | `1` | | `apiServer.replicaCount` | Target replicas of the karmada-apiserver | `1` |
| `apiServer.podLabels` | Labels of the karmada-apiserver pods | `{}` | | `apiServer.podLabels` | Labels of the karmada-apiserver pods | `{}` |
| `apiServer.podAnnotations` | Annotations of the karmada-apiserver pods | `{}` | | `apiServer.podAnnotations` | Annotations of the karmada-apiserver pods | `{}` |
| `apiServer.imagePullSecrets` | Image pull secret of the karmada-apiserver | `[]` | | `apiServer.imagePullSecrets` | Image pull secret of the karmada-apiserver | `[]` |
| `apiServer.image.repository` | Image of the karmada-apiserver | `"registry.k8s.io/kube-apiserver"` | | `apiServer.image.repository` | Image of the karmada-apiserver | `"registry.k8s.io/kube-apiserver"` |
| `apiServer.image.tag` | Image tag of the karmada-apiserver | `"v1.26.12"` | | `apiServer.image.tag` | Image tag of the karmada-apiserver | `"v1.26.12"` |
| `apiServer.image.pullPolicy` | Image pull policy of the karmada-apiserver | `"IfNotPresent"` | | `apiServer.image.pullPolicy` | Image pull policy of the karmada-apiserver | `"IfNotPresent"` |
| `apiServer.resources` | Resource quota of the karmada-apiserver | `{}` | | `apiServer.resources` | Resource quota of the karmada-apiserver | `{}` |
| `apiServer.hostNetwork` | Deploy karmada-apiserver with hostNetwork. If there are multiple karmadas in one cluster, you'd better set it to "false" | `"false"` | | `apiServer.hostNetwork` | Deploy karmada-apiserver with hostNetwork. If there are multiple karmadas in one cluster, you'd better set it to "false" | `"false"` |
@ -335,7 +335,7 @@ helm install karmada-scheduler-estimator -n karmada-system ./charts/karmada
| `kubeControllerManager.podAnnotations` | Annotations of the kube-controller-manager pods | `{}` | | `kubeControllerManager.podAnnotations` | Annotations of the kube-controller-manager pods | `{}` |
| `kubeControllerManager.imagePullSecrets` | Image pull secret of the kube-controller-manager | `[]` | | `kubeControllerManager.imagePullSecrets` | Image pull secret of the kube-controller-manager | `[]` |
| `kubeControllerManager.image.repository` | Image of the kube-controller-manager | `"registry.k8s.io/kube-controller-manager"` | | `kubeControllerManager.image.repository` | Image of the kube-controller-manager | `"registry.k8s.io/kube-controller-manager"` |
| `kubeControllerManager.image.tag` | Image tag of the kube-controller-manager | `"v1.26.12"` | | `kubeControllerManager.image.tag` | Image tag of the kube-controller-manager | `"v1.26.12"` |
| `kubeControllerManager.image.pullPolicy` | Image pull policy of the kube-controller-manager | `"IfNotPresent"` | | `kubeControllerManager.image.pullPolicy` | Image pull policy of the kube-controller-manager | `"IfNotPresent"` |
| `kubeControllerManager.resources` | Resource quota of the kube-controller-manager | `{}` | | `kubeControllerManager.resources` | Resource quota of the kube-controller-manager | `{}` |
| `kubeControllerManager.nodeSelector` | Node selector of the kube-controller-manager | `{}` | | `kubeControllerManager.nodeSelector` | Node selector of the kube-controller-manager | `{}` |
@ -359,6 +359,7 @@ helm install karmada-scheduler-estimator -n karmada-system ./charts/karmada
| `schedulerEstimator.nodeSelector` | Node selector of the scheduler-estimator | `{}` | | `schedulerEstimator.nodeSelector` | Node selector of the scheduler-estimator | `{}` |
| `schedulerEstimator.affinity` | Affinity of the scheduler-estimator | `{}` | | `schedulerEstimator.affinity` | Affinity of the scheduler-estimator | `{}` |
| `schedulerEstimator.tolerations` | Tolerations of the scheduler-estimator | `[]` | | `schedulerEstimator.tolerations` | Tolerations of the scheduler-estimator | `[]` |
| `schedulerEstimator.featureGates` | FeatureGates of the scheduler-estimator | `{"FeatureGateName": "false"}` |
| `search.strategy` | Strategy of the scheduler-estimator | `{"type": "RollingUpdate", "rollingUpdate": {"maxUnavailable": "0", "maxSurge": "50%"} }` | | `search.strategy` | Strategy of the scheduler-estimator | `{"type": "RollingUpdate", "rollingUpdate": {"maxUnavailable": "0", "maxSurge": "50%"} }` |
| `descheduler.labels` | Labels of the descheduler deployment | `karmada-descheduler` | | `descheduler.labels` | Labels of the descheduler deployment | `karmada-descheduler` |
| `descheduler.replicaCount` | Target replicas of the descheduler | `2` | | `descheduler.replicaCount` | Target replicas of the descheduler | `2` |

View File

@ -543,6 +543,23 @@ Return the proper karmada kubectl image name
{{- end -}} {{- end -}}
{{- end -}} {{- end -}}
{{- define "karmada.schedulerEstimator.featureGates" -}}
{{- $featureGatesArg := index . "featureGatesArg" -}}
{{- if (not (empty $featureGatesArg)) }}
{{- $featureGatesFlag := "" -}}
{{- range $key, $value := $featureGatesArg -}}
{{- if not (empty (toString $value)) }}
{{- $featureGatesFlag = cat $featureGatesFlag $key "=" $value "," -}}
{{- end -}}
{{- end -}}
{{- if gt (len $featureGatesFlag) 0 }}
{{- $featureGatesFlag := trimSuffix "," $featureGatesFlag | nospace -}}
{{- printf "%s=%s" "--feature-gates" $featureGatesFlag -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{- define "karmada.controllerManager.extraCommandArgs" -}} {{- define "karmada.controllerManager.extraCommandArgs" -}}
{{- if .Values.controllerManager.extraCommandArgs }} {{- if .Values.controllerManager.extraCommandArgs }}
{{- range $key, $value := .Values.controllerManager.extraCommandArgs }} {{- range $key, $value := .Values.controllerManager.extraCommandArgs }}

View File

@ -48,6 +48,9 @@ spec:
- /bin/karmada-scheduler-estimator - /bin/karmada-scheduler-estimator
- --kubeconfig=/etc/{{ $clusterName }}-kubeconfig - --kubeconfig=/etc/{{ $clusterName }}-kubeconfig
- --cluster-name={{ $clusterName }} - --cluster-name={{ $clusterName }}
{{- with (include "karmada.schedulerEstimator.featureGates" (dict "featureGatesArg" $.Values.schedulerEstimator.featureGates)) }}
- {{ . }}
{{- end}}
livenessProbe: livenessProbe:
httpGet: httpGet:
path: /healthz path: /healthz

View File

@ -828,6 +828,9 @@ schedulerEstimator:
maxSurge: 50% maxSurge: 50%
## @param apiServer.podDisruptionBudget ## @param apiServer.podDisruptionBudget
podDisruptionBudget: *podDisruptionBudget podDisruptionBudget: *podDisruptionBudget
## @param featureGate to schedulerEstimator
# FooPluginName: true
featureGates: {}
## descheduler config ## descheduler config
descheduler: descheduler:

View File

@ -19,6 +19,7 @@ package options
import ( import (
"github.com/spf13/pflag" "github.com/spf13/pflag"
"github.com/karmada-io/karmada/pkg/features"
"github.com/karmada-io/karmada/pkg/sharedcli/profileflag" "github.com/karmada-io/karmada/pkg/sharedcli/profileflag"
) )
@ -67,5 +68,7 @@ func (o *Options) AddFlags(fs *pflag.FlagSet) {
fs.Float32Var(&o.ClusterAPIQPS, "kube-api-qps", 20.0, "QPS to use while talking with apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags.") fs.Float32Var(&o.ClusterAPIQPS, "kube-api-qps", 20.0, "QPS to use while talking with apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags.")
fs.IntVar(&o.ClusterAPIBurst, "kube-api-burst", 30, "Burst to use while talking with apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags.") fs.IntVar(&o.ClusterAPIBurst, "kube-api-burst", 30, "Burst to use while talking with apiserver. Doesn't cover events and node heartbeat apis which rate limiting is controlled by a different set of flags.")
fs.IntVar(&o.Parallelism, "parallelism", o.Parallelism, "Parallelism defines the amount of parallelism in algorithms for estimating. Must be greater than 0. Defaults to 16.") fs.IntVar(&o.Parallelism, "parallelism", o.Parallelism, "Parallelism defines the amount of parallelism in algorithms for estimating. Must be greater than 0. Defaults to 16.")
features.FeatureGate.AddFlag(fs)
o.ProfileOpts.AddFlags(fs) o.ProfileOpts.AddFlags(fs)
} }

View File

@ -128,9 +128,14 @@ func run(ctx context.Context, opts *options.Options) error {
dynamicClient := dynamic.NewForConfigOrDie(restConfig) dynamicClient := dynamic.NewForConfigOrDie(restConfig)
discoveryClient := discovery.NewDiscoveryClientForConfigOrDie(restConfig) discoveryClient := discovery.NewDiscoveryClientForConfigOrDie(restConfig)
e := server.NewEstimatorServer(kubeClient, dynamicClient, discoveryClient, opts, ctx.Done()) e, err := server.NewEstimatorServer(kubeClient, dynamicClient, discoveryClient, opts, ctx.Done())
if err != nil {
klog.Errorf("Fail to create estimator server: %v", err)
return err
}
if err = e.Start(ctx); err != nil { if err = e.Start(ctx); err != nil {
klog.Errorf("estimator server exits unexpectedly: %v", err) klog.Errorf("Estimator server exits unexpectedly: %v", err)
return err return err
} }

View File

@ -39,6 +39,7 @@ missing_license_header_files="$($ADDLICENSE_BIN \
-ignore "**/*.yaml" \ -ignore "**/*.yaml" \
-ignore "**/*.yml" \ -ignore "**/*.yml" \
-ignore "**/*.json" \ -ignore "**/*.json" \
-ignore ".idea/**" \
.)" || true .)" || true
if [[ "$missing_license_header_files" ]]; then if [[ "$missing_license_header_files" ]]; then

View File

@ -18,6 +18,7 @@ package server
import ( import (
"context" "context"
"fmt"
"sync/atomic" "sync/atomic"
"time" "time"
@ -73,8 +74,11 @@ func (es *AccurateSchedulerEstimatorServer) estimateReplicas(
tolerations = requirements.NodeClaim.Tolerations tolerations = requirements.NodeClaim.Tolerations
} }
// TODO(Garrybest): design a framework and make filter and score plugins
var res int32 var res int32
replicas, ret := es.estimateFramework.RunEstimateReplicasPlugins(ctx, snapshot, &requirements)
if !ret.IsSuccess() && !ret.IsNoOperation() {
return replicas, fmt.Errorf(fmt.Sprintf("estimate replice plugins fails with %s", ret.Reasons()))
}
processNode := func(i int) { processNode := func(i int) {
node := allNodes[i] node := allNodes[i]
if !nodeutil.IsNodeAffinityMatched(node.Node(), affinity) || !nodeutil.IsTolerationMatched(node.Node(), tolerations) { if !nodeutil.IsNodeAffinityMatched(node.Node(), affinity) || !nodeutil.IsTolerationMatched(node.Node(), tolerations) {
@ -84,6 +88,10 @@ func (es *AccurateSchedulerEstimatorServer) estimateReplicas(
atomic.AddInt32(&res, maxReplica) atomic.AddInt32(&res, maxReplica)
} }
es.parallelizer.Until(ctx, len(allNodes), processNode) es.parallelizer.Until(ctx, len(allNodes), processNode)
if ret.IsSuccess() && replicas < res {
res = replicas
}
return res, nil return res, nil
} }

View File

@ -0,0 +1,196 @@
/*
Copyright 2024 The Karmada Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package framework
import (
"context"
"errors"
"strings"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
"github.com/karmada-io/karmada/pkg/estimator/pb"
schedcache "github.com/karmada-io/karmada/pkg/util/lifted/scheduler/cache"
)
// Framework manages the set of plugins in use by the estimator.
type Framework interface {
Handle
// RunEstimateReplicasPlugins runs the set of configured EstimateReplicasPlugins
// for estimating replicas based on the given replicaRequirements.
// It returns an integer and an Result.
// The integer represents the minimum calculated value of estimated replicas from each EstimateReplicasPlugin.
// The Result contains code, reasons and error
// it is merged from all plugins returned result codes
RunEstimateReplicasPlugins(ctx context.Context, snapshot *schedcache.Snapshot, replicaRequirements *pb.ReplicaRequirements) (int32, *Result)
// TODO(wengyao04): we can add filter and score plugin extension points if needed in the future
}
// Plugin is the parent type for all the scheduling framework plugins.
type Plugin interface {
Name() string
}
// EstimateReplicasPlugin is an interface for replica estimation plugins.
// These estimators are used to estimate the replicas for a given pb.ReplicaRequirements
type EstimateReplicasPlugin interface {
Plugin
// Estimate is called for each MaxAvailableReplicas request.
// It returns an integer and an error
// The integer representing the number of calculated replica for the given replicaRequirements
// The Result contains code, reasons and error
// it is merged from all plugins returned result codes
Estimate(ctx context.Context, snapshot *schedcache.Snapshot, replicaRequirements *pb.ReplicaRequirements) (int32, *Result)
}
// Handle provides data and some tools that plugins can use. It is
// passed to the plugin factories at the time of plugin initialization. Plugins
// must store and use this handle to call framework functions.
// We follow the design pattern as kubernetes scheduler framework
type Handle interface {
ClientSet() clientset.Interface
SharedInformerFactory() informers.SharedInformerFactory
}
// Code is the Status code/type which is returned from plugins.
type Code int
// Result indicates the result of running a plugin. It consists of a code, a
// message and (optionally) an error. When the status code is not `Success`,
// the reasons should explain why.
type Result struct {
code Code
reasons []string
err error
}
// These are predefined codes used in a Status.
const (
// Success means that plugin ran correctly and found resource schedulable.
// NOTE: A nil status is also considered as "Success".
Success Code = iota
// Unschedulable is used when a plugin finds the resource unschedulable.
// The accompanying status message should explain why the it is unschedulable.
Unschedulable
// Nooperation is used when a plugin is disabled or the plugin list are empty
Noopperation
// Error is used for internal plugin errors, unexpected input, etc.
Error
)
// This list should be exactly the same as the codes iota defined above in the same order.
var codes = []string{"Success", "Unschedulable", "Nooperation", "Error"}
func (c Code) String() string {
return codes[c]
}
// NewResult makes a result out of the given arguments and returns its pointer.
func NewResult(code Code, reasons ...string) *Result {
s := &Result{
code: code,
reasons: reasons,
}
if code == Error {
s.err = errors.New(strings.Join(reasons, ","))
}
return s
}
// PluginToResult maps plugin name to Result.
type PluginToResult map[string]*Result
// Merge merges the statuses in the map into one. The resulting status code have the following
// precedence: Error, Unschedulable, Disabled.
func (p PluginToResult) Merge() *Result {
if len(p) == 0 {
return NewResult(Noopperation, "plugin results are empty")
}
finalStatus := NewResult(Success)
var hasUnschedulable bool
hasAllNoOp := true
for _, s := range p {
if s.code == Error {
finalStatus.err = s.err
} else if s.code == Unschedulable {
hasUnschedulable = true
}
if s.code != Noopperation {
hasAllNoOp = false
}
finalStatus.code = s.code
finalStatus.reasons = append(finalStatus.reasons, s.reasons...)
}
if finalStatus.err != nil {
finalStatus.code = Error
} else if hasUnschedulable {
finalStatus.code = Unschedulable
} else if hasAllNoOp {
finalStatus.code = Noopperation
} else {
finalStatus.code = Success
}
return finalStatus
}
// IsSuccess returns true if and only if "Result" is nil or Code is "Success".
func (s *Result) IsSuccess() bool {
return s == nil || s.code == Success
}
// IsNoOperation return true if "Result" is not nil and Code is "Nooperation"
// ToDo (wengyao04): we can remove it once we include node resource estimation as the default plugin in the future
func (s *Result) IsNoOperation() bool {
return s != nil && s.code == Noopperation
}
// AsError returns nil if the Result is a success; otherwise returns an "error" object
// with a concatenated message on reasons of the Result.
func (s *Result) AsError() error {
if s.IsSuccess() {
return nil
}
if s.err != nil {
return s.err
}
return errors.New(strings.Join(s.reasons, ", "))
}
// Reasons returns reasons of the Result.
func (s *Result) Reasons() []string {
return s.reasons
}
// Code returns code of the Result.
func (s *Result) Code() Code {
if s == nil {
return Success
}
return s.code
}
// AsResult wraps an error in a Result.
func AsResult(err error) *Result {
return &Result{
code: Error,
reasons: []string{err.Error()},
err: err,
}
}

View File

@ -0,0 +1,27 @@
/*
Copyright 2024 The Karmada Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package plugins
import (
"github.com/karmada-io/karmada/pkg/estimator/server/framework/runtime"
)
// NewInTreeRegistry builds the registry with all the in-tree plugins.
func NewInTreeRegistry() runtime.Registry {
registry := runtime.Registry{}
return registry
}

View File

@ -0,0 +1,146 @@
/*
Copyright 2024 The Karmada Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package runtime
import (
"context"
"fmt"
"math"
"reflect"
"time"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
"github.com/karmada-io/karmada/pkg/estimator/pb"
"github.com/karmada-io/karmada/pkg/estimator/server/framework"
"github.com/karmada-io/karmada/pkg/estimator/server/metrics"
schedcache "github.com/karmada-io/karmada/pkg/util/lifted/scheduler/cache"
utilmetrics "github.com/karmada-io/karmada/pkg/util/metrics"
)
const (
estimator = "Estimator"
)
// frameworkImpl implements the Framework interface and is responsible for initializing and running scheduler
// plugins.
type frameworkImpl struct {
estimateReplicasPlugins []framework.EstimateReplicasPlugin
clientSet clientset.Interface
informerFactory informers.SharedInformerFactory
}
var _ framework.Framework = &frameworkImpl{}
type frameworkOptions struct {
clientSet clientset.Interface
informerFactory informers.SharedInformerFactory
}
// Option for the frameworkImpl.
type Option func(*frameworkOptions)
func defaultFrameworkOptions() frameworkOptions {
return frameworkOptions{}
}
// WithClientSet sets clientSet for the scheduling frameworkImpl.
func WithClientSet(clientSet clientset.Interface) Option {
return func(o *frameworkOptions) {
o.clientSet = clientSet
}
}
// WithInformerFactory sets informer factory for the scheduling frameworkImpl.
func WithInformerFactory(informerFactory informers.SharedInformerFactory) Option {
return func(o *frameworkOptions) {
o.informerFactory = informerFactory
}
}
// NewFramework creates a scheduling framework by registry.
func NewFramework(r Registry, opts ...Option) (framework.Framework, error) {
options := defaultFrameworkOptions()
for _, opt := range opts {
opt(&options)
}
f := &frameworkImpl{
informerFactory: options.informerFactory,
}
estimateReplicasPluginsList := reflect.ValueOf(&f.estimateReplicasPlugins).Elem()
estimateReplicasType := estimateReplicasPluginsList.Type().Elem()
for name, factory := range r {
p, err := factory(f)
if err != nil {
return nil, fmt.Errorf("failed to initialize plugin %q: %w", name, err)
}
addPluginToList(p, estimateReplicasType, &estimateReplicasPluginsList)
}
return f, nil
}
func addPluginToList(plugin framework.Plugin, pluginType reflect.Type, pluginList *reflect.Value) {
if reflect.TypeOf(plugin).Implements(pluginType) {
newPlugins := reflect.Append(*pluginList, reflect.ValueOf(plugin))
pluginList.Set(newPlugins)
}
}
// ClientSet returns a kubernetes clientset.
func (frw *frameworkImpl) ClientSet() clientset.Interface {
return frw.clientSet
}
// SharedInformerFactory returns a shared informer factory.
func (frw *frameworkImpl) SharedInformerFactory() informers.SharedInformerFactory {
return frw.informerFactory
}
// RunEstimateReplicasPlugins runs the set of configured EstimateReplicasPlugins
// for estimating replicas based on the given replicaRequirements.
// It returns an integer and an error.
// The integer represents the minimum calculated value of estimated replicas from each EstimateReplicasPlugin.
func (frw *frameworkImpl) RunEstimateReplicasPlugins(ctx context.Context, snapshot *schedcache.Snapshot, replicaRequirements *pb.ReplicaRequirements) (int32, *framework.Result) {
startTime := time.Now()
defer func() {
metrics.FrameworkExtensionPointDuration.WithLabelValues(estimator).Observe(utilmetrics.DurationInSeconds(startTime))
}()
var replica int32 = math.MaxInt32
results := make(framework.PluginToResult)
for _, pl := range frw.estimateReplicasPlugins {
plReplica, ret := frw.runEstimateReplicasPlugins(ctx, pl, snapshot, replicaRequirements)
if ret.IsSuccess() && plReplica < replica {
replica = plReplica
}
results[pl.Name()] = ret
}
return replica, results.Merge()
}
func (frw *frameworkImpl) runEstimateReplicasPlugins(
ctx context.Context,
pl framework.EstimateReplicasPlugin,
snapshot *schedcache.Snapshot,
replicaRequirements *pb.ReplicaRequirements,
) (int32, *framework.Result) {
startTime := time.Now()
replica, ret := pl.Estimate(ctx, snapshot, replicaRequirements)
metrics.PluginExecutionDuration.WithLabelValues(pl.Name(), estimator).Observe(utilmetrics.DurationInSeconds(startTime))
return replica, ret
}

View File

@ -0,0 +1,254 @@
/*
Copyright 2024 The Karmada Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package runtime
import (
"context"
"fmt"
"math"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/karmada-io/karmada/pkg/estimator/pb"
"github.com/karmada-io/karmada/pkg/estimator/server/framework"
schedcache "github.com/karmada-io/karmada/pkg/util/lifted/scheduler/cache"
)
type estimateReplicaResult struct {
replica int32
ret *framework.Result
}
type injectedResult struct {
estimateReplicaResult estimateReplicaResult
}
// TestPlugin implements all Plugin interfaces.
type TestPlugin struct {
name string
inj injectedResult
}
func (pl *TestPlugin) Name() string {
return pl.name
}
func (pl *TestPlugin) Estimate(_ context.Context, _ *schedcache.Snapshot, _ *pb.ReplicaRequirements) (int32, *framework.Result) {
return pl.inj.estimateReplicaResult.replica, pl.inj.estimateReplicaResult.ret
}
func Test_frameworkImpl_RunEstimateReplicasPlugins(t *testing.T) {
ctx := context.Background()
tests := []struct {
name string
plugins []*TestPlugin
expected estimateReplicaResult
}{
{
name: "no EstimateReplicasPlugins",
plugins: []*TestPlugin{},
expected: estimateReplicaResult{
replica: math.MaxInt32,
ret: framework.NewResult(framework.Noopperation, "plugin results are empty"),
},
},
{
name: "one EstimateReplicasPlugin plugin returned success, but another EstimateReplicasPlugin plugin returned error",
plugins: []*TestPlugin{
{
name: "success",
inj: injectedResult{
estimateReplicaResult{
replica: 1,
ret: framework.NewResult(framework.Success),
},
},
},
{
name: "error",
inj: injectedResult{
estimateReplicaResult{
ret: framework.AsResult(fmt.Errorf("plugin 2 failed")),
},
},
},
},
expected: estimateReplicaResult{
replica: 1,
ret: framework.AsResult(fmt.Errorf("plugin 2 failed")),
},
},
{
name: "one EstimateReplicasPlugin plugin returned success, but another EstimateReplicasPlugin plugin returned unschedulable",
plugins: []*TestPlugin{
{
name: "success",
inj: injectedResult{
estimateReplicaResult{
replica: 1,
ret: framework.NewResult(framework.Success),
},
},
},
{
name: "unschedulable",
inj: injectedResult{
estimateReplicaResult{
replica: 0,
ret: framework.NewResult(framework.Unschedulable, "plugin 2 is unschedulable"),
},
},
},
},
expected: estimateReplicaResult{
replica: 1,
ret: framework.NewResult(framework.Unschedulable, "plugin 2 is unschedulable"),
},
},
{
name: "one EstimateReplicasPlugin plugin returned unschedulable, but another EstimateReplicasPlugin plugin returned noop",
plugins: []*TestPlugin{
{
name: "unschedulable",
inj: injectedResult{
estimateReplicaResult{
replica: 0,
ret: framework.NewResult(framework.Unschedulable, "plugin 1 is unschedulable"),
},
},
},
{
name: "noop",
inj: injectedResult{
estimateReplicaResult{
replica: math.MaxInt32,
ret: framework.NewResult(framework.Noopperation, "plugin 2 is no operation"),
},
},
},
},
expected: estimateReplicaResult{
replica: math.MaxInt32,
ret: framework.NewResult(framework.Unschedulable, "plugin 1 is unschedulable", "plugin 2 is no operation"),
},
},
{
name: "one EstimateReplicasPlugin plugin returned success, but another EstimateReplicasPlugin plugin return no operation",
plugins: []*TestPlugin{
{
name: "success",
inj: injectedResult{
estimateReplicaResult{
replica: 1,
ret: framework.NewResult(framework.Success),
},
},
},
{
name: "noop",
inj: injectedResult{
estimateReplicaResult{
ret: framework.NewResult(framework.Noopperation, "plugin 2 is disabled"),
},
},
},
},
expected: estimateReplicaResult{
replica: 1,
ret: framework.NewResult(framework.Success, "plugin 2 is disabled"),
},
},
{
name: "all EstimateReplicasPlugins returned success and 1 replica",
plugins: []*TestPlugin{
{
name: "success1",
inj: injectedResult{
estimateReplicaResult{
replica: 1,
ret: framework.NewResult(framework.Success),
},
},
},
{
name: "success2",
inj: injectedResult{
estimateReplicaResult{
replica: 1,
ret: framework.NewResult(framework.Success),
},
},
},
},
expected: estimateReplicaResult{
replica: 1,
ret: framework.NewResult(framework.Success),
},
},
{
name: "all EstimateReplicasPlugins returned success and but different replica",
plugins: []*TestPlugin{
{
name: "success1",
inj: injectedResult{
estimateReplicaResult{
replica: 1,
ret: framework.NewResult(framework.Success),
},
},
},
{
name: "success2",
inj: injectedResult{
estimateReplicaResult{
replica: 2,
ret: framework.NewResult(framework.Success),
},
},
},
},
expected: estimateReplicaResult{
replica: 1,
ret: framework.NewResult(framework.Success),
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
r := make(Registry)
for _, p := range tt.plugins {
p := p
if err := r.Register(p.name, func(fh framework.Handle) (framework.Plugin, error) {
return p, nil
}); err != nil {
t.Fatalf("fail to register PreScorePlugins plugin (%s)", p.Name())
}
}
f, err := NewFramework(r)
if err != nil {
t.Errorf("create frame work error:%v", err)
}
replica, ret := f.RunEstimateReplicasPlugins(ctx, nil, &pb.ReplicaRequirements{})
require.Equal(t, tt.expected.ret.Code(), ret.Code())
assert.ElementsMatch(t, tt.expected.ret.Reasons(), ret.Reasons())
require.Equal(t, tt.expected.replica, replica)
})
}
}

View File

@ -0,0 +1,61 @@
/*
Copyright 2024 The Karmada Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package runtime
import (
"fmt"
"github.com/karmada-io/karmada/pkg/estimator/server/framework"
)
// PluginFactory is a function that builds a plugin.
type PluginFactory = func(f framework.Handle) (framework.Plugin, error)
// Registry is a collection of all available plugins. The framework uses a
// registry to enable and initialize configured plugins.
// All plugins must be in the registry before initializing the framework.
type Registry map[string]PluginFactory
// Register adds a new plugin to the registry. If a plugin with the same name
// exists, it returns an error.
func (r Registry) Register(name string, factory PluginFactory) error {
if _, ok := r[name]; ok {
return fmt.Errorf("a plugin named %v already exists", name)
}
r[name] = factory
return nil
}
// Unregister removes an existing plugin from the registry. If no plugin with
// the provided name exists, it returns an error.
func (r Registry) Unregister(name string) error {
if _, ok := r[name]; !ok {
return fmt.Errorf("no plugin named %v exists", name)
}
delete(r, name)
return nil
}
// Merge merges the provided registry to the current one.
func (r Registry) Merge(in Registry) error {
for name, factory := range in {
if err := r.Register(name, factory); err != nil {
return err
}
}
return nil
}

View File

@ -0,0 +1,224 @@
/*
Copyright 2024 The Karmada Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package runtime
import (
"testing"
"github.com/google/uuid"
"github.com/karmada-io/karmada/pkg/estimator/server/framework"
)
type mockNoopPlugin struct {
uuid string
}
func (p *mockNoopPlugin) Name() string {
return p.uuid
}
func NewMockNoopPluginFactory() PluginFactory {
uuid := uuid.New().String()
return func(_ framework.Handle) (framework.Plugin, error) {
return &mockNoopPlugin{uuid}, nil
}
}
// isRegistryEqual compares two registries for equality. This function is used in place of
// reflect.DeepEqual() and cmp() as they don't compare function values.
func isRegistryEqual(registryX, registryY Registry) bool {
for name, pluginFactory := range registryY {
if val, ok := registryX[name]; ok {
p1, _ := pluginFactory(nil)
p2, _ := val(nil)
if p1.Name() != p2.Name() {
// pluginFactory functions are not the same.
return false
}
} else {
// registryY contains an entry that is not present in registryX
return false
}
}
for name := range registryX {
if _, ok := registryY[name]; !ok {
// registryX contains an entry that is not present in registryY
return false
}
}
return true
}
// TestRegistry_Register is same as https://github.com/kubernetes/kubernetes/blob/master/pkg/scheduler/framework/runtime/registry_test.go#L174-L223
// because the framework registry design is same as kubernetes framework register
func TestRegistry_Register(t *testing.T) {
m1 := NewMockNoopPluginFactory()
m2 := NewMockNoopPluginFactory()
tests := []struct {
name string
registry Registry
nameToRegister string
factoryToRegister PluginFactory
expected Registry
shouldError bool
}{
{
name: "valid Register",
registry: Registry{},
nameToRegister: "pluginFactory1",
factoryToRegister: m1,
expected: Registry{
"pluginFactory1": m1,
},
shouldError: false,
},
{
name: "Register duplicate factories",
registry: Registry{
"pluginFactory1": m1,
},
nameToRegister: "pluginFactory1",
factoryToRegister: m2,
expected: Registry{
"pluginFactory1": m1,
},
shouldError: true,
},
}
for _, scenario := range tests {
t.Run(scenario.name, func(t *testing.T) {
err := scenario.registry.Register(scenario.nameToRegister, scenario.factoryToRegister)
if (err == nil) == scenario.shouldError {
t.Errorf("Register() shouldError is: %v however err is: %v.", scenario.shouldError, err)
return
}
if !isRegistryEqual(scenario.expected, scenario.registry) {
t.Errorf("Register(). Expected %v. Got %v instead.", scenario.expected, scenario.registry)
}
})
}
}
// TestRegistry_Unregister is same as https://github.com/kubernetes/kubernetes/blob/master/pkg/scheduler/framework/runtime/registry_test.go#L225-L270
// because the framework registry design is same as kubernetes framework register
func TestRegistry_Unregister(t *testing.T) {
m1 := NewMockNoopPluginFactory()
m2 := NewMockNoopPluginFactory()
tests := []struct {
name string
registry Registry
nameToUnregister string
expected Registry
shouldError bool
}{
{
name: "valid Unregister",
registry: Registry{
"pluginFactory1": m1,
"pluginFactory2": m2,
},
nameToUnregister: "pluginFactory1",
expected: Registry{
"pluginFactory2": m2,
},
shouldError: false,
},
{
name: "Unregister non-existent plugin factory",
registry: Registry{},
nameToUnregister: "pluginFactory1",
expected: Registry{},
shouldError: true,
},
}
for _, scenario := range tests {
t.Run(scenario.name, func(t *testing.T) {
err := scenario.registry.Unregister(scenario.nameToUnregister)
if (err == nil) == scenario.shouldError {
t.Errorf("Unregister() shouldError is: %v however err is: %v.", scenario.shouldError, err)
return
}
if !isRegistryEqual(scenario.expected, scenario.registry) {
t.Errorf("Unregister(). Expected %v. Got %v instead.", scenario.expected, scenario.registry)
}
})
}
}
// TestRegistry_Merge is same as https://github.com/kubernetes/kubernetes/blob/master/pkg/scheduler/framework/runtime/registry_test.go#L119-L172
// because the framework registry design is same as kubernetes framework register
func TestRegistry_Merge(t *testing.T) {
m1 := NewMockNoopPluginFactory()
m2 := NewMockNoopPluginFactory()
tests := []struct {
name string
primaryRegistry Registry
registryToMerge Registry
expected Registry
shouldError bool
}{
{
name: "valid Merge",
primaryRegistry: Registry{
"pluginFactory1": m1,
},
registryToMerge: Registry{
"pluginFactory2": m2,
},
expected: Registry{
"pluginFactory1": m1,
"pluginFactory2": m2,
},
shouldError: false,
},
{
name: "Merge duplicate factories",
primaryRegistry: Registry{
"pluginFactory1": m1,
},
registryToMerge: Registry{
"pluginFactory1": m2,
},
expected: Registry{
"pluginFactory1": m1,
},
shouldError: true,
},
}
for _, scenario := range tests {
t.Run(scenario.name, func(t *testing.T) {
err := scenario.primaryRegistry.Merge(scenario.registryToMerge)
if (err == nil) == scenario.shouldError {
t.Errorf("Merge() shouldError is: %v, however err is: %v.", scenario.shouldError, err)
return
}
if !isRegistryEqual(scenario.expected, scenario.primaryRegistry) {
t.Errorf("Merge(). Expected %v. Got %v instead.", scenario.expected, scenario.primaryRegistry)
}
})
}
}

View File

@ -64,9 +64,34 @@ var (
Buckets: prometheus.ExponentialBuckets(0.001, 2, 10), Buckets: prometheus.ExponentialBuckets(0.001, 2, 10),
}, []string{"result", "type", "step"}) }, []string{"result", "type", "step"})
// FrameworkExtensionPointDuration is the metrics which indicates the latency for running all plugins of a specific extension point.
FrameworkExtensionPointDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Subsystem: SchedulerEstimatorSubsystem,
Name: "estimating_plugin_extension_point_duration_seconds",
Help: "Latency for running all plugins of a specific extension point.",
// Start with 0.1ms with the last bucket being [~200ms, Inf)
Buckets: prometheus.ExponentialBuckets(0.0001, 2, 12),
},
[]string{"estimating_plugin_extension_point"})
// PluginExecutionDuration is the metrics which indicates the duration for running a plugin at a specific extension point.
PluginExecutionDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Subsystem: SchedulerEstimatorSubsystem,
Name: "estimating_plugin_execution_duration_seconds",
Help: "Duration for running a plugin at a specific extension point.",
// Start with 0.01ms with the last bucket being [~22ms, Inf). We use a small factor (1.5)
// so that we have better granularity since plugin latency is very sensitive.
Buckets: prometheus.ExponentialBuckets(0.00001, 1.5, 20),
},
[]string{"estimating_plugin", "estimating_plugin_extension_point"})
metrics = []prometheus.Collector{ metrics = []prometheus.Collector{
requestCount, requestCount,
estimatingAlgorithmLatency, estimatingAlgorithmLatency,
FrameworkExtensionPointDuration,
PluginExecutionDuration,
} }
) )

View File

@ -43,6 +43,9 @@ import (
"github.com/karmada-io/karmada/cmd/scheduler-estimator/app/options" "github.com/karmada-io/karmada/cmd/scheduler-estimator/app/options"
"github.com/karmada-io/karmada/pkg/estimator/pb" "github.com/karmada-io/karmada/pkg/estimator/pb"
"github.com/karmada-io/karmada/pkg/estimator/server/framework"
frameworkplugins "github.com/karmada-io/karmada/pkg/estimator/server/framework/plugins"
frameworkruntime "github.com/karmada-io/karmada/pkg/estimator/server/framework/runtime"
"github.com/karmada-io/karmada/pkg/estimator/server/metrics" "github.com/karmada-io/karmada/pkg/estimator/server/metrics"
"github.com/karmada-io/karmada/pkg/estimator/server/replica" "github.com/karmada-io/karmada/pkg/estimator/server/replica"
estimatorservice "github.com/karmada-io/karmada/pkg/estimator/service" estimatorservice "github.com/karmada-io/karmada/pkg/estimator/service"
@ -70,15 +73,16 @@ var (
// AccurateSchedulerEstimatorServer is the gRPC server of a cluster accurate scheduler estimator. // AccurateSchedulerEstimatorServer is the gRPC server of a cluster accurate scheduler estimator.
// Please see https://github.com/karmada-io/karmada/pull/580 (#580). // Please see https://github.com/karmada-io/karmada/pull/580 (#580).
type AccurateSchedulerEstimatorServer struct { type AccurateSchedulerEstimatorServer struct {
port int port int
clusterName string clusterName string
kubeClient kubernetes.Interface kubeClient kubernetes.Interface
restMapper meta.RESTMapper restMapper meta.RESTMapper
informerFactory informers.SharedInformerFactory informerFactory informers.SharedInformerFactory
nodeLister listv1.NodeLister nodeLister listv1.NodeLister
replicaLister *replica.ListerWrapper replicaLister *replica.ListerWrapper
informerManager genericmanager.SingleClusterInformerManager informerManager genericmanager.SingleClusterInformerManager
parallelizer parallelize.Parallelizer parallelizer parallelize.Parallelizer
estimateFramework framework.Framework
Cache schedcache.Cache Cache schedcache.Cache
} }
@ -90,7 +94,7 @@ func NewEstimatorServer(
discoveryClient discovery.DiscoveryInterface, discoveryClient discovery.DiscoveryInterface,
opts *options.Options, opts *options.Options,
stopChan <-chan struct{}, stopChan <-chan struct{},
) *AccurateSchedulerEstimatorServer { ) (*AccurateSchedulerEstimatorServer, error) {
cachedDiscoClient := cacheddiscovery.NewMemCacheClient(discoveryClient) cachedDiscoClient := cacheddiscovery.NewMemCacheClient(discoveryClient)
restMapper := restmapper.NewDeferredDiscoveryRESTMapper(cachedDiscoClient) restMapper := restmapper.NewDeferredDiscoveryRESTMapper(cachedDiscoClient)
informerFactory := informers.NewSharedInformerFactory(kubeClient, 0) informerFactory := informers.NewSharedInformerFactory(kubeClient, 0)
@ -120,9 +124,19 @@ func NewEstimatorServer(
es.informerManager.Lister(gvr) es.informerManager.Lister(gvr)
} }
registry := frameworkplugins.NewInTreeRegistry()
estimateFramework, err := frameworkruntime.NewFramework(registry,
frameworkruntime.WithClientSet(kubeClient),
frameworkruntime.WithInformerFactory(informerFactory),
)
if err != nil {
return es, err
}
es.estimateFramework = estimateFramework
addAllEventHandlers(es, informerFactory) addAllEventHandlers(es, informerFactory)
return es return es, nil
} }
// Start runs the accurate replica estimator server. // Start runs the accurate replica estimator server.
@ -193,7 +207,6 @@ func (es *AccurateSchedulerEstimatorServer) MaxAvailableReplicas(ctx context.Con
if request.Cluster != es.clusterName { if request.Cluster != es.clusterName {
return nil, fmt.Errorf("cluster name does not match, got: %s, desire: %s", request.Cluster, es.clusterName) return nil, fmt.Errorf("cluster name does not match, got: %s, desire: %s", request.Cluster, es.clusterName)
} }
maxReplicas, err := es.EstimateReplicas(ctx, object, request) maxReplicas, err := es.EstimateReplicas(ctx, object, request)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to estimate replicas: %v", err) return nil, fmt.Errorf("failed to estimate replicas: %v", err)

View File

@ -244,7 +244,7 @@ func TestAccurateSchedulerEstimatorServer_MaxAvailableReplicas(t *testing.T) {
}, },
} }
es := NewEstimatorServer(fake.NewSimpleClientset(tt.objs...), dynamicClient, discoveryClient, opt, ctx.Done()) es, _ := NewEstimatorServer(fake.NewSimpleClientset(tt.objs...), dynamicClient, discoveryClient, opt, ctx.Done())
es.informerFactory.Start(ctx.Done()) es.informerFactory.Start(ctx.Done())
es.informerFactory.WaitForCacheSync(ctx.Done()) es.informerFactory.WaitForCacheSync(ctx.Done())
@ -396,7 +396,7 @@ func BenchmarkAccurateSchedulerEstimatorServer_MaxAvailableReplicas(b *testing.B
objs = append(objs, pod) objs = append(objs, pod)
} }
es := NewEstimatorServer(fake.NewSimpleClientset(objs...), dynamicClient, discoveryClient, opt, ctx.Done()) es, _ := NewEstimatorServer(fake.NewSimpleClientset(objs...), dynamicClient, discoveryClient, opt, ctx.Done())
es.informerFactory.Start(ctx.Done()) es.informerFactory.Start(ctx.Done())
es.informerFactory.WaitForCacheSync(ctx.Done()) es.informerFactory.WaitForCacheSync(ctx.Done())