package scheduler import ( "context" "time" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/dynamic" "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" "k8s.io/klog/v2" memclusterapi "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1" "github.com/karmada-io/karmada/pkg/apis/policy/v1alpha1" karmadaclientset "github.com/karmada-io/karmada/pkg/generated/clientset/versioned" informerfactory "github.com/karmada-io/karmada/pkg/generated/informers/externalversions" lister "github.com/karmada-io/karmada/pkg/generated/listers/propagationstrategy/v1alpha1" schedulercache "github.com/karmada-io/karmada/pkg/scheduler/cache" "github.com/karmada-io/karmada/pkg/scheduler/core" "github.com/karmada-io/karmada/pkg/scheduler/framework/plugins/clusteraffinity" ) const ( // maxRetries is the number of times a service will be retried before it is dropped out of the queue. // With the current rate-limiter in use (5ms*2^(maxRetries-1)) the following numbers represent the // sequence of delays between successive queuings of a propagationbinding. // // 5ms, 10ms, 20ms, 40ms, 80ms, 160ms, 320ms, 640ms, 1.3s, 2.6s, 5.1s, 10.2s, 20.4s, 41s, 82s maxRetries = 15 ) // Scheduler is the scheduler schema, which is used to schedule a specific resource to specific clusters type Scheduler struct { DynamicClient dynamic.Interface KarmadaClient karmadaclientset.Interface KubeClient kubernetes.Interface bindingInformer cache.SharedIndexInformer bindingLister lister.PropagationBindingLister policyInformer cache.SharedIndexInformer policyLister lister.PropagationPolicyLister informerFactory informerfactory.SharedInformerFactory // TODO: implement a priority scheduling queue queue workqueue.RateLimitingInterface Algorithm core.ScheduleAlgorithm schedulerCache schedulercache.Cache } // NewScheduler instantiates a scheduler func NewScheduler(dynamicClient dynamic.Interface, karmadaClient karmadaclientset.Interface, kubeClient kubernetes.Interface) *Scheduler { factory := informerfactory.NewSharedInformerFactory(karmadaClient, 0) bindingInformer := factory.Propagationstrategy().V1alpha1().PropagationBindings().Informer() bindingLister := factory.Propagationstrategy().V1alpha1().PropagationBindings().Lister() policyInformer := factory.Propagationstrategy().V1alpha1().PropagationPolicies().Informer() policyLister := factory.Propagationstrategy().V1alpha1().PropagationPolicies().Lister() queue := workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()) schedulerCache := schedulercache.NewCache() // TODO: make plugins as a flag algorithm := core.NewGenericScheduler(schedulerCache, policyLister, []string{clusteraffinity.Name}) sched := &Scheduler{ DynamicClient: dynamicClient, KarmadaClient: karmadaClient, KubeClient: kubeClient, bindingInformer: bindingInformer, bindingLister: bindingLister, policyInformer: policyInformer, policyLister: policyLister, informerFactory: factory, queue: queue, Algorithm: algorithm, schedulerCache: schedulerCache, } bindingInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: sched.onPropagationBindingAdd, UpdateFunc: sched.onPropagationBindingUpdate, }) memclusterInformer := factory.Cluster().V1alpha1().Clusters().Informer() memclusterInformer.AddEventHandler( cache.ResourceEventHandlerFuncs{ AddFunc: sched.addCluster, UpdateFunc: sched.updateCluster, DeleteFunc: sched.deleteCluster, }, ) return sched } // Run runs the scheduler func (s *Scheduler) Run(ctx context.Context) { stopCh := ctx.Done() klog.Infof("Starting karmada scheduler") defer klog.Infof("Shutting down karmada scheduler") s.informerFactory.Start(stopCh) if !cache.WaitForCacheSync(stopCh, s.bindingInformer.HasSynced) { return } go wait.Until(s.worker, time.Second, stopCh) <-stopCh } func (s *Scheduler) onPropagationBindingAdd(obj interface{}) { propagationBinding := obj.(*v1alpha1.PropagationBinding) if len(propagationBinding.Spec.Clusters) > 0 { return } key, err := cache.MetaNamespaceKeyFunc(obj) if err != nil { klog.Errorf("couldn't get key for object %#v: %v", obj, err) return } s.queue.Add(key) } func (s *Scheduler) onPropagationBindingUpdate(old, cur interface{}) { s.onPropagationBindingAdd(cur) } func (s *Scheduler) worker() { for s.scheduleNext() { } } func (s *Scheduler) scheduleNext() bool { key, shutdown := s.queue.Get() if shutdown { klog.Errorf("Fail to pop item from queue") return false } defer s.queue.Done(key) err := s.scheduleOne(key.(string)) s.handleErr(err, key) return true } func (s *Scheduler) scheduleOne(key string) (err error) { klog.V(4).Infof("begin scheduling PropagationBinding %s", key) defer klog.V(4).Infof("end scheduling PropagationBinding %s: %v", key, err) ns, name, err := cache.SplitMetaNamespaceKey(key) if err != nil { return err } propagationBinding, err := s.bindingLister.PropagationBindings(ns).Get(name) if errors.IsNotFound(err) { return nil } scheduleResult, err := s.Algorithm.Schedule(context.TODO(), propagationBinding) if err != nil { klog.V(2).Infof("failed scheduling PropagationBinding %s: %v", key, err) return err } klog.V(4).Infof("PropagationBinding %s scheduled to clusters %v", key, scheduleResult.SuggestedClusters) binding := propagationBinding.DeepCopy() targetClusters := make([]v1alpha1.TargetCluster, len(scheduleResult.SuggestedClusters)) for i, cluster := range scheduleResult.SuggestedClusters { targetClusters[i] = v1alpha1.TargetCluster{Name: cluster} } binding.Spec.Clusters = targetClusters _, err = s.KarmadaClient.PropagationstrategyV1alpha1().PropagationBindings(ns).Update(context.TODO(), binding, metav1.UpdateOptions{}) if err != nil { return err } return nil } func (s *Scheduler) handleErr(err error, key interface{}) { if err == nil || errors.HasStatusCause(err, v1.NamespaceTerminatingCause) { s.queue.Forget(key) return } if s.queue.NumRequeues(key) < maxRetries { s.queue.AddRateLimited(key) return } utilruntime.HandleError(err) klog.V(2).Infof("Dropping propagationbinding %q out of the queue: %v", key, err) s.queue.Forget(key) } func (s *Scheduler) addCluster(obj interface{}) { cluster, ok := obj.(*memclusterapi.Cluster) if !ok { klog.Errorf("cannot convert to Cluster: %v", obj) return } klog.V(3).Infof("add event for cluster %s", cluster.Name) s.schedulerCache.AddCluster(cluster) } func (s *Scheduler) updateCluster(_, newObj interface{}) { newCluster, ok := newObj.(*memclusterapi.Cluster) if !ok { klog.Errorf("cannot convert newObj to Cluster: %v", newObj) return } klog.V(3).Infof("update event for cluster %s", newCluster.Name) s.schedulerCache.UpdateCluster(newCluster) } func (s *Scheduler) deleteCluster(obj interface{}) { var cluster *memclusterapi.Cluster switch t := obj.(type) { case *memclusterapi.Cluster: cluster = t case cache.DeletedFinalStateUnknown: var ok bool cluster, ok = t.Obj.(*memclusterapi.Cluster) if !ok { klog.Errorf("cannot convert to memclusterapi.Cluster: %v", t.Obj) return } default: klog.Errorf("cannot convert to memclusterapi.Cluster: %v", t) return } klog.V(3).Infof("delete event for cluster %s", cluster.Name) s.schedulerCache.DeleteCluster(cluster) }