Add controller-manager framework

RainbowMango 2020-11-11 12:05:06 +08:00 committed by Hongcai Ren
parent 6d926f23f0
commit 135b0dbdda
6 changed files with 516 additions and 0 deletions

View File

@@ -0,0 +1,113 @@
package app
import (
"context"
"errors"
"flag"
"fmt"
"io/ioutil"
"os"
"strings"
"time"
"github.com/spf13/cobra"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/component-base/logs"
"k8s.io/klog/v2"
"github.com/huawei-cloudnative/karmada/cmd/controller-manager/app/leaderelection"
"github.com/huawei-cloudnative/karmada/cmd/controller-manager/app/options"
"github.com/huawei-cloudnative/karmada/pkg/controllers/membercluster"
"github.com/huawei-cloudnative/karmada/pkg/controllers/util"
)
// NewControllerManagerCommand creates a *cobra.Command object with default parameters
func NewControllerManagerCommand(stopChan <-chan struct{}) *cobra.Command {
verFlag := false
opts := options.NewOptions()
cmd := &cobra.Command{
Use: "controller-manager",
Long: `The controller manager runs a bunch of controllers`,
Run: func(cmd *cobra.Command, args []string) {
if verFlag {
os.Exit(0)
}
opts.Complete()
if err := Run(opts, stopChan); err != nil {
fmt.Fprintf(os.Stderr, "%v\n", err)
os.Exit(1)
}
},
}
cmd.Flags().AddGoFlagSet(flag.CommandLine)
cmd.Flags().BoolVar(&verFlag, "version", false, "Prints the version info of controller manager.")
return cmd
}
// Run runs the controller-manager with options. This should never exit.
func Run(opts *options.Options, stopChan <-chan struct{}) error {
logs.InitLogs()
defer logs.FlushLogs()
var err error
// TODO(RainbowMango): need to change to shim kube-apiserver config.
opts.KubeConfig, err = clientcmd.BuildConfigFromFlags("", "")
if err != nil {
panic(err)
}
if len(opts.HostNamespace) == 0 {
// For in-cluster deployment, set the namespace associated with the service account token
data, err := ioutil.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/namespace")
if err != nil {
klog.Fatalf("An error occurred while attempting to discover the namespace from the service account: %v", err)
}
opts.HostNamespace = strings.TrimSpace(string(data))
}
// Validate if the namespace is configured
if len(opts.HostNamespace) == 0 {
klog.Fatalf("The namespace must be specified")
}
elector, err := leaderelection.NewLeaderElector(opts, startControllers)
if err != nil {
panic(err)
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go func() {
select {
case <-stopChan:
cancel()
case <-ctx.Done():
}
}()
elector.Run(ctx)
klog.Errorf("lost lease")
return errors.New("lost lease")
}
func startControllers(opts *options.Options, stopChan <-chan struct{}) {
controllerConfig := &util.ControllerConfig{
HeadClusterConfig: opts.KubeConfig,
}
if err := membercluster.StartMemberClusterController(controllerConfig, stopChan); err != nil {
klog.Fatalf("Failed to start member cluster controller. error: %v", err)
}
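// Placeholder loop: no further controllers are wired in by this commit, so keep the routine alive and log periodically.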
for {
klog.Info("starting controller.")
time.Sleep(5 * time.Second)
}
}
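Note: the --version flag above causes the process to exit without printing anything. A minimal sketch of what the handler might do instead, assuming a hypothetical build-time version string (not something this commit provides):

if verFlag {
	fmt.Printf("karmada-controller-manager version: %s\n", version) // "version" is a hypothetical build-time variable
	os.Exit(0)
}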

View File

@@ -0,0 +1,70 @@
package leaderelection
import (
"context"
"os"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/uuid"
kubeclient "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
corev1 "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/leaderelection"
"k8s.io/client-go/tools/leaderelection/resourcelock"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
"github.com/huawei-cloudnative/karmada/cmd/controller-manager/app/options"
)
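// NewLeaderElector creates a LeaderElector that, once this instance acquires the leader lease, invokes fnStartControllers with a stop channel tied to the election context.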
func NewLeaderElector(opts *options.Options, fnStartControllers func(*options.Options, <-chan struct{})) (*leaderelection.LeaderElector, error) {
const component = "controller-manager"
rest.AddUserAgent(opts.KubeConfig, "leader-election")
leaderElectionClient := kubeclient.NewForConfigOrDie(opts.KubeConfig)
// Prepare event clients.
broadcaster := record.NewBroadcaster()
broadcaster.StartRecordingToSink(&corev1.EventSinkImpl{Interface: leaderElectionClient.CoreV1().Events(opts.HostNamespace)})
eventRecorder := broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: component})
// add a uniquifier so that two processes on the same host don't accidentally both become active
hostname, err := os.Hostname()
if err != nil {
klog.Infof("unable to get hostname: %v", err)
return nil, err
}
id := hostname + "_" + string(uuid.NewUUID())
rl, err := resourcelock.New(opts.LeaderElection.ResourceLock,
opts.HostNamespace,
component,
leaderElectionClient.CoreV1(),
leaderElectionClient.CoordinationV1(),
resourcelock.ResourceLockConfig{
Identity: id,
EventRecorder: eventRecorder,
})
if err != nil {
klog.Infof("couldn't create resource lock: %v", err)
return nil, err
}
return leaderelection.NewLeaderElector(leaderelection.LeaderElectionConfig{
Lock: rl,
LeaseDuration: opts.LeaderElection.LeaseDuration.Duration,
RenewDeadline: opts.LeaderElection.RenewDeadline.Duration,
RetryPeriod: opts.LeaderElection.RetryPeriod.Duration,
Callbacks: leaderelection.LeaderCallbacks{
OnStartedLeading: func(ctx context.Context) {
klog.Info("promoted as leader")
stopChan := ctx.Done()
fnStartControllers(opts, stopChan)
<-stopChan
},
OnStoppedLeading: func() {
klog.Info("leader election lost")
},
},
})
}
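Note on the callback wiring above: OnStartedLeading receives the election context, and its Done() channel is handed to fnStartControllers as the stop channel, so controllers started by the leader are told to stop as soon as leadership is lost or the parent context is cancelled.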

View File

@@ -0,0 +1,56 @@
package options
import (
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/leaderelection/resourcelock"
componentbaseconfig "k8s.io/component-base/config"
"k8s.io/klog/v2"
)
var (
defaultElectionLeaseDuration = metav1.Duration{Duration: 15 * time.Second}
defaultElectionRenewDeadline = metav1.Duration{Duration: 10 * time.Second}
defaultElectionRetryPeriod = metav1.Duration{Duration: 2 * time.Second}
)
// Options contains everything necessary to create and run controller-manager.
type Options struct {
KubeConfig *rest.Config
HostNamespace string
LeaderElection componentbaseconfig.LeaderElectionConfiguration
}
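// NewOptions builds an empty Options.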
func NewOptions() *Options {
return &Options{}
}
// Complete fills in any fields not set that are required to have valid data. It mutates the receiver.
func (o *Options) Complete() {
if len(o.HostNamespace) == 0 {
o.HostNamespace = "default"
klog.Infof("Set default value: Options.HostNamespace = %s", "default")
}
if len(o.LeaderElection.ResourceLock) == 0 {
o.LeaderElection.ResourceLock = resourcelock.EndpointsLeasesResourceLock
klog.Infof("Set default value: Options.LeaderElection.ResourceLock = %s", resourcelock.EndpointsLeasesResourceLock)
}
if o.LeaderElection.LeaseDuration.Duration.Seconds() == 0 {
o.LeaderElection.LeaseDuration = defaultElectionLeaseDuration
klog.Infof("Set default value: Options.LeaderElection.LeaseDuration = %s", defaultElectionLeaseDuration.Duration.String())
}
if o.LeaderElection.RenewDeadline.Duration.Seconds() == 0 {
o.LeaderElection.RenewDeadline = defaultElectionRenewDeadline
klog.Infof("Set default value: Options.LeaderElection.RenewDeadline = %s", defaultElectionRenewDeadline.Duration.String())
}
if o.LeaderElection.RetryPeriod.Duration.Seconds() == 0 {
o.LeaderElection.RetryPeriod = defaultElectionRetryPeriod
klog.Infof("Set default value: Options.LeaderElection.RetryPeriod = %s", defaultElectionRetryPeriod.Duration.String())
}
}
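The defaults above are applied in Complete(), but no command-line flags are registered for them yet. A minimal sketch of what flag wiring might look like, assuming a hypothetical AddFlags helper and the github.com/spf13/pflag package (neither is part of this commit):

// AddFlags registers controller-manager options on the given flag set (hypothetical helper).
func (o *Options) AddFlags(fs *pflag.FlagSet) {
	fs.StringVar(&o.HostNamespace, "host-namespace", o.HostNamespace, "Namespace in the host cluster that holds the leader-election lock.")
	fs.StringVar(&o.LeaderElection.ResourceLock, "leader-elect-resource-lock", o.LeaderElection.ResourceLock, "The type of resource object used for locking during leader election.")
	fs.DurationVar(&o.LeaderElection.LeaseDuration.Duration, "leader-elect-lease-duration", o.LeaderElection.LeaseDuration.Duration, "Duration that non-leader candidates wait before trying to acquire leadership.")
	fs.DurationVar(&o.LeaderElection.RenewDeadline.Duration, "leader-elect-renew-deadline", o.LeaderElection.RenewDeadline.Duration, "Deadline by which the acting leader must renew the lease before it is considered lost.")
	fs.DurationVar(&o.LeaderElection.RetryPeriod.Duration, "leader-elect-retry-period", o.LeaderElection.RetryPeriod.Duration, "Interval between attempts to acquire or renew the lease.")
}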

View File

@@ -0,0 +1,24 @@
package main
import (
"fmt"
"os"
apiserver "k8s.io/apiserver/pkg/server"
"k8s.io/component-base/logs"
"github.com/huawei-cloudnative/karmada/cmd/controller-manager/app"
)
// Controller-manager main.
func main() {
logs.InitLogs()
defer logs.FlushLogs()
stopChan := apiserver.SetupSignalHandler()
if err := app.NewControllerManagerCommand(stopChan).Execute(); err != nil {
fmt.Fprintf(os.Stderr, "%v\n", err)
os.Exit(1)
}
}
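For context, apiserver.SetupSignalHandler registers handlers for SIGTERM and SIGINT and returns a channel that is closed on the first signal (a second signal terminates the process immediately). That channel is threaded through NewControllerManagerCommand into Run and, ultimately, into the controllers as their stop channel.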

View File

@@ -0,0 +1,244 @@
package membercluster
import (
"fmt"
"time"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/workqueue"
"k8s.io/klog/v2"
"github.com/huawei-cloudnative/karmada/pkg/controllers/util"
clientset "github.com/huawei-cloudnative/karmada/pkg/generated/clientset/versioned"
multikubecheme "github.com/huawei-cloudnative/karmada/pkg/generated/clientset/versioned/scheme"
informers "github.com/huawei-cloudnative/karmada/pkg/generated/informers/externalversions"
listers "github.com/huawei-cloudnative/karmada/pkg/generated/listers/membercluster/v1alpha1"
)
const controllerAgentName = "membercluster-controller"
// Controller is the controller implementation for membercluster resources
type Controller struct {
// karmadaClientSet is the clientset for our own API group.
karmadaClientSet clientset.Interface
// kubeClientSet is a standard kubernetes clientset.
kubeClientSet kubernetes.Interface
memberclusterLister listers.MemberClusterLister
memberclusterSynced cache.InformerSynced
// workqueue is a rate limited work queue. This is used to queue work to be
// processed instead of performing it as soon as a change happens. This
// means we can ensure we only process a fixed amount of resources at a
// time, and makes it easy to ensure we are never processing the same item
// simultaneously in two different workers.
workqueue workqueue.RateLimitingInterface
// recorder is an event recorder for recording Event resources to the
// Kubernetes API.
eventRecorder record.EventRecorder
}
// StartMemberClusterController starts a new cluster controller.
func StartMemberClusterController(config *util.ControllerConfig, stopChan <-chan struct{}) error {
controller, err := newMemberClusterController(config)
if err != nil {
return err
}
klog.Infof("Starting member cluster controller")
go wait.Until(func() {
if err := controller.Run(1, stopChan); err != nil {
klog.Errorf("controller exit unexpected! will restart later, controller: %s, error: %v", controllerAgentName, err)
}
}, 1*time.Second, stopChan)
return nil
}
// newMemberClusterController returns a new controller.
func newMemberClusterController(config *util.ControllerConfig) (*Controller, error) {
headClusterConfig := rest.CopyConfig(config.HeadClusterConfig)
kubeClientSet := kubernetes.NewForConfigOrDie(headClusterConfig)
multikubeClientSet := clientset.NewForConfigOrDie(headClusterConfig)
memberclusterInformer := informers.NewSharedInformerFactory(multikubeClientSet, 0).Membercluster().V1alpha1().MemberClusters()
// Add multikube types to the default Kubernetes Scheme so Events can be logged for karmada types.
utilruntime.Must(multikubecheme.AddToScheme(scheme.Scheme))
// Create event broadcaster
klog.V(1).Infof("Creating event broadcaster for %s", controllerAgentName)
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: kubeClientSet.CoreV1().Events("")})
controller := &Controller{
karmadaClientSet: multikubeClientSet,
kubeClientSet: kubeClientSet,
memberclusterLister: memberclusterInformer.Lister(),
memberclusterSynced: memberclusterInformer.Informer().HasSynced,
workqueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), controllerAgentName),
eventRecorder: eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: controllerAgentName}),
}
klog.Info("Setting up event handlers")
memberclusterInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
klog.Infof("Received add event. just add to queue.")
controller.enqueueEventResource(obj)
},
UpdateFunc: func(old, new interface{}) {
klog.Infof("Received update event. just add to queue.")
controller.enqueueEventResource(new)
},
DeleteFunc: func(obj interface{}) {
klog.Infof("Received delete event. Do nothing just log.")
},
})
return controller, nil
}
// Run will set up the event handlers for types we are interested in, as well
// as syncing informer caches and starting workers. It will block until stopCh
// is closed, at which point it will shutdown the workqueue and wait for
// workers to finish processing their current work items.
func (c *Controller) Run(workerNumber int, stopCh <-chan struct{}) error {
defer utilruntime.HandleCrash()
defer c.workqueue.ShutDown()
klog.Infof("Starting controller: %s", controllerAgentName)
// Wait for the caches to be synced before starting workers
klog.Info("Waiting for informer caches to sync")
if ok := cache.WaitForCacheSync(stopCh, c.memberclusterSynced); !ok {
return fmt.Errorf("failed to wait for caches to sync")
}
klog.Infof("Starting workers for controller. worker number: %d, controller: %s", workerNumber, controllerAgentName)
for i := 0; i < workerNumber; i++ {
go wait.Until(c.runWorker, time.Second, stopCh)
}
// Controller will block here until stopCh is closed.
<-stopCh
klog.Info("Shutting down workers")
return nil
}
// runWorker is a long-running function that will continually call the
// processNextWorkItem function in order to read and process a message on the
// workqueue.
func (c *Controller) runWorker() {
for c.processNextWorkItem() {
}
}
// processNextWorkItem will read a single work item off the workqueue and
// attempt to process it, by calling the syncHandler.
func (c *Controller) processNextWorkItem() bool {
obj, shutdown := c.workqueue.Get()
if shutdown {
return false
}
// We wrap this block in a func so we can defer c.workqueue.Done.
err := func(obj interface{}) error {
// We call Done here so the workqueue knows we have finished
// processing this item. We also must remember to call Forget if we
// do not want this work item being re-queued. For example, we do
// not call Forget if a transient error occurs, instead the item is
// put back on the workqueue and attempted again after a back-off
// period.
defer c.workqueue.Done(obj)
var key string
var ok bool
// We expect strings to come off the workqueue. These are of the
// form namespace/name. We do this as the delayed nature of the
// workqueue means the items in the informer cache may actually be
// more up to date than when the item was initially put onto the
// workqueue.
if key, ok = obj.(string); !ok {
// As the item in the workqueue is actually invalid, we call
// Forget here else we'd go into a loop of attempting to
// process a work item that is invalid.
c.workqueue.Forget(obj)
utilruntime.HandleError(fmt.Errorf("expected string in workqueue but got %#v", obj))
return nil
}
// Run the syncHandler, passing it the namespace/name string of the
// MemberCluster resource to be synced.
if err := c.syncHandler(key); err != nil {
// Put the item back on the workqueue to handle any transient errors.
c.workqueue.AddRateLimited(key)
return fmt.Errorf("error syncing '%s': %s, requeuing", key, err.Error())
}
// Finally, if no error occurs we Forget this item so it does not
// get queued again until another change happens.
c.workqueue.Forget(obj)
klog.Infof("Successfully synced '%s'", key)
return nil
}(obj)
if err != nil {
utilruntime.HandleError(err)
return true
}
return true
}
// syncHandler compares the actual state with the desired, and attempts to
// converge the two. It then updates the Status block of the MemberCluster resource
// with the current status of the resource.
func (c *Controller) syncHandler(key string) error {
// Convert the namespace/name string into a distinct namespace and name
namespace, name, err := cache.SplitMetaNamespaceKey(key)
if err != nil {
utilruntime.HandleError(fmt.Errorf("invalid resource key: %s", key))
return nil
}
// Get the resource with this namespace/name
membercluster, err := c.memberclusterLister.MemberClusters(namespace).Get(name)
if err != nil {
// The membercluster resource may no longer exist, in which case we stop
// processing.
if errors.IsNotFound(err) {
utilruntime.HandleError(fmt.Errorf("membercluster '%s' in work queue no longer exists", key))
return nil
}
return err
}
klog.Infof("Sync member cluster: %s/%s", membercluster.Namespace, membercluster.Name)
return nil
}
// enqueueEventResource takes a resource and converts it into a namespace/name
// string which is then put onto the work queue. This method should *not* be
// passed resources of any type other than membercluster.
func (c *Controller) enqueueEventResource(obj interface{}) {
var key string
var err error
if key, err = cache.MetaNamespaceKeyFunc(obj); err != nil {
utilruntime.HandleError(err)
return
}
c.workqueue.Add(key)
}
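For reference, each work item is the namespace/name key of a MemberCluster object; a minimal sketch of the round trip through the client-go helpers used above, assuming a hypothetical MemberCluster named "member1" in namespace "karmada-cluster":

// obj is the *v1alpha1.MemberCluster delivered by the informer (hypothetical instance)
key, _ := cache.MetaNamespaceKeyFunc(obj)              // "karmada-cluster/member1"
namespace, name, _ := cache.SplitMetaNamespaceKey(key) // "karmada-cluster", "member1"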

View File

@@ -0,0 +1,9 @@
package util
import "k8s.io/client-go/rest"
// ControllerConfig defines the common configuration shared by most of controllers.
type ControllerConfig struct {
// HeadClusterConfig holds the configuration of head cluster.
HeadClusterConfig *rest.Config
}