package servicemirror

import (
	"context"
	"flag"
	"fmt"
	"os"
	"os/signal"
	"syscall"
	"time"

	"github.com/linkerd/linkerd2/controller/gen/apis/link/v1alpha3"
	l5dcrdclient "github.com/linkerd/linkerd2/controller/gen/client/clientset/versioned"
	controllerK8s "github.com/linkerd/linkerd2/controller/k8s"
	servicemirror "github.com/linkerd/linkerd2/multicluster/service-mirror"
	"github.com/linkerd/linkerd2/pkg/admin"
	"github.com/linkerd/linkerd2/pkg/flags"
	"github.com/linkerd/linkerd2/pkg/k8s"
	sm "github.com/linkerd/linkerd2/pkg/servicemirror"
	log "github.com/sirupsen/logrus"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
	"k8s.io/client-go/tools/leaderelection"
	"k8s.io/client-go/tools/leaderelection/resourcelock"
)

const (
	// linkWatchRestartAfter is how long to wait before requeueing a Link after
	// the cluster watcher fails to (re)start.
	linkWatchRestartAfter = 10 * time.Second
	// Duration of the lease
	LEASE_DURATION = 30 * time.Second
	// Deadline for the leader to refresh its lease. Defaults to the same value
	// used by core controllers
	LEASE_RENEW_DEADLINE = 10 * time.Second
	// Duration leader elector clients should wait between action re-tries.
	// Defaults to the same value used by core controllers
	LEASE_RETRY_PERIOD = 2 * time.Second
)

var (
	// Worker state shared across restarts; stopped and released by
	// cleanupWorkers when the lease is lost or the Link is deleted.
	clusterWatcher *servicemirror.RemoteClusterServiceWatcher
	probeWorker    *servicemirror.ProbeWorker
)

// Main executes the service-mirror controller
func Main(args []string) {
	cmd := flag.NewFlagSet("service-mirror", flag.ExitOnError)

	kubeConfigPath := cmd.String("kubeconfig", "", "path to the local kube config")
	requeueLimit := cmd.Int("event-requeue-limit", 3, "requeue limit for events")
	metricsAddr := cmd.String("metrics-addr", ":9999", "address to serve scrapable metrics on")
	namespace := cmd.String("namespace", "", "namespace containing Link and credentials Secret")
	repairPeriod := cmd.Duration("endpoint-refresh-period", 1*time.Minute, "frequency to refresh endpoint resolution")
	enableHeadlessSvc := cmd.Bool("enable-headless-services", false, "toggle support for headless service mirroring")
	enableNamespaceCreation := cmd.Bool("enable-namespace-creation", false, "toggle support for namespace creation")
	enablePprof := cmd.Bool("enable-pprof", false, "Enable pprof endpoints on the admin server")
	localMirror := cmd.Bool("local-mirror", false, "watch the local cluster for federated service members")
	federatedServiceSelector := cmd.String("federated-service-selector", k8s.DefaultFederatedServiceSelector, "Selector (label query) for federated service members in the local cluster")
	probeSvc := cmd.String("probe-service", "", "Name of the target cluster probe service")

	flags.ConfigureAndParse(cmd, args)
	linkName := cmd.Arg(0)

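	// Illustrative invocation (binary name and flag values below are examples
	// only; the Deployment generated by `linkerd multicluster link` is the
	// source of truth). The Link name is passed as the single positional
	// argument:
	//
	//	service-mirror -namespace linkerd-multicluster -event-requeue-limit 3 target
	//
	// In -local-mirror mode no Link argument is required; federated service
	// members are selected with -federated-service-selector, a standard
	// Kubernetes label selector such as "app=foo,env=prod".
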
	// ready gates the admin server's readiness endpoint; it is flipped to
	// true once the local informer caches have synced.
	ready := false
	adminServer := admin.NewServer(*metricsAddr, *enablePprof, &ready)

	go func() {
		log.Infof("starting admin server on %s", *metricsAddr)
		if err := adminServer.ListenAndServe(); err != nil {
			log.Errorf("failed to start service mirror admin server: %s", err)
		}
	}()

	rootCtx, cancel := context.WithCancel(context.Background())

	stop := make(chan os.Signal, 1)
	signal.Notify(stop, os.Interrupt, syscall.SIGTERM)
	go func() {
		<-stop
		log.Info("Received shutdown signal")
		// Cancel root context. Cancellation will be propagated to all other
		// contexts that are children of the root context.
		cancel()
	}()

	// We create two different Kubernetes API clients for the local cluster:
	// l5dClient is used for watching Link resources and updating their
	// statuses.
	//
	// controllerK8sAPI is used by the cluster watcher to manage
	// mirror resources such as services, namespaces, and endpoints.
	controllerK8sAPI, err := controllerK8s.InitializeAPI(
		rootCtx,
		*kubeConfigPath,
		false,
		"local",
		controllerK8s.NS,
		controllerK8s.Svc,
		controllerK8s.Endpoint,
		controllerK8s.Link,
	)
	if err != nil {
		log.Fatalf("Failed to initialize K8s API: %s", err)
	}
	config, err := k8s.GetConfig(*kubeConfigPath, "")
	if err != nil {
		log.Fatalf("error configuring Kubernetes API client: %s", err)
	}
	l5dClient, err := controllerK8s.NewL5DCRDClient(config)
	if err != nil {
		log.Fatalf("Failed to initialize K8s API: %s", err)
	}

	metrics := servicemirror.NewProbeMetricVecs()
	// Start the informers and wait for their caches to sync before marking
	// the controller ready.
	controllerK8sAPI.Sync(nil)
	ready = true

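	// run holds the controller body. It is only invoked while this replica
	// holds the leadership lease (see leaderelection.RunOrDie below), and the
	// context passed to it is cancelled when the lease is lost.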
	var run func(context.Context)

	if *localMirror {
		run = func(ctx context.Context) {
			err = startLocalClusterWatcher(ctx, *namespace, controllerK8sAPI, l5dClient, *requeueLimit, *repairPeriod, *enableHeadlessSvc, *enableNamespaceCreation, *federatedServiceSelector)
			if err != nil {
				log.Fatalf("Failed to start local cluster watcher: %s", err)
			}

			// ctx.Done() is a one-shot channel that will be closed once
			// the context has been cancelled. Receiving from a closed
			// channel yields the value immediately.
			<-ctx.Done()
			// The channel will be closed by the leader elector when a
			// lease is lost, or by a background task handling SIGTERM.
			// Before terminating the loop, stop the workers and set
			// them to nil to release memory.
			cleanupWorkers()
		}
	} else {
		run = func(ctx context.Context) {
			// Use a small buffered channel for Link updates to avoid dropping
			// updates if there is an update burst.
			results := make(chan *v1alpha3.Link, 100)

			_, err := controllerK8sAPI.Link().Informer().AddEventHandler(servicemirror.GetLinkHandlers(results, linkName))
			if err != nil {
				log.Fatalf("Failed to add event handler to Link informer: %s", err)
			}

			// Each time the link resource is updated, reload the config and restart the
			// cluster watcher.
			for {
				select {
				// ctx.Done() is a one-shot channel that will be closed once
				// the context has been cancelled. Receiving from a closed
				// channel yields the value immediately.
				case <-ctx.Done():
					// The channel will be closed by the leader elector when a
					// lease is lost, or by a background task handling SIGTERM.
					// Before terminating the loop, stop the workers and set
					// them to nil to release memory.
					cleanupWorkers()
				case link := <-results:
					if link != nil {
						log.Infof("Got updated link %s: %+v", linkName, link)
						creds, err := loadCredentials(ctx, link, *namespace, controllerK8sAPI.Client)
						if err != nil {
							log.Errorf("Failed to load remote cluster credentials: %s", err)
						}
						err = restartClusterWatcher(ctx, link, *namespace, *probeSvc, creds, controllerK8sAPI, l5dClient, *requeueLimit, *repairPeriod, metrics, *enableHeadlessSvc, *enableNamespaceCreation)
						if err != nil {
							// failed to restart cluster watcher; give a bit of slack
							// and requeue the link to give it another try
							log.Error(err)
							time.Sleep(linkWatchRestartAfter)
							results <- link
						}
					} else {
						log.Infof("Link %s deleted", linkName)
						cleanupWorkers()
					}
				}
			}
		}
	}

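	// The pod hostname doubles as this replica's identity for leader election.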
	hostname, found := os.LookupEnv("HOSTNAME")
	if !found {
		log.Fatal("Failed to fetch 'HOSTNAME' environment variable")
	}

	leaseName := fmt.Sprintf("service-mirror-write-%s", linkName)
	if *localMirror {
		leaseName = "local-service-mirror-write"
	}
	lock := &resourcelock.LeaseLock{
		LeaseMeta: metav1.ObjectMeta{
			Name:      leaseName,
			Namespace: *namespace,
		},
		Client: controllerK8sAPI.Client.CoordinationV1(),
		LockConfig: resourcelock.ResourceLockConfig{
			Identity: hostname,
		},
	}

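// The election label lets `break election` inside the select below exit the
// outer retry loop once the root context has been cancelled.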
election:
	for {
		// RunOrDie will block until the lease is lost.
		//
		// When a lease is acquired, the OnStartedLeading callback will be
		// triggered, and a main watcher loop will be established to watch Link
		// resources.
		//
		// When the lease is lost, all watchers will be cleaned up and we will
		// loop and attempt to re-acquire the lease.
		leaderelection.RunOrDie(rootCtx, leaderelection.LeaderElectionConfig{
			// When the runtime context is cancelled, the lock will be
			// released. This implies that any code guarded by the lease
			// _must_ finish before cancelling.
			ReleaseOnCancel: true,
			Lock:            lock,
			LeaseDuration:   LEASE_DURATION,
			RenewDeadline:   LEASE_RENEW_DEADLINE,
			RetryPeriod:     LEASE_RETRY_PERIOD,
			Callbacks: leaderelection.LeaderCallbacks{
				OnStartedLeading: func(ctx context.Context) {
					// When a lease is lost, RunOrDie will cancel the context
					// passed into the OnStartedLeading callback. This will in
					// turn cause us to cancel the work in the run() function,
					// effectively terminating and cleaning up the watches.
					log.Info("Starting controller loop")
					run(ctx)
				},
				OnStoppedLeading: func() {
					log.Infof("%s released lease", hostname)
				},
				OnNewLeader: func(identity string) {
					if identity == hostname {
						log.Infof("%s acquired lease", hostname)
					}
				},
			},
		})

		select {
		// If the lease has been lost and we have received a shutdown signal,
		// break the loop and gracefully exit. We can guarantee at this point
		// that resources have been released.
		case <-rootCtx.Done():
			break election
		// If the lease has been lost, loop and attempt to re-acquire it.
		default:
		}
	}
	log.Info("Shutting down")
}

// cleanupWorkers is a utility function that checks whether the worker pointers
// (clusterWatcher and probeWorker) are instantiated, and if they are, stops
// their execution and sets the pointers to a nil value so that memory may be
// garbage collected.
func cleanupWorkers() {
	if clusterWatcher != nil {
		// Release the watcher, but do not clean up the mirror services it
		// created; the `unlink` command takes care of that.
		clusterWatcher.Stop(false)
		clusterWatcher = nil
	}

	if probeWorker != nil {
		probeWorker.Stop()
		probeWorker = nil
	}
}

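// loadCredentials fetches the Link's cluster credentials Secret and returns
// the kubeconfig it contains for connecting to the target cluster.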
func loadCredentials(ctx context.Context, link *v1alpha3.Link, namespace string, k8sAPI kubernetes.Interface) ([]byte, error) {
	// Load the credentials secret
	secret, err := k8sAPI.CoreV1().Secrets(namespace).Get(ctx, link.Spec.ClusterCredentialsSecret, metav1.GetOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to load credentials secret %s: %w", link.Spec.ClusterCredentialsSecret, err)
	}
	return sm.ParseRemoteClusterSecret(secret)
}

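// restartClusterWatcher tears down any existing workers, then starts a probe
// worker (when the Link defines a probe path) and a fresh cluster watcher for
// the given Link.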
func restartClusterWatcher(
	ctx context.Context,
	link *v1alpha3.Link,
	namespace,
	probeSvc string,
	creds []byte,
	controllerK8sAPI *controllerK8s.API,
	linkClient l5dcrdclient.Interface,
	requeueLimit int,
	repairPeriod time.Duration,
	metrics servicemirror.ProbeMetricVecs,
	enableHeadlessSvc bool,
	enableNamespaceCreation bool,
) error {

	cleanupWorkers()

	workerMetrics, err := metrics.NewWorkerMetrics(link.Spec.TargetClusterName)
	if err != nil {
		return fmt.Errorf("failed to create metrics for cluster watcher: %w", err)
	}

	// If linked against a cluster that has a gateway, start a probe and
	// initialise the liveness channel
	var ch chan bool
	if link.Spec.ProbeSpec.Path != "" {
		probeWorker = servicemirror.NewProbeWorker(probeSvc, &link.Spec.ProbeSpec, workerMetrics, link.Spec.TargetClusterName)
		probeWorker.Start()
		ch = probeWorker.Liveness
	}

	// Start cluster watcher
	cfg, err := clientcmd.RESTConfigFromKubeConfig(creds)
	if err != nil {
		return fmt.Errorf("unable to parse kube config: %w", err)
	}
	remoteAPI, err := controllerK8s.InitializeAPIForConfig(ctx, cfg, false, link.Spec.TargetClusterName, controllerK8s.Svc, controllerK8s.Endpoint)
	if err != nil {
		return fmt.Errorf("cannot initialize api for target cluster %s: %w", link.Spec.TargetClusterName, err)
	}
	cw, err := servicemirror.NewRemoteClusterServiceWatcher(
		ctx,
		namespace,
		controllerK8sAPI,
		remoteAPI,
		probeSvc,
		linkClient,
		link,
		requeueLimit,
		repairPeriod,
		ch,
		enableHeadlessSvc,
		enableNamespaceCreation,
	)
	if err != nil {
		return fmt.Errorf("unable to create cluster watcher: %w", err)
	}
	clusterWatcher = cw
	err = clusterWatcher.Start(ctx)
	if err != nil {
		return fmt.Errorf("failed to start cluster watcher: %w", err)
	}

	return nil
}

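// startLocalClusterWatcher starts a cluster watcher against the local cluster
// itself, using a synthetic Link whose only populated selector is the
// federated service selector.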
func startLocalClusterWatcher(
	ctx context.Context,
	namespace string,
	controllerK8sAPI *controllerK8s.API,
	linkClient l5dcrdclient.Interface,
	requeueLimit int,
	repairPeriod time.Duration,
	enableHeadlessSvc bool,
	enableNamespaceCreation bool,
	federatedServiceSelector string,
) error {
	federatedLabelSelector, err := metav1.ParseToLabelSelector(federatedServiceSelector)
	if err != nil {
		return fmt.Errorf("failed to parse federated service selector: %w", err)
	}

	link := v1alpha3.Link{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "local",
			Namespace: namespace,
		},
		Spec: v1alpha3.LinkSpec{
			TargetClusterName:        "",
			Selector:                 nil,
			RemoteDiscoverySelector:  nil,
			FederatedServiceSelector: federatedLabelSelector,
		},
	}
	cw, err := servicemirror.NewRemoteClusterServiceWatcher(
		ctx,
		namespace,
		controllerK8sAPI,
		controllerK8sAPI,
		"",
		linkClient,
		&link,
		requeueLimit,
		repairPeriod,
		make(chan bool),
		enableHeadlessSvc,
		enableNamespaceCreation,
	)
	if err != nil {
		return fmt.Errorf("unable to create cluster watcher: %w", err)
	}
	clusterWatcher = cw
	err = clusterWatcher.Start(ctx)
	if err != nil {
		return fmt.Errorf("failed to start cluster watcher: %w", err)
	}

	return nil
}