Service Mirroring Component (#4028)

This PR introduces a service mirroring component that is responsible for watching remote clusters and mirroring their services locally.

Signed-off-by: Zahari Dichev <zaharidichev@gmail.com>
Zahari Dichev 2020-03-02 21:16:08 +02:00 committed by GitHub
parent 71d6a00faa
commit edd7fd203d
31 changed files with 1556 additions and 54 deletions

View File

@ -5,6 +5,7 @@ set -e
setValues() {
sed -i "s/$1/$2/" charts/linkerd2/values.yaml
sed -i "s/$1/$2/" charts/linkerd2-cni/values.yaml
sed -i "s/$1/$2/" charts/linkerd2-service-mirror/values.yaml
}
showErr() {
@ -19,6 +20,7 @@ bindir=$( cd "${BASH_SOURCE[0]%/*}" && pwd )
rootdir=$( cd "$bindir"/.. && pwd )
"$bindir"/helm init --client-only
"$bindir"/helm lint "$rootdir"/charts/linkerd2-service-mirror
"$bindir"/helm lint "$rootdir"/charts/partials
"$bindir"/helm dep up "$rootdir"/charts/linkerd2-cni
"$bindir"/helm lint "$rootdir"/charts/linkerd2-cni
@ -48,6 +50,8 @@ if [ "$1" = package ]; then
"$bindir"/helm --version "$version" --app-version "$tag" -d "$rootdir"/target/helm package "$rootdir"/charts/linkerd2
"$bindir"/helm --version "$version" --app-version "$tag" -d "$rootdir"/target/helm package "$rootdir"/charts/linkerd2-cni
# TODO: When ready to publish, uncomment
#"$bindir"/helm --version $version --app-version $tag -d "$rootdir"/target/helm package "$rootdir"/charts/linkerd2-service-mirror
mv "$rootdir"/target/helm/index-pre.yaml "$rootdir"/target/helm/index-pre-"$version".yaml
"$bindir"/helm repo index --url "https://helm.linkerd.io/$repo/" --merge "$rootdir"/target/helm/index-pre-"$version".yaml "$rootdir"/target/helm

View File

@ -0,0 +1,22 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
OWNERS
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*~
# Various IDEs
.project
.idea/
*.tmproj

View File

@ -0,0 +1,8 @@
apiVersion: v1
appVersion: edge-XX.X.X
description: A helm chart containing the resources needed by the Linkerd Service Mirror component.
kubeVersion: ">=1.13.0-0"
icon: https://linkerd.io/images/logo-only-200h.png
name: "linkerd2-service-mirror"
version: 0.1.0

View File

@ -0,0 +1,21 @@
# Linkerd2-service-mirror Helm Chart
Linkerd is a *service mesh*, designed to give platform-wide observability,
reliability, and security without requiring configuration or code changes.
The Linkerd Service Mirror makes it possible to mirror services located
on remote clusters so that traffic can be routed to them.
## Configuration
The following table lists the configurable parameters of the linkerd2-service-mirror chart and their default values.
| Parameter | Description | Default |
|--------------------------------------|-----------------------------------------------------------------------------------|-------------------------------|
|`controllerImage` | Docker image for the Service Mirror component (uses the Linkerd controller image) |`gcr.io/linkerd-io/controller`|
|`controllerImageVersion` | Tag for the Service Mirror container Docker image |latest version|
|`namespace` | Service Mirror component namespace |`linkerd-service-mirror`|
|`serviceMirrorUID` | User ID under which the Service Mirror component is run |`2103`|
|`logLevel` | Log level for the Service Mirror component |`info`|
|`eventRequeueLimit` | Number of times an update from the remote cluster may be requeued (retried) |`3`|
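
For example, the defaults can be overridden with a values file passed to Helm via the `-f` flag (a minimal sketch; the file name and chosen values are illustrative):

# my-values.yaml -- illustrative overrides for the linkerd2-service-mirror chart
namespace: linkerd-service-mirror
logLevel: debug        # one of: panic, fatal, error, warn, info, debug
eventRequeueLimit: 5   # retry failed remote-cluster updates up to 5 times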

View File

@ -0,0 +1,63 @@
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: linkerd-service-mirror
namespace: {{.Values.namespace}}
labels:
ControllerComponentLabel: service-mirror
rules:
- apiGroups: [""]
resources: ["endpoints", "services", "secrets", "namespaces"]
verbs: ["list", "get", "watch", "create", "delete", "update"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: linkerd-service-mirror
namespace: {{.Values.namespace}}
labels:
ControllerComponentLabel: service-mirror
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: linkerd-service-mirror
subjects:
- kind: ServiceAccount
name: linkerd-service-mirror
namespace: {{.Values.namespace}}
---
kind: ServiceAccount
apiVersion: v1
metadata:
name: linkerd-service-mirror
namespace: {{.Values.namespace}}
labels:
ControllerComponentLabel: service-mirror
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
ControllerComponentLabel: service-mirror
name: linkerd-service-mirror
namespace: {{.Values.namespace}}
spec:
replicas: 1
selector:
matchLabels:
ControllerComponentLabel: service-mirror
template:
metadata:
labels:
ControllerComponentLabel: service-mirror
spec:
containers:
- args:
- service-mirror
- -log-level={{.Values.logLevel}}
- -event-requeue-limit={{.Values.eventRequeueLimit}}
image: {{.Values.controllerImage}}:{{.Values.controllerImageVersion}}
name: service-mirror
securityContext:
runAsUser: {{.Values.serviceMirrorUID}}
serviceAccountName: linkerd-service-mirror

View File

@ -0,0 +1,6 @@
namespace: default
serviceMirrorUID: 2103
logLevel: info
eventRequeueLimit: 3
controllerImage: gcr.io/linkerd-io/controller
controllerImageVersion: {version}

View File

@ -0,0 +1,139 @@
package cmd
import (
"errors"
"fmt"
"io"
"os"
"github.com/linkerd/linkerd2/pkg/charts"
"github.com/linkerd/linkerd2/pkg/charts/servicemirror"
"github.com/linkerd/linkerd2/pkg/version"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"k8s.io/helm/pkg/chartutil"
"sigs.k8s.io/yaml"
)
type installServiceMirrorOptions struct {
namespace string
controlPlaneVersion string
dockerRegistry string
logLevel string
uid int64
requeueLimit int32
}
const helmServiceMirrorDefaultChartName = "linkerd2-service-mirror"
func newCmdInstallServiceMirror() *cobra.Command {
options, err := newInstallServiceMirrorOptionsWithDefaults()
if err != nil {
fmt.Fprint(os.Stderr, err)
os.Exit(1)
}
cmd := &cobra.Command{
Use: "install-service-mirror [flags]",
Short: "Output Kubernetes configs to install Linkerd Service Mirror",
Long: "Output Kubernetes configs to install Linkerd Service Mirror",
RunE: func(cmd *cobra.Command, args []string) error {
return renderServiceMirror(os.Stdout, options)
},
Hidden: true,
}
cmd.PersistentFlags().StringVarP(&options.controlPlaneVersion, "control-plane-version", "", options.controlPlaneVersion, "(Development) Tag to be used for the control plane component images")
cmd.PersistentFlags().StringVar(&options.dockerRegistry, "registry", options.dockerRegistry, "Docker registry to pull images from")
cmd.PersistentFlags().StringVarP(&options.logLevel, "log-level", "", options.logLevel, "Log level for the Service Mirror Component")
cmd.PersistentFlags().Int64Var(&options.uid, "uid", options.uid, "Run the Service Mirror component under this user ID")
cmd.PersistentFlags().Int32Var(&options.requeueLimit, "event-requeue-limit", options.requeueLimit, "The number of times a failed update from the remote cluster is allowed to be requeued (retried)")
cmd.PersistentFlags().StringVarP(&options.namespace, "namespace", "", options.namespace, "The namespace in which the Service Mirror Component is to be installed")
return cmd
}
func newInstallServiceMirrorOptionsWithDefaults() (*installServiceMirrorOptions, error) {
defaults, err := servicemirror.NewValues()
if err != nil {
return nil, err
}
return &installServiceMirrorOptions{
namespace: defaults.Namespace,
controlPlaneVersion: version.Version,
dockerRegistry: defaultDockerRegistry,
logLevel: defaults.LogLevel,
uid: defaults.ServiceMirrorUID,
requeueLimit: defaults.EventRequeueLimit,
}, nil
}
func (options *installServiceMirrorOptions) buildValues() (*servicemirror.Values, error) {
installValues, err := servicemirror.NewValues()
if err != nil {
return nil, err
}
installValues.Namespace = options.namespace
installValues.LogLevel = options.logLevel
installValues.ControllerImageVersion = options.controlPlaneVersion
installValues.ControllerImage = fmt.Sprintf("%s/controller", options.dockerRegistry)
installValues.ServiceMirrorUID = options.uid
installValues.EventRequeueLimit = options.requeueLimit
return installValues, nil
}
func (options *installServiceMirrorOptions) validate() error {
if !alphaNumDashDot.MatchString(options.controlPlaneVersion) {
return fmt.Errorf("%s is not a valid version", options.controlPlaneVersion)
}
if options.namespace == "" {
return errors.New("you need to specify a namespace")
}
if _, err := log.ParseLevel(options.logLevel); err != nil {
return fmt.Errorf("--log-level must be one of: panic, fatal, error, warn, info, debug")
}
return nil
}
func renderServiceMirror(w io.Writer, config *installServiceMirrorOptions) error {
if err := config.validate(); err != nil {
return err
}
values, err := config.buildValues()
if err != nil {
return err
}
// Render raw values and create chart config
rawValues, err := yaml.Marshal(values)
if err != nil {
return err
}
files := []*chartutil.BufferedFile{
{Name: chartutil.ChartfileName},
{Name: "templates/service-mirror.yaml"},
}
chart := &charts.Chart{
Name: helmServiceMirrorDefaultChartName,
Dir: helmServiceMirrorDefaultChartName,
Namespace: controlPlaneNamespace,
RawValues: rawValues,
Files: files,
}
buf, err := chart.RenderServiceMirror()
if err != nil {
return err
}
w.Write(buf.Bytes())
w.Write([]byte("---\n"))
return nil
}

View File

@ -123,6 +123,7 @@ func init() {
RootCmd.AddCommand(newCmdUninject())
RootCmd.AddCommand(newCmdUpgrade())
RootCmd.AddCommand(newCmdVersion())
RootCmd.AddCommand(newCmdInstallServiceMirror())
}
type statOptionsBase struct {

View File

@ -99,7 +99,7 @@ spec:
}
log := logging.WithField("test", t.Name())
k8sAPI.Sync()
k8sAPI.Sync(nil)
endpoints := watcher.NewEndpointsWatcher(k8sAPI, log)
profiles := watcher.NewProfileWatcher(k8sAPI, log)

View File

@ -472,7 +472,7 @@ status:
watcher := NewEndpointsWatcher(k8sAPI, logging.WithField("test", t.Name()))
k8sAPI.Sync()
k8sAPI.Sync(nil)
listener := newBufferingEndpointListener()
@ -593,7 +593,7 @@ status:
watcher := NewEndpointsWatcher(k8sAPI, logging.WithField("test", t.Name()))
k8sAPI.Sync()
k8sAPI.Sync(nil)
listener := newBufferingEndpointListener()

View File

@ -434,7 +434,7 @@ status:
endpoints := NewEndpointsWatcher(k8sAPI, logging.WithField("test", t.Name()))
watcher := NewIPWatcher(k8sAPI, endpoints, logging.WithField("test", t.Name()))
k8sAPI.Sync()
k8sAPI.Sync(nil)
listener := newBufferingEndpointListener()
@ -572,7 +572,7 @@ status:
endpoints := NewEndpointsWatcher(k8sAPI, logging.WithField("test", t.Name()))
watcher := NewIPWatcher(k8sAPI, endpoints, logging.WithField("test", t.Name()))
k8sAPI.Sync()
k8sAPI.Sync(nil)
listener := newBufferingEndpointListener()

View File

@ -86,7 +86,7 @@ func TestProfileWatcherUpdates(t *testing.T) {
watcher := NewProfileWatcher(k8sAPI, logging.WithField("test", t.Name()))
k8sAPI.Sync()
k8sAPI.Sync(nil)
listener := NewBufferingProfileListener()
@ -135,7 +135,7 @@ func TestProfileWatcherDeletes(t *testing.T) {
}
watcher := NewProfileWatcher(k8sAPI, logging.WithField("test", t.Name()))
k8sAPI.Sync()
k8sAPI.Sync(nil)
listener := NewDeletingProfileListener()

View File

@ -116,7 +116,7 @@ func TestTrafficSplitWatcher(t *testing.T) {
watcher := NewTrafficSplitWatcher(k8sAPI, logging.WithField("test", t.Name()))
k8sAPI.Sync()
k8sAPI.Sync(nil)
listener := newBufferingTrafficSplitListener()
@ -172,7 +172,7 @@ func TestTrafficSplitWatcherDelete(t *testing.T) {
watcher := NewTrafficSplitWatcher(k8sAPI, logging.WithField("test", t.Name()))
k8sAPI.Sync()
k8sAPI.Sync(nil)
listener := newDeletingTrafficSplitListener()

View File

@ -405,7 +405,7 @@ status:
[]string{},
)
k8sAPI.Sync()
k8sAPI.Sync(nil)
rsp, err := fakeGrpcServer.ListPods(context.TODO(), exp.req)
if !reflect.DeepEqual(err, exp.err) {
@ -507,7 +507,7 @@ metadata:
[]string{},
)
k8sAPI.Sync()
k8sAPI.Sync(nil)
rsp, err := fakeGrpcServer.ListServices(context.TODO(), &pb.ListServicesRequest{})
if err != exp.err {
@ -540,7 +540,7 @@ func TestConfig(t *testing.T) {
fakeGrpcServer.mountPathProxyConfig = "testdata/proxy.conf.json"
fakeGrpcServer.mountPathInstallConfig = "testdata/install.conf.json"
k8sAPI.Sync()
k8sAPI.Sync(nil)
rsp, err := fakeGrpcServer.Config(context.Background(), &pb.Empty{})
if err != nil {

View File

@ -571,7 +571,7 @@ func newMockGrpcServer(exp expectedStatRPC) (*MockProm, *grpcServer, error) {
[]string{},
)
k8sAPI.Sync()
k8sAPI.Sync(nil)
return mockProm, fakeGrpcServer, nil
}

View File

@ -36,7 +36,7 @@ func Main(args []string) {
signal.Notify(stop, os.Interrupt, syscall.SIGTERM)
k8sAPI, err := k8s.InitializeAPI(
*kubeConfigPath,
*kubeConfigPath, true,
k8s.Endpoint, k8s.Pod, k8s.RS, k8s.Svc, k8s.SP, k8s.TS, k8s.Job,
)
if err != nil {
@ -85,7 +85,7 @@ func Main(args []string) {
done,
)
k8sAPI.Sync() // blocks until caches are synced
k8sAPI.Sync(nil) // blocks until caches are synced
go func() {
log.Infof("starting gRPC server on %s", *addr)

View File

@ -4,6 +4,8 @@ import (
"fmt"
"os"
servicemirror "github.com/linkerd/linkerd2/controller/cmd/service-mirror"
"github.com/linkerd/linkerd2/controller/cmd/destination"
"github.com/linkerd/linkerd2/controller/cmd/heartbeat"
"github.com/linkerd/linkerd2/controller/cmd/identity"
@ -34,6 +36,8 @@ func main() {
spvalidator.Main(os.Args[2:])
case "tap":
tap.Main(os.Args[2:])
case "service-mirror":
servicemirror.Main(os.Args[2:])
default:
fmt.Printf("unknown subcommand: %s", os.Args[1])
os.Exit(1)

View File

@ -46,7 +46,7 @@ func Main(args []string) {
defer destinationConn.Close()
k8sAPI, err := k8s.InitializeAPI(
*kubeConfigPath,
*kubeConfigPath, true,
k8s.CJ, k8s.DS, k8s.Deploy, k8s.Job, k8s.NS, k8s.Pod, k8s.RC, k8s.RS, k8s.Svc, k8s.SS, k8s.SP, k8s.TS,
)
if err != nil {
@ -84,7 +84,7 @@ func Main(args []string) {
strings.Split(*ignoredNamespaces, ","),
)
k8sAPI.Sync() // blocks until caches are synced
k8sAPI.Sync(nil) // blocks until caches are synced
go func() {
log.Infof("starting HTTP server on %+v", *addr)

View File

@ -0,0 +1,877 @@
package servicemirror
import (
"errors"
"fmt"
"strings"
"github.com/linkerd/linkerd2/controller/k8s"
consts "github.com/linkerd/linkerd2/pkg/k8s"
logging "github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
)
type (
// RemoteClusterServiceWatcher is a watcher instantiated for every cluster that is being watched.
// Its main job is to listen to events coming from the remote cluster and react accordingly, keeping
// the state of the mirrored services in sync. This is achieved by maintaining a SharedInformer
// on the remote cluster. The basic add/update/delete operations are mapped to more domain-specific
// events, put onto a work queue and handled by the processing loop. In case processing an event fails,
// it can be requeued up to N times, to ensure that the failure is not due to some temporary network
// problems or a general glitch in the Matrix.
RemoteClusterServiceWatcher struct {
clusterName string
clusterDomain string
remoteAPIClient *k8s.API
localAPIClient *k8s.API
stopper chan struct{}
log *logging.Entry
eventsQueue workqueue.RateLimitingInterface
requeueLimit int
}
// RemoteServiceCreated is generated whenever a remote service is created. Observing
// this event means that the service in question is not currently mirrored.
RemoteServiceCreated struct {
service *corev1.Service
gatewayData *gatewayMetadata
newResourceVersion string
}
// RemoteServiceUpdated is generated when we see something about an already
// mirrored service change on the remote cluster. In that case we need to
// reconcile. Most importantly we need to keep track of exposed ports
// and gateway association changes.
RemoteServiceUpdated struct {
localService *corev1.Service
localEndpoints *corev1.Endpoints
remoteUpdate *corev1.Service
gatewayData *gatewayMetadata
}
// RemoteServiceDeleted is generated when a remote service is going away or is no
// longer considered mirrored
RemoteServiceDeleted struct {
Name string
Namespace string
}
// RemoteGatewayDeleted is observed when a service that is a gateway to at least
// one already mirrored service is deleted
RemoteGatewayDeleted struct {
gatewayData *gatewayMetadata
affectedEndpoints []*corev1.Endpoints
}
// RemoteGatewayUpdated happens when a service that is a gateway to at least
// one already mirrored service is updated. This might mean an IP change,
// incoming port change, etc...
RemoteGatewayUpdated struct {
newPort int32
newEndpointAddresses []corev1.EndpointAddress
gatewayData *gatewayMetadata
newResourceVersion string
affectedServices []*corev1.Service
}
// ConsiderGatewayUpdateDispatch is issued when we receive an update for a
// service but are not sure whether it is a gateway. We need to hit the local
// API and make sure there are services that have this service as a gateway. Since
// this is an operation that might fail (a glitch in the local api connectivity)
// we want to represent that as a separate event that can be requeued
ConsiderGatewayUpdateDispatch struct {
maybeGateway *corev1.Service
}
// ClusterUnregistered is issued when the secret containing the remote cluster
// access information is deleted
ClusterUnregistered struct{}
// OrphanedServicesGcTriggered is a self-triggered event which aims to delete any
// orphaned services that are no longer on the remote cluster. It is emitted every
// time a new remote cluster is registered for monitoring. The need for this arises
// because the following might happen.
//
// 1. A cluster is registered for monitoring
// 2. Services A,B,C are created and mirrored
// 3. Then this component crashes, leaving the mirrors around
// 4. In the meantime services B and C are deleted on the remote cluster
// 5. When the controller starts up again it registers to listen for mirrored services
// 6. It receives an ADD for A but not a DELETE for B and C
//
// This event indicates that we need to make a diff with all services on the remote
// cluster, ensuring that we do not keep any mirrors that are not relevant anymore
OrphanedServicesGcTriggered struct{}
// OnAddCalled is issued when the onAdd function of the
// shared informer is called
OnAddCalled struct {
svc *corev1.Service
}
// OnUpdateCalled is issued when the onUpdate function of the
// shared informer is called
OnUpdateCalled struct {
svc *corev1.Service
}
// OnDeleteCalled is issued when the onDelete function of the
// shared informer is called
OnDeleteCalled struct {
svc *corev1.Service
}
gatewayMetadata struct {
Name string
Namespace string
}
// RetryableError is an error that should be retried through requeuing events
RetryableError struct{ Inner []error }
)
func (re RetryableError) Error() string {
var errorStrings []string
for _, err := range re.Inner {
errorStrings = append(errorStrings, err.Error())
}
return fmt.Sprintf("Inner errors:\n\t%s", strings.Join(errorStrings, "\n\t"))
}
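// extractGatewayInfo returns the external endpoint addresses, the incoming
// port and the resource version of a gateway service. It fails if the service
// has no LoadBalancer ingress or no port named after consts.GatewayPortName.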
func (rcsw *RemoteClusterServiceWatcher) extractGatewayInfo(gateway *corev1.Service) ([]corev1.EndpointAddress, int32, string, error) {
if len(gateway.Status.LoadBalancer.Ingress) == 0 {
return nil, 0, "", errors.New("expected gateway to have at lest 1 external Ip address but it has none")
}
var foundPort = false
var port int32
for _, p := range gateway.Spec.Ports {
if p.Name == consts.GatewayPortName {
foundPort = true
port = p.Port
break
}
}
if !foundPort {
return nil, 0, "", fmt.Errorf("cannot find port named %s on gateway", consts.GatewayPortName)
}
var gatewayEndpoints []corev1.EndpointAddress
for _, ingress := range gateway.Status.LoadBalancer.Ingress {
gatewayEndpoints = append(gatewayEndpoints, corev1.EndpointAddress{
IP: ingress.IP,
Hostname: ingress.Hostname,
})
}
return gatewayEndpoints, port, gateway.ResourceVersion, nil
}
// When the gateway is resolved we need to produce a set of endpoint addresses that
// contain the external IPs that this gateway exposes. Therefore we return the IP addresses
// as well as a single port on which the gateway is accessible.
func (rcsw *RemoteClusterServiceWatcher) resolveGateway(metadata *gatewayMetadata) ([]corev1.EndpointAddress, int32, string, error) {
gateway, err := rcsw.remoteAPIClient.Svc().Lister().Services(metadata.Namespace).Get(metadata.Name)
if err != nil {
return nil, 0, "", err
}
return rcsw.extractGatewayInfo(gateway)
}
// NewRemoteClusterServiceWatcher constructs a new cluster watcher
func NewRemoteClusterServiceWatcher(localAPI *k8s.API, cfg *rest.Config, clusterName string, requeueLimit int, clusterDomain string) (*RemoteClusterServiceWatcher, error) {
remoteAPI, err := k8s.InitializeAPIForConfig(cfg, false, k8s.Svc)
if err != nil {
return nil, fmt.Errorf("cannot initialize remote api for cluster %s: %s", clusterName, err)
}
stopper := make(chan struct{})
return &RemoteClusterServiceWatcher{
clusterName: clusterName,
clusterDomain: clusterDomain,
remoteAPIClient: remoteAPI,
localAPIClient: localAPI,
stopper: stopper,
log: logging.WithFields(logging.Fields{
"cluster": clusterName,
"apiAddress": cfg.Host,
}),
eventsQueue: workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()),
requeueLimit: requeueLimit,
}, nil
}
func (rcsw *RemoteClusterServiceWatcher) mirroredResourceName(remoteName string) string {
return fmt.Sprintf("%s-%s", remoteName, rcsw.clusterName)
}
func (rcsw *RemoteClusterServiceWatcher) originalResourceName(mirroredName string) string {
return strings.TrimSuffix(mirroredName, fmt.Sprintf("-%s", rcsw.clusterName))
}
func (rcsw *RemoteClusterServiceWatcher) getMirroredServiceLabels(gatewayData *gatewayMetadata) map[string]string {
return map[string]string{
consts.MirroredResourceLabel: "true",
consts.RemoteClusterNameLabel: rcsw.clusterName,
consts.RemoteGatewayNameLabel: gatewayData.Name,
consts.RemoteGatewayNsLabel: gatewayData.Namespace,
}
}
func (rcsw *RemoteClusterServiceWatcher) getMirroredServiceAnnotations(remoteService *corev1.Service) map[string]string {
return map[string]string{
consts.RemoteResourceVersionAnnotation: remoteService.ResourceVersion, // needed to detect real changes
consts.RemoteServiceFqName: fmt.Sprintf("%s.%s.svc.%s", remoteService.Name, remoteService.Namespace, rcsw.clusterDomain),
}
}
func (rcsw *RemoteClusterServiceWatcher) mirrorNamespaceIfNecessary(namespace string) error {
// if the namespace is already present we do not need to change it.
// if we are creating it we want to put a label indicating this is a
// mirrored resource
if _, err := rcsw.localAPIClient.NS().Lister().Get(namespace); err != nil {
if kerrors.IsNotFound(err) {
// if the namespace is not found, we can just create it
ns := &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
consts.MirroredResourceLabel: "true",
consts.RemoteClusterNameLabel: rcsw.clusterName,
},
Name: namespace,
},
}
_, err := rcsw.localAPIClient.Client.CoreV1().Namespaces().Create(ns)
if err != nil {
// something went wrong with the create, we can just retry as well
return RetryableError{[]error{err}}
}
} else {
// something else went wrong, so we can just retry
return RetryableError{[]error{err}}
}
}
return nil
}
// This method takes care of port remapping. Essentially, it gets the one gateway port
// that we should send traffic to and creates endpoint ports that bind to the mirrored service ports
// (same name, etc) but send traffic to the gateway port. This way we do not need to do any remapping
// on the service side of things. It all happens in the endpoints.
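// For example (illustrative values): a remote service exposing ports
// http:8080 and admin:9000 through a gateway whose incoming port is 4180
// gets mirrored endpoints with ports http->4180 and admin->4180, while the
// mirrored service itself keeps ports 8080 and 9000.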
func (rcsw *RemoteClusterServiceWatcher) getEndpointsPorts(service *corev1.Service, gatewayPort int32) []corev1.EndpointPort {
var endpointsPorts []corev1.EndpointPort
for _, remotePort := range service.Spec.Ports {
endpointsPorts = append(endpointsPorts, corev1.EndpointPort{
Name: remotePort.Name,
Protocol: remotePort.Protocol,
Port: gatewayPort,
})
}
return endpointsPorts
}
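// cleanupOrphanedServices deletes all local mirrored services whose original
// no longer exists on the remote cluster (see OrphanedServicesGcTriggered).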
func (rcsw *RemoteClusterServiceWatcher) cleanupOrphanedServices() error {
matchLabels := map[string]string{
consts.MirroredResourceLabel: "true",
consts.RemoteClusterNameLabel: rcsw.clusterName,
}
servicesOnLocalCluster, err := rcsw.localAPIClient.Svc().Lister().List(labels.Set(matchLabels).AsSelector())
if err != nil {
innerErr := fmt.Errorf("failed obtaining local services while GC-ing: %s", err)
if kerrors.IsNotFound(err) {
return innerErr
}
// if it is something else, we can just retry
return RetryableError{[]error{innerErr}}
}
var errors []error
for _, srv := range servicesOnLocalCluster {
_, err := rcsw.remoteAPIClient.Svc().Lister().Services(srv.Namespace).Get(rcsw.originalResourceName(srv.Name))
if err != nil {
if kerrors.IsNotFound(err) {
// service does not exist anymore. Need to delete
if err := rcsw.localAPIClient.Client.CoreV1().Services(srv.Namespace).Delete(srv.Name, &metav1.DeleteOptions{}); err != nil {
// something went wrong with deletion, we need to retry
errors = append(errors, err)
} else {
rcsw.log.Debugf("Deleted service %s/%s as part of GC process", srv.Namespace, srv.Name)
}
} else {
// something went wrong getting the service, we can retry
errors = append(errors, err)
}
}
}
if len(errors) > 0 {
return RetryableError{errors}
}
return nil
}
// Whenever we stop watching a cluster, we need to clean up everything that we have
// created. This piece of code is responsible for doing just that. It takes care of
// services, endpoints and namespaces (if needed)
func (rcsw *RemoteClusterServiceWatcher) cleanupMirroredResources() error {
matchLabels := map[string]string{
consts.MirroredResourceLabel: "true",
consts.RemoteClusterNameLabel: rcsw.clusterName,
}
services, err := rcsw.localAPIClient.Svc().Lister().List(labels.Set(matchLabels).AsSelector())
if err != nil {
innerErr := fmt.Errorf("could not retrieve mirrored services that need cleaning up: %s", err)
if kerrors.IsNotFound(err) {
return innerErr
}
// if it's not notFound then something else went wrong, so we can retry
return RetryableError{[]error{innerErr}}
}
var errors []error
for _, svc := range services {
if err := rcsw.localAPIClient.Client.CoreV1().Services(svc.Namespace).Delete(svc.Name, &metav1.DeleteOptions{}); err != nil {
if kerrors.IsNotFound(err) {
continue
}
errors = append(errors, fmt.Errorf("Could not delete service %s/%s: %s", svc.Namespace, svc.Name, err))
} else {
rcsw.log.Debugf("Deleted service %s/%s", svc.Namespace, svc.Name)
}
}
endpoints, err := rcsw.localAPIClient.Endpoint().Lister().List(labels.Set(matchLabels).AsSelector())
if err != nil {
innerErr := fmt.Errorf("could not retrieve Endpoints that need cleaning up: %s", err)
if kerrors.IsNotFound(err) {
return innerErr
}
return RetryableError{[]error{innerErr}}
}
for _, endpt := range endpoints {
if err := rcsw.localAPIClient.Client.CoreV1().Endpoints(endpt.Namespace).Delete(endpt.Name, &metav1.DeleteOptions{}); err != nil {
if kerrors.IsNotFound(err) {
continue
}
errors = append(errors, fmt.Errorf("Could not delete Endpoints %s/%s: %s", endpt.Namespace, endpt.Name, err))
} else {
rcsw.log.Debugf("Deleted Endpoints %s/%s", endpt.Namespace, endpt.Name)
}
}
if len(errors) > 0 {
return RetryableError{errors}
}
return nil
}
// Deletes a locally mirrored service as it is not present on the remote cluster anymore
func (rcsw *RemoteClusterServiceWatcher) handleRemoteServiceDeleted(ev *RemoteServiceDeleted) error {
localServiceName := rcsw.mirroredResourceName(ev.Name)
rcsw.log.Debugf("Deleting mirrored service %s/%s and its corresponding Endpoints", ev.Namespace, localServiceName)
if err := rcsw.localAPIClient.Client.CoreV1().Services(ev.Namespace).Delete(localServiceName, &metav1.DeleteOptions{}); err != nil {
if kerrors.IsNotFound(err) {
return nil
}
// we can try deleting it again
return RetryableError{[]error{fmt.Errorf("could not delete Service: %s/%s: %s", ev.Namespace, localServiceName, err)}}
}
rcsw.log.Debugf("Successfully deleted Service: %s/%s", ev.Namespace, localServiceName)
return nil
}
// Updates a locally mirrored service. There might have been some pretty fundamental changes such as
// a new gateway being assigned or additional ports being exposed. This method takes care of that.
func (rcsw *RemoteClusterServiceWatcher) handleRemoteServiceUpdated(ev *RemoteServiceUpdated) error {
serviceInfo := fmt.Sprintf("%s/%s", ev.remoteUpdate.Namespace, ev.remoteUpdate.Name)
rcsw.log.Debugf("Updating remote mirrored service %s/%s", ev.localService.Namespace, ev.localService.Name)
gatewayEndpoints, gatewayPort, resVersion, err := rcsw.resolveGateway(ev.gatewayData)
if err == nil {
ev.localEndpoints.Subsets = []corev1.EndpointSubset{
{
Addresses: gatewayEndpoints,
Ports: rcsw.getEndpointsPorts(ev.remoteUpdate, gatewayPort),
},
}
} else {
rcsw.log.Warnf("Could not resolve gateway for %s: %s, nulling endpoints", serviceInfo, err)
ev.localEndpoints.Subsets = nil
}
if _, err := rcsw.localAPIClient.Client.CoreV1().Endpoints(ev.localEndpoints.Namespace).Update(ev.localEndpoints); err != nil {
return RetryableError{[]error{err}}
}
ev.localService.Labels = rcsw.getMirroredServiceLabels(ev.gatewayData)
ev.localService.Annotations = rcsw.getMirroredServiceAnnotations(ev.remoteUpdate)
ev.localService.Annotations[consts.RemoteGatewayResourceVersionAnnotation] = resVersion
ev.localService.Spec.Ports = remapRemoteServicePorts(ev.remoteUpdate.Spec.Ports)
if _, err := rcsw.localAPIClient.Client.CoreV1().Services(ev.localService.Namespace).Update(ev.localService); err != nil {
return RetryableError{[]error{err}}
}
return nil
}
func remapRemoteServicePorts(ports []corev1.ServicePort) []corev1.ServicePort {
// We ignore the NodePort here as it's not relevant
// to the local cluster
var newPorts []corev1.ServicePort
for _, port := range ports {
newPorts = append(newPorts, corev1.ServicePort{
Name: port.Name,
Protocol: port.Protocol,
Port: port.Port,
TargetPort: port.TargetPort,
})
}
return newPorts
}
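// handleRemoteServiceCreated creates a local mirror (Service plus Endpoints)
// for a newly observed remote service, resolving its gateway if possible.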
func (rcsw *RemoteClusterServiceWatcher) handleRemoteServiceCreated(ev *RemoteServiceCreated) error {
remoteService := ev.service.DeepCopy()
serviceInfo := fmt.Sprintf("%s/%s", remoteService.Namespace, remoteService.Name)
localServiceName := rcsw.mirroredResourceName(remoteService.Name)
if err := rcsw.mirrorNamespaceIfNecessary(remoteService.Namespace); err != nil {
return err
}
// here we always create both a service and endpoints, even if we cannot resolve the gateway
serviceToCreate := &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: localServiceName,
Namespace: remoteService.Namespace,
Annotations: rcsw.getMirroredServiceAnnotations(remoteService),
Labels: rcsw.getMirroredServiceLabels(ev.gatewayData),
},
Spec: corev1.ServiceSpec{
Ports: remapRemoteServicePorts(remoteService.Spec.Ports),
},
}
endpointsToCreate := &corev1.Endpoints{
ObjectMeta: metav1.ObjectMeta{
Name: localServiceName,
Namespace: ev.service.Namespace,
Labels: map[string]string{
consts.MirroredResourceLabel: "true",
consts.RemoteClusterNameLabel: rcsw.clusterName,
consts.RemoteGatewayNameLabel: ev.gatewayData.Name,
consts.RemoteGatewayNsLabel: ev.gatewayData.Namespace,
},
},
}
// Now we try to resolve the remote gateway
gatewayEndpoints, gatewayPort, resVersion, err := rcsw.resolveGateway(ev.gatewayData)
if err == nil {
// only if we can resolve it do we update the endpoints addresses and ports
rcsw.log.Debugf("Resolved remote gateway [%v:%d] for %s", gatewayEndpoints, gatewayPort, serviceInfo)
endpointsToCreate.Subsets = []corev1.EndpointSubset{
{
Addresses: gatewayEndpoints,
Ports: rcsw.getEndpointsPorts(ev.service, gatewayPort),
},
}
serviceToCreate.Annotations[consts.RemoteGatewayResourceVersionAnnotation] = resVersion
} else {
rcsw.log.Warnf("Could not resolve gateway for %s: %s, skipping subsets", serviceInfo, err)
endpointsToCreate.Subsets = nil
}
rcsw.log.Debugf("Creating a new service mirror for %s", serviceInfo)
if _, err := rcsw.localAPIClient.Client.CoreV1().Services(remoteService.Namespace).Create(serviceToCreate); err != nil {
if !kerrors.IsAlreadyExists(err) {
// we might have created it during an earlier attempt; if that is not the case, we retry
return RetryableError{[]error{err}}
}
}
rcsw.log.Debugf("Creating a new Endpoints for %s", serviceInfo)
if _, err := rcsw.localAPIClient.Client.CoreV1().Endpoints(ev.service.Namespace).Create(endpointsToCreate); err != nil {
// we clean up after ourselves
rcsw.localAPIClient.Client.CoreV1().Services(ev.service.Namespace).Delete(localServiceName, &metav1.DeleteOptions{})
// and retry
return RetryableError{[]error{err}}
}
return nil
}
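// handleRemoteGatewayDeleted nulls the subsets of all local Endpoints that
// point to the deleted gateway.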
func (rcsw *RemoteClusterServiceWatcher) handleRemoteGatewayDeleted(ev *RemoteGatewayDeleted) error {
affectedEndpoints, err := rcsw.endpointsForGateway(ev.gatewayData)
if err != nil {
// if we cannot find the endpoints, we can give up
if kerrors.IsNotFound(err) {
return err
}
// if it is another error, just retry
return RetryableError{[]error{err}}
}
var errors []error
if len(affectedEndpoints) > 0 {
rcsw.log.Debugf("Nulling %d endpoints due to remote gateway [%s/%s] deletion", len(affectedEndpoints), ev.gatewayData.Namespace, ev.gatewayData.Name)
for _, ep := range affectedEndpoints {
updated := ep.DeepCopy()
updated.Subsets = nil
if _, err := rcsw.localAPIClient.Client.CoreV1().Endpoints(ep.Namespace).Update(updated); err != nil {
errors = append(errors, err)
}
}
}
if len(errors) > 0 {
// if we have encountered any errors, we can retry the whole operation
return RetryableError{errors}
}
return nil
}
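// handleRemoteGatewayUpdated propagates a gateway change (new external
// addresses or incoming port) to all mirrored services using this gateway.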
func (rcsw *RemoteClusterServiceWatcher) handleRemoteGatewayUpdated(ev *RemoteGatewayUpdated) error {
rcsw.log.Debugf("Updating %d services due to remote gateway [%s/%s] update", len(ev.affectedServices), ev.gatewayData.Namespace, ev.gatewayData.Name)
var errors []error
for _, svc := range ev.affectedServices {
updatedService := svc.DeepCopy()
if updatedService.Labels != nil {
updatedService.Annotations[consts.RemoteGatewayResourceVersionAnnotation] = ev.newResourceVersion
}
endpoints, err := rcsw.localAPIClient.Endpoint().Lister().Endpoints(svc.Namespace).Get(svc.Name)
if err != nil {
errors = append(errors, fmt.Errorf("Could not get endpoints: %s", err))
continue
}
updatedEndpoints := endpoints.DeepCopy()
updatedEndpoints.Subsets = []corev1.EndpointSubset{
{
Addresses: ev.newEndpointAddresses,
Ports: rcsw.getEndpointsPorts(updatedService, ev.newPort),
},
}
_, err = rcsw.localAPIClient.Client.CoreV1().Services(updatedService.Namespace).Update(updatedService)
if err != nil {
errors = append(errors, err)
continue
}
_, err = rcsw.localAPIClient.Client.CoreV1().Endpoints(updatedService.Namespace).Update(updatedEndpoints)
if err != nil {
rcsw.localAPIClient.Client.CoreV1().Services(updatedService.Namespace).Delete(updatedService.Name, &metav1.DeleteOptions{})
errors = append(errors, err)
}
}
if len(errors) > 0 {
return RetryableError{errors}
}
return nil
}
// Retrieves the annotations that indicate this service can be mirrored.
// The values of these annotations help us resolve the gateway to which
// traffic should be sent.
func getGatewayMetadata(annotations map[string]string) *gatewayMetadata {
remoteGatewayName, hasGtwName := annotations[consts.GatewayNameAnnotation]
remoteGatewayNs, hasGtwNs := annotations[consts.GatewayNsAnnotation]
if hasGtwName && hasGtwNs {
return &gatewayMetadata{
Name: remoteGatewayName,
Namespace: remoteGatewayNs,
}
}
return nil
}
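// handleConsiderGatewayUpdateDispatch determines whether the updated service
// acts as a gateway for any mirrored services and, if so, dispatches either a
// RemoteGatewayUpdated or a RemoteGatewayDeleted event.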
func (rcsw *RemoteClusterServiceWatcher) handleConsiderGatewayUpdateDispatch(event *ConsiderGatewayUpdateDispatch) error {
gtwMetadata := &gatewayMetadata{
Name: event.maybeGateway.Name,
Namespace: event.maybeGateway.Namespace,
}
services, err := rcsw.mirroredServicesForGateway(gtwMetadata)
if err != nil {
// we can fail and requeue here in case there is a problem obtaining these...
if kerrors.IsNotFound(err) {
return err
}
return RetryableError{[]error{err}}
}
if len(services) > 0 {
gatewayMeta := &gatewayMetadata{
Name: event.maybeGateway.Name,
Namespace: event.maybeGateway.Namespace,
}
if endpoints, port, resVersion, err := rcsw.extractGatewayInfo(event.maybeGateway); err != nil {
rcsw.log.Warnf("Gateway [%s/%s] is not a compliant gateway anymore, dispatching GatewayDeleted event: %s", event.maybeGateway.Namespace, event.maybeGateway.Name, err)
// in case something changed about this gateway and it is not really a gateway anymore,
// simply dispatch a deletion event so all endpoints are nulled
endpoints, err := rcsw.endpointsForGateway(gatewayMeta)
if err != nil {
return RetryableError{[]error{err}}
}
rcsw.eventsQueue.AddRateLimited(&RemoteGatewayDeleted{gatewayMeta, endpoints})
} else {
affectedServices, err := rcsw.affectedMirroredServicesForGatewayUpdate(gtwMetadata, event.maybeGateway.ResourceVersion)
if err != nil {
if kerrors.IsNotFound(err) {
return err
}
return RetryableError{[]error{err}}
}
if len(affectedServices) > 0 {
rcsw.eventsQueue.Add(&RemoteGatewayUpdated{
newPort: port,
newEndpointAddresses: endpoints,
gatewayData: gatewayMeta,
newResourceVersion: resVersion,
affectedServices: affectedServices,
})
}
}
}
return nil
}
// this method is common to both CREATE and UPDATE because if we have been
// offline for some time due to a crash, a CREATE for a service that we have
// observed before is simply a case of UPDATE
func (rcsw *RemoteClusterServiceWatcher) createOrUpdateService(service *corev1.Service) error {
localName := rcsw.mirroredResourceName(service.Name)
localService, err := rcsw.localAPIClient.Svc().Lister().Services(service.Namespace).Get(localName)
gtwData := getGatewayMetadata(service.Annotations)
if err != nil {
if kerrors.IsNotFound(err) {
if gtwData != nil {
// at this point we know that this is a service that
// we are not mirroring but has gateway data, so we need
// to create it
rcsw.eventsQueue.Add(&RemoteServiceCreated{
service: service,
gatewayData: gtwData,
newResourceVersion: service.ResourceVersion,
})
} else {
// at this point we know that we do not have such a service
// and the remote service does not have metadata. So we try to
// dispatch a gateway update as the remote service might be a
// gateway for some of our already mirrored services
rcsw.eventsQueue.Add(&ConsiderGatewayUpdateDispatch{maybeGateway: service})
}
} else {
// we can retry the operation here
return RetryableError{[]error{err}}
}
} else {
if gtwData != nil {
// at this point we know this is an update to a service that we already
// have locally, so we check whether the resource version has changed
// and if so, dispatch a RemoteServiceUpdated event
lastMirroredRemoteVersion, ok := localService.Annotations[consts.RemoteResourceVersionAnnotation]
if ok && lastMirroredRemoteVersion != service.ResourceVersion {
endpoints, err := rcsw.localAPIClient.Endpoint().Lister().Endpoints(service.Namespace).Get(localName)
if err == nil {
rcsw.eventsQueue.Add(&RemoteServiceUpdated{
localService: localService,
localEndpoints: endpoints,
remoteUpdate: service,
gatewayData: gtwData,
})
} else {
return RetryableError{[]error{err}}
}
}
} else {
// if this is missing gateway metadata, but we have the
// service we can dispatch a RemoteServiceDeleted event
// because at some point in time we mirrored this service,
// however it is not mirrorable anymore
rcsw.eventsQueue.Add(&RemoteServiceDeleted{
Name: service.Name,
Namespace: service.Namespace,
})
}
}
return nil
}
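// affectedMirroredServicesForGatewayUpdate returns the mirrored services that
// use this gateway and have not yet observed its latest resource version.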
func (rcsw *RemoteClusterServiceWatcher) affectedMirroredServicesForGatewayUpdate(gatewayData *gatewayMetadata, latestResourceVersion string) ([]*corev1.Service, error) {
services, err := rcsw.mirroredServicesForGateway(gatewayData)
if err != nil {
return nil, err
}
affectedServices := []*corev1.Service{}
for _, srv := range services {
ver, ok := srv.Annotations[consts.RemoteGatewayResourceVersionAnnotation]
if ok && ver != latestResourceVersion {
affectedServices = append(affectedServices, srv)
}
}
return affectedServices, nil
}
func (rcsw *RemoteClusterServiceWatcher) mirroredServicesForGateway(gatewayData *gatewayMetadata) ([]*corev1.Service, error) {
matchLabels := map[string]string{
consts.MirroredResourceLabel: "true",
consts.RemoteGatewayNameLabel: gatewayData.Name,
consts.RemoteGatewayNsLabel: gatewayData.Namespace,
}
services, err := rcsw.localAPIClient.Svc().Lister().List(labels.Set(matchLabels).AsSelector())
if err != nil {
return nil, err
}
return services, nil
}
func (rcsw *RemoteClusterServiceWatcher) endpointsForGateway(gatewayData *gatewayMetadata) ([]*corev1.Endpoints, error) {
matchLabels := map[string]string{
consts.MirroredResourceLabel: "true",
consts.RemoteGatewayNameLabel: gatewayData.Name,
consts.RemoteGatewayNsLabel: gatewayData.Namespace,
}
endpoints, err := rcsw.localAPIClient.Endpoint().Lister().List(labels.Set(matchLabels).AsSelector())
if err != nil {
return nil, err
}
return endpoints, nil
}
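// handleOnDelete dispatches a RemoteServiceDeleted event if the deleted
// remote service carried gateway metadata, or a RemoteGatewayDeleted event
// otherwise, since the service might have acted as a gateway.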
func (rcsw *RemoteClusterServiceWatcher) handleOnDelete(service *corev1.Service) {
if gtwData := getGatewayMetadata(service.Annotations); gtwData != nil {
rcsw.eventsQueue.Add(&RemoteServiceDeleted{
Name: service.Name,
Namespace: service.Namespace,
})
} else {
rcsw.eventsQueue.Add(&RemoteGatewayDeleted{
gatewayData: &gatewayMetadata{
Name: service.Name,
Namespace: service.Namespace,
}})
}
}
// the main processing loop in which we handle more domain-specific events
// and deal with retries
func (rcsw *RemoteClusterServiceWatcher) processEvents() {
for {
event, done := rcsw.eventsQueue.Get()
var err error
switch ev := event.(type) {
case *OnAddCalled:
err = rcsw.createOrUpdateService(ev.svc)
case *OnUpdateCalled:
err = rcsw.createOrUpdateService(ev.svc)
case *OnDeleteCalled:
rcsw.handleOnDelete(ev.svc)
case *RemoteServiceCreated:
err = rcsw.handleRemoteServiceCreated(ev)
case *RemoteServiceUpdated:
err = rcsw.handleRemoteServiceUpdated(ev)
case *RemoteServiceDeleted:
err = rcsw.handleRemoteServiceDeleted(ev)
case *RemoteGatewayUpdated:
err = rcsw.handleRemoteGatewayUpdated(ev)
case *RemoteGatewayDeleted:
err = rcsw.handleRemoteGatewayDeleted(ev)
case *ConsiderGatewayUpdateDispatch:
err = rcsw.handleConsiderGatewayUpdateDispatch(ev)
case *ClusterUnregistered:
err = rcsw.cleanupMirroredResources()
case *OrphanedServicesGcTriggered:
err = rcsw.cleanupOrphanedServices()
default:
if ev != nil || !done { // we get a nil in case we are shutting down...
rcsw.log.Warnf("Received unknown event: %v", ev)
}
}
// the logic here is that there might have been an API
// connectivity glitch or something, so it's not a bad idea to requeue
// the event and try again up to the requeue limit, just to ensure
// that our state does not diverge due to bad luck...
if err == nil {
rcsw.eventsQueue.Forget(event)
} else {
switch e := err.(type) {
case RetryableError:
{
if (rcsw.eventsQueue.NumRequeues(event) < rcsw.requeueLimit) && !done {
rcsw.log.Errorf("Error processing %s (will retry): %s", event, e)
rcsw.eventsQueue.Add(event)
} else {
rcsw.log.Errorf("Error processing %s (giving up): %s", event, e)
rcsw.eventsQueue.Forget(event)
}
}
default:
rcsw.log.Errorf("Error processing %s (will not retry): %s", event, e)
rcsw.log.Error(e)
}
}
if done {
rcsw.log.Debug("Shutting down events processor")
return
}
}
}
// Start starts watching the remote cluster
func (rcsw *RemoteClusterServiceWatcher) Start() {
rcsw.remoteAPIClient.Sync(rcsw.stopper)
rcsw.eventsQueue.Add(&OprhanedServicesGcTriggered{})
rcsw.remoteAPIClient.Svc().Informer().AddEventHandler(
cache.ResourceEventHandlerFuncs{
AddFunc: func(svc interface{}) {
rcsw.eventsQueue.Add(&OnAddCalled{svc.(*corev1.Service)})
},
DeleteFunc: func(obj interface{}) {
service, ok := obj.(*corev1.Service)
if !ok {
tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
if !ok {
rcsw.log.Errorf("couldn't get object from DeletedFinalStateUnknown %#v", obj)
return
}
service, ok = tombstone.Obj.(*corev1.Service)
if !ok {
rcsw.log.Errorf("DeletedFinalStateUnknown contained object that is not a Service %#v", obj)
return
}
}
rcsw.eventsQueue.Add(&OnDeleteCalled{service})
},
UpdateFunc: func(old, new interface{}) {
rcsw.eventsQueue.Add(&OnUpdateCalled{new.(*corev1.Service)})
},
},
)
go rcsw.processEvents()
}
// Stop stops watching the cluster and cleans up all mirrored resources
func (rcsw *RemoteClusterServiceWatcher) Stop(cleanupState bool) {
close(rcsw.stopper)
if cleanupState {
rcsw.eventsQueue.Add(&ClusterUnregistered{})
}
rcsw.eventsQueue.ShutDown()
}
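
To illustrate the naming, labels and annotations end to end, here is a sketch (all names, namespaces and versions are hypothetical; the keys come from the service mirror constants in pkg/k8s). A remote service annotated for mirroring:

apiVersion: v1
kind: Service
metadata:
  name: web-svc                                    # hypothetical remote service
  namespace: default
  annotations:
    mirror.linkerd.io/gateway-name: main-gateway   # hypothetical gateway
    mirror.linkerd.io/gateway-ns: gateways
spec:
  ports:
  - name: http
    port: 8080

observed from a cluster registered as `remote-east` would be mirrored locally as:

apiVersion: v1
kind: Service
metadata:
  name: web-svc-remote-east        # remote name suffixed with the cluster name
  namespace: default
  labels:
    mirror.linkerd.io/mirrored-service: "true"
    mirror.linkerd.io/cluster-name: remote-east
    mirror.linkerd.io/remote-gateway-name: main-gateway
    mirror.linkerd.io/remote-gateway-ns: gateways
  annotations:
    mirror.linkerd.io/remote-resource-version: "12345"    # hypothetical
    mirror.linkerd.io/remote-svc-fq-name: web-svc.default.svc.cluster.local
spec:
  ports:
  - name: http
    port: 8080

together with Endpoints of the same name whose addresses are the gateway's external IPs and whose `http` port is remapped to the gateway's port named `incoming-port`.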

View File

@ -0,0 +1,167 @@
package servicemirror
import (
"fmt"
"sync"
"github.com/linkerd/linkerd2/controller/k8s"
log "github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/clientcmd"
consts "github.com/linkerd/linkerd2/pkg/k8s"
)
// RemoteClusterConfigWatcher watches for secrets of type MirrorSecretType
// and, upon detecting the creation of such a secret, starts a RemoteClusterServiceWatcher
type RemoteClusterConfigWatcher struct {
k8sAPI *k8s.API
clusterWatchers map[string]*RemoteClusterServiceWatcher
requeueLimit int
sync.RWMutex
}
// NewRemoteClusterConfigWatcher creates a new config watcher
func NewRemoteClusterConfigWatcher(k8sAPI *k8s.API, requeueLimit int) *RemoteClusterConfigWatcher {
rcw := &RemoteClusterConfigWatcher{
k8sAPI: k8sAPI,
clusterWatchers: map[string]*RemoteClusterServiceWatcher{},
requeueLimit: requeueLimit,
}
k8sAPI.Secret().Informer().AddEventHandler(
cache.FilteringResourceEventHandler{
FilterFunc: func(obj interface{}) bool {
switch object := obj.(type) {
case *corev1.Secret:
return object.Type == consts.MirrorSecretType
case cache.DeletedFinalStateUnknown:
if secret, ok := object.Obj.(*corev1.Secret); ok {
return secret.Type == consts.MirrorSecretType
}
return false
default:
return false
}
},
Handler: cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
secret := obj.(*corev1.Secret)
if err := rcw.registerRemoteCluster(secret); err != nil {
log.Errorf("Cannot register remote cluster: %s", err)
}
},
DeleteFunc: func(obj interface{}) {
secret, ok := obj.(*corev1.Secret)
if !ok {
tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
if !ok {
log.Errorf("couldn't get object from DeletedFinalStateUnknown %#v", obj)
return
}
secret, ok = tombstone.Obj.(*corev1.Secret)
if !ok {
log.Errorf("DeletedFinalStateUnknown contained object that is not a Secret %#v", obj)
return
}
}
if err := rcw.unregisterRemoteCluster(secret, true); err != nil {
log.Errorf("Cannot unregister remote cluster: %s", err)
}
},
UpdateFunc: func(old, new interface{}) {
oldSecret := old.(*corev1.Secret)
newSecret := new.(*corev1.Secret)
if oldSecret.ResourceVersion != newSecret.ResourceVersion {
if err := rcw.unregisterRemoteCluster(oldSecret, false); err != nil {
log.Errorf("Cannot unregister remote cluster: %s", err)
return
}
if err := rcw.registerRemoteCluster(newSecret); err != nil {
log.Errorf("Cannot register remote cluster: %s", err)
}
}
//TODO: Handle update (it might be that the credentials have changed...)
},
},
},
)
return rcw
}
// Stop shuts down all created cluster watchers
func (rcw *RemoteClusterConfigWatcher) Stop() {
rcw.Lock()
defer rcw.Unlock()
for _, watcher := range rcw.clusterWatchers {
watcher.Stop(false)
}
}
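// registerRemoteCluster parses the secret and starts a service watcher for
// the remote cluster it describes.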
func (rcw *RemoteClusterConfigWatcher) registerRemoteCluster(secret *corev1.Secret) error {
config, name, domain, err := parseRemoteClusterSecret(secret)
if err != nil {
return err
}
clientConfig, err := clientcmd.RESTConfigFromKubeConfig(config)
if err != nil {
return fmt.Errorf("unable to parse kube config: %s", err)
}
rcw.Lock()
defer rcw.Unlock()
if _, ok := rcw.clusterWatchers[name]; ok {
return fmt.Errorf("there is already a cluster with name %s being watcher. Please delete its config before attempting to register a new one", name)
}
watcher, err := NewRemoteClusterServiceWatcher(rcw.k8sAPI, clientConfig, name, rcw.requeueLimit, domain)
if err != nil {
return err
}
rcw.clusterWatchers[name] = watcher
watcher.Start()
return nil
}
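// unregisterRemoteCluster stops the watcher for this cluster and, if
// requested, cleans up all of its mirrored resources.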
func (rcw *RemoteClusterConfigWatcher) unregisterRemoteCluster(secret *corev1.Secret, cleanState bool) error {
_, name, _, err := parseRemoteClusterSecret(secret)
if err != nil {
return err
}
rcw.Lock()
defer rcw.Unlock()
if watcher, ok := rcw.clusterWatchers[name]; ok {
watcher.Stop(cleanState)
} else {
return fmt.Errorf("cannot find watcher for cluser: %s", name)
}
delete(rcw.clusterWatchers, name)
return nil
}
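// parseRemoteClusterSecret extracts the kubeconfig bytes, the cluster name
// and the cluster domain from a mirror secret.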
func parseRemoteClusterSecret(secret *corev1.Secret) ([]byte, string, string, error) {
clusterName, hasClusterName := secret.Annotations[consts.RemoteClusterNameLabel]
config, hasConfig := secret.Data[consts.ConfigKeyName]
domain, hasDomain := secret.Annotations[consts.RemoteClusterDomainAnnotation]
if !hasClusterName {
return nil, "", "", fmt.Errorf("secret should contain remote cluster name as annotation %s", consts.RemoteClusterNameLabel)
}
if !hasConfig {
return nil, "", "", fmt.Errorf("secret of type %s should contain key %s", consts.MirrorSecretType, consts.ConfigKeyName)
}
if !hasDomain {
return nil, "", "", fmt.Errorf("secret should contain remote cluster domain as annotation %s", consts.RemoteClusterDomainAnnotation)
}
return config, clusterName, domain, nil
}
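
For reference, a secret that registers a remote cluster might look like the following sketch (names and values are hypothetical; the secret type, annotation keys and data key match the service mirror constants in pkg/k8s):

apiVersion: v1
kind: Secret
type: mirror.linkerd.io/remote-kubeconfig
metadata:
  name: remote-east-credentials                     # hypothetical
  namespace: linkerd-service-mirror
  annotations:
    mirror.linkerd.io/cluster-name: remote-east     # hypothetical cluster name
    mirror.linkerd.io/remote-cluster-domain: cluster.local
data:
  kubeconfig: <base64-encoded kubeconfig for the remote cluster>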

View File

@ -0,0 +1,47 @@
package servicemirror
import (
"flag"
"os"
"os/signal"
"syscall"
"github.com/linkerd/linkerd2/controller/k8s"
"github.com/linkerd/linkerd2/pkg/flags"
log "github.com/sirupsen/logrus"
)
// Main executes the service-mirror subcommand
func Main(args []string) {
cmd := flag.NewFlagSet("service-mirror", flag.ExitOnError)
kubeConfigPath := cmd.String("kubeconfig", "", "path to the local kube config")
requeueLimit := cmd.Int("event-requeue-limit", 3, "requeue limit for events")
flags.ConfigureAndParse(cmd, args)
stop := make(chan os.Signal, 1)
signal.Notify(stop, os.Interrupt, syscall.SIGTERM)
k8sAPI, err := k8s.InitializeAPI(
*kubeConfigPath,
false,
k8s.Secret,
k8s.Svc,
k8s.NS,
k8s.Endpoint,
)
if err != nil {
log.Fatalf("Failed to initialize K8s API: %s", err)
}
k8sAPI.Sync(nil)
watcher := NewRemoteClusterConfigWatcher(k8sAPI, *requeueLimit)
log.Info("Started cluster config watcher")
<-stop
log.Info("Stopping cluster config watcher")
watcher.Stop()
}

View File

@ -42,6 +42,7 @@ func Main(args []string) {
k8sAPI, err := k8s.InitializeAPI(
*kubeConfigPath,
true,
k8s.CJ,
k8s.DS,
k8s.SS,
@ -86,7 +87,7 @@ func Main(args []string) {
log.Fatal(err.Error())
}
k8sAPI.Sync() // blocks until caches are synced
k8sAPI.Sync(nil) // blocks until caches are synced
go func() {
log.Infof("starting APIServer on %s", *apiServerAddr)

View File

@ -6,6 +6,8 @@ import (
"strings"
"time"
"k8s.io/client-go/rest"
tsclient "github.com/deislabs/smi-sdk-go/pkg/gen/client/split/clientset/versioned"
ts "github.com/deislabs/smi-sdk-go/pkg/gen/client/split/informers/externalversions"
tsinformers "github.com/deislabs/smi-sdk-go/pkg/gen/client/split/informers/externalversions/split/v1alpha1"
@ -58,6 +60,7 @@ const (
Svc
TS
Node
Secret
)
// API provides shared informers for all Kubernetes objects
@ -80,6 +83,7 @@ type API struct {
svc coreinformers.ServiceInformer
ts tsinformers.TrafficSplitInformer
node coreinformers.NodeInformer
secret coreinformers.SecretInformer
syncChecks []cache.InformerSynced
sharedInformers informers.SharedInformerFactory
@ -88,18 +92,41 @@ type API struct {
}
// InitializeAPI creates Kubernetes clients and returns an initialized API wrapper.
func InitializeAPI(kubeConfig string, resources ...APIResource) (*API, error) {
k8sClient, err := k8s.NewAPI(kubeConfig, "", "", []string{}, 0)
func InitializeAPI(kubeConfig string, ensureClusterWideAccess bool, resources ...APIResource) (*API, error) {
config, err := k8s.GetConfig(kubeConfig, "")
if err != nil {
return nil, fmt.Errorf("error configuring Kubernetes API client: %v", err)
}
k8sClient, err := k8s.NewAPIForConfig(config, "", []string{}, 0)
if err != nil {
return nil, err
}
// check for cluster-wide access
err = k8s.ClusterAccess(k8sClient)
return initAPI(k8sClient, config, ensureClusterWideAccess, resources...)
}
// InitializeAPIForConfig creates Kubernetes clients and returns an initialized API wrapper.
func InitializeAPIForConfig(kubeConfig *rest.Config, ensureClusterWideAccess bool, resources ...APIResource) (*API, error) {
k8sClient, err := k8s.NewAPIForConfig(kubeConfig, "", []string{}, 0)
if err != nil {
return nil, err
}
return initAPI(k8sClient, kubeConfig, ensureClusterWideAccess, resources...)
}
func initAPI(k8sClient *k8s.KubernetesAPI, kubeConfig *rest.Config, ensureClusterWideAccess bool, resources ...APIResource) (*API, error) {
// check for cluster-wide access
var err error
if ensureClusterWideAccess {
err := k8s.ClusterAccess(k8sClient)
if err != nil {
return nil, err
}
}
// check for need and access to ServiceProfiles
var spClient *spclient.Clientset
for _, res := range resources {
@ -210,17 +237,19 @@ func NewAPI(
case Node:
api.node = sharedInformers.Core().V1().Nodes()
api.syncChecks = append(api.syncChecks, api.node.Informer().HasSynced)
case Secret:
api.secret = sharedInformers.Core().V1().Secrets()
api.syncChecks = append(api.syncChecks, api.secret.Informer().HasSynced)
}
}
return api
}
// Sync waits for all informers to be synced.
func (api *API) Sync() {
api.sharedInformers.Start(nil)
api.spSharedInformers.Start(nil)
api.tsSharedInformers.Start(nil)
func (api *API) Sync(stopCh <-chan struct{}) {
api.sharedInformers.Start(stopCh)
api.spSharedInformers.Start(stopCh)
api.tsSharedInformers.Start(stopCh)
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
defer cancel()
@ -359,6 +388,14 @@ func (api *API) Node() coreinformers.NodeInformer {
return api.node
}
// Secret provides access to a shared informer and lister for Secrets.
func (api *API) Secret() coreinformers.SecretInformer {
if api.secret == nil {
panic("Secret informer not configured")
}
return api.secret
}
// CJ provides access to a shared informer and lister for CronJobs.
func (api *API) CJ() batchv1beta1informers.CronJobInformer {
if api.cj == nil {

View File

@ -42,7 +42,7 @@ func newAPI(retry bool, resourceConfigs []string, extraConfigs ...string) (*API,
}
if retry {
api.Sync()
api.Sync(nil)
}
return api, k8sResults, nil

View File

@ -3,7 +3,6 @@ package k8s
import (
tsclient "github.com/deislabs/smi-sdk-go/pkg/gen/client/split/clientset/versioned"
spclient "github.com/linkerd/linkerd2/controller/gen/client/clientset/versioned"
"github.com/linkerd/linkerd2/pkg/k8s"
"github.com/linkerd/linkerd2/pkg/prometheus"
"k8s.io/client-go/rest"
@ -11,35 +10,20 @@ import (
_ "k8s.io/client-go/plugin/pkg/client/auth"
)
func newConfig(kubeConfig string, telemetryName string) (*rest.Config, error) {
config, err := k8s.GetConfig(kubeConfig, "")
if err != nil {
return nil, err
}
func wrapTransport(config *rest.Config, telemetryName string) *rest.Config {
wt := config.WrapTransport
config.WrapTransport = prometheus.ClientWithTelemetry(telemetryName, wt)
return config, nil
return config
}
// NewSpClientSet returns a Kubernetes ServiceProfile client for the given
// configuration.
func NewSpClientSet(kubeConfig string) (*spclient.Clientset, error) {
config, err := newConfig(kubeConfig, "sp")
if err != nil {
return nil, err
}
return spclient.NewForConfig(config)
func NewSpClientSet(kubeConfig *rest.Config) (*spclient.Clientset, error) {
return spclient.NewForConfig(wrapTransport(kubeConfig, "sp"))
}
// NewTsClientSet returns a Kubernetes TrafficSplit client for the given
// configuration.
func NewTsClientSet(kubeConfig string) (*tsclient.Clientset, error) {
config, err := newConfig(kubeConfig, "ts")
if err != nil {
return nil, err
}
return tsclient.NewForConfig(config)
func NewTsClientSet(kubeConfig *rest.Config) (*tsclient.Clientset, error) {
return tsclient.NewForConfig(wrapTransport(kubeConfig, "ts"))
}

View File

@ -379,7 +379,7 @@ status:
fakeGrpcServer := newGRPCTapServer(uint(tapPort), "controller-ns", "cluster.local", k8sAPI)
k8sAPI.Sync()
k8sAPI.Sync(nil)
err = fakeGrpcServer.TapByResource(&exp.req, &stream)
if !reflect.DeepEqual(err, exp.err) {
@ -609,7 +609,7 @@ status:
t.Fatalf("NewFakeAPI returned an error: %s", err)
}
s := NewGrpcTapServer(4190, "controller-ns", "cluster.local", k8sAPI)
k8sAPI.Sync()
k8sAPI.Sync(nil)
labels := make(map[string]string)
ip, err := addr.ParsePublicIPV4(exp.requestedIP)

View File

@ -31,7 +31,7 @@ func Launch(APIResources []k8s.APIResource, metricsPort uint32, handler handlerF
defer close(stop)
signal.Notify(stop, os.Interrupt, syscall.SIGTERM)
k8sAPI, err := k8s.InitializeAPI(*kubeconfig, APIResources...)
k8sAPI, err := k8s.InitializeAPI(*kubeconfig, true, APIResources...)
if err != nil {
log.Fatalf("failed to initialize Kubernetes API: %s", err)
}
@ -46,7 +46,7 @@ func Launch(APIResources []k8s.APIResource, metricsPort uint32, handler handlerF
log.Fatalf("failed to initialize the webhook server: %s", err)
}
k8sAPI.Sync()
k8sAPI.Sync(nil)
go s.Start()
go admin.StartServer(*metricsAddr)

View File

@ -99,6 +99,11 @@ func (chart *Chart) RenderCNI() (bytes.Buffer, error) {
return chart.render(cniPartials)
}
// RenderServiceMirror returns a bytes buffer with the result of rendering a Helm chart
func (chart *Chart) RenderServiceMirror() (bytes.Buffer, error) {
return chart.render([]*chartutil.BufferedFile{})
}
// ReadFile updates the buffered file with the data read from disk
func ReadFile(dir string, f *chartutil.BufferedFile) error {
filename := dir + f.Name

View File

@ -0,0 +1,49 @@
package servicemirror
import (
"fmt"
"github.com/linkerd/linkerd2/pkg/charts"
"k8s.io/helm/pkg/chartutil"
"sigs.k8s.io/yaml"
)
const (
helmDefaultServiceMirrorChartDir = "linkerd2-service-mirror"
)
// Values contains the top-level elements in the Helm charts
type Values struct {
Namespace string `json:"namespace"`
ControllerImage string `json:"controllerImage"`
ControllerImageVersion string `json:"controllerImageVersion"`
ServiceMirrorUID int64 `json:"serviceMirrorUID"`
LogLevel string `json:"logLevel"`
EventRequeueLimit int32 `json:"eventRequeueLimit"`
}
// NewValues returns a new instance of the Values type.
func NewValues() (*Values, error) {
chartDir := fmt.Sprintf("%s/", helmDefaultServiceMirrorChartDir)
v, err := readDefaults(chartDir)
if err != nil {
return nil, err
}
return v, nil
}
// readDefaults reads all the default variables from the values.yaml file.
// chartDir is the root directory of the Helm chart where values.yaml is.
func readDefaults(chartDir string) (*Values, error) {
file := &chartutil.BufferedFile{
Name: chartutil.ValuesfileName,
}
if err := charts.ReadFile(chartDir, file); err != nil {
return nil, err
}
values := Values{}
if err := yaml.Unmarshal(charts.InsertVersion(file.Data), &values); err != nil {
return nil, err
}
return &values, nil
}

View File

@ -43,6 +43,12 @@ func NewAPI(configPath, kubeContext string, impersonate string, impersonateGroup
if err != nil {
return nil, fmt.Errorf("error configuring Kubernetes API client: %v", err)
}
return NewAPIForConfig(config, impersonate, impersonateGroup, timeout)
}
// NewAPIForConfig uses a Kubernetes config to construct a client for accessing
// the configured cluster
func NewAPIForConfig(config *rest.Config, impersonate string, impersonateGroup []string, timeout time.Duration) (*KubernetesAPI, error) {
// k8s' client-go doesn't support injecting context
// https://github.com/kubernetes/kubernetes/issues/46503

View File

@ -319,6 +319,67 @@ const (
//
// In the future, this should be changed to a time- and audience-scoped secret.
IdentityServiceAccountTokenPath = "/var/run/secrets/kubernetes.io/serviceaccount/token"
/*
* Service mirror constants
*/
// SvcMirrorPrefix is the prefix common to all labels and annotations
// and types used by the service mirror component
SvcMirrorPrefix = "mirror.linkerd.io"
// MirrorSecretType is the type of secret that is supposed to contain
// the access information for remote clusters.
MirrorSecretType = SvcMirrorPrefix + "/remote-kubeconfig"
// GatewayNameAnnotation is the annotation that is present on the remote
// service, indicating which gateway is supposed to route traffic to it
GatewayNameAnnotation = SvcMirrorPrefix + "/gateway-name"
// RemoteGatewayNameLabel is the same as GatewayNameAnnotation but on the local,
// mirrored service. It's used for quick querying when we want to figure out
// the services that are being associated with a certain gateway
RemoteGatewayNameLabel = SvcMirrorPrefix + "/remote-gateway-name"
// GatewayNsAnnotation is present on the remote service, indicating the ns
// in which we can find the gateway
GatewayNsAnnotation = SvcMirrorPrefix + "/gateway-ns"
// RemoteGatewayNsLabel follows the same kind of logic as RemoteGatewayNameLabel
RemoteGatewayNsLabel = SvcMirrorPrefix + "/remote-gateway-ns"
// MirroredResourceLabel indicates that this resource is the result
// of a mirroring operation (can be a namespace or a service)
MirroredResourceLabel = SvcMirrorPrefix + "/mirrored-service"
// RemoteClusterNameLabel is put on a local mirrored service; it
// allows us to associate a mirrored service with a remote cluster
RemoteClusterNameLabel = SvcMirrorPrefix + "/cluster-name"
// RemoteClusterDomainAnnotation is present on the secret
// carrying the config of the remote cluster, to allow for
// using custom cluster domains
RemoteClusterDomainAnnotation = SvcMirrorPrefix + "/remote-cluster-domain"
// RemoteResourceVersionAnnotation is the last observed remote resource
// version of a mirrored resource. Useful when doing updates
RemoteResourceVersionAnnotation = SvcMirrorPrefix + "/remote-resource-version"
// RemoteServiceFqName is the fully qualified name of the mirrored service
// on the remote cluster
RemoteServiceFqName = SvcMirrorPrefix + "/remote-svc-fq-name"
// RemoteGatewayResourceVersionAnnotation is the last observed remote resource
// version of the gateway for a particular mirrored service. It is used
// in cases where we detect a change in a remote gateway
RemoteGatewayResourceVersionAnnotation = SvcMirrorPrefix + "/remote-gateway-resource-version"
// ConfigKeyName is the key in the secret that stores the kubeconfig needed to connect
// to a remote cluster
ConfigKeyName = "kubeconfig"
// GatewayPortName is the name of the incoming port of the gateway
GatewayPortName = "incoming-port"
)
// CreatedByAnnotationValue returns the value associated with