linkerd2/controller/api/destination/server.go

686 lines
21 KiB
Go

package destination
import (
"context"
"encoding/json"
"fmt"
"net"
"strconv"
"strings"
pb "github.com/linkerd/linkerd2-proxy-api/go/destination"
"github.com/linkerd/linkerd2/controller/api/destination/watcher"
sp "github.com/linkerd/linkerd2/controller/gen/apis/serviceprofile/v1alpha2"
"github.com/linkerd/linkerd2/controller/k8s"
labels "github.com/linkerd/linkerd2/pkg/k8s"
"github.com/linkerd/linkerd2/pkg/prometheus"
"github.com/linkerd/linkerd2/pkg/util"
logging "github.com/sirupsen/logrus"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/peer"
"google.golang.org/grpc/status"
corev1 "k8s.io/api/core/v1"
coreinformers "k8s.io/client-go/informers/core/v1"
)
type (
server struct {
endpoints *watcher.EndpointsWatcher
opaquePorts *watcher.OpaquePortsWatcher
profiles *watcher.ProfileWatcher
trafficSplits *watcher.TrafficSplitWatcher
nodes coreinformers.NodeInformer
enableH2Upgrade bool
controllerNS string
identityTrustDomain string
clusterDomain string
defaultOpaquePorts map[uint32]struct{}
k8sAPI *k8s.API
log *logging.Entry
shutdown <-chan struct{}
}
)
// NewServer returns a new instance of the destination server.
//
// The destination server serves service discovery and other information to the
// proxy. This implementation supports the "k8s" destination scheme and expects
// destination paths to be of the form:
// <service>.<namespace>.svc.cluster.local:<port>
//
// If the port is omitted, 80 is used as a default. If the namespace is
// omitted, "default" is used as a default.append
//
// Addresses for the given destination are fetched from the Kubernetes Endpoints
// API.
func NewServer(
addr string,
controllerNS string,
identityTrustDomain string,
enableH2Upgrade bool,
enableEndpointSlices bool,
k8sAPI *k8s.API,
clusterDomain string,
defaultOpaquePorts map[uint32]struct{},
shutdown <-chan struct{},
) (*grpc.Server, error) {
log := logging.WithFields(logging.Fields{
"addr": addr,
"component": "server",
})
// Initialize indexers that are used across watchers
err := watcher.InitializeIndexers(k8sAPI)
if err != nil {
return nil, err
}
endpoints := watcher.NewEndpointsWatcher(k8sAPI, log, enableEndpointSlices)
opaquePorts := watcher.NewOpaquePortsWatcher(k8sAPI, log, defaultOpaquePorts)
profiles := watcher.NewProfileWatcher(k8sAPI, log)
trafficSplits := watcher.NewTrafficSplitWatcher(k8sAPI, log)
srv := server{
endpoints,
opaquePorts,
profiles,
trafficSplits,
k8sAPI.Node(),
enableH2Upgrade,
controllerNS,
identityTrustDomain,
clusterDomain,
defaultOpaquePorts,
k8sAPI,
log,
shutdown,
}
s := prometheus.NewGrpcServer()
// linkerd2-proxy-api/destination.Destination (proxy-facing)
pb.RegisterDestinationServer(s, &srv)
return s, nil
}
func (s *server) Get(dest *pb.GetDestination, stream pb.Destination_GetServer) error {
client, _ := peer.FromContext(stream.Context())
log := s.log
if client != nil {
log = s.log.WithField("remote", client.Addr)
}
log.Debugf("Get %s", dest.GetPath())
var token contextToken
if dest.GetContextToken() != "" {
token = s.parseContextToken(dest.GetContextToken())
log.Debugf("Dest token: %v", token)
}
translator := newEndpointTranslator(
s.controllerNS,
s.identityTrustDomain,
s.enableH2Upgrade,
dest.GetPath(),
token.NodeName,
s.defaultOpaquePorts,
s.nodes,
stream,
log,
)
// The host must be fully-qualified or be an IP address.
host, port, err := getHostAndPort(dest.GetPath())
if err != nil {
log.Debugf("Invalid service %s", dest.GetPath())
return status.Errorf(codes.InvalidArgument, "Invalid authority: %s", dest.GetPath())
}
// Return error for an IP query
if ip := net.ParseIP(host); ip != nil {
return status.Errorf(codes.InvalidArgument, "IP queries not supported by Get API: host=%s", host)
}
service, instanceID, err := parseK8sServiceName(host, s.clusterDomain)
if err != nil {
log.Debugf("Invalid service %s", dest.GetPath())
return status.Errorf(codes.InvalidArgument, "Invalid authority: %s", dest.GetPath())
}
err = s.endpoints.Subscribe(service, port, instanceID, translator)
if err != nil {
if _, ok := err.(watcher.InvalidService); ok {
log.Debugf("Invalid service %s", dest.GetPath())
return status.Errorf(codes.InvalidArgument, "Invalid authority: %s", dest.GetPath())
}
log.Errorf("Failed to subscribe to %s: %s", dest.GetPath(), err)
return err
}
defer s.endpoints.Unsubscribe(service, port, instanceID, translator)
select {
case <-s.shutdown:
case <-stream.Context().Done():
log.Debugf("Get %s cancelled", dest.GetPath())
}
return nil
}
func (s *server) GetProfile(dest *pb.GetDestination, stream pb.Destination_GetProfileServer) error {
log := s.log
client, _ := peer.FromContext(stream.Context())
if client != nil {
log = log.WithField("remote", client.Addr)
}
log.Debugf("GetProfile(%+v)", dest)
path := dest.GetPath()
// The host must be fully-qualified or be an IP address.
host, port, err := getHostAndPort(path)
if err != nil {
log.Debugf("Invalid authority %s", path)
return status.Errorf(codes.InvalidArgument, "invalid authority: %s", err)
}
// The stream will subscribe to profile updates for `service`.
var service watcher.ServiceID
// If `host` is an IP, `fqn` must be constructed from the namespace and
// name of the service that the IP maps to.
var fqn string
if ip := net.ParseIP(host); ip != nil {
// Get the service that the IP currently maps to.
svcID, err := getSvcID(s.k8sAPI, ip.String(), log)
if err != nil {
return err
}
if svcID != nil {
service = *svcID
fqn = fmt.Sprintf("%s.%s.svc.%s", service.Name, service.Namespace, s.clusterDomain)
} else {
// If the IP does not map to a service, check if it maps to a pod
pod, err := getPodByIP(s.k8sAPI, ip.String(), port, log)
if err != nil {
return err
}
// The IP may or may not map to a pod (pod argument can be nil). If
// pod is not nil we will return a single endpoint in the
// DestinationProfile response, otherwise we return a default
// profile response.
err = s.sendEndpointProfile(stream, pod, port)
if err != nil {
log.Debugf("Failed to send profile response for endpoint %s:%d: %v", ip.String(), port, err)
return err
}
select {
case <-s.shutdown:
case <-stream.Context().Done():
log.Debugf("GetProfile(%+v) cancelled", dest)
}
return nil
}
} else {
var hostname string
service, hostname, err = parseK8sServiceName(host, s.clusterDomain)
if err != nil {
log.Debugf("Invalid service %s", path)
return status.Errorf(codes.InvalidArgument, "invalid service: %s", err)
}
// If the pod name (instance ID) is not empty, it means we parsed a DNS
// name. When we fetch the profile using a pod's DNS name, we want to
// return an endpoint in the profile response.
if hostname != "" {
pod, err := getPodByHostname(s.k8sAPI, hostname, service)
if err != nil {
log.Errorf("Failed to get pod for hostname %s: %v", hostname, err)
}
err = s.sendEndpointProfile(stream, pod, port)
if err != nil {
log.Debugf("Failed to send profile response for host %s: %v", hostname, err)
return err
}
select {
case <-s.shutdown:
case <-stream.Context().Done():
log.Debugf("GetProfile(%+v) cancelled", dest)
}
return nil
}
fqn = host
}
// We build up the pipeline of profile updaters backwards, starting from
// the translator which takes profile updates, translates them to protobuf
// and pushes them onto the gRPC stream.
translator := newProfileTranslator(stream, log, fqn, port, nil)
// The traffic split adaptor merges profile updates with traffic split
// updates and publishes the result to the profile translator.
tsAdaptor := newTrafficSplitAdaptor(translator, service, port, s.clusterDomain)
// Subscribe the adaptor to traffic split updates.
err = s.trafficSplits.Subscribe(service, tsAdaptor)
if err != nil {
log.Warnf("Failed to subscribe to traffic split for %s: %s", path, err)
return err
}
defer s.trafficSplits.Unsubscribe(service, tsAdaptor)
// The opaque ports adaptor merges profile updates with service opaque
// port annotation updates; it then publishes the result to the traffic
// split adaptor.
opaquePortsAdaptor := newOpaquePortsAdaptor(tsAdaptor)
// Subscribe the adaptor to service updates.
err = s.opaquePorts.Subscribe(service, opaquePortsAdaptor)
if err != nil {
log.Warnf("Failed to subscribe to service updates for %s: %s", service, err)
return err
}
defer s.opaquePorts.Unsubscribe(service, opaquePortsAdaptor)
// The fallback accepts updates from a primary and secondary source and
// passes the appropriate profile updates to the adaptor.
primary, secondary := newFallbackProfileListener(opaquePortsAdaptor)
// If we have a context token, we create two subscriptions: one with the
// context token which sends updates to the primary listener and one without
// the context token which sends updates to the secondary listener. It is
// up to the fallbackProfileListener to merge updates from the primary and
// secondary listeners and send the appropriate updates to the stream.
if dest.GetContextToken() != "" {
ctxToken := s.parseContextToken(dest.GetContextToken())
profile, err := profileID(fqn, ctxToken, s.clusterDomain)
if err != nil {
log.Debugf("Invalid service %s", path)
return status.Errorf(codes.InvalidArgument, "invalid profile ID: %s", err)
}
err = s.profiles.Subscribe(profile, primary)
if err != nil {
log.Warnf("Failed to subscribe to profile %s: %s", path, err)
return err
}
defer s.profiles.Unsubscribe(profile, primary)
}
profile, err := profileID(fqn, contextToken{}, s.clusterDomain)
if err != nil {
log.Debugf("Invalid service %s", path)
return status.Errorf(codes.InvalidArgument, "invalid profile ID: %s", err)
}
err = s.profiles.Subscribe(profile, secondary)
if err != nil {
log.Warnf("Failed to subscribe to profile %s: %s", path, err)
return err
}
defer s.profiles.Unsubscribe(profile, secondary)
select {
case <-s.shutdown:
case <-stream.Context().Done():
log.Debugf("GetProfile(%+v) cancelled", dest)
}
return nil
}
// sendEndpointProfile sends a DestinationProfile response back to the client.
// If the pod argument is provided, the profile sent to the client will
// include an endpoint. Otherwise, the default profile is sent.
func (s *server) sendEndpointProfile(stream pb.Destination_GetProfileServer, pod *corev1.Pod, port uint32) error {
log := s.log
var endpoint *pb.WeightedAddr
opaquePorts := make(map[uint32]struct{})
var err error
if pod != nil {
podSet := podToAddressSet(s.k8sAPI, pod).WithPort(port)
podID := watcher.PodID{
Namespace: pod.Namespace,
Name: pod.Name,
}
var ok bool
opaquePorts, ok, err = getPodOpaquePortsAnnotations(pod)
if err != nil {
log.Errorf("failed to get opaque ports annotation for pod: %s", err)
}
// If the opaque ports annotation was not set, then set the
// endpoint's opaque ports to the default value.
if !ok {
opaquePorts = s.defaultOpaquePorts
}
skippedInboundPorts, err := getPodSkippedInboundPortsAnnotations(pod)
if err != nil {
log.Errorf("failed to get ignored inbound ports annotation for pod: %s", err)
}
endpoint, err = toWeightedAddr(podSet.Addresses[podID], opaquePorts, skippedInboundPorts, s.enableH2Upgrade, s.identityTrustDomain, s.controllerNS, s.log)
if err != nil {
return err
}
// `Get` doesn't include the namespace in the per-endpoint
// metadata, so it needs to be special-cased.
endpoint.MetricLabels["namespace"] = pod.Namespace
}
// Send the default profile without subscribing for future updates. The
// profile response will also include an endpoint if the IP (or hostname)
// sent in the profile request maps to a pod.
translator := newProfileTranslator(stream, log, "", port, endpoint)
// If there are opaque ports then update the profile translator
// with a service profile that has those values
if len(opaquePorts) != 0 {
sp := sp.ServiceProfile{}
sp.Spec.OpaquePorts = opaquePorts
translator.Update(&sp)
} else {
translator.Update(nil)
}
return nil
}
// getSvcID returns the service that corresponds to a Cluster IP address if one
// exists.
func getSvcID(k8sAPI *k8s.API, clusterIP string, log *logging.Entry) (*watcher.ServiceID, error) {
objs, err := k8sAPI.Svc().Informer().GetIndexer().ByIndex(watcher.PodIPIndex, clusterIP)
if err != nil {
return nil, status.Error(codes.Unknown, err.Error())
}
services := make([]*corev1.Service, 0)
for _, obj := range objs {
service := obj.(*corev1.Service)
services = append(services, service)
}
if len(services) > 1 {
conflictingServices := []string{}
for _, service := range services {
conflictingServices = append(conflictingServices, fmt.Sprintf("%s:%s", service.Namespace, service.Name))
}
log.Warnf("found conflicting %s cluster IP: %s", clusterIP, strings.Join(conflictingServices, ","))
return nil, status.Errorf(codes.FailedPrecondition, "found %d services with conflicting cluster IP %s", len(services), clusterIP)
}
if len(services) == 0 {
return nil, nil
}
service := &watcher.ServiceID{
Namespace: services[0].Namespace,
Name: services[0].Name,
}
return service, nil
}
// getPodByHostname returns a pod that maps to the given hostname (or an
// instanceID). The hostname is generally the prefix of the pod's DNS name;
// since it may be arbitrary we need to look at the corresponding service's
// Endpoints object to see whether the hostname matches a pod.
func getPodByHostname(k8sAPI *k8s.API, hostname string, svcID watcher.ServiceID) (*corev1.Pod, error) {
ep, err := k8sAPI.Endpoint().Lister().Endpoints(svcID.Namespace).Get(svcID.Name)
if err != nil {
return nil, err
}
//TODO: add support for headless services with non-pod endpoints.
for _, subset := range ep.Subsets {
for _, addr := range subset.Addresses {
if addr.TargetRef == nil || addr.TargetRef.Kind != "Pod" {
continue
}
if hostname == addr.Hostname {
podName := addr.TargetRef.Name
podNamespace := addr.TargetRef.Namespace
return k8sAPI.Pod().Lister().Pods(podNamespace).Get(podName)
}
}
}
return nil, fmt.Errorf("no pod found in Endpoints %s/%s for hostname %s", svcID.Namespace, svcID.Name, hostname)
}
// getPodByIP returns a pod that maps to the given IP address. The pod can either
// be in the host network or the pod network. If the pod is in the host
// network, then it must have a container port that exposes `port` as a host
// port.
func getPodByIP(k8sAPI *k8s.API, podIP string, port uint32, log *logging.Entry) (*corev1.Pod, error) {
// First we check if the address maps to a pod in the host network.
addr := fmt.Sprintf("%s:%d", podIP, port)
hostIPPods, err := getIndexedPods(k8sAPI, watcher.HostIPIndex, addr)
if err != nil {
return nil, status.Error(codes.Unknown, err.Error())
}
if len(hostIPPods) == 1 {
log.Debugf("found %s:%d on the host network", podIP, port)
return hostIPPods[0], nil
}
if len(hostIPPods) > 1 {
conflictingPods := []string{}
for _, pod := range hostIPPods {
conflictingPods = append(conflictingPods, fmt.Sprintf("%s:%s", pod.Namespace, pod.Name))
}
log.Warnf("found conflicting %s:%d endpoint on the host network: %s", podIP, port, strings.Join(conflictingPods, ","))
return nil, status.Errorf(codes.FailedPrecondition, "found %d pods with a conflicting host network endpoint %s:%d", len(hostIPPods), podIP, port)
}
// The address did not map to a pod in the host network, so now we check
// if the IP maps to a pod IP in the pod network.
podIPPods, err := getIndexedPods(k8sAPI, watcher.PodIPIndex, podIP)
if err != nil {
return nil, status.Error(codes.Unknown, err.Error())
}
if len(podIPPods) == 1 {
log.Debugf("found %s on the pod network", podIP)
return podIPPods[0], nil
}
if len(podIPPods) > 1 {
conflictingPods := []string{}
for _, pod := range podIPPods {
conflictingPods = append(conflictingPods, fmt.Sprintf("%s:%s", pod.Namespace, pod.Name))
}
log.Warnf("found conflicting %s IP on the pod network: %s", podIP, strings.Join(conflictingPods, ","))
return nil, status.Errorf(codes.FailedPrecondition, "found %d pods with a conflicting pod network IP %s", len(podIPPods), podIP)
}
log.Debugf("no pod found for %s:%d", podIP, port)
return nil, nil
}
func getIndexedPods(k8sAPI *k8s.API, indexName string, podIP string) ([]*corev1.Pod, error) {
objs, err := k8sAPI.Pod().Informer().GetIndexer().ByIndex(indexName, podIP)
if err != nil {
return nil, fmt.Errorf("failed getting %s indexed pods: %s", indexName, err)
}
pods := make([]*corev1.Pod, 0)
for _, obj := range objs {
pod := obj.(*corev1.Pod)
if !podReceivingTraffic(pod) {
continue
}
pods = append(pods, pod)
}
return pods, nil
}
func podReceivingTraffic(pod *corev1.Pod) bool {
phase := pod.Status.Phase
podTerminated := phase == corev1.PodSucceeded || phase == corev1.PodFailed
podTerminating := pod.DeletionTimestamp != nil
return !podTerminating && !podTerminated
}
// podToAddressSet converts a Pod spec into a set of Addresses.
func podToAddressSet(k8sAPI *k8s.API, pod *corev1.Pod) *watcher.AddressSet {
ownerKind, ownerName := k8sAPI.GetOwnerKindAndName(context.Background(), pod, true)
return &watcher.AddressSet{
Addresses: map[watcher.PodID]watcher.Address{
{
Name: pod.Name,
Namespace: pod.Namespace,
}: {
IP: pod.Status.PodIP,
Port: 0, // Will be set by individual subscriptions
Pod: pod,
OwnerName: ownerName,
OwnerKind: ownerKind,
},
},
Labels: map[string]string{"namespace": pod.Namespace},
}
}
////////////
/// util ///
////////////
type contextToken struct {
Ns string `json:"ns,omitempty"`
NodeName string `json:"nodeName,omitempty"`
}
func (s *server) parseContextToken(token string) contextToken {
ctxToken := contextToken{}
if err := json.Unmarshal([]byte(token), &ctxToken); err != nil {
// if json is invalid, means token can have ns:<namespace> form
parts := strings.Split(token, ":")
if len(parts) == 2 && parts[0] == "ns" {
s.log.Warnf("context token %s using old token format", token)
ctxToken = contextToken{
Ns: parts[1],
}
} else {
s.log.Errorf("context token %s is invalid: %s", token, err)
}
}
return ctxToken
}
func profileID(authority string, ctxToken contextToken, clusterDomain string) (watcher.ProfileID, error) {
host, _, err := getHostAndPort(authority)
if err != nil {
return watcher.ProfileID{}, fmt.Errorf("invalid authority: %s", err)
}
service, _, err := parseK8sServiceName(host, clusterDomain)
if err != nil {
return watcher.ProfileID{}, fmt.Errorf("invalid k8s service name: %s", err)
}
id := watcher.ProfileID{
Name: fmt.Sprintf("%s.%s.svc.%s", service.Name, service.Namespace, clusterDomain),
Namespace: service.Namespace,
}
if ctxToken.Ns != "" {
id.Namespace = ctxToken.Ns
}
return id, nil
}
func getHostAndPort(authority string) (string, watcher.Port, error) {
hostPort := strings.Split(authority, ":")
if len(hostPort) > 2 {
return "", 0, fmt.Errorf("Invalid destination %s", authority)
}
host := hostPort[0]
port := 80
if len(hostPort) == 2 {
var err error
port, err = strconv.Atoi(hostPort[1])
if err != nil || port <= 0 || port > 65535 {
return "", 0, fmt.Errorf("Invalid port %s", hostPort[1])
}
}
return host, watcher.Port(port), nil
}
type instanceID = string
// parseK8sServiceName is a utility that destructures a Kubernetes service hostname into its constituent components.
//
// If the authority does not represent a Kubernetes service, an error is returned.
//
// If the hostname is a pod DNS name, then the pod's name (instanceID) is returned
// as well. See https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/.
func parseK8sServiceName(fqdn, clusterDomain string) (watcher.ServiceID, instanceID, error) {
labels := strings.Split(fqdn, ".")
suffix := append([]string{"svc"}, strings.Split(clusterDomain, ".")...)
if !hasSuffix(labels, suffix) {
return watcher.ServiceID{}, "", fmt.Errorf("name %s does not match cluster domain %s", fqdn, clusterDomain)
}
n := len(labels)
if n == 2+len(suffix) {
// <service>.<namespace>.<suffix>
service := watcher.ServiceID{
Name: labels[0],
Namespace: labels[1],
}
return service, "", nil
}
if n == 3+len(suffix) {
// <instance-id>.<service>.<namespace>.<suffix>
instanceID := labels[0]
service := watcher.ServiceID{
Name: labels[1],
Namespace: labels[2],
}
return service, instanceID, nil
}
return watcher.ServiceID{}, "", fmt.Errorf("invalid k8s service %s", fqdn)
}
func hasSuffix(slice []string, suffix []string) bool {
if len(slice) < len(suffix) {
return false
}
for i, s := range slice[len(slice)-len(suffix):] {
if s != suffix[i] {
return false
}
}
return true
}
func getPodOpaquePortsAnnotations(pod *corev1.Pod) (map[uint32]struct{}, bool, error) {
annotation, ok := pod.Annotations[labels.ProxyOpaquePortsAnnotation]
if !ok {
return nil, false, nil
}
opaquePorts := make(map[uint32]struct{})
if annotation != "" {
for _, portStr := range util.ParseContainerOpaquePorts(annotation, pod.Spec.Containers) {
port, err := strconv.ParseUint(portStr, 10, 32)
if err != nil {
return nil, true, err
}
opaquePorts[uint32(port)] = struct{}{}
}
}
return opaquePorts, true, nil
}
func getPodSkippedInboundPortsAnnotations(pod *corev1.Pod) (map[uint32]struct{}, error) {
annotation, ok := pod.Annotations[labels.ProxyIgnoreInboundPortsAnnotation]
if !ok || annotation == "" {
return nil, nil
}
return util.ParsePorts(annotation)
}