package inject import ( "encoding/json" "fmt" "strconv" "strings" "github.com/linkerd/linkerd2/controller/gen/config" "github.com/linkerd/linkerd2/pkg/k8s" log "github.com/sirupsen/logrus" appsv1 "k8s.io/api/apps/v1" batchv1 "k8s.io/api/batch/v1" v1 "k8s.io/api/core/v1" "k8s.io/api/extensions/v1beta1" k8sMeta "k8s.io/apimachinery/pkg/api/meta" k8sResource "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/intstr" "sigs.k8s.io/yaml" ) const ( // localhostDNSNameOverride allows override of the controlPlaneDNS. This // must be in absolute form for the proxy to special-case it. localhostDNSNameOverride = "localhost." // controlPlanePodName default control plane pod name. controlPlanePodName = "linkerd-controller" // podNamespaceEnvVarName is the name of the variable used to pass the pod's namespace. podNamespaceEnvVarName = "LINKERD2_PROXY_POD_NAMESPACE" // defaultKeepaliveMs is used in the proxy configuration for remote connections defaultKeepaliveMs = 10000 // destinationAPIPort is the port exposed by the linkerd-destination service destinationAPIPort = 8086 ) var injectableKinds = []string{ k8s.DaemonSet, k8s.Deployment, k8s.Job, k8s.Pod, k8s.ReplicaSet, k8s.ReplicationController, k8s.StatefulSet, } // objMeta provides a generic struct to parse the names of Kubernetes objects type objMeta struct { *metav1.ObjectMeta `json:"metadata,omitempty" protobuf:"bytes,1,opt,name=metadata"` } // ResourceConfig contains the parsed information for a given workload type ResourceConfig struct { globalConfig *config.Global proxyConfig *config.Proxy nsAnnotations map[string]string meta metav1.TypeMeta obj runtime.Object workLoadMeta *metav1.ObjectMeta podMeta objMeta podLabels map[string]string podSpec *v1.PodSpec dnsNameOverride string proxyOutboundCapacity map[string]uint } // NewResourceConfig creates and initializes a ResourceConfig func NewResourceConfig(globalConfig *config.Global, proxyConfig *config.Proxy) *ResourceConfig { return &ResourceConfig{ globalConfig: globalConfig, proxyConfig: proxyConfig, podLabels: map[string]string{k8s.ControllerNSLabel: globalConfig.GetLinkerdNamespace()}, proxyOutboundCapacity: map[string]uint{}, } } // WithKind enriches ResourceConfig with the workload kind func (conf *ResourceConfig) WithKind(kind string) *ResourceConfig { conf.meta = metav1.TypeMeta{Kind: kind} return conf } // WithNsAnnotations enriches ResourceConfig with the namespace annotations, that can // be used in shouldInject() func (conf *ResourceConfig) WithNsAnnotations(m map[string]string) *ResourceConfig { conf.nsAnnotations = m return conf } // WithProxyOutboundCapacity enriches ResourceConfig with a map of image names // to capacities, which can be used by the install code to modify the outbound // capacity for the prometheus container in the control plane install func (conf *ResourceConfig) WithProxyOutboundCapacity(m map[string]uint) *ResourceConfig { conf.proxyOutboundCapacity = m return conf } // YamlMarshalObj returns the yaml for the workload in conf func (conf *ResourceConfig) YamlMarshalObj() ([]byte, error) { return yaml.Marshal(conf.obj) } // ParseMetaAndYaml fills conf fields with both the metatada and the workload contents func (conf *ResourceConfig) ParseMetaAndYaml(bytes []byte) (*Report, error) { if _, err := conf.ParseMeta(bytes); err != nil { return nil, err } r := newReport(conf) return &r, conf.parse(bytes) } // ParseMeta extracts metadata from bytes. // It returns false if the workload's payload is empty func (conf *ResourceConfig) ParseMeta(bytes []byte) (bool, error) { if err := yaml.Unmarshal(bytes, &conf.meta); err != nil { return false, err } if err := yaml.Unmarshal(bytes, &conf.podMeta); err != nil { return false, err } return conf.podMeta.ObjectMeta != nil, nil } // GetPatch returns the JSON patch containing the proxy and init containers specs, if any func (conf *ResourceConfig) GetPatch( bytes []byte, shouldInject func(*ResourceConfig, Report) bool, ) (*Patch, []Report, error) { report := newReport(conf) log.Infof("working on %s %s..", strings.ToLower(conf.meta.Kind), report.Name) if err := conf.parse(bytes); err != nil { return nil, nil, err } var patch *Patch if strings.ToLower(conf.meta.Kind) == k8s.Pod { patch = NewPatchPod() } else { patch = NewPatchDeployment() } // If we don't inject anything into the pod template then output the // original serialization of the original object. Otherwise, output the // serialization of the modified object. if conf.podSpec != nil { metaAccessor, err := k8sMeta.Accessor(conf.obj) if err != nil { return nil, nil, err } // The namespace isn't necessarily in the input so it has to be substituted // at runtime. The proxy recognizes the "$NAME" syntax for this variable // but not necessarily other variables. identity := k8s.TLSIdentity{ Name: metaAccessor.GetName(), Kind: strings.ToLower(conf.meta.Kind), Namespace: "$" + podNamespaceEnvVarName, ControllerNamespace: conf.globalConfig.GetLinkerdNamespace(), } report.update(conf) if shouldInject(conf, report) { conf.injectPodSpec(patch, identity) conf.injectObjectMeta(patch) } } else { report.UnsupportedResource = true } return patch, []Report{report}, nil } // KindInjectable returns true if the resource in conf can be injected with a proxy func (conf *ResourceConfig) KindInjectable() bool { for _, kind := range injectableKinds { if strings.ToLower(conf.meta.Kind) == kind { return true } } return false } // Note this switch must be kept in sync with injectableKinds (declared above) func (conf *ResourceConfig) getFreshWorkloadObj() runtime.Object { switch strings.ToLower(conf.meta.Kind) { case k8s.Deployment: return &v1beta1.Deployment{} case k8s.ReplicationController: return &v1.ReplicationController{} case k8s.ReplicaSet: return &v1beta1.ReplicaSet{} case k8s.Job: return &batchv1.Job{} case k8s.DaemonSet: return &v1beta1.DaemonSet{} case k8s.StatefulSet: return &appsv1.StatefulSet{} case k8s.Pod: return &v1.Pod{} } return nil } // JSONToYAML is a replacement for the same function in sigs.k8s.io/yaml // that does conserve the field order as portrayed in k8s' api structs func (conf *ResourceConfig) JSONToYAML(bytes []byte) ([]byte, error) { obj := conf.getFreshWorkloadObj() if err := json.Unmarshal(bytes, obj); err != nil { return nil, err } return yaml.Marshal(obj) } func (conf *ResourceConfig) parse(bytes []byte) error { // The Kubernetes API is versioned and each version has an API modeled // with its own distinct Go types. If we tell `yaml.Unmarshal()` which // version we support then it will provide a representation of that // object using the given type if possible. However, it only allows us // to supply one object (of one type), so first we have to determine // what kind of object `bytes` represents so we can pass an object of // the correct type to `yaml.Unmarshal()`. // --------------------------------------- // Note: bytes is expected to be YAML and will only modify it when a // supported type is found. Otherwise, conf is left unmodified. // When injecting the linkerd proxy into a linkerd controller pod. The linkerd proxy's // LINKERD2_PROXY_CONTROL_URL variable must be set to localhost for the following reasons: // 1. According to https://github.com/kubernetes/minikube/issues/1568, minikube has an issue // where pods are unable to connect to themselves through their associated service IP. // Setting the LINKERD2_PROXY_CONTROL_URL to localhost allows the proxy to bypass kube DNS // name resolution as a workaround to this issue. // 2. We avoid the TLS overhead in encrypting and decrypting intra-pod traffic i.e. traffic // between containers in the same pod. // 3. Using a Service IP instead of localhost would mean intra-pod traffic would be load-balanced // across all controller pod replicas. This is undesirable as we would want all traffic between // containers to be self contained. // 4. We skip recording telemetry for intra-pod traffic within the control plane. obj := conf.getFreshWorkloadObj() switch v := obj.(type) { case *v1beta1.Deployment: if err := yaml.Unmarshal(bytes, v); err != nil { return err } if v.Name == controlPlanePodName && v.Namespace == conf.globalConfig.GetLinkerdNamespace() { conf.dnsNameOverride = localhostDNSNameOverride } conf.obj = v conf.workLoadMeta = &v.ObjectMeta conf.podLabels[k8s.ProxyDeploymentLabel] = v.Name conf.complete(&v.Spec.Template) case *v1.ReplicationController: if err := yaml.Unmarshal(bytes, v); err != nil { return err } conf.obj = v conf.workLoadMeta = &v.ObjectMeta conf.podLabels[k8s.ProxyReplicationControllerLabel] = v.Name conf.complete(v.Spec.Template) case *v1beta1.ReplicaSet: if err := yaml.Unmarshal(bytes, v); err != nil { return err } conf.obj = v conf.workLoadMeta = &v.ObjectMeta conf.podLabels[k8s.ProxyReplicaSetLabel] = v.Name conf.complete(&v.Spec.Template) case *batchv1.Job: if err := yaml.Unmarshal(bytes, v); err != nil { return err } conf.obj = v conf.workLoadMeta = &v.ObjectMeta conf.podLabels[k8s.ProxyJobLabel] = v.Name conf.complete(&v.Spec.Template) case *v1beta1.DaemonSet: if err := yaml.Unmarshal(bytes, v); err != nil { return err } conf.obj = v conf.workLoadMeta = &v.ObjectMeta conf.podLabels[k8s.ProxyDaemonSetLabel] = v.Name conf.complete(&v.Spec.Template) case *appsv1.StatefulSet: if err := yaml.Unmarshal(bytes, v); err != nil { return err } conf.obj = v conf.workLoadMeta = &v.ObjectMeta conf.podLabels[k8s.ProxyStatefulSetLabel] = v.Name conf.complete(&v.Spec.Template) case *v1.Pod: if err := yaml.Unmarshal(bytes, v); err != nil { return err } conf.obj = v conf.podSpec = &v.Spec conf.podMeta = objMeta{&v.ObjectMeta} } return nil } func (conf *ResourceConfig) complete(template *v1.PodTemplateSpec) { conf.podSpec = &template.Spec conf.podMeta = objMeta{&template.ObjectMeta} } // injectPodSpec adds linkerd sidecars to the provided PodSpec. func (conf *ResourceConfig) injectPodSpec(patch *Patch, identity k8s.TLSIdentity) { f := false inboundSkipPorts := append(conf.proxyConfig.GetIgnoreInboundPorts(), conf.proxyConfig.GetControlPort(), conf.proxyConfig.GetMetricsPort()) inboundSkipPortsStr := make([]string, len(inboundSkipPorts)) for i, p := range inboundSkipPorts { inboundSkipPortsStr[i] = strconv.Itoa(int(p.GetPort())) } outboundSkipPortsStr := make([]string, len(conf.proxyConfig.GetIgnoreOutboundPorts())) for i, p := range conf.proxyConfig.GetIgnoreOutboundPorts() { outboundSkipPortsStr[i] = strconv.Itoa(int(p.GetPort())) } initArgs := []string{ "--incoming-proxy-port", fmt.Sprintf("%d", conf.proxyConfig.GetInboundPort().GetPort()), "--outgoing-proxy-port", fmt.Sprintf("%d", conf.proxyConfig.GetOutboundPort().GetPort()), "--proxy-uid", fmt.Sprintf("%d", conf.proxyConfig.GetProxyUid()), } if len(inboundSkipPortsStr) > 0 { initArgs = append(initArgs, "--inbound-ports-to-ignore") initArgs = append(initArgs, strings.Join(inboundSkipPortsStr, ",")) } if len(outboundSkipPortsStr) > 0 { initArgs = append(initArgs, "--outbound-ports-to-ignore") initArgs = append(initArgs, strings.Join(outboundSkipPortsStr, ",")) } controlPlaneDNS := fmt.Sprintf("linkerd-destination.%s.svc.cluster.local", conf.globalConfig.GetLinkerdNamespace()) if conf.dnsNameOverride != "" { controlPlaneDNS = conf.dnsNameOverride } metricsPort := intstr.IntOrString{ IntVal: int32(conf.proxyConfig.GetMetricsPort().GetPort()), } proxyProbe := v1.Probe{ Handler: v1.Handler{ HTTPGet: &v1.HTTPGetAction{ Path: "/metrics", Port: metricsPort, }, }, InitialDelaySeconds: 10, } resources := v1.ResourceRequirements{ Requests: v1.ResourceList{}, Limits: v1.ResourceList{}, } if request := conf.proxyConfig.GetResource().GetRequestCpu(); request != "" { resources.Requests["cpu"] = k8sResource.MustParse(request) } if request := conf.proxyConfig.GetResource().GetRequestMemory(); request != "" { resources.Requests["memory"] = k8sResource.MustParse(request) } if limit := conf.proxyConfig.GetResource().GetLimitCpu(); limit != "" { resources.Limits["cpu"] = k8sResource.MustParse(limit) } if limit := conf.proxyConfig.GetResource().GetLimitMemory(); limit != "" { resources.Limits["memory"] = k8sResource.MustParse(limit) } profileSuffixes := "." if conf.proxyConfig.GetDisableExternalProfiles() { profileSuffixes = "svc.cluster.local." } proxyUID := conf.proxyConfig.GetProxyUid() sidecar := v1.Container{ Name: k8s.ProxyContainerName, Image: conf.taggedProxyImage(), ImagePullPolicy: v1.PullPolicy(conf.proxyConfig.GetProxyImage().GetPullPolicy()), TerminationMessagePolicy: v1.TerminationMessageFallbackToLogsOnError, SecurityContext: &v1.SecurityContext{ RunAsUser: &proxyUID, }, Ports: []v1.ContainerPort{ { Name: "linkerd-proxy", ContainerPort: int32(conf.proxyConfig.GetInboundPort().GetPort()), }, { Name: "linkerd-metrics", ContainerPort: int32(conf.proxyConfig.GetMetricsPort().GetPort()), }, }, Resources: resources, Env: []v1.EnvVar{ {Name: "LINKERD2_PROXY_LOG", Value: conf.proxyConfig.GetLogLevel().GetLevel()}, { Name: "LINKERD2_PROXY_CONTROL_URL", Value: fmt.Sprintf("tcp://%s:%d", controlPlaneDNS, destinationAPIPort), }, {Name: "LINKERD2_PROXY_CONTROL_LISTENER", Value: fmt.Sprintf("tcp://0.0.0.0:%d", conf.proxyConfig.GetControlPort().GetPort())}, {Name: "LINKERD2_PROXY_METRICS_LISTENER", Value: fmt.Sprintf("tcp://0.0.0.0:%d", conf.proxyConfig.GetMetricsPort().GetPort())}, {Name: "LINKERD2_PROXY_OUTBOUND_LISTENER", Value: fmt.Sprintf("tcp://127.0.0.1:%d", conf.proxyConfig.GetOutboundPort().GetPort())}, {Name: "LINKERD2_PROXY_INBOUND_LISTENER", Value: fmt.Sprintf("tcp://0.0.0.0:%d", conf.proxyConfig.GetInboundPort().GetPort())}, {Name: "LINKERD2_PROXY_DESTINATION_PROFILE_SUFFIXES", Value: profileSuffixes}, { Name: podNamespaceEnvVarName, ValueFrom: &v1.EnvVarSource{FieldRef: &v1.ObjectFieldSelector{FieldPath: "metadata.namespace"}}, }, {Name: "LINKERD2_PROXY_INBOUND_ACCEPT_KEEPALIVE", Value: fmt.Sprintf("%dms", defaultKeepaliveMs)}, {Name: "LINKERD2_PROXY_OUTBOUND_CONNECT_KEEPALIVE", Value: fmt.Sprintf("%dms", defaultKeepaliveMs)}, {Name: "LINKERD2_PROXY_ID", Value: identity.ToDNSName()}, }, LivenessProbe: &proxyProbe, ReadinessProbe: &proxyProbe, } // Special case if the caller specifies that // LINKERD2_PROXY_OUTBOUND_ROUTER_CAPACITY be set on the pod. // We key off of any container image in the pod. Ideally we would instead key // off of something at the top-level of the PodSpec, but there is nothing // easily identifiable at that level. // Currently this will bet set on any proxy that gets injected into a Prometheus pod, // not just the one in Linkerd's Control Plane. for _, container := range conf.podSpec.Containers { if capacity, ok := conf.proxyOutboundCapacity[container.Image]; ok { sidecar.Env = append(sidecar.Env, v1.EnvVar{ Name: "LINKERD2_PROXY_OUTBOUND_ROUTER_CAPACITY", Value: fmt.Sprintf("%d", capacity), }, ) break } } if conf.globalConfig.GetIdentityContext() != nil { yes := true configMapVolume := &v1.Volume{ Name: k8s.TLSTrustAnchorVolumeName, VolumeSource: v1.VolumeSource{ ConfigMap: &v1.ConfigMapVolumeSource{ LocalObjectReference: v1.LocalObjectReference{Name: k8s.TLSTrustAnchorConfigMapName}, Optional: &yes, }, }, } secretVolume := &v1.Volume{ Name: k8s.TLSSecretsVolumeName, VolumeSource: v1.VolumeSource{ Secret: &v1.SecretVolumeSource{ SecretName: identity.ToSecretName(), Optional: &yes, }, }, } base := "/var/linkerd-io" configMapBase := base + "/trust-anchors" secretBase := base + "/identity" tlsEnvVars := []v1.EnvVar{ {Name: "LINKERD2_PROXY_TLS_TRUST_ANCHORS", Value: configMapBase + "/" + k8s.TLSTrustAnchorFileName}, {Name: "LINKERD2_PROXY_TLS_CERT", Value: secretBase + "/" + k8s.TLSCertFileName}, {Name: "LINKERD2_PROXY_TLS_PRIVATE_KEY", Value: secretBase + "/" + k8s.TLSPrivateKeyFileName}, { Name: "LINKERD2_PROXY_TLS_POD_IDENTITY", Value: identity.ToDNSName(), }, {Name: "LINKERD2_PROXY_CONTROLLER_NAMESPACE", Value: conf.globalConfig.GetLinkerdNamespace()}, {Name: "LINKERD2_PROXY_TLS_CONTROLLER_IDENTITY", Value: identity.ToControllerIdentity().ToDNSName()}, } sidecar.Env = append(sidecar.Env, tlsEnvVars...) sidecar.VolumeMounts = []v1.VolumeMount{ {Name: configMapVolume.Name, MountPath: configMapBase, ReadOnly: true}, {Name: secretVolume.Name, MountPath: secretBase, ReadOnly: true}, } if len(conf.podSpec.Volumes) == 0 { patch.addVolumeRoot() } patch.addVolume(configMapVolume) patch.addVolume(secretVolume) } patch.addContainer(&sidecar) if !conf.globalConfig.GetCniEnabled() { nonRoot := false runAsUser := int64(0) initContainer := &v1.Container{ Name: k8s.InitContainerName, Image: conf.taggedProxyInitImage(), ImagePullPolicy: v1.PullPolicy(conf.proxyConfig.GetProxyInitImage().GetPullPolicy()), TerminationMessagePolicy: v1.TerminationMessageFallbackToLogsOnError, Args: initArgs, SecurityContext: &v1.SecurityContext{ Capabilities: &v1.Capabilities{ Add: []v1.Capability{v1.Capability("NET_ADMIN")}, }, Privileged: &f, RunAsNonRoot: &nonRoot, RunAsUser: &runAsUser, }, } if len(conf.podSpec.InitContainers) == 0 { patch.addInitContainerRoot() } patch.addInitContainer(initContainer) } } // Given a ObjectMeta, update ObjectMeta in place with the new labels and // annotations. func (conf *ResourceConfig) injectObjectMeta(patch *Patch) { if len(conf.podMeta.Annotations) == 0 { patch.addPodAnnotationsRoot() } patch.addPodAnnotation(k8s.ProxyVersionAnnotation, conf.globalConfig.GetVersion()) if conf.globalConfig.GetIdentityContext() != nil { patch.addPodAnnotation(k8s.IdentityModeAnnotation, k8s.IdentityModeOptional) } else { patch.addPodAnnotation(k8s.IdentityModeAnnotation, k8s.IdentityModeDisabled) } for k, v := range conf.podLabels { patch.addPodLabel(k, v) } } // AddRootLabels adds all the pod labels into the root workload (e.g. Deployment) func (conf *ResourceConfig) AddRootLabels(patch *Patch) { for k, v := range conf.podLabels { patch.addRootLabel(k, v) } } func (conf *ResourceConfig) taggedProxyImage() string { return fmt.Sprintf("%s:%s", conf.proxyConfig.GetProxyImage().GetImageName(), conf.globalConfig.GetVersion()) } func (conf *ResourceConfig) taggedProxyInitImage() string { return fmt.Sprintf("%s:%s", conf.proxyConfig.GetProxyInitImage().GetImageName(), conf.globalConfig.GetVersion()) } // ShouldInjectCLI is used by CLI inject to determine whether or not a given // workload should be injected. It shouldn't if: // - it contains any known sidecars; or // - is on a HostNetwork; or // - the pod is annotated with "linkerd.io/inject: disabled". func ShouldInjectCLI(_ *ResourceConfig, r Report) bool { return r.Injectable() } // ShouldInjectWebhook determines whether or not the given workload should be // injected. It shouldn't if: // - it contains any known sidecars; or // - is on a HostNetwork; or // - the pod is annotated with "linkerd.io/inject: disabled". // Additionally, a workload should be injected if: // - the workload's namespace has the linkerd.io/inject annotation set to // "enabled", and the workload's pod spec does not have the // linkerd.io/inject annotation set to "disabled"; or // - the workload's pod spec has the linkerd.io/inject annotation set to "enabled" func ShouldInjectWebhook(conf *ResourceConfig, r Report) bool { if !r.Injectable() { return false } podAnnotation := conf.podMeta.Annotations[k8s.ProxyInjectAnnotation] nsAnnotation := conf.nsAnnotations[k8s.ProxyInjectAnnotation] if nsAnnotation == k8s.ProxyInjectEnabled && podAnnotation != k8s.ProxyInjectDisabled { return true } return podAnnotation == k8s.ProxyInjectEnabled }