From ba14dc3fc76b3e2e0851515827d703379eef2133 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafael=20Fern=C3=A1ndez=20L=C3=B3pez?= Date: Tue, 15 Oct 2019 20:33:09 +0200 Subject: [PATCH] Health check: check if proxies trust anchors match configuration (#3524) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Health check: check if proxies trust anchors match configuration If Linkerd is reinstalled or if the trust anchors are modified while proxies are running on the cluster, those proxies will contain an outdated `LINKERD2_PROXY_IDENTITY_TRUST_ANCHORS` certificate. This changeset extends `linkerd check` so that, when any proxies are running on the cluster, it compares each proxy's trust anchors against the trust anchors in the configuration. If there's a mismatch (reported as a warning), `linkerd check` tells the user which pods are the offenders (and which namespace each one is in), along with a hint on how to remediate the issue (restarting the pods). * Add integration tests for proxy certificate check Fixes #3344 Signed-off-by: Rafael Fernández López --- pkg/healthcheck/healthcheck.go | 49 +++++++++++++ pkg/healthcheck/healthcheck_test.go | 102 ++++++++++++++++++++++++++++ pkg/identity/service.go | 4 ++ proxy-identity/main.go | 8 +-- 4 files changed, 159 insertions(+), 4 deletions(-) diff --git a/pkg/healthcheck/healthcheck.go b/pkg/healthcheck/healthcheck.go index af9a2e9f2..3f047ccbe 100644 --- a/pkg/healthcheck/healthcheck.go +++ b/pkg/healthcheck/healthcheck.go @@ -15,6 +15,7 @@ import ( configPb "github.com/linkerd/linkerd2/controller/gen/config" pb "github.com/linkerd/linkerd2/controller/gen/public" "github.com/linkerd/linkerd2/pkg/config" + "github.com/linkerd/linkerd2/pkg/identity" "github.com/linkerd/linkerd2/pkg/k8s" "github.com/linkerd/linkerd2/pkg/profiles" "github.com/linkerd/linkerd2/pkg/tls" @@ -849,6 +850,14 @@ func (hc *HealthChecker) allCategories() []category { return nil }, }, + { + description: "data plane proxies 
certificate match CA", + hintAnchor: "l5d-data-plane-proxies-certificate-match-ca", + warning: true, + check: func(ctx context.Context) error { + return hc.checkDataPlaneProxiesCertificate() + }, + }, }, }, } @@ -1205,6 +1214,46 @@ func (hc *HealthChecker) checkPodSecurityPolicies(shouldExist bool) error { return checkResources("PodSecurityPolicies", objects, []string{fmt.Sprintf("linkerd-%s-control-plane", hc.ControlPlaneNamespace)}, shouldExist) } +func (hc *HealthChecker) checkDataPlaneProxiesCertificate() error { + podList, err := hc.kubeAPI.CoreV1().Pods(hc.DataPlaneNamespace).List(metav1.ListOptions{LabelSelector: k8s.ControllerNSLabel}) + if err != nil { + return err + } + // Return early if no proxies are deployed on the cluster yet (or on the targeted namespace) + if len(podList.Items) == 0 { + return nil + } + _, configPB, err := FetchLinkerdConfigMap(hc.kubeAPI, hc.ControlPlaneNamespace) + if err != nil { + return err + } + trustAnchorsPem := configPB.GetGlobal().GetIdentityContext().GetTrustAnchorsPem() + offendingPods := []string{} + for _, pod := range podList.Items { + for _, containerSpec := range pod.Spec.Containers { + if containerSpec.Name != k8s.ProxyContainerName { + continue + } + for _, envVar := range containerSpec.Env { + if envVar.Name != identity.EnvTrustAnchors { + continue + } + if envVar.Value != trustAnchorsPem { + if hc.DataPlaneNamespace == "" { + offendingPods = append(offendingPods, fmt.Sprintf("%s/%s", pod.ObjectMeta.Namespace, pod.ObjectMeta.Name)) + } else { + offendingPods = append(offendingPods, pod.ObjectMeta.Name) + } + } + } + } + } + if len(offendingPods) == 0 { + return nil + } + return fmt.Errorf("The following pods have old proxy certificate information; please, restart them:\n\t%s", strings.Join(offendingPods, "\n\t")) +} + func checkResources(resourceName string, objects []runtime.Object, expectedNames []string, shouldExist bool) error { if !shouldExist { if len(objects) > 0 { diff --git 
a/pkg/healthcheck/healthcheck_test.go b/pkg/healthcheck/healthcheck_test.go index 5623ee30d..7c5bcfd28 100644 --- a/pkg/healthcheck/healthcheck_test.go +++ b/pkg/healthcheck/healthcheck_test.go @@ -2,6 +2,7 @@ package healthcheck import ( "context" + "errors" "fmt" "reflect" "strings" @@ -14,6 +15,7 @@ import ( healthcheckPb "github.com/linkerd/linkerd2/controller/gen/common/healthcheck" configPb "github.com/linkerd/linkerd2/controller/gen/config" pb "github.com/linkerd/linkerd2/controller/gen/public" + "github.com/linkerd/linkerd2/pkg/identity" "github.com/linkerd/linkerd2/pkg/k8s" corev1 "k8s.io/api/core/v1" k8sErrors "k8s.io/apimachinery/pkg/api/errors" @@ -1672,6 +1674,106 @@ metadata: } } +func proxiesWithCertificates(certificates ...string) []string { + result := []string{} + for i, certificate := range certificates { + result = append(result, fmt.Sprintf(` +apiVersion: v1 +kind: Pod +metadata: + name: pod-%d + namespace: namespace-%d + labels: + %s: linkerd +spec: + containers: + - name: %s + env: + - name: %s + value: %s +`, i, i, k8s.ControllerNSLabel, k8s.ProxyContainerName, identity.EnvTrustAnchors, certificate)) + } + return result +} + +func TestCheckDataPlaneProxiesCertificate(t *testing.T) { + const currentCertificate = "current-certificate" + const oldCertificate = "old-certificate" + + linkerdConfigMap := fmt.Sprintf(` +kind: ConfigMap +apiVersion: v1 +metadata: + name: %s +data: + global: | + {"identityContext":{"trustAnchorsPem": "%s"}} +`, k8s.ConfigConfigMapName, currentCertificate) + + var testCases = []struct { + checkDescription string + resources []string + namespace string + expectedErr error + }{ + { + checkDescription: "all proxies match CA certificate (all namespaces)", + resources: proxiesWithCertificates(currentCertificate, currentCertificate), + namespace: "", + expectedErr: nil, + }, + { + checkDescription: "some proxies match CA certificate (all namespaces)", + resources: proxiesWithCertificates(currentCertificate, oldCertificate), 
+ namespace: "", + expectedErr: errors.New("The following pods have old proxy certificate information; please, restart them:\n\tnamespace-1/pod-1"), + }, + { + checkDescription: "no proxies match CA certificate (all namespaces)", + resources: proxiesWithCertificates(oldCertificate, oldCertificate), + namespace: "", + expectedErr: errors.New("The following pods have old proxy certificate information; please, restart them:\n\tnamespace-0/pod-0\n\tnamespace-1/pod-1"), + }, + { + checkDescription: "some proxies match CA certificate (match in target namespace)", + resources: proxiesWithCertificates(currentCertificate, oldCertificate), + namespace: "namespace-0", + expectedErr: nil, + }, + { + checkDescription: "some proxies match CA certificate (unmatch in target namespace)", + resources: proxiesWithCertificates(currentCertificate, oldCertificate), + namespace: "namespace-1", + expectedErr: errors.New("The following pods have old proxy certificate information; please, restart them:\n\tpod-1"), + }, + { + checkDescription: "no proxies match CA certificate (specific namespace)", + resources: proxiesWithCertificates(oldCertificate, oldCertificate), + namespace: "namespace-0", + expectedErr: errors.New("The following pods have old proxy certificate information; please, restart them:\n\tpod-0"), + }, + } + + for id, testCase := range testCases { + testCase := testCase + t.Run(fmt.Sprintf("%d", id), func(t *testing.T) { + hc := NewHealthChecker([]CategoryID{}, &Options{}) + hc.DataPlaneNamespace = testCase.namespace + + var err error + hc.kubeAPI, err = k8s.NewFakeAPI(append(testCase.resources, linkerdConfigMap)...) 
+ if err != nil { + t.Fatalf("Unexpected error: %q", err) + } + + err = hc.checkDataPlaneProxiesCertificate() + if !reflect.DeepEqual(err, testCase.expectedErr) { + t.Fatalf("Error %q does not match expected error: %q", err, testCase.expectedErr) + } + }) + } +} + func TestValidateControlPlanePods(t *testing.T) { pod := func(name string, phase corev1.PodPhase, ready bool) corev1.Pod { return corev1.Pod{ diff --git a/pkg/identity/service.go b/pkg/identity/service.go index cda8bf9c5..7649d5868 100644 --- a/pkg/identity/service.go +++ b/pkg/identity/service.go @@ -20,6 +20,10 @@ const ( // DefaultIssuanceLifetime is the default lifetime of certificates issued by // the identity service. DefaultIssuanceLifetime = 24 * time.Hour + + // EnvTrustAnchors is the environment variable holding the trust anchors for + // the proxy identity. + EnvTrustAnchors = "LINKERD2_PROXY_IDENTITY_TRUST_ANCHORS" ) type ( diff --git a/proxy-identity/main.go b/proxy-identity/main.go index b73d36774..1a13185c7 100644 --- a/proxy-identity/main.go +++ b/proxy-identity/main.go @@ -13,13 +13,13 @@ import ( "path/filepath" "github.com/linkerd/linkerd2/pkg/flags" + "github.com/linkerd/linkerd2/pkg/identity" "github.com/linkerd/linkerd2/pkg/tls" log "github.com/sirupsen/logrus" ) const ( - envDisabled = "LINKERD2_PROXY_IDENTITY_DISABLED" - envTrustAnchors = "LINKERD2_PROXY_IDENTITY_TRUST_ANCHORS" + envDisabled = "LINKERD2_PROXY_IDENTITY_DISABLED" ) func main() { @@ -40,7 +40,7 @@ func main() { log.Fatalf("Invalid end-entity directory: %s", err) } - if _, err := loadVerifier(os.Getenv(envTrustAnchors)); err != nil { + if _, err := loadVerifier(os.Getenv(identity.EnvTrustAnchors)); err != nil { log.Fatalf("Failed to load trust anchors: %s", err) } @@ -56,7 +56,7 @@ func main() { func loadVerifier(pem string) (verify x509.VerifyOptions, err error) { if pem == "" { - err = fmt.Errorf("'%s' must be set", envTrustAnchors) + err = fmt.Errorf("'%s' must be set", identity.EnvTrustAnchors) return }