Health check: check if proxies trust anchors match configuration (#3524)

* Health check: check if proxies trust anchors match configuration

If Linkerd is reinstalled or if the trust anchors are modified while
proxies are running on the cluster, they will contain an outdated
`LINKERD2_PROXY_IDENTITY_TRUST_ANCHORS` certificate.

This changeset extends `linkerd check` so that, when any proxy is running
on the cluster, it compares the trust anchors held by each proxy against
the trust anchors in the configuration. If there's a mismatch (considered a
warning), `linkerd check` will tell the user which pods are the
offenders (and which namespace each one is in), along with a hint on how to
remediate the issue (restarting the pods).

* Add integration tests for proxy certificate check

Fixes #3344

Signed-off-by: Rafael Fernández López <ereslibre@ereslibre.es>
This commit is contained in:
Rafael Fernández López 2019-10-15 20:33:09 +02:00 committed by Ivan Sim
parent 3de35ccc58
commit ba14dc3fc7
4 changed files with 159 additions and 4 deletions

View File

@ -15,6 +15,7 @@ import (
configPb "github.com/linkerd/linkerd2/controller/gen/config"
pb "github.com/linkerd/linkerd2/controller/gen/public"
"github.com/linkerd/linkerd2/pkg/config"
"github.com/linkerd/linkerd2/pkg/identity"
"github.com/linkerd/linkerd2/pkg/k8s"
"github.com/linkerd/linkerd2/pkg/profiles"
"github.com/linkerd/linkerd2/pkg/tls"
@ -849,6 +850,14 @@ func (hc *HealthChecker) allCategories() []category {
return nil
},
},
{
description: "data plane proxies certificate match CA",
hintAnchor: "l5d-data-plane-proxies-certificate-match-ca",
warning: true,
check: func(ctx context.Context) error {
return hc.checkDataPlaneProxiesCertificate()
},
},
},
},
}
@ -1205,6 +1214,46 @@ func (hc *HealthChecker) checkPodSecurityPolicies(shouldExist bool) error {
return checkResources("PodSecurityPolicies", objects, []string{fmt.Sprintf("linkerd-%s-control-plane", hc.ControlPlaneNamespace)}, shouldExist)
}
// checkDataPlaneProxiesCertificate verifies that the trust anchors carried by
// the data plane proxies match the trust anchors stored in the Linkerd
// configuration.
//
// It lists the pods selected by k8s.ControllerNSLabel in hc.DataPlaneNamespace,
// looks at the identity.EnvTrustAnchors environment variable of each
// k8s.ProxyContainerName container, and compares it with the trust anchors
// read from the config map in hc.ControlPlaneNamespace.
//
// It returns nil when no pods are found or when every proxy matches. Otherwise
// it returns an error listing the offending pods, one per line. Pods are
// reported as "namespace/name" when hc.DataPlaneNamespace is empty and as
// "name" alone when a namespace was targeted. Errors from listing pods or
// fetching the config map are returned unchanged.
func (hc *HealthChecker) checkDataPlaneProxiesCertificate() error {
	podList, err := hc.kubeAPI.CoreV1().Pods(hc.DataPlaneNamespace).List(metav1.ListOptions{LabelSelector: k8s.ControllerNSLabel})
	if err != nil {
		return err
	}

	// Return early if no proxies are deployed on the cluster yet (or on the targeted namespace)
	if len(podList.Items) == 0 {
		return nil
	}

	_, configPB, err := FetchLinkerdConfigMap(hc.kubeAPI, hc.ControlPlaneNamespace)
	if err != nil {
		return err
	}

	// Surrounding whitespace (typically a trailing newline) carries no meaning
	// in a PEM bundle, so both sides are trimmed before comparing. Without this
	// a purely cosmetic difference would be reported as an outdated proxy.
	trustAnchorsPem := strings.TrimSpace(configPB.GetGlobal().GetIdentityContext().GetTrustAnchorsPem())

	var offendingPods []string
	for _, pod := range podList.Items {
		for _, containerSpec := range pod.Spec.Containers {
			if containerSpec.Name != k8s.ProxyContainerName {
				continue
			}
			for _, envVar := range containerSpec.Env {
				if envVar.Name != identity.EnvTrustAnchors {
					continue
				}
				if strings.TrimSpace(envVar.Value) == trustAnchorsPem {
					continue
				}
				// Pod names are only unique within a namespace, so qualify
				// them when the check was not restricted to a single one.
				if hc.DataPlaneNamespace == "" {
					offendingPods = append(offendingPods, fmt.Sprintf("%s/%s", pod.ObjectMeta.Namespace, pod.ObjectMeta.Name))
				} else {
					offendingPods = append(offendingPods, pod.ObjectMeta.Name)
				}
			}
		}
	}

	if len(offendingPods) == 0 {
		return nil
	}
	return fmt.Errorf("The following pods have old proxy certificate information; please, restart them:\n\t%s", strings.Join(offendingPods, "\n\t"))
}
func checkResources(resourceName string, objects []runtime.Object, expectedNames []string, shouldExist bool) error {
if !shouldExist {
if len(objects) > 0 {

View File

@ -2,6 +2,7 @@ package healthcheck
import (
"context"
"errors"
"fmt"
"reflect"
"strings"
@ -14,6 +15,7 @@ import (
healthcheckPb "github.com/linkerd/linkerd2/controller/gen/common/healthcheck"
configPb "github.com/linkerd/linkerd2/controller/gen/config"
pb "github.com/linkerd/linkerd2/controller/gen/public"
"github.com/linkerd/linkerd2/pkg/identity"
"github.com/linkerd/linkerd2/pkg/k8s"
corev1 "k8s.io/api/core/v1"
k8sErrors "k8s.io/apimachinery/pkg/api/errors"
@ -1672,6 +1674,106 @@ metadata:
}
}
// proxiesWithCertificates builds one Pod manifest per given certificate, for
// use as fake Kubernetes API resources in tests.
//
// The manifest at index i is named "pod-<i>", lives in "namespace-<i>", is
// labelled with k8s.ControllerNSLabel, and has a single container named
// k8s.ProxyContainerName whose identity.EnvTrustAnchors environment variable
// is set to the i-th certificate. The returned slice is never nil.
func proxiesWithCertificates(certificates ...string) []string {
	result := make([]string, 0, len(certificates))
	for i, certificate := range certificates {
		// The nesting below is significant: name, namespace and labels must
		// sit under metadata, and env under the container entry, otherwise
		// the decoded pod carries none of the fields the check inspects.
		result = append(result, fmt.Sprintf(`
apiVersion: v1
kind: Pod
metadata:
  name: pod-%d
  namespace: namespace-%d
  labels:
    %s: linkerd
spec:
  containers:
  - name: %s
    env:
    - name: %s
      value: %s
`, i, i, k8s.ControllerNSLabel, k8s.ProxyContainerName, identity.EnvTrustAnchors, certificate))
	}
	return result
}
// TestCheckDataPlaneProxiesCertificate exercises
// checkDataPlaneProxiesCertificate against a fake Kubernetes API holding a
// Linkerd config map plus proxy pods whose trust anchors either match or
// differ from the configured one. It covers both the case where no data plane
// namespace is targeted (offenders are reported as "namespace/pod") and the
// case where a specific namespace is targeted (offenders are reported by pod
// name only).
func TestCheckDataPlaneProxiesCertificate(t *testing.T) {
	const currentCertificate = "current-certificate"
	const oldCertificate = "old-certificate"

	// The JSON payload must be indented under the "global: |" block scalar,
	// and name/global must be nested under metadata/data, for the manifest to
	// decode into a config map carrying the trust anchors.
	linkerdConfigMap := fmt.Sprintf(`
kind: ConfigMap
apiVersion: v1
metadata:
  name: %s
data:
  global: |
    {"identityContext":{"trustAnchorsPem": "%s"}}
`, k8s.ConfigConfigMapName, currentCertificate)

	var testCases = []struct {
		checkDescription string
		resources        []string
		namespace        string
		expectedErr      error
	}{
		{
			checkDescription: "all proxies match CA certificate (all namespaces)",
			resources:        proxiesWithCertificates(currentCertificate, currentCertificate),
			namespace:        "",
			expectedErr:      nil,
		},
		{
			checkDescription: "some proxies match CA certificate (all namespaces)",
			resources:        proxiesWithCertificates(currentCertificate, oldCertificate),
			namespace:        "",
			expectedErr:      errors.New("The following pods have old proxy certificate information; please, restart them:\n\tnamespace-1/pod-1"),
		},
		{
			checkDescription: "no proxies match CA certificate (all namespaces)",
			resources:        proxiesWithCertificates(oldCertificate, oldCertificate),
			namespace:        "",
			expectedErr:      errors.New("The following pods have old proxy certificate information; please, restart them:\n\tnamespace-0/pod-0\n\tnamespace-1/pod-1"),
		},
		{
			checkDescription: "some proxies match CA certificate (match in target namespace)",
			resources:        proxiesWithCertificates(currentCertificate, oldCertificate),
			namespace:        "namespace-0",
			expectedErr:      nil,
		},
		{
			checkDescription: "some proxies match CA certificate (unmatch in target namespace)",
			resources:        proxiesWithCertificates(currentCertificate, oldCertificate),
			namespace:        "namespace-1",
			expectedErr:      errors.New("The following pods have old proxy certificate information; please, restart them:\n\tpod-1"),
		},
		{
			checkDescription: "no proxies match CA certificate (specific namespace)",
			resources:        proxiesWithCertificates(oldCertificate, oldCertificate),
			namespace:        "namespace-0",
			expectedErr:      errors.New("The following pods have old proxy certificate information; please, restart them:\n\tpod-0"),
		},
	}

	for _, testCase := range testCases {
		testCase := testCase
		// Name each subtest after its description so a failure identifies the
		// scenario rather than a bare index.
		t.Run(testCase.checkDescription, func(t *testing.T) {
			hc := NewHealthChecker([]CategoryID{}, &Options{})
			hc.DataPlaneNamespace = testCase.namespace

			var err error
			hc.kubeAPI, err = k8s.NewFakeAPI(append(testCase.resources, linkerdConfigMap)...)
			if err != nil {
				t.Fatalf("Unexpected error: %q", err)
			}

			err = hc.checkDataPlaneProxiesCertificate()

			// Compare by nil-ness and message rather than by deep equality so
			// the test does not depend on the concrete error type returned.
			switch {
			case err == nil && testCase.expectedErr == nil:
				// Both sides agree that every proxy is up to date.
			case err == nil || testCase.expectedErr == nil:
				t.Fatalf("Error %v does not match expected error: %v", err, testCase.expectedErr)
			case err.Error() != testCase.expectedErr.Error():
				t.Fatalf("Error %q does not match expected error: %q", err, testCase.expectedErr)
			}
		})
	}
}
func TestValidateControlPlanePods(t *testing.T) {
pod := func(name string, phase corev1.PodPhase, ready bool) corev1.Pod {
return corev1.Pod{

View File

@ -20,6 +20,10 @@ const (
// DefaultIssuanceLifetime is the default lifetime of certificates issued by
// the identity service.
DefaultIssuanceLifetime = 24 * time.Hour
// EnvTrustAnchors is the environment variable holding the trust anchors for
// the proxy identity.
EnvTrustAnchors = "LINKERD2_PROXY_IDENTITY_TRUST_ANCHORS"
)
type (

View File

@ -13,13 +13,13 @@ import (
"path/filepath"
"github.com/linkerd/linkerd2/pkg/flags"
"github.com/linkerd/linkerd2/pkg/identity"
"github.com/linkerd/linkerd2/pkg/tls"
log "github.com/sirupsen/logrus"
)
const (
envDisabled = "LINKERD2_PROXY_IDENTITY_DISABLED"
envTrustAnchors = "LINKERD2_PROXY_IDENTITY_TRUST_ANCHORS"
envDisabled = "LINKERD2_PROXY_IDENTITY_DISABLED"
)
func main() {
@ -40,7 +40,7 @@ func main() {
log.Fatalf("Invalid end-entity directory: %s", err)
}
if _, err := loadVerifier(os.Getenv(envTrustAnchors)); err != nil {
if _, err := loadVerifier(os.Getenv(identity.EnvTrustAnchors)); err != nil {
log.Fatalf("Failed to load trust anchors: %s", err)
}
@ -56,7 +56,7 @@ func main() {
func loadVerifier(pem string) (verify x509.VerifyOptions, err error) {
if pem == "" {
err = fmt.Errorf("'%s' must be set", envTrustAnchors)
err = fmt.Errorf("'%s' must be set", identity.EnvTrustAnchors)
return
}