mirror of https://github.com/linkerd/linkerd2.git
Health check: check if proxies trust anchors match configuration (#3524)
* Health check: check if proxies trust anchors match configuration If Linkerd is reinstalled, or if the trust anchors are modified while proxies are running on the cluster, the proxies will contain an outdated `LINKERD2_PROXY_IDENTITY_TRUST_ANCHORS` certificate. This changeset adds a check to `linkerd check` that determines whether any proxies are running on the cluster and, if so, compares their trust anchors against the trust anchor in the configuration. If there is a failure (considered a warning), `linkerd check` will notify the user about which pods are the offenders (and which namespace each one is in), and will also provide a hint to remediate the issue (restarting the pods). * Add integration tests for proxy certificate check Fixes #3344 Signed-off-by: Rafael Fernández López <ereslibre@ereslibre.es>
This commit is contained in:
parent
3de35ccc58
commit
ba14dc3fc7
|
@ -15,6 +15,7 @@ import (
|
|||
configPb "github.com/linkerd/linkerd2/controller/gen/config"
|
||||
pb "github.com/linkerd/linkerd2/controller/gen/public"
|
||||
"github.com/linkerd/linkerd2/pkg/config"
|
||||
"github.com/linkerd/linkerd2/pkg/identity"
|
||||
"github.com/linkerd/linkerd2/pkg/k8s"
|
||||
"github.com/linkerd/linkerd2/pkg/profiles"
|
||||
"github.com/linkerd/linkerd2/pkg/tls"
|
||||
|
@ -849,6 +850,14 @@ func (hc *HealthChecker) allCategories() []category {
|
|||
return nil
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "data plane proxies certificate match CA",
|
||||
hintAnchor: "l5d-data-plane-proxies-certificate-match-ca",
|
||||
warning: true,
|
||||
check: func(ctx context.Context) error {
|
||||
return hc.checkDataPlaneProxiesCertificate()
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
@ -1205,6 +1214,46 @@ func (hc *HealthChecker) checkPodSecurityPolicies(shouldExist bool) error {
|
|||
return checkResources("PodSecurityPolicies", objects, []string{fmt.Sprintf("linkerd-%s-control-plane", hc.ControlPlaneNamespace)}, shouldExist)
|
||||
}
|
||||
|
||||
func (hc *HealthChecker) checkDataPlaneProxiesCertificate() error {
|
||||
podList, err := hc.kubeAPI.CoreV1().Pods(hc.DataPlaneNamespace).List(metav1.ListOptions{LabelSelector: k8s.ControllerNSLabel})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Return early if no proxies are deployed on the cluster yet (or on the targeted namespace)
|
||||
if len(podList.Items) == 0 {
|
||||
return nil
|
||||
}
|
||||
_, configPB, err := FetchLinkerdConfigMap(hc.kubeAPI, hc.ControlPlaneNamespace)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
trustAnchorsPem := configPB.GetGlobal().GetIdentityContext().GetTrustAnchorsPem()
|
||||
offendingPods := []string{}
|
||||
for _, pod := range podList.Items {
|
||||
for _, containerSpec := range pod.Spec.Containers {
|
||||
if containerSpec.Name != k8s.ProxyContainerName {
|
||||
continue
|
||||
}
|
||||
for _, envVar := range containerSpec.Env {
|
||||
if envVar.Name != identity.EnvTrustAnchors {
|
||||
continue
|
||||
}
|
||||
if envVar.Value != trustAnchorsPem {
|
||||
if hc.DataPlaneNamespace == "" {
|
||||
offendingPods = append(offendingPods, fmt.Sprintf("%s/%s", pod.ObjectMeta.Namespace, pod.ObjectMeta.Name))
|
||||
} else {
|
||||
offendingPods = append(offendingPods, pod.ObjectMeta.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(offendingPods) == 0 {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("The following pods have old proxy certificate information; please, restart them:\n\t%s", strings.Join(offendingPods, "\n\t"))
|
||||
}
|
||||
|
||||
func checkResources(resourceName string, objects []runtime.Object, expectedNames []string, shouldExist bool) error {
|
||||
if !shouldExist {
|
||||
if len(objects) > 0 {
|
||||
|
|
|
@ -2,6 +2,7 @@ package healthcheck
|
|||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"strings"
|
||||
|
@ -14,6 +15,7 @@ import (
|
|||
healthcheckPb "github.com/linkerd/linkerd2/controller/gen/common/healthcheck"
|
||||
configPb "github.com/linkerd/linkerd2/controller/gen/config"
|
||||
pb "github.com/linkerd/linkerd2/controller/gen/public"
|
||||
"github.com/linkerd/linkerd2/pkg/identity"
|
||||
"github.com/linkerd/linkerd2/pkg/k8s"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
k8sErrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
|
@ -1672,6 +1674,106 @@ metadata:
|
|||
}
|
||||
}
|
||||
|
||||
func proxiesWithCertificates(certificates ...string) []string {
|
||||
result := []string{}
|
||||
for i, certificate := range certificates {
|
||||
result = append(result, fmt.Sprintf(`
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: pod-%d
|
||||
namespace: namespace-%d
|
||||
labels:
|
||||
%s: linkerd
|
||||
spec:
|
||||
containers:
|
||||
- name: %s
|
||||
env:
|
||||
- name: %s
|
||||
value: %s
|
||||
`, i, i, k8s.ControllerNSLabel, k8s.ProxyContainerName, identity.EnvTrustAnchors, certificate))
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func TestCheckDataPlaneProxiesCertificate(t *testing.T) {
|
||||
const currentCertificate = "current-certificate"
|
||||
const oldCertificate = "old-certificate"
|
||||
|
||||
linkerdConfigMap := fmt.Sprintf(`
|
||||
kind: ConfigMap
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: %s
|
||||
data:
|
||||
global: |
|
||||
{"identityContext":{"trustAnchorsPem": "%s"}}
|
||||
`, k8s.ConfigConfigMapName, currentCertificate)
|
||||
|
||||
var testCases = []struct {
|
||||
checkDescription string
|
||||
resources []string
|
||||
namespace string
|
||||
expectedErr error
|
||||
}{
|
||||
{
|
||||
checkDescription: "all proxies match CA certificate (all namespaces)",
|
||||
resources: proxiesWithCertificates(currentCertificate, currentCertificate),
|
||||
namespace: "",
|
||||
expectedErr: nil,
|
||||
},
|
||||
{
|
||||
checkDescription: "some proxies match CA certificate (all namespaces)",
|
||||
resources: proxiesWithCertificates(currentCertificate, oldCertificate),
|
||||
namespace: "",
|
||||
expectedErr: errors.New("The following pods have old proxy certificate information; please, restart them:\n\tnamespace-1/pod-1"),
|
||||
},
|
||||
{
|
||||
checkDescription: "no proxies match CA certificate (all namespaces)",
|
||||
resources: proxiesWithCertificates(oldCertificate, oldCertificate),
|
||||
namespace: "",
|
||||
expectedErr: errors.New("The following pods have old proxy certificate information; please, restart them:\n\tnamespace-0/pod-0\n\tnamespace-1/pod-1"),
|
||||
},
|
||||
{
|
||||
checkDescription: "some proxies match CA certificate (match in target namespace)",
|
||||
resources: proxiesWithCertificates(currentCertificate, oldCertificate),
|
||||
namespace: "namespace-0",
|
||||
expectedErr: nil,
|
||||
},
|
||||
{
|
||||
checkDescription: "some proxies match CA certificate (unmatch in target namespace)",
|
||||
resources: proxiesWithCertificates(currentCertificate, oldCertificate),
|
||||
namespace: "namespace-1",
|
||||
expectedErr: errors.New("The following pods have old proxy certificate information; please, restart them:\n\tpod-1"),
|
||||
},
|
||||
{
|
||||
checkDescription: "no proxies match CA certificate (specific namespace)",
|
||||
resources: proxiesWithCertificates(oldCertificate, oldCertificate),
|
||||
namespace: "namespace-0",
|
||||
expectedErr: errors.New("The following pods have old proxy certificate information; please, restart them:\n\tpod-0"),
|
||||
},
|
||||
}
|
||||
|
||||
for id, testCase := range testCases {
|
||||
testCase := testCase
|
||||
t.Run(fmt.Sprintf("%d", id), func(t *testing.T) {
|
||||
hc := NewHealthChecker([]CategoryID{}, &Options{})
|
||||
hc.DataPlaneNamespace = testCase.namespace
|
||||
|
||||
var err error
|
||||
hc.kubeAPI, err = k8s.NewFakeAPI(append(testCase.resources, linkerdConfigMap)...)
|
||||
if err != nil {
|
||||
t.Fatalf("Unexpected error: %q", err)
|
||||
}
|
||||
|
||||
err = hc.checkDataPlaneProxiesCertificate()
|
||||
if !reflect.DeepEqual(err, testCase.expectedErr) {
|
||||
t.Fatalf("Error %q does not match expected error: %q", err, testCase.expectedErr)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateControlPlanePods(t *testing.T) {
|
||||
pod := func(name string, phase corev1.PodPhase, ready bool) corev1.Pod {
|
||||
return corev1.Pod{
|
||||
|
|
|
@ -20,6 +20,10 @@ const (
|
|||
// DefaultIssuanceLifetime is the default lifetime of certificates issued by
|
||||
// the identity service.
|
||||
DefaultIssuanceLifetime = 24 * time.Hour
|
||||
|
||||
// EnvTrustAnchors is the environment variable holding the trust anchors for
|
||||
// the proxy identity.
|
||||
EnvTrustAnchors = "LINKERD2_PROXY_IDENTITY_TRUST_ANCHORS"
|
||||
)
|
||||
|
||||
type (
|
||||
|
|
|
@ -13,13 +13,13 @@ import (
|
|||
"path/filepath"
|
||||
|
||||
"github.com/linkerd/linkerd2/pkg/flags"
|
||||
"github.com/linkerd/linkerd2/pkg/identity"
|
||||
"github.com/linkerd/linkerd2/pkg/tls"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
envDisabled = "LINKERD2_PROXY_IDENTITY_DISABLED"
|
||||
envTrustAnchors = "LINKERD2_PROXY_IDENTITY_TRUST_ANCHORS"
|
||||
envDisabled = "LINKERD2_PROXY_IDENTITY_DISABLED"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
@ -40,7 +40,7 @@ func main() {
|
|||
log.Fatalf("Invalid end-entity directory: %s", err)
|
||||
}
|
||||
|
||||
if _, err := loadVerifier(os.Getenv(envTrustAnchors)); err != nil {
|
||||
if _, err := loadVerifier(os.Getenv(identity.EnvTrustAnchors)); err != nil {
|
||||
log.Fatalf("Failed to load trust anchors: %s", err)
|
||||
}
|
||||
|
||||
|
@ -56,7 +56,7 @@ func main() {
|
|||
|
||||
func loadVerifier(pem string) (verify x509.VerifyOptions, err error) {
|
||||
if pem == "" {
|
||||
err = fmt.Errorf("'%s' must be set", envTrustAnchors)
|
||||
err = fmt.Errorf("'%s' must be set", identity.EnvTrustAnchors)
|
||||
return
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue