From 030fa28d5563a634d637438c39b005a9f8159407 Mon Sep 17 00:00:00 2001 From: katelyn martin Date: Wed, 2 Jul 2025 12:38:04 -0400 Subject: [PATCH] fix: remove ambiguous metrics registry keys (#3987) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### :framed_picture: background the linkerd2 proxy implements, registers, and exports Prometheus metrics using a variety of systems, for historical reasons. new metrics broadly rely upon the official [`prometheus-client`](https://github.com/prometheus/client_rust/) library, whose interfaces are reexported for internal consumption in the [`linkerd_metrics::prom`](https://github.com/linkerd/linkerd2-proxy/blob/main/linkerd/metrics/src/lib.rs#L30-L60) namespace. other metrics predate this library however, and rely on the metrics registry implemented in the workspace's [`linkerd-metrics`](https://github.com/linkerd/linkerd2-proxy/tree/main/linkerd/metrics) library. ### :bug: bug report * https://github.com/linkerd/linkerd2/issues/13821 linkerd/linkerd2#13821 reported a bug in which duplicate metrics could be observed and subsequently dropped by Prometheus when upgrading the control plane via helm with an existing workload running. ### :butterfly: reproduction example for posterity, i'll note the reproduction steps here. i used these steps to identify the `2025.3.2` edge release as the affected release. upgrading from `2025.2.3` to `2025.3.1` did not exhibit this behavior. see below for more discussion about the cause. generate certificates via using these two deployments, courtesy of @GTRekter:
**💾 click to expand: app deployment** ```yaml apiVersion: v1 kind: Namespace metadata: name: simple-app annotations: linkerd.io/inject: enabled --- apiVersion: v1 kind: Service metadata: name: simple-app-v1 namespace: simple-app spec: selector: app: simple-app-v1 version: v1 ports: - port: 80 targetPort: 5678 --- apiVersion: apps/v1 kind: Deployment metadata: name: simple-app-v1 namespace: simple-app spec: replicas: 1 selector: matchLabels: app: simple-app-v1 version: v1 template: metadata: labels: app: simple-app-v1 version: v1 spec: containers: - name: http-app image: hashicorp/http-echo:latest args: - "-text=Simple App v1" ports: - containerPort: 5678 ```
**🤠 click to expand: client deployment** ```yaml apiVersion: apps/v1 kind: Deployment metadata: name: traffic namespace: simple-app spec: replicas: 1 selector: matchLabels: app: traffic template: metadata: labels: app: traffic spec: containers: - name: traffic image: curlimages/curl:latest command: - /bin/sh - -c - | while true; do TIMESTAMP_SEND=$(date '+%Y-%m-%d %H:%M:%S') PAYLOAD="{\"timestamp\":\"$TIMESTAMP_SEND\",\"test_id\":\"sniff_me\",\"message\":\"hello-world\"}" echo "$TIMESTAMP_SEND - Sending payload: $PAYLOAD" RESPONSE=$(curl -s -X POST \ -H "Content-Type: application/json" \ -d "$PAYLOAD" \ http://simple-app-v1.simple-app.svc.cluster.local:80) TIMESTAMP_RESPONSE=$(date '+%Y-%m-%d %H:%M:%S') echo "$TIMESTAMP_RESPONSE - RESPONSE: $RESPONSE" sleep 1 done ```
and this prometheus configuration:
**🔥 click to expand: prometheus configuration** ```yaml global: scrape_interval: 10s scrape_configs: - job_name: 'pod' scrape_interval: 10s static_configs: - targets: ['localhost:4191'] labels: group: 'traffic' ```
we will perform the following steps: ```sh # install the edge release # specify the versions we'll migrate between. export FROM="2025.3.1" export TO="2025.3.2" # create a cluster, and add the helm charts. kind create cluster helm repo add linkerd-edge https://helm.linkerd.io/edge # install linkerd's crd's and control plane. helm install linkerd-crds linkerd-edge/linkerd-crds \ -n linkerd --create-namespace --version $FROM helm install linkerd-control-plane \ -n linkerd \ --set-file identityTrustAnchorsPEM=cert/ca.crt \ --set-file identity.issuer.tls.crtPEM=cert/issuer.crt \ --set-file identity.issuer.tls.keyPEM=cert/issuer.key \ --version $FROM \ linkerd-edge/linkerd-control-plane # install a simple app and a client to drive traffic. kubectl apply -f duplicate-metrics-simple-app.yml kubectl apply -f duplicate-metrics-traffic.yml # bind the traffic pod's metrics port to the host. kubectl port-forward -n simple-app deploy/traffic 4191 # start prometheus, begin scraping metrics prometheus --config.file=prometheus.yml ``` now, open a browser and query `irate(request_total[1m])`. next, upgrade the control plane: ``` helm upgrade linkerd-crds linkerd-edge/linkerd-crds \ -n linkerd --create-namespace --version $TO helm upgrade linkerd-control-plane \ -n linkerd \ --set-file identityTrustAnchorsPEM=cert/ca.crt \ --set-file identity.issuer.tls.crtPEM=cert/issuer.crt \ --set-file identity.issuer.tls.keyPEM=cert/issuer.key \ --version $TO \ linkerd-edge/linkerd-control-plane ``` prometheus will begin emitting warnings regarding 34 time series being dropped. in your browser, querying `irate(request_total[1m])` once more will show that the rate of requests has stopped, due to the new time series being dropped. next, restart the workloads... ``` kubectl rollout restart deployment -n simple-app simple-app-v1 traffic ``` prometheus warnings will go away, as reported in linkerd/linkerd2#13821. ### :mag: related changes * https://github.com/linkerd/linkerd2/pull/13699 * https://github.com/linkerd/linkerd2/pull/13715 in linkerd/linkerd2#13715 and linkerd/linkerd2##13699, we made some changes to the destination controller. from the "Cautions" section of the `2025.3.2` edge release: > Additionally, this release changes the default for `outbound-transport-mode` > to `transport-header`, which will result in all traffic between meshed > proxies flowing on port 4143, rather than using the original destination > port. linkerd/linkerd2#13699 (_included in `edge-25.3.1`_) introduced this outbound transport-protocol configuration surface, but maintained the default behavior, while linkerd/linkerd2#13715 (_included in `edge-25.3.2`_) altered the default behavior to route meshed traffic via port 4143. this is a visible change in behavior that can be observed when upgrading from a version that preceded this change to the mesh. this means that when upgrading across `edge-25.3.2`, such as from the `2025.2.1` to `2025.3.2` versions of the helm charts, or from the `2025.2.3` to the `2025.3.4` versions of the helm charts (_reported upstream in linkerd/linkerd2#13821_), the freshly upgraded destination controller pods will begin routing meshed traffic differently. i'll state explicitly, _that_ is not a bug! it is, however, an important clue to bear in mind: data plane pods that were started with the previous control plane version, and continue running after the control plane upgrade, will have seen both routing patterns. reporting a duplicate time series for affected metrics indicates that there is a hashing collision in our metrics system. ### :bug: the bug(s) we define a collection to structures to model labels for inbound and outbound endpoints' metrics: ```rust // linkerd/app/core/src/metrics.rs #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum EndpointLabels { Inbound(InboundEndpointLabels), Outbound(OutboundEndpointLabels), } #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct InboundEndpointLabels { pub tls: tls::ConditionalServerTls, pub authority: Option, pub target_addr: SocketAddr, pub policy: RouteAuthzLabels, } #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct OutboundEndpointLabels { pub server_id: tls::ConditionalClientTls, pub authority: Option, pub labels: Option, pub zone_locality: OutboundZoneLocality, pub target_addr: SocketAddr, } ``` \- bear particular attention to the derived `Hash` implementation. note the `tls::ConditionalClientTls` and `tls::ConditionalServerTls` types used in each of these labels. these are used by some of our types like `TlsConnect` to emit prometheus labels, using our legacy system's `FmtLabels` trait: ```rust // linkerd/app/core/src/transport/labels.rs impl FmtLabels for TlsConnect<'_> { fn fmt_labels(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.0 { Conditional::None(tls::NoClientTls::Disabled) => { write!(f, "tls=\"disabled\"") } Conditional::None(why) => { write!(f, "tls=\"no_identity\",no_tls_reason=\"{}\"", why) } Conditional::Some(tls::ClientTls { server_id, .. }) => { write!(f, "tls=\"true\",server_id=\"{}\"", server_id) } } } } ``` \- note the `ClientTls` case, which ignores fields in the client tls information: ```rust // linkerd/tls/src/client.rs /// A stack parameter that configures a `Client` to establish a TLS connection. #[derive(Clone, Debug, Eq, PartialEq, Hash)] pub struct ClientTls { pub server_name: ServerName, pub server_id: ServerId, pub alpn: Option, } ``` \- this means that there is potential for an identical set of labels to be emitted given two `ClientTls` structures with distinct server names or ALPN protocols. for brevity, i'll elide the equivalent issue with `ServerTls`, and its corresponding `TlsAccept<'_>` label implementation, though it exhibits the same issue. ### :hammer: the fix this pull request introduces two new types: `ClientTlsLabels` and `ServerTlsLabels`. these continue to implement `Hash`, for use as a key in our metrics registry, and for use in formatting labels. `ClientTlsLabels` and `ServerTlsLabels` each resemble `ClientTls` and `ServerTls`, respectively, but do not contain any fields that are elided in label formatting, to prevent duplicate metrics from being emitted. relatedly, #3988 audits our existing `FmtLabels` implementations and makes use of exhaustive bindings, to prevent this category of problem in the short-term future. ideally, we might eventually consider replacing the metrics interfaces in `linkerd-metrics`, but that is strictly kept out-of-scope for the purposes of this particular fix. --- * fix: do not key transport metrics registry on `ClientTls` Signed-off-by: katelyn martin * fix: do not key transport metrics registry on `ServerTls` Signed-off-by: katelyn martin --------- Signed-off-by: katelyn martin --- linkerd/app/admin/src/stack.rs | 4 +-- linkerd/app/core/src/metrics.rs | 8 ++--- linkerd/app/core/src/transport/labels.rs | 35 ++++++++++--------- linkerd/app/inbound/src/detect.rs | 4 +-- linkerd/app/inbound/src/direct.rs | 14 +++++--- linkerd/app/inbound/src/http/router.rs | 2 +- linkerd/app/inbound/src/http/tests.rs | 4 +-- linkerd/app/inbound/src/metrics/authz.rs | 29 ++++++++------- linkerd/app/inbound/src/policy/http.rs | 20 +++++++---- linkerd/app/inbound/src/policy/tcp.rs | 7 ++-- linkerd/app/outbound/src/http/concrete.rs | 7 ++++ .../app/outbound/src/http/endpoint/tests.rs | 6 ++++ linkerd/app/outbound/src/opaq/concrete.rs | 7 ++++ linkerd/app/outbound/src/tls/concrete.rs | 7 ++++ linkerd/tls/src/client.rs | 14 ++++++++ linkerd/tls/src/lib.rs | 10 ++++-- linkerd/tls/src/server.rs | 21 +++++++++++ 17 files changed, 143 insertions(+), 56 deletions(-) diff --git a/linkerd/app/admin/src/stack.rs b/linkerd/app/admin/src/stack.rs index a198f9e62..381243f46 100644 --- a/linkerd/app/admin/src/stack.rs +++ b/linkerd/app/admin/src/stack.rs @@ -214,7 +214,7 @@ impl Config { impl Param for Tcp { fn param(&self) -> transport::labels::Key { transport::labels::Key::inbound_server( - self.tls.clone(), + self.tls.as_ref().map(|t| t.labels()), self.addr.into(), self.policy.server_label(), ) @@ -272,7 +272,7 @@ impl Param for Http { impl Param for Permitted { fn param(&self) -> metrics::EndpointLabels { metrics::InboundEndpointLabels { - tls: self.http.tcp.tls.clone(), + tls: self.http.tcp.tls.as_ref().map(|t| t.labels()), authority: None, target_addr: self.http.tcp.addr.into(), policy: self.permit.labels.clone(), diff --git a/linkerd/app/core/src/metrics.rs b/linkerd/app/core/src/metrics.rs index 3aac6ccc1..3d4822cc2 100644 --- a/linkerd/app/core/src/metrics.rs +++ b/linkerd/app/core/src/metrics.rs @@ -54,7 +54,7 @@ pub struct Proxy { #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct ControlLabels { addr: Addr, - server_id: tls::ConditionalClientTls, + server_id: tls::ConditionalClientTlsLabels, } #[derive(Clone, Debug, PartialEq, Eq, Hash)] @@ -65,7 +65,7 @@ pub enum EndpointLabels { #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct InboundEndpointLabels { - pub tls: tls::ConditionalServerTls, + pub tls: tls::ConditionalServerTlsLabels, pub authority: Option, pub target_addr: SocketAddr, pub policy: RouteAuthzLabels, @@ -98,7 +98,7 @@ pub struct RouteAuthzLabels { #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct OutboundEndpointLabels { - pub server_id: tls::ConditionalClientTls, + pub server_id: tls::ConditionalClientTlsLabels, pub authority: Option, pub labels: Option, pub zone_locality: OutboundZoneLocality, @@ -243,7 +243,7 @@ impl svc::Param for control::ControlAddr { fn param(&self) -> ControlLabels { ControlLabels { addr: self.addr.clone(), - server_id: self.identity.clone(), + server_id: self.identity.as_ref().map(tls::ClientTls::labels), } } } diff --git a/linkerd/app/core/src/transport/labels.rs b/linkerd/app/core/src/transport/labels.rs index 831a625df..c24042b5b 100644 --- a/linkerd/app/core/src/transport/labels.rs +++ b/linkerd/app/core/src/transport/labels.rs @@ -20,16 +20,16 @@ pub enum Key { #[derive(Clone, Debug, Eq, PartialEq, Hash)] pub struct ServerLabels { direction: Direction, - tls: tls::ConditionalServerTls, + tls: tls::ConditionalServerTlsLabels, target_addr: SocketAddr, policy: Option, } #[derive(Clone, Debug, Eq, PartialEq, Hash)] -pub struct TlsAccept<'t>(pub &'t tls::ConditionalServerTls); +pub struct TlsAccept<'t>(pub &'t tls::ConditionalServerTlsLabels); #[derive(Clone, Debug, Eq, PartialEq, Hash)] -pub(crate) struct TlsConnect<'t>(&'t tls::ConditionalClientTls); +pub(crate) struct TlsConnect<'t>(pub &'t tls::ConditionalClientTlsLabels); #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] pub struct TargetAddr(pub SocketAddr); @@ -38,7 +38,7 @@ pub struct TargetAddr(pub SocketAddr); impl Key { pub fn inbound_server( - tls: tls::ConditionalServerTls, + tls: tls::ConditionalServerTlsLabels, target_addr: SocketAddr, server: PolicyServerLabel, ) -> Self { @@ -62,7 +62,7 @@ impl FmtLabels for Key { } Self::InboundClient => { - const NO_TLS: tls::client::ConditionalClientTls = + const NO_TLS: tls::client::ConditionalClientTlsLabels = Conditional::None(tls::NoClientTls::Loopback); Direction::In.fmt_labels(f)?; @@ -75,7 +75,7 @@ impl FmtLabels for Key { impl ServerLabels { fn inbound( - tls: tls::ConditionalServerTls, + tls: tls::ConditionalServerTlsLabels, target_addr: SocketAddr, policy: PolicyServerLabel, ) -> Self { @@ -90,7 +90,7 @@ impl ServerLabels { fn outbound(target_addr: SocketAddr) -> Self { ServerLabels { direction: Direction::Out, - tls: tls::ConditionalServerTls::None(tls::NoServerTls::Loopback), + tls: tls::ConditionalServerTlsLabels::None(tls::NoServerTls::Loopback), target_addr, policy: None, } @@ -114,8 +114,8 @@ impl FmtLabels for ServerLabels { // === impl TlsAccept === -impl<'t> From<&'t tls::ConditionalServerTls> for TlsAccept<'t> { - fn from(c: &'t tls::ConditionalServerTls) -> Self { +impl<'t> From<&'t tls::ConditionalServerTlsLabels> for TlsAccept<'t> { + fn from(c: &'t tls::ConditionalServerTlsLabels) -> Self { TlsAccept(c) } } @@ -129,11 +129,11 @@ impl FmtLabels for TlsAccept<'_> { Conditional::None(why) => { write!(f, "tls=\"no_identity\",no_tls_reason=\"{}\"", why) } - Conditional::Some(tls::ServerTls::Established { client_id, .. }) => match client_id { + Conditional::Some(tls::ServerTlsLabels::Established { client_id }) => match client_id { Some(id) => write!(f, "tls=\"true\",client_id=\"{}\"", id), None => write!(f, "tls=\"true\",client_id=\"\""), }, - Conditional::Some(tls::ServerTls::Passthru { sni }) => { + Conditional::Some(tls::ServerTlsLabels::Passthru { sni }) => { write!(f, "tls=\"opaque\",sni=\"{}\"", sni) } } @@ -142,22 +142,24 @@ impl FmtLabels for TlsAccept<'_> { // === impl TlsConnect === -impl<'t> From<&'t tls::ConditionalClientTls> for TlsConnect<'t> { - fn from(s: &'t tls::ConditionalClientTls) -> Self { +impl<'t> From<&'t tls::ConditionalClientTlsLabels> for TlsConnect<'t> { + fn from(s: &'t tls::ConditionalClientTlsLabels) -> Self { TlsConnect(s) } } impl FmtLabels for TlsConnect<'_> { fn fmt_labels(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self.0 { + let Self(tls) = self; + + match tls { Conditional::None(tls::NoClientTls::Disabled) => { write!(f, "tls=\"disabled\"") } Conditional::None(why) => { write!(f, "tls=\"no_identity\",no_tls_reason=\"{}\"", why) } - Conditional::Some(tls::ClientTls { server_id, .. }) => { + Conditional::Some(tls::ClientTlsLabels { server_id }) => { write!(f, "tls=\"true\",server_id=\"{}\"", server_id) } } @@ -194,9 +196,8 @@ mod tests { use std::sync::Arc; let labels = ServerLabels::inbound( - tls::ConditionalServerTls::Some(tls::ServerTls::Established { + tls::ConditionalServerTlsLabels::Some(tls::ServerTlsLabels::Established { client_id: Some("foo.id.example.com".parse().unwrap()), - negotiated_protocol: None, }), ([192, 0, 2, 4], 40000).into(), PolicyServerLabel( diff --git a/linkerd/app/inbound/src/detect.rs b/linkerd/app/inbound/src/detect.rs index bbab744ce..08660e325 100644 --- a/linkerd/app/inbound/src/detect.rs +++ b/linkerd/app/inbound/src/detect.rs @@ -325,7 +325,7 @@ impl svc::Param> for Forward { impl svc::Param for Forward { fn param(&self) -> transport::labels::Key { transport::labels::Key::inbound_server( - self.tls.clone(), + self.tls.as_ref().map(|t| t.labels()), self.orig_dst_addr.into(), self.permit.labels.server.clone(), ) @@ -429,7 +429,7 @@ impl svc::Param for Http { impl svc::Param for Http { fn param(&self) -> transport::labels::Key { transport::labels::Key::inbound_server( - self.tls.status.clone(), + self.tls.status.as_ref().map(|t| t.labels()), self.tls.orig_dst_addr.into(), self.tls.policy.server_label(), ) diff --git a/linkerd/app/inbound/src/direct.rs b/linkerd/app/inbound/src/direct.rs index 5d0b7f22e..f9b657185 100644 --- a/linkerd/app/inbound/src/direct.rs +++ b/linkerd/app/inbound/src/direct.rs @@ -311,9 +311,8 @@ impl Param> for AuthorizedLocalTcp { impl Param for AuthorizedLocalTcp { fn param(&self) -> transport::labels::Key { transport::labels::Key::inbound_server( - tls::ConditionalServerTls::Some(tls::ServerTls::Established { + tls::ConditionalServerTlsLabels::Some(tls::ServerTlsLabels::Established { client_id: Some(self.client_id.clone()), - negotiated_protocol: None, }), self.addr.into(), self.permit.labels.server.clone(), @@ -344,9 +343,8 @@ impl Param> for LocalHttp { impl Param for LocalHttp { fn param(&self) -> transport::labels::Key { transport::labels::Key::inbound_server( - tls::ConditionalServerTls::Some(tls::ServerTls::Established { + tls::ConditionalServerTlsLabels::Some(tls::ServerTlsLabels::Established { client_id: Some(self.client.client_id.clone()), - negotiated_protocol: None, }), self.addr.into(), self.policy.server_label(), @@ -435,6 +433,14 @@ impl Param for GatewayTransportHeader { } } +impl Param for GatewayTransportHeader { + fn param(&self) -> tls::ConditionalServerTlsLabels { + tls::ConditionalServerTlsLabels::Some(tls::ServerTlsLabels::Established { + client_id: Some(self.client.client_id.clone()), + }) + } +} + impl Param for GatewayTransportHeader { fn param(&self) -> tls::ClientId { self.client.client_id.clone() diff --git a/linkerd/app/inbound/src/http/router.rs b/linkerd/app/inbound/src/http/router.rs index ec38be71f..1705ef6d4 100644 --- a/linkerd/app/inbound/src/http/router.rs +++ b/linkerd/app/inbound/src/http/router.rs @@ -395,7 +395,7 @@ fn endpoint_labels( ) -> impl svc::ExtractParam + Clone { move |t: &Logical| -> metrics::EndpointLabels { metrics::InboundEndpointLabels { - tls: t.tls.clone(), + tls: t.tls.as_ref().map(|t| t.labels()), authority: unsafe_authority_labels .then(|| t.logical.as_ref().map(|d| d.as_http_authority())) .flatten(), diff --git a/linkerd/app/inbound/src/http/tests.rs b/linkerd/app/inbound/src/http/tests.rs index aeda68b4a..8522a3097 100644 --- a/linkerd/app/inbound/src/http/tests.rs +++ b/linkerd/app/inbound/src/http/tests.rs @@ -664,7 +664,7 @@ async fn grpc_response_class() { let response_total = metrics .get_response_total( &metrics::EndpointLabels::Inbound(metrics::InboundEndpointLabels { - tls: Target::meshed_h2().1, + tls: Target::meshed_h2().1.map(|t| t.labels()), authority: None, target_addr: "127.0.0.1:80".parse().unwrap(), policy: metrics::RouteAuthzLabels { @@ -762,7 +762,7 @@ async fn test_unsafe_authority_labels( let response_total = metrics .get_response_total( &metrics::EndpointLabels::Inbound(metrics::InboundEndpointLabels { - tls: Target::meshed_http1().1, + tls: Target::meshed_http1().1.as_ref().map(|t| t.labels()), authority: expected_authority, target_addr: "127.0.0.1:80".parse().unwrap(), policy: metrics::RouteAuthzLabels { diff --git a/linkerd/app/inbound/src/metrics/authz.rs b/linkerd/app/inbound/src/metrics/authz.rs index 5f53c1e09..0e78d56f6 100644 --- a/linkerd/app/inbound/src/metrics/authz.rs +++ b/linkerd/app/inbound/src/metrics/authz.rs @@ -67,7 +67,7 @@ pub struct HTTPLocalRateLimitLabels { #[derive(Debug, Hash, PartialEq, Eq)] struct Key { target: TargetAddr, - tls: tls::ConditionalServerTls, + tls: tls::ConditionalServerTlsLabels, labels: L, } @@ -80,7 +80,7 @@ type HttpLocalRateLimitKey = Key; // === impl HttpAuthzMetrics === impl HttpAuthzMetrics { - pub fn allow(&self, permit: &HttpRoutePermit, tls: tls::ConditionalServerTls) { + pub fn allow(&self, permit: &HttpRoutePermit, tls: tls::ConditionalServerTlsLabels) { self.0 .allow .lock() @@ -93,7 +93,7 @@ impl HttpAuthzMetrics { &self, labels: ServerLabel, dst: OrigDstAddr, - tls: tls::ConditionalServerTls, + tls: tls::ConditionalServerTlsLabels, ) { self.0 .route_not_found @@ -103,7 +103,12 @@ impl HttpAuthzMetrics { .incr(); } - pub fn deny(&self, labels: RouteLabels, dst: OrigDstAddr, tls: tls::ConditionalServerTls) { + pub fn deny( + &self, + labels: RouteLabels, + dst: OrigDstAddr, + tls: tls::ConditionalServerTlsLabels, + ) { self.0 .deny .lock() @@ -116,7 +121,7 @@ impl HttpAuthzMetrics { &self, labels: HTTPLocalRateLimitLabels, dst: OrigDstAddr, - tls: tls::ConditionalServerTls, + tls: tls::ConditionalServerTlsLabels, ) { self.0 .http_local_rate_limit @@ -187,7 +192,7 @@ impl FmtMetrics for HttpAuthzMetrics { // === impl TcpAuthzMetrics === impl TcpAuthzMetrics { - pub fn allow(&self, permit: &ServerPermit, tls: tls::ConditionalServerTls) { + pub fn allow(&self, permit: &ServerPermit, tls: tls::ConditionalServerTlsLabels) { self.0 .allow .lock() @@ -196,7 +201,7 @@ impl TcpAuthzMetrics { .incr(); } - pub fn deny(&self, policy: &AllowPolicy, tls: tls::ConditionalServerTls) { + pub fn deny(&self, policy: &AllowPolicy, tls: tls::ConditionalServerTlsLabels) { self.0 .deny .lock() @@ -205,7 +210,7 @@ impl TcpAuthzMetrics { .incr(); } - pub fn terminate(&self, policy: &AllowPolicy, tls: tls::ConditionalServerTls) { + pub fn terminate(&self, policy: &AllowPolicy, tls: tls::ConditionalServerTlsLabels) { self.0 .terminate .lock() @@ -265,7 +270,7 @@ impl FmtLabels for HTTPLocalRateLimitLabels { // === impl Key === impl Key { - fn new(labels: L, dst: OrigDstAddr, tls: tls::ConditionalServerTls) -> Self { + fn new(labels: L, dst: OrigDstAddr, tls: tls::ConditionalServerTlsLabels) -> Self { Self { tls, target: TargetAddr(dst.into()), @@ -281,19 +286,19 @@ impl FmtLabels for Key { } impl ServerKey { - fn from_policy(policy: &AllowPolicy, tls: tls::ConditionalServerTls) -> Self { + fn from_policy(policy: &AllowPolicy, tls: tls::ConditionalServerTlsLabels) -> Self { Self::new(policy.server_label(), policy.dst_addr(), tls) } } impl RouteAuthzKey { - fn from_permit(permit: &HttpRoutePermit, tls: tls::ConditionalServerTls) -> Self { + fn from_permit(permit: &HttpRoutePermit, tls: tls::ConditionalServerTlsLabels) -> Self { Self::new(permit.labels.clone(), permit.dst, tls) } } impl ServerAuthzKey { - fn from_permit(permit: &ServerPermit, tls: tls::ConditionalServerTls) -> Self { + fn from_permit(permit: &ServerPermit, tls: tls::ConditionalServerTlsLabels) -> Self { Self::new(permit.labels.clone(), permit.dst, tls) } } diff --git a/linkerd/app/inbound/src/policy/http.rs b/linkerd/app/inbound/src/policy/http.rs index e3ac922f4..e37e268de 100644 --- a/linkerd/app/inbound/src/policy/http.rs +++ b/linkerd/app/inbound/src/policy/http.rs @@ -248,8 +248,11 @@ impl HttpPolicyService { ); } } - self.metrics - .deny(labels, self.connection.dst, self.connection.tls.clone()); + self.metrics.deny( + labels, + self.connection.dst, + self.connection.tls.as_ref().map(|t| t.labels()), + ); return Err(HttpRouteUnauthorized(()).into()); } }; @@ -279,14 +282,19 @@ impl HttpPolicyService { } }; - self.metrics.allow(&permit, self.connection.tls.clone()); + self.metrics + .allow(&permit, self.connection.tls.as_ref().map(|t| t.labels())); + Ok((permit, r#match, route)) } fn mk_route_not_found(&self) -> Error { let labels = self.policy.server_label(); - self.metrics - .route_not_found(labels, self.connection.dst, self.connection.tls.clone()); + self.metrics.route_not_found( + labels, + self.connection.dst, + self.connection.tls.as_ref().map(|t| t.labels()), + ); HttpRouteNotFound(()).into() } @@ -306,7 +314,7 @@ impl HttpPolicyService { self.metrics.ratelimit( self.policy.ratelimit_label(&err), self.connection.dst, - self.connection.tls.clone(), + self.connection.tls.as_ref().map(|t| t.labels()), ); err.into() }) diff --git a/linkerd/app/inbound/src/policy/tcp.rs b/linkerd/app/inbound/src/policy/tcp.rs index 2defa5288..915e6db27 100644 --- a/linkerd/app/inbound/src/policy/tcp.rs +++ b/linkerd/app/inbound/src/policy/tcp.rs @@ -77,7 +77,8 @@ where // This new services requires a ClientAddr, so it must necessarily be built for each // connection. So we can just increment the counter here since the service can only // be used at most once. - self.metrics.allow(&permit, tls.clone()); + self.metrics + .allow(&permit, tls.as_ref().map(|t| t.labels())); let inner = self.inner.new_service((permit, target)); TcpPolicy::Authorized(Authorized { @@ -97,7 +98,7 @@ where ?tls, %client, "Connection denied" ); - self.metrics.deny(&policy, tls); + self.metrics.deny(&policy, tls.as_ref().map(|t| t.labels())); TcpPolicy::Unauthorized(deny) } } @@ -167,7 +168,7 @@ where %client, "Connection terminated due to policy change", ); - metrics.terminate(&policy, tls); + metrics.terminate(&policy, tls.as_ref().map(|t| t.labels())); return Err(denied.into()); } } diff --git a/linkerd/app/outbound/src/http/concrete.rs b/linkerd/app/outbound/src/http/concrete.rs index 557e2fccc..cf213693b 100644 --- a/linkerd/app/outbound/src/http/concrete.rs +++ b/linkerd/app/outbound/src/http/concrete.rs @@ -279,6 +279,13 @@ impl svc::Param for Endpoint { } } +impl svc::Param for Endpoint { + fn param(&self) -> tls::ConditionalClientTlsLabels { + let tls: tls::ConditionalClientTls = self.param(); + tls.as_ref().map(tls::ClientTls::labels) + } +} + impl svc::Param for Endpoint where T: svc::Param, diff --git a/linkerd/app/outbound/src/http/endpoint/tests.rs b/linkerd/app/outbound/src/http/endpoint/tests.rs index a65db5002..4061143aa 100644 --- a/linkerd/app/outbound/src/http/endpoint/tests.rs +++ b/linkerd/app/outbound/src/http/endpoint/tests.rs @@ -289,6 +289,12 @@ impl svc::Param for Endpoint { } } +impl svc::Param for Endpoint { + fn param(&self) -> tls::ConditionalClientTlsLabels { + tls::ConditionalClientTlsLabels::None(tls::NoClientTls::Disabled) + } +} + impl svc::Param> for Endpoint { fn param(&self) -> Option { None diff --git a/linkerd/app/outbound/src/opaq/concrete.rs b/linkerd/app/outbound/src/opaq/concrete.rs index b0f43806a..5298c7200 100644 --- a/linkerd/app/outbound/src/opaq/concrete.rs +++ b/linkerd/app/outbound/src/opaq/concrete.rs @@ -419,3 +419,10 @@ impl svc::Param for Endpoint { )) } } + +impl svc::Param for Endpoint { + fn param(&self) -> tls::ConditionalClientTlsLabels { + let tls: tls::ConditionalClientTls = self.param(); + tls.as_ref().map(tls::ClientTls::labels) + } +} diff --git a/linkerd/app/outbound/src/tls/concrete.rs b/linkerd/app/outbound/src/tls/concrete.rs index aa31a9734..a122c35bc 100644 --- a/linkerd/app/outbound/src/tls/concrete.rs +++ b/linkerd/app/outbound/src/tls/concrete.rs @@ -385,3 +385,10 @@ impl svc::Param for Endpoint { )) } } + +impl svc::Param for Endpoint { + fn param(&self) -> tls::ConditionalClientTlsLabels { + let tls: tls::ConditionalClientTls = self.param(); + tls.as_ref().map(tls::ClientTls::labels) + } +} diff --git a/linkerd/tls/src/client.rs b/linkerd/tls/src/client.rs index 642db8a61..3f820c143 100644 --- a/linkerd/tls/src/client.rs +++ b/linkerd/tls/src/client.rs @@ -25,6 +25,12 @@ pub struct ClientTls { pub alpn: Option, } +/// Prometheus labels for a [`ClientTls`]. +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +pub struct ClientTlsLabels { + pub server_id: ServerId, +} + /// A stack param that configures the available ALPN protocols. #[derive(Clone, Eq, PartialEq, Hash)] pub struct AlpnProtocols(pub Vec>); @@ -50,6 +56,8 @@ pub enum NoClientTls { /// known TLS identity. pub type ConditionalClientTls = Conditional; +pub type ConditionalClientTlsLabels = Conditional; + #[derive(Clone, Debug)] pub struct Client { identity: L, @@ -84,6 +92,12 @@ impl ClientTls { alpn: None, } } + + pub fn labels(&self) -> ClientTlsLabels { + let Self { server_id, .. } = self; + let server_id = server_id.clone(); + ClientTlsLabels { server_id } + } } // === impl Client === diff --git a/linkerd/tls/src/lib.rs b/linkerd/tls/src/lib.rs index 4d6b0f613..5bf8ea05c 100755 --- a/linkerd/tls/src/lib.rs +++ b/linkerd/tls/src/lib.rs @@ -5,10 +5,14 @@ pub mod client; pub mod server; pub use self::{ - client::{Client, ClientTls, ConditionalClientTls, ConnectMeta, NoClientTls, ServerId}, + client::{ + Client, ClientTls, ClientTlsLabels, ConditionalClientTls, ConditionalClientTlsLabels, + ConnectMeta, NoClientTls, ServerId, + }, server::{ - ClientId, ConditionalServerTls, NewDetectRequiredSni, NewDetectTls, NoServerTls, - NoSniFoundError, ServerTls, SniDetectionTimeoutError, + ClientId, ConditionalServerTls, ConditionalServerTlsLabels, NewDetectRequiredSni, + NewDetectTls, NoServerTls, NoSniFoundError, ServerTls, ServerTlsLabels, + SniDetectionTimeoutError, }, }; diff --git a/linkerd/tls/src/server.rs b/linkerd/tls/src/server.rs index dfe2b4fb0..d6ad50939 100644 --- a/linkerd/tls/src/server.rs +++ b/linkerd/tls/src/server.rs @@ -37,6 +37,13 @@ pub enum ServerTls { }, } +/// Prometheus labels for a [`ServerTls`]. +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +pub enum ServerTlsLabels { + Established { client_id: Option }, + Passthru { sni: ServerName }, +} + #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] pub enum NoServerTls { /// Identity is administratively disabled. @@ -57,6 +64,8 @@ pub enum NoServerTls { /// Indicates whether TLS was established on an accepted connection. pub type ConditionalServerTls = Conditional; +pub type ConditionalServerTlsLabels = Conditional; + pub type DetectIo = EitherIo>; pub type Io = EitherIo>; @@ -303,6 +312,18 @@ impl ServerTls { _ => None, } } + + pub fn labels(&self) -> ServerTlsLabels { + match self { + Self::Established { + client_id, + negotiated_protocol: _, + } => ServerTlsLabels::Established { + client_id: client_id.clone(), + }, + Self::Passthru { sni } => ServerTlsLabels::Passthru { sni: sni.clone() }, + } + } } #[cfg(test)]