Compare commits


379 Commits

Author SHA1 Message Date
Istio Automation 85a94b6cc4
Automator: update common-files@master in istio/ztunnel@master (#1595) 2025-07-10 07:15:33 -04:00
Istio Automation 3fa6335035
Automator: update common-files@master in istio/ztunnel@master (#1589) 2025-07-08 05:44:30 -04:00
Steven Jin dfa3b58bbc
Buffer inner h2 streams (#1580)
* Buffer h2 streams

* Tests

* naming

* Review

simplify code
2025-07-07 17:52:29 -04:00
Gustavo Meira c2d2534edb
increasing limit for open files (#1586)
* increasing limit for open files

* suggestion from PR

* adding comment

* Update src/main.rs

Co-authored-by: Daniel Hawton <daniel@hawton.org>

---------

Co-authored-by: Daniel Hawton <daniel@hawton.org>
2025-07-07 15:37:29 -04:00
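The mechanism isn't shown above, so here is a minimal sketch of how a proxy can raise its open-file limit at startup, assuming the `libc` crate; `raise_open_file_limit` is a hypothetical helper, not necessarily what #1586 landed.

```
use libc::{getrlimit, rlimit, setrlimit, RLIMIT_NOFILE};

fn raise_open_file_limit() -> std::io::Result<()> {
    let mut lim = rlimit { rlim_cur: 0, rlim_max: 0 };
    // SAFETY: `lim` is a plain struct that getrlimit fully initializes.
    if unsafe { getrlimit(RLIMIT_NOFILE, &mut lim) } != 0 {
        return Err(std::io::Error::last_os_error());
    }
    // A proxy holds roughly two sockets per proxied connection, so the
    // common soft default of 1024 is easy to exhaust; lift it to the hard max.
    lim.rlim_cur = lim.rlim_max;
    if unsafe { setrlimit(RLIMIT_NOFILE, &lim) } != 0 {
        return Err(std::io::Error::last_os_error());
    }
    Ok(())
}
```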
Krinkin, Mike 84f0e52e64
Multinetwork/Support remote networks for services with waypoints (#1565)
* Multinetwork/Support remote networks for services with waypoints

Currently, when `build_request` sees a service with a waypoint, it
resolves the waypoint backend and routes the request there using regular
HBONE.

In a multi-network scenario, though, the waypoint may have a workload on a
remote network, and to reach it we have to go through the E/W gateway and
use double HBONE.

This change enables handling of services with a waypoint on a remote
network.

Some of the assumptions used when I prepared this change:

1. We assume uniformity of configuration (e.g., if service X in the local
   cluster has a waypoint, then service X on the remote network also has a
   waypoint; if the waypoint is service-addressable, then a service is used
   to address the waypoint both locally and on the remote network).
2. Split-horizon representation of waypoint workloads, just like with
   any regular workloads and services (e.g., in the local cluster,
   instead of an actual waypoint workload pointing to a pod on another
   network, we will have a "proxy" representation that just has the network
   gateway).

Both of those can be handled by the control plane (e.g., the control plane
can generate split-horizon workloads and, when configuration is
non-uniform, just filter out remote configs for remote networks), though we
don't yet have a complete implementation.

Signed-off-by: Mikhail Krinkin <mkrinkin@microsoft.com>

* Return an error instead of panicking

Signed-off-by: Mikhail Krinkin <mkrinkin@microsoft.com>

* Update comments in src/proxy/outbound.rs

Co-authored-by: Ian Rudie <ilrudie@gmail.com>

* Update comments in src/proxy/outbound.rs

Signed-off-by: Mikhail Krinkin <mkrinkin@microsoft.com>

* Add a debug assert to provide a bit more context to the error in tests

Signed-off-by: Mikhail Krinkin <mkrinkin@microsoft.com>

* Fix formatting

Signed-off-by: Mikhail Krinkin <mkrinkin@microsoft.com>

* Added a few debug logs to be able to trace when a workload on a remote network is picked

Signed-off-by: Mikhail Krinkin <mkrinkin@microsoft.com>

---------

Signed-off-by: Mikhail Krinkin <mkrinkin@microsoft.com>
Co-authored-by: Ian Rudie <ilrudie@gmail.com>
2025-07-07 12:29:29 -04:00
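A sketch of the routing decision described above; the types and helper are illustrative, not ztunnel's actual `build_request` internals.

```
use std::net::SocketAddr;

enum WaypointRoute {
    /// Waypoint workload is on the local network: regular (single) HBONE.
    Local { waypoint: SocketAddr },
    /// Waypoint workload is on a remote network: tunnel through the E/W
    /// gateway with HBONE-in-HBONE (double HBONE).
    Remote { east_west_gateway: SocketAddr, inner_authority: String },
}

fn route_via_waypoint(
    local_network: &str,
    waypoint_network: &str,
    waypoint_addr: SocketAddr,
    gateway_addr: SocketAddr,
    target_authority: String,
) -> WaypointRoute {
    if waypoint_network == local_network {
        WaypointRoute::Local { waypoint: waypoint_addr }
    } else {
        // The outer CONNECT terminates at the network gateway; the inner
        // CONNECT carries the original destination end to end.
        WaypointRoute::Remote {
            east_west_gateway: gateway_addr,
            inner_authority: target_authority,
        }
    }
}
```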
Istio Automation f030073f2f
Automator: update common-files@master in istio/ztunnel@master (#1583) 2025-06-30 16:52:23 -04:00
Ian Rudie 3233bb1017
Improved Service Resolution (#1562)
* initial idea for improved resolution

Signed-off-by: ilrudie <ian.rudie@solo.io>

* handle preferred service namespace; unit testing

Signed-off-by: Ian Rudie <ian.rudie@solo.io>

---------

Signed-off-by: ilrudie <ian.rudie@solo.io>
Signed-off-by: Ian Rudie <ian.rudie@solo.io>
2025-06-30 11:23:22 -04:00
Istio Automation 7df8cf5d08
Automator: update common-files@master in istio/ztunnel@master (#1582) 2025-06-26 08:18:20 -04:00
Jacek Ewertowski 7cddb868e9
tls: add PQC compliance policy (#1561)
* tls: add PQC compliance policy

Signed-off-by: Jacek Ewertowski <jacek.ewertowski1@gmail.com>

* Add global lazy variable PQC_ENABLED

Signed-off-by: Jacek Ewertowski <jacek.ewertowski1@gmail.com>

* Add unused_imports and dead_code to PQC_ENABLED declaration

Signed-off-by: Jacek Ewertowski <jacek.ewertowski1@gmail.com>

---------

Signed-off-by: Jacek Ewertowski <jacek.ewertowski1@gmail.com>
2025-06-23 10:45:24 -04:00
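A minimal sketch of the global lazy flag named in the commit; the environment variable and its value are assumptions, and std's `LazyLock` stands in for whatever lazy primitive the commit actually uses.

```
use std::sync::LazyLock;

static PQC_ENABLED: LazyLock<bool> = LazyLock::new(|| {
    // Hypothetical trigger: a compliance policy selecting post-quantum crypto.
    std::env::var("COMPLIANCE_POLICY").map_or(false, |v| v == "pqc")
});
```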
Istio Automation ac477c15a8
Automator: update common-files@master in istio/ztunnel@master (#1578) 2025-06-19 09:56:10 -04:00
Istio Automation 5d0352588c
Automator: update common-files@master in istio/ztunnel@master (#1577) 2025-06-18 14:30:10 -04:00
Ian Rudie b86fd9989b
remove invalid test cases from parsing of ZTUNNEL_WORKER_THREADS (#1576)
Signed-off-by: Ian Rudie <ian.rudie@solo.io>
2025-06-18 13:15:00 -04:00
Istio Automation facd9a28a0
Automator: update common-files@master in istio/ztunnel@master (#1572) 2025-06-08 22:00:13 -04:00
Istio Automation 224b2c34ac
Automator: update common-files@master in istio/ztunnel@master (#1571) 2025-06-06 10:26:53 -04:00
Steven Landow c52e0bbdbf
don't send to empty address (#1570)
* don't send to empty address

* add test
2025-06-05 16:38:51 -04:00
John Howard 442923910b
Allow dynamic configuration of thread count (#1566)
* Allow dynamic configuration of thread count

* fix flakes
2025-06-04 14:31:52 -04:00
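A hedged sketch of what "dynamic" could mean here: ZTUNNEL_WORKER_THREADS (the variable named in #1576 above) parsed as either an absolute count or a percentage of available CPUs; the percentage form and the default are assumptions.

```
fn worker_threads(available_cpus: usize) -> usize {
    let Ok(raw) = std::env::var("ZTUNNEL_WORKER_THREADS") else {
        return 2; // assumed default
    };
    let n = if let Some(pct) = raw.strip_suffix('%') {
        let pct: usize = pct.parse().expect("invalid percentage");
        // Round up so a small percentage still yields at least one thread.
        (available_cpus * pct).div_ceil(100)
    } else {
        raw.parse().expect("invalid thread count")
    };
    n.max(1)
}
```

The result would feed something like `tokio::runtime::Builder::new_multi_thread().worker_threads(n)`.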
Istio Automation c616a29092
Automator: update common-files@master in istio/ztunnel@master (#1568) 2025-06-04 12:44:51 -04:00
Istio Automation d6d3b606ed
Automator: update common-files@master in istio/ztunnel@master (#1563) 2025-05-27 12:51:34 -04:00
Ian Rudie 8d9a56a416
update how io errors are being generated to fix clippy issues (#1564)
Signed-off-by: ilrudie <ian.rudie@solo.io>
2025-05-27 12:43:33 -04:00
zirain 615277a05a
remove git tag (#1559) 2025-05-21 23:47:28 -04:00
Harsh Pratap Singh 3d1223af09
Support TLS for metrics endpoint (#1507)
* add support for secure metrics endpoint

* nit

* no need to rewrite to localhost

* Implement tests for ztunnel identity and metrics routing

* nits

* Refactor to use environment variables for trust domain and service account
environments.

* refactoring

* nit

* Refactor Config and Identity Handling

* made identity and workload info optional and added unit test

* dont need to use env variables in test now

* Added logic to direct traffic to the ztunnel metrics endpoint on port 15020 via HBONE, ensuring proper request forwarding.

* Enhance Proxy to Support ztunnel Inbound Traffic

* Implement ztunnel inbound listener creation in shared proxy mode

* Refactor ztunnel inbound listener creation in shared proxy mode by removing unnecessary checks

* Refactor ztunnel inbound listener creation by encapsulating logic in ProxyFactory.

* Enhance ProxyInputs to include disable_inbound_freebind flag for controlling source IP preservation.

* Add tests for ztunnel self-scrape and metrics scrape via HBONE

* refactored e2e test for ztunnel identity and inbound

* fmt nit

* clippy nits

* nits

* nit

* fmt nits

* Refactor e2e tests

* Refactor client in e2e test connection logic to perform a standard HTTP GET request to the metrics endpoint, ensuring proper verification of the response.

* fmt nit

* match nit

* remove unnecessary test and error

* Refactor ztunnel listener creation and configuration for shared proxy mode

* Refactor trust domain handling and improve string formatting in configuration management.

* nits

* Revert timeout duration for TCP connection

* fmt nit

* nits
2025-05-19 16:05:24 -04:00
Istio Automation 79dfd10249
Automator: update common-files@master in istio/ztunnel@master (#1558) 2025-05-15 14:26:14 -04:00
Istio Automation 9f6ae51005
Automator: update common-files@master in istio/ztunnel@master (#1557) 2025-05-13 18:03:14 -04:00
Istio Automation 46acf76463
Automator: update common-files@master in istio/ztunnel@master (#1554) 2025-05-09 10:01:46 -04:00
Istio Automation 58cf2a0f94
Automator: update common-files@master in istio/ztunnel@master (#1553) 2025-05-09 06:16:46 -04:00
Steven Jin 9c01d1276d
show crypto provider in ztunnel version (#1545)
* Version shows crypto provider

* All tls providers

* Move crypto consts
2025-05-08 16:42:45 -04:00
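A sketch of surfacing the compiled-in provider; the feature names are assumptions based on the TLS providers mentioned elsewhere in this log (`tls-aws-lc`, OpenSSL, Boring).

```
fn crypto_provider() -> &'static str {
    if cfg!(feature = "tls-boring") {
        "boring"
    } else if cfg!(feature = "tls-openssl") {
        "openssl"
    } else if cfg!(feature = "tls-aws-lc") {
        "aws-lc"
    } else {
        "ring"
    }
}

fn version_string() -> String {
    format!("ztunnel {} (crypto: {})", env!("CARGO_PKG_VERSION"), crypto_provider())
}
```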
Yuval Kohavi d9ea32ce21
Make csr test stricter and more correct (previously #1432) (#1550)
* Make csr test stricter and more correct

Part of https://github.com/istio/ztunnel/issues/1431

Fails without https://github.com/rustls/rcgen/pull/311

* update rcgen

Signed-off-by: Yuval Kohavi <yuval.kohavi@gmail.com>

* fix merge issue

* format fix

Signed-off-by: Yuval Kohavi <yuval.kohavi@gmail.com>

---------

Signed-off-by: Yuval Kohavi <yuval.kohavi@gmail.com>
Co-authored-by: John Howard <john.howard@solo.io>
2025-05-07 16:01:44 -04:00
Istio Automation c96dd032da
Automator: update common-files@master in istio/ztunnel@master (#1549) 2025-05-07 14:25:48 -04:00
Istio Automation 903cf079de
Automator: update common-files@master in istio/ztunnel@master (#1544) 2025-04-25 04:03:03 -04:00
Istio Automation ad8bea43ef
Automator: update common-files@master in istio/ztunnel@master (#1543) 2025-04-24 16:28:02 -04:00
John Howard 6eaa32e8ac
http2: tune connection window size and add config (#1539)
Fixes https://github.com/istio/ztunnel/issues/1538

See comment for motivation as to why this change is needed.
2025-04-24 09:54:55 -04:00
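A hedged sketch with the `h2` crate: raise the connection-level flow-control window (the library default is small) and expose it as a knob; the exact config plumbing in ztunnel is not shown here.

```
use h2::server::Builder;

fn h2_builder(connection_window: u32, stream_window: u32) -> Builder {
    let mut b = Builder::new();
    // Connection-wide budget shared by all streams on the tunnel...
    b.initial_connection_window_size(connection_window);
    // ...and the per-stream window.
    b.initial_window_size(stream_window);
    b
}
```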
zirain 3470f4bba2
fix istio_build metric (#1532)
* fix istio_build metric

* fmt
2025-04-18 09:54:24 -04:00
John Howard 93a0973175
Include error info in DNS access logs (#1529) 2025-04-17 15:32:23 -04:00
John Howard b8dddb7301
Bump dependencies (#1521)
Major change is the DNS crate we use made a ton of breaking changes
2025-04-16 14:57:23 -04:00
John Howard ab0cf4c9d8
tls: improve failed connection handshake log (#1524)
* tls: improve failed connection handshake log

```
2025-04-13T01:20:27.118891Z     warn    hyper_util      TLS handshake error: tls handshake error: Custom { kind: InvalidData, error: NoCertificatesPresented }
```

before ^

after v
```
2025-04-13T01:33:51.805055Z     warn    access  connection failed       src.addr=[::1]:36332 dst.addr=[::1]:15008 direction="inbound" error="tls handshake error: Custom { kind: InvalidData, error: NoCertificatesPresented }"
```

* Update src/proxy/inbound.rs

Co-authored-by: Steven Jin <stevenjin8@gmail.com>

---------

Co-authored-by: Steven Jin <stevenjin8@gmail.com>
2025-04-15 13:05:22 -04:00
John Howard c29cd78a1f
Better support for root certificates (#1510)
* Better support for root certificates

* Split the last element of the returned response from the CA. This
  allows support for multiple roots. Note this requires a change to
  Istio (which I am making) to revert the regression in 1.25.1
  (https://github.com/istio/istio/issues/55793).
* Split root from chain in the config_dump. Without this, it is
  impossible to know if something is a root or intermediate.

* add test

* lint
2025-04-15 12:35:22 -04:00
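A sketch of the chain/root split described in the first bullet, assuming the CA returns certificates ordered leaf..intermediates..root.

```
/// Split a CA response into (leaf + intermediates, root).
fn split_chain(mut certs: Vec<Vec<u8>>) -> Option<(Vec<Vec<u8>>, Vec<u8>)> {
    let root = certs.pop()?; // the last element is the root
    Some((certs, root))
}
```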
Istio Automation 9a12c0e0da
Automator: update common-files@master in istio/ztunnel@master (#1519) 2025-04-10 08:37:17 -04:00
Istio Automation e11dea1ab7
Automator: update common-files@master in istio/ztunnel@master (#1518) 2025-04-10 07:06:17 -04:00
Istio Automation 49f36fa1d5
Automator: update common-files@master in istio/ztunnel@master (#1517) 2025-04-09 21:20:16 -04:00
Istio Automation 903aab1408
Automator: update common-files@master in istio/ztunnel@master (#1514) 2025-04-09 09:19:17 -04:00
zirain dddd5ad653
chore: fix make failed on mac (#1513) 2025-04-08 10:52:15 -04:00
zirain 3b04b341ee
improve dns accesslog (#1512) 2025-04-08 10:46:15 -04:00
dependabot[bot] ec70da292a
Bump the cargo group across 2 directories with 1 update (#1511)
Bumps the cargo group with 1 update in the / directory: [tokio](https://github.com/tokio-rs/tokio).
Bumps the cargo group with 1 update in the /fuzz directory: [tokio](https://github.com/tokio-rs/tokio).


Updates `tokio` from 1.43.0 to 1.43.1
- [Release notes](https://github.com/tokio-rs/tokio/releases)
- [Commits](https://github.com/tokio-rs/tokio/compare/tokio-1.43.0...tokio-1.43.1)

Updates `tokio` from 1.43.0 to 1.44.2
- [Release notes](https://github.com/tokio-rs/tokio/releases)
- [Commits](https://github.com/tokio-rs/tokio/compare/tokio-1.43.0...tokio-1.44.2)

---
updated-dependencies:
- dependency-name: tokio
  dependency-version: 1.43.1
  dependency-type: direct:production
  dependency-group: cargo
- dependency-name: tokio
  dependency-version: 1.44.2
  dependency-type: indirect
  dependency-group: cargo
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-04-07 22:21:51 -04:00
Steven Jin 5b28370c4b
WIP: Feature/double hbone (#1429)
* double hbone (no inner encryption) working!!

* hbone-ish

* cleanup

* graceful shutdowns

* Some cleanup

* Add some auth/tls logic back in

* inline double hbone code

* Use correct(?) identities

* checkpoint

* another checkpoint

* Use new xds

* lint

* Fix type sizes

* Check workload protocol

* wip

* initial impl of hostname

* tests passing

* svc addressing

* Check workload network when using network gateways

* Use dummy SNI

* remove extra derive

* lint

* more tests

* Drop some unneeded changes

* get rid of more stuff

* lint

* lint

* Review:

* Code layout
* Handle mismatched networks with no network gateway
* Get rid of a panic

* Assume existing workload
2025-04-07 20:09:51 -04:00
dependabot[bot] 4a782ece35
Bump openssl in the cargo group across 1 directory (#1508)
Bumps the cargo group with 1 update in the / directory: [openssl](https://github.com/sfackler/rust-openssl).


Updates `openssl` from 0.10.70 to 0.10.72
- [Release notes](https://github.com/sfackler/rust-openssl/releases)
- [Commits](https://github.com/sfackler/rust-openssl/compare/openssl-v0.10.70...openssl-v0.10.72)

---
updated-dependencies:
- dependency-name: openssl
  dependency-version: 0.10.72
  dependency-type: direct:production
  dependency-group: cargo
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-04-07 10:41:51 -04:00
Yann ILAS 980bbfd988
Fix markdown link formatting for aws-lc (#1506) 2025-04-06 06:50:48 -04:00
John Howard 576e8a4964
Regenerate certs with a valid CA (#1505)
The current root we use doesn't declare itself as supporting
keyCertSign, which is rejected in some stricter usages. Regenerate
it with the constraint.
2025-04-04 17:59:47 -04:00
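A sketch of generating such a root, assuming the `rcgen` crate (~0.13 API): the point is that a CA certificate must assert the keyCertSign key usage or strict verifiers will reject what it signs.

```
use rcgen::{BasicConstraints, CertificateParams, IsCa, KeyPair, KeyUsagePurpose};

fn make_test_root() -> Result<rcgen::Certificate, rcgen::Error> {
    let mut params = CertificateParams::default();
    params.is_ca = IsCa::Ca(BasicConstraints::Unconstrained);
    params.key_usages = vec![
        KeyUsagePurpose::KeyCertSign, // the constraint the old root lacked
        KeyUsagePurpose::CrlSign,
    ];
    let key = KeyPair::generate()?;
    params.self_signed(&key)
}
```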
Steven Jin 01947fb5b3
Use fewer unwraps and use generic `NamespacedHostname::from_str` (#1504)
* Use generic NamespacedHostname::from_str implementation

* Fewer unwraps
2025-04-03 16:08:46 -04:00
Steven Jin 860e2868d5
fix fips docs (#1503) 2025-04-03 16:03:47 -04:00
Steven Landow 907e1b9899
send PROXY to localhost by default (#1502) 2025-04-03 13:57:47 -04:00
Steven Landow 008fb73c19
PROXY Protocol: send to localhost (#1501)
* PROXY Protocol: send to localhost

* cleanup and test
2025-04-03 13:09:46 -04:00
Istio Automation ecfc76d62f
Automator: update common-files@master in istio/ztunnel@master (#1499) 2025-04-01 03:12:46 -04:00
Istio Automation 7e033a762f
Automator: update common-files@master in istio/ztunnel@master (#1498) 2025-03-31 15:07:45 -04:00
John Howard dc54d850a7
Include protocol in connection dump (#1496)
This is super useful for understanding which connections are mTLS enabled
2025-03-28 12:44:42 -04:00
John Howard 1b39642fcb
Move some code over to async closures (#1484) 2025-03-28 09:36:42 -04:00
Istio Automation a665e0807c
Automator: update common-files@master in istio/ztunnel@master (#1493) 2025-03-26 12:55:49 -04:00
John Howard 4012c20899
misc refactors around inbound code (#1490)
* misc refactors around inbound code

Some of the initial choices we made have not really held up over the
tweaks we have made over the year. Clean things up a bit. This should
have no meaningful user impact; it is mostly a refactoring.

* check_from_network_gateway is not right; it assumes we get the
  identity from the E/W gateway. In the modern multi-network scheme that is
  never true, as it's forwarded end-to-end from the client (double HBONE).
  Instead, change the heuristic to be "if hostname, it is multi-network".
* Instead of trying to create the PROXY protocol handling in the main
  forwarding logic, refactor it into find_inbound_upstream.

* fix

* address comments

* fmt
2025-03-24 17:33:47 -04:00
Istio Automation 9bf0c748c0
Automator: update common-files@master in istio/ztunnel@master (#1486) 2025-03-17 12:27:16 -04:00
Istio Automation 568d11af44
Automator: update common-files@master in istio/ztunnel@master (#1482) 2025-03-11 16:06:07 -04:00
Istio Automation 9004c2341d
Automator: update common-files@master in istio/ztunnel@master (#1480) 2025-03-10 13:03:06 -04:00
dependabot[bot] 9f43a67b94
Bump the cargo group across 2 directories with 2 updates (#1479)
Bumps the cargo group with 2 updates in the / directory: [ring](https://github.com/briansmith/ring) and [openssl](https://github.com/sfackler/rust-openssl).
Bumps the cargo group with 1 update in the /fuzz directory: [ring](https://github.com/briansmith/ring).


Updates `ring` from 0.17.11 to 0.17.12
- [Changelog](https://github.com/briansmith/ring/blob/main/RELEASES.md)
- [Commits](https://github.com/briansmith/ring/commits)

Updates `openssl` from 0.10.68 to 0.10.70
- [Release notes](https://github.com/sfackler/rust-openssl/releases)
- [Commits](https://github.com/sfackler/rust-openssl/compare/openssl-v0.10.68...openssl-v0.10.70)

Updates `ring` from 0.17.11 to 0.17.13
- [Changelog](https://github.com/briansmith/ring/blob/main/RELEASES.md)
- [Commits](https://github.com/briansmith/ring/commits)

---
updated-dependencies:
- dependency-name: ring
  dependency-type: direct:production
  dependency-group: cargo
- dependency-name: openssl
  dependency-type: direct:production
  dependency-group: cargo
- dependency-name: ring
  dependency-type: indirect
  dependency-group: cargo
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-03-07 12:18:02 -05:00
Jackie Maertens (Elliott) 681ddf02cc
Add inbound support for svc hostname in authority (#1420)
* Support hostname in authority header

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* remove todos

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Use enum for host addr

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Add HboneAddress enum and fix tests

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Update String usage to Strng and add comments

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Update svc_hostname to return strng

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Fix build errors

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Replace assert with panic for test configuration failure

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Fix fmt errors

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Respond to PR feedback

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Respond to some PR feedback

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Move HboneAddress to proxy

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Intelligently select the service from the hostname and the VIP from the service.
Implement TryFrom from Uri to HboneAddress

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* - Fix hbone_addr borrow and only pass port
- Update hostname in proxy protocol to be hostname:port

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Update build_inbound_request to accept Parts to improve
testability

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Add e2e for svc hostname

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* clean up e2es

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Remove unused import

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Update service name to match hostname

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Rebase

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Add ipv4 check

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

Fix fmt

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Rebase and remove proxy protocol svc hostname

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* remove failing test

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Translate service port to target port

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Fix unit tests

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* fix nit

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Update error type

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Added support for named target ports

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Update namespaced tests to support multiple test cases

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Use arc service

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

* Reformat endpoint dereference

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>

---------

Signed-off-by: Jackie Elliott <jaellio@microsoft.com>
2025-03-06 19:01:01 -05:00
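A sketch of the enum this PR introduces (exact shape assumed; ztunnel's interned `Strng` is replaced with `String` for self-containment): the HBONE CONNECT authority can now be a service hostname:port as well as an IP:port.

```
use std::net::SocketAddr;

enum HboneAddress {
    SocketAddr(SocketAddr),
    SvcHostname(String, u16),
}

fn parse_authority(auth: &str) -> Option<HboneAddress> {
    if let Ok(sa) = auth.parse::<SocketAddr>() {
        return Some(HboneAddress::SocketAddr(sa));
    }
    // Not an IP:port; treat it as hostname:port.
    let (host, port) = auth.rsplit_once(':')?;
    Some(HboneAddress::SvcHostname(host.to_string(), port.parse().ok()?))
}
```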
Istio Automation c98f8212b7
Automator: update common-files@master in istio/ztunnel@master (#1477) 2025-03-06 03:51:01 -05:00
John Howard 15f9533afb
Drop service lookup that clones (#1475)
No need to clone; we can keep it as an Arc. We did this everywhere else but
missed this one.
2025-03-05 10:27:01 -05:00
John Howard 7f147c658f
Remove deadcode around BufReader (#1474)
This is never used for writing, so no need for a write impl
2025-03-03 14:49:58 -05:00
Ben Leggett 4f36708363
Remove ConnClient, it's pointless now (#1453)
Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>
2025-02-28 15:59:30 -05:00
John Howard 145b4a0808
tls: move to `tls-aws-lc` by default (#1467)
* tls: move to `tls-aws-lc` by default

* fuzz
2025-02-28 12:56:29 -05:00
John Howard 9e3c8cff67
wds: implement capacity (#1472)
Ztunnel side impl of https://github.com/istio/istio/pull/55284
2025-02-28 11:45:31 -05:00
Ted Poole e52cc43c88
Add support for OpenSSL (#1436)
Signed-off-by: Ted Poole <tpoole@redhat.com>
2025-02-27 14:36:31 -05:00
John Howard 5532e0dcc9
Add benchmarks for load balancing (#1470) 2025-02-27 12:48:30 -05:00
Istio Automation 7b126874d8
Automator: update common-files@master in istio/ztunnel@master (#1471) 2025-02-27 12:23:30 -05:00
Istio Automation b3c12afd5c
Automator: update common-files@master in istio/ztunnel@master (#1469) 2025-02-27 09:46:30 -05:00
Istio Automation e57b6ebd28
Automator: update common-files@master in istio/ztunnel@master (#1468) 2025-02-26 14:14:28 -05:00
John Howard 73953985ad
Add support for AWS-LC-RS TLS provider (#1466)
* Add support for AWS-LC-RS TLS provider

Related PR: https://github.com/istio/ztunnel/pull/1436/

Ref https://github.com/istio/ztunnel/issues/1323

* Revert default
2025-02-25 18:32:28 -05:00
John Howard 09df8e9a6f
format with rust 1.85 (#1464) 2025-02-25 14:07:27 -05:00
John Howard 77e1f1bb2c
Update to Rust edition 2024 (#1465)
`cargo fix --edition` flagged a few areas where semantics *could* change.
None were really relevant to us, so I reverted them. The rest are `cargo
fmt` changes.
2025-02-25 14:02:27 -05:00
Istio Automation 6be903360c
Automator: update common-files@master in istio/ztunnel@master (#1463) 2025-02-24 14:26:27 -05:00
Ben Leggett 10c8745a41
Mention this explicitly here since it isn't clear (#1462) 2025-02-24 12:51:27 -05:00
Zhewei Hu 0df8081020
Simplify the xDS/CA headers parse logic (#1460) 2025-02-20 17:07:52 -05:00
Zhewei Hu 87aa1e934c
Add xDS and CA headers/metadata support (#1452)
* Add support for XDS headers and CA headers from environment variables

* Add unit tests

* Linting

* Make headers use metadata vector

* Linting

* Fix

* More fix

---------

Co-authored-by: Fuyuan Bie <fuyuanbie@microsoft.com>
2025-02-19 10:44:50 -05:00
Istio Automation 07ebb76ffa
Automator: update common-files@master in istio/ztunnel@master (#1459) 2025-02-18 13:51:50 -05:00
John Howard b2939cd576
Bump dependencies (#1458)
* Bump dependencies

Few libraries making breaking changes in prep for Rust 2024, bump those.
Notably, the older `ctor` we were on emits warnings on later rust versions

* gen
2025-02-17 13:14:49 -05:00
Istio Automation 6be9f7d911
Automator: update common-files@master in istio/ztunnel@master (#1455) 2025-02-13 16:30:44 -05:00
John Howard a23735f2c0
bump dependencies (#1449)
* bump dependencies

Manually apply https://github.com/istio/ztunnel/pull/1448 to both
modules

* Update cert format

* Fix formatting as well
2025-02-11 18:55:42 -05:00
Istio Automation 8df7e88e58
Automator: update common-files@master in istio/ztunnel@master (#1451) 2025-02-10 17:08:43 -05:00
Istio Automation 0256664b0e
Automator: update common-files@master in istio/ztunnel@master (#1445) 2025-02-03 20:12:24 -05:00
Istio Automation f3f6c0ed7a
Automator: update common-files@master in istio/ztunnel@master (#1444) 2025-02-03 16:55:13 -05:00
Louis Ryan 4c49bb83c4
Add new ztunnel images (#1443) 2025-01-29 20:04:50 -05:00
John Howard 2a9d716bfa
metrics: add src/dst locality information to labels (#1441)
Understanding cross-zone/cross-region traffic is one of the most
critical pieces of observing a cluster, from a cost, reliability, and
latency standpoint. Lots of work has been done in this space, from
standalone tools (https://github.com/polarsignals/kubezonnet/) to
enterprise upsells (I *think* Buoyant enterprise has this, but not in
OSS?); we can provide this exact data trivially.
2025-01-28 15:07:50 -05:00
Istio Automation 0417fbe4b6
Automator: update common-files@master in istio/ztunnel@master (#1440) 2025-01-24 04:13:00 -05:00
Istio Automation df0cedd800
Automator: update common-files@master in istio/ztunnel@master (#1439) 2025-01-22 14:15:58 -05:00
Istio Automation 270884977c
Automator: update common-files@master in istio/ztunnel@master (#1437) 2025-01-22 12:14:57 -05:00
John Howard 00b7410b14
Resync proto with istio/istio (#1434)
This fell out of date
2025-01-22 08:37:57 -05:00
Istio Automation 0f8e642770
Automator: update common-files@master in istio/ztunnel@master (#1435) 2025-01-21 19:48:58 -05:00
Istio Automation dcba244fc9
Automator: update common-files@master in istio/ztunnel@master (#1433) 2025-01-20 14:20:55 -05:00
John Howard 158608b5e7
Serialize SA match as camelCase (#1430)
Not sure of a good way to make sure we don't miss these. Obviously we
could add a test for the individual field, but that only helps after we
notice...
2025-01-17 14:15:55 -05:00
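The fix itself is a one-attribute serde change; a sketch with an assumed struct name (`serviceAccounts` is the field added by #1402 elsewhere in this log):

```
use serde::Serialize;

#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct ServiceAccountMatch {
    // Serializes as "serviceAccounts" instead of "service_accounts".
    service_accounts: Vec<String>,
}
```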
Istio Automation 4f99e7912f
Automator: update common-files@master in istio/ztunnel@master (#1428) 2025-01-16 11:34:54 -05:00
Istio Automation a82640bb98
Automator: update common-files@master in istio/ztunnel@master (#1427) 2025-01-15 16:54:53 -05:00
John Howard 37723d1279
Reformat code with 1.84 rustc (#1425) 2025-01-15 08:55:53 -05:00
Ben Leggett e94f24fc15
Bump prom_client to 0.23, fix a few build errors triggered by same (#1424)
* Bump prom_client to 0.23, fix a few build errors triggered by same

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* fuzz

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

---------

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>
2025-01-13 16:17:51 -05:00
Istio Automation a07261ea76
Automator: update common-files@master in istio/ztunnel@master (#1419) 2025-01-08 16:32:02 -05:00
John Howard eda6461cf2
authorization: implement new serviceAccounts field (#1402) 2025-01-08 15:57:02 -05:00
John Howard ac3dbc0296
deps: drop http-types for specialized code (#1418)
This is a fairly large dependency for a small task, and it has not had
updates for a long time. This PR drops the dependency in favor of our
own parser + creation logic which handles only the bits we need. Unit
tests and a fuzzer are added for this function. The fuzzer has run on my
machine for >1hr without any findings.
2025-01-08 15:52:02 -05:00
dependabot[bot] ea058cadfb
Bump the cargo group across 1 directory with 2 updates (#1417)
Bumps the cargo group with 2 updates in the /fuzz directory: [hashbrown](https://github.com/rust-lang/hashbrown) and [rustls](https://github.com/rustls/rustls).


Updates `hashbrown` from 0.15.0 to 0.15.2
- [Changelog](https://github.com/rust-lang/hashbrown/blob/master/CHANGELOG.md)
- [Commits](https://github.com/rust-lang/hashbrown/commits)

Updates `rustls` from 0.23.14 to 0.23.20
- [Release notes](https://github.com/rustls/rustls/releases)
- [Changelog](https://github.com/rustls/rustls/blob/main/CHANGELOG.md)
- [Commits](https://github.com/rustls/rustls/compare/v/0.23.14...v/0.23.20)

---
updated-dependencies:
- dependency-name: hashbrown
  dependency-type: indirect
  dependency-group: cargo
- dependency-name: rustls
  dependency-type: indirect
  dependency-group: cargo
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-01-08 15:04:02 -05:00
Istio Automation bf6fdabf22
Automator: update common-files@master in istio/ztunnel@master (#1416) 2025-01-07 17:43:01 -05:00
Istio Automation 7113b4a718
Automator: update common-files@master in istio/ztunnel@master (#1415) 2025-01-07 15:58:02 -05:00
John Howard 5e6234f3ce
tests: improve error messages (#1414)
Before:
```
thread '<unnamed>' panicked at tests/namespaced.rs:1411:43:
called `Result::unwrap()` on an `Err` value: Os { code: 104, kind: ConnectionReset, message: "Connection reset by peer" }
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
thread 'namespaced::local_captured_inpod' panicked at tests/namespaced.rs:1336:14:
called `Result::unwrap()` on an `Err` value: Any { .. }
```

After:
```
thread 'namespaced::local_captured_inpod' panicked at tests/namespaced.rs:1168:85:
called `Result::unwrap()` on an `Err` value: tcp client

Caused by:
    0: write failed
    1: Connection reset by peer (os error 104)
```

---

Access log change now looks like:

```
thread 'namespaced::local_captured_inpod' panicked at tests/namespaced.rs:1231:13:
Analyzed 40 logs but none matched our criteria. Closest 10 matches:

direction:"outboundx" != "outbound"

bytes_recv:"22" != "11"
bytes_sent:"11" != "22"
direction:"outboundx" != "inbound"

bytes_recv:missing
bytes_sent:missing
direction:missing
dst.identity:missing
dst.workload:missing
message:"connection complete" != "starting server"
scope:"access" != "ztunnel::test_helpers::inpod"
src.identity:missing
src.workload:missing
```

Before it just dumped all the logs
2025-01-07 12:48:02 -05:00
Istio Automation 1ce1099714
Automator: update common-files@master in istio/ztunnel@master (#1413) 2025-01-07 10:03:02 -05:00
John Howard cc9fcc70c3
xds: support overlapping IP addresses (#1409)
* xds: support overlapping IP addresses

Fixes https://github.com/istio/istio/issues/54529

The original design was to dedupe IPs in the control plane. However,
this approach was flawed. While it may be possible to handle this in
the control plane properly, it would be quite challenging. Instead, we
will allow multiple in Ztunnel itself. In the unlikely event of a
conflicting IP, we will store all matches and pick the best one for the
situation.

Note we only look up by IP in a few cases:
* On inbound, to get peer metadata *for telemetry*
* lookup_is_destination_this_waypoint for some niche cases
* Sending directly to a Pod IP, instead of to a service

Note that without this PR, if you have 2 objects with the same IP, the
last one updated will 'win'. If you remove one, it may remove any info
about the IP entirely.

* drop dbg
2025-01-06 13:37:01 -05:00
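A hedged sketch of the "store all matches" idea with illustrative types: keep every workload sharing an IP, so removing one entry no longer erases all knowledge of the IP, and pick the best candidate per lookup.

```
use std::collections::HashMap;
use std::net::IpAddr;

struct Workload { uid: String, network: String }

#[derive(Default)]
struct ByAddress { by_ip: HashMap<IpAddr, Vec<Workload>> }

impl ByAddress {
    fn insert(&mut self, ip: IpAddr, w: Workload) {
        self.by_ip.entry(ip).or_default().push(w);
    }
    fn remove(&mut self, ip: IpAddr, uid: &str) {
        if let Some(ws) = self.by_ip.get_mut(&ip) {
            ws.retain(|w| w.uid != uid);
            if ws.is_empty() { self.by_ip.remove(&ip); }
        }
    }
    /// Prefer a workload on the caller's network when IPs conflict.
    fn best_match(&self, ip: IpAddr, network: &str) -> Option<&Workload> {
        let ws = self.by_ip.get(&ip)?;
        ws.iter().find(|w| w.network == network).or_else(|| ws.first())
    }
}
```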
Istio Automation 6babfb169a
Automator: update common-files@master in istio/ztunnel@master (#1411) 2025-01-03 18:54:59 -05:00
Istio Automation 7b40e66dc0
Automator: update common-files@master in istio/ztunnel@master (#1410) 2025-01-03 16:39:57 -05:00
Istio Automation baebfa5426
Automator: update common-files@master in istio/ztunnel@master (#1408) 2025-01-02 17:20:57 -05:00
Istio Automation aa76dd4595
Automator: update common-files@master in istio/ztunnel@master (#1407) 2025-01-02 15:57:58 -05:00
Istio Automation 098b31cbb9
Automator: update common-files@master in istio/ztunnel@master (#1406) 2025-01-02 11:33:57 -05:00
Istio Automation a3795047b4
Automator: update common-files@master in istio/ztunnel@master (#1405) 2024-12-30 17:48:55 -05:00
Ben Leggett 0ad78e3dcf
Tidy `DelWorkload` handling in a pre-snapshot context (#1403)
* Fix this up so it's the inverse of `AddWorkload`

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Test

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* lint

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Fmt

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Clarifying comment

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Also make sure to prune `pending_workloads` during reconcile

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Can't during reconcile, it's not in the state

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Another test

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* wording

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

---------

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>
2024-12-20 14:53:16 -05:00
John Howard a9563b23b1
enable dependabot to auto update deps (#1400)
Use weekly approach in a single merged PR
2024-12-19 10:58:15 -05:00
John Howard eae5084275
Expose configuration parsing errors to user (#1396)
Right now we say it failed... but not why. Just a simple QOL improvement
2024-12-18 17:50:33 -05:00
John Howard c23eeedb75
Bump dependencies (#1397)
* Bump various dependencies to latest versions

* Move to educe

* update deny.toml
2024-12-18 16:38:33 -05:00
Istio Automation 40599c4372
Automator: update common-files@master in istio/ztunnel@master (#1394) 2024-12-17 13:15:00 -05:00
Steven Jin 69d74f7825
Actually use rustup llvm tools (#1393) 2024-12-17 09:24:01 -05:00
Steven Jin 4b5fe05546
Use rustup llvm tools (#1387)
* use llvm tools

* lint
2024-12-16 17:03:01 -05:00
Istio Automation 9402e94601
Automator: update common-files@master in istio/ztunnel@master (#1392) 2024-12-16 16:30:26 -05:00
Istio Automation c5b0997744
Automator: update common-files@master in istio/ztunnel@master (#1391) 2024-12-16 13:40:26 -05:00
Istio Automation 589a499ef4
Automator: update common-files@master in istio/ztunnel@master (#1389) 2024-12-13 11:58:03 -05:00
Lior Lieberman b3b463e036
add PROXY_WORKLOAD_INFO env var to inpodserver (#1388) 2024-12-12 20:07:02 -05:00
Istio Automation fcf9ad3bf7
Automator: update common-files@master in istio/ztunnel@master (#1385) 2024-12-10 05:36:38 -05:00
Istio Automation 029513a461
Automator: update common-files@master in istio/ztunnel@master (#1384) 2024-12-09 13:38:37 -05:00
Istio Automation eeb82bbe99
Automator: update common-files@master in istio/ztunnel@master (#1383) 2024-12-06 17:48:09 -05:00
Istio Automation 8ae0044b23
Automator: update common-files@master in istio/ztunnel@master (#1381) 2024-12-06 10:18:08 -05:00
John Howard e5b5796225
Add default TCP keepalives (#1377)
* Add default TCP keepalives

* ipv6 as well
2024-12-05 19:56:06 -05:00
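A sketch of enabling keepalives on a connection, assuming the `socket2` crate and a tokio `TcpStream` (which implements `AsFd` on recent tokio); the intervals are illustrative, not ztunnel's defaults.

```
use socket2::{SockRef, TcpKeepalive};
use std::time::Duration;

fn set_keepalive(stream: &tokio::net::TcpStream) -> std::io::Result<()> {
    let ka = TcpKeepalive::new()
        .with_time(Duration::from_secs(180))      // idle time before probing
        .with_interval(Duration::from_secs(180)); // gap between probes
    SockRef::from(stream).set_tcp_keepalive(&ka)
}
```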
John Howard a3fad3ede7
debugging: add some extra debug logs to introspect HBONE requests (#1378)
* debugging: add some extra debug logs to introspect HBONE requests

Right now we cannot see what is coming in on HBONE. I have >5 times done
a small local build to add this, so seems well worth having for
debugging.

* Update src/proxy/inbound.rs

Co-authored-by: Ian Rudie <ilrudie@gmail.com>

---------

Co-authored-by: Ian Rudie <ilrudie@gmail.com>
2024-12-05 18:08:07 -05:00
John Howard e433230227
Reformat with Rust 1.83 (#1374)
Introduces some new rules, apply them
2024-12-04 18:33:37 -05:00
Istio Automation c5671621f5
Automator: update common-files@master in istio/ztunnel@master (#1372) 2024-12-03 19:36:21 -05:00
Istio Automation 7179715d9c
Automator: update common-files@master in istio/ztunnel@master (#1371) 2024-12-02 12:49:35 -05:00
Ian Rudie a78139abce
adjust metrics content-type (#1361)
* mostly serve metrics with text/plain unless given an openmetrics Accept header

Signed-off-by: ilrudie <ian.rudie@solo.io>

* ability to parse multiple Accept headers

Signed-off-by: ilrudie <ian.rudie@solo.io>

* refactor to support testing, tests

Signed-off-by: ilrudie <ian.rudie@solo.io>

---------

Signed-off-by: ilrudie <ian.rudie@solo.io>
2024-11-25 14:09:55 -05:00
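A sketch of the negotiation described above: only serve the OpenMetrics exposition format when a client explicitly asks for it, defaulting to the Prometheus text format otherwise.

```
const OPENMETRICS: &str = "application/openmetrics-text";
const PLAIN: &str = "text/plain; version=0.0.4";

fn metrics_content_type(accept_headers: &[&str]) -> &'static str {
    // A request may carry several Accept headers, each with several values.
    let wants_openmetrics = accept_headers
        .iter()
        .flat_map(|h| h.split(','))
        .any(|v| v.trim().starts_with(OPENMETRICS));
    if wants_openmetrics { OPENMETRICS } else { PLAIN }
}
```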
John Howard d640dc5856
xds: gracefully handle stream termination without error (#1368)
Today, we only reconnect when the stream errors. If it ends with a
success, we effectively infinite-loop doing nothing.

In standard operations, Istiod will never end the stream without an
error. However, it is possible to expose Istiod with a proxy that does.
2024-11-25 12:23:53 -05:00
Istio Automation 3f6e627a03
Automator: update common-files@master in istio/ztunnel@master (#1369) 2024-11-22 16:06:17 -05:00
John Howard e56097cf47
Make loop protection more robust (#1366)
* Apply even for non-inpod
* Also block calls to localhost outbound
* Test ^
2024-11-19 11:07:59 -05:00
John Howard e2fb796f4a
Indicate ztunnel is now GA (#1364) 2024-11-18 15:54:57 -05:00
Istio Automation e303ba986a
Automator: update common-files@master in istio/ztunnel@master (#1359) 2024-11-07 14:42:27 -05:00
Istio Automation 288edb709c
Automator: update common-files@master in istio/ztunnel@master (#1357) 2024-11-05 23:33:54 -05:00
John Howard 9a192acb97
Initial support for build caching with rust (#1354)
This allows us to run `run-cached.sh cargo check` or similar commands
and get caching.

Unlike Go, rust doesn't allow concurrent writers to the build cache
which is unfortunate. Instead, we copy in the old cache and then write
it back when we are done. Not ideal, especially since we may run 2 jobs
at once and we will get one of their caches, not both, but should be
better than nothing at least.

I have tested this manually in my own prow setup and it seems to work.
2024-11-05 14:44:54 -05:00
Istio Automation 57a0862200
Automator: update common-files@master in istio/ztunnel@master (#1347) 2024-10-28 14:03:53 -04:00
Istio Automation ec354533f3
Automator: update common-files@master in istio/ztunnel@master (#1345) 2024-10-24 10:26:48 -04:00
Jeremy L. Morris 3cf0060179
scripts: early exit if command fails in test coverage (#1344) 2024-10-21 12:36:56 -04:00
John Howard 1a475d9678
Cleanup inpod test server (#1341)
Basically this just avoids blocking the async runtime, so we don't need
to do our own abnormal workarounds for that. Also cleans up the message
type to be more idiomatic.
2024-10-21 11:29:55 -04:00
Istio Automation 5739a4926b
Automator: update common-files@master in istio/ztunnel@master (#1340) 2024-10-15 16:46:50 -04:00
John Howard b0422dd7a3
Align test iptables with what we are using in real code (#1343)
Basically adds DNS
2024-10-14 11:29:27 -04:00
John Howard dc17724924
Fixes for rust 1.81 (#1342)
* Cleanup deadcode

Caught by the new rust, I think, but not the older one

* lints
2024-10-11 18:03:47 -04:00
Istio Automation 0245e846fa
Automator: update common-files@master in istio/ztunnel@master (#1339) 2024-10-08 18:58:44 -04:00
John Howard eadb328eee
Bump misc dependencies (#1338) 2024-10-08 12:40:44 -04:00
John Howard 8080b04d79
tls: better error messages (#1337)
The Debug form of these messages is much harder to read than the
Display form, so we do some tricks to get these to work
2024-10-08 11:09:44 -04:00
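One such trick, sketched: a newtype whose `Debug` forwards to `Display`, so log sites that format errors with `{:?}` still print the readable rendering. Whether ztunnel uses exactly this shape is an assumption; the commit only says "tricks".

```
use std::fmt;

struct DisplayDebug<E: fmt::Display>(E);

impl<E: fmt::Display> fmt::Debug for DisplayDebug<E> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Forward Debug to the human-readable Display rendering.
        fmt::Display::fmt(&self.0, f)
    }
}
```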
Istio Automation 439c87e853
Automator: update common-files@master in istio/ztunnel@master (#1335) 2024-10-04 20:53:01 -04:00
Istio Automation 9320371cc3
Automator: update common-files@master in istio/ztunnel@master (#1334) 2024-10-03 18:45:01 -04:00
John Howard 15472bf81c
Reduce stack bloat on inbound (#1331)
First, add new checks. Our inbound flow has 3 nested futures, we only
measured the outer one.

Next, two optimizations. Unfortunately, I don't really understand either
of them... the stack sizing is a bit of black magic.

1. Drop #instrument for .instrument(). Result is the same but one uses
   500b less?
2. Inline handle_connection, which saves 1kb

Before
```
tracing::instrument::Instrumented<ztunnel::proxy::inbound_passthrough::InboundPassthrough::run::>,2464
tracing::instrument::Instrumented<ztunnel::proxy::outbound::Outbound::run::>,1488
tracing::instrument::Instrumented<ztunnel::proxy::socks5::Socks5::run::>,1808
ztunnel::proxy::h2::server::serve_connection<>>,6440
ztunnel::proxy::inbound::Inbound::run::,1400
ztunnel::proxy::inbound::Inbound::serve_connect::{{closure}},3576
```

After
```
tracing::instrument::Instrumented<ztunnel::proxy::inbound_passthrough::InboundPassthrough::run::>,1672
tracing::instrument::Instrumented<ztunnel::proxy::outbound::Outbound::run::>,1488
tracing::instrument::Instrumented<ztunnel::proxy::socks5::Socks5::run::>,1808
ztunnel::proxy::h2::server::serve_connection<>>6440
ztunnel::proxy::inbound::Inbound::run::,1400
tracing::instrument::Instrumented<ztunnel::proxy::inbound::Inbound::serve_connect::{{closure}}>,2128
```
2024-10-03 11:07:31 -04:00
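Optimization 1 in sketch form: attaching the span with `tracing::Instrument::instrument` at the call site instead of the `#[instrument]` attribute macro. The spans emitted are the same; only the generated future shrinks. Function and span names here are illustrative.

```
use tracing::{info_span, Instrument};

async fn serve_connect() { /* handler body */ }

async fn run() {
    // was: #[tracing::instrument] on serve_connect
    serve_connect().instrument(info_span!("inbound")).await;
}
```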
John Howard f2309fc4c6
socks5: slightly improve error reporting (#1332)
* socks5: slightly improve error reporting

This is the start of proper socks5 error reporting. There is some
followup needed, but its pretty invasive, so wanted to get at least the
base merged. The full thing is in
https://github.com/howardjohn/ztunnel/tree/socks5/report-error and I
will keep working on it and followup.

End result, using a Go client when we have a DNS lookup error:

Before: `socks connect tcp 127.0.0.1:15080->echo.ecs.local:8080: dial tcp 127.0.0.1:15080: connect: connection refused`
After: `socks connect tcp 127.0.0.1:15080->echo.ecs.local:8080: unknown error host unreachable`

* fixup
2024-10-02 14:14:29 -04:00
dependabot[bot] 9f1024bff8
Bump tonic from 0.12.2 to 0.12.3 in /fuzz (#1333)
Bumps [tonic](https://github.com/hyperium/tonic) from 0.12.2 to 0.12.3.
- [Release notes](https://github.com/hyperium/tonic/releases)
- [Changelog](https://github.com/hyperium/tonic/blob/master/CHANGELOG.md)
- [Commits](https://github.com/hyperium/tonic/compare/v0.12.2...v0.12.3)

---
updated-dependencies:
- dependency-name: tonic
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-02 13:09:29 -04:00
John Howard 967e2ceea7
Prevent `Error` enum from being huge (#1330)
This one case makes the whole enum big, which ends up bloating some call
stacks. Clippy has a lint for this I think, but we must be under the
threshold.

Before
```
tracing::instrument::Instrumented<ztunnel::proxy::inbound_passthrough::InboundPassthrough::run::{{closure}}::{{closure}}::{{closure}}::{{closure}}>,2784
tracing::instrument::Instrumented<ztunnel::proxy::outbound::Outbound::run::{{closure}}::{{closure}}::{{closure}}::{{closure}}>,1648
tracing::instrument::Instrumented<ztunnel::proxy::socks5::Socks5::run::{{closure}}::{{closure}}::{{closure}}::{{closure}}>,1968
ztunnel::proxy::inbound::Inbound::run::{{closure}}::{{closure}}::{{closure}}::{{closure}},1400
```

After
```
tracing::instrument::Instrumented<ztunnel::proxy::inbound_passthrough::InboundPassthrough::run::{{closure}}::{{closure}}::{{closure}}::{{closure}}>,2464
tracing::instrument::Instrumented<ztunnel::proxy::outbound::Outbound::run::{{closure}}::{{closure}}::{{closure}}::{{closure}}>,1488
tracing::instrument::Instrumented<ztunnel::proxy::socks5::Socks5::run::{{closure}}::{{closure}}::{{closure}}::{{closure}}>,1808
ztunnel::proxy::inbound::Inbound::run::{{closure}}::{{closure}}::{{closure}}::{{closure}},1400
```
2024-10-02 10:31:29 -04:00
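The usual fix, sketched with assumed names: box the oversized variant so `size_of::<Error>()` tracks the small common case rather than the largest member, which otherwise bloats every future and stack frame carrying a `Result<_, Error>`.

```
struct HugePayload { details: [u8; 1024] }

enum Error {
    Io(std::io::Error),
    // Stored inline, HugePayload would dominate the enum's size.
    Huge(Box<HugePayload>),
}
```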
dependabot[bot] ac4e07ec03
Bump tonic from 0.12.2 to 0.12.3 (#1329)
Bumps [tonic](https://github.com/hyperium/tonic) from 0.12.2 to 0.12.3.
- [Release notes](https://github.com/hyperium/tonic/releases)
- [Changelog](https://github.com/hyperium/tonic/blob/master/CHANGELOG.md)
- [Commits](https://github.com/hyperium/tonic/compare/v0.12.2...v0.12.3)

---
updated-dependencies:
- dependency-name: tonic
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-10-01 19:45:29 -04:00
John Howard 61be723007
logging: minor tweaks to debugging (#1328)
While debugging an issue (which, it turns out, was not a real issue), I
was quite confused by the logs. This is mostly around not logging
service xds operations, and doing some confusing `remove` calls (which
handle both svc and workload) when we know it's a workload. This just
cleans things up a bit.
2024-10-01 16:25:29 -04:00
John Howard a0b200a055
tls: allow alternative hostname verification for control plane (#1320)
* tls: allow alternative hostname verification for control plane

Use cases:
* Connect to control plane in places where we do not have DNS (but
  istiod does not have an IP cert)
* Connect through a passthrough LB

* add log
2024-10-01 13:10:29 -04:00
Istio Automation a413e1c425
Automator: update common-files@master in istio/ztunnel@master (#1325) 2024-09-24 12:31:43 -04:00
Steven Jin f63bfc4de4
Test with coverage (#1283)
* Test with coverage

* Roll back make test

* Add coverage target

* use sh

* Pipeline seems to work
2024-09-23 13:11:23 -04:00
Istio Automation c26c7a89bc
Automator: update common-files@master in istio/ztunnel@master (#1324) 2024-09-20 14:25:20 -04:00
Jeremy L. Morris 97f195adc9
fix function typo noticed in test (#1322) 2024-09-20 13:46:19 -04:00
John Howard d795e33a29
auth: async-ify and clean up (#1321)
The Interceptor usage in tonic is not really useful for us since we make
our own client for other reasons (tls, etc) so we can already easily
insert some headers. It also makes us do some awkward DefaultIncoming
stuff, and doesn't allow async.

This moves the auth to async, which is generally a good idea and more
flexible, and cleans up some of the cruft
2024-09-19 17:53:18 -04:00
Ben Leggett ed6e70ed52
Use a per-request backoff for cert fetches, rather than a global one (#1319)
Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>
2024-09-19 16:04:18 -04:00
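A sketch of the shape of the change (the doubling backoff policy here is illustrative, not ztunnel's): construct the backoff inside each fetch, so one slow identity does not inflate the delay every other certificate request observes.

```
use std::time::Duration;

struct Backoff { next: Duration, max: Duration }

impl Backoff {
    fn new() -> Self {
        Backoff { next: Duration::from_millis(50), max: Duration::from_secs(15) }
    }
    fn advance(&mut self) -> Duration {
        let cur = self.next;
        self.next = (self.next * 2).min(self.max);
        cur
    }
}

async fn fetch_cert_with_retry(identity: &str) {
    let mut backoff = Backoff::new(); // per-request, not global
    while !try_fetch(identity).await {
        tokio::time::sleep(backoff.advance()).await;
    }
}

async fn try_fetch(_identity: &str) -> bool { true } // stub for the CA call
```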
John Howard 7be35b1a61
tls: better error on TLS handshake failures (#1316) 2024-09-17 13:02:16 -04:00
John Howard ee6b8adbbf
build: allow building off linux platforms (#1315)
Some small fixes to make things compile. Unfortunately I don't know a
great way to enforce this in CI, but for now better to fix than leave it
broken, I suppose.
2024-09-17 12:14:17 -04:00
John Howard 50c4421db7
Revert "Validate (and do not set in our test) invalid app_tunnnel (#1304)" (#1312)
This reverts commit a3c5ea50c6.
2024-09-16 11:55:16 -04:00
Istio Automation e7d7348aff
Automator: update common-files@master in istio/ztunnel@master (#1314) 2024-09-13 19:42:04 -04:00
John Howard 49b27b6f4b
When building fuzz tests, build the same way the fuzzer does (#1313) 2024-09-13 19:32:28 -04:00
John Howard eb66b5b193
Allow referencing waypoints that are sent as workloads rather than services (#1306)
Currently, we are failing if the waypoint is a workload. This is fine
for our typical cases, where we have a Gateway making a
Service+Deployment, so we get a Service + Workloads.

But in other cases, we may use an external waypoint solution, where we
only get an Address in the Gateway status, and we need to map it to
something. In these cases, it may be desirable to support these as
Workloads. For instance, we map a variety of things as Workloads -- not
just Pods:
```
		PodWorkloads,
		WorkloadEntryWorkloads,
		ServiceEntryWorkloads,
		EndpointSliceWorkloads,
		NetworkGatewayWorkloads,
```

So this enables any of those to be used
2024-09-12 17:37:35 -04:00
Istio Automation f9d98e299b
Automator: update common-files@master in istio/ztunnel@master (#1310) 2024-09-12 13:02:36 -04:00
John Howard 5e4b4c416e
Construct config only when we need it (#1305)
Avoid building it for `version` or `help`.

Just a minor cleanup; it's not a huge deal.
2024-09-12 12:00:42 -04:00
John Howard a3c5ea50c6
Validate (and do not set in our test) invalid app_tunnnel (#1304)
We shouldn't accept this type of config, as it has no meaning. We also
don't need to test it
2024-09-12 12:00:35 -04:00
John Howard 75d8f9b77b
misc: bump various dependencies (#1303)
Nothing special here, just bumping to latest
2024-09-12 11:53:35 -04:00
John Howard 550cc91184
Fix release build regression (#1309)
In https://github.com/istio/ztunnel/pull/1301, we accidentally do not
set ARCH so the build name is wrong
2024-09-11 20:57:34 -04:00
Istio Automation 7eb641e3a7
Automator: update common-files@master in istio/ztunnel@master (#1308) 2024-09-11 15:46:47 -04:00
John Howard 6bd198a164
Support inline XDS config and auth (#1307)
Having to always use files is troublesome when we want to run in locked
down environments that don't have file write access. Nice to be able to
set these inline
2024-09-11 11:38:34 -04:00
John Howard bd09cb81b7
release.sh: support building alternative TLS providers (#1301)
Small helper to make these release scripts easy to build alternative TLS
modes
2024-09-10 16:50:33 -04:00
John Howard 098a2f8107
Makefile: fix bad `fmt` args (#1300)
Cargo fmt does not take feature flags. This only works in CI since we
never call these targets with feature flags.
2024-09-10 16:46:33 -04:00
John Howard 08562ec57b
refactor: rename ClientCertProvider to clarify it is for control plane usage (#1299) 2024-09-10 16:32:33 -04:00
John Howard 54c1a245c9
inbound: unconditionally log errors (#1294)
Previously, our error logging was built on the assumption that we would
always remember to log errors and return HTTP responses. This is not
great; we should enforce this in the type system -- we are only one `?`
away from broken error handling, and we currently have one such case
(`local_workload_information`).

This builds enforcement into the type system so that we cannot
accidentally mask errors.

Instead of `serve_connect` returning an error (which goes to /dev/null),
we have it return nothing -- this should make it harder to insert any
new code in the wrong place that doesn't handle errors. Additionally, we
split up the function into 3 phases:
* Before we have full metrics context
* After we have full metrics context
* After we already sent a 200 back

This way we can use `?` ergonomically + safely, and consolidate where we
log/return error responses into a small set of places
2024-09-10 16:26:32 -04:00
John Howard 28e0c17e3e
Silence new warnings with rust 1.80 (#1297)
See links in comments for more context
2024-09-10 14:24:33 -04:00
Istio Automation 3c43dd0849
Automator: update common-files@master in istio/ztunnel@master (#1295) 2024-09-06 01:43:28 -04:00
John Howard b70c839ddb
Remove deadcode in waypoint_svc_ip_address (#1290)
Minor cleanup
2024-09-05 15:45:28 -04:00
John Howard 3d49e0119a
h2: drop spammy ping/pong errors (#1291)
See comments in code for info
2024-09-05 14:21:29 -04:00
Istio Automation 5d9ef4528f
Automator: update common-files@master in istio/ztunnel@master (#1286) 2024-08-28 21:12:19 -04:00
John Howard 0cb7516816
zds: fix retrying a bad netns (#1284)
Fixes https://github.com/istio/istio/issues/52858
2024-08-28 13:58:19 -04:00
John Howard f4f49be90d
dns: forward requests from the pod workload (#1282)
Fixes https://github.com/istio/ztunnel/issues/1281
2024-08-27 17:21:45 -04:00
John Howard 98ff529c76
test: show how to enter user namespace as well (#1279)
To make debugging more realistic
2024-08-27 13:15:45 -04:00
John Howard 8e4f88bba5
dns: use new local workload fetch (#1280)
Followup on https://github.com/istio/ztunnel/pull/1218, applying the
same logic to DNS server
2024-08-27 12:35:45 -04:00
John Howard d0b36949ec
Document json logging (#1277)
fixes https://github.com/istio/ztunnel/issues/1206
2024-08-26 11:37:44 -04:00
John Howard 134d33419a
Fix incorrect function naming (#1278)
The behavior is right, just the naming is off. It's a weird double
negative since we are checking that it's `!disabled`
2024-08-23 14:23:26 -04:00
John Howard 4de5e896fa
Fetch local workload directly, rather than based on IP (#1218)
* Initial

* fixes

* certs use it

* working except tests

* more working

* fix all tests

* drop WLI

* Make cert fetcher not use IP

* drop ip lookup in fetch_workload_services

* misc cleanup

* fix tests that need root

* Fix dev info

* misc

* fix tests

* fixes

* fuzz rc
2024-08-23 11:26:26 -04:00
John Howard 1b9172443c
Cleanup redirection scripts (#1276)
We only use two now, clean up some stale ones and tidy up one we do use
2024-08-23 11:14:26 -04:00
John Howard 1b8c17f3f3
Optimize XDS surrounding workload updates (#1274)
* add benchmark

* Do not remove the service entirely if we don't need to

* arc the endpoints

* Drop workload_to_services

* im-rs

* remove too

* cleanup

* Revert "im-rs"

This reverts commit 415243ba22.

* fixes
2024-08-22 18:39:25 -04:00
Ben Leggett b97129e6f5
Don't create V6 binds if global V6 flag is off (#1275)
* Don't create V6 binds if it's off

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* just do this

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

---------

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>
2024-08-22 18:14:25 -04:00
Yuval Kohavi 30d7d0868c
rbac - include policy name in rejection logs (#1273)
* rbac - include policy name in rejection logs

- Added policy ns/name to AuthorizationPolicyRejection
- Added a new error for the ConnectionTrackingFailed case, as we can't use AuthorizationPolicyRejection
- fill in the deny policy in assert_rbac and added tests

* lint & format

* fix test assert

* format again
2024-08-22 16:51:25 -04:00
John Howard 2abf86b9c5
Improvements to Endpoint handling (#1269)
* Remove redundant clone

* Simplify Endpoint object

* Endpoints abstraction

* fix tests

* serde

* Address comments
2024-08-21 15:16:23 -04:00
John Howard 9e5638b427
dns: prefer the original family of the request (#1265)
* dns: prefer the original family of the request

This fixes DNS SE usage when in an IPv6 only env

* better comment
2024-08-21 11:09:24 -04:00
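A sketch of the preference rule (illustrative, not ztunnel's resolver code): answer with records in the same address family as the query, falling back to the other family only when nothing matches.

```
use std::net::IpAddr;

fn prefer_family(addrs: &[IpAddr], query_is_v6: bool) -> Vec<IpAddr> {
    let same: Vec<IpAddr> = addrs
        .iter()
        .copied()
        .filter(|a| a.is_ipv6() == query_is_v6)
        .collect();
    // Fall back only if the preferred family has no records at all.
    if same.is_empty() { addrs.to_vec() } else { same }
}
```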
Istio Automation 8f09ffd040
Automator: update common-files@master in istio/ztunnel@master (#1271) 2024-08-21 00:24:25 -04:00
Istio Automation 7a5492f17b
Automator: update common-files@master in istio/ztunnel@master (#1270) 2024-08-20 16:33:24 -04:00
Istio Automation 16885a2cbb
Automator: update common-files@master in istio/ztunnel@master (#1266) 2024-08-19 16:06:22 -04:00
John Howard 7feb575298
Update tests to use REDIRECT instead of TPROXY (#1263)
Mirror https://github.com/istio/istio/pull/52622
2024-08-16 13:48:47 -04:00
John Howard 415c0c8202
Bump various dependencies (#1264)
`cargo update`
2024-08-16 13:39:46 -04:00
Istio Automation 217cdec1e6
Automator: update common-files@master in istio/ztunnel@master (#1262) 2024-08-14 20:53:20 -04:00
Yuval Kohavi 7550e29bf9
add json header to config dump, so the browser renders it nicely (#1261) 2024-08-13 12:20:18 -04:00
Zehuan Shi d09a1d7dc0
Fix AuthorizationPolicy still taking effect in the namespace when scope changes to Workload (#1247) (#1248) 2024-08-11 23:27:53 -04:00
John Howard 9c5f653623
More consistent logging for inpod (#1256)
Log the UID wherever we can, basically. The only other difference is to
log the error before we say we are retrying, instead of after (which makes
it seem like the error comes after the retry)
2024-08-07 19:25:24 -04:00
Istio Automation f0a2fc5136
Automator: update common-files@master in istio/ztunnel@master (#1255) 2024-08-07 15:53:24 -04:00
John Howard 94e4823811
Fix regression in waypoints to use original IP (#1253)
Regression from https://github.com/istio/ztunnel/pull/1224

Before that PR (and now the behavior returns), we would use the IP
family of the IP stuck in the workload.waypoint or service.waypoint
field. This was always IPv4 due to istiod's implementation.

With the PR, we started to use the original IP family of the request.
Since waypoints had IPv6 addresses, we would use them. Due to another
bug in Istiod (fixed in https://github.com/istio/istio/pull/52555), the
waypoints didn't actually listen on IPv6, though, so would drop the
requests.

With this PR, we will continue to send to the IP actually specified. In
the near future we will move to hostname references, where we can
correctly pick the best IP family based on the request.
2024-08-07 14:56:23 -04:00
Istio Automation a1e3078848
Automator: update common-files@master in istio/ztunnel@master (#1252) 2024-08-07 11:19:24 -04:00
John Howard 23d899af37
copy: fix handling of blocked writers (#1249)
Fixes https://github.com/istio/ztunnel/issues/1196

This adds a test which tests how the copy logic behaves in the face of
writers that do not always accept all the data we are writing. This
exposes two issues:
* If the write is Pending entirely, we dropped the saved buffer.
* If we did a partial write, our check was backwards causing us to not
  save the buffer.
2024-08-02 12:20:13 -04:00
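The bug class, sketched (not ztunnel's actual copy loop): after a partial write, the unwritten tail must be retained for the next poll rather than dropped.

```
fn advance_buffer(buf: &mut Vec<u8>, written: usize) {
    if written < buf.len() {
        // Partial write: keep the unwritten tail for the next attempt.
        // (One of the bugs above: this comparison was backwards.)
        buf.drain(..written);
    } else {
        buf.clear();
    }
}
```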
John Howard 189df4c378
Support hostname waypoints (#1224)
* Improve XDS error diagnostics

Based on user feedback.

Before/after

dns outage
```
2024-07-29T17:10:59.111431Z     warn    xds::client:xds{id=14}  XDS client connection error: gRPC connection error:status: Unknown, message: "client error (Connect)", source: dns error: failed to lookup address information: Temporary failure in name resolution, retrying in 15s
2024-07-29T17:22:14.958433Z     warn    xds::client:xds{id=3}   XDS client connection error: gRPC connection error connecting to https://istiod.istio-system.svc:15012: status: Unknown, message: "client error (Connect)", source: dns error: failed to lookup address information: Temporary failure in name resolution (hint: is the DNS server reachable?), retrying in 80ms
```

wrong dns name
```
2024-07-29T17:22:47.910253Z     warn    xds::client:xds{id=10}  XDS client connection error: gRPC connection error:status: Unknown, message: "client error (Connect)", source: dns error: failed to lookup address information: Name or service not known, retrying in 10.24s
2024-07-29T17:22:47.910253Z     warn    xds::client:xds{id=10}  XDS client connection error: gRPC connection error connecting to https://istiodx.istio-system.svc:15012: status: Unknown, message: "client error (Connect)", source: dns error: failed to lookup address information: Name or service not known, retrying in 10.24s
```

Bad auth (ztunnel)
```
2024-07-29T17:25:29.137815Z     warn    xds::client:xds{id=11}  XDS client connection error: gRPC connection error connecting to https://istiod.istio-system.svc:15012: status: Unauthenticated, message: "authentication failure", retrying in 15s
2024-07-29T17:35:00.273104Z     warn    xds::client:xds{id=9}   XDS client connection error: gRPC connection error connecting to https://istiod.istio-system.svc:15012: status: Unauthenticated, message: "authentication failure" (hint: check the control plane logs for more information), retrying in 5.12s
```

* Support hostname waypoints

* add test and drop unwrap
2024-08-01 17:46:13 -04:00
John Howard cb56b5bad6
Add support for `publishNotReadyAddresses` (#1231)
* Add support for `publishNotReadyAddresses`

* rename
2024-07-31 15:26:05 -04:00
Ben Leggett d80323823c
Use dev + inode to identify/compare netns (#1244)
* Use dev + ino to identify/compare netns

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Drop todo

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* This can be `Copy`, it's u64s

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

---------

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>
2024-07-30 19:40:47 -04:00
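For reference, the (dev, ino) pair in #1244 comes straight from stat(2) metadata on the namespace handle; a minimal sketch assuming a /proc-style path:

```rust
use std::fs;
use std::io;
use std::os::unix::fs::MetadataExt;

/// A netns is uniquely identified by the (device, inode) of its handle;
/// the inode alone can repeat across devices, so we compare the pair.
/// Both fields are u64, so the key is cheaply `Copy`.
fn netns_id(path: &str) -> io::Result<(u64, u64)> {
    let meta = fs::metadata(path)?;
    Ok((meta.dev(), meta.ino()))
}

fn same_netns(a: &str, b: &str) -> io::Result<bool> {
    Ok(netns_id(a)? == netns_id(b)?)
}

// e.g. same_netns("/proc/self/ns/net", "/proc/1/ns/net")
```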
John Howard b40c577840
Support building with rust 1.80 (#1241)
* Improve XDS error diagnostics

Based on user feedback.

Before/after

dns outage
```
2024-07-29T17:10:59.111431Z     warn    xds::client:xds{id=14}  XDS client connection error: gRPC connection error:status: Unknown, message: "client error (Connect)", source: dns error: failed to lookup address information: Temporary failure in name resolution, retrying in 15s
2024-07-29T17:22:14.958433Z     warn    xds::client:xds{id=3}   XDS client connection error: gRPC connection error connecting to https://istiod.istio-system.svc:15012: status: Unknown, message: "client error (Connect)", source: dns error: failed to lookup address information: Temporary failure in name resolution (hint: is the DNS server reachable?), retrying in 80ms
```

wrong dns name
```
2024-07-29T17:22:47.910253Z     warn    xds::client:xds{id=10}  XDS client connection error: gRPC connection error:status: Unknown, message: "client error (Connect)", source: dns error: failed to lookup address information: Name or service not known, retrying in 10.24s
2024-07-29T17:22:47.910253Z     warn    xds::client:xds{id=10}  XDS client connection error: gRPC connection error connecting to https://istiodx.istio-system.svc:15012: status: Unknown, message: "client error (Connect)", source: dns error: failed to lookup address information: Name or service not known, retrying in 10.24s
```

Bad auth (ztunnel)
```
2024-07-29T17:25:29.137815Z     warn    xds::client:xds{id=11}  XDS client connection error: gRPC connection error connecting to https://istiod.istio-system.svc:15012: status: Unauthenticated, message: "authentication failure", retrying in 15s
2024-07-29T17:35:00.273104Z     warn    xds::client:xds{id=9}   XDS client connection error: gRPC connection error connecting to https://istiod.istio-system.svc:15012: status: Unauthenticated, message: "authentication failure" (hint: check the control plane logs for more information), retrying in 5.12s
```

* Support building with rust 1.80

* `time` needs update in fuzz/
  (https://github.com/time-rs/time/issues/696, probably a bug), so I
updated all packages
* Fix comment that is supposed to have a newline
* Disable 2 new linters that are false positives all over the codebase
2024-07-29 19:56:10 -04:00
John Howard 8c425f38e0
Improve XDS error diagnostics (#1238)
Based on user feedback.

Before/after

dns outage
```
2024-07-29T17:10:59.111431Z     warn    xds::client:xds{id=14}  XDS client connection error: gRPC connection error:status: Unknown, message: "client error (Connect)", source: dns error: failed to lookup address information: Temporary failure in name resolution, retrying in 15s
2024-07-29T17:22:14.958433Z     warn    xds::client:xds{id=3}   XDS client connection error: gRPC connection error connecting to https://istiod.istio-system.svc:15012: status: Unknown, message: "client error (Connect)", source: dns error: failed to lookup address information: Temporary failure in name resolution (hint: is the DNS server reachable?), retrying in 80ms
```

wrong dns name
```
2024-07-29T17:22:47.910253Z     warn    xds::client:xds{id=10}  XDS client connection error: gRPC connection error:status: Unknown, message: "client error (Connect)", source: dns error: failed to lookup address information: Name or service not known, retrying in 10.24s
2024-07-29T17:22:47.910253Z     warn    xds::client:xds{id=10}  XDS client connection error: gRPC connection error connecting to https://istiodx.istio-system.svc:15012: status: Unknown, message: "client error (Connect)", source: dns error: failed to lookup address information: Name or service not known, retrying in 10.24s
```

Bad auth (ztunnel)
```
2024-07-29T17:25:29.137815Z     warn    xds::client:xds{id=11}  XDS client connection error: gRPC connection error connecting to https://istiod.istio-system.svc:15012: status: Unauthenticated, message: "authentication failure", retrying in 15s
2024-07-29T17:35:00.273104Z     warn    xds::client:xds{id=9}   XDS client connection error: gRPC connection error connecting to https://istiod.istio-system.svc:15012: status: Unauthenticated, message: "authentication failure" (hint: check the control plane logs for more information), retrying in 5.12s
```
2024-07-29 15:59:42 -04:00
John Howard 3a5fd8982d
Make docs tests not fail (#1227) 2024-07-29 15:59:35 -04:00
Istio Automation 5f895294a8
Automator: update common-files@master in istio/ztunnel@master (#1237) 2024-07-29 15:12:35 -04:00
John Howard 186b5aef87
Fix inconsistency in inbound dst.addr logging (#1230)
Now it mirrors the outbound logs, and has a meaningful split between
'dst.addr' and 'dst.hbone_addr'
2024-07-26 15:14:31 -04:00
John Howard 61521fdb73
Add warning log to check NetworkPolicy for 15008 denial (#1229)
This is the top feedback from users running into issues. Example

```
2024-07-26T17:18:00.953004Z     error   access  connection complete     src.addr=10.244.0.9:58592 src.workload="shell-56bd5dbdbf-gz6cq" src.namespace="default" src.identity="spiffe://cluster.local/ns/default/sa/default" dst.addr=10.244.0.10:15008 dst.hbone_addr=10.244.0.10:80 dst.service="echo.default.svc.cluster.local" dst.workload="echo-66d88ff694-pnk6h" dst.namespace="default" dst.identity="spiffe://cluster.local/ns/default/sa/default" direction="outbound" bytes_sent=0 bytes_recv=0 duration="10002ms" error="connection timed out, maybe a NetworkPolicy is blocking HBONE port 15008: deadline has elapsed"
```
2024-07-26 15:08:32 -04:00
John Howard d25ecbe253
Enable DNS server by default (#1226)
This enables the DNS server by default. This has no impact on the
*behavior* of applications out of the box -- they still need to opt-in
to redirecting traffic to it. However, by making this on by default, we
require only one knob to opt-in to DNS, rather than 2 which need to be
kept in sync.
2024-07-25 18:13:31 -04:00
John Howard 6d58d575a1
Add support for hostnetwork pods (#1216)
This goes along with https://github.com/istio/istio/pull/52191. Most of
the change is in Istiod, the only real change here is to stop indexing
the workload address if it is a host network pod.

In terms of user facing behavior, this means that if I call a
host-network pod directly (NOT via service), then it will not associate
with the workload. If this were to happen, we would effectively be
associating it with an arbitrary host network pod on the same node;
there is no way to distinguish these. In practice, hostnetwork pods
cannot be HBONE, so this ensures that we consistently get "unknown" as
the telemetry info rather than a random pod. This is encoded in the new
test.

There are additional misc trace-level logging improvements that helped me
debug/develop this.
2024-07-24 10:33:30 -04:00
Istio Automation f3e81e49a8
Automator: update common-files@master in istio/ztunnel@master (#1223) 2024-07-23 10:33:31 -04:00
John Howard 8782798d28
Fix test that takes 5s (#1220)
Make the time faked so we don't need to wait the full 5s
2024-07-22 18:41:29 -04:00
Istio Automation d23fb9b77f
Automator: update common-files@master in istio/ztunnel@master (#1215) 2024-07-18 12:37:19 -04:00
John Howard e3079265d5
Disable IP spoofing on outbound (#1209)
* Disable IP spoofing on outbound

This is not needed at all, as the ztunnel now always runs in the same
network as the workload (both inpod and dedicated).

The main part of this effort was the testing. The 'dedicated' mode is
not really testing 'dedicated mode', but rather the legacy geneve
stuff. This PR moves it closer to testing a real world setup (which runs
like an Istio sidecar/VM).

The challenge here is setting up iptables. In the real world, we just run
the process and ztunnel in different network namespaces. I couldn't find a way
to do that in our case, since we have one thread and some user namespace
magic (the latter was the bigger problem). To work around this, I use a
mark, which is not compiled in by default.

The end result is we have two modes (in tests and real world):
* In pod mode
* Dedicated mode, with ztunnel running directly next to the app in the
  same network, with iptables rules doing traffic capture

* Just unify the mark
2024-07-17 23:17:17 -04:00
Istio Automation 024cc57990
Automator: update common-files@master in istio/ztunnel@master (#1213) 2024-07-17 16:11:17 -04:00
Ian Rudie ab4cba9be8
add context to inpod::workloadmanager failed-to-connect log message (#1212)
* add context to inpod::workloadmanager failed-to-connect log message

Signed-off-by: ilrudie <ian.rudie@solo.io>

* Update src/inpod/workloadmanager.rs

refined log verbiage

Co-authored-by: Ben Leggett <854255+bleggett@users.noreply.github.com>

---------

Signed-off-by: ilrudie <ian.rudie@solo.io>
Co-authored-by: Ben Leggett <854255+bleggett@users.noreply.github.com>
2024-07-17 14:07:16 -04:00
Steven Jin 7d1205f8e5
More XDS metrics (#1205)
* More XDS metrics

* Index everything by type

* better names
2024-07-16 17:19:17 -04:00
Steven Jin 43049a6837
Update instructions for Kind setup (#1207) 2024-07-15 17:44:15 -04:00
Steven Jin c61af130a0
Update development instructions for local development (#1204)
* Update development instructions for local development

* Update Development.md

Co-authored-by: Ben Leggett <854255+bleggett@users.noreply.github.com>

* Update Development.md

Co-authored-by: Ben Leggett <854255+bleggett@users.noreply.github.com>

---------

Co-authored-by: Ben Leggett <854255+bleggett@users.noreply.github.com>
2024-07-12 09:08:11 -04:00
John Howard 7fb7de8aba
Trace tokio spawns when using Hyper (#1199)
Inspired by https://github.com/linkerd/linkerd2-proxy/blob/main/linkerd/proxy/http/src/executor.rs#L5
2024-07-11 14:38:47 -04:00
Istio Automation f9e7bb7706
Automator: update common-files@master in istio/ztunnel@master (#1202) 2024-07-09 18:05:22 -04:00
John Howard 88320b0646
dependencies: update to tonic 0.12 (#1170)
* dependencies: update to tonic 0.12

Pending https://github.com/hyperium/tonic/pull/1740. DNM until it's
released.

This lets us drop a few shims

* Use official

* update fuzz

* copyright

* no clone needed
2024-07-09 12:27:22 -04:00
John Howard 15d856120c
Drop unused struct (#1200)
Detected by nightly rust
2024-07-08 18:56:22 -04:00
Istio Automation 007b0f3de5
Automator: update common-files@master in istio/ztunnel@master (#1201) 2024-07-08 18:41:23 -04:00
Ben Leggett 3fd4df9bc0
This isn't really relevant for ProxyMode::Dedicated. (#1197)
Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>
2024-07-08 13:13:21 -04:00
Ben Leggett 1963cd7b74
combine `proxymode = shared` and `inpod_mode = true` (#1031)
* Combine `inpod_mode = true` and `proxy_mode = shared`

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Fixup

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Fixup

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Bring back some tests as dedicated

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* comment

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Fix bench

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Fixup/resync

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

---------

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>
2024-07-08 12:53:22 -04:00
John Howard c68a919c2e
Abstract and fix draining (#1176)
* Abstract and fix draining

* Centralize draining logic in one helper function
* Fix inbound draining (HBONE). Before, we did not shut down the
  listener upon draining. This meant new connections would go to the old
ztunnel on a ztunnel restart.
* Simplify inbound draining; do not re-create the force shutdown logic,
  and instead let the common abstraction do it (which does it slightly
better)
* socks5: add proper draining with force shutdown. Remove the double-spawn,
  which added some complexity around proxy_to_cancellable.

This is primarily tested in https://github.com/istio/istio/pull/51710,
which sends a large stream of requests and restarts ztunnel and the
backend app (2 different tests). With this change, these tests pass.

It would be good to get more isolated tests in this repo in the future
as well

* Refactor out into its own package

* Add tests for draining

* unclean but forceful shutdown

* fmt

* Fix flakes

* fix flake
2024-07-05 15:36:18 -04:00
Istio Automation 7a34469205
Automator: update common-files@master in istio/ztunnel@master (#1195) 2024-07-04 02:16:08 -04:00
Istio Automation d4ab2ea016
Automator: update common-files@master in istio/ztunnel@master (#1190) 2024-07-02 12:52:50 -04:00
John Howard 7d5724ab9c
Add more context on statemanager logs (#1189)
Ensure we always know what workload we are talking about
2024-06-28 17:39:46 -04:00
John Howard fa32afdd85
Improve logging of errors in copy (#1188)
Fixes https://github.com/istio/ztunnel/issues/1183.

Before, we logged basically any error during copying. It turns out this
confuses users a lot when they see "connection reset by peer" just
because the other end shutdown. There are a number of sequences of
events that can lead to non-graceful termination of copying, and
exposing those to users just brings confusion based on some feedback.

For instance, Postgresql always closes connections with a hard RST.
This, previously, meant every postgres connection was reported as an
"error" in Ztunnel - no good.

Looking at the Envoy codebase, they do NOT report these as errors: once the
connection is started, it can close for any reason silently (except at
debug level, of course).

We take the same approach here. Also, add more context on where we are
failing (shutdown, write, etc)
2024-06-28 17:23:46 -04:00
John Howard fdab2fb297
Conditionally bind to localhost for partial IPv6 support (#1178)
* Conditionally bind to localhost for partial IPv6 support

* Make it per-namespace
2024-06-28 14:44:46 -04:00
John Howard 3114f81946
Keep track of logs for terminated connections (#1187)
Fixes https://github.com/istio/ztunnel/issues/1155

This ensures when we drain a process we emit metrics
2024-06-28 10:37:42 -04:00
John Howard caa4e7661c
Drain: allow more dynamic configuration (#1184)
tl;dr: allow us to set one value for "draining" from the user
perspective, which sets both `spec.terminationGracePeriodSeconds` and
ztunnel's drain period.

TBH, this isn't super valuable without
https://github.com/kubernetes/kubernetes/pull/125746, since Helm could
just do the math, but it's nice to future-proof.
2024-06-27 16:37:15 -04:00
Istio Automation 117b20e600
Automator: update common-files@master in istio/ztunnel@master (#1186) 2024-06-27 11:22:15 -04:00
Istio Automation ef96711ac0
Automator: update common-files@master in istio/ztunnel@master (#1185) 2024-06-26 21:20:15 -04:00
John Howard 7fd9aaa46d
Fix edge case in delayed workload case (#1179)
The '`async fn` resumed after completion' panic occurs when we hit the loop
twice: it is illegal to await a completed async function. For this case,
we are supposed to call `changed` each time.

Make the test case more comprehensive to catch this
2024-06-25 17:03:21 -04:00
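The rule being fixed in #1179: a Rust future is one-shot, so the loop has to create a fresh `changed()` future on every iteration. A minimal sketch with a tokio `watch` channel (illustrative, not the ztunnel code):

```rust
use tokio::sync::watch;

/// Wait until the watched value becomes true.
async fn wait_for_ready(mut rx: watch::Receiver<bool>) {
    loop {
        if *rx.borrow() {
            return;
        }
        // Correct: a new `changed()` future on each pass. Storing one
        // future and awaiting it twice panics with
        // "`async fn` resumed after completion".
        if rx.changed().await.is_err() {
            return; // sender dropped; give up
        }
    }
}
```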
Ben Leggett 81f31520f9
nit: drop extra log (#1181)
* nit: drop extra log

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Fixup

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

---------

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>
2024-06-25 16:54:20 -04:00
Ben Leggett 90cb4246a1
Better proto err logging/announce retries (#1180)
* More descriptive protocol errors

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Backoff announce

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Lints

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

---------

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>
2024-06-25 16:45:20 -04:00
John Howard 569cd0063f
h2: avoid excessive buffer size (#1175)
This was a typo - 16k*16k is MASSIVE. We really only need 4mb to give
good results without risking unbounded buffering
2024-06-24 19:24:19 -04:00
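The arithmetic behind "MASSIVE" in #1175: squaring the size instead of scaling it multiplies 16 KiB by 16 KiB. Illustrative constants (not the actual ztunnel names):

```rust
const KIB: usize = 1024;
// The typo'd value: 16k * 16k = 268_435_456 bytes, i.e. 256 MiB per stream.
const TYPO: usize = (16 * KIB) * (16 * KIB);
// The intended order of magnitude: a 4 MiB buffer.
const INTENDED: usize = 4 * 1024 * KIB;
```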
John Howard 92a2a921f1
Benchmark various levels of concurrency (#1171) 2024-06-24 18:32:19 -04:00
John Howard af4fb11bc3
Invert IPv6 flag for consistency: (#1172) 2024-06-24 14:35:36 -04:00
John Howard 36bdb9ebf1
admin: bind to both IP families (#1165) 2024-06-24 14:35:29 -04:00
John Howard 52b250380b
Optimize throughput (#1169)
* jumbo buffers

* add readme

* zero copy bytes buffers

* working

* fix test

* cleanup

* cleanup

* add unit tests

* Take advantage of buffered writes

* rebase
2024-06-24 14:21:29 -04:00
John Howard 89fbc92f2e
Minor logging cleanup (#1168)
Show the pod name instead of the UID, which is more understandable to users.

Propagate the span in more places.
2024-06-24 11:28:30 -04:00
John Howard 8458d66f02
Support ip_families field in service (#1161)
* Support ip_families field in service

* Move to service level
2024-06-24 10:56:29 -04:00
John Howard b745d7e419
Allow running inpod on musl build (#1162)
MUSL sets this max to 128, so it fails
2024-06-24 04:11:28 -04:00
John Howard 3a41b09674
Add option for disabling IPv6 (#1163)
* Add option for disabling IPv6

Fixes https://github.com/istio/ztunnel/issues/1131

Testing was manual. I don't see any way we can test this in CI, since it
only works if the kernel is booted with IPv6 turned off - the sysctl to
dynamically disable it will NOT reproduce the breakage.

* rename
2024-06-20 11:38:35 -04:00
Ben Leggett cbc2f2bf3b
Chill out at the proto level as well as the socket level if we have issues connecting to the ambient node agent. (#1136)
* Chill out at the proto level as well as the socket level if we have issues

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Revert "Chill out at the proto level as well as the socket level if we have issues"

This reverts commit 60ae2a2e42.

* Just quit if we have a protocol error

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Fixup

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* fmt

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

---------

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>
2024-06-18 16:32:33 -04:00
John Howard 46b971f92d
drain: give 5s (configurable) time for existing connections to (#1157)
gracefully close

This was removed in fa7b1aad00 (diff-22c9338ddb21f85ab7b656447d453c0d0e0644c647054ca18c4d7d6d43fc8716R100-R113)
2024-06-18 15:52:32 -04:00
John Howard e2bcfbed9b
Drop unused field in WDS (#1159)
This is cleanup from an earlier change on the Istio side. This is dead code.

workload.proto changes are copy+paste from Istio
2024-06-18 14:45:33 -04:00
John Howard 8fb9ba1735
outbound: when calling pod IP directly, always use that pod IP (#1156)
Instead of randomly picking an IP within the workload.

The user requested a specific IP -- we ought to use it.
2024-06-18 13:30:33 -04:00
John Howard 9a0f17f6af
dns: bind to ipv4 and ipv6 (#1154)
Fixes https://github.com/istio/ztunnel/issues/1153

Without this, you cannot have IPv4 and IPv6 work for dns -- you have to
pick one or the other
2024-06-18 12:05:33 -04:00
John Howard 46e07eae3a
Make benchmarks a bit more consistent and realistic (#1151) 2024-06-17 16:34:32 -04:00
John Howard fe2c0a33f4
Remove mutex on copy path (#1150)
* Remove mutex on copy path

* fmt
2024-06-17 16:05:32 -04:00
John Howard 419795079e
Hide error logs when the peer shuts down for us (#1144)
* Hide error logs when the peer shuts down for us

See https://github.com/istio/istio/discussions/51586 for a real world
example

* trace log
2024-06-14 16:33:27 -04:00
John Howard 6b31845153
Drop IP spoofing for outbound traffic (#1143)
* Drop IP spoofing for outbound traffic

This is not needed and breaks dualstack + adds complexity

* hoist
2024-06-14 16:26:27 -04:00
John Howard deab36683b
Make xds wait longer (500ms -> 5s) (#1146)
500ms is fairly tight. In a real world cluster, I hit this ~100% of the
time. This is with a kind single-node cluster, so when Istiod is at its
fastest. Due to debounce, etc, it's pretty easy to exceed this time.
2024-06-14 16:19:28 -04:00
John Howard 3f12ff25ea
Properly handle named targetPort services (#1142)
When we connect to a service with a named targetPort, we should exclude
endpoints that do not have that port defined. Today, we send to them anyway,
which doesn't work (port 0 is attempted).

This is mostly straightforward but a bit of quirks around app_tunnel.

Note I also made load_balance directly return the workload. This avoids
2 lookups, but also I have 3(!) WIPs locally that need it, so good to
get it in.
2024-06-14 16:10:27 -04:00
Ben Leggett 3f95bdc7f6
Should be debug (#1141)
Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>
2024-06-14 13:41:45 -04:00
John Howard d7b0d10053
Improve error log output format (#1138)
Before/after:
```
error=no healthy upstream: 10.96.69.42:80
error="no healthy upstream: 10.96.69.42:80"
```
2024-06-14 10:39:30 -04:00
John Howard 636c69c617
Drop requests to service without any backends (#1137)
Before we would passthrough. This would then get dropped by kube-proxy
anyways *usually*, but in some cases like SE could just be incorrect
entirely. Either way, rejection is correct.
2024-06-14 10:32:30 -04:00
Istio Automation ec809d0444
Automator: update common-files@master in istio/ztunnel@master (#1135) 2024-06-12 10:10:28 -04:00
Istio Automation 3691923a45
Automator: update common-files@master in istio/ztunnel@master (#1132) 2024-06-11 18:54:48 -04:00
John Howard 51cf0d9326
h2: remove illegal double oneshot recv (#1129)
Fixes https://github.com/istio/ztunnel/issues/1127
2024-06-11 11:19:53 -04:00
John Howard f8ac8c8aa1
logs: do not allocate absurd buffers (#1125)
This is 4mb, which ends up as 8mb of RAM in k8s reporting. Drop it down to
1000 - that is plenty, and total RAM usage drops to 2mb. Yay, a 4x decrease
for free
2024-06-10 10:37:53 -04:00
John Howard 71bc43a897
Remove unused Workload field (#1121)
* Remove unused Workload field

I think this is carry-over from the original Go port. It's dead code

* fmt
2024-06-07 13:05:35 -04:00
John Howard fac3e11777
Drop improper workload DNS lookups (#1120)
Part 1 of https://github.com/istio/ztunnel/issues/1119. This removes the
broken stuff. TODO is to add it back properly, but for now this PR
strictly removes broken code.
2024-06-06 23:14:34 -04:00
John Howard 2b112fbe44
identity: triple check proxies can only request appropriate certificates (#1114)
This is not fixing a bug, but rather adding a 3rd line of defense
against one of the worst *potential* vulnerabilities in ztunnel: a
confused deputy causing incorrect certificates to be used.

We now have many checks:
* We check the IP of the request, and give it an identity matching the
  workload associated with that IP
* We check the socket the request landed on is running in the pod
  matching ^
* (new) We check any identities requested by a proxy are for the pod we
  are running the proxy in (for inpod). This means if there were a
coding error accidentally requesting the wrong cert, it would be denied.
2024-06-06 23:06:36 -04:00
Istio Automation 83f9791e57
Automator: update common-files@master in istio/ztunnel@master (#1117) 2024-06-05 13:12:23 -04:00
Istio Automation 9807f41fb4
Automator: update common-files@master in istio/ztunnel@master (#1115) 2024-06-04 19:17:38 -04:00
Istio Automation bfb0871135
Automator: update common-files@master in istio/ztunnel@master (#1113) 2024-06-04 15:38:07 -04:00
John Howard 2e41538638
dns: fix search lookup (#1099)
* dns: fix search lookup

Fixes https://github.com/istio/ztunnel/issues/1074 (issue was incorrect
basically).

Before, our search domain was always ztunnel's (istio-system). This is incorrect; we
should know the search domain of the request.

We don't know the per-pod custom settings
(https://github.com/istio/ztunnel/issues/555), but we can use the
correct defaults.

* Fix unwrap
2024-06-03 21:07:05 -04:00
Ben Leggett f18be9cd87
Fix outbound race condition where we don't have the source workload from XDS yet (#1092)
* Wait without demand (fixes #51193)

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Add some tests

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Fixup

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

---------

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>
2024-06-03 19:04:06 -04:00
John Howard 718f6f018b
access logs: consistently quote fields (#1095)
* access logs: consistently quote fields

Before we were semi-randomly quoting or not quoting fields. Now we
always quote strings.

* to_value
2024-06-03 18:50:05 -04:00
John Howard 8d366c41f2
socks5: add initial hostname lookup support (#1096)
This isn't great (the DNS usage is wonky), but it works well enough, and
socks5 is experimental and requires explicit opt-in.

With this, you can hookup a browser to ztunnel and it works.
2024-06-03 17:47:05 -04:00
Ben Leggett a14fd0ce61
Make conn termination deadline configurable (#1109)
* Make conn termination deadline configurable

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Genfmt

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

---------

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>
2024-06-03 15:36:05 -04:00
John Howard b4d3a40883
dns: drop (unneeded) on-demand DNS caching (#1098)
The client we use already has its own cache, probably better than ours.
However, currently we construct it on each request, dropping the cache
(and also probably making connection re-use, etc, not happen). We then
add our own cache on top.

This change stores the client so we can use it as a cache, and drops our
bespoke cache.
2024-06-03 12:14:06 -04:00
John Howard 6f42530c76
refactor: use now-stable stdlib to_canonical (#1097)
No need to implement ourselves anymore
2024-06-03 11:19:05 -04:00
Istio Automation c6f1231bfc
Automator: update common-files@master in istio/ztunnel@master (#1108) 2024-06-02 11:15:04 -04:00
Istio Automation f5644d4023
Automator: update common-files@master in istio/ztunnel@master (#1107) 2024-06-01 17:01:24 -04:00
John Howard 5852bd7dce
metrics: derive identity from TLS handshake (#1086)
* metrics: derive identity from TLS handshake

* Do not report identity if there was not one
2024-05-31 16:29:11 -04:00
Istio Automation e24436dd61
Automator: update common-files@master in istio/ztunnel@master (#1103) 2024-05-31 09:43:11 -04:00
Istio Automation 5f0d392b65
Automator: update common-files@master in istio/ztunnel@master (#1102) 2024-05-30 18:51:06 -04:00
John Howard b3e2515780
Remove developer logs (#1101)
Oops
2024-05-29 22:21:04 -04:00
John Howard 434ab39c92
Do not expose internal rustls errors to users (#1094)
Like `io error: peer closed connection without sending TLS close_notify: https://docs.rs/rustls/latest/rustls/manual/_03_howto/index.html#unexpected-eof`

TBH it's *possible* this is a bug in our own code in not gracefully
shutting down; I am not entirely sure. Regardless, it is amateur hour to
expose an error like this to users.
2024-05-29 15:32:10 -04:00
John Howard 6532c55394
Fix regression in source IP spoofing (#1085)
* Fix regression in source IP spoofing

We were using the `cfg` in the pool since #1040. `cfg` is not whether it's
used, but rather the intent of the user -- we may enable or disable it at
runtime.

This renames the field to make it clear it should not be used like this,
and fixes the issue. This went through CI due to
https://github.com/istio/ztunnel/issues/1084.

* cleanup
2024-05-28 17:36:58 -04:00
Istio Automation 7928f47889
Automator: update common-files@master in istio/ztunnel@master (#1093) 2024-05-28 14:18:37 -04:00
Ben Leggett 4ff9897189
Don't use trust_domain in zds (#1088)
Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>
2024-05-22 15:54:02 -04:00
John Howard c84404d045
Refactor build_requests (#1082)
* wip

* wip

* explicit request enum

* Revert "explicit request enum"

This reverts commit a25984a1ab84843f758c7ec9ca6c747439583443.

* Revert "wip"

This reverts commit 57e12c57828ef9a1e39613589d97c029690428f1.

* wip

* wip

* proper size

* cleanup

* lint

* minor cleanup

* change log
2024-05-22 15:07:02 -04:00
John Howard e2ee467d1f
admin: serialize ALL workloads and services (#1083)
Today we miss some services and workloads because not all of them have
VIPs or IPs. This makes sure we get all of them.

The service change is a breaking change since it was K->V and now it's
K->[]V. The K isn't used by istioctl or generally useful (it's in the V
anyway).

To handle this, and for consistency and future proofing, I changed all
workload, service, and policy to just be a []V instead. This will
require `istioctl x zc` changes, but they are simple.
2024-05-22 11:42:02 -04:00
Ben Leggett c57fa6bb38
Cleanup `illegal_ports` and ARC all of PI (#1040)
* Arc all of PI

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* *sigh*

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* format

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Use incrementing ports to avoid condcomp hacks

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Comment

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Lints, review comments

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Tidies

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Well, this is less bad

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Conditionalize this

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

---------

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>
2024-05-22 11:06:02 -04:00
Istio Automation ca9f0dfa48
Automator: update common-files@master in istio/ztunnel@master (#1081) 2024-05-21 16:00:02 -04:00
Ian Rudie 4c55c34bfc
add logging for xds nack errors (#1079)
Signed-off-by: ilrudie <ian.rudie@solo.io>
2024-05-21 15:10:01 -04:00
Istio Automation 193f0f3887
Automator: update common-files@master in istio/ztunnel@master (#1078) 2024-05-21 10:49:29 -04:00
John Howard deb8b0abe2
Fully remove public internet dependency on DNS tests (#1075)
* Fully remove public internet dependency on DNS tests

Make a common fake DNS server. Replace all instances of using the system
DNS resolver from tests and instead use it.

Really we already had a fake one, so mostly just moved it to a common
place and made it a bit more flexible.

* Address comments
2024-05-20 12:45:32 -04:00
Steven Landow 80ac30e0bb
allow missing svc port for sandwich (#1054)
* allow missing svc port for sandwich

* it coverage

* ut
2024-05-18 15:17:30 -04:00
John Howard 2d41d218f7
logs: faster and more correct JSON format (#1071) 2024-05-17 18:14:29 -04:00
John Howard 2f9b1e5ffe
deps: use released `h2` (#1069)
Fixes https://github.com/istio/ztunnel/issues/1000
2024-05-17 10:47:28 -04:00
John Howard 163aeb1ac3
Better handling of proxying shutdown (#1067)
Before: if one end closes, we abruptly stop. We give confusing error
messages

After: if one end closes, we drain the other end then shutdown. Errors
are nicer.

The "Disconnected" case is as such: the client has sent us data, but the
server disconnected before we could send it (or the opposite).
This *can* actually happen in legitimate cases, but it's somewhat rare.
For instance, an HTTP server may send a response back immediately
without reading the request body and then close the connection.
2024-05-16 19:43:29 -04:00
John Howard f037d39f38
h2/fix dropper (#1057)
* With compiler bug

* h2: remove drop counter from server

This isn't incremented on the server, and dropping it causes overflow.
This is ~fine, but if trace logging is turned on we do a panic-causing
overflow (I think only in debug builds?).
2024-05-16 14:23:28 -04:00
John Howard 917481bc52
perf: really, really stop nagle's algorithm (#1062)
Take 4? This time I made it so it's very, very hard to forget
2024-05-16 14:13:28 -04:00
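The "hard to forget" shape is to funnel every socket through one constructor that sets TCP_NODELAY, rather than remembering it at each call site. A minimal sketch using tokio (the helper name is assumed, not ztunnel's):

```rust
use tokio::net::TcpStream;

/// Every socket in the proxy passes through here exactly once;
/// Nagle's algorithm adds latency for small, latency-sensitive writes.
pub fn with_nodelay(stream: TcpStream) -> std::io::Result<TcpStream> {
    stream.set_nodelay(true)?;
    Ok(stream)
}
```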
Istio Automation 03c442aa4f
Automator: update common-files@master in istio/ztunnel@master (#1060) 2024-05-15 19:37:11 -04:00
Istio Automation e4f2156265
Automator: update common-files@master in istio/ztunnel@master (#1059) 2024-05-15 16:50:30 -04:00
John Howard 7937d6d8d6
dns: fix headless service handling (#1056)
* dns: fix headless service handling

Before, we treated any service without a VIP as headless. This is not
correct. Consider

```yaml
apiVersion: networking.istio.io/v1
kind: ServiceEntry
metadata:
  name: external
  hosts:
  - httpbin.org
  location: MESH_EXTERNAL
  ports:
  - number: 80
    name: http
  resolution: DNS
```

We should NOT treat this as headless.

The implementation is a total hack, but a better fix requires WDS
changes, so for now a short-term approach is best

* safer suffix
2024-05-15 16:22:09 -04:00
Istio Automation 5c276f343a
Automator: update common-files@master in istio/ztunnel@master (#1051) 2024-05-13 19:40:38 -07:00
Costin Manolache ce48dadb99
Add a target to build the inpodserver (using docker image) (#1036) 2024-05-13 13:53:20 -07:00
John Howard a737a45f69
Fix and improve slow test (#1047)
This test was 3s long and not testing anything. The "set error" was not
actually doing anything, and since we didn't mock time it was really
slow.
2024-05-13 10:08:16 -07:00
John Howard 3394c03d44
Make DNS test faster by running parallel (#1048)
This is the next slowest test. It was 1s before; now it's 0.05s
2024-05-13 09:19:16 -07:00
Loong Dai c00d1a93c6
doc: comment beta status (#1050)
Signed-off-by: Loong <loong.dai@intel.com>
2024-05-13 07:14:20 -07:00
John Howard f0f7052da1
tests: drop nip.io dependency (#1045)
We have our own DNS server... let's use it

Fixes https://github.com/istio/ztunnel/issues/922
2024-05-10 15:49:57 -07:00
John Howard 41f3ea589d
Fix flakes in idempotency test (#1042) 2024-05-10 15:03:53 -07:00
John Howard 7b11e3954b
Deflake test_cache_refresh test (#1041)
This currently is slow and flaky.

Old logic: fetch the cert, make sure we get exactly 1 new one within N seconds. Flaky if our timing is off due to load.

New logic: make sure we refresh the cert, and the new cert is valid.
2024-05-10 14:48:54 -07:00
John Howard fd2da1726b
Migrate HBONE server to direct h2 usage (#1039)
* inbound: move to direct h2 usage

* move to files
2024-05-10 14:06:52 -07:00
John Howard 0f86994519
adopt linting rules for rustc 1.78 (#1035)
Well, turn one off. I think it's a bad rule and upstream may revert it
2024-05-10 08:10:53 -07:00
Istio Automation 5da70fab25
Automator: update common-files@master in istio/ztunnel@master (#1038) 2024-05-09 18:25:52 -07:00
Steven Landow dac2705955
send HBONE address in PROXY (#1029)
* send HBONE address in PROXY

* weird import of error via proxy

* add a test

* fix PROXY echo server
2024-05-09 11:44:51 -07:00
Istio Automation 38e72591d3
Automator: update common-files@master in istio/ztunnel@master (#1034) 2024-05-09 09:07:52 -07:00
Istio Automation ae21398795
Automator: update common-files@master in istio/ztunnel@master (#1032) 2024-05-08 14:53:51 -07:00
Istio Automation 6113acf52e
Automator: update common-files@master in istio/ztunnel@master (#1028) 2024-05-08 12:07:51 -07:00
John Howard 3d7b6d0991
Fix cleaning up identities (#1021) 2024-05-06 15:46:49 -07:00
John Howard 89fa6f2bf9
performance: make copy buffer sizes dynamic (#1024)
Today, we give each buffer 16k. There are 3 per connection. This leads to a
lot of per-connection memory usage.

This PR changes each buffer to start with only 1k. This dynamically
scales up.

Currently, the algorithm to scale up is very simple: once we have sent
128k total on the connection, we resize the buffer. This very likely
could be improved.

On a test load opening 10k connections:
Before: 90mb
After: 38mb
2024-05-06 14:00:07 -07:00
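A minimal sketch of the scaling rule described in #1024 (constants and names are illustrative; the real resize heuristic may differ):

```rust
const INITIAL: usize = 1024;       // start each buffer at 1k
const LARGE: usize = 16 * 1024;    // grow to 16k for busy connections
const THRESHOLD: u64 = 128 * 1024; // bytes sent before upgrading

struct ScalingBuffer {
    buf: Vec<u8>,
    sent: u64,
}

impl ScalingBuffer {
    fn new() -> Self {
        Self { buf: vec![0; INITIAL], sent: 0 }
    }

    /// Record bytes copied through this buffer; resize once the connection
    /// has proven it is carrying real traffic.
    fn record(&mut self, n: usize) {
        self.sent += n as u64;
        if self.sent >= THRESHOLD && self.buf.len() < LARGE {
            self.buf.resize(LARGE, 0);
        }
    }
}
```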
John Howard f3ad7d8072
logging: ignore duplicate fields from direct `log` usage (#1023)
This is copied from `tracing` directly
2024-05-06 13:17:07 -07:00
Yuval Kohavi 3c7fc71f05
add missing local example (#1014)
* add missing local example

* lint

* fmt
2024-05-03 16:21:59 -07:00
John Howard 5eb8906c91
performance: optimized string and various other XDS improvements (#1016)
* Initial

* wip

* prod compiling

* bump max

* wip diagnostics

* performance: avoid clone of workload on conversion

* performance: avoid allocating vector for all resources

* intern: xds types

* performance: optimize endpoint_uid

* perf: do not allocate endpoints vector

* performance: do not clone services map

* use arcstr instead

* metrics back on

* Tests compile

* format

* fix and format

* Move rbac over too

* Move more over

* rebase
2024-05-03 16:14:00 -07:00
John Howard a84ce3780a
misc: remove Default identity from prod codepath (#1018)
We didn't use it anywhere which is good -- but it was a risk
2024-05-03 15:38:59 -07:00
John Howard 543a45bc7d
logs: fix dst.namespace and missing dst.service (#1017) 2024-05-03 15:32:00 -07:00
John Howard 7b9eaa2a4c
performance: drop double buffering on outbound HBONE (#1012)
* Implement AsyncBufRead

* compiles but does not work

* Fully working

* fmt
2024-05-03 08:41:41 -07:00
John Howard e5f586b6c9
h2: do not use connections that got GoAways (#1010)
This should fix the blocked Istio updates. I tested manually against
Istio e2e tests and this passes
2024-05-02 10:22:42 -07:00
John Howard 6a255ccc91
Miscellaneous optimizations to reduce memory footprint (#1002)
* performance: reduce clones of DemandProxyState

This is 272 bytes, not cheap.

* performance: use Arc to avoid service clones

* performance: use Arc to avoid workload clones on hotpaths

* performance: avoid double lookup when not using on-demand

* performance: reduce size of each inbound hbone task

* Fixup tests

* performance: clone PI on inbound

* fixup: set back large buffers

* lint
2024-05-02 09:21:41 -07:00
Istio Automation 930858b573
Automator: update common-files@master in istio/ztunnel@master (#1007) 2024-05-02 09:02:43 -07:00
John Howard 62aef13937
Use upstream h2 (#1004)
For https://github.com/istio/ztunnel/issues/1000
2024-05-02 08:50:42 -07:00
Istio Automation 250d432610
Automator: update common-files@master in istio/ztunnel@master (#1003) 2024-05-02 07:07:41 -07:00
John Howard 34c3a59afd
deps: use h2 with race condition fix (#998)
Pulls in https://github.com/hyperium/h2/pull/772. We might want to wait
on this, not sure how fast it will go
2024-05-01 16:39:40 -07:00
John Howard faa4f239d9
h2: poll ready before using (#997)
This is "required" per the docs. I don't think it really is, but good to
be safe and follow the docs
2024-05-01 16:23:40 -07:00
John Howard c48c2c44a1
Make connection tracking resilient to connection reuse (#991)
* Make connection tracking resilient to connection reuse

We have a race where, if I open a connection with the same 4-tuple too
quickly, we would occasionally fail to track it. This is due to ordering:

* New connection register
* Old connection close
* New connection track -- fails

Now, we make sure we account (in count) for the new connection in
register so we don't have this race.

Hard to test in unit tests or even integration; I used netperf TCP_CRR
and it's pretty easy to trigger.

* Track and register together
2024-05-01 11:23:40 -07:00
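The fix in #991 makes "account for the connection" part of registration itself, so the old connection's close can never observe a state where the new one is uncounted. A minimal sketch with a per-4-tuple refcount (illustrative, not the actual tracker):

```rust
use std::collections::HashMap;
use std::net::SocketAddr;
use std::sync::Mutex;

#[derive(Clone, Hash, PartialEq, Eq)]
struct ConnKey {
    src: SocketAddr,
    dst: SocketAddr,
}

#[derive(Default)]
struct ConnTracker {
    counts: Mutex<HashMap<ConnKey, usize>>,
}

impl ConnTracker {
    /// Register and count in one step: a reused 4-tuple goes 1 -> 2 here,
    /// so the old connection's close (2 -> 1) cannot erase the new entry.
    fn register(&self, key: ConnKey) {
        *self.counts.lock().unwrap().entry(key).or_insert(0) += 1;
    }

    fn close(&self, key: &ConnKey) {
        let mut counts = self.counts.lock().unwrap();
        if let Some(n) = counts.get_mut(key) {
            *n -= 1;
            if *n == 0 {
                counts.remove(key);
            }
        }
    }
}
```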
John Howard 44d842cad2
Replace hyper with direct `h2` usage for outbound HBONE (#990)
* refactor pool

* wip h2

* wip

* WIP

* first test passing

* test working reliably

* working except idle_eviction_with_persistent

* Ready to go

* Ready to go

* fixes

* lock
2024-05-01 09:13:41 -07:00
John Howard 0324a49c4c
pool: do not indefinitely leak timers (#992)
We are storing these in a list and never removing. We don't use them, so
no need to store.
2024-04-30 17:19:39 -07:00
John Howard 7112743207
performance: reduce and test size of futures (#985)
* performance: reduce and test size of futures

Prior to this PR, each outbound connection future was 22k. This is
really big.

The reason behind this is how Rust sizes Futures; the stack size is
allocated based on the maximum required size through any branch. This
means even obscure code paths can bloat up the entire future for
everyone.

This PR optimizes a bunch of these. This is done in a few ways:
* Box::pin expensive futures. While we have to allocate these when we
  hit them, of course, when we don't hit these codepaths they are now
"free". We have a lot of uncommon expensive code paths. Especially HBONE
vs passthrough; hbone is way more expensive. But also on demand XDS,
DNS, etc
* Arc the Config which is large
* A variety of other micro-optimizations

A new `assertions` module is added to verify we do not regress

* Fix the upgrade drop issue

* fmt
2024-04-29 13:56:45 -07:00
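Two of the tools from #985 in miniature: box the rare expensive branch so it stops inflating every future, and assert on the future's size so regressions fail loudly. Names and the size bound below are illustrative, not the actual ztunnel code:

```rust
async fn rare_expensive_path() {
    // e.g. on-demand XDS or DNS; large local state lives here
}

async fn handle_connection(needs_lookup: bool) {
    if needs_lookup {
        // Box::pin moves this branch's stack space to the heap, paid only
        // when the branch is actually taken, instead of widening the
        // maximum-size calculation for every connection future.
        Box::pin(rare_expensive_path()).await;
    }
    // ... common fast path ...
}

#[test]
fn connection_future_stays_small() {
    let fut = handle_connection(false);
    let size = std::mem::size_of_val(&fut);
    assert!(size < 2048, "connection future grew to {size} bytes");
}
```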
John Howard 4605fdb977
logging: disable regex (#987)
(cherry picked from commit c68413be4387680372df53fb496b50132fb6fef3)
2024-04-29 11:29:45 -07:00
John Howard 56a4f65439
Fix build for jemalloc feature (#980) 2024-04-27 04:53:18 -07:00
Ben Leggett a44892b30c
Better-behaved HBONE pooling (#931)
* Attempt at smarter HBONE pooling between ztunnels

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Lints

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* lints 2

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Hmm

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Fixup

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Fixup

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* More comments

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* cleanup

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* fixup

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Clean

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Fix jemalloc

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* WIP: move out of proxyinfo

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Evict pooled conns after $INTERVAL

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Update src/proxy/pool.rs

Co-authored-by: Ian Rudie <ilrudie@gmail.com>

* Evict pooled conns after $INTERVAL

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* For now, just do the foolproof collision check

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Don't be silly

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Naming, review comments

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Tidy Arcs+drains

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Cleanups

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Format

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Use the fancy lockless outer map, drop realm-io

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Cleanup comments

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Fix outdent (review comment)

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Fixups/review comments

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* resync

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Droptests

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* fix testhang

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* add smarter evict test

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Interesting failure

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* No, it's not

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Make this a bit simpler

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Separate out the connspawner

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Tidy logging a bit

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Add serverside keepalive

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* fixup

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Just for kicks

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* D'oh - use mthread runtime for tests

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Fix none race

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Propagate connection establish errors

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Cleanup

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Work around local test server getting overloaded

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Move the rest to multi_thread, chill out on iterations, work around test
rig flakes

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Tidy comments

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* lints

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

* Clarify comment

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>

---------

Signed-off-by: Benjamin Leggett <benjamin.leggett@solo.io>
Co-authored-by: Ian Rudie <ilrudie@gmail.com>
2024-04-26 10:44:17 -07:00
John Howard 9e43a65e83
admin: support setting log of a specific scope (#972) 2024-04-26 10:10:19 -07:00
John Howard c33de0d9f4
Verify workload consistency with CNI for outbound (#979)
We only did this for inbound. This double check provides a stronger
guarantee than IP address matching
2024-04-26 09:56:17 -07:00
John Howard 1e5dba8528
Support running tests in docker (#977) 2024-04-25 12:00:17 -07:00
John Howard 9f6cc73c72
Test sandwich waypoints (#973) 2024-04-25 11:22:17 -07:00
John Howard 34fce85a6a
Implement better "namespaced" tests (#970)
* Cleanup nesting

* e2e tests: use inpod mode more

* Assert access logs

* wip

* better logs

* WIP admin tasks

(cherry picked from commit ba9c6c94f1a542e59c509ceb12185fbec65b6cdd)

* wip

* fix

* Revert "WIP admin tasks"

This reverts commit 0ca900bbee676acda849fb10e6dbd5933f621918.

* fix

* re-enable

* cleanup

* more cleanup, need to undo the prefix

* Revert "Cleanup nesting"

This reverts commit cfb81b0714d4c4781c9c9a1bd9655c76cfe100c1.

* Add more tests

* Malicious calls test

* mismatch test

* policy test

* vip test

* cleanups

* fmt

* lint scripts
2024-04-25 09:35:17 -07:00
John Howard eb60303cbb
rbac: log unknown policies for debugging purposes (#959)
* rbac: log unknown policies for debugging purposes

* Update src/state.rs

Co-authored-by: Ben Leggett <854255+bleggett@users.noreply.github.com>

---------

Co-authored-by: Ben Leggett <854255+bleggett@users.noreply.github.com>
2024-04-25 09:02:17 -07:00
Istio Automation 4549e63e2d
Automator: update common-files@master in istio/ztunnel@master (#969) 2024-04-23 11:35:50 -07:00
John Howard 1a2218a8fc
test: run `namespaced` tests without root (#962)
* test: run `namespaced` tests without root

This gives full coverage, without requiring root. It also prevents
issues around failure to cleanup network namespaces, etc, and generally
allows us to mess with the environment a lot more than we were doing
now.

* try to create
2024-04-22 17:09:57 -07:00
John Howard 5bded9a51b
Stop denying 15008 for sandwich case (#966)
Blocking merge into istio/istio
https://github.com/istio/istio/pull/50544
2024-04-22 13:53:41 -07:00
John Howard 225ca48537
metrics: add flag for deny by policy (#956)
Replaces https://github.com/istio/ztunnel/pull/664/
2024-04-22 10:50:33 -07:00
John Howard 6800597b66
Pass original service over HBONE for debugging (#942)
Enhance the `Forwarded` header we already send to include `host` as part
of the header.

This is used to guess the service for inbound telemetry (as an extra
input to our existing heuristic), and also reported in the connection
dump.
2024-04-22 10:29:32 -07:00
John Howard aa570a3e4a
metrics: make traffic metrics faster (#940)
* metrics: make traffic metrics faster

Before, on each read/write we looked up the metric and incremented it.

Now, we look it up once and then just increment. The increment is just an
atomic increment, which on x86 is the same as a simple `add`, which is
very fast (measured at 1ns, but presumably it's so fast that benchmarking
isn't meaningful).

Since this is on the hot path, this is meaningful.

Given this, I don't think we will ever need to do
https://github.com/istio/ztunnel/pull/927#issuecomment-2060434505.

Also, addressed
https://github.com/istio/ztunnel/pull/927#discussion_r1569021128 and
cleaned up inbound, which had broken a function up into 2, making things
more complex, not simpler.

* Drop the return type
2024-04-22 10:21:32 -07:00
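The hot-path shape from #940: resolve the labeled counter once when the connection is established, so each read/write is a single relaxed atomic add. A sketch with a plain AtomicU64 standing in for the real metrics type:

```rust
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};

/// Held per connection; the metrics-registry lookup happened exactly once.
struct ConnMetrics {
    bytes_sent: Arc<AtomicU64>,
}

impl ConnMetrics {
    #[inline]
    fn record_sent(&self, n: u64) {
        // On x86 this is a single `lock add` -- on the order of a
        // nanosecond, cheap enough for the per-read/write path.
        self.bytes_sent.fetch_add(n, Ordering::Relaxed);
    }
}
```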
Istio Automation a37253e8f8
Automator: update common-files@master in istio/ztunnel@master (#955) 2024-04-22 09:37:25 -07:00
Justin Pettit fc3ee40238
metrics: Don't log 'hbone_target' as a string. (#946)
* metrics: Don't log 'hbone_target' as a string.

The other SocketAddr fields (src.addr and dst.addr) don't log as
strings, so make them all consistent.

* metrics: Fix linter errors from 'hbone_target' commit.
2024-04-22 08:41:28 -07:00
John Howard d3079009b4
Revert "[release-1.22] Automated branching step 4 (#951)" (#952)
This reverts commit 23d9123c21.
2024-04-22 08:32:59 -07:00
zirain 23d9123c21
[release-1.22] Automated branching step 4 (#951) 2024-04-22 08:30:29 -07:00
John Howard 88d5a67a14
Make socks5 optional, off by default (#943)
This is a super security-sensitive codepath, and mostly an experiment.
Make it opt-in so we don't stumble into a CVE when we ship beta.
2024-04-22 07:52:28 -07:00
dependabot[bot] bf94a6ef93
Bump rustls from 0.23.4 to 0.23.5 in /fuzz (#945)
Bumps [rustls](https://github.com/rustls/rustls) from 0.23.4 to 0.23.5.
- [Release notes](https://github.com/rustls/rustls/releases)
- [Changelog](https://github.com/rustls/rustls/blob/main/CHANGELOG.md)
- [Commits](https://github.com/rustls/rustls/compare/v/0.23.4...v/0.23.5)

---
updated-dependencies:
- dependency-name: rustls
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-04-22 07:39:28 -07:00
John Howard 509bdc1357
Track outbound connections (#939)
* Refactor connection tracker

* Track outbound connections as well

* fix test
2024-04-22 00:15:56 -07:00
John Howard b42cc97602
fix unknown field in testing yaml due to camel case change (#933) 2024-04-22 00:00:55 -07:00
150 changed files with 20617 additions and 8676 deletions


@@ -1,6 +1,6 @@
 {
     "name": "istio build-tools",
-    "image": "gcr.io/istio-testing/build-tools:master-f24be7b713480aab44d862ac839ead0b5324d593",
+    "image": "gcr.io/istio-testing/build-tools:master-8e6480403f5cf4c9a4cd9d65174d01850e632e1a",
     "privileged": true,
     "remoteEnv": {
         "USE_GKE_GCLOUD_AUTH_PLUGIN": "True",

.github/dependabot.yml (new file, 15 lines)

@@ -0,0 +1,15 @@
+version: 2
+updates:
+  # Maintain dependencies for GitHub Actions
+  - package-ecosystem: "cargo"
+    # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
+    directory: "/"
+    schedule:
+      interval: "weekly"
+    rebase-strategy: "disabled"
+    groups:
+      all:
+        applies-to: version-updates
+        patterns:
+          - "*"


@@ -27,3 +27,8 @@ The three admin ports (Readiness, Admin, and Metrics) are intentionally split.
 * The admin port must be only on localhost, and it should be on the admin thread for isolation
 * The metrics port should be on the admin thread to avoid isolation.
 This *could* be on the readiness port, but historically we had found that the stats query can be very expensive and lead to tail latencies in the data plane.
+
+**NOTE** Networking policy must allow inbound and outbound traffic on port 15008 for all application pods, for the ambient mesh to function.
+The other ports are not relevant for pod-to-pod communication within the ambient mesh, and are only used for traffic redirection and categorization
+within the application pod's network namespace, or for metrics/readiness scraping of the ztunnel pod itself.
+See the Istio documentation [Ambient and Kubernetes NetworkPolicy](https://istio.io/latest/docs/ambient/usage/networkpolicy/) for more details.

Cargo.lock (generated, 2569 lines; diff suppressed because it is too large)


@@ -1,14 +1,16 @@
 [package]
 name = "ztunnel"
 version = "0.0.0"
-edition = "2021"
-rust-version = "1.77"
+edition = "2024"
+rust-version = "1.85"
 
 [features]
-default = ["tls-ring"]
+default = ["tls-aws-lc"]
 jemalloc = ["dep:tikv-jemallocator", "dep:jemalloc_pprof"]
 tls-boring = ["dep:boring", "dep:boring-sys", "boring-rustls-provider/fips-only"]
 tls-ring = ["dep:ring", "rustls/ring", "tokio-rustls/ring", "hyper-rustls/ring", "dep:rcgen"]
+tls-aws-lc = ["dep:ring", "rustls/aws_lc_rs", "tokio-rustls/aws_lc_rs", "hyper-rustls/aws-lc-rs", "dep:rcgen", "rcgen/aws_lc_rs"]
+tls-openssl = ["dep:rustls-openssl", "dep:openssl" ]
 testing = ["dep:rcgen", "rcgen/x509-parser"] # Enables utilities supporting tests.
 
 [lib]
@@ -24,6 +26,10 @@ bench = false
 name = "throughput"
 harness = false
 
+[[bench]]
+name = "basic"
+harness = false
+
 [dependencies]
 # Enabled with 'tls-boring'
 boring-rustls-provider = { git = "https://github.com/janrueth/boring-rustls-provider", optional = true } #
@@ -33,78 +39,88 @@ boring-sys = { version = "4", optional = true }
 # Enabled with 'tls-ring'
 ring = { version = "0.17", optional = true }
 
+# Enabled with 'tls-openssl'
+rustls-openssl = { version = "0.2", optional = true }
+openssl = { version = "0.10", optional = true }
+
 anyhow = "1.0"
 async-stream = "0.3"
 async-trait = "0.1"
 base64 = "0.22"
 byteorder = "1.5"
-bytes = { version = "1.5", features = ["serde"] }
+bytes = { version = "1.10", features = ["serde"] }
 chrono = "0.4"
 drain = "0.1"
-duration-str = "0.7"
+duration-str = "0.17"
 futures = "0.3"
 futures-core = "0.3"
 futures-util = "0.3"
-jemalloc_pprof = { version = "0.1.0", optional = true }
-tikv-jemallocator = { version = "0.5.4", features = ["profiling", "unprefixed_malloc_on_supported_platforms"], optional = true }
-hashbrown = "0.14"
-hickory-client = "0.24"
-hickory-proto = "0.24"
-hickory-resolver = "0.24"
-hickory-server = { version = "0.24", features = [ "hickory-resolver" ] }
-http-02 = { package = "http", version = "0.2.9" }
-http-body-04 = { package = "http-body", version = "0.4" }
-http-body-1 = { package = "http-body", version = "1.0.0-rc.2" }
+jemalloc_pprof = { version = "0.6.0", optional = true }
+tikv-jemallocator = { version = "0.6.0", features = ["profiling", "unprefixed_malloc_on_supported_platforms"], optional = true }
+hashbrown = "0.15"
+hickory-client = "0.25"
+hickory-proto = "0.25"
+hickory-resolver = "0.25"
+hickory-server = { version = "0.25", features = [ "resolver" ]}
+http-body = { package = "http-body", version = "1" }
 http-body-util = "0.1"
 http-types = { version = "2.12", default-features = false }
-hyper = { version = "1.2", features = ["full"] }
+hyper = { version = "1.6", features = ["full"] }
 hyper-rustls = { version = "0.27.0", default-features = false, features = ["logging", "http1", "http2"] }
 hyper-util = { version = "0.1", features = ["full"] }
-ipnet = { version = "2.9", features = ["serde"] }
-itertools = "0.12"
+ipnet = { version = "2.11", features = ["serde"] }
+itertools = "0.14"
 keyed_priority_queue = "0.4"
 libc = "0.2"
 log = "0.4"
-nix = { version = "0.28", features = ["socket", "sched", "uio", "fs", "ioctl", "user"] }
-once_cell = "1.19"
-ppp = "2.2"
-pprof = { version = "0.13", features = ["protobuf", "protobuf-codec", "criterion"] }
-prometheus-client = { version = "0.22" }
+nix = { version = "0.29", features = ["socket", "sched", "uio", "fs", "ioctl", "user", "net", "mount", "resource" ] }
+once_cell = "1.21"
+num_cpus = "1.16"
+ppp = "2.3"
+prometheus-client = { version = "0.23" }
 prometheus-parse = "0.2"
-prost = "0.12"
-prost-types = "0.12"
-rand = "0.8"
+prost = "0.13"
+prost-types = "0.13"
+rand = { version = "0.9" , features = ["small_rng"]}
 rcgen = { version = "0.13", optional = true, features = ["pem"] }
-realm_io = "0.4"
 rustls = { version = "0.23", default-features = false }
-rustls-native-certs = "0.7.0"
-rustls-pemfile = "2.1"
+rustls-native-certs = "0.8"
+rustls-pemfile = "2.2"
 serde = { version = "1.0", features = ["derive", "rc"] }
 serde_json = "1.0"
 serde_yaml = "0.9"
 socket2 = { version = "0.5", features = ["all"] }
 textnonce = { version = "1.0" }
-thiserror = "1.0"
-tls-listener = { version = "0.10" }
-tokio = { version = "1.0", features = ["full", "test-util"] }
+thiserror = "2.0"
+tls-listener = { version = "0.11" }
+tokio = { version = "1.44", features = ["full", "test-util"] }
 tokio-rustls = { version = "0.26", default-features = false }
 tokio-stream = { version = "0.1", features = ["net"] }
-tonic = { version = "0.11", default-features = false, features = ["prost", "codegen"] }
-tower = { version = "0.4", features = ["full"] }
-tower-hyper-http-body-compat = { git = "https://github.com/howardjohn/tower-hyper-http-body-compat", branch = "deps/hyper-1.0.0-snapshot1", features = ["server", "http2"] }
+tonic = { version = "0.13", default-features = false, features = ["prost", "codegen"] }
+tower = { version = "0.5", features = ["full"] }
 tracing = { version = "0.1"}
 tracing-subscriber = { version = "0.3", features = ["registry", "env-filter", "json"] }
-url = "2.2"
-x509-parser = { version = "0.16", default-features = false }
+url = "2.5"
+x509-parser = { version = "0.17", default-features = false }
 tracing-log = "0.2"
-backoff = "0.4.0"
+backoff = "0.4"
 pin-project-lite = "0.2"
 pingora-pool = "0.4"
 flurry = "0.5"
 h2 = "0.4"
+http = "1.3"
 split-iter = "0.1"
 arcstr = { version = "1.2", features = ["serde"] }
 tracing-core = "0.1"
 tracing-appender = "0.2"
 tokio-util = { version = "0.7", features = ["io-util"] }
+educe = "0.6"
 
 [target.'cfg(target_os = "linux")'.dependencies]
 netns-rs = "0.1"
+pprof = { version = "0.14", features = ["protobuf", "protobuf-codec", "criterion"] }
 
 [build-dependencies]
-tonic-build = { version = "0.11", default-features = false, features = ["prost"] }
-prost-build = "0.12"
+tonic-build = { version = "0.13", default-features = false, features = ["prost"] }
+prost-build = "0.13"
 anyhow = "1.0"
 rustc_version = "0.4"
@@ -113,6 +129,9 @@ opt-level = 3
 codegen-units = 1
 lto = true
 
+[profile.bench]
+inherits = "quick-release"
+
 [profile.symbols-release]
 inherits = "release"
 debug = true
@@ -133,6 +152,14 @@ diff = "0.1"
 local-ip-address = "0.6"
 matches = "0.1"
 test-case = "3.3"
+oid-registry = "0.7"
oid-registry = "0.8"
rcgen = { version = "0.13", features = ["pem", "x509-parser"] }
#debug = true
x509-parser = { version = "0.17", default-features = false, features = ["verify"] }
ctor = "0.4"
[lints.clippy]
# This rule makes code more confusing
assigning_clones = "allow"
# This doesn't understand `strng` which we use everywhere
borrow_interior_mutable_const = "allow"
declare_interior_mutable_const = "allow"


@ -23,8 +23,20 @@ There are a variety of config options that can be used to replace components wit
* `FAKE_CA="true"`: this will use self-signed fake certificates, eliminating a dependency on a CA
* `XDS_ADDRESS=""`: disables XDS client completely
* `LOCAL_XDS_PATH=./examples/localhost.yaml`: read XDS config from a file.
* `CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="sudo -E"`: have cargo run as sudo
* `PROXY_MODE=dedicated`: dedicated mode is the single-tenant proxy mode and is strongly recommended for local development; it covers ~95% of cases and doesn't require manually constructing Linux network namespaces.
Together, `FAKE_CA="true" XDS_ADDRESS="" LOCAL_XDS_PATH=./examples/localhost.yaml cargo run --features testing` (with `--no-default-features` if you have FIPS disabled) can be used to run entirely locally, without a Kubernetes or Istiod dependency.
The following command (with `--no-default-features` if you have FIPS disabled) can be used to run entirely locally, without a Kubernetes or Istiod dependency.
```shell
FAKE_CA="true" \
XDS_ADDRESS="" \
LOCAL_XDS_PATH=./examples/localhost.yaml \
CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="sudo -E" \
PROXY_MODE=dedicated \
PROXY_WORKLOAD_INFO=default/local/default \
cargo run --features testing
```
### In-pod mode setup
@ -51,12 +63,16 @@ run fake server with:
INPOD_UDS=/tmp/ztunnel cargo run --example inpodserver -- pod1
```
run ztunnel with:
(note: in the above command, you can override the `PROXY_WORKLOAD_INFO` default value if you need to match different values in your `localhost.yaml` file)
run ztunnel (as root) with:
```shell
RUST_LOG=debug INPOD_ENABLED=true INPOD_UDS=/tmp/ztunnel FAKE_CA="true" XDS_ADDRESS="" LOCAL_XDS_PATH=./examples/localhost.yaml cargo run
RUST_LOG=debug PROXY_MODE=shared INPOD_UDS=/tmp/ztunnel FAKE_CA="true" XDS_ADDRESS="" LOCAL_XDS_PATH=./examples/localhost.yaml cargo run --features testing
```
(note: to run ztunnel as root, consider using `export CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="sudo -E"` so that cargo runs the binary under `sudo`)
see the ztunnel sockets:
```shell
@ -70,6 +86,8 @@ ip netns exec pod1 ./scripts/ztunnel-redirect-inpod.sh
To get traffic to work you may need to adjust the IPs in localhost.yaml and start processes in the pod netns.
You can also do `make build FEATURES="--features testing"` and use `./out/rust/debug/ztunnel` instead of `cargo run ...`
### In-pod mode with istiod on kind setup
Run ztunnel from your terminal, with istiod and workloads running in KinD. This works on Linux only.
@ -148,7 +166,13 @@ spec:
EOF
```
Now, port forward istiod, copy over the token and run ztunnel (under sudo):
Add localhost as a verified DNS name to istiod
```shell
kubectl set env -n istio-system deploy/istiod ISTIOD_CUSTOM_HOST=localhost
```
Now, port forward istiod, copy over the token and run ztunnel:
```shell
kubectl port-forward -n istio-system svc/istiod 15012:15012 &
@ -158,7 +182,7 @@ xargs env <<EOF
INPOD_UDS=/tmp/worker1-ztunnel/ztunnel.sock
CLUSTER_ID=Kubernetes
RUST_LOG=debug
INPOD_ENABLED="true"
PROXY_MODE="shared"
ISTIO_META_DNS_CAPTURE="true"
ISTIO_META_DNS_PROXY_ADDR="127.0.0.1:15053"
SERVICE_ACCOUNT=ztunnel
@ -166,7 +190,7 @@ POD_NAMESPACE=istio-system
POD_NAME=ztunnel-worker1
CA_ROOT_CA=/tmp/istio-root.pem
XDS_ROOT_CA=/tmp/istio-root.pem
CARGO_TARGET_$(rustc -vV | sed -n 's|host: ||p' | tr '[:lower:]' '[:upper:]'| tr - _)_RUNNER="sudo -E"
CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="sudo -E"
cargo run proxy ztunnel
EOF
```


@ -19,7 +19,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
SHELL := /bin/bash
SHELL := /usr/bin/env bash
# allow optional per-repo overrides
-include Makefile.overrides.mk


@ -3,21 +3,32 @@ include common/Makefile.common.mk
FEATURES ?=
ifeq ($(TLS_MODE), boring)
FEATURES:=--no-default-features -F tls-boring
endif
ifeq ($(TEST_MODE), root)
export CARGO_TARGET_$(shell rustc -vV | sed -n 's|host: ||p' | tr [:lower:] [:upper:]| tr - _)_RUNNER=sudo -E
else ifeq ($(TLS_MODE), aws-lc)
FEATURES:=--no-default-features -F tls-aws-lc
else ifeq ($(TLS_MODE), openssl)
FEATURES:=--no-default-features -F tls-openssl
endif
test:
RUST_BACKTRACE=1 cargo test --benches --tests --bins $(FEATURES)
coverage:
FEATURES=$(FEATURES) ./scripts/test-with-coverage.sh
build:
cargo build $(FEATURES)
# Build the inpodserver example
inpodserver:
cargo build --example inpodserver
# Test that all important features build
check-features:
cargo check --no-default-features -F tls-boring
(cd fuzz; cargo check)
cargo check --no-default-features -F tls-aws-lc
cargo check --no-default-features -F tls-openssl
cargo check -F jemalloc
(cd fuzz; RUSTFLAGS="--cfg fuzzing" cargo check)
# target in common/Makefile.common.mk doesn't handle our third party vendored files; only check golang and rust code
lint-copyright:
@ -38,10 +49,10 @@ license-check:
fix: fix-copyright-banner
cargo clippy --fix --allow-staged --allow-dirty $(FEATURES)
cargo fmt $(FEATURES)
cargo fmt
format:
cargo fmt $(FEATURES)
cargo fmt
release:
./scripts/release.sh
@ -57,4 +68,4 @@ clean:
cargo clean $(FEATURES)
rust-version:
./common/scripts/run.sh /usr/bin/rustc -vV
./common/scripts/run.sh /usr/bin/rustc -vV


@ -1,2 +1,9 @@
# Use the build container by default
BUILD_WITH_CONTAINER ?= 1
# Namespaced tests need sys_admin due to docker being overly restrictive (https://github.com/moby/moby/issues/42441)
# Ironically, this gives the container more privilege than would otherwise be required.
DOCKER_RUN_OPTIONS += --privileged
ifeq ($(OS), Linux)
DOCKER_RUN_OPTIONS += -v /fake/path/does/not/exist:/var/run/netns
endif
DOCKER_RUN_OPTIONS += -v /dev/null:/run/xtables.lock


@ -1,10 +1,8 @@
# Ztunnel
Ztunnel provides an experimental implementation of the ztunnel component of
Ztunnel provides an implementation of the ztunnel component of
[ambient mesh](https://istio.io/latest/blog/2022/introducing-ambient-mesh/).
Note: `istio/ztunnel` is currently intended for experimental usage only.
## Feature Scope
Ztunnel is intended to be a purpose built implementation of the node proxy in [ambient mesh](https://istio.io/latest/blog/2022/introducing-ambient-mesh/).
@ -36,10 +34,12 @@ Ztunnel's TLS is built on [rustls](https://github.com/rustls/rustls).
Rustls has support for plugging in various crypto providers to meet various needs (compliance, performance, etc).
| Name | How To Enable |
|-----------------------------------------------|------------------------------------------------|
| [ring](https://github.com/briansmith/ring/) | Default (or `--features tls-ring`) |
| [boring](https://github.com/cloudflare/boring) | `--features tls-boring --no-default-features` |
| Name | How To Enable |
|----------------------------------------------------|------------------------------------------------|
| [aws-lc](https://github.com/aws/aws-lc-rs) | Default (or `--features tls-aws-lc`) |
| [ring](https://github.com/briansmith/ring/) | `--features tls-ring --no-default-features` |
| [boring](https://github.com/cloudflare/boring) | `--features tls-boring --no-default-features` |
| [openssl](https://github.com/tofay/rustls-openssl) | `--features tls-openssl --no-default-features` |
In all options, only TLS 1.3 with cipher suites `TLS13_AES_256_GCM_SHA384` and `TLS13_AES_128_GCM_SHA256` is used.
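As a rough illustration of what that pinning looks like with rustls' builder API (a sketch using the `ring` provider; ztunnel's actual TLS setup differs in detail):

```rust
use std::sync::Arc;
use rustls::{ClientConfig, RootCertStore};

// Sketch: a ClientConfig restricted to TLS 1.3 and the two AES-GCM suites.
fn tls13_only_config() -> Result<ClientConfig, rustls::Error> {
    let mut provider = rustls::crypto::ring::default_provider();
    provider.cipher_suites = vec![
        rustls::crypto::ring::cipher_suite::TLS13_AES_256_GCM_SHA384,
        rustls::crypto::ring::cipher_suite::TLS13_AES_128_GCM_SHA256,
    ];
    Ok(ClientConfig::builder_with_provider(Arc::new(provider))
        .with_protocol_versions(&[&rustls::version::TLS13])?
        // Real code supplies actual trust roots and client certificates.
        .with_root_certificates(RootCertStore::empty())
        .with_no_client_auth())
}
```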
@ -67,15 +67,15 @@ To use these vendored libraries and build ztunnel for either of these OS/arch co
##### For linux/x86_64
``` toml
BORING_BSSL_PATH = { value = "vendor/boringssl-fips/linux_x86_64", force = true, relative = true }
BORING_BSSL_INCLUDE_PATH = { value = "vendor/boringssl-fips/include/", force = true, relative = true }
BORING_BSSL_FIPS_PATH = { value = "vendor/boringssl-fips/linux_x86_64", force = true, relative = true }
BORING_BSSL_FIPS_INCLUDE_PATH = { value = "vendor/boringssl-fips/include/", force = true, relative = true }
```
##### For linux/arm64
``` toml
BORING_BSSL_PATH = { value = "vendor/boringssl-fips/linux_arm64", force = true, relative = true }
BORING_BSSL_INCLUDE_PATH = { value = "vendor/boringssl-fips/include/", force = true, relative = true }
BORING_BSSL_FIPS_PATH = { value = "vendor/boringssl-fips/linux_arm64", force = true, relative = true }
BORING_BSSL_FIPS_INCLUDE_PATH = { value = "vendor/boringssl-fips/include/", force = true, relative = true }
```
Once that's done, you should be able to build:
@ -88,6 +88,8 @@ This manual twiddling of environment vars is not ideal but given that the altern
Note that the Dockerfiles used to build these vendored `boringssl` builds may be found in the respective vendor directories, and can serve as a reference for the build environment needed to generate FIPS-compliant ztunnel builds.
A release build with this option can be built with `TLS_MODE=boring ./scripts/release.sh`.
## Development
Please refer to [this](./Development.md).
@ -123,7 +125,6 @@ accessible by making an HTTP request to either "/stats/prometheus" or "/metrics"
- DNS Upstream Failures (`istio_dns_upstream_failures_total`)
- DNS Upstream Request Duration (`istio_dns_upstream_request_duration_seconds`)
- On Demand DNS Requests (`istio_on_demand_dns_total`)
- On Demand DNS Cache Misses (`istio_on_demand_dns_cache_misses_total`)
#### In-Pod metrics
@ -142,6 +143,7 @@ Ztunnel exposes a variety of logs, both operational and "access logs".
Logs are controlled by the `RUST_LOG` variable.
This can set all levels, or a specific target. For instance, `RUST_LOG=error,ztunnel::proxy=warn`.
Logs can be emitted in JSON format with `LOG_FORMAT=json`.
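Combining these, for example (the levels are just the sample from above):

```shell
# Emit JSON logs: warn level for the proxy, error everywhere else.
RUST_LOG=error,ztunnel::proxy=warn LOG_FORMAT=json cargo run
```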
Access logs are under the `access` target.
An example access log looks like (with newlines for readability; the real logs are on one line):
@ -149,7 +151,7 @@ An example access log looks like (with newlines for readability; the real logs a
```text
2024-04-11T15:38:42.182974Z INFO access: connection complete
src.addr=10.244.0.24:46238 src.workload="shell-6d8bcd654d-t88gp" src.namespace="default" src.identity="spiffe://cluster.local/ns/default/sa/default"
dst.addr=10.244.0.42:15008 dst.hbone_addr="10.96.108.116:80" dst.service="echo.default.svc.cluster.local"
dst.addr=10.244.0.42:15008 dst.hbone_addr=10.96.108.116:80 dst.service="echo.default.svc.cluster.local"
direction="outbound" bytes_sent=67 bytes_recv=490 duration="13ms"
```


@ -13,3 +13,108 @@ $ cargo bench -- --save-baseline <name> # save baseline
$ # ...change something...
$ cargo bench -- --baseline <name> # compare against it
```
## Performance
Ztunnel performance largely falls into throughput and latency.
While these are sometimes at odds with each other, as Ztunnel is a generic proxy, we aim to make it perform well on both metrics.
### Request flows
The primary responsibility of the proxy is copying bits between peers.
Currently, this is always either `TCP<-->TCP` or `TCP<-->HBONE`.
#### `TCP` to `TCP`
This is the simplest case, and common amongst many proxies.
[`copy.rs`](../src/copy.rs) does the bulk of the work, essentially just bi-directionally copying bytes between the two sockets.
Typical bi-directional copies use a fixed buffer.
To adapt to various workloads, we use dynamically sized buffers that can grow from 1kb -> 16kb -> 256kb when enough traffic is received.
This allows high throughput workloads to perform well, without excessive memory costs for low-bandwidth services.
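A hedged sketch of that growth policy, shown for one direction only (the tier sizes come from the text above; the real `copy.rs` is more involved):

```rust
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};

// 1kb -> 16kb -> 256kb tiers, as described in the text.
const SIZES: [usize; 3] = [1024, 16 * 1024, 256 * 1024];

async fn copy_growing<R, W>(mut r: R, mut w: W) -> std::io::Result<u64>
where
    R: AsyncRead + Unpin,
    W: AsyncWrite + Unpin,
{
    let mut level = 0;
    let mut buf = vec![0u8; SIZES[level]];
    let mut total = 0u64;
    loop {
        let n = r.read(&mut buf).await?;
        if n == 0 {
            return Ok(total); // EOF
        }
        w.write_all(&buf[..n]).await?;
        total += n as u64;
        // A completely full read suggests more data is pending: grow a tier.
        if n == buf.len() && level + 1 < SIZES.len() {
            level += 1;
            buf = vec![0u8; SIZES[level]];
        }
    }
}
```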
#### `TCP` to `HBONE`
This case ends up being much more complex, as we flow through HTTP2 and TLS.
The full flow looks like this (pseudocode):
```raw
copy_bidi():
    loop {
        data = tcp_in.read(up to 256k) # based on dynamic buffer size
        h2.write(data)
    }

h2::write(data):
    Buffer data as a DATA frame, up to a max of `max_send_buffer_size`. We configure this to 256k.
    Asynchronously, the connection driver will pick up this data and call `rustls.write_vectored([256bytes, rest of data])`.

rustls::write(data):
    data = encrypt(data)
    # TLS records are at most 16k
    # In practice I have observed at most 4 chunks; unclear where this is configured.
    tcp_out.write_vectored([chunks of 16k])
```
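The two knobs named here and in the receive path below map onto the `h2` crate's client builder; a sketch using the values from this document (not ztunnel's actual builder code):

```rust
// Illustrative h2 client settings.
fn h2_builder() -> h2::client::Builder {
    let mut b = h2::client::Builder::new();
    // Buffer up to 256k of DATA per stream before applying backpressure.
    b.max_send_buffer_size(256 * 1024);
    // Advertise that we accept frames up to 1mb (`config.frame_size` below).
    b.max_frame_size(1024 * 1024);
    b
}
```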
From an `iperf` load, this ends up looking something like this in `strace`:
```raw
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
55.21 0.841290 5 140711 writev
44.78 0.682359 17 38481 recvfrom
```
This will be from `writev([16kb * 4])` calls and `recvfrom(256kb)`.
#### `HBONE` to `TCP`
This flow is substantially different from the inverse direction.
The receive flow is driven by `h2`. Under the hood this uses a [`LengthDelimitedCodec`](https://docs.rs/tokio-util/latest/tokio_util/codec/length_delimited/struct.LengthDelimitedCodec.html).
`h2` will attempt to decode 1 frame at a time, using an internal buffer.
This buffer starts at [`8kb`](https://github.com/tokio-rs/tokio/blob/ed4ddf443d93c3e14ae23699a5a2f81902ad1e66/tokio-util/src/codec/framed_impl.rs#L26) but will grow to meet the size of frames.
We allow up to a max of `1mb` frame sizes (`config.frame_size`).
Ultimately, this will call `rustls.read(buf)`.
This goes through a few indirections, but ultimately ends up in `rustls.deframer_buffer`.
This is what calls `read()` on the underlying IO, in our case the TCP connection.
This buffer is configured to do [`4kb`](https://github.com/rustls/rustls/blob/8a8023addb9ae311f66b16e272e85654c9588eeb/rustls/src/msgs/deframer.rs#L724) reads generally.
Upon reading the frame from the wire, these get [buffered up by `h2`](https://github.com/hyperium/h2/blob/4617f49b266d560a773372a90be283ba8b2400a9/src/proto/streams/stream.rs#L100).
We read these in [`recv_stream.poll_data`](../src/proxy/h2.rs), triggered by the `copy_bidirectional`.
Ultimately, this will write out one DATA frame worth of data to the upstream TCP connection.
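A minimal sketch of that receive loop in terms of `h2`'s public API (ztunnel's wrapper in `src/proxy/h2.rs` is structured differently):

```rust
use tokio::io::AsyncWriteExt;

// Drain one HBONE stream into the upstream TCP connection, one DATA frame
// at a time, returning flow-control credit as we go.
async fn drain_to_tcp(
    mut body: h2::RecvStream,
    tcp_out: &mut tokio::net::TcpStream,
) -> Result<(), Box<dyn std::error::Error>> {
    while let Some(frame) = body.data().await {
        let frame = frame?; // one DATA frame's worth of bytes
        tcp_out.write_all(&frame).await?;
        // Release the consumed capacity so the peer can keep sending.
        body.flow_control().release_capacity(frame.len())?;
    }
    Ok(())
}
```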
From an `iperf` load, this ends up looking something like this in `strace`:
```raw
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
61.08 1.253541 50 24703 sendto
38.19 0.783733 2 360707 8 recvfrom
```
This will be from `sendto(256kb)` calls, with many `recvfrom()` calls ranging from 4k to 16k.
#### Comparison to Envoy
Under an `iperf` load, Envoy client:
```raw
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
68.24 1.363149 3 440440 1 sendto
31.72 0.633584 11 55114 31 readv
```
This is from many `sendto(16k)` calls, and `readv([16k]*8)`.
Envoy Server:
```raw
% time seconds usecs/call calls errors syscall
------ ----------- ----------- --------- --------- ----------------
65.24 1.199264 1 757275 8 recvfrom
34.73 0.638315 26 23670 writev
```
This is from many calls of `recvfrom(5); recvfrom(16k)`, and `writev([16k]*16)`.
(All strace commands above filter on `-e trace=write,writev,read,recvfrom,sendto,readv`.)
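A summary table like the ones above can be collected with something along these lines (the `pgrep` lookup is illustrative):

```shell
# Attach to a running ztunnel while iperf drives traffic; press Ctrl-C to
# detach and print the per-syscall summary table (-c). -f follows all threads.
sudo strace -c -f \
  -e trace=write,writev,read,recvfrom,sendto,readv \
  -p "$(pgrep -o ztunnel)"
```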

benches/basic.rs (new file, 202 lines)

@ -0,0 +1,202 @@
// Copyright Istio Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::net::SocketAddr;
use std::sync::{Arc, RwLock};
use std::time::Duration;
use bytes::Bytes;
use criterion::{Criterion, Throughput, criterion_group, criterion_main};
use hickory_resolver::config::{ResolverConfig, ResolverOpts};
use pprof::criterion::{Output, PProfProfiler};
use prometheus_client::registry::Registry;
use tokio::runtime::Runtime;
use ztunnel::state::workload::Workload;
use ztunnel::state::{DemandProxyState, ProxyState, ServiceResolutionMode};
use ztunnel::strng;
use ztunnel::xds::ProxyStateUpdateMutator;
use ztunnel::xds::istio::workload::LoadBalancing;
use ztunnel::xds::istio::workload::Port;
use ztunnel::xds::istio::workload::Service as XdsService;
use ztunnel::xds::istio::workload::Workload as XdsWorkload;
use ztunnel::xds::istio::workload::load_balancing;
use ztunnel::xds::istio::workload::{NetworkAddress as XdsNetworkAddress, PortList};
pub fn xds(c: &mut Criterion) {
use ztunnel::xds::istio::workload::Port;
use ztunnel::xds::istio::workload::Service as XdsService;
use ztunnel::xds::istio::workload::Workload as XdsWorkload;
use ztunnel::xds::istio::workload::{NetworkAddress as XdsNetworkAddress, PortList};
let mut c = c.benchmark_group("xds");
c.measurement_time(Duration::from_secs(5));
c.bench_function("insert-remove", |b| {
b.iter(|| {
let svc = XdsService {
hostname: "example.com".to_string(),
addresses: vec![XdsNetworkAddress {
network: "".to_string(),
address: vec![127, 0, 0, 3],
}],
..Default::default()
};
let mut state = ProxyState::new(None);
let updater = ProxyStateUpdateMutator::new_no_fetch();
updater.insert_service(&mut state, svc).unwrap();
const WORKLOAD_COUNT: usize = 1000;
for i in 0..WORKLOAD_COUNT {
updater
.insert_workload(
&mut state,
XdsWorkload {
uid: format!("cluster1//v1/Pod/default/{i}"),
addresses: vec![Bytes::copy_from_slice(&[
127,
0,
(i / 255) as u8,
(i % 255) as u8,
])],
services: std::collections::HashMap::from([(
"/example.com".to_string(),
PortList {
ports: vec![Port {
service_port: 80,
target_port: 1234,
}],
},
)]),
..Default::default()
},
)
.unwrap();
}
for i in 0..WORKLOAD_COUNT {
updater.remove(&mut state, &strng::format!("cluster1//v1/Pod/default/{i}"));
}
})
});
}
pub fn load_balance(c: &mut Criterion) {
let mut c = c.benchmark_group("load_balance");
c.throughput(Throughput::Elements(1));
c.measurement_time(Duration::from_secs(5));
let mut run = move |name, wl_count, lb: Option<LoadBalancing>| {
let (rt, demand, src_wl, svc_addr) = build_load_balancer(wl_count, lb.clone());
c.bench_function(name, move |b| {
b.to_async(&rt).iter(|| async {
demand
.fetch_upstream(
"".into(),
&src_wl,
svc_addr,
ServiceResolutionMode::Standard,
)
.await
.unwrap()
})
});
};
run("basic-10", 10, None);
run("basic-1000", 1000, None);
run("basic-10000", 10000, None);
let locality = Some(LoadBalancing {
routing_preference: vec![
load_balancing::Scope::Network as i32,
load_balancing::Scope::Region as i32,
load_balancing::Scope::Zone as i32,
load_balancing::Scope::Subzone as i32,
],
mode: load_balancing::Mode::Failover as i32,
health_policy: 0,
});
run("locality-10", 10, locality.clone());
run("locality-1000", 1000, locality.clone());
run("locality-10000", 10000, locality.clone());
}
fn build_load_balancer(
wl_count: usize,
load_balancing: Option<LoadBalancing>,
) -> (Runtime, DemandProxyState, Arc<Workload>, SocketAddr) {
let svc = XdsService {
hostname: "example.com".to_string(),
addresses: vec![XdsNetworkAddress {
network: "".to_string(),
address: vec![127, 0, 0, 3],
}],
ports: vec![Port {
service_port: 80,
target_port: 0,
}],
load_balancing,
..Default::default()
};
let mut state = ProxyState::new(None);
let updater = ProxyStateUpdateMutator::new_no_fetch();
updater.insert_service(&mut state, svc).unwrap();
for i in 0..wl_count {
updater
.insert_workload(
&mut state,
XdsWorkload {
uid: format!("cluster1//v1/Pod/default/{i}"),
addresses: vec![Bytes::copy_from_slice(&[
127,
0,
(i / 255) as u8,
(i % 255) as u8,
])],
services: std::collections::HashMap::from([(
"/example.com".to_string(),
PortList {
ports: vec![Port {
service_port: 80,
target_port: 1234,
}],
},
)]),
..Default::default()
},
)
.unwrap();
}
let mut registry = Registry::default();
let metrics = Arc::new(ztunnel::proxy::Metrics::new(&mut registry));
let demand = DemandProxyState::new(
Arc::new(RwLock::new(state)),
None,
ResolverConfig::default(),
ResolverOpts::default(),
metrics,
);
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.unwrap();
let src_wl = rt
.block_on(demand.fetch_workload_by_uid(&"cluster1//v1/Pod/default/0".into()))
.unwrap();
let svc_addr: SocketAddr = "127.0.0.3:80".parse().unwrap();
(rt, demand, src_wl, svc_addr)
}
criterion_group! {
name = benches;
config = Criterion::default()
.with_profiler(PProfProfiler::new(100, Output::Protobuf))
.warm_up_time(Duration::from_millis(1));
targets = xds, load_balance
}
criterion_main!(benches);


@ -13,71 +13,69 @@
// limitations under the License.
use std::cmp::Ordering::{Equal, Greater, Less};
use std::env;
use std::future::Future;
use std::io::Error;
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
use std::sync::Arc;
use std::sync::mpsc::{Receiver, SyncSender};
use std::sync::{Arc, RwLock};
use std::time::Duration;
use std::{io, thread};
use bytes::BufMut;
use criterion::measurement::Measurement;
use criterion::{
criterion_group, criterion_main, BenchmarkId, Criterion, SamplingMode, Throughput,
BenchmarkGroup, Criterion, SamplingMode, Throughput, criterion_group, criterion_main,
};
use hickory_resolver::config::{ResolverConfig, ResolverOpts};
use pprof::criterion::{Output, PProfProfiler};
use prometheus_client::registry::Registry;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::net::TcpStream;
use tokio::runtime::Runtime;
use tokio::sync::Mutex;
use tracing::info;
use ztunnel::metrics::IncrementRecorder;
use ztunnel::rbac::{Authorization, RbacMatch, StringMatch};
use ztunnel::state::workload::{Protocol, Workload};
use ztunnel::test_helpers::app::TestApp;
use ztunnel::state::workload::{InboundProtocol, Workload};
use ztunnel::state::{DemandProxyState, ProxyRbacContext, ProxyState};
use ztunnel::test_helpers::app::{DestinationAddr, TestApp};
use ztunnel::test_helpers::linux::{TestMode, WorkloadManager};
use ztunnel::test_helpers::tcp::Mode;
use ztunnel::test_helpers::TEST_WORKLOAD_HBONE;
use ztunnel::test_helpers::TEST_WORKLOAD_SOURCE;
use ztunnel::test_helpers::TEST_WORKLOAD_TCP;
use ztunnel::test_helpers::{helpers, tcp};
use ztunnel::test_helpers::{helpers, tcp, test_default_workload};
use ztunnel::xds::LocalWorkload;
use ztunnel::{app, identity, metrics, proxy, test_helpers};
use ztunnel::{app, identity, metrics, proxy, rbac, setup_netns_test, strng, test_helpers};
const KB: usize = 1024;
const MB: usize = 1024 * KB;
const GB: usize = 1024 * MB;
// Must be less than or equal to 254
const MAX_HBONE_WORKLOADS: u8 = 64;
struct TestEnv {
ta: TestApp,
echo_addr: SocketAddr,
direct: TcpStream,
tcp: TcpStream,
hbone: TcpStream,
}
/// initialize_environment sets up a benchmarking environment. This works around issues in async setup with criterion.
/// Since tests are only sending data on existing connections, we setup a connection for each test type in the setup phase.
/// Tests consume the
const N_RULES: usize = 10;
const N_POLICIES: usize = 10_000;
const DUMMY_NETWORK: &str = "198.51.100.0/24";
#[ctor::ctor]
fn initialize_namespace_tests() {
ztunnel::test_helpers::namespaced::initialize_namespace_tests();
}
fn create_test_policies() -> Vec<Authorization> {
let mut policies: Vec<Authorization> = vec![];
let mut rules = vec![];
for _ in 0..N_RULES {
rules.push(vec![vec![RbacMatch {
namespaces: vec![
StringMatch::Prefix("random-prefix-2b123".to_string()),
StringMatch::Suffix("random-postix-2b723".to_string()),
StringMatch::Exact("random-exac-2bc13".to_string()),
StringMatch::Prefix("random-prefix-2b123".into()),
StringMatch::Suffix("random-postix-2b723".into()),
StringMatch::Exact("random-exac-2bc13".into()),
],
not_namespaces: vec![],
service_accounts: vec![],
not_service_accounts: vec![],
principals: vec![
StringMatch::Prefix("random-prefix-2b123".to_string()),
StringMatch::Suffix("random-postix-2b723".to_string()),
StringMatch::Exact("random-exac-2bc13".to_string()),
StringMatch::Prefix("random-prefix-2b123".into()),
StringMatch::Suffix("random-postix-2b723".into()),
StringMatch::Exact("random-exac-2bc13".into()),
],
not_principals: vec![],
source_ips: vec![DUMMY_NETWORK.parse().unwrap()],
@ -91,10 +89,10 @@ fn create_test_policies() -> Vec<Authorization> {
for i in 0..N_POLICIES {
policies.push(Authorization {
name: format!("policy {i}"),
name: strng::format!("policy {i}"),
action: ztunnel::rbac::RbacAction::Deny,
scope: ztunnel::rbac::RbacScope::Global,
namespace: "default".to_string(),
namespace: "default".into(),
rules: rules.clone(),
});
}
@ -102,255 +100,303 @@ fn create_test_policies() -> Vec<Authorization> {
policies
}
fn initialize_environment(
mode: Mode,
policies: Vec<Authorization>,
) -> (Arc<Mutex<TestEnv>>, Runtime) {
if env::var("RUST_LOG").is_err() {
env::set_var("RUST_LOG", "error")
}
helpers::initialize_telemetry();
let rt = tokio::runtime::Builder::new_multi_thread()
fn run_async_blocking<Fut, O>(f: Fut) -> O
where
Fut: Future<Output = O>,
O: Send + 'static,
{
tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.unwrap();
// Global setup: spin up an echo server and ztunnel instance
let (env, _) = rt.block_on(async move {
let cert_manager = identity::mock::new_secret_manager(Duration::from_secs(10));
let port = 80;
let config_source = Some(ztunnel::config::ConfigSource::Static(
test_helpers::local_xds_config(port, None, policies).unwrap(),
));
let config = test_helpers::test_config_with_port_xds_addr_and_root_cert(
port,
None,
None,
config_source,
);
let app = app::build_with_cert(config, cert_manager.clone())
.unwrap()
.block_on(f)
}
#[derive(Clone, Copy, Ord, PartialOrd, PartialEq, Eq)]
pub enum WorkloadMode {
HBONE,
TcpClient,
Direct,
}
#[derive(Clone, Copy, Ord, PartialOrd, PartialEq, Eq)]
pub enum TestTrafficMode {
// Each iteration sends a new request
Request,
// Each iteration establishes a new connection
Connection,
}
#[allow(clippy::type_complexity)]
fn initialize_environment(
ztunnel_mode: WorkloadMode,
traffic_mode: TestTrafficMode,
echo_mode: Mode,
clients: usize,
) -> anyhow::Result<(
WorkloadManager,
SyncSender<usize>,
Receiver<Result<(), io::Error>>,
)> {
let mut manager = setup_netns_test!(TestMode::Shared);
let (server, mut manager) = run_async_blocking(async move {
if ztunnel_mode != WorkloadMode::Direct {
// we need a client ztunnel
manager.deploy_ztunnel("LOCAL").await.unwrap();
}
if ztunnel_mode == WorkloadMode::HBONE {
// we need a server ztunnel
manager.deploy_ztunnel("REMOTE").await.unwrap();
}
let server = manager
.workload_builder("server", "REMOTE")
.register()
.await
.unwrap();
let ta = TestApp::from((&app, cert_manager));
ta.ready().await;
let echo = tcp::TestServer::new(mode, 0).await;
let echo_addr = helpers::with_ip(echo.address(), TEST_WORKLOAD_SOURCE.parse().unwrap());
let t = tokio::spawn(async move {
let _ = tokio::join!(app.wait_termination(), echo.run());
});
let mut hbone = ta
.socks5_connect(
helpers::with_ip(echo_addr, TEST_WORKLOAD_HBONE.parse().unwrap()),
TEST_WORKLOAD_SOURCE.parse().unwrap(),
)
.await;
let mut tcp = ta
.socks5_connect(
helpers::with_ip(echo_addr, TEST_WORKLOAD_TCP.parse().unwrap()),
TEST_WORKLOAD_SOURCE.parse().unwrap(),
)
.await;
let mut direct = TcpStream::connect(echo_addr).await.unwrap();
direct.set_nodelay(true).unwrap();
info!("setup complete");
let client_mode = match mode {
Mode::ReadWrite => Mode::ReadWrite,
Mode::ReadDoubleWrite => Mode::ReadDoubleWrite,
Mode::Write => Mode::Read,
Mode::Read => Mode::Write,
};
// warmup: send 1 byte so we ensure we have the full connection setup.
tcp::run_client(&mut hbone, 1, client_mode).await.unwrap();
tcp::run_client(&mut tcp, 1, client_mode).await.unwrap();
tcp::run_client(&mut direct, 1, client_mode).await.unwrap();
info!("warmup complete");
(
Arc::new(Mutex::new(TestEnv {
hbone,
tcp,
direct,
ta,
echo_addr,
})),
t,
)
(server, manager)
});
(env, rt)
server
.run_ready(move |ready| async move {
let echo = tcp::TestServer::new(echo_mode, 8080).await;
ready.set_ready();
echo.run().await;
Ok(())
})
.unwrap();
let echo_addr = SocketAddr::new(manager.resolver().resolve("server").unwrap(), 8080);
let (tx, rx) = std::sync::mpsc::sync_channel::<usize>(0);
let (ack_tx, ack_rx) = std::sync::mpsc::sync_channel::<Result<(), io::Error>>(0);
let client_mode = match echo_mode {
Mode::ReadWrite => Mode::ReadWrite,
Mode::ReadDoubleWrite => Mode::ReadDoubleWrite,
Mode::Write => Mode::Read,
Mode::Read => Mode::Write,
Mode::Forward(_) => todo!("not implemented for benchmark"),
Mode::ForwardProxyProtocol => todo!("not implemented for benchmark"),
};
let clients: Vec<_> = (0..clients)
.map(|id| spawn_client(id, &mut manager, traffic_mode, echo_addr, client_mode))
.collect();
thread::spawn(move || {
while let Ok(size) = rx.recv() {
// Send request to all clients
for c in &clients {
c.tx.send(size).unwrap()
}
// Then wait for all completions -- this must be done in a separate loop to allow parallel processing.
for c in &clients {
if let Err(e) = c.ack.recv().unwrap() {
// Failed
ack_tx.send(Err(e)).unwrap();
return;
}
}
// Success
ack_tx.send(Ok(())).unwrap();
}
});
Ok((manager, tx, ack_rx))
}
pub fn latency(c: &mut Criterion) {
let (env, rt) = initialize_environment(Mode::ReadWrite, vec![]);
let mut c = c.benchmark_group("latency");
for size in [1usize, KB] {
c.bench_with_input(BenchmarkId::new("direct", size), &size, |b, size| {
b.to_async(&rt).iter(|| async {
tcp::run_client(&mut env.lock().await.direct, *size, Mode::ReadWrite).await
fn spawn_client(
i: usize,
manager: &mut WorkloadManager,
traffic_mode: TestTrafficMode,
echo_addr: SocketAddr,
client_mode: Mode,
) -> TestClient {
let client = run_async_blocking(async move {
manager
.workload_builder(&format!("client-{i}"), "LOCAL")
.register()
.await
.unwrap()
});
let (tx, rx) = std::sync::mpsc::sync_channel::<usize>(0);
let (ack_tx, ack_rx) = std::sync::mpsc::sync_channel::<Result<(), io::Error>>(0);
if traffic_mode == TestTrafficMode::Request {
client
.run_ready(move |ready| async move {
let mut conn = TcpStream::connect(echo_addr).await.unwrap();
conn.set_nodelay(true).unwrap();
info!("setup complete");
// warmup: send 1 byte so we ensure we have the full connection setup.
tcp::run_client(&mut conn, 1, client_mode).await.unwrap();
info!("warmup complete");
ready.set_ready();
// Accept requests and process them
while let Ok(size) = rx.recv() {
// Send `size` bytes.
let res = tcp::run_client(&mut conn, size, client_mode).await;
// Report we are done.
ack_tx.send(res).unwrap();
}
Ok(())
})
});
c.bench_with_input(BenchmarkId::new("tcp", size), &size, |b, size| {
b.to_async(&rt).iter(|| async {
tcp::run_client(&mut env.lock().await.tcp, *size, Mode::ReadWrite).await
.unwrap();
} else {
client
.run_ready(move |ready| async move {
ready.set_ready();
// Accept requests and process them
while let Ok(size) = rx.recv() {
// Open connection
let mut conn = TcpStream::connect(echo_addr).await.unwrap();
conn.set_nodelay(true).unwrap();
// Send `size` bytes.
let res = tcp::run_client(&mut conn, size, client_mode).await;
// Report we are done.
ack_tx.send(res).unwrap();
}
Ok(())
})
});
c.bench_with_input(BenchmarkId::new("hbone", size), &size, |b, size| {
b.to_async(&rt).iter(|| async {
tcp::run_client(&mut env.lock().await.hbone, *size, Mode::ReadWrite).await
})
});
.unwrap();
}
TestClient { tx, ack: ack_rx }
}
pub fn rbac_latency(c: &mut Criterion) {
let (env, rt) = initialize_environment(Mode::ReadWrite, create_test_policies());
let mut c = c.benchmark_group("rbac_latency");
for size in [1usize, KB] {
c.bench_with_input(BenchmarkId::new("direct", size), &size, |b, size| {
b.to_async(&rt).iter(|| async {
tcp::run_client(&mut env.lock().await.direct, *size, Mode::ReadWrite).await
})
});
c.bench_with_input(BenchmarkId::new("tcp", size), &size, |b, size| {
b.to_async(&rt).iter(|| async {
tcp::run_client(&mut env.lock().await.tcp, *size, Mode::ReadWrite).await
})
});
c.bench_with_input(BenchmarkId::new("hbone", size), &size, |b, size| {
b.to_async(&rt).iter(|| async {
tcp::run_client(&mut env.lock().await.hbone, *size, Mode::ReadWrite).await
})
});
}
struct TestClient {
tx: SyncSender<usize>,
ack: Receiver<Result<(), Error>>,
}
pub fn throughput(c: &mut Criterion) {
let (env, rt) = initialize_environment(Mode::Read, vec![]);
const THROUGHPUT_SEND_SIZE: usize = GB;
fn run_throughput<T: Measurement>(
c: &mut BenchmarkGroup<T>,
name: &str,
mode: WorkloadMode,
clients: usize,
) {
let (_manager, tx, ack) =
initialize_environment(mode, TestTrafficMode::Request, Mode::Read, clients).unwrap();
let size = THROUGHPUT_SEND_SIZE / clients;
c.bench_function(name, |b| {
b.iter(|| {
tx.send(size).unwrap();
ack.recv().unwrap().unwrap();
})
});
}
let mut c = c.benchmark_group("throughput");
let size: usize = 10 * MB;
c.throughput(Throughput::Bytes(size as u64));
// Measure in bits, not bytes, to match tools like iperf
c.throughput(Throughput::Elements((THROUGHPUT_SEND_SIZE * 8) as u64));
// Test takes a while, so reduce how many iterations we run
c.sample_size(10);
c.sampling_mode(SamplingMode::Flat);
c.measurement_time(Duration::from_secs(5));
c.bench_with_input("direct", &size, |b, size| {
b.to_async(&rt).iter(|| async {
tcp::run_client(&mut env.lock().await.direct, *size, Mode::Write).await
})
});
c.bench_with_input("tcp", &size, |b, size| {
b.to_async(&rt)
.iter(|| async { tcp::run_client(&mut env.lock().await.tcp, *size, Mode::Write).await })
});
c.bench_with_input("hbone", &size, |b, size| {
b.to_async(&rt).iter(|| async {
tcp::run_client(&mut env.lock().await.hbone, *size, Mode::Write).await
})
});
// Send request in various modes.
// Each test will use a pre-existing connection and send 1GB for multiple iterations
for clients in [1, 2, 8] {
run_throughput(
&mut c,
&format!("direct{clients}"),
WorkloadMode::Direct,
clients,
);
run_throughput(
&mut c,
&format!("tcp{clients}"),
WorkloadMode::TcpClient,
clients,
);
run_throughput(
&mut c,
&format!("hbone{clients}"),
WorkloadMode::HBONE,
clients,
);
}
}
pub fn rbac_throughput(c: &mut Criterion) {
let (env, rt) = initialize_environment(Mode::Read, create_test_policies());
let mut c = c.benchmark_group("rbac_throughput");
pub fn latency(c: &mut Criterion) {
const LATENCY_SEND_SIZE: usize = KB;
fn run_latency<T: Measurement>(c: &mut BenchmarkGroup<T>, name: &str, mode: WorkloadMode) {
let (_manager, tx, ack) =
initialize_environment(mode, TestTrafficMode::Request, Mode::Read, 1).unwrap();
c.bench_function(name, |b| {
b.iter(|| {
tx.send(LATENCY_SEND_SIZE).unwrap();
ack.recv().unwrap().unwrap();
})
});
}
let size: usize = 10 * MB;
c.throughput(Throughput::Bytes(size as u64));
let mut c = c.benchmark_group("latency");
// Measure in RPS
c.throughput(Throughput::Elements(1));
// Test takes a while, so reduce how many iterations we run
c.sample_size(10);
c.sampling_mode(SamplingMode::Flat);
c.measurement_time(Duration::from_secs(5));
c.bench_with_input("direct", &size, |b, size| {
b.to_async(&rt).iter(|| async {
tcp::run_client(&mut env.lock().await.direct, *size, Mode::Write).await
})
});
c.bench_with_input("tcp", &size, |b, size| {
b.to_async(&rt)
.iter(|| async { tcp::run_client(&mut env.lock().await.tcp, *size, Mode::Write).await })
});
c.bench_with_input("hbone", &size, |b, size| {
b.to_async(&rt).iter(|| async {
tcp::run_client(&mut env.lock().await.hbone, *size, Mode::Write).await
})
});
// Send request in various modes.
// Each test will use a pre-existing connection and send 1GB for multiple iterations
run_latency(&mut c, "direct", WorkloadMode::Direct);
run_latency(&mut c, "tcp", WorkloadMode::TcpClient);
run_latency(&mut c, "hbone", WorkloadMode::HBONE);
}
pub fn connections(c: &mut Criterion) {
let (env, rt) = initialize_environment(Mode::ReadWrite, vec![]);
fn run_connections<T: Measurement>(c: &mut BenchmarkGroup<T>, name: &str, mode: WorkloadMode) {
let (_manager, tx, ack) =
initialize_environment(mode, TestTrafficMode::Connection, Mode::ReadWrite, 1).unwrap();
c.bench_function(name, |b| {
b.iter(|| {
tx.send(1).unwrap();
ack.recv().unwrap().unwrap();
})
});
}
let mut c = c.benchmark_group("connections");
c.bench_function("direct", |b| {
b.to_async(&rt).iter(|| async {
let e = env.lock().await;
let mut s = TcpStream::connect(e.echo_addr).await.unwrap();
s.set_nodelay(true).unwrap();
tcp::run_client(&mut s, 1, Mode::ReadWrite).await
})
});
c.bench_function("tcp", |b| {
b.to_async(&rt).iter(|| async {
let e = env.lock().await;
let mut s =
e.ta.socks5_connect(
helpers::with_ip(e.echo_addr, TEST_WORKLOAD_TCP.parse().unwrap()),
TEST_WORKLOAD_SOURCE.parse().unwrap(),
)
.await;
tcp::run_client(&mut s, 1, Mode::ReadWrite).await
})
});
// This tests connection time over an existing HBONE connection.
c.bench_function("hbone", |b| {
b.to_async(&rt).iter(|| async {
let e = env.lock().await;
let mut s =
e.ta.socks5_connect(
helpers::with_ip(e.echo_addr, TEST_WORKLOAD_HBONE.parse().unwrap()),
TEST_WORKLOAD_SOURCE.parse().unwrap(),
)
.await;
tcp::run_client(&mut s, 1, Mode::ReadWrite).await
})
});
// Measure in connections/s
c.throughput(Throughput::Elements(1));
// Send request in various modes.
// Each test will use a pre-existing connection and send 1GB for multiple iterations
run_connections(&mut c, "direct", WorkloadMode::Direct);
run_connections(&mut c, "tcp", WorkloadMode::TcpClient);
run_connections(&mut c, "hbone", WorkloadMode::HBONE);
}
pub fn rbac_connections(c: &mut Criterion) {
let (env, rt) = initialize_environment(Mode::ReadWrite, create_test_policies());
let mut c = c.benchmark_group("rbac_connections");
c.bench_function("direct", |b| {
pub fn rbac(c: &mut Criterion) {
let policies = create_test_policies();
let mut state = ProxyState::new(None);
for p in policies {
state.policies.insert(p.to_key(), p);
}
let mut registry = Registry::default();
let metrics = Arc::new(crate::proxy::Metrics::new(&mut registry));
let mock_proxy_state = DemandProxyState::new(
Arc::new(RwLock::new(state)),
None,
ResolverConfig::default(),
ResolverOpts::default(),
metrics,
);
let rc = ProxyRbacContext {
conn: rbac::Connection {
src: "127.0.0.1:12345".parse().unwrap(),
dst: "127.0.0.2:12345".parse().unwrap(),
src_identity: None,
dst_network: "".into(),
},
dest_workload: Arc::new(test_default_workload()),
};
let rt = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.unwrap();
c.bench_function("rbac", |b| {
b.to_async(&rt).iter(|| async {
let e = env.lock().await;
let mut s = TcpStream::connect(e.echo_addr).await.unwrap();
s.set_nodelay(true).unwrap();
tcp::run_client(&mut s, 1, Mode::ReadWrite).await
})
});
c.bench_function("tcp", |b| {
b.to_async(&rt).iter(|| async {
let e = env.lock().await;
let mut s =
e.ta.socks5_connect(
helpers::with_ip(e.echo_addr, TEST_WORKLOAD_TCP.parse().unwrap()),
TEST_WORKLOAD_SOURCE.parse().unwrap(),
)
.await;
tcp::run_client(&mut s, 1, Mode::ReadWrite).await
})
});
// TODO(https://github.com/istio/ztunnel/issues/15): when we have pooling, split this into "new hbone connection"
// and "new connection on existing HBONE connection"
c.bench_function("hbone", |b| {
b.to_async(&rt).iter(|| async {
let e = env.lock().await;
let mut s =
e.ta.socks5_connect(
helpers::with_ip(e.echo_addr, TEST_WORKLOAD_HBONE.parse().unwrap()),
TEST_WORKLOAD_SOURCE.parse().unwrap(),
)
.await;
tcp::run_client(&mut s, 1, Mode::ReadWrite).await
let _ = mock_proxy_state.assert_rbac(&rc).await;
})
});
}
@ -362,14 +408,16 @@ pub fn metrics(c: &mut Criterion) {
let mut c = c.benchmark_group("metrics");
c.bench_function("write", |b| {
b.iter(|| {
metrics.increment(&proxy::ConnectionOpen {
let co = proxy::ConnectionOpen {
reporter: Default::default(),
source: Some(test_helpers::test_default_workload()),
source: Some(Arc::new(test_helpers::test_default_workload())),
derived_source: None,
destination: None,
destination_service: None,
connection_security_policy: Default::default(),
})
};
let tl = proxy::CommonTrafficLabels::from(co);
metrics.connection_opens.get_or_create(&tl).inc();
})
});
c.bench_function("encode", |b| {
@ -409,17 +457,30 @@ fn hbone_connection_config() -> ztunnel::config::ConfigSource {
let lwl = LocalWorkload {
workload: Workload {
workload_ips: vec![hbone_connection_ip(i)],
protocol: Protocol::HBONE,
uid: format!("cluster1//v1/Pod/default/local-source{}", i),
name: format!("workload-{}", i),
namespace: format!("namespace-{}", i),
service_account: format!("service-account-{}", i),
protocol: InboundProtocol::HBONE,
uid: strng::format!("cluster1//v1/Pod/default/remote{}", i),
name: strng::format!("workload-{}", i),
namespace: strng::format!("namespace-{}", i),
service_account: strng::format!("service-account-{}", i),
..test_helpers::test_default_workload()
},
services: Default::default(),
};
workloads.push(lwl);
}
let lwl = LocalWorkload {
workload: Workload {
workload_ips: vec![],
protocol: InboundProtocol::HBONE,
uid: "cluster1//v1/Pod/default/local-source".into(),
name: "local-source".into(),
namespace: "default".into(),
service_account: "default".into(),
..test_helpers::test_default_workload()
},
services: Default::default(),
};
workloads.push(lwl);
let lc = ztunnel::xds::LocalConfig {
workloads,
@ -438,6 +499,7 @@ fn hbone_connection_config() -> ztunnel::config::ConfigSource {
/// connection. Instead, we register MAX_HBONE_WORKLOADS giving us O(MAX_HBONE_WORKLOADS^2)
/// source/destination IP combinations which is (hopefully) enough.
fn hbone_connections(c: &mut Criterion) {
helpers::run_command("ip link set dev lo up").unwrap();
let rt = tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()
@ -454,7 +516,7 @@ fn hbone_connections(c: &mut Criterion) {
None,
config_source,
);
let app = app::build_with_cert(config, cert_manager.clone())
let app = app::build_with_cert(Arc::new(config), cert_manager.clone())
.await
.unwrap();
let ta = TestApp::from((&app, cert_manager));
@ -471,10 +533,12 @@ fn hbone_connections(c: &mut Criterion) {
let ta: Arc<Mutex<TestApp>> = Arc::new(Mutex::new(ta));
let addresses = Arc::new(Mutex::new((1u8, 2u8)));
let mut c = c.benchmark_group("hbone_connection");
let mut c = c.benchmark_group("hbone_connections");
// WARNING: increasing the measurement time could lead to running out of IP pairs or having too
// many open connections.
c.measurement_time(Duration::from_secs(5));
// Connections/second
c.throughput(Throughput::Elements(1));
c.bench_function("connect_request_response", |b| {
b.to_async(&rt).iter(|| async {
let bench = async {
@ -488,7 +552,7 @@ fn hbone_connections(c: &mut Criterion) {
// Start HBONE connection
let mut hbone = ta
.socks5_connect(helpers::with_ip(echo_addr, dest_addr), source_addr)
.socks5_connect(DestinationAddr::Ip(helpers::with_ip(echo_addr, dest_addr)), source_addr)
.await;
// TCP ping
@ -511,7 +575,7 @@ criterion_group! {
config = Criterion::default()
.with_profiler(PProfProfiler::new(100, Output::Protobuf))
.warm_up_time(Duration::from_millis(1));
targets = hbone_connections, latency, throughput, connections, rbac_latency, rbac_throughput, rbac_connections,
targets = hbone_connections
}
criterion_main!(benches);


@ -18,6 +18,9 @@ use std::process::Command;
// This build script is used to generate the rust source files that
// we need for XDS GRPC communication.
fn main() -> Result<(), anyhow::Error> {
// Fuzzing uses custom cfg (https://rust-fuzz.github.io/book/cargo-fuzz/guide.html)
// Tell cargo to expect this (https://doc.rust-lang.org/nightly/rustc/check-cfg/cargo-specifics.html).
println!("cargo::rustc-check-cfg=cfg(fuzzing)");
let proto_files = [
"proto/xds.proto",
"proto/workload.proto",
@ -46,7 +49,7 @@ fn main() -> Result<(), anyhow::Error> {
};
tonic_build::configure()
.build_server(true)
.compile_with_config(
.compile_protos_with_config(
config,
&proto_files
.iter()
@ -76,7 +79,7 @@ fn main() -> Result<(), anyhow::Error> {
for line in String::from_utf8(output.stdout).unwrap().lines() {
// Each line looks like `istio.io/pkg/version.buildGitRevision=abc`
if let Some((key, value)) = line.split_once('=') {
let key = key.split('.').last().unwrap();
let key = key.split('.').next_back().unwrap();
println!("cargo:rustc-env=ZTUNNEL_BUILD_{key}={value}");
} else {
println!("cargo:warning=invalid build output {line}");


@ -1 +1 @@
9088296b1343d1a3b2e3f822f6d7942ff2de7a15
d46067e1a8ba3db4abe2635af5807f00ba1981e6


@ -50,7 +50,7 @@ lint-python:
@${FINDFILES} -name '*.py' \( ! \( -name '*_pb2.py' \) \) -print0 | ${XARGS} autopep8 --max-line-length 160 --exit-code -d
lint-markdown:
@${FINDFILES} -name '*.md' -print0 | ${XARGS} mdl --ignore-front-matter --style common/config/mdl.rb
@${FINDFILES} -name '*.md' -not -path './manifests/addons/dashboards/*' -print0 | ${XARGS} mdl --ignore-front-matter --style common/config/mdl.rb
lint-links:
@${FINDFILES} -name '*.md' -print0 | ${XARGS} awesome_bot --skip-save-results --allow_ssl --allow-timeout --allow-dupe --allow-redirect --white-list ${MARKDOWN_LINT_ALLOWLIST}
@ -106,13 +106,11 @@ update-common:
@if [ "$(CONTRIB_OVERRIDE)" != "CONTRIBUTING.md" ]; then\
rm $(TMP)/common-files/files/CONTRIBUTING.md;\
fi
# istio/istio.io uses the Creative Commons Attribution 4.0 license. Don't update LICENSE with the common Apache license.
@LICENSE_OVERRIDE=$(shell grep -l "Creative Commons Attribution 4.0 International Public License" LICENSE)
@if [ "$(LICENSE_OVERRIDE)" != "LICENSE" ]; then\
rm $(TMP)/common-files/files/LICENSE;\
fi
@cp -a $(TMP)/common-files/files/* $(TMP)/common-files/files/.devcontainer $(TMP)/common-files/files/.gitattributes $(shell pwd)
@rm -fr $(TMP)/common-files
@if [ "$(AUTOMATOR_REPO)" == "proxy" ]; then\
sed -i -e 's/build-tools:/build-tools-proxy:/g' .devcontainer/devcontainer.json;\
fi
@$(or $(COMMONFILES_POSTPROCESS), true)
check-clean-repo:


@ -1,56 +0,0 @@
# WARNING: DO NOT EDIT, THIS FILE IS PROBABLY A COPY
#
# The original version of this file is located in the https://github.com/istio/common-files repo.
# If you're looking at this file in a different repo and want to make a change, please go to the
# common-files repo, make the change there and check it in. Then come back to this repo and run
# "make update-common".
run:
# Timeout for analysis, e.g. 30s, 5m.
# Default: 1m
timeout: 20m
build-tags:
- integ
- integfuzz
linters:
disable-all: true
enable:
- goimports
- gofumpt
- gci
fast: false
linters-settings:
gci:
sections:
- standard # Captures all standard packages if they do not match another section.
- default # Contains all imports that could not be matched to another section type.
- prefix(istio.io/) # Groups all imports with the specified Prefix.
goimports:
# put imports beginning with prefix after 3rd-party packages;
# it's a comma-separated list of prefixes
local-prefixes: istio.io/
issues:
# Which dirs to exclude: issues from them won't be reported.
# Can use regexp here: `generated.*`, regexp is applied on full path,
# including the path prefix if one is set.
# Default dirs are skipped independently of this option's value (see exclude-dirs-use-default).
# "/" will be replaced by current OS file path separator to properly work on Windows.
# Default: []
exclude-dirs:
- genfiles$
- vendor$
# Which files to exclude: they will be analyzed, but issues from them won't be reported.
# There is no need to include all autogenerated files,
# we confidently recognize autogenerated files.
# If it's not, please let us know.
# "/" will be replaced by current OS file path separator to properly work on Windows.
# Default: []
exclude-files:
- ".*\\.pb\\.go"
- ".*\\.gen\\.go"
# Maximum issues count per one linter.
# Set to 0 to disable.
# Default: 50
max-issues-per-linter: 0
# Maximum count of issues with the same text. Set to 0 to disable. Default is 3.
max-same-issues: 0


@ -1,260 +1,221 @@
# WARNING: DO NOT EDIT, THIS FILE IS PROBABLY A COPY
#
# The original version of this file is located in the https://github.com/istio/common-files repo.
# If you're looking at this file in a different repo and want to make a change, please go to the
# common-files repo, make the change there and check it in. Then come back to this repo and run
# "make update-common".
version: "2"
run:
# Timeout for analysis, e.g. 30s, 5m.
# Default: 1m
timeout: 20m
build-tags:
- integ
- integfuzz
linters:
disable-all: true
default: none
enable:
- errcheck
- exportloopref
- copyloopvar
- depguard
- errcheck
- gocritic
- gofumpt
- goimports
- revive
- gosimple
- gosec
- govet
- ineffassign
- lll
- misspell
- revive
- staticcheck
- stylecheck
- typecheck
- unconvert
- unparam
- unused
- gci
- gosec
fast: false
linters-settings:
errcheck:
# report about not checking of errors in type assertions: `a := b.(MyStruct)`;
# default is false: such cases aren't reported by default.
check-type-assertions: false
# report about assignment of errors to blank identifier: `num, _ := strconv.Atoi(numStr)`;
# default is false: such cases aren't reported by default.
check-blank: false
govet:
disable:
# report about shadowed variables
- shadow
goimports:
# put imports beginning with prefix after 3rd-party packages;
# it's a comma-separated list of prefixes
local-prefixes: istio.io/
misspell:
# Correct spellings using locale preferences for US or UK.
# Default is to use a neutral variety of English.
# Setting locale to US will correct the British spelling of 'colour' to 'color'.
locale: US
ignore-words:
- cancelled
lll:
# max line length, lines longer will be reported. Default is 120.
# '\t' is counted as 1 character by default, and can be changed with the tab-width option
line-length: 160
# tab width in spaces. Default to 1.
tab-width: 1
revive:
ignore-generated-header: false
severity: "warning"
confidence: 0.0
settings:
depguard:
rules:
DenyGogoProtobuf:
files:
- $all
deny:
- pkg: github.com/gogo/protobuf
desc: gogo/protobuf is deprecated, use golang/protobuf
errcheck:
check-type-assertions: false
check-blank: false
gocritic:
disable-all: true
enabled-checks:
- appendCombine
- argOrder
- assignOp
- badCond
- boolExprSimplify
- builtinShadow
- captLocal
- caseOrder
- codegenComment
- commentedOutCode
- commentedOutImport
- defaultCaseOrder
- deprecatedComment
- docStub
- dupArg
- dupBranchBody
- dupCase
- dupSubExpr
- elseif
- emptyFallthrough
- equalFold
- flagDeref
- flagName
- hexLiteral
- indexAlloc
- initClause
- methodExprCall
- nilValReturn
- octalLiteral
- offBy1
- rangeExprCopy
- regexpMust
- sloppyLen
- stringXbytes
- switchTrue
- typeAssertChain
- typeSwitchVar
- typeUnparen
- underef
- unlambda
- unnecessaryBlock
- unslice
- valSwap
- weakCond
gosec:
includes:
- G401
- G402
- G404
govet:
disable:
- shadow
lll:
line-length: 160
tab-width: 1
misspell:
locale: US
ignore-rules:
- cancelled
revive:
confidence: 0
severity: warning
rules:
- name: blank-imports
- name: context-keys-type
- name: time-naming
- name: var-declaration
- name: unexported-return
- name: errorf
- name: context-as-argument
- name: dot-imports
- name: error-return
- name: error-strings
- name: error-naming
- name: increment-decrement
- name: var-naming
- name: package-comments
- name: range
- name: receiver-naming
- name: indent-error-flow
- name: superfluous-else
- name: modifies-parameter
- name: unreachable-code
- name: struct-tag
- name: constant-logical-expr
- name: bool-literal-in-expr
- name: redefines-builtin-id
- name: imports-blocklist
- name: range-val-in-closure
- name: range-val-address
- name: waitgroup-by-value
- name: atomic
- name: call-to-gc
- name: duplicated-imports
- name: string-of-int
- name: defer
arguments:
- - call-chain
- name: unconditional-recursion
- name: identical-branches
unparam:
check-exported: false
exclusions:
generated: lax
presets:
- comments
- common-false-positives
- legacy
- std-error-handling
rules:
- name: blank-imports
- name: context-keys-type
- name: time-naming
- name: var-declaration
- name: unexported-return
- name: errorf
- name: context-as-argument
- name: dot-imports
- name: error-return
- name: error-strings
- name: error-naming
- name: increment-decrement
- name: var-naming
- name: package-comments
- name: range
- name: receiver-naming
- name: indent-error-flow
- name: superfluous-else
- name: modifies-parameter
- name: unreachable-code
- name: struct-tag
- name: constant-logical-expr
- name: bool-literal-in-expr
- name: redefines-builtin-id
- name: imports-blacklist
- name: range-val-in-closure
- name: range-val-address
- name: waitgroup-by-value
- name: atomic
- name: call-to-gc
- name: duplicated-imports
- name: string-of-int
- name: defer
arguments:
- - "call-chain"
- name: unconditional-recursion
- name: identical-branches
# the following rules can be enabled in the future
# - name: empty-lines
# - name: confusing-results
# - name: empty-block
# - name: get-return
# - name: confusing-naming
# - name: unexported-naming
# - name: early-return
# - name: unused-parameter
# - name: unnecessary-stmt
# - name: deep-exit
# - name: import-shadowing
# - name: modifies-value-receiver
# - name: unused-receiver
# - name: bare-return
# - name: flag-parameter
# - name: unhandled-error
# - name: if-return
unparam:
# Inspect exported functions, default is false. Set to true if no external program/library imports your code.
# XXX: if you enable this setting, unparam will report a lot of false-positives in text editors:
# if it's called for subdir of a project it can't find external interfaces. All text editor integrations
# with golangci-lint call it on a directory with the changed file.
check-exported: false
gci:
sections:
- standard # Captures all standard packages if they do not match another section.
- default # Contains all imports that could not be matched to another section type.
- prefix(istio.io/) # Groups all imports with the specified Prefix.
gocritic:
# Disable all checks.
# Default: false
disable-all: true
# Which checks should be enabled in addition to default checks. Since we don't want
# all of the default checks, we do the disable-all first.
enabled-checks:
- appendCombine
- argOrder
- assignOp
- badCond
- boolExprSimplify
- builtinShadow
- captLocal
- caseOrder
- codegenComment
- commentedOutCode
- commentedOutImport
- defaultCaseOrder
- deprecatedComment
- docStub
- dupArg
- dupBranchBody
- dupCase
- dupSubExpr
- elseif
- emptyFallthrough
- equalFold
- flagDeref
- flagName
- hexLiteral
- indexAlloc
- initClause
- methodExprCall
- nilValReturn
- octalLiteral
- offBy1
- rangeExprCopy
- regexpMust
- sloppyLen
- stringXbytes
- switchTrue
- typeAssertChain
- typeSwitchVar
- typeUnparen
- underef
- unlambda
- unnecessaryBlock
- unslice
- valSwap
- weakCond
depguard:
rules:
DenyGogoProtobuf:
files:
- $all
deny:
- pkg: github.com/gogo/protobuf
desc: "gogo/protobuf is deprecated, use golang/protobuf"
gosec:
includes:
- G401
- G402
- G404
- linters:
- errcheck
- maligned
path: _test\.go$|tests/|samples/
- path: _test\.go$
text: 'dot-imports: should not use dot imports'
- linters:
- staticcheck
text: 'SA1019: package github.com/golang/protobuf/jsonpb'
- linters:
- staticcheck
text: 'SA1019: "github.com/golang/protobuf/jsonpb"'
- linters:
- staticcheck
text: 'SA1019: grpc.Dial is deprecated: use NewClient instead'
- linters:
- staticcheck
text: 'SA1019: grpc.DialContext is deprecated: use NewClient instead'
- linters:
- staticcheck
text: 'SA1019: grpc.WithBlock is deprecated'
- linters:
- staticcheck
text: 'SA1019: grpc.FailOnNonTempDialError'
- linters:
- staticcheck
text: 'SA1019: grpc.WithReturnConnectionError'
- path: (.+)\.go$
text: composite literal uses unkeyed fields
# TODO: remove following rule in the future
- linters:
- staticcheck
text: 'QF'
- linters:
- staticcheck
text: 'ST1005'
- linters:
- staticcheck
text: 'S1007'
paths:
- .*\.pb\.go
- .*\.gen\.go
- genfiles$
- vendor$
- third_party$
- builtin$
- examples$
issues:
# List of regexps of issue texts to exclude; the list is empty by default.
# But independently of this option, default exclude patterns are applied;
# they can be disabled with `exclude-use-default: false`. To list all
# patterns excluded by default, run `golangci-lint run --help`.
exclude:
- composite literal uses unkeyed fields
# Which dirs to exclude: issues from them won't be reported.
# Can use regexp here: `generated.*`, regexp is applied on full path,
# including the path prefix if one is set.
# Default dirs are skipped independently of this option's value (see exclude-dirs-use-default).
# "/" will be replaced by current OS file path separator to properly work on Windows.
# Default: []
exclude-dirs:
- genfiles$
- vendor$
# Which files to exclude: they will be analyzed, but issues from them won't be reported.
# There is no need to include all autogenerated files;
# we confidently recognize autogenerated files.
# If one is not recognized, please let us know.
# "/" will be replaced by current OS file path separator to properly work on Windows.
# Default: []
exclude-files:
- ".*\\.pb\\.go"
- ".*\\.gen\\.go"
exclude-rules:
# Exclude some linters from running on test files.
- path: _test\.go$|^tests/|^samples/
linters:
- errcheck
- maligned
- path: _test\.go$
text: "dot-imports: should not use dot imports"
# We need to use the deprecated module since the jsonpb replacement is not backwards compatible.
- linters:
- staticcheck
text: "SA1019: package github.com/golang/protobuf/jsonpb"
- linters:
- staticcheck
text: 'SA1019: "github.com/golang/protobuf/jsonpb"'
# This is not helpful. The new function is not very usable and the current function will not be removed
- linters:
- staticcheck
text: 'SA1019: grpc.Dial is deprecated: use NewClient instead'
- linters:
- staticcheck
text: 'SA1019: grpc.DialContext is deprecated: use NewClient instead'
# Independently of the `exclude` option, default exclude patterns are applied;
# they can be disabled by this option. To list all
# patterns excluded by default, run `golangci-lint run --help`.
# The default value for this option is true.
exclude-use-default: true
# Maximum issues count per one linter.
# Set to 0 to disable.
# Default: 50
max-issues-per-linter: 0
# Maximum count of issues with the same text. Set to 0 to disable. Default is 3.
max-same-issues: 0
formatters:
enable:
- gci
- gofumpt
- goimports
settings:
gci:
sections:
- standard
- default
- prefix(istio.io/)
goimports:
local-prefixes:
- istio.io/
exclusions:
generated: lax
paths:
- .*\.pb\.go
- .*\.gen\.go
- genfiles$
- vendor$
- third_party$
- builtin$
- examples$

View File

@ -14,3 +14,4 @@ trustedRegistries:
- docker.io
- quay.io
- "*.pkg.dev"
- "cgr.dev"

View File

@ -125,4 +125,21 @@ allowlisted_modules:
# Simplified BSD (BSD-2-Clause): https://github.com/russross/blackfriday/blob/master/LICENSE.txt
- github.com/russross/blackfriday
- github.com/russross/blackfriday/v2
# W3C Test Suite License, W3C 3-clause BSD License
# gonum uses this for some of its test files
# gonum.org/v1/gonum/graph/formats/rdf/testdata/LICENSE.md
- gonum.org/v1/gonum
# BSD 3-clause: https://github.com/go-inf/inf/blob/v0.9.1/LICENSE
- gopkg.in/inf.v0
# BSD 3-clause: https://github.com/go-git/gcfg/blob/main/LICENSE
- github.com/go-git/gcfg
# Apache 2.0
- github.com/aws/smithy-go
# Simplified BSD License: https://github.com/gomarkdown/markdown/blob/master/LICENSE.txt
- github.com/gomarkdown/markdown

View File

@ -24,6 +24,7 @@ function write_patch_file() {
git diff > "${PATCH_OUT}"
[ -n "${JOB_NAME}" ] && [ -n "${BUILD_ID}" ]
# shellcheck disable=SC2319
IN_PROW="$?"
# Don't persist large diffs (30M+) on CI

View File

@ -21,4 +21,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
golangci-lint run --fix -c ./common/config/.golangci-format.yml
golangci-lint run --fix -c ./common/config/.golangci.yml

View File

@ -32,7 +32,10 @@ set -x
####################################################################
# DEFAULT_KIND_IMAGE is used to set the Kubernetes version for KinD unless overridden in params to setup_kind_cluster(s)
DEFAULT_KIND_IMAGE="gcr.io/istio-testing/kind-node:v1.28.4"
DEFAULT_KIND_IMAGE="gcr.io/istio-testing/kind-node:v1.33.1"
# the default kind cluster should be ipv4 if not otherwise specified
KIND_IP_FAMILY="${KIND_IP_FAMILY:-ipv4}"
# COMMON_SCRIPTS contains the directory this file is in.
COMMON_SCRIPTS=$(dirname "${BASH_SOURCE:-$0}")
@ -144,7 +147,7 @@ function setup_kind_cluster_retry() {
# 1. NAME: Name of the Kind cluster (optional)
# 2. IMAGE: Node image used by KinD (optional)
# 3. CONFIG: KinD cluster configuration YAML file. If not specified then DEFAULT_CLUSTER_YAML is used
# 4. NOMETALBINSTALL: Dont install matllb if set.
# 4. NOMETALBINSTALL: Don't install MetalLB if set.
# This function returns 0 when everything goes well, or 1 otherwise
# If Kind cluster was already created then it would be cleaned up in case of errors
function setup_kind_cluster() {
@ -174,15 +177,6 @@ function setup_kind_cluster() {
CONFIG=${DEFAULT_CLUSTER_YAML}
fi
# Configure the cluster IP Family if explicitly set
if [ "${IP_FAMILY}" != "ipv4" ]; then
grep "ipFamily: ${IP_FAMILY}" "${CONFIG}" || \
cat <<EOF >> "${CONFIG}"
networking:
ipFamily: ${IP_FAMILY}
EOF
fi
KIND_WAIT_FLAG="--wait=180s"
KIND_DISABLE_CNI="false"
if [[ -n "${KUBERNETES_CNI:-}" ]]; then
@ -191,16 +185,26 @@ EOF
fi
# Create KinD cluster
if ! (yq eval "${CONFIG}" --expression ".networking.disableDefaultCNI = ${KIND_DISABLE_CNI}" | \
if ! (yq eval "${CONFIG}" --expression ".networking.disableDefaultCNI = ${KIND_DISABLE_CNI}" \
--expression ".networking.ipFamily = \"${KIND_IP_FAMILY}\"" | \
kind create cluster --name="${NAME}" -v4 --retain --image "${IMAGE}" ${KIND_WAIT_FLAG:+"$KIND_WAIT_FLAG"} --config -); then
echo "Could not setup KinD environment. Something wrong with KinD setup. Exporting logs."
return 9
# kubectl config set clusters.kind-istio-testing.server https://istio-testing-control-plane:6443
fi
if [[ -n "${DEVCONTAINER:-}" ]]; then
# identify our docker container id using proc and regex
containerid=$(grep 'resolv.conf' /proc/self/mountinfo | sed 's/.*\/docker\/containers\/\([0-9a-f]*\).*/\1/')
docker network connect kind "$containerid"
kind export kubeconfig --name="${NAME}" --internal
fi
# Workaround kind issue causing taints to not be removed in 1.24
kubectl taint nodes "${NAME}"-control-plane node-role.kubernetes.io/control-plane- 2>/dev/null || true
# Determine what CNI to install
case "${KUBERNETES_CNI:-}" in
case "${KUBERNETES_CNI:-}" in
"calico")
echo "Installing Calico CNI"
@ -235,7 +239,7 @@ EOF
# https://github.com/coredns/coredns/issues/2494#issuecomment-457215452
# CoreDNS should handle those domains and answer with NXDOMAIN instead of SERVFAIL
# otherwise pods stops trying to resolve the domain.
if [ "${IP_FAMILY}" = "ipv6" ] || [ "${IP_FAMILY}" = "dual" ]; then
if [ "${KIND_IP_FAMILY}" = "ipv6" ] || [ "${KIND_IP_FAMILY}" = "dual" ]; then
# Get the current config
original_coredns=$(kubectl get -oyaml -n=kube-system configmap/coredns)
echo "Original CoreDNS config:"
@ -272,14 +276,14 @@ function cleanup_kind_clusters() {
# setup_kind_clusters sets up a given number of kind clusters with given topology
# as specified in cluster topology configuration file.
# 1. IMAGE = docker image used as node by KinD
# 2. IP_FAMILY = either ipv4 or ipv6
# 2. KIND_IP_FAMILY = either ipv4 or ipv6 or dual
#
# NOTE: Please call load_cluster_topology before calling this method as it expects
# cluster topology information to be loaded in advance
function setup_kind_clusters() {
IMAGE="${1:-"${DEFAULT_KIND_IMAGE}"}"
KUBECONFIG_DIR="${ARTIFACTS:-$(mktemp -d)}/kubeconfig"
IP_FAMILY="${2:-ipv4}"
KIND_IP_FAMILY="${2:-ipv4}"
check_default_cluster_yaml

View File

@ -21,8 +21,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
GOLANGCILINT_RUN_ARGS=(--output.text.path stdout --output.junit-xml.path "${ARTIFACTS}"/junit-lint.xml)
if [[ "${ARTIFACTS}" != "" ]]; then
golangci-lint run -v -c ./common/config/.golangci.yml --out-format colored-line-number,junit-xml:"${ARTIFACTS}"/junit-lint.xml
golangci-lint run -v -c ./common/config/.golangci.yml "${GOLANGCILINT_RUN_ARGS[@]}"
else
golangci-lint run -v -c ./common/config/.golangci.yml
fi

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
# WARNING: DO NOT EDIT, THIS FILE IS PROBABLY A COPY
#
@ -36,7 +36,7 @@ MOUNT_DEST="${MOUNT_DEST:-/work}"
read -ra DOCKER_RUN_OPTIONS <<< "${DOCKER_RUN_OPTIONS:-}"
[[ -t 1 ]] && DOCKER_RUN_OPTIONS+=("-it")
[[ -t 0 ]] && DOCKER_RUN_OPTIONS+=("-it")
[[ ${UID} -ne 0 ]] && DOCKER_RUN_OPTIONS+=(-u "${UID}:${DOCKER_GID}")
# $CONTAINER_OPTIONS becomes an empty arg when quoted, so SC2086 is disabled for the
@ -47,7 +47,9 @@ read -ra DOCKER_RUN_OPTIONS <<< "${DOCKER_RUN_OPTIONS:-}"
"${DOCKER_RUN_OPTIONS[@]}" \
--init \
--sig-proxy=true \
--cap-add=SYS_ADMIN \
${DOCKER_SOCKET_MOUNT:--v /var/run/docker.sock:/var/run/docker.sock} \
-e DOCKER_HOST=${DOCKER_SOCKET_HOST:-unix:///var/run/docker.sock} \
$CONTAINER_OPTIONS \
--env-file <(env | grep -v ${ENV_BLOCKLIST}) \
-e IN_BUILD_CONTAINER=1 \

View File

@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash
# shellcheck disable=SC2034
# WARNING: DO NOT EDIT, THIS FILE IS PROBABLY A COPY
@ -75,7 +75,7 @@ fi
TOOLS_REGISTRY_PROVIDER=${TOOLS_REGISTRY_PROVIDER:-gcr.io}
PROJECT_ID=${PROJECT_ID:-istio-testing}
if [[ "${IMAGE_VERSION:-}" == "" ]]; then
IMAGE_VERSION=master-f24be7b713480aab44d862ac839ead0b5324d593
IMAGE_VERSION=master-8e6480403f5cf4c9a4cd9d65174d01850e632e1a
fi
if [[ "${IMAGE_NAME:-}" == "" ]]; then
IMAGE_NAME=build-tools
@ -95,6 +95,19 @@ IMG="${IMG:-${TOOLS_REGISTRY_PROVIDER}/${PROJECT_ID}/${IMAGE_NAME}:${IMAGE_VERSI
CONTAINER_CLI="${CONTAINER_CLI:-docker}"
# Try to use the latest cached image we have. Use at your own risk, may have incompatibly-old versions
if [[ "${LATEST_CACHED_IMAGE:-}" != "" ]]; then
prefix="$(<<<"$IMAGE_VERSION" cut -d- -f1)"
query="${TOOLS_REGISTRY_PROVIDER}/${PROJECT_ID}/${IMAGE_NAME}:${prefix}-*"
latest="$("${CONTAINER_CLI}" images --filter=reference="${query}" --format "{{.CreatedAt|json}}~{{.Repository}}:{{.Tag}}~{{.CreatedSince}}" | sort -n -r | head -n1)"
IMG="$(<<<"$latest" cut -d~ -f2)"
if [[ "${IMG}" == "" ]]; then
echo "Attempted to use LATEST_CACHED_IMAGE, but found no images matching ${query}" >&2
exit 1
fi
echo "Using cached image $IMG, created $(<<<"$latest" cut -d~ -f3)" >&2
fi
ENV_BLOCKLIST="${ENV_BLOCKLIST:-^_\|^PATH=\|^GOPATH=\|^GOROOT=\|^SHELL=\|^EDITOR=\|^TMUX=\|^USER=\|^HOME=\|^PWD=\|^TERM=\|^RUBY_\|^GEM_\|^rvm_\|^SSH=\|^TMPDIR=\|^CC=\|^CXX=\|^MAKEFILE_LIST=}"
# Remove functions from the list of exported variables, they mess up with the `env` command.

View File

@ -4,7 +4,7 @@ targets = [
{ triple = "x86_64-unknown-linux-gnu" },
{ triple = "aarch64-unknown-linux-gnu" },
]
features = ["tls-boring", "tls-ring"]
features = ["tls-boring", "tls-ring", "tls-aws-lc", "tls-openssl" ]
[advisories]
version = 2
@ -32,6 +32,7 @@ allow = ["Apache-2.0",
"OpenSSL",
"PHP-3.0",
"TCP-wrappers",
"Unicode-3.0",
"W3C",
"Xnet",
"Zlib",
@ -54,25 +55,20 @@ skip = [
{ crate = "rand", version = "0.7.3", reason = "acceptable duplicate" },
{ crate = "regex-syntax", version = "0.6.29", reason = "acceptable duplicate" },
{ crate = "regex-automata", version = "0.1.10", reason = "acceptable duplicate" },
{ crate = "rand_chacha", version = " 0.2.2", reason = "acceptable duplicate" },
{ crate = "rand_chacha", version = "0.2.2", reason = "acceptable duplicate" },
{ crate = "thiserror-impl", version = "1.0.69", reason = "acceptable duplicate" },
{ crate = "thiserror", version = "1.0.69", reason = "acceptable duplicate" },
{ crate = "base64" },
{ crate = "bitflags" },
{ crate = "fastrand" },
{ crate = "getrandom" },
{ crate = "hashbrown" },
{ crate = "http" },
{ crate = "http-body" },
{ crate = "idna" },
{ crate = "indexmap" },
{ crate = "itertools" },
{ crate = "memoffset" },
{ crate = "nix" },
{ crate = "heck" },
]
[sources]
allow-git = [
"https://github.com/janrueth/boring-rustls-provider",
"https://github.com/howardjohn/tower-hyper-http-body-compat"
]

examples/inpodserver.rs (new file, 89 lines)
View File

@ -0,0 +1,89 @@
// Copyright Istio Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::os::fd::AsRawFd;
use ztunnel::test_helpers::inpod::StartZtunnelMessage;
use ztunnel::{
inpod::istio::zds::WorkloadInfo,
test_helpers::inpod::{Message, start_ztunnel_server},
};
const PROXY_WORKLOAD_INFO: &str = "PROXY_WORKLOAD_INFO";
#[cfg(target_os = "linux")]
#[tokio::main]
async fn main() {
let uds = std::env::var("INPOD_UDS").unwrap();
let pwi = match parse_proxy_workload_info() {
Ok(pwi) => pwi,
Err(e) => {
eprintln!("Failed to parse proxy workload info: {:?}", e);
return;
}
};
let netns = std::env::args().nth(1).unwrap();
let mut netns_base_dir = std::path::PathBuf::from("/var/run/netns");
netns_base_dir.push(netns);
let netns_file = std::fs::File::open(netns_base_dir).unwrap();
let fd = netns_file.as_raw_fd();
let mut sender = start_ztunnel_server(uds.into()).await;
sender
.send(Message::Start(StartZtunnelMessage {
uid: "uid-0".to_string(),
workload_info: Some(pwi),
fd,
}))
.await
.unwrap();
sender.wait_forever().await.unwrap();
}
fn parse_proxy_workload_info() -> Result<WorkloadInfo, Error> {
let pwi = match std::env::var(PROXY_WORKLOAD_INFO) {
Ok(val) => val,
Err(_) => {
// Provide a default WorkloadInfo value if the environment variable is not set.
return Ok(WorkloadInfo {
name: "local".to_string(),
namespace: "default".to_string(),
service_account: "default".to_string(),
});
}
};
let s: Vec<&str> = pwi.splitn(3, "/").collect();
let &[ns, name, sa] = &s[..] else {
return Err(Error::InvalidArgument(format!(
"{PROXY_WORKLOAD_INFO} must match the format 'namespace/name/service-account' (got {s:?})"
)));
};
Ok(WorkloadInfo {
name: name.to_string(),
namespace: ns.to_string(),
service_account: sa.to_string(),
})
}
#[derive(thiserror::Error, Debug)]
pub enum Error {
#[error("invalid argument: {0}")]
InvalidArgument(String),
}
#[cfg(not(target_os = "linux"))]
fn main() {}
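
As an aside, PROXY_WORKLOAD_INFO above is expected in namespace/name/service-account form. A minimal standalone sketch of the same splitn-based parsing (the helper name split_pwi and the values are illustrative, not part of this file):

fn split_pwi(pwi: &str) -> Option<(&str, &str, &str)> {
    // Mirrors the splitn(3, "/") logic above: at most three parts.
    let parts: Vec<&str> = pwi.splitn(3, '/').collect();
    let &[ns, name, sa] = &parts[..] else {
        return None;
    };
    Some((ns, name, sa))
}

fn main() {
    assert_eq!(split_pwi("default/local/default"), Some(("default", "local", "default")));
    // Fewer than three parts are rejected; extra slashes stay in the last part.
    assert_eq!(split_pwi("default/local"), None);
    assert_eq!(split_pwi("a/b/c/d"), Some(("a", "b", "c/d")));
}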

View File

@ -31,7 +31,7 @@ workloads:
policies:
- action: Allow
rules:
- - - not_destination_ports:
- - - notDestinationPorts:
- 9999
name: deny-9999
namespace: default

fuzz/Cargo.lock (generated, 2,178 lines; diff suppressed because it is too large)

View File

@ -2,7 +2,7 @@
name = "ztunnel-fuzz"
version = "0.0.0"
publish = false
edition = "2021"
edition = "2024"
[package.metadata]
cargo-fuzz = true
@ -10,8 +10,8 @@ cargo-fuzz = true
[dependencies]
hyper = "1.1"
libfuzzer-sys = "0.4"
prost = "0.12.1"
anyhow = "1.0.65"
prost = "0.13"
anyhow = "1.0"
[dependencies.ztunnel]
path = ".."

View File

@ -14,13 +14,14 @@
#![no_main]
use hyper::{http::HeaderValue, HeaderMap};
use hyper::{HeaderMap, http::HeaderValue};
use libfuzzer_sys::fuzz_target;
use ztunnel::baggage::parse_baggage_header;
use ztunnel::proxy::BAGGAGE_HEADER;
fuzz_target!(|data: &[u8]| {
let _ = run_baggage_header_parser(data);
let _ = run_forwarded_header_parser(data);
});
fn run_baggage_header_parser(data: &[u8]) -> anyhow::Result<()> {
@ -29,3 +30,9 @@ fn run_baggage_header_parser(data: &[u8]) -> anyhow::Result<()> {
parse_baggage_header(hm.get_all(BAGGAGE_HEADER))?;
Ok(())
}
fn run_forwarded_header_parser(data: &[u8]) -> anyhow::Result<()> {
let s = std::str::from_utf8(data)?;
let _ = ztunnel::proxy::parse_forwarded_host(s);
Ok(())
}

View File

@ -27,11 +27,11 @@ fuzz_target!(|data: &[u8]| {
});
fn run_workload(data: &[u8]) -> anyhow::Result<()> {
Workload::try_from(&XdsWorkload::decode(data)?)?;
Workload::try_from(XdsWorkload::decode(data)?)?;
Ok(())
}
fn run_rbac(data: &[u8]) -> anyhow::Result<()> {
Authorization::try_from(&XdsAuthorization::decode(data)?)?;
Authorization::try_from(XdsAuthorization::decode(data)?)?;
Ok(())
}

[Binary image file added (72 KiB); content not shown]

View File

@ -0,0 +1,4 @@
<svg width="712" height="712" viewBox="0 0 712 712" fill="none" xmlns="http://www.w3.org/2000/svg">
<rect width="712" height="712" fill="#151927"/>
<path d="M355.982 149.89C255.737 149.89 174.181 231.446 174.181 331.691V534.901C174.181 549.91 186.362 562.092 201.372 562.092C216.382 562.092 228.563 549.91 228.563 534.901V331.691C228.563 261.429 285.72 204.273 355.982 204.273C426.244 204.273 483.4 261.429 483.4 331.691L483.31 507.71H324.222L448.324 346.991C456.263 336.93 457.822 322.954 452.239 311.225C446.819 299.823 435.797 292.735 423.453 292.735H262.244V347.118H379.511L255.41 507.855C247.47 517.915 245.911 531.892 251.495 543.62C256.915 555.022 267.936 562.11 280.281 562.11H483.346C513.383 562.11 537.819 537.674 537.819 507.637V331.691C537.801 231.446 456.245 149.89 355.982 149.89Z" fill="#466BB0"/>
</svg>


[Binary image file added (72 KiB); content not shown]

View File

@ -0,0 +1,4 @@
<svg width="712" height="712" viewBox="0 0 712 712" fill="none" xmlns="http://www.w3.org/2000/svg">
<rect width="712" height="712" fill="white"/>
<path d="M355.982 149.89C255.737 149.89 174.181 231.446 174.181 331.691V534.901C174.181 549.91 186.362 562.092 201.372 562.092C216.382 562.092 228.563 549.91 228.563 534.901V331.691C228.563 261.429 285.72 204.273 355.982 204.273C426.244 204.273 483.4 261.429 483.4 331.691L483.31 507.71H324.222L448.324 346.991C456.263 336.93 457.822 322.954 452.239 311.225C446.819 299.823 435.797 292.735 423.453 292.735H262.244V347.118H379.511L255.41 507.855C247.47 517.915 245.911 531.892 251.495 543.62C256.915 555.022 267.936 562.11 280.281 562.11H483.346C513.383 562.11 537.819 537.674 537.819 507.637V331.691C537.801 231.446 456.245 149.89 355.982 149.89Z" fill="#466BB0"/>
</svg>


[Binary image file added (69 KiB); content not shown]

View File

@ -0,0 +1,3 @@
<svg width="712" height="712" viewBox="0 0 712 712" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M355.982 149.89C255.737 149.89 174.181 231.445 174.181 331.691V534.9C174.181 549.91 186.362 562.092 201.372 562.092C216.382 562.092 228.563 549.91 228.563 534.9V331.691C228.563 261.428 285.72 204.272 355.982 204.272C426.244 204.272 483.4 261.428 483.4 331.691L483.31 507.709H324.222L448.324 346.99C456.263 336.93 457.822 322.953 452.239 311.225C446.819 299.823 435.797 292.735 423.453 292.735H262.244V347.117H379.511L255.41 507.854C247.47 517.915 245.911 531.891 251.495 543.62C256.915 555.022 267.936 562.11 280.281 562.11H483.346C513.383 562.11 537.819 537.674 537.819 507.637V331.691C537.801 231.445 456.245 149.89 355.982 149.89Z" fill="#466BB0"/>
</svg>


[Binary image file added (72 KiB); content not shown]

View File

@ -0,0 +1,4 @@
<svg width="712" height="712" viewBox="0 0 712 712" fill="none" xmlns="http://www.w3.org/2000/svg">
<rect width="712" height="712" fill="white"/>
<path d="M355.982 149.89C255.737 149.89 174.181 231.445 174.181 331.691V534.9C174.181 549.91 186.362 562.092 201.372 562.092C216.382 562.092 228.563 549.91 228.563 534.9V331.691C228.563 261.428 285.72 204.272 355.982 204.272C426.244 204.272 483.4 261.428 483.4 331.691L483.31 507.709H324.222L448.324 346.99C456.263 336.93 457.822 322.953 452.239 311.225C446.819 299.823 435.797 292.735 423.453 292.735H262.244V347.117H379.511L255.41 507.854C247.47 517.915 245.911 531.891 251.495 543.62C256.915 555.022 267.936 562.11 280.281 562.11H483.346C513.383 562.11 537.819 537.674 537.819 507.637V331.691C537.801 231.445 456.245 149.89 355.982 149.89Z" fill="#151927"/>
</svg>


[Binary image file added (71 KiB); content not shown]

View File

@ -0,0 +1,3 @@
<svg width="712" height="712" viewBox="0 0 712 712" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M355.982 149.89C255.737 149.89 174.181 231.445 174.181 331.691V534.9C174.181 549.91 186.362 562.092 201.372 562.092C216.382 562.092 228.563 549.91 228.563 534.9V331.691C228.563 261.428 285.72 204.272 355.982 204.272C426.244 204.272 483.4 261.428 483.4 331.691L483.31 507.709H324.222L448.324 346.99C456.263 336.93 457.822 322.953 452.239 311.225C446.819 299.823 435.797 292.735 423.453 292.735H262.244V347.117H379.511L255.41 507.854C247.47 517.915 245.911 531.891 251.495 543.62C256.915 555.022 267.936 562.11 280.281 562.11H483.346C513.383 562.11 537.819 537.674 537.819 507.637V331.691C537.801 231.445 456.245 149.89 355.982 149.89Z" fill="#151927"/>
</svg>


[Binary image file added (70 KiB); content not shown]

View File

@ -0,0 +1,4 @@
<svg width="712" height="712" viewBox="0 0 712 712" fill="none" xmlns="http://www.w3.org/2000/svg">
<rect width="712" height="712" fill="white"/>
<path d="M355.982 149.89C255.737 149.89 174.181 231.445 174.181 331.691V534.9C174.181 549.91 186.362 562.092 201.372 562.092C216.382 562.092 228.563 549.91 228.563 534.9V331.691C228.563 261.428 285.72 204.272 355.982 204.272C426.244 204.272 483.4 261.428 483.4 331.691L483.31 507.709H324.222L448.324 346.99C456.263 336.93 457.822 322.953 452.239 311.225C446.819 299.823 435.797 292.735 423.453 292.735H262.244V347.117H379.511L255.41 507.854C247.47 517.915 245.911 531.891 251.495 543.62C256.915 555.022 267.936 562.11 280.281 562.11H483.346C513.383 562.11 537.819 537.674 537.819 507.637V331.691C537.801 231.445 456.245 149.89 355.982 149.89Z" fill="#67696D"/>
</svg>


[Binary image file added (70 KiB); content not shown]

View File

@ -0,0 +1,3 @@
<svg width="712" height="712" viewBox="0 0 712 712" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M355.982 149.89C255.737 149.89 174.181 231.445 174.181 331.691V534.9C174.181 549.91 186.362 562.092 201.372 562.092C216.382 562.092 228.563 549.91 228.563 534.9V331.691C228.563 261.428 285.72 204.272 355.982 204.272C426.244 204.272 483.4 261.428 483.4 331.691L483.31 507.709H324.222L448.324 346.99C456.263 336.93 457.822 322.953 452.239 311.225C446.819 299.823 435.797 292.735 423.453 292.735H262.244V347.117H379.511L255.41 507.854C247.47 517.915 245.911 531.891 251.495 543.62C256.915 555.022 267.936 562.11 280.281 562.11H483.346C513.383 562.11 537.819 537.674 537.819 507.637V331.691C537.801 231.445 456.245 149.89 355.982 149.89Z" fill="#67696D"/>
</svg>


[Binary image file added (73 KiB); content not shown]

View File

@ -0,0 +1,4 @@
<svg width="712" height="712" viewBox="0 0 712 712" fill="none" xmlns="http://www.w3.org/2000/svg">
<rect width="712" height="712" fill="#466BB0"/>
<path d="M355.982 149.89C255.737 149.89 174.181 231.446 174.181 331.691V534.901C174.181 549.91 186.362 562.092 201.372 562.092C216.382 562.092 228.563 549.91 228.563 534.901V331.691C228.563 261.429 285.72 204.273 355.982 204.273C426.244 204.273 483.4 261.429 483.4 331.691L483.31 507.71H324.222L448.324 346.991C456.263 336.93 457.822 322.954 452.239 311.225C446.819 299.823 435.797 292.735 423.453 292.735H262.244V347.118H379.511L255.41 507.855C247.47 517.915 245.911 531.892 251.495 543.62C256.915 555.022 267.936 562.11 280.281 562.11H483.346C513.383 562.11 537.819 537.674 537.819 507.637V331.691C537.801 231.446 456.245 149.89 355.982 149.89Z" fill="white"/>
</svg>


View File

@ -57,6 +57,9 @@ message Match {
repeated StringMatch namespaces = 1;
repeated StringMatch not_namespaces = 2;
repeated ServiceAccountMatch service_accounts = 11;
repeated ServiceAccountMatch not_service_accounts = 12;
repeated StringMatch principals = 3;
repeated StringMatch not_principals = 4;
@ -75,6 +78,11 @@ message Address {
uint32 length = 2;
}
message ServiceAccountMatch {
string namespace = 1;
string serviceAccount = 2;
}
message StringMatch {
oneof match_type {
// exact string match

View File

@ -15,6 +15,10 @@
syntax = "proto3";
package istio.workload;
import "google/protobuf/wrappers.proto";
import "google/protobuf/any.proto";
option go_package="pkg/workloadapi";
// Address represents a unique address.
@ -62,7 +66,7 @@ message Service {
string hostname = 3;
// Address represents the addresses the service can be reached at.
// There may be multiple addresses for a single service if it resides in multiple networks,
// multiple clusters, and/or if it's dual stack (TODO: support dual stack).
// multiple clusters, and/or if it's dual stack.
// For a headless kubernetes service, this list will be empty.
repeated NetworkAddress addresses = 4;
// Ports for the service.
@ -79,6 +83,32 @@ message Service {
// Note: this applies only to connecting directly to the workload; when waypoints are used, the waypoint's load_balancing
// configuration is used.
LoadBalancing load_balancing = 8;
// IP families provides configuration about the IP families this service supports.
IPFamilies ip_families = 9;
// Extension provides a mechanism to attach arbitrary additional configuration to an object.
repeated Extension extensions = 10;
}
enum IPFamilies {
// AUTOMATIC is inferred from the configured addresses.
AUTOMATIC = 0;
// Only IPv4 is supported
IPV4_ONLY = 1;
// Only IPv6 is supported
IPV6_ONLY = 2;
// Both IPv4 and IPv6 are supported
DUAL = 3;
}
// NetworkMode indicates how the addresses of the workload should be treated.
enum NetworkMode {
// STANDARD means that the workload is uniquely identified by its address (within its network).
STANDARD = 0;
// HOST_NETWORK means the workload has an IP address that is shared by many workloads. The data plane should avoid
// attempting to lookup these workloads by IP address (which could return the wrong result).
HOST_NETWORK = 1;
}
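
To make the HOST_NETWORK caveat concrete, a minimal sketch of an IP index that honors it (types and fields are illustrative, not ztunnel's internal model):

use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq)]
enum NetworkMode { Standard, HostNetwork }

struct Workload { ip: String, mode: NetworkMode }

// Index only STANDARD workloads by IP; HOST_NETWORK addresses are shared by
// many workloads on a node, so an IP lookup could return the wrong workload.
fn index_by_ip(workloads: &[Workload]) -> HashMap<&str, &Workload> {
    workloads.iter()
        .filter(|w| w.mode == NetworkMode::Standard)
        .map(|w| (w.ip.as_str(), w))
        .collect()
}

fn main() {
    let ws = vec![
        Workload { ip: "10.0.0.1".into(), mode: NetworkMode::Standard },
        Workload { ip: "10.0.0.9".into(), mode: NetworkMode::HostNetwork },
    ];
    assert!(index_by_ip(&ws).contains_key("10.0.0.1"));
    assert!(!index_by_ip(&ws).contains_key("10.0.0.9"));
}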
message LoadBalancing {
@ -114,12 +144,20 @@ message LoadBalancing {
// 4. Any endpoints
FAILOVER = 2;
}
enum HealthPolicy {
// Only select healthy endpoints
ONLY_HEALTHY = 0;
// Include all endpoints, even if they are unhealthy.
ALLOW_ALL = 1;
}
// routing_preference defines what scopes we want to keep traffic within.
// The `mode` determines how these routing preferences are handled
repeated Scope routing_preference = 1;
// mode defines how we should handle the routing preferences.
Mode mode = 2;
// health_policy defines how we should filter endpoints
HealthPolicy health_policy = 3;
}
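
In the same spirit, a sketch of how health_policy could gate endpoint selection (again illustrative types, not the actual implementation):

#[derive(Clone, Copy)]
enum HealthPolicy { OnlyHealthy, AllowAll }

struct Endpoint { healthy: bool }

// ONLY_HEALTHY drops unhealthy endpoints at selection time;
// ALLOW_ALL keeps every endpoint, matching the proto comments above.
fn eligible(eps: &[Endpoint], policy: HealthPolicy) -> Vec<&Endpoint> {
    eps.iter()
        .filter(|e| match policy {
            HealthPolicy::OnlyHealthy => e.healthy,
            HealthPolicy::AllowAll => true,
        })
        .collect()
}

fn main() {
    let eps = [Endpoint { healthy: true }, Endpoint { healthy: false }];
    assert_eq!(eligible(&eps, HealthPolicy::OnlyHealthy).len(), 1);
    assert_eq!(eligible(&eps, HealthPolicy::AllowAll).len(), 2);
}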
// Workload represents a workload - an endpoint (or collection behind a hostname).
@ -162,7 +200,6 @@ message Workload {
// a workload that backs a Kubernetes service will typically have only endpoints. A
// workload that backs a headless Kubernetes service, however, will have both
// addresses as well as a hostname used for direct access to the headless endpoint.
// TODO: support this field
string hostname = 21;
// Network represents the network this workload is on. This may be elided for the default network.
@ -204,10 +241,8 @@ message Workload {
// * Requests *to* this workload, via the tunnel_protocol, do not need to be de-tunneled.
bool native_tunnel = 14;
// If an application, such as a sandwiched waypoint proxy, supports
// directly receiving information from zTunnel they can set application_protocol.
// This supersedes native_tunnel.
// If an application, such as a sandwiched waypoint proxy, supports directly
// receiving information from zTunnel they can set application_protocol.
ApplicationTunnel application_tunnel = 23;
// The services for which this workload is an endpoint.
@ -228,6 +263,16 @@ message Workload {
// The Locality defines information about where a workload is geographically deployed
Locality locality = 24;
NetworkMode network_mode = 25;
// Extension provides a mechanism to attach arbitrary additional configuration to an object.
repeated Extension extensions = 26;
// Capacity for this workload.
// This represents the amount of traffic the workload can handle, relative to other workloads.
// If unset, the capacity defaults to 1.
google.protobuf.UInt32Value capacity = 27;
// Reservations for deleted fields.
reserved 15;
}
@ -274,7 +319,6 @@ enum TunnelProtocol {
// Future options may include things like QUIC/HTTP3, etc.
}
// ApplicationProtocol specifies that a workload (application or gateway) can
// consume tunnel information.
message ApplicationTunnel {
@ -290,7 +334,7 @@ message ApplicationTunnel {
PROXY = 1;
}
// A target natively handles this type of traffic.
Protocol protocol = 1;
// optional: if set, traffic should be sent to this port after the last zTunnel hop
@ -307,10 +351,8 @@ message GatewayAddress {
}
// port to reach the gateway at for mTLS HBONE connections
uint32 hbone_mtls_port = 3;
// port to reach the gateway at for single tls HBONE connections
// used for sending unauthenticated traffic originating outside the mesh to a waypoint-enabled destination
// A value of 0 = unset
uint32 hbone_single_tls_port = 4;
reserved "hbone_single_tls_port";
reserved 4;
}
// NetworkAddress represents an address bound to a specific network.
@ -328,3 +370,13 @@ message NamespacedHostname {
// hostname (ex: gateway.example.com)
string hostname = 2;
}
// Extension provides a mechanism to attach arbitrary additional configuration to an object.
message Extension {
// name provides an opaque name for the extension.
// This may have semantic meaning or be used for debugging.
// This should be unique amongst all extensions attached to an item.
string name = 1;
// config provides some opaque configuration.
google.protobuf.Any config = 2;
}
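
A consumer-side sketch of unpacking an Extension's Any payload, assuming prost and prost-types as dependencies (the payload message and extension name are hypothetical, not part of the Istio API):

use prost::Message;

// Hypothetical payload type, for illustration only.
#[derive(Clone, PartialEq, prost::Message)]
struct MyExtConfig {
    #[prost(string, tag = "1")]
    mode: String,
}

// Match on the opaque extension name, then decode the Any's raw bytes.
fn decode_my_ext(name: &str, config: &prost_types::Any) -> Option<MyExtConfig> {
    if name != "example.com/my-ext" {
        return None;
    }
    MyExtConfig::decode(config.value.as_slice()).ok()
}

fn main() {
    let payload = MyExtConfig { mode: "strict".to_string() };
    let any = prost_types::Any {
        type_url: "type.googleapis.com/example.MyExtConfig".to_string(),
        value: payload.encode_to_vec(),
    };
    assert_eq!(decode_my_ext("example.com/my-ext", &any), Some(payload));
}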

View File

@ -16,10 +16,11 @@ message ZdsHello {
}
message WorkloadInfo {
reserved "trust_domain"; // Deprecated
reserved 4;
string name = 1;
string namespace = 2;
string service_account = 3;
string trust_domain = 4;
}
// Add a workload to the ztunnel. This will be accompanied by ancillary data containing

View File

@ -1,104 +0,0 @@
#!/bin/bash
# This script sets up redirection in the host network namespace for namespaced tests (tests/README.md)
set -ex
HOST_IP="$(ip -j addr | jq '.[] | select(.ifname == "eth0").addr_info[0].local' -r)"
ZTUNNEL_IP="${1:?ztunnel IP}"
ZTUNNEL_INTERFACE="${2:?ztunnel interface}"
shift; shift;
ipset create ztunnel-pods-ips hash:ip
for ip in "$@"; do
ipset add ztunnel-pods-ips "${ip}"
done
# Setup interfaces
ip link add name istioin type geneve id 1000 remote "${ZTUNNEL_IP}"
ip addr add 192.168.126.1/30 dev istioin
ip link set istioin up
ip link add name istioout type geneve id 1001 remote "${ZTUNNEL_IP}"
ip addr add 192.168.127.1/30 dev istioout
ip link set istioout up
cat <<EOF | iptables-restore -w
*mangle
:PREROUTING ACCEPT
:INPUT ACCEPT
:FORWARD ACCEPT
:OUTPUT ACCEPT
:POSTROUTING ACCEPT
:ztunnel-FORWARD -
:ztunnel-INPUT -
:ztunnel-OUTPUT -
:ztunnel-POSTROUTING -
:ztunnel-PREROUTING -
-A PREROUTING -j ztunnel-PREROUTING
-A INPUT -j ztunnel-INPUT
-A FORWARD -j ztunnel-FORWARD
-A OUTPUT -j ztunnel-OUTPUT
-A POSTROUTING -j ztunnel-POSTROUTING
-A ztunnel-FORWARD -m mark --mark 0x220/0x220 -j CONNMARK --save-mark --nfmask 0x220 --ctmask 0x220
-A ztunnel-FORWARD -m mark --mark 0x210/0x210 -j CONNMARK --save-mark --nfmask 0x210 --ctmask 0x210
-A ztunnel-INPUT -m mark --mark 0x220/0x220 -j CONNMARK --save-mark --nfmask 0x220 --ctmask 0x220
-A ztunnel-INPUT -m mark --mark 0x210/0x210 -j CONNMARK --save-mark --nfmask 0x210 --ctmask 0x210
-A ztunnel-OUTPUT -s ${HOST_IP}/32 -j MARK --set-xmark 0x220/0xffffffff
-A ztunnel-PREROUTING -i istioin -j MARK --set-xmark 0x200/0x200
-A ztunnel-PREROUTING -i istioin -j RETURN
-A ztunnel-PREROUTING -i istioout -j MARK --set-xmark 0x200/0x200
-A ztunnel-PREROUTING -i istioout -j RETURN
-A ztunnel-PREROUTING -p udp -m udp --dport 6081 -j RETURN
-A ztunnel-PREROUTING -m connmark --mark 0x220/0x220 -j MARK --set-xmark 0x200/0x200
-A ztunnel-PREROUTING -m mark --mark 0x200/0x200 -j RETURN
-A ztunnel-PREROUTING ! -i ${ZTUNNEL_INTERFACE} -m connmark --mark 0x210/0x210 -j MARK --set-xmark 0x40/0x40
-A ztunnel-PREROUTING -m mark --mark 0x40/0x40 -j RETURN
-A ztunnel-PREROUTING ! -s ${ZTUNNEL_IP}/32 -i ${ZTUNNEL_INTERFACE} -j MARK --set-xmark 0x210/0x210
-A ztunnel-PREROUTING -m mark --mark 0x200/0x200 -j RETURN
-A ztunnel-PREROUTING -i ${ZTUNNEL_INTERFACE} -j MARK --set-xmark 0x220/0x220
-A ztunnel-PREROUTING -p udp -j MARK --set-xmark 0x220/0x220
-A ztunnel-PREROUTING -m mark --mark 0x200/0x200 -j RETURN
-A ztunnel-PREROUTING -p tcp -m set --match-set ztunnel-pods-ips src -j MARK --set-xmark 0x100/0x100
COMMIT
*nat
:PREROUTING ACCEPT
:INPUT ACCEPT
:OUTPUT ACCEPT
:POSTROUTING ACCEPT
:ztunnel-POSTROUTING -
:ztunnel-PREROUTING -
-A PREROUTING -j ztunnel-PREROUTING
-A POSTROUTING -j ztunnel-POSTROUTING
-A ztunnel-POSTROUTING -m mark --mark 0x100/0x100 -j ACCEPT
-A ztunnel-PREROUTING -m mark --mark 0x100/0x100 -j ACCEPT
COMMIT
EOF
ip route add table 101 "${ZTUNNEL_IP}" dev "${ZTUNNEL_INTERFACE}" scope link
ip route add table 101 0.0.0.0/0 via 192.168.127.2 dev istioout
ip route add table 102 "${ZTUNNEL_IP}" dev "${ZTUNNEL_INTERFACE}" scope link
ip route add table 102 0.0.0.0/0 via "${ZTUNNEL_IP}" dev "${ZTUNNEL_INTERFACE}" onlink
ip route add table 100 "${ZTUNNEL_IP}" dev "${ZTUNNEL_INTERFACE}" scope link
for ip in "$@"; do
ip route add table 100 "${ip}/32" via 192.168.126.2 dev istioin src "$HOST_IP"
done
ip rule add priority 100 fwmark 0x200/0x200 goto 32766
ip rule add priority 101 fwmark 0x100/0x100 lookup 101
ip rule add priority 102 fwmark 0x040/0x040 lookup 102
ip rule add priority 103 table 100
#IPTABLES=iptables-legacy
#$IPTABLES -t mangle -I PREROUTING -j LOG --log-prefix "mangle pre [node] "
#$IPTABLES -t mangle -I POSTROUTING -j LOG --log-prefix "mangle post [node] "
#$IPTABLES -t mangle -I INPUT -j LOG --log-prefix "mangle inp [node] "
#$IPTABLES -t mangle -I OUTPUT -j LOG --log-prefix "mangle out [node] "
#$IPTABLES -t mangle -I FORWARD -j LOG --log-prefix "mangle fw [node] "
#$IPTABLES -t nat -I POSTROUTING -j LOG --log-prefix "nat post [node] "
#$IPTABLES -t nat -I INPUT -j LOG --log-prefix "nat inp [node] "
#$IPTABLES -t nat -I OUTPUT -j LOG --log-prefix "nat out [node] "
#$IPTABLES -t nat -I PREROUTING -j LOG --log-prefix "nat pre [node] "
#$IPTABLES -t raw -I PREROUTING -j LOG --log-prefix "raw pre [node] "
#$IPTABLES -t raw -I OUTPUT -j LOG --log-prefix "raw out [node] "
#$IPTABLES -t filter -I FORWARD -j LOG --log-prefix "filt fw [node] "
#$IPTABLES -t filter -I OUTPUT -j LOG --log-prefix "filt out [node] "
#$IPTABLES -t filter -I INPUT -j LOG --log-prefix "filt inp [node] "

View File

@ -20,20 +20,28 @@ WD=$(dirname "$0")
WD=$(cd "$WD" || exit; pwd)
case $(uname -m) in
x86_64)
export ARCH=amd64;;
aarch64)
export ARCH=arm64
# TODO(https://github.com/istio/ztunnel/issues/357) clean up this hack
sed -i 's/x86_64/arm64/g' .cargo/config.toml
;;
*) echo "unsupported architecture"; exit 1 ;;
x86_64) export ARCH=amd64;;
aarch64) export ARCH=arm64 ;;
*) echo "unsupported architecture"; exit 1;;
esac
cargo build --release
if [[ "$TLS_MODE" == "boring" ]]; then
if [[ "$ARCH" == "arm64" ]]; then
# TODO(https://github.com/istio/ztunnel/issues/357) clean up this hack
sed -i 's/x86_64/arm64/g' .cargo/config.toml
fi
cargo build --release --no-default-features -F tls-boring
elif [[ "$TLS_MODE" == "aws-lc" ]]; then
cargo build --release --no-default-features -F tls-aws-lc
elif [[ "$TLS_MODE" == "openssl" ]]; then
cargo build --release --no-default-features -F tls-openssl
else
cargo build --release
fi
SHA="$(git rev-parse --verify HEAD)"
RELEASE_NAME="ztunnel-${SHA}-${ARCH}"
BINARY_PREFIX=${BINARY_PREFIX:-"ztunnel"}
RELEASE_NAME="${BINARY_PREFIX}-${SHA}-${ARCH}"
ls -lh "${WD}/../out/rust/release/ztunnel"
DEST="${DEST:-gs://istio-build/ztunnel}"
if [[ "$CI" == "" && "$DEST" == "gs://istio-build/ztunnel" ]]; then

scripts/run-cached.sh (new executable file, 81 lines)
View File

@ -0,0 +1,81 @@
#!/bin/bash
set -e
SCRIPT_INPUT=("$@")
# Function to log error and execute command
function error_and_exec() {
echo "Error: $1" >&2
exec "${SCRIPT_INPUT[@]}"
}
# Check if RUST_CACHE_DIR is set
if [ -z "${RUST_CACHE_DIR}" ]; then
error_and_exec "RUST_CACHE_DIR is not set" "$@"
fi
# Check if out/ directory exists
if [ -d "out" ]; then
error_and_exec "out/ directory already exists" "$@"
fi
# Get current branch name
if [ -z "${PULL_BASE_REF}" ]; then
error_and_exec "Could not determine current branch" "$@"
fi
# Make sure we are on presubmit
if [ "${JOB_TYPE}" != "presubmit" ]; then
error_and_exec "Caching only available on presubmit" "$@"
fi
CACHE_DIR="${RUST_CACHE_DIR}/${PULL_BASE_REF}"
# Strip binaries to keep things smaller
cat <<EOF > ~/.cargo/config.toml
[target.'cfg(debug_assertions)']
rustflags = ["-C", "strip=debuginfo"]
EOF
# Check if branch cache exists
if [ ! -d "${CACHE_DIR}" ]; then
# Not an error, we may need to populate it the first time
echo "Cache for branch ${PULL_BASE_REF} not found, we will populate it" >&2
else
echo "Found cache for branch ${PULL_BASE_REF}, copying it" >&2
# Copy cache to out directory
mkdir -p out
cp -ar "${CACHE_DIR}" out/rust
echo "Cache size: $(du -sh out/rust)" >&2
fi
# Run the provided command
"$@"
# Clean up everything except build and deps directories
find out/rust -mindepth 1 -maxdepth 1 -type d \
! -path "out/rust/debug" \
-exec rm -rf {} +
find out/rust -mindepth 2 -maxdepth 2 -type d \
! -path "out/rust/debug/build" \
! -path "out/rust/debug/deps" \
! -path "out/rust/debug/.fingerprint" \
-exec rm -rf {} +
# Update the cache with our state
tmp="${RUST_CACHE_DIR}/${RANDOM}"
tmp_to_delete="${RUST_CACHE_DIR}/${RANDOM}"
echo "Backing up cache" >&2
# Move our cache into the volume (this is slow since it's cross-filesystem)
mv out/rust "${tmp}"
# Move the existing cache aside - we would delete it right away, but mv is faster than delete,
# so we defer deletion to minimize the window during which the cache is missing
# Note: we could use `exch` here in the future, but it's not in our Ubuntu version
mv "${CACHE_DIR}" "${tmp_to_delete}" || true
# Populate the cache with our new info
mv "${tmp}" "${CACHE_DIR}"
# Remove the old one
rm -rf "${tmp_to_delete}" || true
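
The mv-based swap above is a generic publish-then-delete pattern; a sketch of the same idea in Rust (paths and the swap_cache helper are illustrative):

use std::fs;
use std::io;
use std::path::Path;

// Publish a freshly built cache under the canonical name, deferring the slow
// delete until after the swap so the cache is only briefly missing.
fn swap_cache(cache: &Path, fresh: &Path, graveyard: &Path) -> io::Result<()> {
    if cache.exists() {
        // Renames within one filesystem are cheap, unlike a recursive delete.
        fs::rename(cache, graveyard)?;
    }
    fs::rename(fresh, cache)?;
    // Best-effort cleanup of the old tree, mirroring `rm -rf ... || true`.
    let _ = fs::remove_dir_all(graveyard);
    Ok(())
}

fn main() -> io::Result<()> {
    fs::create_dir_all("out/fresh")?;
    swap_cache(Path::new("out/cache"), Path::new("out/fresh"), Path::new("out/old"))
}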

scripts/test-with-coverage.sh (new executable file, 66 lines)
View File

@ -0,0 +1,66 @@
#!/usr/bin/env bash
# shellcheck disable=SC2046,SC2086
# Copyright Istio Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -xe
ARTIFACTS="${ARTIFACTS:-out/coverage}"
output_dir=$ARTIFACTS/report
# Where to store merged coverage file
profdata=out/coverage/ztunnel.profdata
# Where to store intermediate *.profraw files
profiles=out/coverage/profiles
export LLVM_PROFILE_FILE="$profiles/profile_%m_%p.profraw"
# Enable coverage
export RUSTFLAGS="-C instrument-coverage"
export RUST_BACKTRACE=1
RUSTUP_DEFAULT_TOOLCHAIN="$(rustup show active-toolchain | awk '{print $1}')"
RUSTUP_LLVM_BIN_PATH="${HOME}/.rustup/toolchains/${RUSTUP_DEFAULT_TOOLCHAIN}/lib/rustlib/$(arch)-unknown-linux-gnu/bin"
# Clean directory
rm -rf "$profdata" "$profiles"
mkdir -p "$profiles"
echo "Running tests"
cargo test --benches --tests --bins $FEATURES
# Merge profraw data
echo "Merging profraw files in $profiles to $profdata"
${RUSTUP_LLVM_BIN_PATH}/llvm-profdata merge -sparse $(find "$profiles" -name '*.profraw') -o $profdata
# Taken from
# https://doc.rust-lang.org/rustc/instrument-coverage.html#tips-for-listing-the-binaries-automatically
test_bins=$(cargo test --benches --tests --bins --no-run --message-format=json $FEATURES \
| jq -r "select(.profile.test == true) | .filenames[]" \
| grep -v dSYM -)
objs=""
for file in $test_bins
do
objs="$objs $(printf "%s %s " -object "$file")"
done
echo $objs
echo "Publishing coverage report to $output_dir"
${RUSTUP_LLVM_BIN_PATH}/llvm-cov show \
-instr-profile="$profdata" \
$objs \
-Xdemangler=rustfilt \
-format=html \
-sources=$(find src -name '*.rs') \
-output-dir="$output_dir"

View File

@ -1,82 +0,0 @@
#!/usr/bin/env bash
# Init the base set of tables and routes
init() {
# Anything with the mark 15001 will be sent to loopback
ip -4 rule add fwmark 15001 lookup 15001
ip -4 route add local default dev lo table 15001
iptables -t mangle -N ZT_CAPTURE_EGRESS
iptables -t mangle -A ZT_CAPTURE_EGRESS -j MARK --set-mark 15001
# PREROUTING on loopback - anything routed by the route table 15001, based on OUTPUT mark
# Ignore local source or dst - it's not egress
iptables -t mangle -N ZT_TPROXY
iptables -t mangle -A ZT_TPROXY -d 127.0.0.0/8 -j RETURN
iptables -t mangle -A ZT_TPROXY --match mark --mark 15001 -p tcp -j TPROXY --tproxy-mark 15001/0xffffffff --on-port 15001
iptables -t mangle -A PREROUTING -i lo -j ZT_TPROXY
# Table that determines who gets redirected
iptables -t mangle -N ZT_EGRESS
iptables -t mangle -A OUTPUT -j ZT_EGRESS
}
init6() {
# Anything with the mark 15001 will be sent to loopback
ip -6 rule add fwmark 15001 lookup 15001
ip -6 route add local default dev lo table 15001
ip6tables -t mangle -N ZT_CAPTURE_EGRESS
ip6tables -t mangle -A ZT_CAPTURE_EGRESS -j MARK --set-mark 15001
# PREROUTING on loopback - anything routed by the route table 15001, based on OUTPUT mark
# Ignore local source or dst - it's not egress
ip6tables -t mangle -N ZT_TPROXY
ip6tables -t mangle -A ZT_TPROXY -d ::1/128 -j RETURN
ip6tables -t mangle -A ZT_TPROXY --match mark --mark 15001 -p tcp -j TPROXY --tproxy-mark 15001/0xffffffff --on-port 15001
ip6tables -t mangle -A PREROUTING -i lo -j ZT_TPROXY
# Table that determines who gets redirected
ip6tables -t mangle -N ZT_EGRESS
ip6tables -t mangle -A OUTPUT -j ZT_EGRESS
}
# Clean the configurable table for outbound capture
clean() {
iptables -t mangle -F ZT_EGRESS
ip6tables -t mangle -F ZT_EGRESS
}
# Setup outbound capture
setup() {
iptables -t mangle -A ZT_EGRESS -p tcp --dport 15001 -j RETURN
iptables -t mangle -A ZT_EGRESS -p tcp --dport 15009 -j RETURN
iptables -t mangle -A ZT_EGRESS -p tcp --dport 15008 -j RETURN
iptables -t mangle -A ZT_EGRESS -m owner --uid-owner 0 -j RETURN
# For now capture only 10, to avoid breaking internet requests.
# Will need to be expanded
iptables -t mangle -A ZT_EGRESS -d 10.0.0.0/8 -j ZT_CAPTURE_EGRESS
iptables -t mangle -A ZT_EGRESS -d 142.251.46.228/32 -j ZT_CAPTURE_EGRESS
}
setup6() {
ip6tables -t mangle -A ZT_EGRESS -p tcp --dport 15001 -j RETURN
ip6tables -t mangle -A ZT_EGRESS -p tcp --dport 15009 -j RETURN
ip6tables -t mangle -A ZT_EGRESS -p tcp --dport 15008 -j RETURN
ip6tables -t mangle -A ZT_EGRESS -m owner --uid-owner 0 -j RETURN
# For now capture only 10, to avoid breaking internet requests.
# Will need to be expanded
ip6tables -t mangle -A ZT_EGRESS -d fc::/7 -j ZT_CAPTURE_EGRESS
ip6tables -t mangle -A ZT_EGRESS -d fe:c0::/10 -j ZT_CAPTURE_EGRESS
}
if [[ "$1" != "" ]]; then
$1
fi

View File

@ -4,46 +4,44 @@
set -ex
# Below is from config.sh but used in redirect-worker.sh as well
POD_OUTBOUND=15001
POD_INBOUND=15008
POD_INBOUND_PLAINTEXT=15006
# CONNMARK is needed to make original src work. We set the conn mark in PREROUTING; this will not affect connections
# from ztunnel to outside the pod, which go through the OUTPUT chain.
# As we are in the pod netns, we can use whichever iptables is the default.
iptables-restore --wait 10 <<EOF
# Generated by iptables-save v1.8.9 (nf_tables) on Thu Jun 22 11:52:46 2023
iptables-restore --wait 10 --noflush <<EOF
*mangle
:PREROUTING ACCEPT [0:0]
:INPUT ACCEPT [0:0]
:FORWARD ACCEPT [0:0]
:OUTPUT ACCEPT [0:0]
:POSTROUTING ACCEPT [0:0]
-A PREROUTING -m mark --mark 1337/0xfff -j CONNMARK --set-xmark 0x111/0xfff
-A PREROUTING -p tcp -m tcp --dport $POD_INBOUND -m mark ! --mark 1337/0xfff -j TPROXY --on-port $POD_INBOUND --on-ip 127.0.0.1 --tproxy-mark 0x111/0xfff
-A PREROUTING -p tcp -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-A PREROUTING ! -d 127.0.0.1/32 -p tcp -m mark ! --mark 1337/0xfff -j TPROXY --on-port $POD_INBOUND_PLAINTEXT --on-ip 127.0.0.1 --tproxy-mark 0x111/0xfff
-A OUTPUT -m connmark --mark 0x111/0xfff -j CONNMARK --restore-mark --nfmask 0xffffffff --ctmask 0xffffffff
:ISTIO_OUTPUT - [0:0]
:ISTIO_PRERT - [0:0]
-A PREROUTING -j ISTIO_PRERT
-A OUTPUT -j ISTIO_OUTPUT
-A ISTIO_OUTPUT -m connmark --mark 0x111/0xfff -j CONNMARK --restore-mark --nfmask 0xffffffff --ctmask 0xffffffff
-A ISTIO_PRERT -m mark --mark 0x539/0xfff -j CONNMARK --set-xmark 0x111/0xfff
COMMIT
# Completed on Thu Jun 22 11:52:46 2023
# Generated by iptables-save v1.8.9 (nf_tables) on Thu Jun 22 11:52:46 2023
*nat
:PREROUTING ACCEPT [0:0]
:INPUT ACCEPT [0:0]
:OUTPUT ACCEPT [0:0]
:POSTROUTING ACCEPT [0:0]
:ISTIO_REDIRECT - [0:0]
-A OUTPUT -p tcp -j ISTIO_REDIRECT
-A ISTIO_REDIRECT -p tcp -m mark --mark 0x111/0xfff -j ACCEPT
-A ISTIO_REDIRECT -p tcp -m mark ! --mark 1337/0xfff -j REDIRECT --to-ports $POD_OUTBOUND
:ISTIO_OUTPUT - [0:0]
:ISTIO_PRERT - [0:0]
-A OUTPUT -j ISTIO_OUTPUT
-A PREROUTING -j ISTIO_PRERT
-A ISTIO_OUTPUT -d 169.254.7.127/32 -p tcp -m tcp -j ACCEPT
-A ISTIO_OUTPUT ! -o lo -p udp -m mark ! --mark 0x539/0xfff -m udp --dport 53 -j REDIRECT --to-ports 15053
-A ISTIO_OUTPUT ! -d 127.0.0.1/32 -p tcp -m tcp --dport 53 -m mark ! --mark 0x539/0xfff -j REDIRECT --to-ports 15053
-A ISTIO_OUTPUT -p tcp -m mark --mark 0x111/0xfff -j ACCEPT
-A ISTIO_OUTPUT ! -d 127.0.0.1/32 -o lo -j ACCEPT
-A ISTIO_OUTPUT ! -d 127.0.0.1/32 -p tcp -m mark ! --mark 0x539/0xfff -j REDIRECT --to-ports 15001
-A ISTIO_PRERT -s 169.254.7.127/32 -p tcp -m tcp -j ACCEPT
-A ISTIO_PRERT ! -d 127.0.0.1/32 -p tcp ! --dport 15008 -m mark ! --mark 0x539/0xfff -j REDIRECT --to-ports 15006
COMMIT
# Completed on Thu Jun 22 11:52:46 2023
EOF
ip route add local 0.0.0.0/0 dev lo table 100 || :
# tproxy and original src
ip rule add fwmark 0x111/0xfff pref 32764 lookup 100 || :

View File

@ -1,55 +1,13 @@
#!/bin/bash
# shellcheck disable=SC2086
# This script sets up redirection in the ztunnel network namespace for namespaced tests (tests/README.md)
# This script sets up redirection in the ztunnel network namespace for namespaced tests for dedicated mode (tests/README.md)
# See ztunnel-redirect-inpod.sh for inpod mode.
set -ex
INSTANCE_IP="${1:?INSTANCE_IP}"
shift
# tproxy mark, it's only used here.
MARK=0x400/0xfff
ORG_SRC_RET_MARK=0x4d3/0xfff
# Below is from config.sh but used in redirect-worker.sh as well
# Mark ztunnel will set
MARK=0x539/0xfff
# Port used for outbound traffic
POD_OUTBOUND=15001
POD_INBOUND=15008
POD_INBOUND_PLAINTEXT=15006
INBOUND_TUN=istioin
OUTBOUND_TUN=istioout
# TODO: look into why link local (169.254.x.x) address didn't work
# they don't respond to ARP.
INBOUND_TUN_IP=192.168.126.1
ZTUNNEL_INBOUND_TUN_IP=192.168.126.2
OUTBOUND_TUN_IP=192.168.127.1
ZTUNNEL_OUTBOUND_TUN_IP=192.168.127.2
TUN_PREFIX=30
HOST_IP=$(ip route | grep default | awk '{print $3}')
ip link add name p$INBOUND_TUN type geneve id 1000 remote $HOST_IP
ip addr add $ZTUNNEL_INBOUND_TUN_IP/$TUN_PREFIX dev p$INBOUND_TUN
ip link add name p$OUTBOUND_TUN type geneve id 1001 remote $HOST_IP
ip addr add $ZTUNNEL_OUTBOUND_TUN_IP/$TUN_PREFIX dev p$OUTBOUND_TUN
ip link set p$INBOUND_TUN up
ip link set p$OUTBOUND_TUN up
echo 0 > /proc/sys/net/ipv4/conf/p$INBOUND_TUN/rp_filter
echo 0 > /proc/sys/net/ipv4/conf/p$OUTBOUND_TUN/rp_filter
ip rule add priority 20000 fwmark $MARK lookup 100
ip rule add priority 20003 fwmark $ORG_SRC_RET_MARK lookup 100
ip route add local 0.0.0.0/0 dev lo table 100
ip route add table 101 $HOST_IP dev eth0 scope link
ip route add table 101 0.0.0.0/0 via $OUTBOUND_TUN_IP dev p$OUTBOUND_TUN
ip route add table 102 $HOST_IP dev eth0 scope link
ip route add table 102 0.0.0.0/0 via $INBOUND_TUN_IP dev p$INBOUND_TUN
set +e
num_legacy_lines=$( (iptables-legacy-save || true; ip6tables-legacy-save || true) 2>/dev/null | grep -c '^-')
@ -78,30 +36,7 @@ set -e
$IPTABLES -w -t mangle -F PREROUTING
$IPTABLES -w -t nat -F OUTPUT
$IPTABLES -w -t mangle -A PREROUTING -p tcp -i p$INBOUND_TUN -m tcp --dport=$POD_INBOUND -j TPROXY --tproxy-mark $MARK --on-port $POD_INBOUND --on-ip 127.0.0.1
$IPTABLES -w -t mangle -A PREROUTING -p tcp -i p$OUTBOUND_TUN -j TPROXY --tproxy-mark $MARK --on-port $POD_OUTBOUND --on-ip 127.0.0.1
$IPTABLES -w -t mangle -A PREROUTING -p tcp -i p$INBOUND_TUN -j TPROXY --tproxy-mark $MARK --on-port $POD_INBOUND_PLAINTEXT --on-ip 127.0.0.1
$IPTABLES -w -t mangle -A PREROUTING -p tcp -i eth0 ! --dst $INSTANCE_IP -j MARK --set-mark $ORG_SRC_RET_MARK
# With normal linux routing we need to disable the rp_filter
# as we get packets from a tunnel that doesn't have default routes.
echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter
echo 0 > /proc/sys/net/ipv4/conf/default/rp_filter
echo 0 > /proc/sys/net/ipv4/conf/eth0/rp_filter
#$IPTABLES -t mangle -I PREROUTING -j LOG --log-prefix "mangle pre [zt] "
#$IPTABLES -t mangle -I POSTROUTING -j LOG --log-prefix "mangle post [zt] "
#$IPTABLES -t mangle -I INPUT -j LOG --log-prefix "mangle inp [zt] "
#$IPTABLES -t mangle -I OUTPUT -j LOG --log-prefix "mangle out [zt] "
#$IPTABLES -t mangle -I FORWARD -j LOG --log-prefix "mangle fw [zt] "
#$IPTABLES -t nat -I POSTROUTING -j LOG --log-prefix "nat post [zt] "
#$IPTABLES -t nat -I INPUT -j LOG --log-prefix "nat inp [zt] "
#$IPTABLES -t nat -I OUTPUT -j LOG --log-prefix "nat out [zt] "
#$IPTABLES -t nat -I PREROUTING -j LOG --log-prefix "nat pre [zt] "
#$IPTABLES -t raw -I PREROUTING -j LOG --log-prefix "raw pre [zt] "
#$IPTABLES -t raw -I OUTPUT -j LOG --log-prefix "raw out [zt] "
#$IPTABLES -t filter -I FORWARD -j LOG --log-prefix "filt fw [zt] "
#$IPTABLES -t filter -I OUTPUT -j LOG --log-prefix "filt out [zt] "
#$IPTABLES -t filter -I INPUT -j LOG --log-prefix "filt inp [zt] "
# Redirect outbound traffic that is NOT from ztunnel (identified by mark)
# We do not currently bother redirecting inbound traffic since we don't test it, but a more complete solution would.
# Note: in the real world, this would be a UID/GID match like sidecars. Setting the mark is enabled only for testing (for now?)
$IPTABLES -w -t nat -A OUTPUT -p tcp ! -o lo -m mark ! --mark $MARK -j REDIRECT --to-ports "${POD_OUTBOUND}"

View File

@ -13,7 +13,7 @@
// limitations under the License.
use crate::config::Config;
use crate::hyper_util::{empty_response, plaintext_response, Server};
use crate::hyper_util::{Server, empty_response, plaintext_response};
use crate::identity::SecretManager;
use crate::state::DemandProxyState;
use crate::tls::Certificate;
@ -23,11 +23,9 @@ use crate::{signal, telemetry};
use base64::engine::general_purpose::STANDARD;
use bytes::Bytes;
use drain::Watch;
use http_body_util::Full;
use hyper::body::Incoming;
use hyper::{header::HeaderValue, header::CONTENT_TYPE, Request, Response};
use pprof::protos::Message;
use hyper::{Request, Response, header::CONTENT_TYPE, header::HeaderValue};
use std::borrow::Borrow;
use std::collections::HashMap;
@ -36,22 +34,12 @@ use std::sync::Arc;
use std::time::SystemTime;
use std::{net::SocketAddr, time::Duration};
use crate::drain::DrainWatcher;
use tokio::time;
use tracing::{error, info, warn};
use tracing_subscriber::filter;
pub trait AdminHandler: Sync + Send {
fn path(&self) -> &'static str;
fn description(&self) -> &'static str;
// sadly can't use async trait because no Sync
// see: https://github.com/dtolnay/async-trait/issues/248, https://github.com/dtolnay/async-trait/issues/142
// we can't use FutureExt::shared because our result is not clonable
fn handle(
&self,
req: Request<Incoming>,
) -> std::pin::Pin<Box<dyn futures_util::Future<Output = Response<Full<Bytes>>> + Sync + Send>>;
}
pub trait AdminHandler2: Sync + Send {
fn key(&self) -> &'static str;
// sadly can't use async trait because no Sync
// see: https://github.com/dtolnay/async-trait/issues/248, https://github.com/dtolnay/async-trait/issues/142
@ -61,24 +49,24 @@ pub trait AdminHandler2: Sync + Send {
struct State {
proxy_state: DemandProxyState,
config: Config,
config: Arc<Config>,
shutdown_trigger: signal::ShutdownTrigger,
cert_manager: Arc<SecretManager>,
handlers: Vec<Arc<dyn AdminHandler2>>,
handlers: Vec<Arc<dyn AdminHandler>>,
}
pub struct Service {
s: Server<State>,
}
#[derive(serde::Serialize, Debug, Clone)]
#[derive(serde::Serialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ConfigDump {
#[serde(flatten)]
proxy_state: DemandProxyState,
static_config: LocalConfig,
version: BuildInfo,
config: Config,
config: Arc<Config>,
certificates: Vec<CertsDump>,
}
@ -98,14 +86,15 @@ pub struct CertsDump {
identity: String,
state: String,
cert_chain: Vec<CertDump>,
root_certs: Vec<CertDump>,
}
impl Service {
pub async fn new(
config: Config,
config: Arc<Config>,
proxy_state: DemandProxyState,
shutdown_trigger: signal::ShutdownTrigger,
drain_rx: Watch,
drain_rx: DrainWatcher,
cert_manager: Arc<SecretManager>,
) -> anyhow::Result<Self> {
Server::<State>::bind(
@ -128,14 +117,16 @@ impl Service {
self.s.address()
}
pub fn add_handler(&mut self, handler: Arc<dyn AdminHandler2>) {
pub fn add_handler(&mut self, handler: Arc<dyn AdminHandler>) {
self.s.state_mut().handlers.push(handler);
}
pub fn spawn(self) {
self.s.spawn(|state, req| async move {
match req.uri().path() {
#[cfg(target_os = "linux")]
"/debug/pprof/profile" => handle_pprof(req).await,
#[cfg(target_os = "linux")]
"/debug/pprof/heap" => handle_jemalloc_pprof_heapgen(req).await,
"/quitquitquit" => Ok(handle_server_shutdown(
state.shutdown_trigger.clone(),
@ -230,10 +221,12 @@ async fn dump_certs(cert_manager: &SecretManager) -> Vec<CertsDump> {
Unavailable(err) => dump.state = format!("Unavailable: {err}"),
Available(certs) => {
dump.state = "Available".to_string();
dump.cert_chain = std::iter::once(&certs.cert)
.chain(certs.chain.iter())
dump.cert_chain = certs
.cert_and_intermediates()
.iter()
.map(dump_cert)
.collect();
dump.root_certs = certs.roots.iter().map(dump_cert).collect();
}
};
dump
@ -244,7 +237,9 @@ async fn dump_certs(cert_manager: &SecretManager) -> Vec<CertsDump> {
dump
}
#[cfg(target_os = "linux")]
async fn handle_pprof(_req: Request<Incoming>) -> anyhow::Result<Response<Full<Bytes>>> {
use pprof::protos::Message;
let guard = pprof::ProfilerGuardBuilder::default()
.frequency(1000)
// .blocklist(&["libc", "libgcc", "pthread", "vdso"])
@ -283,7 +278,7 @@ async fn handle_server_shutdown(
}
async fn handle_config_dump(
handlers: &[Arc<dyn AdminHandler2>],
handlers: &[Arc<dyn AdminHandler>],
mut dump: ConfigDump,
) -> anyhow::Result<Response<Full<Bytes>>> {
if let Some(cfg) = dump.config.local_xds_config.clone() {
@ -313,6 +308,7 @@ async fn handle_config_dump(
let body = serde_json::to_string_pretty(&kv)?;
Ok(Response::builder()
.status(hyper::StatusCode::OK)
.header(hyper::header::CONTENT_TYPE, "application/json")
.body(body.into())
.expect("builder with known status code should not fail"))
}
@ -368,34 +364,56 @@ fn list_loggers() -> Response<Full<Bytes>> {
}
}
fn change_log_level(reset: bool, level: &str) -> Response<Full<Bytes>> {
match tracing::level_filters::LevelFilter::from_str(level) {
Ok(level_filter) => {
// Valid level, continue processing
tracing::info!("Parsed level: {:?}", level_filter);
match telemetry::set_level(reset, level) {
Ok(_) => list_loggers(),
Err(e) => plaintext_response(
hyper::StatusCode::BAD_REQUEST,
format!("Failed to set new level: {}\n{}", e, HELP_STRING),
),
fn validate_log_level(level: &str) -> anyhow::Result<()> {
for clause in level.split(',') {
// We support 2 forms, compared to the underlying library
// <level>: supported, sets the default
// <scope>=<level>: supported, sets a scope's level
// <scope>: sets the scope to 'trace' level. NOT SUPPORTED.
match clause {
"off" | "error" | "warn" | "info" | "debug" | "trace" => continue,
s if s.contains('=') => {
filter::Targets::from_str(s)?;
}
s => anyhow::bail!("level {s} is invalid"),
}
Err(_) => {
}
Ok(())
}
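A quick sketch (not part of the change) of what the two supported clause forms mean in practice for this validator:

#[cfg(test)]
mod validate_log_level_examples {
    use super::validate_log_level;

    // Illustrative cases mirroring the comment above; they are not in the diff.
    #[test]
    fn forms() {
        assert!(validate_log_level("info").is_ok()); // <level>: sets the default
        assert!(validate_log_level("access=debug,info").is_ok()); // <scope>=<level>, plus a default
        assert!(validate_log_level("hyper").is_err()); // bare <scope> is rejected
    }
}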
fn change_log_level(reset: bool, level: &str) -> Response<Full<Bytes>> {
if !reset && level.is_empty() {
return list_loggers();
}
if !level.is_empty() {
if let Err(_e) = validate_log_level(level) {
// Invalid level provided
plaintext_response(
return plaintext_response(
hyper::StatusCode::BAD_REQUEST,
format!("Invalid level provided: {}\n{}", level, HELP_STRING),
)
}
);
};
}
match telemetry::set_level(reset, level) {
Ok(_) => list_loggers(),
Err(e) => plaintext_response(
hyper::StatusCode::BAD_REQUEST,
format!("Failed to set new level: {}\n{}", e, HELP_STRING),
),
}
}
#[cfg(feature = "jemalloc")]
#[cfg(all(feature = "jemalloc", target_os = "linux"))]
async fn handle_jemalloc_pprof_heapgen(
_req: Request<Incoming>,
) -> anyhow::Result<Response<Full<Bytes>>> {
let mut prof_ctl = jemalloc_pprof::PROF_CTL.as_ref()?.lock().await;
let Some(prof_ctrl) = jemalloc_pprof::PROF_CTL.as_ref() else {
return Ok(Response::builder()
.status(hyper::StatusCode::INTERNAL_SERVER_ERROR)
.body("jemalloc profiling is not enabled".into())
.expect("builder with known status code should not fail"));
};
let mut prof_ctl = prof_ctrl.lock().await;
if !prof_ctl.activated() {
return Ok(Response::builder()
.status(hyper::StatusCode::INTERNAL_SERVER_ERROR)
@ -405,7 +423,7 @@ async fn handle_jemalloc_pprof_heapgen(
let pprof = prof_ctl.dump_pprof()?;
Ok(Response::builder()
.status(hyper::StatusCode::OK)
.body(Bytes::from(pprof?).into())
.body(Bytes::from(pprof).into())
.expect("builder with known status code should not fail"))
}
@ -426,23 +444,24 @@ fn base64_encode(data: String) -> String {
#[cfg(test)]
mod tests {
use super::ConfigDump;
use super::change_log_level;
use super::dump_certs;
use super::handle_config_dump;
use super::ConfigDump;
use crate::admin::HELP_STRING;
use crate::config::construct_config;
use crate::config::ProxyConfig;
use crate::config::construct_config;
use crate::identity;
use crate::strng;
use crate::test_helpers::{get_response_str, helpers, new_proxy_state};
use crate::xds::istio::security::string_match::MatchType as XdsMatchType;
use crate::xds::istio::security::Address as XdsAddress;
use crate::xds::istio::security::Authorization as XdsAuthorization;
use crate::xds::istio::security::Clause as XdsClause;
use crate::xds::istio::security::Match as XdsMatch;
use crate::xds::istio::security::Rule as XdsRule;
use crate::xds::istio::security::ServiceAccountMatch as XdsServiceAccountMatch;
use crate::xds::istio::security::StringMatch as XdsStringMatch;
use crate::xds::istio::workload::gateway_address::Destination as XdsDestination;
use crate::xds::istio::security::string_match::MatchType as XdsMatchType;
use crate::xds::istio::workload::GatewayAddress as XdsGatewayAddress;
use crate::xds::istio::workload::LoadBalancing as XdsLoadBalancing;
use crate::xds::istio::workload::Locality as XdsLocality;
@ -452,9 +471,11 @@ mod tests {
use crate::xds::istio::workload::Service as XdsService;
use crate::xds::istio::workload::Workload as XdsWorkload;
use crate::xds::istio::workload::WorkloadType as XdsWorkloadType;
use crate::xds::istio::workload::gateway_address::Destination as XdsDestination;
use bytes::Bytes;
use http_body_util::BodyExt;
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
fn diff_json<'a>(a: &'a serde_json::Value, b: &'a serde_json::Value) -> String {
@ -495,9 +516,9 @@ mod tests {
for i in 0..2 {
manager
.fetch_certificate(&identity::Identity::Spiffe {
trust_domain: "trust_domain".to_string(),
namespace: "namespace".to_string(),
service_account: format!("sa-{i}"),
trust_domain: "trust_domain".into(),
namespace: "namespace".into(),
service_account: strng::format!("sa-{i}"),
})
.await
.unwrap();
@ -524,11 +545,13 @@ mod tests {
let want = serde_json::json!([
{
"certChain": [],
"rootCerts": [],
"identity": "spiffe://error/ns/forgotten/sa/sa-failed",
"state": "Unavailable: the identity is no longer needed"
},
{
"certChain": [],
"rootCerts": [],
"identity": "spiffe://test/ns/test/sa/sa-pending",
"state": "Initializing"
},
@ -536,15 +559,17 @@ mod tests {
"certChain": [
{
"expirationTime": "2023-03-11T12:57:26Z",
"pem": "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUNXekNDQVVPZ0F3SUJBZ0lVWnlUOTI5c3d0QjhPSG1qUmFURWFENnlqcWc0d0RRWUpLb1pJaHZjTgpBUUVMQlFBd0dERVdNQlFHQTFVRUNnd05ZMngxYzNSbGNpNXNiMk5oYkRBZUZ3MHlNekF6TVRFd05UVTMKTWpaYUZ3MHlNekF6TVRFeE1qVTNNalphTUJneEZqQVVCZ05WQkFvTURXTnNkWE4wWlhJdWJHOWpZV3d3CldUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFSYXIyQm1JWUFndkptT3JTcENlRlE3OUpQeQo4Y3c0K3pFRThmcXI1N2svdW1NcDVqWFpFR0JwZWRCSVkrcWZtSlBYRWlyYTlFOTJkU21rZks1QUtNV3gKbzJnd1pqQTFCZ05WSFJFRUxqQXNoaXB6Y0dsbVptVTZMeTkwY25WemRGOWtiMjFoYVc0dmJuTXZibUZ0ClpYTndZV05sTDNOaEwzTmhMVEF3RGdZRFZSMFBBUUgvQkFRREFnV2dNQjBHQTFVZEpRUVdNQlFHQ0NzRwpBUVVGQndNQkJnZ3JCZ0VGQlFjREFqQU5CZ2txaGtpRzl3MEJBUXNGQUFPQ0FRRUFjTzNlMjAvK0ZrRkwKUmttMTNtQlFNYjVPUmpTOGhwWjBRMkZKd2wrSXV4TGY2MUJDZS9RVlhOVklpSUdlMXRVRTh5UTRoMXZrCjhVb01sSmpTQkdiM3VDdHVLRFVKN0xOM1VBUmV4YU1uQkZobC9mWmQxU3ZZcmhlWjU3WDlrTElVa2hkSQpDUVdxOFVFcXBWZEloNGxTZjhoYnFRQksvUWhCN0I2bUJOSW5uMThZTEhiOEpmU0N2aXBWYTRuNXByTlYKbVNWc1JPMUtpY1FQYVhpUzJta0xBWVFRanROYkVJdnJwQldCYytmVWZPaEQ0YmhwUFVmSVFIN1dFcUZLCm5TMnQwSmh1d08zM2FoUDhLZVBWWDRDRkJ4VXc2SDhrd1dJUkh5dW9YbGFwMmVST1EycFRyYmtmVjJZbgpmWjZxV0huREJ5ZjN6bkFQQVM1ZnZ4b1RoKzBYTHc9PQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==",
"serialNumber": "588850990443535479077311695632745359443207891470",
"pem": "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUNYRENDQVVTZ0F3SUJBZ0lVTDVaZ0toTEI1YUt3YXRuZE1sR25CZWZ3Qkxnd0RRWUpLb1pJaHZjTgpBUUVMQlFBd0dERVdNQlFHQTFVRUNnd05ZMngxYzNSbGNpNXNiMk5oYkRBZUZ3MHlNekF6TVRFd05UVTMKTWpaYUZ3MHlNekF6TVRFeE1qVTNNalphTUJneEZqQVVCZ05WQkFvTURXTnNkWE4wWlhJdWJHOWpZV3d3CldUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFSYXIyQm1JWUFndkptT3JTcENlRlE3OUpQeQo4Y3c0K3pFRThmcXI1N2svdW1NcDVqWFpFR0JwZWRCSVkrcWZtSlBYRWlyYTlFOTJkU21rZks1QUtNV3gKbzJrd1p6QTFCZ05WSFJFRUxqQXNoaXB6Y0dsbVptVTZMeTkwY25WemRGOWtiMjFoYVc0dmJuTXZibUZ0ClpYTndZV05sTDNOaEwzTmhMVEF3RHdZRFZSMFBBUUgvQkFVREF3ZWdBREFkQmdOVkhTVUVGakFVQmdncgpCZ0VGQlFjREFRWUlLd1lCQlFVSEF3SXdEUVlKS29aSWh2Y05BUUVMQlFBRGdnRUJBQ2xKZVJpdmpLYVkKdm5TUHhjUXZPNTNxVFpiUUdHWFc5OHI5Qm1FWGUwYm5YeXZlMWJUVlNYcWVNMXZHdE1DalJGai91dE9VCkRwcHphQVJGRlRzenN2QWdJNStwNFhpbVU4U0FwTlhUYVZjWHkwcG04c2dIWUF6U2drMExBcW1wTWJxbwpvNDB6dmFxVk9nQ1F0c2Vobkg5SCtMQXd1WDl1T08vY2J5NnRidjhrSkhrMWZOTmZ6RTlxZVUwUGFhWWQKZjZXQzhkaWliRGJoN0tjR29rSG80NDMvT05Mb0tJZU9aTFJIbXBFdDdyYnprTDl4elNlNnVZaGQ1SlNGCk55dlY2T3Zoc1FXVVpqd1BmanUvUVJUTzFPdWgrUUZYaTAxNFpvUjRVRnRZaDRjcXphcUlpYVQ0MERyMgpNTHk4eEhJUzRmM1ltUXJEei9VN1pUSG9xaWFLaVBZPQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==",
"serialNumber": "271676055104741785552467469040731750696653685944",
"validFrom": "2023-03-11T05:57:26Z"
},
],
"rootCerts": [
{
"expirationTime": "2296-12-24T18:31:28Z",
"pem": "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURFekNDQWZ1Z0F3SUJBZ0lVQytjLzYwZStGMWVFKzdWcXhuYVdjT09abm1Fd0RRWUpLb1pJaHZjTgpBUUVMQlFBd0dERVdNQlFHQTFVRUNnd05ZMngxYzNSbGNpNXNiMk5oYkRBZ0Z3MHlNekF6TVRFeE9ETXgKTWpoYUdBOHlNamsyTVRJeU5ERTRNekV5T0Zvd0dERVdNQlFHQTFVRUNnd05ZMngxYzNSbGNpNXNiMk5oCmJEQ0NBU0l3RFFZSktvWklodmNOQVFFQkJRQURnZ0VQQURDQ0FRb0NnZ0VCQU1lQ1R4UEp0dWQwVXh3KwpDYWFkZFdEN2ErUUV1UVkrQlBUS0pkbk1lajBzQk1mVU1iVDE2SkxrWU5GZ3JqMVVWSEhjcFNvSUhvY3AKMnNkMzJTWTRiZGJva1Fjb3ArQmp0azU1alE0NktMWXNKZ2IyTnd2WW8xdDhFMWFldEpxRkdWN3JtZVpiCkZZZWFpKzZxN2lNamxiQ0dBdTcvVW5LSnNkR25hSlFnTjhkdTBUMUtEZ2pxS1B5SHFkc3U5a2JwQ3FpRQpYTVJtdzQvQkVoRkd6bUlEMm9VREtCMzZkdVZiZHpTRW01MVF2Z1U1SUxYSWd5VnJlak41Q0ZzQytXK3gKamVPWExFenRmSEZVb3FiM3dXaGtCdUV4bXI4MUoyaEdXOXBVTEoyd2tRZ2RmWFA3Z3RNa0I2RXlLdy94CkllYU5tTHpQSUdyWDAxelFZSWRaVHVEd01ZMENBd0VBQWFOVE1GRXdIUVlEVlIwT0JCWUVGRDhrNGYxYQpya3V3UitVUmhLQWUySVRaS1o3Vk1COEdBMVVkSXdRWU1CYUFGRDhrNGYxYXJrdXdSK1VSaEtBZTJJVFoKS1o3Vk1BOEdBMVVkRXdFQi93UUZNQU1CQWY4d0RRWUpLb1pJaHZjTkFRRUxCUUFEZ2dFQkFLcm5BZVNzClNTSzMvOHp4K2h6ajZTRlhkSkE5Q1EwMkdFSjdoSHJLaWpHV1ZZZGRhbDlkQWJTNXRMZC8vcUtPOXVJcwpHZXR5L09rMmJSUTZjcXFNbGdkTnozam1tcmJTbFlXbUlYSTB5SEdtQ2lTYXpIc1hWYkVGNkl3eTN0Y1IKNHZvWFdLSUNXUGgrQzJjVGdMbWVaMEV1ekZ4cTR3Wm5DZjQwd0tvQUo5aTFhd1NyQm5FOWpXdG5wNEY0CmhXbkpUcEdreTVkUkFMRTBsLzJBYnJsMzh3Z2ZNOHI0SW90bVBUaEZLbkZlSUhVN2JRMXJZQW9xcGJBaApDdjBCTjVQakFRUldNazZib28zZjBha1MwN25sWUlWcVhoeHFjWW5PZ3drZGxUdFg5TXFHSXEyNm44bjEKTldXd25tS09qTnNrNnFSbXVsRWdlR080dnhUdlNKWWIraFU9Ci0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K",
"serialNumber": "67955938755654933561614970125599055831405010529",
"validFrom": "2023-03-11T18:31:28Z"
"expirationTime": "2299-01-17T23:35:46Z",
"pem": "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURJRENDQWdpZ0F3SUJBZ0lVUmxsdFV1bTJRbTE1dFQ5end1MmtwaDR2ZWRjd0RRWUpLb1pJaHZjTgpBUUVMQlFBd0dERVdNQlFHQTFVRUNnd05ZMngxYzNSbGNpNXNiMk5oYkRBZ0Z3MHlOVEEwTURNeU16TTEKTkRaYUdBOHlNams1TURFeE56SXpNelUwTmxvd0dERVdNQlFHQTFVRUNnd05ZMngxYzNSbGNpNXNiMk5oCmJEQ0NBU0l3RFFZSktvWklodmNOQVFFQkJRQURnZ0VQQURDQ0FRb0NnZ0VCQUxxVHVwVXlMK2pvd3FOZQpMQUxFbnlXYS9VNmgyaktCYzFYWUFtekR1MDN4S0VhM3JhU1ZzU05BYjFnN1hybmgxaTViNEg0enBtY3gKdStsZURlMDh4OEdOOFJRVjBoUlE0bkkvb0lseHhmc2NOWDZoNGwyVlRRSGNLcnFaYUFRQ2NDTVJuc2EzCk9tUFNPQmRPdTR2ZkFxeVVxMS9ici82TEczRWFQMDYxQ09lMzVWUTFhbkZJYXQrVWJ6bEcrZmpGbXZXbwpxZFdFMVFaekV4UWdXV3VKNjh6RjJBN25MTXVxc0k5cG8wR2FKcHhwajZnc0tIZ3NRZ1JoYWR4UlR3ejAKc0hrVE0rS216SkY0aTJ1NDJ3VHc5YWpzME5NZmQ5WjdBbWlvRXpnS0J3bURBdGQra04zUFdyby8vaHAxClRtOUVqTVFac2s3QmV6NVVyUDA4Y09yTXNOTUNBd0VBQWFOZ01GNHdIUVlEVlIwT0JCWUVGRzlmWGRqQgo0THN2RUpxWUxZNllQc2xWMWxXVU1COEdBMVVkSXdRWU1CYUFGRzlmWGRqQjRMc3ZFSnFZTFk2WVBzbFYKMWxXVU1BOEdBMVVkRXdFQi93UUZNQU1CQWY4d0N3WURWUjBQQkFRREFnSUVNQTBHQ1NxR1NJYjNEUUVCCkN3VUFBNElCQVFDaXVMUzljZkNjRDNDblNGbUpOays5MkNhRXEyUmxTMXF1dmdTa3Z5ckhZNTV4cUxrYQpCbUVDU3VCT2FCT3lHNlZMaFlPMy9OeDBwRERJbUJYak1GZTRJRVJER3QvQTA0am41S2RFTGRiK1laOWUKdUZvY09xdWpucnFVYkxXT2Zra21rd3E5TDFWNjNsKzAxdGRFUlhYa0ZuWHM4QTFhUnh6U2RCSVUrZEtKCmpyRHNtUzdnK1B5dWNEZzJ2WWtTcExoMTdhTm1RdndrOWRPMlpvVHdMcW1JSEZYcHhlNW1PdmlyRVE1RQpYL1JzRW9IY0hURTNGUk0xaDBVdUI1SjN4ekVoOXpHUFRwNWljS2d1TC9vUElmUXVJdWhaRCtWNWg3ZzcKS3k1RHlNVWNLT0l1T0c2SStLdDJYaWpHMld5UHRwWEJBTXJoU2ZaM2ViQWd0WjZJdjZxdgotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==",
"serialNumber": "401623643733315109898464329860171355725264550359",
"validFrom": "2025-04-03T23:35:46Z"
}
],
"identity": "spiffe://trust_domain/ns/namespace/sa/sa-0",
@ -554,15 +579,17 @@ mod tests {
"certChain": [
{
"expirationTime": "2023-03-11T13:57:26Z",
"pem": "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUNXekNDQVVPZ0F3SUJBZ0lVWElQK29ySVF3dDZFUGRLSFdRU0VMOTM0bjdFd0RRWUpLb1pJaHZjTgpBUUVMQlFBd0dERVdNQlFHQTFVRUNnd05ZMngxYzNSbGNpNXNiMk5oYkRBZUZ3MHlNekF6TVRFd05qVTMKTWpaYUZ3MHlNekF6TVRFeE16VTNNalphTUJneEZqQVVCZ05WQkFvTURXTnNkWE4wWlhJdWJHOWpZV3d3CldUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFSYXIyQm1JWUFndkptT3JTcENlRlE3OUpQeQo4Y3c0K3pFRThmcXI1N2svdW1NcDVqWFpFR0JwZWRCSVkrcWZtSlBYRWlyYTlFOTJkU21rZks1QUtNV3gKbzJnd1pqQTFCZ05WSFJFRUxqQXNoaXB6Y0dsbVptVTZMeTkwY25WemRGOWtiMjFoYVc0dmJuTXZibUZ0ClpYTndZV05sTDNOaEwzTmhMVEV3RGdZRFZSMFBBUUgvQkFRREFnV2dNQjBHQTFVZEpRUVdNQlFHQ0NzRwpBUVVGQndNQkJnZ3JCZ0VGQlFjREFqQU5CZ2txaGtpRzl3MEJBUXNGQUFPQ0FRRUFHV2tCY1plUEhrZisKSEpoazY5NHhDaHZLVENkVlRoNE9QNTBvWC9TdE0vK3NsazU0Y2RkcnRpOG0rdEFnai8wK0FLaFhpSTJaCjBNRFZPaEpOWTVRT1VXdkVBUWNYVTlPR2NCWmsyRWNGVW9BOC9RRzFpcVB3ejJJRGluakYrb3lTWExEdApFRGxPdW1Sa3VETWtyME51TGNZTlJuYUI0LzMreDAvdVlRM2M3TXpvUEtUQmZQdW1DY0wzbG5mR1dGR3kKc1d3b1p5V01CK1ZFdjYzK2psdTZDZmwzUGN1NEtFNHVhQUJiWHVvRkhjeU8yMW5sZVVvT3Z2VXhLZDdGCkxvQWNsVDNaSUI3dzNUcXE2MFR3UlV6ZGZkQlA5UURabEVSL1JLTDZWbnBBUVZhbXZBWmNjZFVuTWZjOAppT0N6TWVqV2tweGxXL3MrMW1nMUxzQWxyYlJMdHc9PQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==",
"serialNumber": "528170730419860468572163268563070820131458817969",
"pem": "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUNYRENDQVVTZ0F3SUJBZ0lVSlVGNVVGbU52OVhYQlFWaDFDbFk0VFNLRng4d0RRWUpLb1pJaHZjTgpBUUVMQlFBd0dERVdNQlFHQTFVRUNnd05ZMngxYzNSbGNpNXNiMk5oYkRBZUZ3MHlNekF6TVRFd05qVTMKTWpaYUZ3MHlNekF6TVRFeE16VTNNalphTUJneEZqQVVCZ05WQkFvTURXTnNkWE4wWlhJdWJHOWpZV3d3CldUQVRCZ2NxaGtqT1BRSUJCZ2dxaGtqT1BRTUJCd05DQUFSYXIyQm1JWUFndkptT3JTcENlRlE3OUpQeQo4Y3c0K3pFRThmcXI1N2svdW1NcDVqWFpFR0JwZWRCSVkrcWZtSlBYRWlyYTlFOTJkU21rZks1QUtNV3gKbzJrd1p6QTFCZ05WSFJFRUxqQXNoaXB6Y0dsbVptVTZMeTkwY25WemRGOWtiMjFoYVc0dmJuTXZibUZ0ClpYTndZV05sTDNOaEwzTmhMVEV3RHdZRFZSMFBBUUgvQkFVREF3ZWdBREFkQmdOVkhTVUVGakFVQmdncgpCZ0VGQlFjREFRWUlLd1lCQlFVSEF3SXdEUVlKS29aSWh2Y05BUUVMQlFBRGdnRUJBSWdscTIvNnJyWlIKa25UUmZqM201SnU0MmFycGlxVVNHR3A2Mks3L09zeDc5RmovZDBwdU1hMzFkMFhwS0w3N0F2QmtvcVk3CjFWejJKOHRzUkZhZEM1ZmFtQlRXdUN4OUE5R0V3WHEzQmllK2l1a2RGWjZqUTRsb2EybHVWWWFZanhUbgpqR3NLQm0xR0hwMHpacFFVNkdENzA2c2RaTjltaGlqWVA4RnpxWGg1TTlzTzQ4UldveElOUmhXd0pKejQKYUlaZWlRTlJWdkRNZm93MGtxdFFtN001TnQzanA2RkJjTzhGQkJvV0p3MXNCSitLME5XN0VuUG82Yyt0CjE5MkZ0Nmx0eXpvV1BSMnVIYUZENi9FRjZVTkowcTN1ejZicjNYRFg1Q3lrRjQxSEMrNHRSMjQ3RWhmZgpGQkpyUVc0dXAxdHAzdnZGYTdHYnl6bkZWUEc4M3dvPQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==",
"serialNumber": "212692774886610945930036647276614034927450199839",
"validFrom": "2023-03-11T06:57:26Z"
},
],
"rootCerts": [
{
"expirationTime": "2296-12-24T18:31:28Z",
"pem": "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURFekNDQWZ1Z0F3SUJBZ0lVQytjLzYwZStGMWVFKzdWcXhuYVdjT09abm1Fd0RRWUpLb1pJaHZjTgpBUUVMQlFBd0dERVdNQlFHQTFVRUNnd05ZMngxYzNSbGNpNXNiMk5oYkRBZ0Z3MHlNekF6TVRFeE9ETXgKTWpoYUdBOHlNamsyTVRJeU5ERTRNekV5T0Zvd0dERVdNQlFHQTFVRUNnd05ZMngxYzNSbGNpNXNiMk5oCmJEQ0NBU0l3RFFZSktvWklodmNOQVFFQkJRQURnZ0VQQURDQ0FRb0NnZ0VCQU1lQ1R4UEp0dWQwVXh3KwpDYWFkZFdEN2ErUUV1UVkrQlBUS0pkbk1lajBzQk1mVU1iVDE2SkxrWU5GZ3JqMVVWSEhjcFNvSUhvY3AKMnNkMzJTWTRiZGJva1Fjb3ArQmp0azU1alE0NktMWXNKZ2IyTnd2WW8xdDhFMWFldEpxRkdWN3JtZVpiCkZZZWFpKzZxN2lNamxiQ0dBdTcvVW5LSnNkR25hSlFnTjhkdTBUMUtEZ2pxS1B5SHFkc3U5a2JwQ3FpRQpYTVJtdzQvQkVoRkd6bUlEMm9VREtCMzZkdVZiZHpTRW01MVF2Z1U1SUxYSWd5VnJlak41Q0ZzQytXK3gKamVPWExFenRmSEZVb3FiM3dXaGtCdUV4bXI4MUoyaEdXOXBVTEoyd2tRZ2RmWFA3Z3RNa0I2RXlLdy94CkllYU5tTHpQSUdyWDAxelFZSWRaVHVEd01ZMENBd0VBQWFOVE1GRXdIUVlEVlIwT0JCWUVGRDhrNGYxYQpya3V3UitVUmhLQWUySVRaS1o3Vk1COEdBMVVkSXdRWU1CYUFGRDhrNGYxYXJrdXdSK1VSaEtBZTJJVFoKS1o3Vk1BOEdBMVVkRXdFQi93UUZNQU1CQWY4d0RRWUpLb1pJaHZjTkFRRUxCUUFEZ2dFQkFLcm5BZVNzClNTSzMvOHp4K2h6ajZTRlhkSkE5Q1EwMkdFSjdoSHJLaWpHV1ZZZGRhbDlkQWJTNXRMZC8vcUtPOXVJcwpHZXR5L09rMmJSUTZjcXFNbGdkTnozam1tcmJTbFlXbUlYSTB5SEdtQ2lTYXpIc1hWYkVGNkl3eTN0Y1IKNHZvWFdLSUNXUGgrQzJjVGdMbWVaMEV1ekZ4cTR3Wm5DZjQwd0tvQUo5aTFhd1NyQm5FOWpXdG5wNEY0CmhXbkpUcEdreTVkUkFMRTBsLzJBYnJsMzh3Z2ZNOHI0SW90bVBUaEZLbkZlSUhVN2JRMXJZQW9xcGJBaApDdjBCTjVQakFRUldNazZib28zZjBha1MwN25sWUlWcVhoeHFjWW5PZ3drZGxUdFg5TXFHSXEyNm44bjEKTldXd25tS09qTnNrNnFSbXVsRWdlR080dnhUdlNKWWIraFU9Ci0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K",
"serialNumber": "67955938755654933561614970125599055831405010529",
"validFrom": "2023-03-11T18:31:28Z"
"expirationTime": "2299-01-17T23:35:46Z",
"pem": "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURJRENDQWdpZ0F3SUJBZ0lVUmxsdFV1bTJRbTE1dFQ5end1MmtwaDR2ZWRjd0RRWUpLb1pJaHZjTgpBUUVMQlFBd0dERVdNQlFHQTFVRUNnd05ZMngxYzNSbGNpNXNiMk5oYkRBZ0Z3MHlOVEEwTURNeU16TTEKTkRaYUdBOHlNams1TURFeE56SXpNelUwTmxvd0dERVdNQlFHQTFVRUNnd05ZMngxYzNSbGNpNXNiMk5oCmJEQ0NBU0l3RFFZSktvWklodmNOQVFFQkJRQURnZ0VQQURDQ0FRb0NnZ0VCQUxxVHVwVXlMK2pvd3FOZQpMQUxFbnlXYS9VNmgyaktCYzFYWUFtekR1MDN4S0VhM3JhU1ZzU05BYjFnN1hybmgxaTViNEg0enBtY3gKdStsZURlMDh4OEdOOFJRVjBoUlE0bkkvb0lseHhmc2NOWDZoNGwyVlRRSGNLcnFaYUFRQ2NDTVJuc2EzCk9tUFNPQmRPdTR2ZkFxeVVxMS9ici82TEczRWFQMDYxQ09lMzVWUTFhbkZJYXQrVWJ6bEcrZmpGbXZXbwpxZFdFMVFaekV4UWdXV3VKNjh6RjJBN25MTXVxc0k5cG8wR2FKcHhwajZnc0tIZ3NRZ1JoYWR4UlR3ejAKc0hrVE0rS216SkY0aTJ1NDJ3VHc5YWpzME5NZmQ5WjdBbWlvRXpnS0J3bURBdGQra04zUFdyby8vaHAxClRtOUVqTVFac2s3QmV6NVVyUDA4Y09yTXNOTUNBd0VBQWFOZ01GNHdIUVlEVlIwT0JCWUVGRzlmWGRqQgo0THN2RUpxWUxZNllQc2xWMWxXVU1COEdBMVVkSXdRWU1CYUFGRzlmWGRqQjRMc3ZFSnFZTFk2WVBzbFYKMWxXVU1BOEdBMVVkRXdFQi93UUZNQU1CQWY4d0N3WURWUjBQQkFRREFnSUVNQTBHQ1NxR1NJYjNEUUVCCkN3VUFBNElCQVFDaXVMUzljZkNjRDNDblNGbUpOays5MkNhRXEyUmxTMXF1dmdTa3Z5ckhZNTV4cUxrYQpCbUVDU3VCT2FCT3lHNlZMaFlPMy9OeDBwRERJbUJYak1GZTRJRVJER3QvQTA0am41S2RFTGRiK1laOWUKdUZvY09xdWpucnFVYkxXT2Zra21rd3E5TDFWNjNsKzAxdGRFUlhYa0ZuWHM4QTFhUnh6U2RCSVUrZEtKCmpyRHNtUzdnK1B5dWNEZzJ2WWtTcExoMTdhTm1RdndrOWRPMlpvVHdMcW1JSEZYcHhlNW1PdmlyRVE1RQpYL1JzRW9IY0hURTNGUk0xaDBVdUI1SjN4ekVoOXpHUFRwNWljS2d1TC9vUElmUXVJdWhaRCtWNWg3ZzcKS3k1RHlNVWNLT0l1T0c2SStLdDJYaWpHMld5UHRwWEJBTXJoU2ZaM2ViQWd0WjZJdjZxdgotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg==",
"serialNumber": "401623643733315109898464329860171355725264550359",
"validFrom": "2025-04-03T23:35:46Z"
}
],
"identity": "spiffe://trust_domain/ns/namespace/sa/sa-1",
@ -600,7 +627,6 @@ mod tests {
address: [127, 0, 0, 10].to_vec(),
})),
hbone_mtls_port: 15008,
hbone_single_tls_port: 15003,
}),
network_gateway: Some(XdsGatewayAddress {
destination: Some(XdsDestination::Address(XdsNetworkAddress {
@ -608,9 +634,9 @@ mod tests {
address: [127, 0, 0, 11].to_vec(),
})),
hbone_mtls_port: 15008,
hbone_single_tls_port: 15003,
}),
tunnel_protocol: Default::default(),
network_mode: Default::default(),
uid: "uid".to_string(),
name: "name".to_string(),
namespace: "namespace".to_string(),
@ -641,6 +667,8 @@ mod tests {
zone: "zone".to_string(),
subzone: "subezone".to_string(),
}),
extensions: Default::default(),
capacity: Default::default(),
// ..Default::default() // intentionally don't default. we want all fields populated
};
@ -661,7 +689,10 @@ mod tests {
load_balancing: Some(XdsLoadBalancing {
routing_preference: vec![1, 2],
mode: 1,
health_policy: 1,
}), // ..Default::default() // intentionally don't default. we want all fields populated
ip_families: 0,
extensions: Default::default(),
};
let auth = XdsAuthorization {
@ -696,6 +727,14 @@ mod tests {
not_namespaces: vec![XdsStringMatch {
match_type: Some(XdsMatchType::Exact("not-ns".to_string())),
}],
service_accounts: vec![XdsServiceAccountMatch {
namespace: "ns".into(),
service_account: "sa".into(),
}],
not_service_accounts: vec![XdsServiceAccountMatch {
namespace: "ns".into(),
service_account: "sa".into(),
}],
principals: vec![XdsStringMatch {
match_type: Some(XdsMatchType::Exact(
"spiffe://cluster.local/ns/ns/sa/sa".to_string(),
@ -721,7 +760,7 @@ mod tests {
proxy_state,
static_config: Default::default(),
version: Default::default(),
config: default_config,
config: Arc::new(default_config),
certificates: dump_certs(&manager).await,
};
@ -742,17 +781,16 @@ mod tests {
.unwrap();
let resp_str = String::from(std::str::from_utf8(&resp_bytes).unwrap());
// quick sanity check that our workload is there and keyed properly.
// quick sanity check that our workload is there.
// avoid stronger checks since serialization is not deterministic, and
// most of the value of this test is ensuring that we can serialize
// the config dump at all from our internal types
assert!(resp_str.contains("defaultnw/127.0.0.2"));
assert!(resp_str.contains("127.0.0.2"), "{resp_str}");
// Check a waypoint
assert!(resp_str.contains(
r#"waypoint": {
"destination": "defaultnw/127.0.0.10",
"hboneMtlsPort": 15008,
"hboneSingleTlsPort": 15003
"hboneMtlsPort": 15008
}"#
));
}
@ -767,6 +805,14 @@ mod tests {
async fn test_change_log_level() {
helpers::initialize_telemetry();
// no changes
let resp = change_log_level(false, "");
let resp_str = get_response_str(resp).await;
assert_eq!(
resp_str,
"current log level is hickory_server::server::server_future=off,info\n"
);
let resp = change_log_level(true, "");
let resp_str = get_response_str(resp).await;
assert_eq!(
@ -776,7 +822,10 @@ mod tests {
let resp = change_log_level(true, "invalid_level");
let resp_str = get_response_str(resp).await;
assert!(resp_str.contains(HELP_STRING));
assert!(
resp_str.contains(HELP_STRING),
"got {resp_str} want {HELP_STRING}"
);
let resp = change_log_level(true, "debug");
let resp_str = get_response_str(resp).await;
@ -785,6 +834,13 @@ mod tests {
"current log level is hickory_server::server::server_future=off,debug\n"
);
let resp = change_log_level(true, "access=debug,info");
let resp_str = get_response_str(resp).await;
assert_eq!(
resp_str,
"current log level is hickory_server::server::server_future=off,access=debug,info\n"
);
let resp = change_log_level(true, "warn");
let resp_str = get_response_str(resp).await;
assert_eq!(
@ -801,17 +857,23 @@ mod tests {
let resp = change_log_level(true, "trace");
let resp_str = get_response_str(resp).await;
assert!(resp_str
.contains("current log level is hickory_server::server::server_future=off,trace\n"));
assert!(
resp_str
.contains("current log level is hickory_server::server::server_future=off,trace\n")
);
let resp = change_log_level(true, "info");
let resp_str = get_response_str(resp).await;
assert!(resp_str
.contains("current log level is hickory_server::server::server_future=off,info\n"));
assert!(
resp_str
.contains("current log level is hickory_server::server::server_future=off,info\n")
);
let resp = change_log_level(true, "off");
let resp_str = get_response_str(resp).await;
assert!(resp_str
.contains("current log level is hickory_server::server::server_future=off,off\n"));
assert!(
resp_str
.contains("current log level is hickory_server::server::server_future=off,off\n")
);
}
}


@ -16,16 +16,16 @@ use std::future::Future;
use crate::proxyfactory::ProxyFactory;
use crate::drain;
use anyhow::Context;
use prometheus_client::registry::Registry;
use std::net::SocketAddr;
use std::pin::Pin;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{mpsc, Arc};
use std::sync::{Arc, mpsc};
use std::thread;
use anyhow::Context;
use prometheus_client::registry::Registry;
use tokio::task::JoinSet;
use tracing::{warn, Instrument};
use tracing::{Instrument, warn};
use crate::identity::SecretManager;
use crate::state::ProxyStateManager;
@ -33,7 +33,7 @@ use crate::{admin, config, metrics, proxy, readiness, signal};
use crate::{dns, xds};
pub async fn build_with_cert(
config: config::Config,
config: Arc<config::Config>,
cert_manager: Arc<SecretManager>,
) -> anyhow::Result<Bound> {
// Start the data plane worker pool.
@ -45,7 +45,7 @@ pub async fn build_with_cert(
// Any component which wants time to gracefully exit should take in a drain_rx clone,
// await drain_rx.signaled(), then cleanup.
// Note: there is still a hard timeout if the draining takes too long
let (drain_tx, drain_rx) = drain::channel();
let (drain_tx, drain_rx) = drain::new();
// Register readiness tasks.
let ready = readiness::Ready::new();
@ -80,11 +80,7 @@ pub async fn build_with_cert(
let istio_registry = metrics::sub_registry(&mut registry);
let _ = metrics::meta::Metrics::new(istio_registry);
let xds_metrics = xds::Metrics::new(istio_registry);
let proxy_metrics = if config.proxy {
Some(proxy::Metrics::new(istio_registry))
} else {
None
};
let proxy_metrics = Arc::new(proxy::Metrics::new(istio_registry));
let dns_metrics = if config.dns_proxy {
Some(dns::Metrics::new(istio_registry))
} else {
@ -93,8 +89,14 @@ pub async fn build_with_cert(
let (xds_tx, xds_rx) = tokio::sync::watch::channel(());
// Create the manager that updates proxy state from XDS.
let state_mgr =
ProxyStateManager::new(config.clone(), xds_metrics, xds_tx, cert_manager.clone()).await?;
let state_mgr = ProxyStateManager::new(
config.clone(),
xds_metrics,
proxy_metrics.clone(),
xds_tx,
cert_manager.clone(),
)
.await?;
let mut xds_rx_for_task = xds_rx.clone();
tokio::spawn(async move {
let _ = xds_rx_for_task.changed().await;
@ -132,8 +134,27 @@ pub async fn build_with_cert(
)
.map_err(|e| anyhow::anyhow!("failed to start proxy factory {:?}", e))?;
if config.inpod_enabled {
tracing::info!("in-pod mode enabled");
if config.proxy_mode == config::ProxyMode::Shared {
tracing::info!("shared proxy mode - in-pod mode enabled");
// Create ztunnel inbound listener only if its specific identity and workload info are configured.
if let Some(inbound) = proxy_gen.create_ztunnel_self_proxy_listener().await? {
// Run the inbound listener in the data plane worker pool
let mut xds_rx_for_inbound = xds_rx.clone();
data_plane_pool.send(DataPlaneTask {
block_shutdown: true,
fut: Box::pin(async move {
tracing::info!("Starting ztunnel inbound listener task");
let _ = xds_rx_for_inbound.changed().await;
tokio::task::spawn(async move {
inbound.run().in_current_span().await;
})
.await?;
Ok(())
}),
})?;
}
let run_future = init_inpod_proxy_mgr(
&mut registry,
&mut admin_server,
@ -154,7 +175,11 @@ pub async fn build_with_cert(
})?;
} else {
tracing::info!("proxy mode enabled");
let proxies = proxy_gen.new_proxies().await?;
let wli = config
.proxy_workload_information
.clone()
.expect("proxy_workload_information is required for dedicated mode");
let proxies = proxy_gen.new_proxies_for_dedicated(wli).await?;
match proxies.proxy {
Some(proxy) => {
proxy_addresses = Some(proxy.addresses());
@ -241,7 +266,8 @@ fn new_data_plane_pool(num_worker_threads: usize) -> mpsc::Sender<DataPlaneTask>
.thread_name_fn(|| {
static ATOMIC_ID: AtomicUsize = AtomicUsize::new(0);
let id = ATOMIC_ID.fetch_add(1, Ordering::SeqCst);
format!("ztunnel-proxy-{id}")
// Thread name can only be 16 chars so keep it short
format!("ztunnel-{id}")
})
.enable_all()
.build()
@ -280,7 +306,7 @@ fn new_data_plane_pool(num_worker_threads: usize) -> mpsc::Sender<DataPlaneTask>
tx
}
pub async fn build(config: config::Config) -> anyhow::Result<Bound> {
pub async fn build(config: Arc<config::Config>) -> anyhow::Result<Bound> {
let cert_manager = if config.fake_ca {
mock_secret_manager()
} else {
@ -306,7 +332,7 @@ fn init_inpod_proxy_mgr(
_config: &config::Config,
_proxy_gen: ProxyFactory,
_ready: readiness::Ready,
_drain_rx: drain::Watch,
_drain_rx: drain::DrainWatcher,
) -> anyhow::Result<std::pin::Pin<Box<dyn std::future::Future<Output = ()> + Send + Sync>>> {
anyhow::bail!("in-pod mode is not supported on non-linux platforms")
}
@ -318,7 +344,7 @@ fn init_inpod_proxy_mgr(
config: &config::Config,
proxy_gen: ProxyFactory,
ready: readiness::Ready,
drain_rx: drain::Watch,
drain_rx: drain::DrainWatcher,
) -> anyhow::Result<std::pin::Pin<Box<dyn std::future::Future<Output = ()> + Send + Sync>>> {
let metrics = Arc::new(crate::inpod::metrics::Metrics::new(
registry.sub_registry_with_prefix("workload_manager"),
@ -347,7 +373,7 @@ pub struct Bound {
pub udp_dns_proxy_address: Option<SocketAddr>,
pub shutdown: signal::Shutdown,
drain_tx: drain::Signal,
drain_tx: drain::DrainTrigger,
}
impl Bound {
@ -357,7 +383,9 @@ impl Bound {
// Start a drain; this will attempt to end all connections
// or itself be interrupted by a stronger TERM signal, whichever comes first.
self.drain_tx.drain().await;
self.drain_tx
.start_drain_and_wait(drain::DrainMode::Graceful)
.await;
Ok(())
}

src/assertions.rs Normal file (47 lines added)

@ -0,0 +1,47 @@
// Copyright Istio Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Below helper functions are used to help make assertions about the size of types.
// There are some compile time ways to do this, but they don't work in the way we need for the most part:
// analyzing the size of Futures which we don't have explicit declarations for.
// Future size is determined by the max required stack size for the async function. This means deeply
// branched code can create huge Futures, leading to high per-connection memory usage in ztunnel.
// Debugging these usages can be done by `RUSTFLAGS=-Zprint-type-sizes cargo +nightly build -j 1`,
// or by logging with the functions below.
#[cfg(all(any(test, feature = "testing"), debug_assertions))]
pub fn size_between_ref<T>(min: usize, max: usize, t: &T) {
let size = std::mem::size_of_val(t);
if size < min || size > max {
// If it is too small: that is good, we just want to update the assertion to be more aggressive
// If it is too big: that is bad. We may need to increase the limit, or consider refactors.
panic!(
"type {} size is unexpected, wanted {min}..{max}, got {size}",
std::any::type_name::<T>(),
)
}
tracing::trace!(
"type {} size is within expectations, wanted {min}..{max}, got {size}",
std::any::type_name::<T>(),
)
}
#[cfg(not(all(any(test, feature = "testing"), debug_assertions)))]
pub fn size_between_ref<T>(_min: usize, _max: usize, _t: &T) {}
#[inline(always)]
pub fn size_between<T>(min: usize, max: usize, t: T) -> T {
size_between_ref(min, max, &t);
t
}
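A hypothetical call site (serve_connection is a stand-in name, not from this file) showing how the helper wraps a future at spawn time so debug and test builds panic when its size drifts out of the expected window:

async fn serve_connection() { /* any large async fn */ }

fn spawn_with_size_check() {
    // Bounds are invented for illustration; in practice they are tuned to the
    // observed future size. In release builds this is a no-op passthrough.
    let fut = size_between(1_000, 2_000, serve_connection());
    tokio::spawn(fut);
}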


@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::strng::Strng;
use hyper::{
header::{GetAll, ToStrError},
http::HeaderValue,
@ -19,11 +20,13 @@ use hyper::{
#[derive(Default)]
pub struct Baggage {
pub cluster_id: Option<String>,
pub namespace: Option<String>,
pub workload_name: Option<String>,
pub service_name: Option<String>,
pub revision: Option<String>,
pub cluster_id: Option<Strng>,
pub namespace: Option<Strng>,
pub workload_name: Option<Strng>,
pub service_name: Option<Strng>,
pub revision: Option<Strng>,
pub region: Option<Strng>,
pub zone: Option<Strng>,
}
pub fn parse_baggage_header(headers: GetAll<HeaderValue>) -> Result<Baggage, ToStrError> {
@ -37,7 +40,7 @@ pub fn parse_baggage_header(headers: GetAll<HeaderValue>) -> Result<Baggage, ToS
if parts.len() > 1 {
let val = match parts[1] {
"" => None,
s => Some(s.to_string()),
s => Some(s.into()),
};
match parts[0] {
"k8s.cluster.name" => baggage.cluster_id = val,
@ -48,6 +51,9 @@ pub fn parse_baggage_header(headers: GetAll<HeaderValue>) -> Result<Baggage, ToS
| "k8s.job.name" => baggage.workload_name = val,
"service.name" => baggage.service_name = val,
"service.version" => baggage.revision = val,
// https://opentelemetry.io/docs/specs/semconv/attributes-registry/cloud/
"cloud.region" => baggage.region = val,
"cloud.availability_zone" => baggage.zone = val,
_ => {}
}
}
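For orientation, a single baggage header exercising every key the parser recognizes, including the two new cloud attributes (values invented for illustration):

// baggage: k8s.cluster.name=c1,k8s.namespace.name=prod,k8s.deployment.name=web,
//          service.name=web,service.version=v1,cloud.region=us-west1,cloud.availability_zone=us-west1-a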
@ -58,7 +64,7 @@ pub fn parse_baggage_header(headers: GetAll<HeaderValue>) -> Result<Baggage, ToS
#[cfg(test)]
pub mod tests {
use hyper::{http::HeaderValue, HeaderMap};
use hyper::{HeaderMap, http::HeaderValue};
use crate::proxy::BAGGAGE_HEADER;
@ -71,11 +77,11 @@ pub mod tests {
let header_value = HeaderValue::from_str(baggage_str)?;
hm.append(BAGGAGE_HEADER, header_value);
let baggage = parse_baggage_header(hm.get_all(BAGGAGE_HEADER))?;
assert_eq!(baggage.cluster_id, Some("K1".to_string()));
assert_eq!(baggage.namespace, Some("NS1".to_string()));
assert_eq!(baggage.workload_name, Some("N1".to_string()));
assert_eq!(baggage.service_name, Some("N2".to_string()));
assert_eq!(baggage.revision, Some("V1".to_string()));
assert_eq!(baggage.cluster_id, Some("K1".into()));
assert_eq!(baggage.namespace, Some("NS1".into()));
assert_eq!(baggage.workload_name, Some("N1".into()));
assert_eq!(baggage.service_name, Some("N2".into()));
assert_eq!(baggage.revision, Some("V1".into()));
Ok(())
}
@ -112,11 +118,11 @@ pub mod tests {
hm.append(BAGGAGE_HEADER, HeaderValue::from_str("service.name=N2")?);
hm.append(BAGGAGE_HEADER, HeaderValue::from_str("service.version=V1")?);
let baggage = parse_baggage_header(hm.get_all(BAGGAGE_HEADER))?;
assert_eq!(baggage.cluster_id, Some("K1".to_string()));
assert_eq!(baggage.namespace, Some("NS1".to_string()));
assert_eq!(baggage.workload_name, Some("N1".to_string()));
assert_eq!(baggage.service_name, Some("N2".to_string()));
assert_eq!(baggage.revision, Some("V1".to_string()));
assert_eq!(baggage.cluster_id, Some("K1".into()));
assert_eq!(baggage.namespace, Some("NS1".into()));
assert_eq!(baggage.workload_name, Some("N1".into()));
assert_eq!(baggage.service_name, Some("N2".into()));
assert_eq!(baggage.revision, Some("V1".into()));
Ok(())
}


@ -16,7 +16,7 @@ use crate::config;
use crate::config::ProxyMode;
use crate::identity::Priority::Warmup;
use crate::identity::{Identity, Request, SecretManager};
use crate::state::workload::{Protocol, Workload};
use crate::state::workload::{InboundProtocol, Workload};
use std::sync::Arc;
use tokio::sync::mpsc;
use tracing::{debug, error, info};
@ -94,9 +94,9 @@ impl CertFetcherImpl {
// Only shared mode fetches other workloads's certs
self.proxy_mode == ProxyMode::Shared &&
// We only get certs for our own node
Some(&w.node) == self.local_node.as_ref() &&
Some(w.node.as_ref()) == self.local_node.as_deref() &&
// If it doesn't support HBONE it *probably* doesn't need a cert.
(w.native_tunnel || w.protocol == Protocol::HBONE)
(w.native_tunnel || w.protocol == InboundProtocol::HBONE)
}
}

File diff suppressed because it is too large

src/copy.rs Normal file (538 lines added)

@ -0,0 +1,538 @@
// Copyright Istio Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::proxy;
use crate::proxy::ConnectionResult;
use crate::proxy::Error::{BackendDisconnected, ClientDisconnected, ReceiveError, SendError};
use bytes::{Buf, Bytes, BytesMut};
use pin_project_lite::pin_project;
use std::future::Future;
use std::io::Error;
use std::marker::PhantomPinned;
use std::pin::Pin;
use std::task::{Context, Poll, ready};
use tokio::io;
use tokio::io::{AsyncRead, AsyncWrite};
use tokio::net::TcpStream;
use tokio::net::tcp::{OwnedReadHalf, OwnedWriteHalf};
use tracing::trace;
// BufferedSplitter is a trait to expose splitting an IO object into a buffered reader and a writer
pub trait BufferedSplitter: Unpin {
type R: ResizeBufRead + Unpin;
type W: AsyncWriteBuf + Unpin;
fn split_into_buffered_reader(self) -> (Self::R, Self::W);
}
// Generic BufferedSplitter for anything that can Read/Write.
impl<I> BufferedSplitter for I
where
I: AsyncRead + AsyncWrite + Unpin,
{
type R = BufReader<io::ReadHalf<I>>;
type W = WriteAdapter<io::WriteHalf<I>>;
fn split_into_buffered_reader(self) -> (Self::R, Self::W) {
let (rh, wh) = tokio::io::split(self);
let rb = BufReader::new(rh);
(rb, WriteAdapter(wh))
}
}
// TcpStreamSplitter is a specialized BufferedSplitter for TcpStream, which is more efficient than the generic
// `tokio::io::split`. The generic method involves locking to access the read and write halves
pub struct TcpStreamSplitter(pub TcpStream);
impl BufferedSplitter for TcpStreamSplitter {
type R = BufReader<OwnedReadHalf>;
type W = WriteAdapter<OwnedWriteHalf>;
fn split_into_buffered_reader(self) -> (Self::R, Self::W) {
let (rh, wh) = self.0.into_split();
let rb = BufReader::new(rh);
(rb, WriteAdapter(wh))
}
}
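A sketch of an assumed caller (not in this file): when both sides are raw TCP, wrapping them in TcpStreamSplitter lets copy_bidirectional below use the lock-free halves:

async fn proxy_plain_tcp(
    downstream: TcpStream,
    upstream: TcpStream,
    stats: &ConnectionResult,
) -> Result<(), crate::proxy::Error> {
    // into_split-based halves avoid the mutex tokio::io::split would add.
    copy_bidirectional(TcpStreamSplitter(downstream), TcpStreamSplitter(upstream), stats).await
}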
// AsyncWriteBuf is like AsyncWrite, but writes a Bytes instead of &[u8]. This allows avoiding copies.
pub trait AsyncWriteBuf {
fn poll_write_buf(
self: Pin<&mut Self>,
cx: &mut Context<'_>,
buf: Bytes,
) -> Poll<std::io::Result<usize>>;
fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), std::io::Error>>;
fn poll_shutdown(
self: Pin<&mut Self>,
cx: &mut Context<'_>,
) -> Poll<Result<(), std::io::Error>>;
}
// Allow &T to be AsyncWriteBuf
impl<T: ?Sized + AsyncWriteBuf + Unpin> AsyncWriteBuf for &mut T {
fn poll_write_buf(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
buf: Bytes,
) -> Poll<std::io::Result<usize>> {
Pin::new(&mut **self).poll_write_buf(cx, buf)
}
fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
Pin::new(&mut **self).poll_flush(cx)
}
fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
Pin::new(&mut **self).poll_shutdown(cx)
}
}
// Allow anything that is AsyncWrite to be AsyncWriteBuf.
pub struct WriteAdapter<T>(T);
impl<T: AsyncWrite + Unpin> AsyncWriteBuf for WriteAdapter<T> {
fn poll_write_buf(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
mut buf: Bytes,
) -> Poll<std::io::Result<usize>> {
tokio_util::io::poll_write_buf(Pin::new(&mut self.0), cx, &mut buf)
}
fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
Pin::new(&mut self.0).poll_flush(cx)
}
fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
Pin::new(&mut self.0).poll_shutdown(cx)
}
}
// ResizeBufRead is like AsyncBufRead, but allows triggering a resize.
pub trait ResizeBufRead {
fn poll_bytes(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<Bytes>>;
fn resize(self: Pin<&mut Self>, new_size: usize);
}
// Initially we create a 1k buffer for each connection. Note currently there are 3 buffers per connection.
// Outbound: downstream to app. The upstream HBONE side is optimized to avoid a buffer.
// Inbound: downstream HBONE, upstream to app. Downstream HBONE can be optimized, but is not yet.
const INITIAL_BUFFER_SIZE: usize = 1024;
// We increase up to 16k for high traffic connections.
// TLS record size max is 16k. But we also have an H2 frame header, so leave a bit of room for that.
const LARGE_BUFFER_SIZE: usize = 16_384 - 64;
// For ultra-high bandwidth connections, increase up to 256Kb
const JUMBO_BUFFER_SIZE: usize = (16 * 16_384) - 64;
// After 128k of data we will trigger a resize from INITIAL to LARGE
// Loosely inspired by https://github.com/golang/go/blame/5122a6796ef98e3453c994c95abd640596540bea/src/crypto/tls/conn.go#L873
const RESIZE_THRESHOLD_LARGE: u64 = 128 * 1024;
// After 10Mb of data we will trigger a resize from LARGE to JUMBO
const RESIZE_THRESHOLD_JUMBO: u64 = 10 * 1024 * 1024;
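Spelled out, the resize progression these constants imply for a single connection's read buffer (a summary, not part of the source):

// bytes copied so far      buffer size used
// 0 .. 128 KiB             1 KiB   (INITIAL_BUFFER_SIZE)
// 128 KiB .. 10 MiB        ~16 KiB (LARGE_BUFFER_SIZE, one TLS record minus H2 framing room)
// 10 MiB and up            ~256 KiB (JUMBO_BUFFER_SIZE)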
pub async fn copy_bidirectional<A, B>(
downstream: A,
upstream: B,
stats: &ConnectionResult,
) -> Result<(), crate::proxy::Error>
where
A: BufferedSplitter,
B: BufferedSplitter,
{
let (mut rd, mut wd) = downstream.split_into_buffered_reader();
let (mut ru, mut wu) = upstream.split_into_buffered_reader();
let downstream_to_upstream = async {
let translate_error = |e: io::Error| {
SendError(Box::new(match e.kind() {
io::ErrorKind::NotConnected => BackendDisconnected,
io::ErrorKind::WriteZero => BackendDisconnected,
io::ErrorKind::UnexpectedEof => ClientDisconnected,
_ => e.into(),
}))
};
let res = ignore_io_errors(copy_buf(&mut rd, &mut wu, stats, false).await)
.map_err(translate_error);
trace!(?res, "send");
ignore_shutdown_errors(shutdown(&mut wu).await)
.map_err(translate_error)
.map_err(|e| proxy::Error::ShutdownError(Box::new(e)))?;
res
};
let upstream_to_downstream = async {
let translate_error = |e: io::Error| {
ReceiveError(Box::new(match e.kind() {
io::ErrorKind::NotConnected => ClientDisconnected,
io::ErrorKind::WriteZero => ClientDisconnected,
_ => e.into(),
}))
};
let res = ignore_io_errors(copy_buf(&mut ru, &mut wd, stats, true).await)
.map_err(translate_error);
trace!(?res, "receive");
ignore_shutdown_errors(shutdown(&mut wd).await)
.map_err(translate_error)
.map_err(|e| proxy::Error::ShutdownError(Box::new(e)))?;
res
};
// join!() them rather than try_join!() so that each direction is still driven to completion even after the other side finishes or fails.
let (sent, received) = tokio::join!(downstream_to_upstream, upstream_to_downstream);
// Convert some error messages to be easier to understand
let sent = sent?;
let received = received?;
trace!(sent, received, "copy complete");
Ok(())
}
// During copying, we may encounter errors from either side closing their connection. Typically, we
// get a fully graceful shutdown with no errors on either end, but if one end sends a RST directly,
// or we have other non-graceful behavior, we may see errors. This is generally ok - a TCP connection
// can close at any time, really. Avoid reporting these as errors, as generally users expect errors to
// occur only when we cannot connect to the backend at all.
fn ignore_io_errors<T: Default>(res: Result<T, io::Error>) -> Result<T, io::Error> {
use io::ErrorKind::*;
match &res {
Err(e) => match e.kind() {
NotConnected | UnexpectedEof | ConnectionReset | BrokenPipe => {
trace!(err=%e, "io terminated ungracefully");
// Returning Default here is very hacky, but the data we are returning isn't critical, so it's not so bad to lose it.
// Changing this would require refactoring all the interfaces to always return the bytes written even on error.
Ok(Default::default())
}
_ => res,
},
_ => res,
}
}
// During shutdown, the other end may have already disconnected. That is fine; they shut down for us.
// Ignore it.
fn ignore_shutdown_errors(res: Result<(), io::Error>) -> Result<(), io::Error> {
match &res {
Err(e)
if e.kind() == io::ErrorKind::NotConnected
|| e.kind() == io::ErrorKind::UnexpectedEof =>
{
trace!(err=%e, "failed to shutdown peer, they already shutdown");
Ok(())
}
_ => res,
}
}
// CopyBuf is a fork of Tokio's same struct, with additional support for resizing and metrics reporting.
#[must_use = "futures do nothing unless you `.await` or poll them"]
struct CopyBuf<'a, R: ?Sized, W: ?Sized> {
send: bool,
reader: &'a mut R,
writer: &'a mut W,
buf: Option<Bytes>,
metrics: &'a ConnectionResult,
amt: u64,
}
async fn copy_buf<'a, R, W>(
reader: &'a mut R,
writer: &'a mut W,
metrics: &ConnectionResult,
is_send: bool,
) -> std::io::Result<u64>
where
R: ResizeBufRead + Unpin + ?Sized,
W: AsyncWriteBuf + Unpin + ?Sized,
{
CopyBuf {
send: is_send,
reader,
writer,
buf: None,
metrics,
amt: 0,
}
.await
}
impl<R, W> Future for CopyBuf<'_, R, W>
where
R: ResizeBufRead + Unpin + ?Sized,
W: AsyncWriteBuf + Unpin + ?Sized,
{
type Output = std::io::Result<u64>;
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
loop {
let me = &mut *self;
// Get our stored buffer if there is any remaining, or fetch some more.
let buffer = if let Some(buffer) = me.buf.take() {
buffer
} else {
ready!(Pin::new(&mut *me.reader).poll_bytes(cx))?
};
if buffer.is_empty() {
ready!(AsyncWriteBuf::poll_flush(Pin::new(&mut self.writer), cx))?;
return Poll::Ready(Ok(self.amt));
}
// Cloning Bytes is just a reference-count bump. Hold onto it in case the write() is not complete.
let mut our_copy = buffer.clone();
let i = match Pin::new(&mut *me.writer).poll_write_buf(cx, buffer) {
Poll::Ready(written) => written?,
Poll::Pending => {
me.buf = Some(our_copy);
return Poll::Pending;
}
};
if i == 0 {
return Poll::Ready(Err(std::io::ErrorKind::WriteZero.into()));
}
if i < our_copy.len() {
// We only partially consumed it; store it back for a future call, skipping the number of bytes we did read.
our_copy.advance(i);
me.buf = Some(our_copy);
}
if me.send {
me.metrics.increment_send(i as u64);
} else {
me.metrics.increment_recv(i as u64);
}
let old = self.amt;
self.amt += i as u64;
// If we were below the resize threshold before but are now above it, trigger the buffer to resize
if old < RESIZE_THRESHOLD_LARGE && RESIZE_THRESHOLD_LARGE <= self.amt {
Pin::new(&mut *self.reader).resize(LARGE_BUFFER_SIZE);
}
if old < RESIZE_THRESHOLD_JUMBO && RESIZE_THRESHOLD_JUMBO <= self.amt {
Pin::new(&mut *self.reader).resize(JUMBO_BUFFER_SIZE);
}
}
}
}
// BufReader is a fork of Tokio's type with resize support
pin_project! {
pub struct BufReader<R> {
#[pin]
inner: R,
buf: BytesMut,
buffer_size: usize
}
}
impl<R: AsyncRead> BufReader<R> {
/// Creates a new `BufReader` with a default buffer capacity. The default is currently INITIAL_BUFFER_SIZE
pub fn new(inner: R) -> Self {
Self {
inner,
buf: BytesMut::with_capacity(INITIAL_BUFFER_SIZE),
buffer_size: INITIAL_BUFFER_SIZE,
}
}
}
impl<R: AsyncRead> ResizeBufRead for BufReader<R> {
fn poll_bytes(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<Bytes>> {
let me = self.project();
// Give us enough space to read a full chunk
me.buf.reserve(*me.buffer_size);
ready!(tokio_util::io::poll_read_buf(me.inner, cx, me.buf))?;
Poll::Ready(Ok(me.buf.split().freeze()))
}
fn resize(self: Pin<&mut Self>, new_size: usize) {
let me = self.project();
*me.buffer_size = new_size;
}
}
pin_project! {
/// A future used to shutdown an I/O object.
///
/// Created by the [`AsyncWriteExt::shutdown`][shutdown] function.
/// [shutdown]: [`crate::io::AsyncWriteExt::shutdown`]
#[must_use = "futures do nothing unless you `.await` or poll them"]
#[derive(Debug)]
pub struct Shutdown<'a, A: ?Sized> {
a: &'a mut A,
// Make this future `!Unpin` for compatibility with async trait methods.
#[pin]
_pin: PhantomPinned,
}
}
/// Creates a future which will shutdown an I/O object.
pub(super) fn shutdown<A>(a: &mut A) -> Shutdown<'_, A>
where
A: AsyncWriteBuf + Unpin + ?Sized,
{
Shutdown {
a,
_pin: PhantomPinned,
}
}
impl<A> Future for Shutdown<'_, A>
where
A: AsyncWriteBuf + Unpin + ?Sized,
{
type Output = std::io::Result<()>;
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
let me = self.project();
AsyncWriteBuf::poll_shutdown(Pin::new(me.a), cx)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::test_helpers::helpers::initialize_telemetry;
use rand::Rng;
use tokio::io::AsyncWriteExt;
use tokio::io::{AsyncReadExt, ReadBuf};
#[tokio::test]
async fn copy() {
initialize_telemetry();
let (mut client, ztunnel_downstream) = tokio::io::duplex(32000);
let (mut server, ztunnel_upstream) = tokio::io::duplex(32000);
// Spawn copy
tokio::task::spawn(async move {
let mut registry = prometheus_client::registry::Registry::default();
let metrics = std::sync::Arc::new(crate::proxy::Metrics::new(
crate::metrics::sub_registry(&mut registry),
));
let source_addr = "127.0.0.1:12345".parse().unwrap();
let dest_addr = "127.0.0.1:34567".parse().unwrap();
let cr = ConnectionResult::new(
source_addr,
dest_addr,
None,
std::time::Instant::now(),
crate::proxy::metrics::ConnectionOpen {
reporter: crate::proxy::Reporter::destination,
source: None,
derived_source: None,
destination: None,
connection_security_policy: crate::proxy::metrics::SecurityPolicy::unknown,
destination_service: None,
},
metrics.clone(),
);
copy_bidirectional(ztunnel_downstream, ztunnel_upstream, &cr).await
});
const ITERS: usize = 1000;
const REPEATS: usize = 6400;
// Make sure we write enough to trigger the resize
if ITERS * REPEATS < JUMBO_BUFFER_SIZE {
panic!("not enough writing to test")
}
for i in 0..ITERS {
let body = [1, 2, 3, 4, i as u8].repeat(REPEATS);
let mut res = vec![0; body.len()];
tokio::try_join!(client.write_all(&body), server.read_exact(&mut res)).unwrap();
assert_eq!(res.as_slice(), body);
}
}
#[tokio::test]
async fn copystress() {
initialize_telemetry();
let (mut client, ztunnel_downstream) = tokio::io::duplex(32000);
let (mut server, ztunnel_upstream) = tokio::io::duplex(32000);
// Spawn copy
tokio::task::spawn(async move {
let mut registry = prometheus_client::registry::Registry::default();
let metrics = std::sync::Arc::new(crate::proxy::Metrics::new(
crate::metrics::sub_registry(&mut registry),
));
let source_addr = "127.0.0.1:12345".parse().unwrap();
let dest_addr = "127.0.0.1:34567".parse().unwrap();
let cr = ConnectionResult::new(
source_addr,
dest_addr,
None,
std::time::Instant::now(),
crate::proxy::metrics::ConnectionOpen {
reporter: crate::proxy::Reporter::destination,
source: None,
derived_source: None,
destination: None,
connection_security_policy: crate::proxy::metrics::SecurityPolicy::unknown,
destination_service: None,
},
metrics.clone(),
);
copy_bidirectional(WeirdIO(ztunnel_downstream), WeirdIO(ztunnel_upstream), &cr).await
});
const WRITES: usize = 2560;
// Do a bunch of writes of various size, and expect the other end to receive them
let writer = tokio::task::spawn(async move {
for d in 0..WRITES {
let body: Vec<u8> = (0..d).map(|v| (v % 255) as u8).collect();
client.write_all(&body).await.unwrap();
}
});
let reader = tokio::task::spawn(async move {
for d in 0..WRITES {
let want: Vec<u8> = (0..d).map(|v| (v % 255) as u8).collect();
let mut got = vec![0; d];
server.read_exact(&mut got).await.unwrap();
assert_eq!(got.as_slice(), want);
}
});
tokio::try_join!(reader, writer).unwrap();
}
struct WeirdIO<I>(I);
impl<I: AsyncWrite + std::marker::Unpin> AsyncWrite for WeirdIO<I> {
fn poll_write(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
buf: &[u8],
) -> Poll<Result<usize, Error>> {
if buf.is_empty() {
return Poll::Ready(Ok(0));
}
let mut rng = rand::rng();
let end = rng.random_range(1..=buf.len()); // Ensure at least 1 byte is written
Pin::new(&mut self.0).poll_write(cx, &buf[0..end])
}
fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
Pin::new(&mut self.0).poll_flush(cx)
}
fn poll_shutdown(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
) -> Poll<Result<(), Error>> {
Pin::new(&mut self.0).poll_shutdown(cx)
}
}
impl<I: AsyncRead + std::marker::Unpin> AsyncRead for WeirdIO<I> {
fn poll_read(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
buf: &mut ReadBuf<'_>,
) -> Poll<std::io::Result<()>> {
// TODO
Pin::new(&mut self.0).poll_read(cx, buf)
}
}
}


@ -13,21 +13,95 @@
// limitations under the License.
use crate::dns::resolver::{Answer, Resolver};
use crate::proxy::SocketFactory;
use hickory_proto::runtime::RuntimeProvider;
use hickory_proto::runtime::iocompat::AsyncIoTokioAsStd;
use hickory_resolver::ResolveError;
use hickory_resolver::config::{ResolverConfig, ResolverOpts};
use hickory_resolver::error::ResolveError;
use hickory_resolver::name_server::TokioConnectionProvider;
use hickory_resolver::TokioAsyncResolver;
use hickory_resolver::name_server::GenericConnector;
use hickory_server::authority::LookupError;
use hickory_server::server::Request;
use std::future::Future;
use std::io;
use std::net::SocketAddr;
use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;
use tokio::net::{TcpStream, UdpSocket};
/// A forwarding [Resolver] that delegates requests to an upstream [TokioAsyncResolver].
pub struct Forwarder(TokioAsyncResolver);
pub struct Forwarder(hickory_resolver::Resolver<GenericConnector<RuntimeProviderAdaptor>>);
impl Forwarder {
/// Creates a new [Forwarder] from the provided resolver configuration.
pub fn new(cfg: ResolverConfig, opts: ResolverOpts) -> Result<Self, ResolveError> {
let resolver = TokioAsyncResolver::new(cfg, opts, TokioConnectionProvider::default());
Ok(Self(resolver))
pub fn new(
cfg: ResolverConfig,
socket_factory: Arc<dyn SocketFactory + Send + Sync>,
opts: ResolverOpts,
) -> Result<Self, ResolveError> {
let provider = GenericConnector::new(RuntimeProviderAdaptor {
socket_factory,
handle: Default::default(),
});
let mut resolver = hickory_resolver::Resolver::builder_with_config(cfg, provider);
*resolver.options_mut() = opts;
Ok(Self(resolver.build()))
}
}
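A hedged construction example; the Google upstream config and the pre-existing socket_factory argument are assumptions, not from this diff:

fn example_forwarder(
    socket_factory: Arc<dyn SocketFactory + Send + Sync>,
) -> Result<Forwarder, ResolveError> {
    // Any upstream ResolverConfig works; google() is just a stock choice.
    Forwarder::new(ResolverConfig::google(), socket_factory, ResolverOpts::default())
}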
#[derive(Clone)]
struct RuntimeProviderAdaptor {
socket_factory: Arc<dyn SocketFactory + Send + Sync>,
handle: hickory_proto::runtime::TokioHandle,
}
const CONNECT_TIMEOUT: Duration = Duration::from_secs(5);
impl RuntimeProvider for RuntimeProviderAdaptor {
type Handle = hickory_proto::runtime::TokioHandle;
type Timer = hickory_proto::runtime::TokioTime;
type Udp = UdpSocket;
type Tcp = AsyncIoTokioAsStd<TcpStream>;
fn create_handle(&self) -> Self::Handle {
self.handle.clone()
}
fn connect_tcp(
&self,
server_addr: SocketAddr,
bind_addr: Option<SocketAddr>,
wait_for: Option<Duration>,
) -> Pin<Box<dyn Send + Future<Output = std::io::Result<Self::Tcp>>>> {
let sf = self.socket_factory.clone();
Box::pin(async move {
let socket = if server_addr.is_ipv4() {
sf.new_tcp_v4()
} else {
sf.new_tcp_v6()
}?;
if let Some(bind_addr) = bind_addr {
socket.bind(bind_addr)?;
}
let future = socket.connect(server_addr);
let wait_for = wait_for.unwrap_or(CONNECT_TIMEOUT);
match tokio::time::timeout(wait_for, future).await {
Ok(Ok(socket)) => Ok(AsyncIoTokioAsStd(socket)),
Ok(Err(e)) => Err(e),
Err(_) => Err(io::Error::new(
io::ErrorKind::TimedOut,
format!("connection to {server_addr:?} timed out after {wait_for:?}"),
)),
}
})
}
fn bind_udp(
&self,
local_addr: SocketAddr,
_server_addr: SocketAddr,
) -> Pin<Box<dyn Send + Future<Output = std::io::Result<Self::Udp>>>> {
let sf = self.socket_factory.clone();
Box::pin(async move { sf.udp_bind(local_addr) })
}
}
@ -35,8 +109,9 @@ impl Forwarder {
impl Resolver for Forwarder {
async fn lookup(&self, request: &Request) -> Result<Answer, LookupError> {
// TODO(nmittler): Should we allow requests to the upstream resolver to be authoritative?
let name = request.query().name();
let rr_type = request.query().query_type();
let query = request.request_info()?.query;
let name = query.name();
let rr_type = query.query_type();
self.0
.lookup(name, rr_type)
.await
@ -49,22 +124,29 @@ impl Resolver for Forwarder {
#[cfg(any(unix, target_os = "windows"))]
mod tests {
use crate::dns::resolver::Resolver;
use crate::test_helpers::dns::{a_request, n, socket_addr, system_forwarder};
use crate::test_helpers::helpers::subscribe;
use crate::test_helpers::dns::{a_request, ip, n, run_dns, socket_addr};
use crate::test_helpers::helpers::initialize_telemetry;
use hickory_proto::ProtoErrorKind;
use hickory_proto::op::ResponseCode;
use hickory_proto::rr::RecordType;
use hickory_resolver::error::ResolveErrorKind;
use hickory_server::server::Protocol;
use hickory_proto::xfer::Protocol;
use hickory_resolver::ResolveErrorKind;
use std::collections::HashMap;
#[tokio::test]
async fn found() {
let _guard = subscribe();
initialize_telemetry();
let f = system_forwarder();
let f = run_dns(HashMap::from([(
n("test.example.com."),
vec![ip("1.1.1.1")],
)]))
.await
.unwrap();
// Lookup a host.
let req = a_request(
n("www.google.com"),
n("test.example.com"),
socket_addr("1.1.1.1:80"),
Protocol::Udp,
);
@ -72,15 +154,15 @@ mod tests {
assert!(!answer.is_authoritative());
let record = answer.record_iter().next().unwrap();
assert_eq!(n("www.google.com."), *record.name());
assert_eq!(n("test.example.com."), *record.name());
assert_eq!(RecordType::A, record.record_type());
}
#[tokio::test]
async fn not_found() {
let _guard = subscribe();
initialize_telemetry();
let f = system_forwarder();
let f = run_dns(HashMap::new()).await.unwrap();
// Lookup a host.
let req = a_request(
@ -98,12 +180,13 @@ mod tests {
.expect("expected resolve error");
// Expect NoRecordsFound with a NXDomain response code.
let kind = err.kind();
match kind {
ResolveErrorKind::NoRecordsFound { response_code, .. } => {
if let ResolveErrorKind::Proto(proto) = err.kind() {
if let ProtoErrorKind::NoRecordsFound { response_code, .. } = proto.kind() {
// Respond with the error code.
assert_eq!(&ResponseCode::NXDomain, response_code);
return;
}
_ => panic!("unexpected error kind {kind}"),
}
panic!("unexpected error kind {}", err.kind())
}
}


@ -13,9 +13,10 @@
// limitations under the License.
use crate::dns::resolver::{Answer, Resolver};
use hickory_proto::ProtoErrorKind;
use hickory_proto::op::{Edns, Header, MessageType, OpCode, ResponseCode};
use hickory_proto::rr::Record;
use hickory_resolver::error::ResolveErrorKind;
use hickory_resolver::ResolveErrorKind;
use hickory_server::authority::{LookupError, MessageResponse, MessageResponseBuilder};
use hickory_server::server::{Request, RequestHandler, ResponseHandler, ResponseInfo};
use std::sync::Arc;
@ -117,16 +118,14 @@ async fn send_lookup_error<R: ResponseHandler>(
}
LookupError::ResponseCode(code) => send_error(request, response_handle, code).await,
LookupError::ResolveError(e) => {
match e.kind() {
ResolveErrorKind::NoRecordsFound { response_code, .. } => {
if let ResolveErrorKind::Proto(proto) = e.kind() {
if let ProtoErrorKind::NoRecordsFound { response_code, .. } = proto.kind() {
// Respond with the error code.
send_error(request, response_handle, *response_code).await
}
_ => {
// TODO(nmittler): log?
send_error(request, response_handle, ResponseCode::ServFail).await
return send_error(request, response_handle, *response_code).await;
}
}
// TODO(nmittler): log?
send_error(request, response_handle, ResponseCode::ServFail).await
}
LookupError::Io(_) => {
// TODO(nmittler): log?
@ -189,7 +188,7 @@ fn response_edns(request: &Request) -> Option<Edns> {
let mut resp_edns: Edns = Edns::new();
resp_edns.set_max_payload(req_edns.max_payload().max(512));
resp_edns.set_version(req_edns.version());
resp_edns.set_dnssec_ok(req_edns.dnssec_ok());
resp_edns.set_dnssec_ok(req_edns.flags().dnssec_ok);
Some(resp_edns)
} else {
@ -203,15 +202,14 @@ mod tests {
use crate::dns::handler::Handler;
use crate::dns::resolver::{Answer, Resolver};
use crate::test_helpers::dns::{a, a_request, n, socket_addr};
use crate::test_helpers::helpers::subscribe;
use crate::test_helpers::helpers::initialize_telemetry;
use hickory_proto::op::{Message, MessageType, OpCode, ResponseCode};
use hickory_proto::rr::{Name, Record, RecordType};
use hickory_proto::serialize::binary::BinEncoder;
use hickory_proto::xfer::Protocol;
use hickory_server::authority::LookupError;
use hickory_server::authority::MessageResponse;
use hickory_server::server::{
Protocol, Request, RequestHandler, ResponseHandler, ResponseInfo,
};
use hickory_server::server::{Request, RequestHandler, ResponseHandler, ResponseInfo};
use std::net::Ipv4Addr;
use std::sync::Arc;
use tokio::sync::mpsc;
@ -219,7 +217,7 @@ mod tests {
#[tokio::test]
async fn record_found() {
let _guard = subscribe();
initialize_telemetry();
let p = Handler::new(Arc::new(FakeResolver {}));
@ -262,7 +260,7 @@ mod tests {
#[async_trait::async_trait]
impl Resolver for FakeResolver {
async fn lookup(&self, request: &Request) -> Result<Answer, LookupError> {
let name = Name::from(request.query().name().clone());
let name = Name::from(request.request_info()?.query.name().clone());
let records = vec![a(name, Ipv4Addr::new(127, 0, 0, 1))];
Ok(Answer::new(records, false))
}


@ -23,6 +23,8 @@ use std::time::Duration;
use crate::metrics::{DefaultedUnknown, DeferRecorder, Recorder};
use crate::state::workload::Workload;
use crate::strng;
use crate::strng::RichStrng;
pub struct Metrics {
pub requests: Family<DnsLabels, Counter>,
@ -55,7 +57,7 @@ impl Metrics {
);
let forwarded_duration = Family::<DnsLabels, Histogram>::new_with_constructor(|| {
Histogram::new(vec![0.005f64, 0.001, 0.01, 0.1, 1.0, 5.0].into_iter())
Histogram::new(vec![0.005f64, 0.001, 0.01, 0.1, 1.0, 5.0])
});
registry.register_with_unit(
"dns_upstream_request_duration",
@ -77,19 +79,24 @@ impl DeferRecorder for Metrics {}
#[derive(Clone, Hash, Debug, PartialEq, Eq, EncodeLabelSet)]
pub struct DnsLabels {
request_query_type: String,
request_protocol: String,
request_query_type: DefaultedUnknown<RichStrng>,
request_protocol: RichStrng,
// Source workload.
source_canonical_service: DefaultedUnknown<String>,
source_canonical_revision: DefaultedUnknown<String>,
source_canonical_service: DefaultedUnknown<RichStrng>,
source_canonical_revision: DefaultedUnknown<RichStrng>,
}
impl DnsLabels {
pub fn new(r: &Request) -> Self {
Self {
request_query_type: r.query().query_type().to_string().to_lowercase(),
request_protocol: r.protocol().to_string().to_lowercase(),
request_query_type: r
.request_info()
.map(|q| q.query.query_type().to_string().to_lowercase())
.ok()
.map(|s| RichStrng::from(strng::new(s)))
.into(),
request_protocol: r.protocol().to_string().to_lowercase().into(),
source_canonical_service: Default::default(),
source_canonical_revision: Default::default(),
}


@ -39,7 +39,9 @@ pub fn trim_domain(name: &Name, domain: &Name) -> Option<Name> {
// Create a Name from the labels leading up to the domain.
let iter = name.iter();
let num_labels = iter.len() - domain.num_labels() as usize;
Some(Name::from_labels(iter.take(num_labels)).unwrap())
let mut name = Name::from_labels(iter.take(num_labels)).unwrap();
name.set_fqdn(false);
Some(name)
} else {
None
}
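// Editor's note: an illustrative sketch of the set_fqdn(false) fix above
// (assumes hickory_proto's Name; not part of this diff).
fn trim_domain_sketch() {
    use hickory_proto::rr::Name;
    use std::str::FromStr;
    let name = Name::from_str("www.example.com.").unwrap();
    let domain = Name::from_str("example.com.").unwrap();
    let trimmed = trim_domain(&name, &domain).unwrap();
    // Previously the result was the FQDN "www."; clearing the FQDN flag
    // yields a relative name that search-domain expansion can extend.
    assert_eq!(trimmed.to_string(), "www");
    assert!(!trimmed.is_fqdn());
}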

File diff suppressed because it is too large

src/drain.rs (new file, 194 lines)

@ -0,0 +1,194 @@
// Copyright Istio Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use tracing::Instrument;
use std::time::Duration;
use tokio::sync::watch;
use tracing::{debug, info, warn};
pub use internal::DrainMode;
pub use internal::ReleaseShutdown as DrainBlocker;
pub use internal::Signal as DrainTrigger;
pub use internal::Watch as DrainWatcher;
/// `new` constructs a new pair for draining:
/// * DrainTrigger can be used to start a draining sequence and wait for it to complete.
/// * DrainWatcher should be held by anything that wants to participate in the draining. This can be cloned,
/// and a drain will not complete until all outstanding DrainWatchers are dropped.
pub fn new() -> (DrainTrigger, DrainWatcher) {
let (tx, rx) = internal::channel();
(tx, rx)
}
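// Editor's note: a minimal usage sketch (illustration only, not part of
// this diff) showing how the pair above is intended to be used.
async fn drain_usage_sketch() {
    let (trigger, watcher) = new();
    tokio::spawn(async move {
        // Resolves once a drain is signaled; the returned ReleaseShutdown
        // blocks completion of the drain until it is dropped.
        let shutdown = watcher.wait_for_drain().await;
        // ... finish in-flight work here ...
        drop(shutdown);
    });
    // Signal a graceful drain and wait for all watchers to release.
    trigger.start_drain_and_wait(DrainMode::Graceful).await;
}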
/// run_with_drain provides a wrapper to run a future with graceful shutdown/draining support.
/// A caller should construct a future that takes two arguments:
/// * drain: while holding onto this, the future is marked as active, which will block the server from shutting down.
/// Additionally, it can be watched (with drain.wait_for_drain()) to see when to start a graceful shutdown.
/// * force_shutdown: when this is triggered, the future must forcefully shutdown any ongoing work ASAP.
/// This means the graceful drain exceeded the hard deadline, and all work must terminate now.
/// This is only required for spawned() tasks; otherwise, the future is dropped entirely, canceling all work.
pub async fn run_with_drain<F, O>(
component: String,
drain: DrainWatcher,
deadline: Duration,
make_future: F,
) where
F: AsyncFnOnce(DrainWatcher, watch::Receiver<()>) -> O,
O: Send + 'static,
{
let (sub_drain_signal, sub_drain) = new();
let (trigger_force_shutdown, force_shutdown) = watch::channel(());
// Stop accepting once we drain.
// We will then allow connections up to `deadline` to terminate on their own.
// After that, they will be forcefully terminated.
let fut = make_future(sub_drain, force_shutdown).in_current_span();
tokio::select! {
_res = fut => {}
res = drain.wait_for_drain() => {
if res.mode() == DrainMode::Graceful {
debug!(component, "drain started, waiting {:?} for any connections to complete", deadline);
if tokio::time::timeout(deadline, sub_drain_signal.start_drain_and_wait(DrainMode::Graceful)).await.is_err() {
// Not all connections completed within the deadline; we will forcefully shut them down
warn!(component, "drain duration expired with pending connections, forcefully shutting down");
}
} else {
debug!(component, "terminating");
}
// Trigger force shutdown. In theory, this is only needed in the timeout case. However,
// it doesn't hurt to always trigger it.
let _ = trigger_force_shutdown.send(());
info!(component, "shutdown complete");
drop(res);
}
};
}
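// Editor's note: a hypothetical caller of run_with_drain (sketch only;
// `run_component` and its body are illustrative assumptions).
async fn run_component(drain: DrainWatcher) {
    run_with_drain(
        "example".to_string(),
        drain,
        Duration::from_secs(5),
        async move |sub_drain, mut force_shutdown| {
            tokio::select! {
                // A signal here means: stop accepting, finish in-flight work.
                _ = sub_drain.wait_for_drain() => {}
                // The hard deadline passed; abort any remaining work now.
                _ = force_shutdown.changed() => {}
            }
        },
    )
    .await;
}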
mod internal {
use tokio::sync::{mpsc, watch};
/// Creates a drain channel.
///
/// The `Signal` is used to start a drain, and the `Watch` will be notified
/// when a drain is signaled.
pub fn channel() -> (Signal, Watch) {
let (signal_tx, signal_rx) = watch::channel(None);
let (drained_tx, drained_rx) = mpsc::channel(1);
let signal = Signal {
drained_rx,
signal_tx,
};
let watch = Watch {
drained_tx,
signal_rx,
};
(signal, watch)
}
enum Never {}
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum DrainMode {
Immediate,
Graceful,
}
/// Send a drain command to all watchers.
pub struct Signal {
drained_rx: mpsc::Receiver<Never>,
signal_tx: watch::Sender<Option<DrainMode>>,
}
/// Watch for a drain command.
///
/// All `Watch` instances must be dropped for a `Signal::start_drain_and_wait`
/// call to complete.
#[derive(Clone)]
pub struct Watch {
drained_tx: mpsc::Sender<Never>,
signal_rx: watch::Receiver<Option<DrainMode>>,
}
#[must_use = "ReleaseShutdown should be dropped explicitly to release the runtime"]
#[derive(Clone)]
#[allow(dead_code)]
pub struct ReleaseShutdown(mpsc::Sender<Never>, DrainMode);
impl ReleaseShutdown {
pub fn mode(&self) -> DrainMode {
self.1
}
}
impl Signal {
/// Waits for all [`Watch`] instances to be dropped.
pub async fn closed(&mut self) {
self.signal_tx.closed().await;
}
/// Asynchronously signals all watchers to begin draining gracefully and waits for all
/// handles to be dropped.
pub async fn start_drain_and_wait(mut self, mode: DrainMode) {
// Update the state of the signal watch so that all watchers observe
// the change.
let _ = self.signal_tx.send(Some(mode));
// Wait for all watchers to release their drain handle.
match self.drained_rx.recv().await {
None => {}
Some(n) => match n {},
}
}
}
impl Watch {
/// Returns a `ReleaseShutdown` handle after the drain has been signaled. The
/// handle must be dropped when a shutdown action has been completed to
/// unblock graceful shutdown.
pub async fn wait_for_drain(mut self) -> ReleaseShutdown {
// This future completes once `Signal::start_drain_and_wait` has been invoked
// so that the channel's state is updated.
let mode = self
.signal_rx
.wait_for(Option::is_some)
.await
.map(|mode| mode.expect("already asserted it is_some"))
// If we got an error, then the signal was dropped entirely. Presumably this means a graceful shutdown is not required.
.unwrap_or(DrainMode::Immediate);
// Return a handle that holds the drain channel, so that the signal task
// is only notified when all handles have been dropped.
ReleaseShutdown(self.drained_tx, mode)
}
}
impl std::fmt::Debug for Signal {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Signal").finish_non_exhaustive()
}
}
impl std::fmt::Debug for Watch {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Watch").finish_non_exhaustive()
}
}
impl std::fmt::Debug for ReleaseShutdown {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ReleaseShutdown").finish_non_exhaustive()
}
}
}


@ -22,8 +22,9 @@ use std::{
time::{Duration, Instant},
};
use crate::drain::DrainWatcher;
use crate::{config, proxy};
use bytes::Bytes;
use drain::Watch;
use futures_util::TryFutureExt;
use http_body_util::Full;
use hyper::client;
@ -33,7 +34,7 @@ use hyper::{Request, Response};
use hyper_util::client::legacy::connect::HttpConnector;
use tokio::net::{TcpListener, TcpStream};
use tokio_stream::Stream;
use tracing::{debug, info, warn};
use tracing::{Instrument, debug, info, warn};
use crate::tls::ServerCertProvider;
@ -45,6 +46,9 @@ pub fn tls_server<T: ServerCertProvider + Clone + 'static>(
tls_listener::builder(crate::tls::InboundAcceptor::new(cert_provider))
.listen(listener)
.take_while(|item| {
!matches!(item, Err(tls_listener::Error::ListenerError(e)) if proxy::util::is_runtime_shutdown(e))
})
.filter_map(|conn| {
// Avoid 'By default, if a client fails the TLS handshake, that is treated as an error, and the TlsListener will return an Err'
match conn {
@ -73,8 +77,9 @@ where
F: std::future::Future + Send + 'static,
F::Output: Send + 'static,
{
#[inline]
fn execute(&self, fut: F) {
tokio::task::spawn(fut);
tokio::task::spawn(fut.in_current_span());
}
}
@ -151,7 +156,7 @@ pub fn http2_client() -> client::conn::http2::Builder<TokioExecutor> {
pub fn pooling_client<B>() -> ::hyper_util::client::legacy::Client<HttpConnector, B>
where
B: http_body_1::Body + Send,
B: http_body::Body + Send,
B::Data: Send,
{
::hyper_util::client::legacy::Client::builder(::hyper_util::rt::TokioExecutor::new())
@ -179,24 +184,36 @@ pub fn plaintext_response(code: hyper::StatusCode, body: String) -> Response<Ful
/// * Draining
pub struct Server<S> {
name: String,
bind: TcpListener,
drain_rx: Watch,
binds: Vec<TcpListener>,
drain_rx: DrainWatcher,
state: S,
}
impl<S> Server<S> {
pub async fn bind(name: &str, addr: SocketAddr, drain_rx: Watch, s: S) -> anyhow::Result<Self> {
let bind = TcpListener::bind(&addr).await?;
pub async fn bind(
name: &str,
addrs: config::Address,
drain_rx: DrainWatcher,
s: S,
) -> anyhow::Result<Self> {
let mut binds = vec![];
for addr in addrs.into_iter() {
binds.push(TcpListener::bind(&addr).await?)
}
Ok(Server {
name: name.to_string(),
bind,
binds,
drain_rx,
state: s,
})
}
pub fn address(&self) -> SocketAddr {
self.bind.local_addr().expect("local address must be ready")
self.binds
.first()
.expect("must have at least one address")
.local_addr()
.expect("local address must be ready")
}
pub fn state_mut(&mut self) -> &mut S {
@ -211,10 +228,7 @@ impl<S> Server<S> {
{
use futures_util::StreamExt as OtherStreamExt;
let address = self.address();
let drain_stream = self.drain_rx.clone();
let drain_connections = self.drain_rx;
let _name = self.name.clone();
// let (tx, rx) = oneshot::channel();
let drain = self.drain_rx;
let state = Arc::new(self.state);
let f = Arc::new(f);
info!(
@ -222,52 +236,61 @@ impl<S> Server<S> {
component=self.name,
"listener established",
);
tokio::spawn(async move {
let stream = tokio_stream::wrappers::TcpListenerStream::new(self.bind);
let mut stream = stream.take_until(Box::pin(drain_stream.signaled()));
while let Some(Ok(socket)) = stream.next().await {
socket.set_nodelay(true).unwrap();
let drain = drain_connections.clone();
let f = f.clone();
let state = state.clone();
tokio::spawn(async move {
let serve =
http1_server()
.half_close(true)
.header_read_timeout(Duration::from_secs(2))
.max_buf_size(8 * 1024)
.serve_connection(
hyper_util::rt::TokioIo::new(socket),
hyper::service::service_fn(move |req| {
let state = state.clone();
for bind in self.binds {
let drain_stream = drain.clone();
let drain_connections = drain.clone();
let state = state.clone();
let name = self.name.clone();
let f = f.clone();
tokio::spawn(async move {
let stream = tokio_stream::wrappers::TcpListenerStream::new(bind);
let mut stream = stream.take_until(Box::pin(drain_stream.wait_for_drain()));
while let Some(Ok(socket)) = stream.next().await {
socket.set_nodelay(true).unwrap();
let drain = drain_connections.clone();
let f = f.clone();
let state = state.clone();
tokio::spawn(async move {
let serve =
http1_server()
.half_close(true)
.header_read_timeout(Duration::from_secs(2))
.max_buf_size(8 * 1024)
.serve_connection(
hyper_util::rt::TokioIo::new(socket),
hyper::service::service_fn(move |req| {
let state = state.clone();
// Failures would abort the whole connection; we just want to return an HTTP error
f(state, req).or_else(|err| async move {
Ok::<Response<Full<Bytes>>, Infallible>(Response::builder()
.status(hyper::StatusCode::INTERNAL_SERVER_ERROR)
.body(err.to_string().into())
.expect("builder with known status code should not fail"))
})
}),
);
// Wait for drain to signal or connection serving to complete
match futures_util::future::select(Box::pin(drain.signaled()), serve).await {
// We got a shutdown request. Start graceful shutdown and wait for the pending requests to complete.
futures_util::future::Either::Left((_shutdown, mut serve)) => {
let drain = std::pin::Pin::new(&mut serve);
drain.graceful_shutdown();
serve.await
// Failures would abort the whole connection; we just want to return an HTTP error
f(state, req).or_else(|err| async move {
Ok::<Response<Full<Bytes>>, Infallible>(Response::builder()
.status(hyper::StatusCode::INTERNAL_SERVER_ERROR)
.body(err.to_string().into())
.expect("builder with known status code should not fail"))
})
}),
);
// Wait for drain to signal or connection serving to complete
match futures_util::future::select(Box::pin(drain.wait_for_drain()), serve)
.await
{
// We got a shutdown request. Start graceful shutdown and wait for the pending requests to complete.
futures_util::future::Either::Left((_shutdown, mut serve)) => {
let drain = std::pin::Pin::new(&mut serve);
drain.graceful_shutdown();
serve.await
}
// Serving finished, just return the result.
futures_util::future::Either::Right((serve, _shutdown)) => serve,
}
// Serving finished, just return the result.
futures_util::future::Either::Right((serve, _shutdown)) => serve,
}
});
}
info!(
%address,
component=self.name,
"listener drained",
);
});
});
}
info!(
%address,
component=name,
"listener drained",
);
});
}
}
}


@ -23,13 +23,14 @@ pub mod manager;
pub use manager::*;
mod auth;
use crate::state::WorkloadInfo;
pub use auth::*;
#[cfg(any(test, feature = "testing"))]
pub mod mock {
pub use super::caclient::mock::CaClient;
pub use super::manager::mock::{
new_secret_manager, new_secret_manager_cfg, Config as SecretManagerConfig,
Config as SecretManagerConfig, new_secret_manager, new_secret_manager_cfg,
};
}
@ -38,7 +39,7 @@ pub enum Error {
#[error("failed to create CSR: {0}")]
Signing(Arc<tls::Error>),
#[error("signing gRPC error ({}): {}", .0.code(), .0.message())]
SigningRequest(#[from] tonic::Status),
SigningRequest(#[from] Box<tonic::Status>),
#[error("failed to process string: {0}")]
Utf8(#[from] Utf8Error),
#[error("did not find expected SAN: {0}")]
@ -47,8 +48,12 @@ pub enum Error {
EmptyResponse(Identity),
#[error("invalid spiffe identity: {0}")]
Spiffe(String),
#[error("workload is unknown: {0}")]
UnknownWorkload(Arc<WorkloadInfo>),
#[error("the identity is no longer needed")]
Forgotten,
#[error("BUG: identity requested {0}, but only allowed {1:?}")]
BugInvalidIdentityRequest(Identity, Arc<WorkloadInfo>),
}
impl From<tls::Error> for Error {


@ -15,55 +15,50 @@
use std::io;
use std::path::PathBuf;
use tonic::metadata::AsciiMetadataValue;
use tonic::service::Interceptor;
use tonic::{Code, Request, Status};
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum AuthSource {
// JWT authentication source which contains the token file path and the cluster id.
Token(PathBuf, String),
// JWT authentication source which contains a static token file.
// Note that this token is not refreshed, so its lifetime ought to be longer than ztunnel's
StaticToken(String, String),
None,
}
fn load_token(path: &PathBuf) -> io::Result<Vec<u8>> {
let t = std::fs::read(path)?;
impl AuthSource {
pub async fn insert_headers(&self, request: &mut http::HeaderMap) -> anyhow::Result<()> {
const AUTHORIZATION: &str = "authorization";
const CLUSTER: &str = "clusterid";
match self {
AuthSource::Token(path, cluster_id) => {
let token = load_token(path).await.map(|mut t| {
let mut bearer: Vec<u8> = b"Bearer ".to_vec();
bearer.append(&mut t);
bearer
})?;
request.insert(AUTHORIZATION, token.try_into()?);
request.insert(CLUSTER, cluster_id.try_into()?);
}
AuthSource::StaticToken(token, cluster_id) => {
let token = {
let mut bearer: Vec<u8> = b"Bearer ".to_vec();
bearer.extend_from_slice(token.as_bytes());
bearer
};
request.insert(AUTHORIZATION, token.try_into()?);
request.insert(CLUSTER, cluster_id.try_into()?);
}
AuthSource::None => {}
}
Ok(())
}
}
async fn load_token(path: &PathBuf) -> io::Result<Vec<u8>> {
let t = tokio::fs::read(path).await?;
if t.is_empty() {
return Err(io::Error::new(
io::ErrorKind::Other,
"token file exists, but was empty",
));
return Err(io::Error::other("token file exists, but was empty"));
}
Ok(t)
}
impl Interceptor for AuthSource {
fn call(&mut self, mut request: Request<()>) -> Result<Request<()>, Status> {
match self {
AuthSource::Token(path, cluster_id) => {
let token = load_token(path)
.map_err(|e| Status::new(Code::Unauthenticated, e.to_string()))
.map(|mut t| {
let mut bearer: Vec<u8> = b"Bearer ".to_vec();
bearer.append(&mut t);
bearer
})
.and_then(|b| {
AsciiMetadataValue::try_from(b)
.map_err(|e| Status::new(Code::Unauthenticated, e.to_string()))
})?;
request.metadata_mut().insert("authorization", token);
if !cluster_id.is_empty() {
let id = AsciiMetadataValue::try_from(cluster_id.as_bytes().to_vec())
.map_err(|e| Status::new(Code::Unauthenticated, e.to_string()))?;
request.metadata_mut().insert("clusterid", id);
}
}
// When no token based authentication is required, do not load or insert the token.
AuthSource::None => {}
}
Ok(request)
}
}
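// Editor's note: a minimal sketch of the new header-based flow above
// (hypothetical token and cluster id; not part of this diff).
async fn auth_usage_sketch() -> anyhow::Result<()> {
    let auth = AuthSource::StaticToken("my-token".to_string(), "cluster-1".to_string());
    let mut headers = http::HeaderMap::new();
    auth.insert_headers(&mut headers).await?;
    // headers now carries `authorization: Bearer my-token` and
    // `clusterid: cluster-1`.
    Ok(())
}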


@ -15,42 +15,44 @@
use std::collections::BTreeMap;
use async_trait::async_trait;
use prost_types::value::Kind;
use prost_types::Struct;
use tonic::codegen::InterceptedService;
use tracing::{error, instrument, warn};
use prost_types::value::Kind;
use tonic::IntoRequest;
use tonic::metadata::{AsciiMetadataKey, AsciiMetadataValue};
use tracing::{debug, error, instrument, warn};
use crate::identity::Error;
use crate::identity::auth::AuthSource;
use crate::identity::manager::Identity;
use crate::identity::Error;
use crate::tls::{self, TlsGrpcChannel};
use crate::xds::istio::ca::istio_certificate_service_client::IstioCertificateServiceClient;
use crate::xds::istio::ca::IstioCertificateRequest;
use crate::xds::istio::ca::istio_certificate_service_client::IstioCertificateServiceClient;
pub struct CaClient {
pub client: IstioCertificateServiceClient<InterceptedService<TlsGrpcChannel, AuthSource>>,
pub client: IstioCertificateServiceClient<TlsGrpcChannel>,
pub enable_impersonated_identity: bool,
pub secret_ttl: i64,
ca_headers: Vec<(AsciiMetadataKey, AsciiMetadataValue)>,
}
impl CaClient {
pub async fn new(
address: String,
cert_provider: Box<dyn tls::ClientCertProvider>,
alt_hostname: Option<String>,
cert_provider: Box<dyn tls::ControlPlaneClientCertProvider>,
auth: AuthSource,
enable_impersonated_identity: bool,
secret_ttl: i64,
ca_headers: Vec<(AsciiMetadataKey, AsciiMetadataValue)>,
) -> Result<CaClient, Error> {
let svc = tls::grpc_connector(address, cert_provider.fetch_cert().await?)?;
// let client = IstioCertificateServiceClient::new(svc);
// let svc =
// tower_hyper_http_body_compat::Hyper1HttpServiceAsTowerService03HttpService::new(svc);
let client = IstioCertificateServiceClient::with_interceptor(svc, auth);
let svc =
tls::grpc_connector(address, auth, cert_provider.fetch_cert(alt_hostname).await?)?;
let client = IstioCertificateServiceClient::new(svc);
Ok(CaClient {
client,
enable_impersonated_identity,
secret_ttl,
ca_headers,
})
}
}
@ -65,7 +67,7 @@ impl CaClient {
let csr = cs.csr;
let private_key = cs.private_key;
let req = IstioCertificateRequest {
let mut req = tonic::Request::new(IstioCertificateRequest {
csr,
validity_duration: self.secret_ttl,
metadata: {
@ -82,13 +84,23 @@ impl CaClient {
None
}
},
};
});
self.ca_headers.iter().for_each(|(k, v)| {
req.metadata_mut().insert(k.clone(), v.clone());
if let Ok(v_str) = v.to_str() {
debug!("CA header added: {}={}", k, v_str);
}
});
let resp = self
.client
.clone()
.create_certificate(req)
.await?
.create_certificate(req.into_request())
.await
.map_err(Box::new)?
.into_inner();
let leaf = resp
.cert_chain
.first()
@ -102,12 +114,8 @@ impl CaClient {
};
let certs = tls::WorkloadCertificate::new(&private_key, leaf, chain)?;
// Make sure the certificate actually matches the identity we requested.
if self.enable_impersonated_identity && certs.cert.identity().as_ref() != Some(id) {
error!(
"expected identity {:?}, got {:?}",
id,
certs.cert.identity()
);
if self.enable_impersonated_identity && certs.identity().as_ref() != Some(id) {
error!("expected identity {:?}, got {:?}", id, certs.identity());
return Err(Error::SanError(id.to_owned()));
}
Ok(certs)
@ -136,7 +144,8 @@ pub mod mock {
#[derive(Default)]
struct ClientState {
fetches: Vec<Identity>,
gen: tls::mock::CertGenerator,
error: bool,
cert_gen: tls::mock::CertGenerator,
}
#[derive(Clone)]
@ -217,29 +226,19 @@ pub mod mock {
let not_after = not_before + self.cfg.cert_lifetime;
let mut state = self.state.write().await;
if state.error {
return Err(Error::Spiffe("injected test error".into()));
}
let certs = state
.gen
.cert_gen
.new_certs(&id.to_owned().into(), not_before, not_after);
state.fetches.push(id.to_owned());
Ok(certs)
}
pub async fn set_error(&mut self, error: bool) {
if error {
let mut state = self.state.write().await;
state.fetches.push(Identity::Spiffe {
trust_domain: "error".to_string(),
namespace: "error".to_string(),
service_account: "error".to_string(),
});
} else {
let mut state = self.state.write().await;
state.fetches.push(Identity::Spiffe {
trust_domain: "success".to_string(),
namespace: "success".to_string(),
service_account: "success".to_string(),
});
}
let mut state = self.state.write().await;
state.error = error;
}
}
@ -256,7 +255,7 @@ pub mod mock {
#[cfg(test)]
mod tests {
use std::iter;
use std::time::Duration;
use matches::assert_matches;
@ -285,9 +284,9 @@ mod tests {
#[tokio::test]
async fn wrong_identity() {
let id = Identity::Spiffe {
service_account: "wrong-sa".to_string(),
namespace: "foo".to_string(),
trust_domain: "cluster.local".to_string(),
service_account: "wrong-sa".into(),
namespace: "foo".into(),
trust_domain: "cluster.local".into(),
};
let certs = tls::mock::generate_test_certs(
&id.into(),
@ -296,10 +295,7 @@ mod tests {
);
let res = test_ca_client_with_response(IstioCertificateResponse {
cert_chain: iter::once(certs.cert)
.chain(certs.chain)
.map(|c| c.as_pem())
.collect(),
cert_chain: certs.full_chain_and_roots(),
})
.await;
assert_matches!(res, Err(Error::SanError(_)));
@ -314,10 +310,7 @@ mod tests {
);
let res = test_ca_client_with_response(IstioCertificateResponse {
cert_chain: iter::once(certs.cert)
.chain(certs.chain)
.map(|c| c.as_pem())
.collect(),
cert_chain: certs.full_chain_and_roots(),
})
.await;
assert_matches!(res, Ok(_));


@ -24,45 +24,32 @@ use crate::config::ProxyMode;
use async_trait::async_trait;
use prometheus_client::encoding::{EncodeLabelValue, LabelValueEncoder};
use tokio::sync::{mpsc, watch, Mutex};
use tokio::time::{sleep_until, Duration, Instant};
use tokio::sync::{Mutex, mpsc, watch};
use tokio::time::{Duration, Instant, sleep_until};
use crate::tls;
use crate::{strng, tls};
use super::CaClient;
use super::Error::{self, Spiffe};
use backoff::{backoff::Backoff, ExponentialBackoff};
use crate::strng::Strng;
use backoff::{ExponentialBackoff, backoff::Backoff};
use keyed_priority_queue::KeyedPriorityQueue;
const CERT_REFRESH_FAILURE_RETRY_DELAY_MAX_INTERVAL: Duration = Duration::from_secs(150);
/// Default trust domain to use if not otherwise specified.
pub const DEFAULT_TRUST_DOMAIN: &str = "cluster.local";
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub enum Identity {
Spiffe {
trust_domain: String,
namespace: String,
service_account: String,
trust_domain: Strng,
namespace: Strng,
service_account: Strng,
},
}
// struct PrioritizedFetch {
// identity: Identity,
// priority: Priority
// }
//
// impl Ord for PrioritizedFetch {
// fn cmp(&self, other: &Self) -> Ordering {
// self.cmp(other)
// }
// }
//
// impl PartialOrd for PrioritizedFetch {
// fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
// Some(self.cmp(other))
// }
// }
impl EncodeLabelValue for Identity {
fn encode(&self, writer: &mut LabelValueEncoder) -> Result<(), std::fmt::Error> {
writer.write_str(&self.to_string())
@ -95,9 +82,9 @@ impl FromStr for Identity {
return Err(Spiffe(s.to_string()));
}
Ok(Identity::Spiffe {
trust_domain: split[0].to_string(),
namespace: split[2].to_string(),
service_account: split[4].to_string(),
trust_domain: split[0].into(),
namespace: split[2].into(),
service_account: split[4].into(),
})
}
}
@ -117,15 +104,41 @@ impl fmt::Display for Identity {
}
}
impl Identity {
pub fn from_parts(td: Strng, ns: Strng, sa: Strng) -> Identity {
Identity::Spiffe {
trust_domain: td,
namespace: ns,
service_account: sa,
}
}
pub fn to_strng(self: &Identity) -> Strng {
match self {
Identity::Spiffe {
trust_domain,
namespace,
service_account,
} => strng::format!("spiffe://{trust_domain}/ns/{namespace}/sa/{service_account}"),
}
}
pub fn trust_domain(&self) -> Strng {
match self {
Identity::Spiffe { trust_domain, .. } => trust_domain.clone(),
}
}
}
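// Editor's note: a sketch of the parse/format round trip, relying on the
// FromStr and Display impls in this file (illustration only).
fn identity_roundtrip_sketch() {
    use std::str::FromStr;
    let id = Identity::from_str("spiffe://cluster.local/ns/default/sa/bookinfo")
        .expect("valid SPIFFE ID");
    assert_eq!(
        id.to_string(),
        "spiffe://cluster.local/ns/default/sa/bookinfo"
    );
    assert!(Identity::from_str("not-a-spiffe-id").is_err());
}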
#[cfg(any(test, feature = "testing"))]
impl Default for Identity {
fn default() -> Self {
const TRUST_DOMAIN: &str = "cluster.local";
const SERVICE_ACCOUNT: &str = "ztunnel";
const NAMESPACE: &str = "istio-system";
Identity::Spiffe {
trust_domain: TRUST_DOMAIN.to_string(),
namespace: NAMESPACE.to_string(),
service_account: SERVICE_ACCOUNT.to_string(),
trust_domain: DEFAULT_TRUST_DOMAIN.into(),
namespace: NAMESPACE.into(),
service_account: SERVICE_ACCOUNT.into(),
}
}
}
@ -237,8 +250,8 @@ impl Worker {
// Manages certificate updates. Since all the work is done in a single task, the code is
// lock-free. This is OK as the code is I/O bound so we don't need the extra parallelism.
async fn run(&self, mut requests: mpsc::Receiver<Request>) {
use futures::stream::FuturesUnordered;
use futures::StreamExt;
use futures::stream::FuturesUnordered;
#[derive(Eq, PartialEq)]
enum Fetch {
@ -261,19 +274,11 @@ impl Worker {
// refresh. In other words, at any point in time, there are no high-priority
// (not Background) items scheduled to run in the future.
let mut pending: KeyedPriorityQueue<Identity, PendingPriority> = KeyedPriorityQueue::new();
// The backoff strategy used for retrying operations. Sets the initial values for the backoff.
// The values are chosen to be reasonable for the CA client to be able to recover from transient
// errors.
let mut cert_backoff = ExponentialBackoff {
initial_interval: Duration::from_millis(500),
current_interval: Duration::from_secs(1),
// The maximum interval is set to 150 seconds, which is the maximum time the backoff will
// wait to retry a cert again.
max_interval: CERT_REFRESH_FAILURE_RETRY_DELAY_MAX_INTERVAL,
multiplier: 2.0,
randomization_factor: 0.2,
..Default::default()
};
// The set of pending Identity requests with backoffs (i.e. pending requests that have already failed at least once).
// Basically, each cert fetch attempt gets its own backoff.
// This avoids delays where a fetch of identity A for pod A needlessly stalls the refetch of
// identity B for pod B. Kept separate from the `pending` KeyedPriorityQueue for convenience.
let mut pending_backoffs_by_id: HashMap<Identity, ExponentialBackoff> = HashMap::new();
'main: loop {
let next = pending.peek().map(|(_, PendingPriority(_, ts))| *ts);
@ -332,6 +337,7 @@ impl Worker {
// Handle fetch results.
Some((id, res)) = fetches.next() => {
tracing::trace!(%id, "fetch complete");
match processing.remove(&id) {
Some(Fetch::Processing) => (),
Some(Fetch::Forgetting) => continue 'main,
@ -352,13 +358,41 @@ impl Worker {
//
// randomized interval =
// retry_interval * (random value in range [1 - randomization_factor, 1 + randomization_factor])
let refresh_at = Instant::now() + cert_backoff.next_backoff().unwrap_or(CERT_REFRESH_FAILURE_RETRY_DELAY_MAX_INTERVAL);
//
// Note that we use a backoff per unique identity request. This prevents a
// failed cert fetch for pod A from stalling retries for
// pods B, C, and D.
let mut keyed_backoff = match pending_backoffs_by_id.remove(&id) {
Some(backoff) => {
backoff
},
None => {
// The backoff strategy used for retrying operations. Sets the initial values for the backoff.
// The values are chosen to be reasonable for the CA client to be able to recover from transient
// errors.
ExponentialBackoff {
initial_interval: Duration::from_millis(500),
current_interval: Duration::from_secs(1),
// The maximum interval is set to 150 seconds, which is the maximum time the backoff will
// wait to retry a cert again.
max_interval: CERT_REFRESH_FAILURE_RETRY_DELAY_MAX_INTERVAL,
multiplier: 2.0,
randomization_factor: 0.2,
..Default::default()
}
}
};
let retry = keyed_backoff.next_backoff().unwrap_or(CERT_REFRESH_FAILURE_RETRY_DELAY_MAX_INTERVAL);
// Store the per-key backoff; we are going to retry.
pending_backoffs_by_id.insert(id.clone(), keyed_backoff);
tracing::debug!(%id, "certificate fetch failed ({err}), retrying in {retry:?}");
let refresh_at = Instant::now() + retry;
(CertState::Unavailable(err), refresh_at)
},
Ok(certs) => {
// Reset the backoff on success.
// [`reset`](https://docs.rs/backoff/0.4.0/backoff/backoff/trait.Backoff.html#method.reset)
cert_backoff.reset();
tracing::debug!(%id, "certificate fetch succeeded");
// Reset (pop and drop) the backoff on success.
pending_backoffs_by_id.remove(&id);
let certs: tls::WorkloadCertificate = certs; // Type annotation.
let refresh_at = self.time_conv.system_time_to_instant(certs.refresh_at());
let refresh_at = if let Some(t) = refresh_at {
@ -441,7 +475,7 @@ fn push_increase<TKey: Hash + Eq, TPriority: Ord>(
key: TKey,
priority: TPriority,
) {
if kp.get_priority(&key).map_or(true, |p| priority > *p) {
if kp.get_priority(&key).is_none_or(|p| priority > *p) {
kp.push(key, priority);
}
}
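// Editor's note: an illustrative check of the semantics above (sketch only;
// assumes the elided first parameter is `kp: &mut KeyedPriorityQueue<_, _>`).
fn push_increase_sketch() {
    let mut kp: KeyedPriorityQueue<&str, u32> = KeyedPriorityQueue::new();
    push_increase(&mut kp, "id-a", 1);
    push_increase(&mut kp, "id-a", 5); // raised: 5 > 1
    push_increase(&mut kp, "id-a", 2); // ignored: 2 < 5
    assert_eq!(kp.get_priority(&"id-a"), Some(&5));
}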
@ -463,15 +497,19 @@ impl fmt::Debug for SecretManager {
}
impl SecretManager {
pub async fn new(cfg: crate::config::Config) -> Result<Self, Error> {
pub async fn new(cfg: Arc<crate::config::Config>) -> Result<Self, Error> {
let caclient = CaClient::new(
cfg.ca_address.expect("ca_address must be set to use CA"),
cfg.ca_address
.clone()
.expect("ca_address must be set to use CA"),
cfg.alt_ca_hostname.clone(),
Box::new(tls::ControlPlaneAuthentication::RootCert(
cfg.ca_root_cert.clone(),
)),
cfg.auth,
cfg.auth.clone(),
cfg.proxy_mode == ProxyMode::Shared,
cfg.secret_ttl.as_secs().try_into().unwrap_or(60 * 60 * 24),
cfg.ca_headers.vec.clone(),
)
.await?;
Ok(Self::new_with_client(caclient))
@ -672,15 +710,16 @@ mod tests {
use crate::identity::caclient::mock::CaClient as MockCaClient;
use crate::identity::{self, *};
use crate::strng;
use super::{mock, *};
async fn stress_many_ids(sm: Arc<SecretManager>, iterations: u32) {
for i in 0..iterations {
let id = identity::Identity::Spiffe {
trust_domain: "cluster.local".to_string(),
namespace: "istio-system".to_string(),
service_account: format!("ztunnel{i}"),
trust_domain: "cluster.local".into(),
namespace: "istio-system".into(),
service_account: strng::format!("ztunnel{i}"),
};
sm.fetch_certificate(&id)
.await
@ -702,19 +741,12 @@ mod tests {
}
}
async fn verify_cert_updates(
sm: Arc<SecretManager>,
id: Identity,
dur: Duration,
cert_lifetime: Duration,
) {
let start_time = time::Instant::now();
let expected_update_interval = cert_lifetime.as_millis() / 2;
let mut total_updates = 0;
let mut current_cert = sm
async fn verify_cert_updates(sm: Arc<SecretManager>, id: Identity) {
let current_cert = sm
.fetch_certificate(&id)
.await
.expect("Didn't get a cert as expected.");
// We should loop until we get a new cert provisioned
loop {
let new_cert = sm
.fetch_certificate(&id)
@ -722,15 +754,13 @@ mod tests {
.expect("Didn't get a cert as expected.");
if current_cert.cert.serial() != new_cert.cert.serial() {
total_updates += 1;
current_cert = new_cert;
}
if time::Instant::now() - start_time > dur {
break;
let new = new_cert.cert.expiration().not_before;
let old = current_cert.cert.expiration().not_before;
assert!(old < new, "new cert should be newer");
return;
}
tokio::time::sleep(Duration::from_micros(100)).await;
}
assert_eq!(total_updates, dur.as_millis() / expected_update_interval);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 8)]
@ -748,38 +778,31 @@ mod tests {
assert_eq!(100, secret_manager.cache_len().await);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 8)]
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn test_cache_refresh() {
let mut tasks: Vec<tokio::task::JoinHandle<()>> = Vec::new();
let test_dur = Duration::from_millis(200);
let id: Identity = Default::default();
// Certs added to the cache should be refreshed every 80 millis
let cert_lifetime = Duration::from_millis(160);
// Certs added to the cache should be refreshed every 25 millis
let cert_lifetime = Duration::from_millis(50);
let secret_manager = mock::new_secret_manager(cert_lifetime);
// Spawn task that verifies cert updates.
tasks.push(tokio::spawn(verify_cert_updates(
secret_manager.clone(),
id.clone(),
test_dur,
cert_lifetime,
)));
// Start spamming fetches for that cert.
for _n in 0..7 {
tasks.push(tokio::spawn(stress_single_id(
for _n in 0..3 {
tokio::spawn(stress_single_id(
secret_manager.clone(),
id.clone(),
test_dur,
)));
));
}
let results = futures::future::join_all(tasks).await;
for result in results.iter() {
assert!(result.is_ok());
}
tokio::time::timeout(
Duration::from_secs(2),
verify_cert_updates(secret_manager.clone(), id.clone()),
)
.await
.unwrap();
}
fn collect_strings<T: IntoIterator>(xs: T) -> Vec<String>
@ -847,17 +870,17 @@ mod tests {
fn identity(name: &str) -> Identity {
Identity::Spiffe {
trust_domain: "test".to_string(),
namespace: "test".to_string(),
service_account: name.to_string(),
trust_domain: "test".into(),
namespace: "test".into(),
service_account: name.into(),
}
}
fn identity_n(name: &str, n: u8) -> Identity {
Identity::Spiffe {
trust_domain: "test".to_string(),
namespace: "test".to_string(),
service_account: format!("{name}{n}"),
trust_domain: "test".into(),
namespace: "test".into(),
service_account: strng::format!("{name}{n}"),
}
}
@ -1073,23 +1096,17 @@ mod tests {
test.tear_down().await;
}
#[tokio::test]
#[tokio::test(start_paused = true)]
async fn test_backoff_resets_on_successful_fetch_after_failure() {
let mut test = setup(1);
let id = identity("test");
let sm = test.secret_manager.clone();
let fetch = tokio::spawn(async move { sm.fetch_certificate(&id).await });
tokio::time::sleep(SEC).await;
// The first fetch will fail, but the backoff should reset after the second fetch.
test.caclient.set_error(true).await;
tokio::time::sleep(SEC).await;
// The second fetch should fail.
test.caclient.set_error(true).await;
tokio::time::sleep(SEC).await;
// The third fetch should succeed.
assert!(sm.fetch_certificate(&id).await.is_err());
test.caclient.set_error(false).await;
assert_matches!(fetch.await.unwrap(), Ok(_));
test.tear_down().await;
assert!(sm.fetch_certificate(&id).await.is_err());
tokio::time::sleep(SEC * 3).await;
assert!(sm.fetch_certificate(&id).await.is_ok());
}
#[test]
@ -1097,33 +1114,33 @@ mod tests {
assert_eq!(
Identity::from_str("spiffe://cluster.local/ns/namespace/sa/service-account").ok(),
Some(Identity::Spiffe {
trust_domain: "cluster.local".to_string(),
namespace: "namespace".to_string(),
service_account: "service-account".to_string(),
trust_domain: "cluster.local".into(),
namespace: "namespace".into(),
service_account: "service-account".into(),
})
);
assert_eq!(
Identity::from_str("spiffe://td/ns/ns/sa/sa").ok(),
Some(Identity::Spiffe {
trust_domain: "td".to_string(),
namespace: "ns".to_string(),
service_account: "sa".to_string(),
trust_domain: "td".into(),
namespace: "ns".into(),
service_account: "sa".into(),
})
);
assert_eq!(
Identity::from_str("spiffe://td.with.dots/ns/ns.with.dots/sa/sa.with.dots").ok(),
Some(Identity::Spiffe {
trust_domain: "td.with.dots".to_string(),
namespace: "ns.with.dots".to_string(),
service_account: "sa.with.dots".to_string(),
trust_domain: "td.with.dots".into(),
namespace: "ns.with.dots".into(),
service_account: "sa.with.dots".into(),
})
);
assert_eq!(
Identity::from_str("spiffe://td/ns//sa/").ok(),
Some(Identity::Spiffe {
trust_domain: "td".to_string(),
namespace: "".to_string(),
service_account: "".to_string()
trust_domain: "td".into(),
namespace: "".into(),
service_account: "".into()
})
);
assert_matches!(Identity::from_str("td/ns/ns/sa/sa"), Err(_));


@ -42,16 +42,18 @@ pub mod istio {
#[derive(thiserror::Error, Debug)]
pub enum Error {
#[error("error creating proxy: {0}")]
ProxyError(crate::proxy::Error),
#[error("error creating proxy {0}: {1}")]
ProxyError(String, crate::proxy::Error),
#[error("error receiving message: {0}")]
ReceiveMessageError(String),
#[error("error sending ack: {0}")]
SendAckError(String),
#[error("error sending nack: {0}")]
SendNackError(String),
#[error("protocol error")]
ProtocolError,
#[error("protocol error: {0}")]
ProtocolError(String),
#[error("announce error: {0}")]
AnnounceError(String),
}
#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize)]


@ -39,8 +39,7 @@ pub struct ProxyState {
)]
pub connections: Option<ConnectionManager>,
#[serde(skip_serializing_if = "Option::is_none", default)]
pub info: Option<WorkloadInfo>,
pub info: WorkloadInfo,
// using reference counts to account for possible race between the proxy task that notifies us
// that a proxy is down, and the proxy factory task that notifies us when it is up.
@ -62,11 +61,7 @@ pub struct WorkloadManagerAdminHandler {
}
impl WorkloadManagerAdminHandler {
pub fn proxy_pending(
&self,
uid: &crate::inpod::WorkloadUid,
workload_info: &Option<WorkloadInfo>,
) {
pub fn proxy_pending(&self, uid: &crate::inpod::WorkloadUid, workload_info: &WorkloadInfo) {
let mut state = self.state.write().unwrap();
// don't increment count here, as it is only for up and down. see comment in count.
@ -90,7 +85,7 @@ impl WorkloadManagerAdminHandler {
pub fn proxy_up(
&self,
uid: &crate::inpod::WorkloadUid,
workload_info: &Option<WorkloadInfo>,
workload_info: &WorkloadInfo,
cm: Option<ConnectionManager>,
) {
let mut state = self.state.write().unwrap();
@ -100,7 +95,7 @@ impl WorkloadManagerAdminHandler {
key.count += 1;
key.state = State::Up;
key.connections = cm;
key.info = workload_info.clone();
key.info.clone_from(workload_info);
}
None => {
state.insert(
@ -142,9 +137,9 @@ impl WorkloadManagerAdminHandler {
}
}
impl crate::admin::AdminHandler2 for WorkloadManagerAdminHandler {
impl crate::admin::AdminHandler for WorkloadManagerAdminHandler {
fn key(&self) -> &'static str {
"workload_state"
"workloadState"
}
fn handle(&self) -> anyhow::Result<serde_json::Value> {
@ -162,21 +157,20 @@ mod test {
let data = || serde_json::to_string(&handler.to_json().unwrap()).unwrap();
let uid1 = crate::inpod::WorkloadUid::new("uid1".to_string());
handler.proxy_pending(&uid1, &None);
assert_eq!(data(), r#"{"uid1":{"state":"Pending"}}"#);
handler.proxy_up(
&uid1,
&Some(crate::state::WorkloadInfo {
name: "name".to_string(),
namespace: "ns".to_string(),
trust_domain: "td".to_string(),
service_account: "sa".to_string(),
}),
None,
);
let wli = WorkloadInfo {
name: "name".to_string(),
namespace: "ns".to_string(),
service_account: "sa".to_string(),
};
handler.proxy_pending(&uid1, &wli);
assert_eq!(
data(),
r#"{"uid1":{"info":{"name":"name","namespace":"ns","serviceAccount":"sa","trustDomain":"td"},"state":"Up"}}"#
r#"{"uid1":{"info":{"name":"name","namespace":"ns","serviceAccount":"sa"},"state":"Pending"}}"#
);
handler.proxy_up(&uid1, &wli, None);
assert_eq!(
data(),
r#"{"uid1":{"info":{"name":"name","namespace":"ns","serviceAccount":"sa"},"state":"Up"}}"#
);
handler.proxy_down(&uid1);
assert_eq!(data(), "{}");


@ -12,7 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::config;
use crate::proxy::DefaultSocketFactory;
use crate::{config, socket};
use std::sync::Arc;
use super::netns::InpodNetns;
@ -21,21 +22,24 @@ pub struct InPodConfig {
cur_netns: Arc<std::os::fd::OwnedFd>,
mark: Option<std::num::NonZeroU32>,
reuse_port: bool,
socket_config: config::SocketConfig,
}
impl InPodConfig {
pub fn new(cfg: &config::Config) -> std::io::Result<Self> {
Ok(InPodConfig {
cur_netns: Arc::new(InpodNetns::current()?),
mark: std::num::NonZeroU32::new(cfg.inpod_mark),
mark: std::num::NonZeroU32::new(cfg.packet_mark.expect("in pod requires packet mark")),
reuse_port: cfg.inpod_port_reuse,
socket_config: cfg.socket_config,
})
}
pub fn socket_factory(
&self,
netns: InpodNetns,
) -> Box<dyn crate::proxy::SocketFactory + Send + Sync> {
let sf = InPodSocketFactory::from_cfg(self, netns);
let base = crate::proxy::DefaultSocketFactory(self.socket_config);
let sf = InPodSocketFactory::from_cfg(base, self, netns);
if self.reuse_port {
Box::new(InPodSocketPortReuseFactory::new(sf))
} else {
@ -52,16 +56,30 @@ impl InPodConfig {
}
struct InPodSocketFactory {
inner: DefaultSocketFactory,
netns: InpodNetns,
mark: Option<std::num::NonZeroU32>,
}
impl InPodSocketFactory {
fn from_cfg(inpod_config: &InPodConfig, netns: InpodNetns) -> Self {
Self::new(netns, inpod_config.mark())
fn from_cfg(
inner: DefaultSocketFactory,
inpod_config: &InPodConfig,
netns: InpodNetns,
) -> Self {
Self::new(inner, netns, inpod_config.mark())
}
fn new(netns: InpodNetns, mark: Option<std::num::NonZeroU32>) -> Self {
Self { netns, mark }
fn new(
inner: DefaultSocketFactory,
netns: InpodNetns,
mark: Option<std::num::NonZeroU32>,
) -> Self {
Self { inner, netns, mark }
}
fn run_in_ns<S, F: FnOnce() -> std::io::Result<S>>(&self, f: F) -> std::io::Result<S> {
self.netns.run(f)?
}
fn configure<S: std::os::unix::io::AsFd, F: FnOnce() -> std::io::Result<S>>(
@ -79,17 +97,17 @@ impl InPodSocketFactory {
impl crate::proxy::SocketFactory for InPodSocketFactory {
fn new_tcp_v4(&self) -> std::io::Result<tokio::net::TcpSocket> {
self.configure(tokio::net::TcpSocket::new_v4)
self.configure(|| self.inner.new_tcp_v4())
}
fn new_tcp_v6(&self) -> std::io::Result<tokio::net::TcpSocket> {
self.configure(tokio::net::TcpSocket::new_v6)
self.configure(|| self.inner.new_tcp_v6())
}
fn tcp_bind(&self, addr: std::net::SocketAddr) -> std::io::Result<tokio::net::TcpListener> {
fn tcp_bind(&self, addr: std::net::SocketAddr) -> std::io::Result<socket::Listener> {
let std_sock = self.configure(|| std::net::TcpListener::bind(addr))?;
std_sock.set_nonblocking(true)?;
tokio::net::TcpListener::from_std(std_sock)
tokio::net::TcpListener::from_std(std_sock).map(socket::Listener::new)
}
fn udp_bind(&self, addr: std::net::SocketAddr) -> std::io::Result<tokio::net::UdpSocket> {
@ -97,6 +115,10 @@ impl crate::proxy::SocketFactory for InPodSocketFactory {
std_sock.set_nonblocking(true)?;
tokio::net::UdpSocket::from_std(std_sock)
}
fn ipv6_enabled_localhost(&self) -> std::io::Result<bool> {
self.run_in_ns(|| self.inner.ipv6_enabled_localhost())
}
}
// Same as socket factory, but sets SO_REUSEPORT
@ -119,7 +141,7 @@ impl crate::proxy::SocketFactory for InPodSocketPortReuseFactory {
self.sf.new_tcp_v6()
}
fn tcp_bind(&self, addr: std::net::SocketAddr) -> std::io::Result<tokio::net::TcpListener> {
fn tcp_bind(&self, addr: std::net::SocketAddr) -> std::io::Result<socket::Listener> {
let sock = self.sf.configure(|| match addr {
std::net::SocketAddr::V4(_) => tokio::net::TcpSocket::new_v4(),
std::net::SocketAddr::V6(_) => tokio::net::TcpSocket::new_v6(),
@ -130,7 +152,7 @@ impl crate::proxy::SocketFactory for InPodSocketPortReuseFactory {
}
sock.bind(addr)?;
sock.listen(128)
sock.listen(128).map(socket::Listener::new)
}
fn udp_bind(&self, addr: std::net::SocketAddr) -> std::io::Result<tokio::net::UdpSocket> {
@ -166,6 +188,10 @@ impl crate::proxy::SocketFactory for InPodSocketPortReuseFactory {
std_sock.set_nonblocking(true)?;
tokio::net::UdpSocket::from_std(std_sock)
}
fn ipv6_enabled_localhost(&self) -> std::io::Result<bool> {
self.sf.ipv6_enabled_localhost()
}
}
#[cfg(test)]
@ -182,7 +208,7 @@ mod test {
}
crate::config::Config {
inpod_mark: 123,
packet_mark: Some(123),
..crate::config::parse_config().unwrap()
}
}};
@ -210,7 +236,7 @@ mod test {
let sock_addr: std::net::SocketAddr = "127.0.0.1:8080".parse().unwrap();
{
let s = sf.tcp_bind(sock_addr).unwrap();
let s = sf.tcp_bind(sock_addr).unwrap().inner();
// make sure mark and port re-use are set
let sock_ref = socket2::SockRef::from(&s);
@ -257,7 +283,7 @@ mod test {
let sock_addr: std::net::SocketAddr = "127.0.0.1:8080".parse().unwrap();
{
let s = sf.tcp_bind(sock_addr).unwrap();
let s = sf.tcp_bind(sock_addr).unwrap().inner();
// make sure mark and port re-use are set
let sock_ref = socket2::SockRef::from(&s);


@ -12,23 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use prometheus_client::encoding::EncodeLabelSet;
use prometheus_client::metrics::counter::Counter;
use prometheus_client::metrics::family::Family;
use prometheus_client::metrics::gauge::Gauge;
use prometheus_client::registry::Registry;
#[derive(Clone, Hash, Default, Debug, PartialEq, Eq, EncodeLabelSet)]
struct ProxyLabels {
uid: String,
}
#[derive(Default)]
pub struct Metrics {
pub(super) active_proxy_count: Family<(), Gauge>,
pub(super) pending_proxy_count: Family<(), Gauge>,
pub(super) proxies_started: Family<(), Counter>,
pub(super) proxies_stopped: Family<(), Counter>,
pub(super) active_proxy_count: Gauge,
pub(super) pending_proxy_count: Gauge,
pub(super) proxies_started: Counter,
pub(super) proxies_stopped: Counter,
}
impl Metrics {


@ -12,11 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use nix::sched::{setns, CloneFlags};
use nix::sched::{CloneFlags, setns};
use std::os::fd::OwnedFd;
use std::os::unix::io::AsRawFd;
use std::sync::Arc;
#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq)]
pub struct NetnsID {
pub inode: libc::ino_t,
pub dev: libc::dev_t,
}
// This is similar to netns_rs, but because we know we always have the same netns to revert to,
// we can make it more efficient with less chances of errors.
@ -28,7 +34,7 @@ pub struct InpodNetns {
struct NetnsInner {
cur_netns: Arc<OwnedFd>,
netns: OwnedFd,
netns_inode: libc::ino_t,
netns_id: NetnsID,
}
impl InpodNetns {
@ -49,12 +55,13 @@ impl InpodNetns {
pub fn new(cur_netns: Arc<OwnedFd>, workload_netns: OwnedFd) -> std::io::Result<Self> {
let res = nix::sys::stat::fstat(workload_netns.as_raw_fd())
.map_err(|e| std::io::Error::from_raw_os_error(e as i32))?;
let netns_inode = res.st_ino;
let inode = res.st_ino;
let dev = res.st_dev;
Ok(InpodNetns {
inner: Arc::new(NetnsInner {
cur_netns,
netns: workload_netns,
netns_inode,
netns_id: NetnsID { inode, dev },
}),
})
}
@ -62,9 +69,10 @@ impl InpodNetns {
use std::os::fd::AsFd;
self.inner.netns.as_fd()
}
// useful for logging / debugging
pub fn workload_inode(&self) -> libc::ino_t {
self.inner.netns_inode
pub fn workload_netns_id(&self) -> NetnsID {
self.inner.netns_id
}
pub fn run<F, T>(&self, f: F) -> std::io::Result<T>
@ -85,6 +93,16 @@ impl std::os::unix::io::AsRawFd for InpodNetns {
}
}
impl PartialEq for InpodNetns {
fn eq(&self, other: &Self) -> bool {
// Two netnses can be considered the same if the ino and dev they point to are the same
// (see cilium, vishvananda/netns, and others)
self.inner.netns_id == other.inner.netns_id
}
}
impl Eq for InpodNetns {}
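// Editor's note: what the equality above buys us, as a sketch (cur_netns,
// fd_a, and fd_b are hypothetical handles to the same pod network namespace).
fn netns_equality_sketch(
    cur_netns: Arc<OwnedFd>,
    fd_a: OwnedFd,
    fd_b: OwnedFd,
) -> std::io::Result<()> {
    // Equality is by (inode, device) of the netns file, not by fd value,
    // so two distinct fds on the same namespace compare equal.
    let a = InpodNetns::new(cur_netns.clone(), fd_a)?;
    let b = InpodNetns::new(cur_netns, fd_b)?;
    assert_eq!(a.workload_netns_id(), b.workload_netns_id());
    assert!(a == b);
    Ok(())
}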
#[cfg(test)]
mod tests {
use super::*;


@ -16,9 +16,10 @@
// It is not implemented in rust, so this provides an implementation for it.
use nix::sys::socket::{
bind as nixbind, connect as nixconnect, listen, socket, AddressFamily, SockFlag, SockType,
UnixAddr,
AddressFamily, SockFlag, SockType, UnixAddr, bind as nixbind, connect as nixconnect, listen,
socket,
};
use std::cmp;
use std::os::fd::AsRawFd;
use std::path::Path;
use tokio::net::{UnixListener, UnixStream};
@ -34,7 +35,9 @@ pub fn bind(path: &Path) -> std::io::Result<UnixListener> {
let addr = UnixAddr::new(path)?;
nixbind(socket.as_raw_fd(), &addr)?;
listen(&socket, nix::sys::socket::Backlog::new(1024)?)?;
// Do not exceed the system maximum (SOMAXCONN)
let backlog = cmp::min(1024, libc::SOMAXCONN - 1);
listen(&socket, nix::sys::socket::Backlog::new(backlog)?)?;
let std_socket = std::os::unix::net::UnixListener::from(socket);
UnixListener::from_std(std_socket)


@ -14,8 +14,8 @@
use super::istio::zds::{self, Ack, Version, WorkloadRequest, WorkloadResponse, ZdsHello};
use super::{WorkloadData, WorkloadMessage};
use drain::Watch;
use nix::sys::socket::{recvmsg, sendmsg, ControlMessageOwned, MsgFlags};
use crate::drain::DrainWatcher;
use nix::sys::socket::{ControlMessageOwned, MsgFlags, recvmsg, sendmsg};
use prost::Message;
use std::io::{IoSlice, IoSliceMut};
use std::os::fd::OwnedFd;
@ -28,12 +28,12 @@ use zds::workload_request::Payload;
#[allow(dead_code)]
pub struct WorkloadStreamProcessor {
stream: UnixStream,
drain: Watch,
drain: DrainWatcher,
}
#[allow(dead_code)]
impl WorkloadStreamProcessor {
pub fn new(stream: UnixStream, drain: Watch) -> Self {
pub fn new(stream: UnixStream, drain: DrainWatcher) -> Self {
WorkloadStreamProcessor { stream, drain }
}
@ -91,7 +91,7 @@ impl WorkloadStreamProcessor {
let res = loop {
tokio::select! {
biased; // check drain first, so we don't read from the socket if we are draining.
_ = self.drain.clone().signaled() => {
_ = self.drain.clone().wait_for_drain() => {
info!("workload proxy manager: drain requested");
return Ok(None);
}
@ -125,7 +125,7 @@ impl WorkloadStreamProcessor {
};
// call maybe_get_fd first (and not get_info_from_data), so that if it fails we will close the FDs.
let maybe_our_fd = maybe_get_fd(res.cmsgs())?;
let maybe_our_fd = maybe_get_fd(res.cmsgs()?)?;
let flags = res.flags;
(flags, maybe_our_fd, res.bytes)
};
@ -288,7 +288,6 @@ mod tests {
name: "test".to_string(),
namespace: "default".to_string(),
service_account: "defaultsvc".to_string(),
trust_domain: "cluster.local".to_string(),
};
let uid = uid(0);
let data = prep_request(zds::workload_request::Payload::Add(


@ -12,24 +12,25 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use drain::Signal;
use crate::drain;
use crate::drain::DrainTrigger;
use std::sync::Arc;
use tracing::{debug, info, Instrument};
use tracing::{Instrument, debug, info};
use super::{metrics::Metrics, Error, WorkloadMessage};
use super::{Error, WorkloadMessage, metrics::Metrics};
use crate::proxyfactory::ProxyFactory;
use crate::state::WorkloadInfo;
use super::config::InPodConfig;
use super::netns::InpodNetns;
use super::WorkloadUid;
use super::netns::{InpodNetns, NetnsID};
// Note: we can't drain on drop, as drain is async (it waits for the drain to finish).
pub(super) struct WorkloadState {
drain: Signal,
workload_netns_inode: libc::ino_t,
drain: DrainTrigger,
netns_id: NetnsID,
}
#[derive(Default)]
@ -38,8 +39,13 @@ struct DrainingTasks {
}
impl DrainingTasks {
fn drain_workload(&mut self, workload_state: WorkloadState) {
let handle = tokio::spawn(workload_state.drain.drain());
fn shutdown_workload(&mut self, workload_state: WorkloadState) {
// Workload is gone, so no need to gracefully clean it up
let handle = tokio::spawn(
workload_state
.drain
.start_drain_and_wait(drain::DrainMode::Immediate),
);
// before we push to draining, try to clear done entries, so the vector doesn't grow too much
self.draining.retain(|x| !x.is_finished());
// add deleted pod to draining. we do this so we make sure to wait for it in case we
@ -62,7 +68,7 @@ pub struct WorkloadProxyManagerState {
// workloads we wanted to start but couldn't because we had an error starting them.
// This happened to us mainly in testing when we redeploy ztunnel, and the old pod was
// not completely drained yet.
pending_workloads: hashbrown::HashMap<WorkloadUid, (Option<WorkloadInfo>, InpodNetns)>,
pending_workloads: hashbrown::HashMap<WorkloadUid, (WorkloadInfo, InpodNetns)>,
draining: DrainingTasks,
// new connection stuff
@ -109,48 +115,86 @@ impl WorkloadProxyManagerState {
match msg {
WorkloadMessage::AddWorkload(poddata) => {
info!(
"pod {:?} received netns, starting proxy",
poddata.workload_uid
uid = poddata.workload_uid.0,
name = poddata
.workload_info
.as_ref()
.map(|w| w.name.as_str())
.unwrap_or_default(),
namespace = poddata
.workload_info
.as_ref()
.map(|w| w.namespace.as_str())
.unwrap_or_default(),
"pod received, starting proxy",
);
let Some(wli) = poddata.workload_info else {
return Err(Error::ProtocolError(
"workload_info is required but not present".into(),
));
};
if !self.snapshot_received {
debug!("got workload add before snapshot");
self.snapshot_names.insert(poddata.workload_uid.clone());
}
let netns = InpodNetns::new(self.inpod_config.cur_netns(), poddata.netns)
.map_err(|e| Error::ProxyError(crate::proxy::Error::Io(e)))?;
let info = poddata.workload_info.map(|w| WorkloadInfo {
name: w.name,
namespace: w.namespace,
service_account: w.service_account,
trust_domain: w.trust_domain,
});
let netns =
InpodNetns::new(self.inpod_config.cur_netns(), poddata.netns).map_err(|e| {
Error::ProxyError(
poddata.workload_uid.0.clone(),
crate::proxy::Error::Io(e),
)
})?;
let info = WorkloadInfo {
name: wli.name,
namespace: wli.namespace,
service_account: wli.service_account,
};
self.add_workload(&poddata.workload_uid, info, netns)
.await
.map_err(Error::ProxyError)
.map_err(|e| Error::ProxyError(poddata.workload_uid.0, e))
}
WorkloadMessage::KeepWorkload(workload_uid) => {
debug!("pod keep received. will not delete it when snapshot is sent");
info!(
uid = workload_uid.0,
"pod keep received. will not delete it when snapshot is sent"
);
if self.snapshot_received {
// this can only happen before snapshot is received.
return Err(Error::ProtocolError);
return Err(Error::ProtocolError(
"pod keep received after snapshot".into(),
));
}
self.snapshot_names.insert(workload_uid);
Ok(())
}
WorkloadMessage::DelWorkload(workload_uid) => {
info!("pod delete request, draining proxy");
info!(
uid = workload_uid.0,
"pod delete request, shutting down proxy"
);
if !self.snapshot_received {
// TODO: consider if this is an error. if not, do this instead:
// self.snapshot_names.remove(&workload_uid)
// self.pending_workloads.remove(&workload_uid)
return Err(Error::ProtocolError);
debug!("got workload delete before snapshot");
// Since we insert here on AddWorkload before we get a snapshot,
// make sure we also opportunistically remove here before we
// get a snapshot
//
// Note that even though AddWorkload starts the workload, we do *not* need
// to stop it here, as it should be auto-dropped subsequently during snapshot
// reconcile(), when we actually get the `SnapshotSent` notification.
self.snapshot_names.remove(&workload_uid);
// `reconcile()` will drop this workload later, but if the workload never successfully
// starts it will stay in the pending queue (which `reconcile()` can't remove it from),
// so clear the pending queue here.
self.pending_workloads.remove(&workload_uid);
return Ok(());
}
self.del_workload(&workload_uid);
Ok(())
}
WorkloadMessage::WorkloadSnapshotSent => {
info!("pod received snapshot sent");
info!("received snapshot sent");
if self.snapshot_received {
return Err(Error::ProtocolError);
return Err(Error::ProtocolError("pod snapshot received already".into()));
}
self.reconcile();
// mark ready
@ -167,17 +211,17 @@ impl WorkloadProxyManagerState {
.workload_states
.extract_if(|uid, _| !self.snapshot_names.contains(uid))
{
self.draining.drain_workload(workload_state);
self.draining.shutdown_workload(workload_state);
}
self.snapshot_names.clear();
self.update_proxy_count_metrics();
}
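The `reconcile()` hunk above relies on `extract_if`, which removes matching entries from the map and yields them, so each evicted workload can be shut down as it is removed. A standalone sketch of the pattern (hashbrown's `extract_if`, with plain types standing in for the workload state):

```rust
use std::collections::HashSet;

// Remove every entry whose key is absent from the snapshot, returning the
// removed values so the caller can shut each one down.
fn evict_missing(
    states: &mut hashbrown::HashMap<String, u32>,
    snapshot: &HashSet<String>,
) -> Vec<u32> {
    states
        .extract_if(|uid, _| !snapshot.contains(uid))
        .map(|(_, v)| v)
        .collect()
}
```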
pub async fn drain(self) {
let drain_futures = self
.workload_states
.into_iter()
.map(|(_, v)| v.drain.drain() /* do not .await here!!! */);
let drain_futures =
self.workload_states.into_iter().map(|(_, v)| {
v.drain.start_drain_and_wait(drain::DrainMode::Graceful)
} /* do not .await here!!! */);
// join these first, as we need to drive these to completion
futures::future::join_all(drain_futures).await;
// these are join handles that are driven by tokio, we just need to wait for them, so join these
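A sketch of the two-phase wait above: the per-workload drain futures are not spawned, so they must be driven to completion by us via `join_all`; the handles in `draining` are tasks tokio already drives, so they only need to be awaited afterwards.

```rust
// Sketch: drive un-spawned drain futures first, then wait on spawned tasks.
async fn wait_for_shutdown<F: std::future::Future<Output = ()>>(
    drain_futures: Vec<F>,
    spawned: Vec<tokio::task::JoinHandle<()>>,
) {
    futures::future::join_all(drain_futures).await; // we drive these to completion
    futures::future::join_all(spawned).await; // tokio drives these; we just wait
}
```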
@ -188,7 +232,7 @@ impl WorkloadProxyManagerState {
async fn add_workload(
&mut self,
workload_uid: &WorkloadUid,
workload_info: Option<WorkloadInfo>,
workload_info: WorkloadInfo,
netns: InpodNetns,
) -> Result<(), crate::proxy::Error> {
match self
@ -196,6 +240,8 @@ impl WorkloadProxyManagerState {
.await
{
Ok(()) => {
// If the workload is already pending, make sure we drop it, so we don't retry.
self.pending_workloads.remove(workload_uid);
self.update_proxy_count_metrics();
Ok(())
}
@ -210,13 +256,13 @@ impl WorkloadProxyManagerState {
async fn add_workload_inner(
&mut self,
workload_uid: &WorkloadUid,
workload_info: &Option<WorkloadInfo>,
workload_info: &WorkloadInfo,
netns: InpodNetns,
) -> Result<(), crate::proxy::Error> {
// check if we have a proxy already
let maybe_existing = self.workload_states.get(workload_uid);
if let Some(existing) = maybe_existing {
if existing.workload_netns_inode != netns.workload_inode() {
if existing.netns_id != netns.workload_netns_id() {
// inodes are different, we have a new netns.
// this can happen when there's a CNI failure (that's unrelated to us) which triggers
// pod sandbox to be re-created with a fresh new netns.
@ -232,17 +278,18 @@ impl WorkloadProxyManagerState {
self.admin_handler
.proxy_pending(workload_uid, workload_info);
let workload_netns_id = netns.workload_netns_id();
debug!(
workload=?workload_uid,
workload_info=?workload_info,
inode=?netns.workload_inode(),
netns_id=?workload_netns_id,
"starting proxy",
);
// We create a per workload drain here. If the main loop in WorkloadProxyManager::run drains,
// we drain all these per-workload drains before exiting the loop
let workload_netns_inode = netns.workload_inode();
let (drain_tx, drain_rx) = drain::channel();
let (drain_tx, drain_rx) = drain::new();
let proxies = self
.proxy_gen
@ -260,29 +307,28 @@ impl WorkloadProxyManagerState {
let metrics = self.metrics.clone();
let admin_handler = self.admin_handler.clone();
metrics.proxies_started.get_or_create(&()).inc();
metrics.proxies_started.inc();
if let Some(proxy) = proxies.proxy {
tokio::spawn(
async move {
proxy.run().await;
debug!("proxy for workload {:?} exited", uid);
metrics.proxies_stopped.get_or_create(&()).inc();
metrics.proxies_stopped.inc();
admin_handler.proxy_down(&uid);
}
.instrument(tracing::info_span!("proxy", uid=%workload_uid.clone().into_string())),
.instrument(tracing::info_span!("proxy", wl=%format!("{}/{}", workload_info.namespace, workload_info.name))),
);
}
if let Some(proxy) = proxies.dns_proxy {
tokio::spawn(proxy.run().instrument(
tracing::info_span!("dns_proxy", uid=%workload_uid.clone().into_string()),
));
tokio::spawn(proxy.run().instrument(tracing::info_span!("dns_proxy", wl=%format!("{}/{}", workload_info.namespace, workload_info.name))));
}
self.workload_states.insert(
workload_uid.clone(),
WorkloadState {
drain: drain_tx,
workload_netns_inode,
netns_id: workload_netns_id,
},
);
@ -293,6 +339,10 @@ impl WorkloadProxyManagerState {
!self.pending_workloads.is_empty()
}
pub fn pending_uids(&self) -> Vec<String> {
self.pending_workloads.keys().map(|k| k.0.clone()).collect()
}
pub fn ready(&self) -> bool {
// We are ready after we received our first snapshot and don't have any proxies that failed to start.
self.snapshot_received && !self.have_pending()
@ -302,11 +352,11 @@ impl WorkloadProxyManagerState {
let current_pending_workloads = std::mem::take(&mut self.pending_workloads);
for (uid, (info, netns)) in current_pending_workloads {
info!("retrying workload {:?}", uid);
info!(uid = uid.0, "retrying workload");
match self.add_workload(&uid, info, netns).await {
Ok(()) => {}
Err(e) => {
info!("retrying workload {:?} failed: {}", uid, e);
info!(uid = uid.0, "retrying workload failed: {}", e);
}
}
}
@ -322,17 +372,15 @@ impl WorkloadProxyManagerState {
self.update_proxy_count_metrics();
self.draining.drain_workload(workload_state);
self.draining.shutdown_workload(workload_state);
}
fn update_proxy_count_metrics(&self) {
self.metrics
.active_proxy_count
.get_or_create(&())
.set(self.workload_states.len().try_into().unwrap_or(-1));
self.metrics
.pending_proxy_count
.get_or_create(&())
.set(self.pending_workloads.len().try_into().unwrap_or(-1));
}
}
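The `unwrap_or(-1)` in the hunk above is a saturation guard: the maps are indexed by `usize` but the gauges take `i64`, so the conversion pins an (unlikely) overflow to -1 rather than panicking.

```rust
// Sketch of the gauge conversion used above.
fn gauge_value(count: usize) -> i64 {
    count.try_into().unwrap_or(-1)
}
```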
@ -340,15 +388,27 @@ impl WorkloadProxyManagerState {
#[cfg(test)]
mod tests {
use super::*;
use crate::inpod::test_helpers::{self, create_proxy_confilct, new_netns, uid};
use crate::inpod::WorkloadData;
use crate::inpod::test_helpers::{self, create_proxy_conflict, new_netns, uid};
use crate::inpod::istio::zds;
use matches::assert_matches;
use std::sync::Arc;
use std::time::Duration;
struct Fixture {
state: WorkloadProxyManagerState,
metrics: Arc<crate::inpod::Metrics>,
}
fn workload_info() -> Option<zds::WorkloadInfo> {
Some(zds::WorkloadInfo {
name: "name".to_string(),
namespace: "ns".to_string(),
service_account: "sa".to_string(),
})
}
macro_rules! fixture {
() => {{
if !crate::test_helpers::can_run_privilged_test() {
@ -376,7 +436,7 @@ mod tests {
let data = WorkloadData {
netns: new_netns(),
workload_uid: uid(0),
workload_info: None,
workload_info: workload_info(),
};
state
.process_msg(WorkloadMessage::AddWorkload(data))
@ -393,7 +453,7 @@ mod tests {
let data = WorkloadData {
netns: ns.try_clone().unwrap(),
workload_uid: uid(0),
workload_info: None,
workload_info: workload_info(),
};
state
.process_msg(WorkloadMessage::AddWorkload(data))
@ -402,7 +462,7 @@ mod tests {
let data = WorkloadData {
netns: ns,
workload_uid: uid(0),
workload_info: None,
workload_info: workload_info(),
};
state
.process_msg(WorkloadMessage::AddWorkload(data))
@ -418,12 +478,12 @@ mod tests {
let mut state = fixture.state;
let ns = new_netns();
// to make the proxy fail, bind to its ports in its netns
let sock = create_proxy_confilct(&ns);
let sock = create_proxy_conflict(&ns);
let data = WorkloadData {
netns: ns,
workload_uid: uid(0),
workload_info: None,
workload_info: workload_info(),
};
let ret = state.process_msg(WorkloadMessage::AddWorkload(data)).await;
@ -431,26 +491,70 @@ mod tests {
assert!(state.have_pending());
std::mem::drop(sock);
// Unfortunate but necessary. When we close a socket in listener, the port is not synchronously freed.
// This can lead to our retry failing due to a conflict. There doesn't seem to be a great way to reliably detect this.
// Sleeping 10ms, however, is quite small and seems very reliable.
tokio::time::sleep(Duration::from_millis(10)).await;
state.retry_pending().await;
assert!(!state.have_pending());
state.drain().await;
assert_eq!(m.proxies_started.get_or_create(&()).get(), 1);
assert_eq!(m.proxies_started.get(), 1);
}
#[tokio::test]
async fn idemepotency_add_workload_fails_and_then_deleted() {
async fn workload_added_while_pending() {
// Regression test for https://github.com/istio/istio/issues/52858
// Workload is added and fails, so put on the pending queue. Then it is added and succeeds.
// The bug is that when we retry with the failed netns, we (1) never succeed and (2) drop the running proxy.
let fixture = fixture!();
let m = fixture.metrics.clone();
let mut state = fixture.state;
let ns1 = new_netns();
let ns2 = new_netns();
// to make the proxy fail, bind to its ports in its netns
let _sock = create_proxy_conflict(&ns1);
// Add the pod in netns1
let ret = state
.process_msg(WorkloadMessage::AddWorkload(WorkloadData {
netns: ns1,
workload_uid: uid(0),
workload_info: workload_info(),
}))
.await;
assert!(ret.is_err());
assert!(state.have_pending());
// Add it again with another netns. The original pod should still be present in the retry queue with ns1
state
.process_msg(WorkloadMessage::AddWorkload(WorkloadData {
netns: ns2,
workload_uid: uid(0),
workload_info: workload_info(),
}))
.await
.expect("should start");
state.retry_pending().await;
assert!(!state.have_pending());
state.drain().await;
assert_eq!(m.proxies_started.get(), 1);
}
#[tokio::test]
async fn idempotency_add_workload_fails_and_then_deleted() {
let fixture = fixture!();
let mut state = fixture.state;
let ns = new_netns();
// to make the proxy fail, bind to its ports in its netns
let _sock = create_proxy_confilct(&ns);
let _sock = create_proxy_conflict(&ns);
let data = WorkloadData {
netns: ns,
workload_uid: uid(0),
workload_info: None,
workload_info: workload_info(),
};
state
.process_msg(WorkloadMessage::WorkloadSnapshotSent)
@ -470,6 +574,45 @@ mod tests {
state.drain().await;
}
#[tokio::test]
async fn del_workload_before_snapshot_removes_from_snapshot_and_pending() {
let fixture = fixture!();
let mut state = fixture.state;
let ns = new_netns();
// to make the proxy fail, bind to its ports in its netns
let _sock = create_proxy_conflict(&ns);
let data = WorkloadData {
netns: ns,
workload_uid: uid(0),
workload_info: workload_info(),
};
let ret = state.process_msg(WorkloadMessage::AddWorkload(data)).await;
assert!(state.snapshot_names.len() == 1);
assert!(ret.is_err());
assert!(state.have_pending());
state
.process_msg(WorkloadMessage::DelWorkload(uid(0)))
.await
.unwrap();
assert!(state.snapshot_names.is_empty());
state
.process_msg(WorkloadMessage::WorkloadSnapshotSent)
.await
.unwrap();
assert!(state.snapshot_names.is_empty());
assert!(!state.have_pending());
state.drain().await;
}
#[tokio::test]
async fn add_delete_add_workload_starts_only_one_proxy() {
let fixture = fixture!();
@ -479,7 +622,7 @@ mod tests {
let data = WorkloadData {
netns: ns.try_clone().unwrap(),
workload_uid: uid(0),
workload_info: None,
workload_info: workload_info(),
};
let workload_uid = data.workload_uid.clone();
@ -489,7 +632,7 @@ mod tests {
let msg3 = WorkloadMessage::AddWorkload(WorkloadData {
netns: ns,
workload_uid,
workload_info: None,
workload_info: workload_info(),
});
state
@ -513,7 +656,7 @@ mod tests {
let data = WorkloadData {
netns: new_netns(),
workload_uid: uid(0),
workload_info: None,
workload_info: workload_info(),
};
let workload_uid = data.workload_uid.clone();
@ -533,7 +676,7 @@ mod tests {
.await
.unwrap();
assert_eq!(m.proxies_started.get_or_create(&()).get(), 1);
assert_eq!(m.proxies_started.get(), 1);
state.drain().await;
}
@ -547,7 +690,7 @@ mod tests {
let data = WorkloadData {
netns: new_netns(),
workload_uid: uid(0),
workload_info: None,
workload_info: workload_info(),
};
let workload_uid = data.workload_uid.clone();
@ -555,14 +698,30 @@ mod tests {
let add2 = WorkloadMessage::AddWorkload(WorkloadData {
netns: new_netns(),
workload_uid,
workload_info: None,
workload_info: workload_info(),
});
state.process_msg(add1).await.unwrap();
state.process_msg(add2).await.unwrap();
state.drain().await;
assert_eq!(m.proxies_started.get_or_create(&()).get(), 2);
assert_eq!(m.active_proxy_count.get_or_create(&()).get(), 1);
assert_eq!(m.proxies_started.get(), 2);
assert_eq!(m.active_proxy_count.get(), 1);
}
#[tokio::test]
async fn no_workload_info_rejected() {
let fixture = fixture!();
let mut state = fixture.state;
let data = WorkloadData {
netns: new_netns(),
workload_uid: uid(0),
workload_info: None,
};
let add = WorkloadMessage::AddWorkload(data);
assert_matches!(state.process_msg(add).await, Err(_));
}
}


@ -17,7 +17,7 @@ use super::netns::InpodNetns;
use crate::proxyfactory::ProxyFactory;
use crate::state::{DemandProxyState, ProxyState};
use nix::sched::{unshare, CloneFlags};
use nix::sched::{CloneFlags, unshare};
use prometheus_client::registry::Registry;
use std::sync::{Arc, RwLock};
@ -28,8 +28,11 @@ use hickory_resolver::config::{ResolverConfig, ResolverOpts};
use prost::Message;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use super::istio::zds::{WorkloadRequest, WorkloadResponse, ZdsHello};
use super::istio::zds::{WorkloadInfo, WorkloadRequest, WorkloadResponse, ZdsHello};
use crate::drain::{DrainTrigger, DrainWatcher};
use crate::{dns, drain};
use once_cell::sync::Lazy;
use std::os::fd::{AsRawFd, OwnedFd};
use tracing::debug;
@ -41,46 +44,52 @@ pub struct Fixture {
pub proxy_factory: ProxyFactory,
pub ipc: InPodConfig,
pub inpod_metrics: Arc<crate::inpod::Metrics>,
pub drain_tx: drain::Signal,
pub drain_rx: drain::Watch,
pub drain_tx: DrainTrigger,
pub drain_rx: DrainWatcher,
}
// Ensure that the network namespace is only unshared once, using `once_cell`
static UNSHARE: Lazy<()> = Lazy::new(|| {
unshare(CloneFlags::CLONE_NEWNET).unwrap();
let lo_set = std::process::Command::new("ip")
.args(["link", "set", "lo", "up"])
.status()
.unwrap()
.success();
assert!(lo_set);
});
impl Default for Fixture {
fn default() -> Fixture {
crate::test_helpers::helpers::initialize_telemetry();
unshare(CloneFlags::CLONE_NEWNET).unwrap();
let lo_set = std::process::Command::new("ip")
.args(["link", "set", "lo", "up"])
.status()
.unwrap()
.success();
assert!(lo_set);
Lazy::force(&UNSHARE);
let mut registry = Registry::default();
let cfg = crate::config::Config {
inpod_mark: 1,
packet_mark: Some(1),
..crate::config::construct_config(Default::default()).unwrap()
};
let state = Arc::new(RwLock::new(ProxyState::default()));
let state = Arc::new(RwLock::new(ProxyState::new(None)));
let cert_manager: Arc<crate::identity::SecretManager> =
crate::identity::mock::new_secret_manager(std::time::Duration::from_secs(10));
let metrics = crate::proxy::Metrics::new(&mut registry);
let (drain_tx, drain_rx) = drain::channel();
let metrics = Arc::new(crate::proxy::Metrics::new(&mut registry));
let (drain_tx, drain_rx) = drain::new();
let dns_metrics = Some(dns::Metrics::new(&mut registry));
let dstate = DemandProxyState::new(
state.clone(),
None,
ResolverConfig::default(),
ResolverOpts::default(),
metrics.clone(),
);
let ipc = InPodConfig::new(&cfg).unwrap();
let proxy_gen = ProxyFactory::new(
cfg,
Arc::new(cfg),
dstate,
cert_manager,
Some(metrics),
None,
metrics,
dns_metrics,
drain_rx.clone(),
)
.unwrap();
@ -155,6 +164,7 @@ pub async fn send_snap_sent(s: &mut UnixStream) {
pub async fn send_workload_added(
s: &mut UnixStream,
uid: super::WorkloadUid,
info: Option<WorkloadInfo>,
fd: impl std::os::fd::AsRawFd,
) {
let fds = [fd.as_raw_fd()];
@ -165,7 +175,7 @@ pub async fn send_workload_added(
payload: Some(crate::inpod::istio::zds::workload_request::Payload::Add(
crate::inpod::istio::zds::AddWorkload {
uid: uid.into_string(),
..Default::default()
workload_info: info,
},
)),
};
@ -216,7 +226,7 @@ pub async fn send_workload_del(s: &mut UnixStream, uid: super::WorkloadUid) {
.expect("failed to sendmsg");
}
pub fn create_proxy_confilct(ns: &std::os::fd::OwnedFd) -> std::os::fd::OwnedFd {
pub fn create_proxy_conflict(ns: &std::os::fd::OwnedFd) -> std::os::fd::OwnedFd {
let inpodns = InpodNetns::new(
Arc::new(crate::inpod::netns::InpodNetns::current().unwrap()),
ns.try_clone().unwrap(),


@ -12,20 +12,23 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::drain::DrainWatcher;
use crate::readiness;
use drain::Watch;
use backoff::{ExponentialBackoff, backoff::Backoff};
use std::path::PathBuf;
use std::time::Duration;
use tokio::net::UnixStream;
use tracing::{debug, error, info, warn};
use super::statemanager::WorkloadProxyManagerState;
use super::Error;
use super::statemanager::WorkloadProxyManagerState;
use super::protocol::WorkloadStreamProcessor;
const RETRY_DURATION: Duration = Duration::from_secs(5);
const CONNECTION_FAILURE_RETRY_DELAY_MAX_INTERVAL: Duration = Duration::from_secs(15);
struct WorkloadProxyNetworkHandler {
uds: PathBuf,
}
@ -35,6 +38,7 @@ struct WorkloadProxyReadinessHandler {
// Manually drop as we don't want to mark ready if we are dropped.
// This can happen when the server drains.
block_ready: Option<std::mem::ManuallyDrop<readiness::BlockReady>>,
backoff: ExponentialBackoff,
}
pub struct WorkloadProxyManager {
@ -52,10 +56,19 @@ struct WorkloadProxyManagerProcessor<'a> {
}
impl WorkloadProxyReadinessHandler {
fn new(ready: readiness::Ready) -> Self {
fn new(ready: readiness::Ready, reconnect_backoff: Option<ExponentialBackoff>) -> Self {
let backoff = reconnect_backoff.unwrap_or(ExponentialBackoff {
initial_interval: Duration::from_millis(5),
max_interval: CONNECTION_FAILURE_RETRY_DELAY_MAX_INTERVAL,
multiplier: 2.0,
randomization_factor: 0.2,
..Default::default()
});
let mut r = Self {
ready,
block_ready: None,
backoff,
};
r.not_ready();
r
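For reference, this is roughly how the `backoff` crate behaves with the settings above: each `next_backoff()` call returns a jittered interval that about doubles (multiplier 2.0, ±20% randomization) until capped at `max_interval`. A self-contained sketch; the `max_elapsed_time: None` line is an assumption added here so the sample never gives up:

```rust
use backoff::{ExponentialBackoff, backoff::Backoff};
use std::time::Duration;

fn sample_intervals() {
    let mut b = ExponentialBackoff {
        initial_interval: Duration::from_millis(5),
        max_interval: Duration::from_secs(15),
        multiplier: 2.0,
        randomization_factor: 0.2,
        max_elapsed_time: None, // assumed here: retry forever
        ..Default::default()
    };
    for _ in 0..5 {
        // ~5ms, ~10ms, ~20ms, ~40ms, ~80ms, each jittered by up to +/-20%
        println!("{:?}", b.next_backoff());
    }
}
```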
@ -71,9 +84,12 @@ impl WorkloadProxyReadinessHandler {
std::mem::drop(block_ready);
}
self.backoff.reset()
}
fn not_ready(&mut self) {
debug!("workload proxy manager is NOT ready");
if self.block_ready.is_none() {
self.block_ready = Some(std::mem::ManuallyDrop::new(
self.ready.register_task("workload proxy manager"),
@ -88,7 +104,6 @@ impl WorkloadProxyNetworkHandler {
}
async fn connect(&self) -> UnixStream {
const MAX_BACKOFF: Duration = Duration::from_secs(15);
let mut backoff = Duration::from_millis(10);
debug!("connecting to server: {:?}", self.uds);
@ -96,10 +111,11 @@ impl WorkloadProxyNetworkHandler {
loop {
match super::packet::connect(&self.uds).await {
Err(e) => {
backoff = std::cmp::min(MAX_BACKOFF, backoff * 2);
backoff =
std::cmp::min(CONNECTION_FAILURE_RETRY_DELAY_MAX_INTERVAL, backoff * 2);
warn!(
"failed to connect to server: {:?}. retrying in {:?}",
e, backoff
"failed to connect to the Istio CNI node agent over {:?}, is the node agent healthy? details: {:?}. retrying in {:?}",
&self.uds, e, backoff
);
tokio::time::sleep(backoff).await;
continue;
@ -137,30 +153,37 @@ impl WorkloadProxyManager {
let mgr = WorkloadProxyManager {
state,
networking,
readiness: WorkloadProxyReadinessHandler::new(ready),
readiness: WorkloadProxyReadinessHandler::new(ready, None),
};
Ok(mgr)
}
pub async fn run(mut self, drain: Watch) -> Result<(), anyhow::Error> {
self.run_internal(drain).await;
pub async fn run(mut self, drain: DrainWatcher) -> Result<(), anyhow::Error> {
self.run_internal(drain).await?;
// we broke the loop, this can only happen when drain was signaled. drain our proxies.
// We broke the loop, this can only happen when drain was signaled
// or we got a terminal protocol error. Drain our proxies.
debug!("workload proxy manager waiting for proxies to drain");
self.state.drain().await;
debug!("workload proxy manager proxies drained");
Ok(())
}
async fn run_internal(&mut self, drain: Watch) {
// This func will run and attempt to (re)connect to the node agent over uds, until
// - a drain is signaled
// - we have a ProtocolError (we have a serious version mismatch)
// We should never _have_ a protocol error as the gRPC proto should be forwards+backwards compatible,
// so this is mostly a safeguard
async fn run_internal(&mut self, drain: DrainWatcher) -> Result<(), anyhow::Error> {
// for now just drop block_ready, until we support knowing that our state is in sync.
debug!("workload proxy manager is running");
// hold the shutdown-release handle (`_rs`) until we are done with `state.drain` below.
let _rs = loop {
// Accept a connection
let stream = tokio::select! {
biased; // check the drain first
rs = drain.clone().signaled() => {
rs = drain.clone().wait_for_drain() => {
info!("drain requested");
break rs;
}
@ -178,13 +201,44 @@ impl WorkloadProxyManager {
Ok(()) => {
info!("process stream ended with eof");
}
// If we successfully accepted a connection, but the first thing we try (announce)
// fails, it can mean 2 things:
// 1. The connection was killed because the node agent happened to restart at a bad time
// 2. The connection was killed because we have a protocol mismatch with the node agent.
//
// For case 1, we must keep retrying. For case 2 we shouldn't retry, as we will spam
// an incompatible server with messages and connections it can't understand.
//
// We also cannot easily tell these cases apart due to the simplistic protocol in use here,
// so a happy medium is to backoff if we get announce errors - they could be legit or
// non-legit disconnections, we can't tell.
Err(Error::AnnounceError(e)) => {
self.readiness.not_ready();
// This will retry infinitely for as long as the socket doesn't EOF, but not immediately.
let wait = self
.readiness
.backoff
.next_backoff()
.unwrap_or(CONNECTION_FAILURE_RETRY_DELAY_MAX_INTERVAL);
error!("node agent announcement failed ({e}), retrying in {wait:?}");
tokio::time::sleep(wait).await;
continue;
}
Err(Error::ProtocolError(e)) => {
error!("protocol mismatch error while processing stream, shutting down");
self.readiness.not_ready();
return Err(anyhow::anyhow!("protocol error {:?}", e));
}
Err(e) => {
// for other errors, just retry
warn!("process stream ended: {:?}", e);
}
};
debug!("workload proxy manager is NOT ready");
self.readiness.not_ready();
};
Ok(())
}
}
@ -249,7 +303,7 @@ impl<'a> WorkloadProxyManagerProcessor<'a> {
processor
.send_hello()
.await
.map_err(|_| Error::ProtocolError)?;
.map_err(|_| Error::AnnounceError("could not announce to node agent".into()))?;
loop {
let msg = match self.read_message_and_retry_proxies(&mut processor).await {
@ -281,12 +335,12 @@ impl<'a> WorkloadProxyManagerProcessor<'a> {
.await
.map_err(|e| Error::SendAckError(e.to_string()))?;
}
Err(Error::ProxyError(e)) => {
Err(Error::ProxyError(uid, e)) => {
error!(%uid, "failed to start proxy: {:?}", e);
// setup the retry timer:
self.schedule_retry();
// proxy error is a transient error, so report it but don't disconnect
// TODO: raise metrics
error!("failed to start proxy: {:?}", e);
processor
.send_nack(anyhow::anyhow!("failure to start proxy : {:?}", e))
.await
@ -309,10 +363,11 @@ impl<'a> WorkloadProxyManagerProcessor<'a> {
fn schedule_retry(&mut self) {
if self.next_pending_retry.is_none() {
info!("scheduling retry");
info!(uids=?self.state.pending_uids(), "scheduling retry");
self.next_pending_retry = Some(Box::pin(tokio::time::sleep(RETRY_DURATION)));
}
}
fn check_ready(&mut self) {
if self.state.ready() {
self.readiness.mark_ready();
@ -327,13 +382,17 @@ pub(crate) mod tests {
use super::super::protocol::WorkloadStreamProcessor;
use tokio::io::AsyncWriteExt;
use super::*;
use crate::inpod::test_helpers::{
self, create_proxy_confilct, new_netns, read_hello, read_msg, send_snap_sent,
self, create_proxy_conflict, new_netns, read_hello, read_msg, send_snap_sent,
send_workload_added, send_workload_del, uid,
};
use crate::drain::DrainTrigger;
use crate::inpod::istio::zds;
use std::{collections::HashSet, sync::Arc};
fn assert_end_stream(res: Result<(), Error>) {
@ -346,11 +405,26 @@ pub(crate) mod tests {
}
}
fn assert_announce_error(res: Result<(), Error>) {
match res {
Err(Error::AnnounceError(_)) => {}
_ => panic!("expected announce error"),
}
}
fn workload_info() -> Option<zds::WorkloadInfo> {
Some(zds::WorkloadInfo {
name: "name".to_string(),
namespace: "ns".to_string(),
service_account: "sa".to_string(),
})
}
struct Fixture {
state: WorkloadProxyManagerState,
inpod_metrics: Arc<crate::inpod::Metrics>,
drain_rx: drain::Watch,
_drain_tx: drain::Signal,
drain_rx: DrainWatcher,
_drain_tx: DrainTrigger,
}
macro_rules! fixture {
@ -384,11 +458,11 @@ pub(crate) mod tests {
let server = tokio::spawn(async move {
read_hello(&mut s2).await;
send_workload_added(&mut s2, uid(0), new_netns()).await;
send_workload_added(&mut s2, uid(0), workload_info(), new_netns()).await;
read_msg(&mut s2).await;
});
let mut readiness = WorkloadProxyReadinessHandler::new(readiness::Ready::new());
let mut readiness = WorkloadProxyReadinessHandler::new(readiness::Ready::new(), None);
let mut processor_helper = WorkloadProxyManagerProcessor::new(&mut state, &mut readiness);
let res = processor_helper.process(processor).await;
@ -399,6 +473,29 @@ pub(crate) mod tests {
server.await.unwrap();
}
#[tokio::test]
async fn test_process_failed_announce() {
let f = fixture!();
let (s1, mut s2) = UnixStream::pair().unwrap();
let processor = WorkloadStreamProcessor::new(s1, f.drain_rx.clone());
let mut state = f.state;
// fake server that simply slams the socket shut and bails
let server = tokio::spawn(async move {
let _ = s2.shutdown().await;
});
let mut readiness = WorkloadProxyReadinessHandler::new(readiness::Ready::new(), None);
let mut processor_helper = WorkloadProxyManagerProcessor::new(&mut state, &mut readiness);
let res = processor_helper.process(processor).await;
// make sure that the error is due to announce fail:
assert_announce_error(res);
assert!(!readiness.ready.pending().is_empty());
state.drain().await;
server.await.unwrap();
}
#[tokio::test]
async fn test_process_failed() {
let f = fixture!();
@ -408,17 +505,17 @@ pub(crate) mod tests {
let mut state = f.state;
let podns = new_netns();
let socket = create_proxy_confilct(&podns);
let socket = create_proxy_conflict(&podns);
let server = tokio::spawn(async move {
read_hello(&mut s2).await;
send_workload_added(&mut s2, uid(0), podns).await;
send_workload_added(&mut s2, uid(0), workload_info(), podns).await;
read_msg(&mut s2).await;
send_snap_sent(&mut s2).await;
read_msg(&mut s2).await;
});
let mut readiness = WorkloadProxyReadinessHandler::new(readiness::Ready::new());
let mut readiness = WorkloadProxyReadinessHandler::new(readiness::Ready::new(), None);
let mut processor_helper = WorkloadProxyManagerProcessor::new(&mut state, &mut readiness);
let res = processor_helper.process(processor).await;
@ -451,7 +548,7 @@ pub(crate) mod tests {
let podns = new_netns();
let server = tokio::spawn(async move {
read_hello(&mut s2).await;
send_workload_added(&mut s2, uid(0), podns).await;
send_workload_added(&mut s2, uid(0), workload_info(), podns).await;
read_msg(&mut s2).await;
send_snap_sent(&mut s2).await;
read_msg(&mut s2).await;
@ -459,7 +556,7 @@ pub(crate) mod tests {
read_msg(&mut s2).await;
});
let mut readiness = WorkloadProxyReadinessHandler::new(readiness::Ready::new());
let mut readiness = WorkloadProxyReadinessHandler::new(readiness::Ready::new(), None);
let mut processor_helper = WorkloadProxyManagerProcessor::new(&mut state, &mut readiness);
let res = processor_helper.process(processor).await;
@ -468,7 +565,7 @@ pub(crate) mod tests {
assert_end_stream(res);
assert_eq!(state.workload_states().len(), 0);
assert_eq!(m.active_proxy_count.get_or_create(&()).get(), 0);
assert_eq!(m.active_proxy_count.get(), 0);
assert!(readiness.ready.pending().is_empty());
state.drain().await;
@ -484,15 +581,15 @@ pub(crate) mod tests {
let server = tokio::spawn(async move {
read_hello(&mut s2).await;
send_workload_added(&mut s2, uid(0), new_netns()).await;
send_workload_added(&mut s2, uid(0), workload_info(), new_netns()).await;
read_msg(&mut s2).await;
send_workload_added(&mut s2, uid(1), new_netns()).await;
send_workload_added(&mut s2, uid(1), workload_info(), new_netns()).await;
read_msg(&mut s2).await;
send_snap_sent(&mut s2).await;
read_msg(&mut s2).await;
});
let mut readiness = WorkloadProxyReadinessHandler::new(readiness::Ready::new());
let mut readiness = WorkloadProxyReadinessHandler::new(readiness::Ready::new(), None);
let mut processor_helper = WorkloadProxyManagerProcessor::new(&mut state, &mut readiness);
let res = processor_helper.process(processor).await;
@ -505,13 +602,10 @@ pub(crate) mod tests {
assert_eq!(state.workload_states().len(), 2);
let key_set: HashSet<crate::inpod::WorkloadUid> =
state.workload_states().keys().cloned().collect();
let expected_key_set: HashSet<crate::inpod::WorkloadUid> = [0, 1]
.into_iter()
.map(uid)
.map(crate::inpod::WorkloadUid::from)
.collect();
let expected_key_set: HashSet<crate::inpod::WorkloadUid> =
[0, 1].into_iter().map(uid).collect();
assert_eq!(key_set, expected_key_set);
assert_eq!(m.active_proxy_count.get_or_create(&()).get(), 2);
assert_eq!(m.active_proxy_count.get(), 2);
// second connection - don't send the one of the proxies here, to see ztunnel reconciles and removes it:
let (s1, mut s2) = UnixStream::pair().unwrap();
@ -519,7 +613,7 @@ pub(crate) mod tests {
let server = tokio::spawn(async move {
read_hello(&mut s2).await;
send_workload_added(&mut s2, uid(1), new_netns()).await;
send_workload_added(&mut s2, uid(1), workload_info(), new_netns()).await;
read_msg(&mut s2).await;
send_snap_sent(&mut s2).await;
read_msg(&mut s2).await;
@ -534,7 +628,7 @@ pub(crate) mod tests {
// only second workload should remain
assert_eq!(state.workload_states().len(), 1);
assert_eq!(state.workload_states().keys().next(), Some(&uid(1)));
assert_eq!(m.active_proxy_count.get_or_create(&()).get(), 1);
assert_eq!(m.active_proxy_count.get(), 1);
assert!(readiness.ready.pending().is_empty());
state.drain().await;


@ -12,12 +12,18 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use once_cell::sync::Lazy;
use std::env;
pub mod admin;
pub mod app;
pub mod assertions;
pub mod baggage;
pub mod cert_fetcher;
pub mod config;
pub mod copy;
pub mod dns;
pub mod drain;
pub mod hyper_util;
pub mod identity;
#[cfg(target_os = "linux")]
@ -30,6 +36,7 @@ pub mod readiness;
pub mod signal;
pub mod socket;
pub mod state;
pub mod strng;
pub mod telemetry;
pub mod time;
pub mod tls;
@ -38,3 +45,7 @@ pub mod xds;
#[cfg(any(test, feature = "testing"))]
pub mod test_helpers;
#[allow(dead_code)]
static PQC_ENABLED: Lazy<bool> =
Lazy::new(|| env::var("COMPLIANCE_POLICY").unwrap_or_default() == "pqc");
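One note on the `Lazy` above: `COMPLIANCE_POLICY` is read once, on first dereference, and cached for the life of the process. A tiny sketch:

```rust
// Sketch: the first deref evaluates the closure (reading the env var); later
// derefs return the cached bool, so runtime changes to the variable are ignored.
fn pqc_enabled() -> bool {
    *PQC_ENABLED
}
```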


@ -14,23 +14,43 @@
extern crate core;
use tracing::info;
use nix::sys::resource::{Resource, getrlimit, setrlimit};
use std::sync::Arc;
use tracing::{info, warn};
use ztunnel::*;
#[cfg(feature = "jemalloc")]
use tikv_jemallocator;
#[cfg(feature = "jemalloc")]
#[global_allocator]
static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
#[cfg(feature = "jemalloc")]
#[allow(non_upper_case_globals)]
#[export_name = "malloc_conf"]
#[unsafe(export_name = "malloc_conf")]
pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:19\0";
// We use this on Unix systems to increase the number of open file descriptors
// if possible. This is useful for high-load scenarios where the default limit
// is too low, which can lead to dropped connections and other issues:
// see: https://github.com/istio/ztunnel/issues/1585
fn increase_open_files_limit() {
#[cfg(unix)]
if let Ok((soft_limit, hard_limit)) = getrlimit(Resource::RLIMIT_NOFILE) {
if let Err(e) = setrlimit(Resource::RLIMIT_NOFILE, hard_limit, hard_limit) {
warn!("failed to set file descriptor limits: {e}");
} else {
info!(
"set file descriptor limits from {} to {}",
soft_limit, hard_limit
);
}
} else {
warn!("failed to get file descriptor limits");
}
}
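A hedged companion to the function above: after it runs, the soft `RLIMIT_NOFILE` should equal the hard limit, which can be checked with the same nix API (unix-only, like the function itself):

```rust
use nix::sys::resource::{Resource, getrlimit};

// Sketch: verify the soft limit was raised to the hard limit.
fn assert_limit_raised() {
    let (soft, hard) = getrlimit(Resource::RLIMIT_NOFILE).expect("getrlimit");
    assert_eq!(soft, hard, "soft NOFILE limit should match the hard limit");
}
```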
fn main() -> anyhow::Result<()> {
telemetry::setup_logging();
let config: config::Config = config::parse_config()?;
let _log_flush = telemetry::setup_logging();
// For now we don't need a complex CLI, so rather than pull in dependencies just use basic argv[1]
match std::env::args().nth(1).as_deref() {
@ -48,7 +68,10 @@ fn main() -> anyhow::Result<()> {
.enable_all()
.build()
.unwrap()
.block_on(async move { proxy(config).await })
.block_on(async move {
let config = Arc::new(config::parse_config()?);
proxy(config).await
})
}
fn help() -> anyhow::Result<()> {
@ -70,8 +93,9 @@ fn version() -> anyhow::Result<()> {
Ok(())
}
async fn proxy(cfg: config::Config) -> anyhow::Result<()> {
async fn proxy(cfg: Arc<config::Config>) -> anyhow::Result<()> {
info!("version: {}", version::BuildInfo::new());
increase_open_files_limit();
info!("running with config: {}", serde_yaml::to_string(&cfg)?);
app::build(cfg).await?.wait_termination().await
}


@ -18,12 +18,15 @@ use std::mem;
use prometheus_client::encoding::{EncodeLabelValue, LabelValueEncoder};
use prometheus_client::registry::Registry;
use tracing::error;
use tracing::field::{DisplayValue, display};
use tracing_core::field::Value;
use crate::identity::Identity;
pub mod meta;
pub mod server;
use crate::strng::{RichStrng, Strng};
pub use server::*;
/// Creates a metrics sub registry for Istio.
@ -98,10 +101,19 @@ where
}
}
#[derive(Default, Hash, PartialEq, Eq, Clone, Debug)]
#[derive(Hash, PartialEq, Eq, Clone, Debug)]
// DefaultedUnknown is a wrapper around an Option that encodes as "unknown" when missing, rather than ""
pub struct DefaultedUnknown<T>(Option<T>);
impl DefaultedUnknown<RichStrng> {
pub fn display(&self) -> Option<DisplayValue<&str>> {
self.as_ref().map(|rs| display(rs.as_str()))
}
pub fn to_value(&self) -> Option<impl Value + '_> {
self.as_ref().map(|rs| rs.as_str())
}
}
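To make the `DefaultedUnknown` contract concrete, here is a sketch built only on the conversions shown in this diff: empty inputs become `None` (later encoded as "unknown" in metric labels), non-empty inputs are preserved.

```rust
fn demo() {
    // Empty string -> None, which the label encoder renders as "unknown".
    let missing: DefaultedUnknown<String> = String::new().into();
    assert_eq!(missing.inner(), None);

    // Non-empty string -> Some, rendered as-is.
    let present: DefaultedUnknown<String> = "ns/name".to_string().into();
    assert_eq!(present.inner(), Some("ns/name".to_string()));
}
```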
impl<T> DefaultedUnknown<T> {
pub fn inner(self) -> Option<T> {
self.0
@ -111,6 +123,14 @@ impl<T> DefaultedUnknown<T> {
}
}
impl<T> Default for DefaultedUnknown<T> {
fn default() -> Self {
Self(None)
}
}
// Surely there is a less verbose way to do this, but I cannot find one.
impl From<String> for DefaultedUnknown<String> {
fn from(t: String) -> Self {
if t.is_empty() {
@ -121,6 +141,42 @@ impl From<String> for DefaultedUnknown<String> {
}
}
impl From<RichStrng> for DefaultedUnknown<RichStrng> {
fn from(t: RichStrng) -> Self {
if t.is_empty() {
DefaultedUnknown(None)
} else {
DefaultedUnknown(Some(t))
}
}
}
impl From<String> for DefaultedUnknown<RichStrng> {
fn from(t: String) -> Self {
if t.is_empty() {
DefaultedUnknown(None)
} else {
DefaultedUnknown(Some(t.into()))
}
}
}
impl From<Strng> for DefaultedUnknown<RichStrng> {
fn from(t: Strng) -> Self {
if t.is_empty() {
DefaultedUnknown(None)
} else {
DefaultedUnknown(Some(t.into()))
}
}
}
impl From<Option<Strng>> for DefaultedUnknown<RichStrng> {
fn from(t: Option<Strng>) -> Self {
DefaultedUnknown(t.map(RichStrng::from))
}
}
impl<T> From<Option<T>> for DefaultedUnknown<T> {
fn from(t: Option<T>) -> Self {
DefaultedUnknown(t)


@ -16,7 +16,6 @@ use bytes::Bytes;
use std::sync::Mutex;
use std::{net::SocketAddr, sync::Arc};
use drain::Watch;
use http_body_util::Full;
use hyper::body::Incoming;
use hyper::{Request, Response};
@ -24,6 +23,7 @@ use prometheus_client::encoding::text::encode;
use prometheus_client::registry::Registry;
use crate::config::Config;
use crate::drain::DrainWatcher;
use crate::hyper_util;
pub struct Server {
@ -31,7 +31,11 @@ pub struct Server {
}
impl Server {
pub async fn new(config: Config, drain_rx: Watch, registry: Registry) -> anyhow::Result<Self> {
pub async fn new(
config: Arc<Config>,
drain_rx: DrainWatcher,
registry: Registry,
) -> anyhow::Result<Self> {
hyper_util::Server::<Mutex<Registry>>::bind(
"stats",
config.stats_addr,
@ -58,7 +62,7 @@ impl Server {
async fn handle_metrics(
reg: Arc<Mutex<Registry>>,
_req: Request<Incoming>,
req: Request<Incoming>,
) -> Response<Full<Bytes>> {
let mut buf = String::new();
let reg = reg.lock().expect("mutex");
@ -69,12 +73,82 @@ async fn handle_metrics(
.expect("builder with known status code should not fail");
}
let response_content_type = content_type(&req);
Response::builder()
.status(hyper::StatusCode::OK)
.header(
hyper::header::CONTENT_TYPE,
"application/openmetrics-text;charset=utf-8;version=1.0.0",
)
.header(hyper::header::CONTENT_TYPE, response_content_type)
.body(buf.into())
.expect("builder with known status code should not fail")
}
#[derive(Default)]
enum ContentType {
#[default]
PlainText,
OpenMetrics,
}
impl From<ContentType> for &str {
fn from(c: ContentType) -> Self {
match c {
ContentType::PlainText => "text/plain; charset=utf-8",
ContentType::OpenMetrics => "application/openmetrics-text;charset=utf-8;version=1.0.0",
}
}
}
#[inline(always)]
fn content_type<T>(req: &Request<T>) -> &str {
req.headers()
.get_all(http::header::ACCEPT)
.iter()
.find_map(|v| {
match v
.to_str()
.unwrap_or_default()
.to_lowercase()
.split(";")
.collect::<Vec<_>>()
.first()
{
Some(&"application/openmetrics-text") => Some(ContentType::OpenMetrics),
_ => None,
}
})
.unwrap_or_default()
.into()
}
mod test {
#[test]
fn test_content_type() {
let plain_text_req = http::Request::new("I want some plain text");
assert_eq!(
super::content_type(&plain_text_req),
"text/plain; charset=utf-8"
);
let openmetrics_req = http::Request::builder()
.header("X-Custom-Beep", "boop")
.header("Accept", "application/json")
.header("Accept", "application/openmetrics-text; other stuff")
.body("I would like openmetrics")
.unwrap();
assert_eq!(
super::content_type(&openmetrics_req),
"application/openmetrics-text;charset=utf-8;version=1.0.0"
);
let unsupported_req_accept = http::Request::builder()
.header("Accept", "application/json")
.body("I would like some json")
.unwrap();
// asking for something we don't support, fall back to plaintext
assert_eq!(
super::content_type(&unsupported_req_accept),
"text/plain; charset=utf-8"
)
}
}

(File diff suppressed because it is too large.)


@ -12,45 +12,53 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::proxy::error;
use crate::rbac;
use crate::proxy::Error;
use crate::state::DemandProxyState;
use crate::state::ProxyRbacContext;
use drain;
use serde::{Serialize, Serializer};
use std::collections::HashMap;
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::fmt::Formatter;
use std::net::SocketAddr;
use crate::drain;
use crate::drain::{DrainTrigger, DrainWatcher};
use crate::state::workload::{InboundProtocol, OutboundProtocol};
use std::sync::Arc;
use std::sync::RwLock;
use tracing::info;
use tracing::{debug, error, info, warn};
struct ConnectionDrain {
// TODO: this should almost certainly be changed to a type which has counted references exposed.
// tokio::sync::watch can be subscribed without taking a write lock and exposes references
// and also a receiver_count method
tx: drain::Signal,
rx: drain::Watch,
tx: DrainTrigger,
rx: DrainWatcher,
count: usize,
}
impl ConnectionDrain {
fn new() -> Self {
let (tx, rx) = drain::channel();
ConnectionDrain { tx, rx, count: 0 }
let (tx, rx) = drain::new();
ConnectionDrain { tx, rx, count: 1 }
}
/// drain drops the internal reference to rx and then signals drain on the tx
// always inline, this is for convenience so that we don't forget to drop the rx but there's really no reason it needs to grow the stack
#[inline(always)]
async fn drain(self) {
drop(self.rx); // very important, drain cannont complete if there are outstand rx
self.tx.drain().await;
drop(self.rx); // very important, drain cannot complete if there are outstanding rx
self.tx
.start_drain_and_wait(drain::DrainMode::Immediate)
.await;
}
}
#[derive(Clone)]
pub struct ConnectionManager {
drains: Arc<RwLock<HashMap<ProxyRbacContext, ConnectionDrain>>>,
drains: Arc<RwLock<HashMap<InboundConnection, ConnectionDrain>>>,
outbound_connections: Arc<RwLock<HashSet<OutboundConnection>>>,
}
impl std::fmt::Debug for ConnectionManager {
@ -63,45 +71,165 @@ impl Default for ConnectionManager {
fn default() -> Self {
ConnectionManager {
drains: Arc::new(RwLock::new(HashMap::new())),
outbound_connections: Arc::new(RwLock::new(HashSet::new())),
}
}
}
pub struct ConnectionGuard {
cm: ConnectionManager,
conn: InboundConnection,
watch: Option<DrainWatcher>,
}
// For reasons that I don't fully understand, this uses an obscene amount of stack space when written as a normal function,
// amounting to ~1kb overhead per connection.
// Inlining it removes this entirely, and the macro ensures we do it consistently across the various areas we use it.
#[macro_export]
macro_rules! handle_connection {
($connguard:expr, $future:expr) => {{
let watch = $connguard.watcher();
tokio::select! {
res = $future => {
$connguard.release();
res
}
_signaled = watch.wait_for_drain() => Err(Error::AuthorizationPolicyLateRejection)
}
}};
}
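A hypothetical call site for the macro above (the function name and the inner future's body are illustrative, not from this diff): the proxied work races against the connection's drain watcher, so a mid-stream policy revocation rejects the connection.

```rust
async fn proxy_one(
    cm: &ConnectionManager,
    state: &DemandProxyState,
    ctx: &ProxyRbacContext,
) -> Result<(), Error> {
    // Registers the connection and asserts policy; the guard carries the watcher.
    let mut guard = cm.assert_rbac(state, ctx, None).await?;
    handle_connection!(guard, async {
        // ... bidirectional copy between downstream and upstream would go here ...
        Ok::<(), Error>(())
    })
}
```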
impl ConnectionGuard {
pub fn watcher(&mut self) -> drain::DrainWatcher {
self.watch.take().expect("watch cannot be taken twice")
}
pub fn release(self) {
self.cm.release(&self.conn);
}
}
impl Drop for ConnectionGuard {
fn drop(&mut self) {
if self.watch.is_some() {
debug!("rbac context {:?} auto-dropped", &self.conn);
self.cm.release(&self.conn)
}
}
}
pub struct OutboundConnectionGuard {
cm: ConnectionManager,
conn: OutboundConnection,
}
impl Drop for OutboundConnectionGuard {
fn drop(&mut self) {
self.cm.release_outbound(&self.conn)
}
}
#[derive(Debug, Clone, Eq, Hash, Ord, PartialEq, PartialOrd, serde::Serialize)]
#[serde(rename_all = "camelCase")]
pub struct OutboundConnection {
pub src: SocketAddr,
pub original_dst: SocketAddr,
pub actual_dst: SocketAddr,
pub protocol: OutboundProtocol,
}
#[derive(Debug, Clone, Eq, Hash, Ord, PartialEq, PartialOrd, serde::Serialize)]
#[serde(rename_all = "camelCase")]
pub struct InboundConnectionDump {
pub src: SocketAddr,
pub original_dst: Option<String>,
pub actual_dst: SocketAddr,
pub protocol: InboundProtocol,
}
#[derive(Debug, Clone, Eq, PartialEq, Hash, serde::Serialize)]
#[serde(rename_all = "camelCase")]
pub struct InboundConnection {
#[serde(flatten)]
pub ctx: ProxyRbacContext,
pub dest_service: Option<String>,
}
impl ConnectionManager {
pub fn track_outbound(
&self,
src: SocketAddr,
original_dst: SocketAddr,
actual_dst: SocketAddr,
protocol: OutboundProtocol,
) -> OutboundConnectionGuard {
let c = OutboundConnection {
src,
original_dst,
actual_dst,
protocol,
};
self.outbound_connections
.write()
.expect("mutex")
.insert(c.clone());
OutboundConnectionGuard {
cm: self.clone(),
conn: c,
}
}
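A usage sketch for `track_outbound` (the `OutboundProtocol::TCP` variant is assumed here): the guard keeps the connection listed in the admin dump, and dropping it triggers `release_outbound`.

```rust
use std::net::SocketAddr;

fn track_example(cm: &ConnectionManager, src: SocketAddr, dst: SocketAddr) {
    // Original and actual destination coincide here for simplicity.
    let _guard = cm.track_outbound(src, dst, dst, OutboundProtocol::TCP);
    // ... connection lives while _guard is alive; drop de-registers it ...
}
```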
pub async fn assert_rbac(
&self,
state: &DemandProxyState,
ctx: &ProxyRbacContext,
dest_service: Option<String>,
) -> Result<ConnectionGuard, Error> {
// Register before our initial assert. This prevents a race if policy changes between assert() and
// track()
let conn = InboundConnection {
ctx: ctx.clone(),
dest_service,
};
let Some(watch) = self.register(&conn) else {
warn!("failed to track {conn:?}");
debug_assert!(false, "failed to track {conn:?}");
return Err(Error::ConnectionTrackingFailed);
};
if let Err(err) = state.assert_rbac(ctx).await {
self.release(&conn);
return Err(Error::AuthorizationPolicyRejection(err));
}
Ok(ConnectionGuard {
cm: self.clone(),
conn,
watch: Some(watch),
})
}
// register a connection with the connection manager
// this must be done before a connection can be tracked
// allows policy to be asserted against the connection
// even if no tasks have a receiver channel yet
pub fn register(&self, c: &ProxyRbacContext) {
self.drains
.write()
.expect("mutex")
.entry(c.clone())
.or_insert(ConnectionDrain::new());
}
// get a channel to receive close on for your connection
// requires that the connection be registered first
// if you receive None this connection is invalid and should close
pub fn track(&self, c: &ProxyRbacContext) -> Option<drain::Watch> {
match self
.drains
.write()
.expect("mutex")
.entry(c.to_owned())
.and_modify(|cd| cd.count += 1)
{
std::collections::hash_map::Entry::Occupied(cd) => {
fn register(&self, c: &InboundConnection) -> Option<DrainWatcher> {
match self.drains.write().expect("mutex").entry(c.clone()) {
Entry::Occupied(mut cd) => {
cd.get_mut().count += 1;
let rx = cd.get().rx.clone();
Some(rx)
}
std::collections::hash_map::Entry::Vacant(_) => None,
Entry::Vacant(entry) => {
let drain = ConnectionDrain::new();
let rx = drain.rx.clone();
entry.insert(drain);
Some(rx)
}
}
}
// releases tracking on a connection
// uses a counter to determine whether other tracked connections remain, so the tx/rx channels are retained when necessary
pub fn release(&self, c: &ProxyRbacContext) {
pub fn release(&self, c: &InboundConnection) {
let mut drains = self.drains.write().expect("mutex");
if let Some((k, mut v)) = drains.remove_entry(c) {
if v.count > 1 {
@ -112,8 +240,12 @@ impl ConnectionManager {
}
}
fn release_outbound(&self, c: &OutboundConnection) {
self.outbound_connections.write().expect("mutex").remove(c);
}
// signal all connections listening to this channel to take action (typically terminate traffic)
async fn close(&self, c: &ProxyRbacContext) {
async fn close(&self, c: &InboundConnection) {
let drain = { self.drains.write().expect("mutex").remove(c) };
if let Some(cd) = drain {
cd.drain().await;
@ -124,23 +256,16 @@ impl ConnectionManager {
}
// get a list of all connections being tracked
pub fn connections(&self) -> Vec<ProxyRbacContext> {
pub fn connections(&self) -> Vec<InboundConnection> {
// potentially large copy under read lock, could require optimization
self.drains.read().expect("mutex").keys().cloned().collect()
}
}
// get a dump (for admin API) for connects.
// This just avoids the redundant dest_workload_info
pub fn connections_dump(&self) -> Vec<rbac::Connection> {
// potentially large copy under read lock, could require optimization
self.drains
.read()
.expect("mutex")
.keys()
.cloned()
.map(|c| c.conn)
.collect()
}
#[derive(serde::Serialize)]
struct ConnectionManagerDump {
inbound: Vec<InboundConnectionDump>,
outbound: Vec<OutboundConnection>,
}
impl Serialize for ConnectionManager {
@ -148,21 +273,45 @@ impl Serialize for ConnectionManager {
where
S: Serializer,
{
let conns = self.connections_dump();
conns.serialize(serializer)
let inbound: Vec<_> = self
.drains
.read()
.expect("mutex")
.keys()
.cloned()
.map(|c| InboundConnectionDump {
src: c.ctx.conn.src,
original_dst: c.dest_service,
actual_dst: c.ctx.conn.dst,
protocol: if c.ctx.conn.src_identity.is_some() {
InboundProtocol::HBONE
} else {
InboundProtocol::TCP
},
})
.collect();
let outbound: Vec<_> = self
.outbound_connections
.read()
.expect("mutex")
.iter()
.cloned()
.collect();
let dump = ConnectionManagerDump { inbound, outbound };
dump.serialize(serializer)
}
}
pub struct PolicyWatcher {
state: DemandProxyState,
stop: drain::Watch,
stop: DrainWatcher,
connection_manager: ConnectionManager,
}
impl PolicyWatcher {
pub fn new(
state: DemandProxyState,
stop: drain::Watch,
stop: DrainWatcher,
connection_manager: ConnectionManager,
) -> Self {
PolicyWatcher {
@ -176,15 +325,15 @@ impl PolicyWatcher {
let mut policies_changed = self.state.read().policies.subscribe();
loop {
tokio::select! {
_ = self.stop.clone().signaled() => {
_ = self.stop.clone().wait_for_drain() => {
break;
}
_ = policies_changed.changed() => {
let connections = self.connection_manager.connections();
for conn in connections {
if !self.state.assert_rbac(&conn).await {
if self.state.assert_rbac(&conn.ctx).await.is_err() {
self.connection_manager.close(&conn).await;
info!("connection {conn} closed because it's no longer allowed after a policy update");
info!("connection {} closed because it's no longer allowed after a policy update", conn.ctx);
}
}
}
@ -195,231 +344,248 @@ impl PolicyWatcher {
#[cfg(test)]
mod tests {
use drain::Watch;
use crate::drain;
use crate::drain::DrainWatcher;
use hickory_resolver::config::{ResolverConfig, ResolverOpts};
use prometheus_client::registry::Registry;
use std::net::{Ipv4Addr, SocketAddrV4};
use std::sync::{Arc, RwLock};
use std::time::Duration;
use crate::rbac::Connection;
use crate::state::{DemandProxyState, ProxyState};
use crate::xds::istio::security::{Action, Authorization, Scope};
use crate::test_helpers::test_default_workload;
use crate::xds::ProxyStateUpdateMutator;
use crate::xds::istio::security::{Action, Authorization, Scope};
use super::{ConnectionManager, PolicyWatcher};
use super::{ConnectionGuard, ConnectionManager, InboundConnection, PolicyWatcher};
#[tokio::test]
async fn test_connection_manager_close() {
// setup a new ConnectionManager
let connection_manager = ConnectionManager::default();
let cm = ConnectionManager::default();
// ensure drains is empty
assert_eq!(connection_manager.drains.read().unwrap().len(), 0);
assert_eq!(connection_manager.connections().len(), 0);
assert_eq!(cm.drains.read().unwrap().len(), 0);
assert_eq!(cm.connections().len(), 0);
let register = |cm: &ConnectionManager, c: &InboundConnection| {
let cm = cm.clone();
let c = c.clone();
let watch = cm.register(&c).unwrap();
ConnectionGuard {
cm,
conn: c,
watch: Some(watch),
}
};
// track a new connection
let rbac_ctx1 = crate::state::ProxyRbacContext {
conn: Connection {
src_identity: None,
src: std::net::SocketAddr::new(std::net::Ipv4Addr::new(192, 168, 0, 1).into(), 80),
dst_network: "".to_string(),
dst: std::net::SocketAddr::V4(SocketAddrV4::new(
Ipv4Addr::new(192, 168, 0, 2),
8080,
)),
let rbac_ctx1 = InboundConnection {
ctx: crate::state::ProxyRbacContext {
conn: Connection {
src_identity: None,
src: std::net::SocketAddr::new(
std::net::Ipv4Addr::new(192, 168, 0, 1).into(),
80,
),
dst_network: "".into(),
dst: std::net::SocketAddr::V4(SocketAddrV4::new(
Ipv4Addr::new(192, 168, 0, 2),
8080,
)),
},
dest_workload: Arc::new(test_default_workload()),
},
dest_workload_info: None,
dest_service: None,
};
// assert that tracking an unregistered connection is None
let close1 = connection_manager.track(&rbac_ctx1);
assert!(close1.is_none());
assert_eq!(connection_manager.drains.read().unwrap().len(), 0);
assert_eq!(connection_manager.connections().len(), 0);
connection_manager.register(&rbac_ctx1);
assert_eq!(connection_manager.drains.read().unwrap().len(), 1);
assert_eq!(connection_manager.connections().len(), 1);
assert_eq!(connection_manager.connections(), vec!(rbac_ctx1.clone()));
let close1 = connection_manager
.track(&rbac_ctx1)
.expect("should not be None");
// ensure drains contains exactly 1 item
assert_eq!(connection_manager.drains.read().unwrap().len(), 1);
assert_eq!(connection_manager.connections().len(), 1);
assert_eq!(connection_manager.connections(), vec!(rbac_ctx1.clone()));
let mut close1 = register(&cm, &rbac_ctx1);
assert_eq!(cm.drains.read().unwrap().len(), 1);
assert_eq!(cm.connections().len(), 1);
assert_eq!(cm.connections(), vec!(rbac_ctx1.clone()));
// setup a second track on the same connection
let another_conn1 = rbac_ctx1.clone();
let another_close1 = connection_manager
.track(&another_conn1)
.expect("should not be None");
let mut another_close1 = register(&cm, &rbac_ctx1);
// ensure drains contains exactly 1 item
assert_eq!(connection_manager.drains.read().unwrap().len(), 1);
assert_eq!(connection_manager.connections().len(), 1);
assert_eq!(connection_manager.connections(), vec!(rbac_ctx1.clone()));
assert_eq!(cm.drains.read().unwrap().len(), 1);
assert_eq!(cm.connections().len(), 1);
assert_eq!(cm.connections(), vec!(rbac_ctx1.clone()));
// track a second connection
let rbac_ctx2 = crate::state::ProxyRbacContext {
conn: Connection {
src_identity: None,
src: std::net::SocketAddr::new(std::net::Ipv4Addr::new(192, 168, 0, 3).into(), 80),
dst_network: "".to_string(),
dst: std::net::SocketAddr::V4(SocketAddrV4::new(
Ipv4Addr::new(192, 168, 0, 2),
8080,
)),
let rbac_ctx2 = InboundConnection {
ctx: crate::state::ProxyRbacContext {
conn: Connection {
src_identity: None,
src: std::net::SocketAddr::new(
std::net::Ipv4Addr::new(192, 168, 0, 3).into(),
80,
),
dst_network: "".into(),
dst: std::net::SocketAddr::V4(SocketAddrV4::new(
Ipv4Addr::new(192, 168, 0, 2),
8080,
)),
},
dest_workload: Arc::new(test_default_workload()),
},
dest_workload_info: None,
dest_service: None,
};
connection_manager.register(&rbac_ctx2);
let close2 = connection_manager
.track(&rbac_ctx2)
.expect("should not be None");
let mut close2 = register(&cm, &rbac_ctx2);
// ensure drains contains exactly 2 items
assert_eq!(connection_manager.drains.read().unwrap().len(), 2);
assert_eq!(connection_manager.connections().len(), 2);
let mut connections = connection_manager.connections();
connections.sort(); // ordering cannot be guaranteed without sorting
assert_eq!(cm.drains.read().unwrap().len(), 2);
assert_eq!(cm.connections().len(), 2);
let mut connections = cm.connections();
// ordering cannot be guaranteed without sorting
connections.sort_by(|a, b| a.ctx.conn.cmp(&b.ctx.conn));
assert_eq!(connections, vec![rbac_ctx1.clone(), rbac_ctx2.clone()]);
// spawn tasks to assert that we close in a timely manner for rbac_ctx1
tokio::spawn(assert_close(close1));
tokio::spawn(assert_close(another_close1));
tokio::spawn(assert_close(close1.watch.take().unwrap()));
tokio::spawn(assert_close(another_close1.watch.take().unwrap()));
// close rbac_ctx1
connection_manager.close(&rbac_ctx1).await;
cm.close(&rbac_ctx1).await;
// ensure drains contains exactly 1 item
assert_eq!(connection_manager.drains.read().unwrap().len(), 1);
assert_eq!(connection_manager.connections().len(), 1);
assert_eq!(connection_manager.connections(), vec!(rbac_ctx2.clone()));
assert_eq!(cm.drains.read().unwrap().len(), 1);
assert_eq!(cm.connections().len(), 1);
assert_eq!(cm.connections(), vec!(rbac_ctx2.clone()));
// spawn a task to assert that we close in a timely manner for rbac_ctx2
tokio::spawn(assert_close(close2));
tokio::spawn(assert_close(close2.watch.take().unwrap()));
// close rbac_ctx2
connection_manager.close(&rbac_ctx2).await;
cm.close(&rbac_ctx2).await;
// assert that drains is empty again
assert_eq!(connection_manager.drains.read().unwrap().len(), 0);
assert_eq!(connection_manager.connections().len(), 0);
assert_eq!(cm.drains.read().unwrap().len(), 0);
assert_eq!(cm.connections().len(), 0);
}
#[tokio::test]
async fn test_connection_manager_release() {
// setup a new ConnectionManager
let connection_manager = ConnectionManager::default();
let cm = ConnectionManager::default();
// ensure drains is empty
assert_eq!(connection_manager.drains.read().unwrap().len(), 0);
assert_eq!(connection_manager.connections().len(), 0);
assert_eq!(cm.drains.read().unwrap().len(), 0);
assert_eq!(cm.connections().len(), 0);
let register = |cm: &ConnectionManager, c: &InboundConnection| {
let cm = cm.clone();
let c = c.clone();
let watch = cm.register(&c).unwrap();
ConnectionGuard {
cm,
conn: c,
watch: Some(watch),
}
};
// create a new connection
let conn1 = crate::state::ProxyRbacContext {
conn: Connection {
src_identity: None,
src: std::net::SocketAddr::new(std::net::Ipv4Addr::new(192, 168, 0, 1).into(), 80),
dst_network: "".to_string(),
dst: std::net::SocketAddr::V4(SocketAddrV4::new(
Ipv4Addr::new(192, 168, 0, 2),
8080,
)),
let conn1 = InboundConnection {
ctx: crate::state::ProxyRbacContext {
conn: Connection {
src_identity: None,
src: std::net::SocketAddr::new(
std::net::Ipv4Addr::new(192, 168, 0, 1).into(),
80,
),
dst_network: "".into(),
dst: std::net::SocketAddr::V4(SocketAddrV4::new(
Ipv4Addr::new(192, 168, 0, 2),
8080,
)),
},
dest_workload: Arc::new(test_default_workload()),
},
dest_workload_info: None,
dest_service: None,
};
// create a second connection
let conn2 = crate::state::ProxyRbacContext {
conn: Connection {
src_identity: None,
src: std::net::SocketAddr::new(std::net::Ipv4Addr::new(192, 168, 0, 3).into(), 80),
dst_network: "".to_string(),
dst: std::net::SocketAddr::V4(SocketAddrV4::new(
Ipv4Addr::new(192, 168, 0, 2),
8080,
)),
let conn2 = InboundConnection {
ctx: crate::state::ProxyRbacContext {
conn: Connection {
src_identity: None,
src: std::net::SocketAddr::new(
std::net::Ipv4Addr::new(192, 168, 0, 3).into(),
80,
),
dst_network: "".into(),
dst: std::net::SocketAddr::V4(SocketAddrV4::new(
Ipv4Addr::new(192, 168, 0, 2),
8080,
)),
},
dest_workload: Arc::new(test_default_workload()),
},
dest_workload_info: None,
dest_service: None,
};
let another_conn1 = conn1.clone();
connection_manager.register(&conn1);
let close1 = register(&cm, &conn1);
let another_close1 = register(&cm, &another_conn1);
// watch the connections
let close1 = connection_manager
.track(&conn1)
.expect("should not be None");
let another_close1 = connection_manager
.track(&another_conn1)
.expect("should not be None");
// ensure drains contains exactly 1 item
assert_eq!(connection_manager.drains.read().unwrap().len(), 1);
assert_eq!(connection_manager.connections().len(), 1);
assert_eq!(connection_manager.connections(), vec!(conn1.clone()));
assert_eq!(cm.drains.read().unwrap().len(), 1);
assert_eq!(cm.connections().len(), 1);
assert_eq!(cm.connections(), vec!(conn1.clone()));
// release conn1's clone
drop(another_close1);
connection_manager.release(&another_conn1);
// ensure drains still contains exactly 1 item
assert_eq!(connection_manager.drains.read().unwrap().len(), 1);
assert_eq!(connection_manager.connections().len(), 1);
assert_eq!(connection_manager.connections(), vec!(conn1.clone()));
assert_eq!(cm.drains.read().unwrap().len(), 1);
assert_eq!(cm.connections().len(), 1);
assert_eq!(cm.connections(), vec!(conn1.clone()));
connection_manager.register(&conn2);
// track conn2
let close2 = connection_manager
.track(&conn2)
.expect("should not be None");
let close2 = register(&cm, &conn2);
// ensure drains contains exactly 2 items
assert_eq!(connection_manager.drains.read().unwrap().len(), 2);
assert_eq!(connection_manager.connections().len(), 2);
let mut connections = connection_manager.connections();
connections.sort(); // ordering cannot be guaranteed without sorting
assert_eq!(cm.drains.read().unwrap().len(), 2);
assert_eq!(cm.connections().len(), 2);
let mut connections = cm.connections();
// ordering cannot be guaranteed without sorting
connections.sort_by(|a, b| a.ctx.conn.cmp(&b.ctx.conn));
assert_eq!(connections, vec![conn1.clone(), conn2.clone()]);
// release conn1
drop(close1);
connection_manager.release(&conn1);
// ensure drains contains exactly 1 item
assert_eq!(connection_manager.drains.read().unwrap().len(), 1);
assert_eq!(connection_manager.connections().len(), 1);
assert_eq!(connection_manager.connections(), vec!(conn2.clone()));
assert_eq!(cm.drains.read().unwrap().len(), 1);
assert_eq!(cm.connections().len(), 1);
assert_eq!(cm.connections(), vec!(conn2.clone()));
// clone conn2 and track it
let another_conn2 = conn2.clone();
let another_close2 = connection_manager
.track(&another_conn2)
.expect("should not be None");
drop(close2);
let another_close2 = register(&cm, &another_conn2);
// release tracking on conn2
connection_manager.release(&conn2);
drop(close2);
// ensure drains still contains exactly 1 item
assert_eq!(connection_manager.drains.read().unwrap().len(), 1);
assert_eq!(connection_manager.connections().len(), 1);
assert_eq!(
connection_manager.connections(),
vec!(another_conn2.clone())
);
assert_eq!(cm.drains.read().unwrap().len(), 1);
assert_eq!(cm.connections().len(), 1);
assert_eq!(cm.connections(), vec!(another_conn2.clone()));
// release tracking on conn2's clone
drop(another_close2);
connection_manager.release(&another_conn2);
// ensure drains contains exactly 0 items
assert_eq!(connection_manager.drains.read().unwrap().len(), 0);
assert_eq!(connection_manager.connections().len(), 0);
assert_eq!(cm.drains.read().unwrap().len(), 0);
assert_eq!(cm.connections().len(), 0);
}
#[tokio::test]
async fn test_policy_watcher_lifecycle() {
// preamble: setup an environment
let state = Arc::new(RwLock::new(ProxyState::default()));
let state = Arc::new(RwLock::new(ProxyState::new(None)));
let mut registry = Registry::default();
let metrics = Arc::new(crate::proxy::Metrics::new(&mut registry));
let dstate = DemandProxyState::new(
state.clone(),
None,
ResolverConfig::default(),
ResolverOpts::default(),
metrics,
);
let connection_manager = ConnectionManager::default();
let (tx, stop) = drain::channel();
let (tx, stop) = drain::new();
let state_mutator = ProxyStateUpdateMutator::new_no_fetch();
// clones to move into spawned task
@ -433,32 +599,43 @@ mod tests {
});
// create a test connection
let conn1 = crate::state::ProxyRbacContext {
conn: Connection {
src_identity: None,
src: std::net::SocketAddr::new(std::net::Ipv4Addr::new(192, 168, 0, 1).into(), 80),
dst_network: "".to_string(),
dst: std::net::SocketAddr::V4(SocketAddrV4::new(
Ipv4Addr::new(192, 168, 0, 2),
8080,
)),
let conn1 = InboundConnection {
ctx: crate::state::ProxyRbacContext {
conn: Connection {
src_identity: None,
src: std::net::SocketAddr::new(
std::net::Ipv4Addr::new(192, 168, 0, 1).into(),
80,
),
dst_network: "".into(),
dst: std::net::SocketAddr::V4(SocketAddrV4::new(
Ipv4Addr::new(192, 168, 0, 2),
8080,
)),
},
dest_workload: Arc::new(test_default_workload()),
},
dest_workload_info: None,
dest_service: None,
};
// watch the connection
connection_manager.register(&conn1);
let close1 = connection_manager
.track(&conn1)
.register(&conn1)
.expect("should not be None");
// generate policy which denies everything
let auth_name = "allow-nothing";
let auth_namespace = "default";
let auth = Authorization {
name: "allow-nothing".to_string(),
name: auth_name.into(),
action: Action::Deny as i32,
scope: Scope::Global as i32,
namespace: "default".to_string(),
namespace: auth_namespace.into(),
rules: vec![],
};
let mut auth_xds_name = String::with_capacity(1 + auth_namespace.len() + auth_name.len());
auth_xds_name.push_str(auth_namespace);
auth_xds_name.push('/');
auth_xds_name.push_str(auth_name);
// spawn an assertion that our connection close is received
tokio::spawn(assert_close(close1));
@ -469,18 +646,19 @@ mod tests {
let mut s = state
.write()
.expect("test fails if we're unable to get a write lock on state");
let res = state_mutator.insert_authorization(&mut s, auth);
let res =
state_mutator.insert_authorization(&mut s, auth_xds_name.clone().into(), auth);
// assert that the update was OK
assert!(res.is_ok());
} // release lock
// send the signal which stops policy watcher
tx.drain().await;
tx.start_drain_and_wait(drain::DrainMode::Immediate).await;
}
// small helper to assert that the Watches are working in a timely manner
async fn assert_close(c: Watch) {
let result = tokio::time::timeout(Duration::from_secs(1), c.signaled()).await;
async fn assert_close(c: DrainWatcher) {
let result = tokio::time::timeout(Duration::from_secs(1), c.wait_for_drain()).await;
assert!(result.is_ok())
}
}

src/proxy/h2.rs Normal file

@ -0,0 +1,310 @@
// Copyright Istio Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::copy;
use bytes::Bytes;
use futures_core::ready;
use h2::Reason;
use std::io::Error;
use std::pin::Pin;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicU16, Ordering};
use std::task::{Context, Poll};
use std::time::Duration;
use tokio::sync::oneshot;
use tracing::{error, trace};
pub mod client;
pub mod server;
async fn do_ping_pong(
mut ping_pong: h2::PingPong,
tx: oneshot::Sender<()>,
dropped: Arc<AtomicBool>,
) {
const PING_INTERVAL: Duration = Duration::from_secs(10);
const PING_TIMEOUT: Duration = Duration::from_secs(20);
// delay before sending the first ping, no need to race with the first request
tokio::time::sleep(PING_INTERVAL).await;
loop {
if dropped.load(Ordering::Relaxed) {
return;
}
let ping_fut = ping_pong.ping(h2::Ping::opaque());
log::trace!("ping sent");
match tokio::time::timeout(PING_TIMEOUT, ping_fut).await {
Err(_) => {
// We will log this again up in drive_connection, so don't worry about a high log level
log::trace!("ping timeout");
let _ = tx.send(());
return;
}
Ok(r) => match r {
Ok(_) => {
log::trace!("pong received");
tokio::time::sleep(PING_INTERVAL).await;
}
Err(e) => {
if dropped.load(Ordering::Relaxed) {
// drive_connection() exits first, no need to error again
return;
}
log::error!("ping error: {e}");
let _ = tx.send(());
return;
}
},
}
}
}
// H2Stream represents an active HTTP2 stream. Consumers can only Read/Write
pub struct H2Stream {
read: H2StreamReadHalf,
write: H2StreamWriteHalf,
}
pub struct H2StreamReadHalf {
recv_stream: h2::RecvStream,
_dropped: Option<DropCounter>,
}
pub struct H2StreamWriteHalf {
send_stream: h2::SendStream<Bytes>,
_dropped: Option<DropCounter>,
}
pub struct TokioH2Stream {
stream: H2Stream,
buf: Bytes,
}
struct DropCounter {
// Whether the other end of this shared counter has already dropped.
// We only decrement if they have, so we do not double count
half_dropped: Arc<()>,
active_count: Arc<AtomicU16>,
}
impl DropCounter {
pub fn new(active_count: Arc<AtomicU16>) -> (Option<DropCounter>, Option<DropCounter>) {
let half_dropped = Arc::new(());
let d1 = DropCounter {
half_dropped: half_dropped.clone(),
active_count: active_count.clone(),
};
let d2 = DropCounter {
half_dropped,
active_count,
};
(Some(d1), Some(d2))
}
}
impl crate::copy::BufferedSplitter for H2Stream {
type R = H2StreamReadHalf;
type W = H2StreamWriteHalf;
fn split_into_buffered_reader(self) -> (H2StreamReadHalf, H2StreamWriteHalf) {
let H2Stream { read, write } = self;
(read, write)
}
}
impl H2StreamWriteHalf {
fn write_slice(&mut self, buf: Bytes, end_of_stream: bool) -> Result<(), std::io::Error> {
self.send_stream
.send_data(buf, end_of_stream)
.map_err(h2_to_io_error)
}
}
impl Drop for DropCounter {
fn drop(&mut self) {
let mut half_dropped = Arc::new(());
std::mem::swap(&mut self.half_dropped, &mut half_dropped);
if Arc::into_inner(half_dropped).is_none() {
// other half already dropped
let left = self.active_count.fetch_sub(1, Ordering::SeqCst);
trace!("dropping H2Stream, has {} active streams left", left - 1);
} else {
trace!("dropping H2Stream, other half remains");
}
}
}
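// A standalone sketch (illustration only, not part of the diff) of the shared-Arc
// trick DropCounter uses above: both halves clone one Arc<()>, and Arc::into_inner
// returns Some only for the last remaining strong reference, so exactly one of the
// two drops can run a one-time cleanup without extra synchronization.
struct Half(Option<Arc<()>>);
impl Drop for Half {
    fn drop(&mut self) {
        // Take our reference so we can consume it by value.
        let shared = self.0.take().expect("only taken in drop");
        if Arc::into_inner(shared).is_some() {
            // Only the second half to drop reaches here.
            trace!("both halves dropped; run one-time cleanup");
        }
    }
}
// Usage: let shared = Arc::new(());
// let (a, b) = (Half(Some(shared.clone())), Half(Some(shared)));
// drop(a); // into_inner yields None: the other half is still alive
// drop(b); // last reference: cleanup runs exactly once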
// We can't directly implement tokio::io::{AsyncRead, AsyncWrite} for H2Stream because
// then the specific implementation will conflict with the generic one.
impl TokioH2Stream {
pub fn new(stream: H2Stream) -> Self {
Self {
stream,
buf: Bytes::new(),
}
}
}
impl tokio::io::AsyncRead for TokioH2Stream {
fn poll_read(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
buf: &mut tokio::io::ReadBuf<'_>,
) -> Poll<std::io::Result<()>> {
// Just return the bytes we have left over and don't poll the stream, because
// it's unclear what to do if there are leftover bytes from the previous read and
// the next poll returns an error.
if self.buf.is_empty() {
// If we have no unread bytes, we can poll the stream
// and fill self.buf with the bytes we read.
let pinned = std::pin::Pin::new(&mut self.stream.read);
let res = ready!(copy::ResizeBufRead::poll_bytes(pinned, cx))?;
self.buf = res;
}
// Copy as many bytes as we can from self.buf.
let cnt = Ord::min(buf.remaining(), self.buf.len());
buf.put_slice(&self.buf[..cnt]);
self.buf = self.buf.split_off(cnt);
Poll::Ready(Ok(()))
}
}
impl tokio::io::AsyncWrite for TokioH2Stream {
fn poll_write(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
buf: &[u8],
) -> Poll<Result<usize, tokio::io::Error>> {
let pinned = std::pin::Pin::new(&mut self.stream.write);
let buf = Bytes::copy_from_slice(buf);
copy::AsyncWriteBuf::poll_write_buf(pinned, cx, buf)
}
fn poll_flush(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
) -> Poll<Result<(), std::io::Error>> {
let pinned = std::pin::Pin::new(&mut self.stream.write);
copy::AsyncWriteBuf::poll_flush(pinned, cx)
}
fn poll_shutdown(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
) -> Poll<Result<(), std::io::Error>> {
let pinned = std::pin::Pin::new(&mut self.stream.write);
copy::AsyncWriteBuf::poll_shutdown(pinned, cx)
}
}
impl copy::ResizeBufRead for H2StreamReadHalf {
fn poll_bytes(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<Bytes>> {
let this = self.get_mut();
loop {
match ready!(this.recv_stream.poll_data(cx)) {
None => return Poll::Ready(Ok(Bytes::new())),
Some(Ok(buf)) if buf.is_empty() && !this.recv_stream.is_end_stream() => continue,
Some(Ok(buf)) => {
// TODO: Hyper and Go make their pinging data aware and don't send pings when data is received
// Pingora, and our implementation, currently don't do this.
// We may want to; if so, modify here.
// this.ping.record_data(buf.len());
let _ = this.recv_stream.flow_control().release_capacity(buf.len());
return Poll::Ready(Ok(buf));
}
Some(Err(e)) => {
return Poll::Ready(match e.reason() {
Some(Reason::NO_ERROR) | Some(Reason::CANCEL) => {
return Poll::Ready(Ok(Bytes::new()));
}
Some(Reason::STREAM_CLOSED) => {
Err(Error::new(std::io::ErrorKind::BrokenPipe, e))
}
_ => Err(h2_to_io_error(e)),
});
}
}
}
}
fn resize(self: Pin<&mut Self>, _new_size: usize) {
// NOP, we don't need to resize as we are abstracting the h2 buffer
}
}
impl copy::AsyncWriteBuf for H2StreamWriteHalf {
fn poll_write_buf(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
buf: Bytes,
) -> Poll<std::io::Result<usize>> {
if buf.is_empty() {
return Poll::Ready(Ok(0));
}
self.send_stream.reserve_capacity(buf.len());
// We ignore all errors returned by `poll_capacity` and `write`, as we
// will get the correct error from `poll_reset` anyway.
let cnt = match ready!(self.send_stream.poll_capacity(cx)) {
None => Some(0),
Some(Ok(cnt)) => self.write_slice(buf.slice(..cnt), false).ok().map(|()| cnt),
Some(Err(_)) => None,
};
if let Some(cnt) = cnt {
return Poll::Ready(Ok(cnt));
}
Poll::Ready(Err(h2_to_io_error(
match ready!(self.send_stream.poll_reset(cx)) {
Ok(Reason::NO_ERROR) | Ok(Reason::CANCEL) | Ok(Reason::STREAM_CLOSED) => {
return Poll::Ready(Err(std::io::ErrorKind::BrokenPipe.into()));
}
Ok(reason) => reason.into(),
Err(e) => e,
},
)))
}
fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
Poll::Ready(Ok(()))
}
fn poll_shutdown(
mut self: Pin<&mut Self>,
cx: &mut Context<'_>,
) -> Poll<Result<(), std::io::Error>> {
let r = self.write_slice(Bytes::new(), true);
if r.is_ok() {
return Poll::Ready(Ok(()));
}
Poll::Ready(Err(h2_to_io_error(
match ready!(self.send_stream.poll_reset(cx)) {
Ok(Reason::NO_ERROR) => return Poll::Ready(Ok(())),
Ok(Reason::CANCEL) | Ok(Reason::STREAM_CLOSED) => {
return Poll::Ready(Err(std::io::ErrorKind::BrokenPipe.into()));
}
Ok(reason) => reason.into(),
Err(e) => e,
},
)))
}
}
fn h2_to_io_error(e: h2::Error) -> std::io::Error {
if e.is_io() {
e.into_io().unwrap()
} else {
std::io::Error::other(e)
}
}
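// A hypothetical usage sketch (assumed caller, not part of the diff): because
// TokioH2Stream implements the standard tokio I/O traits, generic utilities can
// drive an HBONE stream directly. `stream` would come from e.g.
// H2ConnectClient::send_request, and the peer is assumed to echo bytes back.
async fn echo_once(stream: H2Stream) -> std::io::Result<()> {
    use tokio::io::{AsyncReadExt, AsyncWriteExt};
    let mut io = TokioH2Stream::new(stream);
    io.write_all(b"hello").await?; // copied into Bytes and fed to the h2 send stream
    let mut buf = [0u8; 1024];
    let n = io.read(&mut buf).await?; // serves leftover bytes first, then polls h2
    trace!("read {n} bytes back");
    io.shutdown().await?; // sends an empty DATA frame with END_STREAM
    Ok(())
}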

src/proxy/h2/client.rs Normal file

@ -0,0 +1,231 @@
// Copyright Istio Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::config;
use crate::identity::Identity;
use crate::proxy::Error;
use bytes::{Buf, Bytes};
use h2::SendStream;
use h2::client::{Connection, SendRequest};
use http::Request;
use std::fmt;
use std::fmt::{Display, Formatter};
use std::net::IpAddr;
use std::net::SocketAddr;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicU16, Ordering};
use std::task::{Context, Poll};
use tokio::io::{AsyncRead, AsyncWrite};
use tokio::sync::oneshot;
use tokio::sync::watch::Receiver;
use tracing::{Instrument, debug, error, trace, warn};
#[derive(Debug, Clone)]
// H2ConnectClient is a wrapper abstracting h2
pub struct H2ConnectClient {
sender: SendRequest<Bytes>,
pub max_allowed_streams: u16,
stream_count: Arc<AtomicU16>,
wl_key: WorkloadKey,
}
#[derive(PartialEq, Eq, Hash, Clone, Debug)]
pub struct WorkloadKey {
pub src_id: Identity,
pub dst_id: Vec<Identity>,
// In theory we could key on just src, dst, and node. However, the destination checks that
// the L3 destination IP matches the HBONE IP; this could perhaps be loosened to just assert the identities match.
pub dst: SocketAddr,
// Because we spoof the source IP, we need to key on it as well. Note: for in-pod mode,
// pools are already per-pod anyway.
pub src: IpAddr,
}
impl Display for WorkloadKey {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(f, "{}({})->{}[", self.src, &self.src_id, self.dst,)?;
for i in &self.dst_id {
write!(f, "{i}")?;
}
write!(f, "]")
}
}
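// A sketch of building a key (the addresses are made-up examples): because ztunnel
// spoofs the source IP, two source workloads dialing the same destination must not
// share an HBONE connection, so the source IP and both identities are part of the
// key. 15008 is the HBONE port.
fn example_key(src_id: Identity, dst_id: Identity) -> WorkloadKey {
    WorkloadKey {
        src_id,
        dst_id: vec![dst_id],
        dst: "10.0.0.2:15008".parse().expect("static example addr"),
        src: "10.0.0.1".parse().expect("static example addr"),
    }
}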
impl H2ConnectClient {
pub fn is_for_workload(&self, wl_key: &WorkloadKey) -> Result<(), crate::proxy::Error> {
if self.wl_key != *wl_key {
Err(crate::proxy::Error::Generic(
"connection does not match workload key!".into(),
))
} else {
Ok(())
}
}
// will_be_at_max_streamcount checks whether the connection will hit its stream limit if we send one more request on it
pub fn will_be_at_max_streamcount(&self) -> bool {
let future_count = self.stream_count.load(Ordering::Relaxed) + 1;
trace!(
"checking streamcount: {future_count} >= {}",
self.max_allowed_streams
);
future_count >= self.max_allowed_streams
}
pub fn ready_to_use(&mut self) -> bool {
let cx = &mut Context::from_waker(futures::task::noop_waker_ref());
match self.sender.poll_ready(cx) {
Poll::Ready(Ok(_)) => true,
// We may have gotten GoAway, etc
Poll::Ready(Err(_)) => false,
Poll::Pending => {
// Given our current usage, I am not sure this can ever be the case.
// If it is, though, err on the safe side and do not use the connection
warn!("checked out connection is Pending, skipping");
false
}
}
}
pub async fn send_request(
&mut self,
req: http::Request<()>,
) -> Result<crate::proxy::h2::H2Stream, Error> {
let cur = self.stream_count.fetch_add(1, Ordering::SeqCst);
trace!(current_streams = cur, "sending request");
let (send, recv) = match self.internal_send(req).await {
Ok(r) => r,
Err(e) => {
// Request failed, so drop the stream now
self.stream_count.fetch_sub(1, Ordering::SeqCst);
return Err(e);
}
};
let (dropped1, dropped2) = crate::proxy::h2::DropCounter::new(self.stream_count.clone());
let read = crate::proxy::h2::H2StreamReadHalf {
recv_stream: recv,
_dropped: dropped1,
};
let write = crate::proxy::h2::H2StreamWriteHalf {
send_stream: send,
_dropped: dropped2,
};
let h2 = crate::proxy::h2::H2Stream { read, write };
Ok(h2)
}
// helper to allow us to handle errors once
async fn internal_send(
&mut self,
req: Request<()>,
) -> Result<(SendStream<Bytes>, h2::RecvStream), Error> {
// "This function must return `Ready` before `send_request` is called"
// We should always be ready though, because we make sure we don't go over the max stream limit out of band.
futures::future::poll_fn(|cx| self.sender.poll_ready(cx)).await?;
let (response, stream) = self.sender.send_request(req, false)?;
let response = response.await?;
if response.status() != 200 {
return Err(Error::HttpStatus(response.status()));
}
Ok((stream, response.into_body()))
}
}
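// A sketch of how a pool might vet a checked-out connection before reuse (the
// caller and its retirement policy are assumed, not part of the diff): skip
// connections that are no longer usable, and retire one that is about to hit
// its stream limit.
async fn checkout(
    mut conn: H2ConnectClient,
    req: http::Request<()>,
) -> Option<crate::proxy::h2::H2Stream> {
    if !conn.ready_to_use() {
        // e.g. the peer sent GOAWAY; the caller should dial a fresh connection
        return None;
    }
    let retire = conn.will_be_at_max_streamcount();
    let stream = conn.send_request(req).await.ok()?;
    if retire {
        drop(conn); // a real pool would not re-insert this connection
    }
    Some(stream)
}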
pub async fn spawn_connection(
cfg: Arc<config::Config>,
s: impl AsyncRead + AsyncWrite + Unpin + Send + 'static,
driver_drain: Receiver<bool>,
wl_key: WorkloadKey,
) -> Result<H2ConnectClient, Error> {
let mut builder = h2::client::Builder::new();
builder
.initial_window_size(cfg.window_size)
.initial_connection_window_size(cfg.connection_window_size)
.max_frame_size(cfg.frame_size)
.initial_max_send_streams(cfg.pool_max_streams_per_conn as usize)
.max_header_list_size(1024 * 16)
// 4mb. Aligned with window_size such that we can fill up the buffer, then flush it all in one go, without buffering up too much.
.max_send_buffer_size(cfg.window_size as usize)
.enable_push(false);
let (send_req, connection) = builder
.handshake::<_, Bytes>(s)
.await
.map_err(Error::Http2Handshake)?;
// We store max as u16, so if they report above that max size we just cap at u16::MAX
let max_allowed_streams = std::cmp::min(
cfg.pool_max_streams_per_conn,
connection
.max_concurrent_send_streams()
.try_into()
.unwrap_or(u16::MAX),
);
// Spawn a task to poll the connection and drive the HTTP/2 state.
// If we get a drain for this connection, respect it in a race;
// it is important to have a drain here, or this connection will never terminate.
tokio::spawn(
async move {
drive_connection(connection, driver_drain).await;
}
.in_current_span(),
);
let c = H2ConnectClient {
sender: send_req,
stream_count: Arc::new(AtomicU16::new(0)),
max_allowed_streams,
wl_key,
};
Ok(c)
}
async fn drive_connection<S, B>(mut conn: Connection<S, B>, mut driver_drain: Receiver<bool>)
where
S: AsyncRead + AsyncWrite + Send + Unpin,
B: Buf,
{
let ping_pong = conn
.ping_pong()
.expect("ping_pong should only be called once");
// for ping to inform this fn to drop the connection
let (ping_drop_tx, ping_drop_rx) = oneshot::channel::<()>();
// for this fn to inform ping to give up when it is already dropped
let dropped = Arc::new(AtomicBool::new(false));
tokio::task::spawn(
super::do_ping_pong(ping_pong, ping_drop_tx, dropped.clone()).in_current_span(),
);
tokio::select! {
_ = driver_drain.changed() => {
debug!("draining outer HBONE connection");
}
_ = ping_drop_rx => {
warn!("HBONE ping timeout/error");
}
res = conn => {
match res {
Err(e) => {
error!("Error in HBONE connection handshake: {:?}", e);
}
Ok(_) => {
debug!("done with HBONE connection handshake: {:?}", res);
}
}
}
}
// Signal to the ping_pong it should also stop.
dropped.store(true, Ordering::Relaxed);
}

src/proxy/h2/server.rs Normal file

@ -0,0 +1,186 @@
// Copyright Istio Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::config;
use crate::drain::DrainWatcher;
use crate::proxy::Error;
use bytes::Bytes;
use futures_util::FutureExt;
use http::Response;
use http::request::Parts;
use std::fmt::Debug;
use std::future::Future;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use tokio::net::TcpStream;
use tokio::sync::{oneshot, watch};
use tracing::{Instrument, debug};
pub struct H2Request {
request: Parts,
recv: h2::RecvStream,
send: h2::server::SendResponse<Bytes>,
}
impl Debug for H2Request {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("H2Request")
.field("request", &self.request)
.finish()
}
}
impl H2Request {
pub fn send_error(mut self, resp: Response<()>) -> Result<(), Error> {
let _ = self.send.send_response(resp, true)?;
Ok(())
}
pub async fn send_response(
self,
resp: Response<()>,
) -> Result<crate::proxy::h2::H2Stream, Error> {
let H2Request { recv, mut send, .. } = self;
let send = send.send_response(resp, false)?;
let read = crate::proxy::h2::H2StreamReadHalf {
recv_stream: recv,
_dropped: None, // We do not need to track on the server
};
let write = crate::proxy::h2::H2StreamWriteHalf {
send_stream: send,
_dropped: None, // We do not need to track on the server
};
let h2 = crate::proxy::h2::H2Stream { read, write };
Ok(h2)
}
pub fn get_request(&self) -> &Parts {
&self.request
}
pub fn headers(&self) -> &http::HeaderMap<http::HeaderValue> {
self.request.headers()
}
}
pub trait RequestParts {
fn uri(&self) -> &http::Uri;
fn method(&self) -> &http::Method;
fn headers(&self) -> &http::HeaderMap<http::HeaderValue>;
}
impl RequestParts for Parts {
fn uri(&self) -> &http::Uri {
&self.uri
}
fn method(&self) -> &http::Method {
&self.method
}
fn headers(&self) -> &http::HeaderMap<http::HeaderValue> {
&self.headers
}
}
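// The trait exists so request validation can be written and unit-tested against
// anything carrying a URI, method, and headers rather than a live h2 request.
// A small assumed helper as illustration:
fn is_connect<R: RequestParts>(req: &R) -> bool {
    *req.method() == http::Method::CONNECT && req.uri().authority().is_some()
}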
pub async fn serve_connection<F, Fut>(
cfg: Arc<config::Config>,
s: tokio_rustls::server::TlsStream<TcpStream>,
drain: DrainWatcher,
mut force_shutdown: watch::Receiver<()>,
handler: F,
) -> Result<(), Error>
where
F: Fn(H2Request) -> Fut,
Fut: Future<Output = ()> + Send + 'static,
{
let mut builder = h2::server::Builder::new();
let mut conn = builder
.initial_window_size(cfg.window_size)
.initial_connection_window_size(cfg.connection_window_size)
.max_frame_size(cfg.frame_size)
// 64KB max; the 16MB default is driven by Golang's defaults
// Since we know we are going to receive a bounded set of headers, more is overkill.
.max_header_list_size(65536)
// 400kb, default from hyper
.max_send_buffer_size(1024 * 400)
// default from hyper
.max_concurrent_streams(200)
.handshake(s)
.await?;
let ping_pong = conn
.ping_pong()
.expect("new connection should have ping_pong");
// for ping to inform this fn to drop the connection
let (ping_drop_tx, mut ping_drop_rx) = oneshot::channel::<()>();
// for this fn to inform ping to give up when it is already dropped
let dropped = Arc::new(AtomicBool::new(false));
tokio::task::spawn(crate::proxy::h2::do_ping_pong(
ping_pong,
ping_drop_tx,
dropped.clone(),
));
let handler = |req| handler(req).map(|_| ());
loop {
let drain = drain.clone();
tokio::select! {
request = conn.accept() => {
let Some(request) = request else {
// done!
// Signal to the ping_pong it should also stop.
dropped.store(true, Ordering::Relaxed);
return Ok(());
};
let (request, send) = request?;
let (request, recv) = request.into_parts();
let req = H2Request {
request,
recv,
send,
};
let handle = handler(req);
// Serve the stream in a new task
tokio::task::spawn(handle.in_current_span());
}
_ = &mut ping_drop_rx => {
// Ideally this would be a warning/error message. However, due to an issue during shutdown,
// by the time pods with in-pod know to shut down, the network namespace is destroyed.
// This blocks the ability to send a GOAWAY and gracefully shutdown.
// See https://github.com/istio/ztunnel/issues/1191.
debug!("HBONE ping timeout/error, peer may have shutdown");
conn.abrupt_shutdown(h2::Reason::NO_ERROR);
break
}
_shutdown = drain.wait_for_drain() => {
debug!("starting graceful drain...");
conn.graceful_shutdown();
break;
}
}
}
// Signal to the ping_pong it should also stop.
dropped.store(true, Ordering::Relaxed);
let poll_closed = futures_util::future::poll_fn(move |cx| conn.poll_closed(cx));
tokio::select! {
_ = force_shutdown.changed() => {
return Err(Error::DrainTimeOut)
}
_ = poll_closed => {}
}
// Mark we are done with the connection
drop(drain);
Ok(())
}
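// A hypothetical caller sketch (names such as `tls` are assumed, not part of the
// diff): serve_connection accepts CONNECT requests in a loop and hands each one
// to the handler, which decides whether to answer 200 and take the stream.
async fn serve(
    cfg: Arc<config::Config>,
    tls: tokio_rustls::server::TlsStream<TcpStream>,
    drain: DrainWatcher,
    force_shutdown: watch::Receiver<()>,
) -> Result<(), Error> {
    serve_connection(cfg, tls, drain, force_shutdown, |req| async move {
        debug!("CONNECT to {:?}", req.get_request().uri);
        let ok = Response::builder().status(200).body(()).expect("static response");
        if let Ok(stream) = req.send_response(ok).await {
            drop(stream); // a real handler would proxy bytes over `stream`
        }
    })
    .await
}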

File diff suppressed because it is too large

@ -12,85 +12,85 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashSet;
use std::net::SocketAddr;
use std::sync::Arc;
use std::time::Instant;
use drain::Watch;
use tokio::net::{TcpListener, TcpStream};
use tracing::{error, info, trace, Instrument};
use tokio::net::TcpStream;
use tokio::sync::watch;
use crate::config::ProxyMode;
use crate::proxy::connection_manager::ConnectionManager;
use crate::proxy::metrics::Reporter;
use tracing::{Instrument, debug, error, info, trace};
use crate::drain::DrainWatcher;
use crate::drain::run_with_drain;
use crate::proxy::Error;
use crate::proxy::{metrics, util, ProxyInputs};
use crate::rbac;
use crate::proxy::metrics::Reporter;
use crate::proxy::{ProxyInputs, metrics, util};
use crate::state::workload::NetworkAddress;
use crate::{assertions, copy, handle_connection, rbac, strng};
use crate::{proxy, socket};
pub(super) struct InboundPassthrough {
listener: TcpListener,
pi: ProxyInputs,
drain: Watch,
listener: socket::Listener,
pi: Arc<ProxyInputs>,
drain: DrainWatcher,
enable_orig_src: bool,
}
impl InboundPassthrough {
pub(super) async fn new(
mut pi: ProxyInputs,
drain: Watch,
pi: Arc<ProxyInputs>,
drain: DrainWatcher,
) -> Result<InboundPassthrough, Error> {
let listener: TcpListener = pi
let listener = pi
.socket_factory
.tcp_bind(pi.cfg.inbound_plaintext_addr)
.map_err(|e| Error::Bind(pi.cfg.inbound_plaintext_addr, e))?;
let transparent = super::maybe_set_transparent(&pi, &listener)?;
// Override with our explicitly configured setting
pi.cfg.enable_original_source = Some(transparent);
let enable_orig_src = super::maybe_set_transparent(&pi, &listener)?;
info!(
address=%listener.local_addr().expect("local_addr available"),
address=%listener.local_addr(),
component="inbound plaintext",
transparent,
transparent=enable_orig_src,
"listener established",
);
Ok(InboundPassthrough {
listener,
pi,
drain,
enable_orig_src,
})
}
pub(super) fn address(&self) -> SocketAddr {
self.listener.local_addr().expect("local_addr available")
}
pub(super) async fn run(self, illegal_ports: Arc<HashSet<u16>>) {
let accept = async move {
pub(super) async fn run(self) {
let pi = self.pi.clone();
let accept = async move |drain: DrainWatcher, force_shutdown: watch::Receiver<()>| {
loop {
// Asynchronously wait for an inbound socket.
let socket = self.listener.accept().await;
let start = Instant::now();
let mut force_shutdown = force_shutdown.clone();
let drain = drain.clone();
let pi = self.pi.clone();
let illegal_ports = illegal_ports.clone();
let connection_manager = self.pi.connection_manager.clone();
match socket {
Ok((stream, remote)) => {
tokio::spawn(
async move {
Self::proxy_inbound_plaintext(
pi, // pi cloned above; OK to move
socket::to_canonical(remote),
stream,
illegal_ports,
connection_manager,
)
.await
let serve_client = async move {
debug!(component="inbound passthrough", "connection started");
// Since this task is spawned, make sure we are guaranteed to terminate
tokio::select! {
_ = force_shutdown.changed() => {
debug!(component="inbound passthrough", "connection forcefully terminated");
}
_ = Self::proxy_inbound_plaintext(pi, socket::to_canonical(remote), stream, self.enable_orig_src) => {}
}
.in_current_span(),
);
// Mark we are done with the connection, so drain can complete
drop(drain);
debug!(component="inbound passthrough", dur=?start.elapsed(), "connection completed");
}.in_current_span();
assertions::size_between_ref(1500, 3000, &serve_client);
tokio::spawn(serve_client);
}
Err(e) => {
if util::is_runtime_shutdown(&e) {
@ -100,37 +100,28 @@ impl InboundPassthrough {
}
}
}
}
.in_current_span();
// Stop accepting once we drain.
// Note: we are *not* waiting for all connections to be closed. In the future, we may consider
// this, but will need some timeout period, as we have no back-pressure mechanism on connections.
tokio::select! {
res = accept => { res }
_ = self.drain.signaled() => {
info!("inbound passthrough drained");
}
}
};
run_with_drain(
"inbound passthrough".to_string(),
self.drain,
pi.cfg.self_termination_deadline,
accept,
)
.await
}
async fn proxy_inbound_plaintext(
pi: ProxyInputs,
pi: Arc<ProxyInputs>,
source_addr: SocketAddr,
mut inbound_stream: TcpStream,
illegal_ports: Arc<HashSet<u16>>,
connection_manager: ConnectionManager,
inbound_stream: TcpStream,
enable_orig_src: bool,
) {
let start = Instant::now();
let dest_addr = socket::orig_dst_addr_or_default(&inbound_stream);
// Check if it is an illegal call to ourself, which could trampoline to illegal addresses or
// lead to infinite loops
let illegal_call = if pi.cfg.inpod_enabled {
// User sent a request to pod:15006. This would forward to pod:15006 infinitely
illegal_ports.contains(&dest_addr.port())
} else {
// User sent a request to the ztunnel directly. This isn't allowed
pi.cfg.proxy_mode == ProxyMode::Shared && Some(dest_addr.ip()) == pi.cfg.local_ip
};
let illegal_call = pi.cfg.illegal_ports.contains(&dest_addr.port());
if illegal_call {
metrics::log_early_deny(
source_addr,
@ -140,21 +131,14 @@ impl InboundPassthrough {
);
return;
}
let network_addr = NetworkAddress {
network: pi.cfg.network.clone(), // inbound request must be on our network
address: dest_addr.ip(),
};
let Some((upstream, upstream_service)) =
pi.state.fetch_workload_services(&network_addr).await
else {
metrics::log_early_deny(
source_addr,
dest_addr,
Reporter::destination,
Error::UnknownDestination(dest_addr.ip()),
);
return;
let upstream_workload = match pi.local_workload_information.get_workload().await {
Ok(upstream_workload) => upstream_workload,
Err(e) => {
metrics::log_early_deny(source_addr, dest_addr, Reporter::destination, e);
return;
}
};
let upstream_services = pi.state.get_services_by_workload(&upstream_workload);
let rbac_ctx = crate::state::ProxyRbacContext {
conn: rbac::Connection {
@ -163,10 +147,10 @@ impl InboundPassthrough {
// inbound request must be on our network since this is passthrough
// rather than HBONE, which can be tunneled across networks through gateways.
// by definition, without the gateway our source must be on our network.
dst_network: pi.cfg.network.clone(),
dst_network: strng::new(&pi.cfg.network),
dst: dest_addr,
},
dest_workload_info: pi.proxy_workload_info.clone(),
dest_workload: upstream_workload.clone(),
};
// Find source info. We can lookup by XDS or from connection attributes
@ -175,76 +159,75 @@ impl InboundPassthrough {
// inbound request must be on our network since this is passthrough
// rather than HBONE, which can be tunneled across networks through gateways.
// by definition, without the gateway our source must be on our network.
network: pi.cfg.network.clone(),
network: pi.cfg.network.as_str().into(),
address: source_addr.ip(),
};
pi.state.fetch_workload(&network_addr_srcip).await
pi.state
.fetch_workload_by_address(&network_addr_srcip)
.await
};
let derived_source = metrics::DerivedWorkload {
identity: rbac_ctx.conn.src_identity.clone(),
..Default::default()
};
let ds = proxy::guess_inbound_service(&rbac_ctx.conn, upstream_service, &upstream);
let connection_metrics = metrics::ConnectionOpen {
reporter: Reporter::destination,
source: source_workload,
derived_source: Some(derived_source),
destination: Some(upstream),
connection_security_policy: metrics::SecurityPolicy::unknown,
destination_service: ds,
};
let result_tracker = metrics::ConnectionResult::new(
let ds = proxy::guess_inbound_service(
&rbac_ctx.conn,
&None,
upstream_services,
&upstream_workload,
);
let result_tracker = Box::new(metrics::ConnectionResult::new(
source_addr,
dest_addr,
None,
start,
&connection_metrics,
pi.metrics,
);
metrics::ConnectionOpen {
reporter: Reporter::destination,
source: source_workload,
derived_source: Some(derived_source),
destination: Some(upstream_workload),
connection_security_policy: metrics::SecurityPolicy::unknown,
destination_service: ds,
},
pi.metrics.clone(),
));
// register before assert_rbac to ensure the connection is tracked during its entire valid span
connection_manager.register(&rbac_ctx);
if !pi.state.assert_rbac(&rbac_ctx).await {
connection_manager.release(&rbac_ctx);
result_tracker.record(Err(Error::AuthorizationPolicyRejection));
return;
}
let close = match connection_manager.track(&rbac_ctx) {
Some(c) => c,
None => {
// this seems unlikely but could occur if policy changes while track awaits lock
result_tracker.record(Err(Error::AuthorizationPolicyRejection));
let mut conn_guard = match pi
.connection_manager
.assert_rbac(&pi.state, &rbac_ctx, None)
.await
{
Ok(cg) => cg,
Err(e) => {
result_tracker
.record_with_flag(Err(e), metrics::ResponseFlags::AuthorizationPolicyDenied);
return;
}
};
let orig_src = if pi.cfg.enable_original_source.unwrap_or_default() {
let orig_src = if enable_orig_src {
Some(source_addr.ip())
} else {
None
};
let result_tracker = Arc::new(result_tracker);
let send = async {
let result_tracker = result_tracker.clone();
trace!(%source_addr, %dest_addr, component="inbound plaintext", "connecting...");
let mut outbound =
super::freebind_connect(orig_src, dest_addr, pi.socket_factory.as_ref())
.await
.map_err(Error::ConnectionFailed)?;
let outbound = super::freebind_connect(orig_src, dest_addr, pi.socket_factory.as_ref())
.await
.map_err(Error::ConnectionFailed)?;
trace!(%source_addr, destination=%dest_addr, component="inbound plaintext", "connected");
socket::copy_bidirectional(&mut inbound_stream, &mut outbound, &result_tracker).await
copy::copy_bidirectional(
copy::TcpStreamSplitter(inbound_stream),
copy::TcpStreamSplitter(outbound),
&result_tracker,
)
.await
};
let res = tokio::select! {
res = send => {
connection_manager.release(&rbac_ctx);
res
}
_signaled = close.signaled() => Err(Error::AuthorizationPolicyLateRejection)
};
let res = handle_connection!(conn_guard, send);
result_tracker.record(res);
}
}
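// The per-connection shutdown pattern used in `run` above, in isolation (a
// sketch; `work` stands in for proxy_inbound_plaintext): every spawned
// connection races its work against the force-shutdown signal, then drops its
// DrainWatcher so a graceful drain can complete.
async fn run_connection(
    mut force_shutdown: tokio::sync::watch::Receiver<()>,
    drain: crate::drain::DrainWatcher,
    work: impl std::future::Future<Output = ()>,
) {
    tokio::select! {
        _ = force_shutdown.changed() => {
            // termination deadline reached: abandon the connection
        }
        _ = work => {
            // connection completed on its own
        }
    }
    drop(drain); // release our drain reference
}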


@ -14,22 +14,29 @@
use std::fmt::Write;
use std::net::SocketAddr;
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, atomic};
use std::time::Instant;
use prometheus_client::encoding::{EncodeLabelSet, EncodeLabelValue, LabelValueEncoder};
use prometheus_client::metrics::counter::Counter;
use prometheus_client::encoding::{
EncodeLabelSet, EncodeLabelValue, LabelSetEncoder, LabelValueEncoder,
};
use prometheus_client::metrics::counter::{Atomic, Counter};
use prometheus_client::metrics::family::Family;
use prometheus_client::registry::Registry;
use tracing::event;
use tracing_core::field::Value;
use crate::identity::Identity;
use crate::metrics::{DefaultedUnknown, DeferRecorder, Deferred, IncrementRecorder, Recorder};
use crate::metrics::DefaultedUnknown;
use crate::proxy::{self, HboneAddress};
use crate::state::service::ServiceDescription;
use crate::state::workload::Workload;
use crate::strng::{RichStrng, Strng};
#[derive(Debug)]
pub struct Metrics {
pub connection_opens: Family<CommonTrafficLabels, Counter>,
pub connection_close: Family<CommonTrafficLabels, Counter>,
@ -38,41 +45,8 @@ pub struct Metrics {
// on-demand DNS is not a part of DNS proxy, but part of ztunnel proxy itself
pub on_demand_dns: Family<OnDemandDnsLabels, Counter>,
pub on_demand_dns_cache_misses: Family<OnDemandDnsLabels, Counter>,
}
impl Metrics {
#[must_use = "metric will be dropped (and thus recorded) immediately if not assigned"]
/// increment_defer is used to increment a metric now and another metric later once the MetricGuard is dropped
///
/// # Examples
///
/// ```ignore
/// let connection_open = ConnectionOpen {};
/// // Record connection opened now
/// let connection_close = self.metrics.increment_defer::<_, ConnectionClosed>(&connection_open);
/// // Eventually, report connection closed
/// drop(connection_close);
/// ```
pub fn increment_defer<'a, M1, M2>(
&'a self,
event: &'a M1,
) -> Deferred<'a, impl FnOnce(&'a Self), Self>
where
M1: Clone + 'a,
M2: From<&'a M1> + 'a,
Metrics: IncrementRecorder<M1> + IncrementRecorder<M2>,
{
self.increment(event);
let m2: M2 = event.into();
self.defer_record(move |metrics| {
metrics.increment(&m2);
})
}
}
impl DeferRecorder for Metrics {}
#[derive(Clone, Copy, Default, Debug, Hash, PartialEq, Eq, EncodeLabelValue)]
pub enum Reporter {
#[default]
@ -92,13 +66,19 @@ pub enum RequestProtocol {
#[derive(Default, Copy, Clone, Debug, Hash, PartialEq, Eq)]
pub enum ResponseFlags {
#[default]
none,
None,
// connection denied due to policy
AuthorizationPolicyDenied,
// connection denied because we could not establish an upstream connection
ConnectionFailure,
}
impl EncodeLabelValue for ResponseFlags {
fn encode(&self, writer: &mut LabelValueEncoder) -> Result<(), std::fmt::Error> {
match self {
ResponseFlags::none => writer.write_str("-"),
ResponseFlags::None => writer.write_str("-"),
ResponseFlags::AuthorizationPolicyDenied => writer.write_str("DENY"),
ResponseFlags::ConnectionFailure => writer.write_str("CONNECT"),
}
}
}
@ -112,20 +92,22 @@ pub enum SecurityPolicy {
#[derive(Clone, Debug, Default)]
pub struct DerivedWorkload {
pub workload_name: Option<String>,
pub app: Option<String>,
pub revision: Option<String>,
pub namespace: Option<String>,
pub workload_name: Option<Strng>,
pub app: Option<Strng>,
pub revision: Option<Strng>,
pub namespace: Option<Strng>,
pub identity: Option<Identity>,
pub cluster_id: Option<String>,
pub cluster_id: Option<Strng>,
pub region: Option<Strng>,
pub zone: Option<Strng>,
}
#[derive(Clone)]
pub struct ConnectionOpen {
pub reporter: Reporter,
pub source: Option<Workload>,
pub source: Option<Arc<Workload>>,
pub derived_source: Option<DerivedWorkload>,
pub destination: Option<Workload>,
pub destination: Option<Arc<Workload>>,
pub destination_service: Option<ServiceDescription>,
pub connection_security_policy: SecurityPolicy,
}
@ -141,10 +123,17 @@ impl CommonTrafficLabels {
self.source_canonical_service = w.canonical_name.clone().into();
self.source_canonical_revision = w.canonical_revision.clone().into();
self.source_workload_namespace = w.namespace.clone().into();
self.source_principal = w.identity().into();
// We explicitly do not set source_principal here. This is set only with with_derived_source
// based on the real mTLS identity.
self.source_app = w.canonical_name.clone().into();
self.source_version = w.canonical_revision.clone().into();
self.source_cluster = w.cluster_id.to_string().into();
let mut local = self.locality.0.unwrap_or_default();
local.source_region = w.locality.region.clone().into();
local.source_zone = w.locality.zone.clone().into();
self.locality = OptionallyEncode(Some(local));
self
}
@ -154,10 +143,17 @@ impl CommonTrafficLabels {
self.source_canonical_service = w.app.clone().into();
self.source_canonical_revision = w.revision.clone().into();
self.source_workload_namespace = w.namespace.clone().into();
self.source_principal = w.identity.clone().into();
self.source_app = w.workload_name.clone().into();
self.source_version = w.revision.clone().into();
self.source_cluster = w.cluster_id.clone().into();
// This is the identity from the TLS handshake; this is the most trustworthy source so use it
self.source_principal = w.identity.clone().into();
let mut local = self.locality.0.unwrap_or_default();
local.source_region = w.region.clone().into();
local.source_zone = w.zone.clone().into();
self.locality = OptionallyEncode(Some(local));
self
}
@ -171,6 +167,12 @@ impl CommonTrafficLabels {
self.destination_app = w.canonical_name.clone().into();
self.destination_version = w.canonical_revision.clone().into();
self.destination_cluster = w.cluster_id.to_string().into();
let mut local = self.locality.0.unwrap_or_default();
local.destination_region = w.locality.region.clone().into();
local.destination_zone = w.locality.zone.clone().into();
self.locality = OptionallyEncode(Some(local));
self
}
@ -183,18 +185,18 @@ impl CommonTrafficLabels {
}
}
impl From<&ConnectionOpen> for CommonTrafficLabels {
fn from(c: &ConnectionOpen) -> Self {
impl From<ConnectionOpen> for CommonTrafficLabels {
fn from(c: ConnectionOpen) -> Self {
CommonTrafficLabels {
reporter: c.reporter,
request_protocol: RequestProtocol::tcp,
response_flags: ResponseFlags::none,
response_flags: ResponseFlags::None,
connection_security_policy: c.connection_security_policy,
..CommonTrafficLabels::new()
// Intentionally before with_source; source is more reliable
.with_derived_source(c.derived_source.as_ref())
.with_source(c.source.as_ref())
.with_destination(c.destination.as_ref())
.with_source(c.source.as_deref())
.with_destination(c.destination.as_deref())
.with_destination_service(c.destination_service.as_ref())
}
}
@ -204,48 +206,71 @@ impl From<&ConnectionOpen> for CommonTrafficLabels {
pub struct CommonTrafficLabels {
reporter: Reporter,
source_workload: DefaultedUnknown<String>,
source_canonical_service: DefaultedUnknown<String>,
source_canonical_revision: DefaultedUnknown<String>,
source_workload_namespace: DefaultedUnknown<String>,
source_workload: DefaultedUnknown<RichStrng>,
source_canonical_service: DefaultedUnknown<RichStrng>,
source_canonical_revision: DefaultedUnknown<RichStrng>,
source_workload_namespace: DefaultedUnknown<RichStrng>,
source_principal: DefaultedUnknown<Identity>,
source_app: DefaultedUnknown<String>,
source_version: DefaultedUnknown<String>,
source_cluster: DefaultedUnknown<String>,
source_app: DefaultedUnknown<RichStrng>,
source_version: DefaultedUnknown<RichStrng>,
source_cluster: DefaultedUnknown<RichStrng>,
// TODO: never set
destination_service: DefaultedUnknown<String>,
destination_service_namespace: DefaultedUnknown<String>,
destination_service_name: DefaultedUnknown<String>,
destination_service: DefaultedUnknown<RichStrng>,
destination_service_namespace: DefaultedUnknown<RichStrng>,
destination_service_name: DefaultedUnknown<RichStrng>,
destination_workload: DefaultedUnknown<String>,
destination_canonical_service: DefaultedUnknown<String>,
destination_canonical_revision: DefaultedUnknown<String>,
destination_workload_namespace: DefaultedUnknown<String>,
destination_workload: DefaultedUnknown<RichStrng>,
destination_canonical_service: DefaultedUnknown<RichStrng>,
destination_canonical_revision: DefaultedUnknown<RichStrng>,
destination_workload_namespace: DefaultedUnknown<RichStrng>,
destination_principal: DefaultedUnknown<Identity>,
destination_app: DefaultedUnknown<String>,
destination_version: DefaultedUnknown<String>,
destination_cluster: DefaultedUnknown<String>,
destination_app: DefaultedUnknown<RichStrng>,
destination_version: DefaultedUnknown<RichStrng>,
destination_cluster: DefaultedUnknown<RichStrng>,
request_protocol: RequestProtocol,
response_flags: ResponseFlags,
connection_security_policy: SecurityPolicy,
#[prometheus(flatten)]
locality: OptionallyEncode<LocalityLabels>,
}
/// OptionallyEncode is a wrapper that will optionally encode the entire label set.
/// This differs from something like DefaultedUnknown, which handles only the value; this makes the
/// entire label set not show up at all.
#[derive(Clone, Hash, Default, Debug, PartialEq, Eq)]
struct OptionallyEncode<T>(Option<T>);
impl<T: EncodeLabelSet> EncodeLabelSet for OptionallyEncode<T> {
fn encode(&self, encoder: LabelSetEncoder) -> Result<(), std::fmt::Error> {
match &self.0 {
None => Ok(()),
Some(ll) => ll.encode(encoder),
}
}
}
#[derive(Clone, Hash, Default, Debug, PartialEq, Eq, EncodeLabelSet)]
struct LocalityLabels {
source_region: DefaultedUnknown<RichStrng>,
source_zone: DefaultedUnknown<RichStrng>,
destination_region: DefaultedUnknown<RichStrng>,
destination_zone: DefaultedUnknown<RichStrng>,
}
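// A toy label set (assumed, for illustration) showing the difference: a
// DefaultedUnknown field still emits its label with an "unknown" value when
// unset, while a flattened OptionallyEncode(None) omits its labels entirely.
#[derive(Clone, Hash, Default, Debug, PartialEq, Eq, EncodeLabelSet)]
struct ToyLabels {
    method: DefaultedUnknown<RichStrng>,
    #[prometheus(flatten)]
    locality: OptionallyEncode<LocalityLabels>,
}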
#[derive(Clone, Hash, Default, Debug, PartialEq, Eq, EncodeLabelSet)]
pub struct OnDemandDnsLabels {
// on-demand DNS client information is just nice-to-have
source_workload: DefaultedUnknown<String>,
source_canonical_service: DefaultedUnknown<String>,
source_canonical_revision: DefaultedUnknown<String>,
source_workload_namespace: DefaultedUnknown<String>,
source_workload: DefaultedUnknown<RichStrng>,
source_canonical_service: DefaultedUnknown<RichStrng>,
source_canonical_revision: DefaultedUnknown<RichStrng>,
source_workload_namespace: DefaultedUnknown<RichStrng>,
source_principal: DefaultedUnknown<Identity>,
source_app: DefaultedUnknown<String>,
source_version: DefaultedUnknown<String>,
source_cluster: DefaultedUnknown<String>,
source_app: DefaultedUnknown<RichStrng>,
source_version: DefaultedUnknown<RichStrng>,
source_cluster: DefaultedUnknown<RichStrng>,
// on-demand DNS is resolved per hostname, so this is the most interesting part
hostname: DefaultedUnknown<String>,
hostname: DefaultedUnknown<RichStrng>,
}
impl OnDemandDnsLabels {
@ -304,12 +329,6 @@ impl Metrics {
"The total number of requests that used on-demand DNS (unstable)",
on_demand_dns.clone(),
);
let on_demand_dns_cache_misses = Family::default();
registry.register(
"on_demand_dns_cache_misses",
"The total number of cache misses for requests on-demand DNS (unstable)",
on_demand_dns_cache_misses.clone(),
);
Self {
connection_opens,
@ -317,29 +336,37 @@ impl Metrics {
received_bytes,
sent_bytes,
on_demand_dns,
on_demand_dns_cache_misses,
}
}
}
impl Recorder<ConnectionOpen, u64> for Metrics {
fn record(&self, reason: &ConnectionOpen, count: u64) {
self.connection_opens
.get_or_create(&CommonTrafficLabels::from(reason))
.inc_by(count);
}
}
#[derive(Debug)]
/// ConnectionResult abstracts recording a metric and emitting an access log upon a connection completion
pub struct ConnectionResult {
// Src address and name
src: (SocketAddr, Option<String>),
src: (SocketAddr, Option<RichStrng>),
// Dst address and name
dst: (SocketAddr, Option<String>),
hbone_target: Option<SocketAddr>,
dst: (SocketAddr, Option<RichStrng>),
hbone_target: Option<HboneAddress>,
start: Instant,
// TODO: storing CommonTrafficLabels adds ~600 bytes retained throughout a connection's lifetime.
// We could pre-fetch the metrics we need at initialization instead of storing this, then keep a more
// efficient representation for the fields we need to log. Ideally, this would even be optional
// in case logs are disabled.
tl: CommonTrafficLabels,
metrics: Arc<Metrics>,
// sent records the number of bytes sent on this connection
sent: AtomicU64,
// sent_metric records the number of bytes sent on this connection to the aggregated metric counter
sent_metric: Counter,
// recv records the number of bytes received on this connection
recv: AtomicU64,
// recv_metric records the number of bytes received on this connection to the aggregated metric counter
recv_metric: Counter,
// Have we recorded yet?
recorded: bool,
}
// log_early_deny allows logging a connection is denied before we have enough information to emit proper
@ -364,7 +391,7 @@ pub fn log_early_deny<E: std::error::Error>(
"inbound"
},
error = %err,
error = format!("{}", err),
"connection failed"
);
@ -402,44 +429,40 @@ impl ConnectionResult {
dst: SocketAddr,
// If using hbone, the inner HBONE address.
// That is, dst is the L4 address, while this is the :authority.
hbone_target: Option<SocketAddr>,
hbone_target: Option<HboneAddress>,
start: Instant,
conn: &ConnectionOpen,
conn: ConnectionOpen,
metrics: Arc<Metrics>,
) -> Self {
// for src and dest, try to get pod name but fall back to "canonical service"
let mut src = (src, conn.source.as_ref().map(|wl| wl.name.clone().into()));
let mut dst = (
dst,
conn.destination.as_ref().map(|wl| wl.name.clone().into()),
);
let tl = CommonTrafficLabels::from(conn);
metrics.connection_opens.get_or_create(&tl).inc();
let mtls = tl.connection_security_policy == SecurityPolicy::mutual_tls;
// for src and dest, try to get pod name but fall back to "canonical service"
let src = (
src,
conn.source
.as_ref()
.map(|wl| wl.name.clone())
.or(tl.source_canonical_service.clone().inner()),
);
let dst = (
dst,
conn.destination
.as_ref()
.map(|wl| wl.name.clone())
.or(tl.destination_canonical_service.clone().inner()),
);
src.1 = src.1.or(tl.source_canonical_service.clone().inner());
dst.1 = dst.1.or(tl.destination_canonical_service.clone().inner());
event!(
target: "access",
parent: None,
tracing::Level::DEBUG,
src.addr = %src.0,
src.workload = src.1,
src.namespace = tl.source_workload_namespace.as_ref(),
src.identity = tl.source_principal.as_ref().filter(|_| mtls).map(|id| id.to_string()),
src.workload = src.1.as_deref().map(to_value),
src.namespace = tl.source_workload_namespace.to_value(),
src.identity = tl.source_principal.as_ref().filter(|_| mtls).map(to_value_owned),
dst.addr = %dst.0,
dst.hbone_addr = hbone_target.map(|r| r.to_string()),
dst.workload = dst.1,
dst.namespace = tl.destination_canonical_service.as_ref(),
dst.identity = tl.destination_principal.as_ref().filter(|_| mtls).map(|id| id.to_string()),
dst.hbone_addr = hbone_target.as_ref().map(display),
dst.service = tl.destination_service.to_value(),
dst.workload = dst.1.as_deref().map(to_value),
dst.namespace = tl.destination_workload_namespace.to_value(),
dst.identity = tl.destination_principal.as_ref().filter(|_| mtls).map(to_value_owned),
direction = if tl.reporter == Reporter::source {
"outbound"
@ -449,6 +472,15 @@ impl ConnectionResult {
"connection opened"
);
// Grab the metrics with our labels now, so we don't need to fetch them each time.
// The inner metric is an Arc so clone is fine/cheap.
// With the raw Counter, incrementing is a simple atomic add operation (~1ns).
// Fetching the metric itself is ~300ns; fast, but we call it on each read/write so it would
// add up.
let sent_metric = metrics.sent_bytes.get_or_create(&tl).clone();
let recv_metric = metrics.received_bytes.get_or_create(&tl).clone();
let sent = atomic::AtomicU64::new(0);
let recv = atomic::AtomicU64::new(0);
Self {
src,
dst,
@ -456,20 +488,47 @@ impl ConnectionResult {
start,
tl,
metrics,
sent,
sent_metric,
recv,
recv_metric,
recorded: false,
}
}
pub fn increment_send(&self, res: u64) {
let tl = &self.tl;
self.metrics.sent_bytes.get_or_create(tl).inc_by(res);
self.sent.inc_by(res);
self.sent_metric.inc_by(res);
}
pub fn increment_recv(&self, res: u64) {
let tl = &self.tl;
self.metrics.received_bytes.get_or_create(tl).inc_by(res);
self.recv.inc_by(res);
self.recv_metric.inc_by(res);
}
// Record our final result, with more details as a response flag.
pub fn record_with_flag<E: std::error::Error>(
mut self,
res: Result<(), E>,
flag: ResponseFlags,
) {
self.tl.response_flags = flag;
self.record(res)
}
// Record our final result.
// Ideally, we would save and report from the increment_ functions instead of requiring a report here.
pub fn record<E: std::error::Error>(&self, res: Result<(u64, u64), E>) {
pub fn record<E: std::error::Error>(mut self, res: Result<(), E>) {
self.record_internal(res)
}
// Internal-only function that takes `&mut` to facilitate Drop. Public consumers must use consuming functions.
fn record_internal<E: std::error::Error>(&mut self, res: Result<(), E>) {
debug_assert!(!self.recorded, "record called multiple times");
if self.recorded {
return;
}
self.recorded = true;
let tl = &self.tl;
// Unconditionally record the connection was closed
@ -477,23 +536,27 @@ impl ConnectionResult {
// Unconditionally write out an access log
let mtls = tl.connection_security_policy == SecurityPolicy::mutual_tls;
let bytes = res.as_ref().ok();
let bytes = (
self.recv.load(Ordering::SeqCst),
self.sent.load(Ordering::SeqCst),
);
let dur = format!("{}ms", self.start.elapsed().as_millis());
// We use our own macro to allow setting the level dynamically
access_log!(
res,
src.addr = %self.src.0,
src.workload = self.src.1,
src.namespace = tl.source_workload_namespace.as_ref(),
src.identity = tl.source_principal.as_ref().filter(|_| mtls).map(|id| id.to_string()),
src.workload = self.src.1.as_deref().map(to_value),
src.namespace = tl.source_workload_namespace.to_value(),
src.identity = tl.source_principal.as_ref().filter(|_| mtls).map(to_value_owned),
dst.addr = %self.dst.0,
dst.hbone_addr = self.hbone_target.map(|r| r.to_string()),
dst.service = tl.destination_service.as_ref(),
dst.workload = self.dst.1,
dst.namespace = tl.destination_canonical_service.as_ref(),
dst.identity = tl.destination_principal.as_ref().filter(|_| mtls).map(|id| id.to_string()),
dst.hbone_addr = self.hbone_target.as_ref().map(display),
dst.service = tl.destination_service.to_value(),
dst.workload = self.dst.1.as_deref().map(to_value),
dst.namespace = tl.destination_workload_namespace.to_value(),
dst.identity = tl.destination_principal.as_ref().filter(|_| mtls).map(to_value_owned),
direction = if tl.reporter == Reporter::source {
"outbound"
@ -503,9 +566,26 @@ impl ConnectionResult {
// Istio flips the metric for source: https://github.com/istio/istio/issues/32399
// Unflip for logs
bytes_sent = bytes.map(|r| if tl.reporter == Reporter::source {r.0} else {r.1}),
bytes_recv = bytes.map(|r| if tl.reporter == Reporter::source {r.1} else {r.0}),
bytes_sent = if tl.reporter == Reporter::source {bytes.0} else {bytes.1},
bytes_recv = if tl.reporter == Reporter::source {bytes.1} else {bytes.0},
duration = dur,
);
}
}
impl Drop for ConnectionResult {
fn drop(&mut self) {
if !self.recorded {
self.record_internal(Err(proxy::Error::ClosedFromDrain))
}
}
}
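// A caller sketch (`do_transfer` is hypothetical): because ConnectionResult
// records on Drop, a proxy task cancelled mid-transfer still emits its close
// metric and access log (with ClosedFromDrain) instead of vanishing silently.
async fn do_transfer() -> Result<(), proxy::Error> {
    Ok(()) // stand-in for e.g. copy_bidirectional
}
async fn proxy_bytes(result_tracker: ConnectionResult) {
    let res = do_transfer().await;
    // Consumes the tracker; Drop will not record a second time.
    result_tracker.record(res);
}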
fn to_value_owned<T: ToString>(t: T) -> impl Value {
t.to_string()
}
fn to_value<T: AsRef<str>>(t: &T) -> impl Value + '_ {
let v: &str = t.as_ref();
v
}

File diff suppressed because it is too large

Some files were not shown because too many files have changed in this diff