add `ConsecutiveFailures` failure accrual policy (#2357)

Depends on https://github.com/linkerd/linkerd2-proxy/pull/2354.

This branch introduces a `ConsecutiveFailures` failure accrual policy,
which marks an endpoint as unavailable once a configured number of
failures occur in a row without an intervening success. Once an endpoint
is marked as failing, it is issued a single probe request after a delay.
If the probe succeeds, the endpoint transitions back to the available
state; otherwise, it remains unavailable, and subsequent probe requests
are issued with an exponential backoff. The consecutive-failures failure
accrual policy was initially implemented in PR #2334 by @olix0r.
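
For illustration, a policy with these semantics might be constructed
roughly as follows. This is only a sketch: the values (7 failures, a
1s–60s backoff with 10% jitter) are arbitrary examples rather than
defaults, and `try_new` is the backoff constructor exercised in the tests
added below.

```rust
use std::time::Duration;

use linkerd_exp_backoff::ExponentialBackoff;
use linkerd_proxy_client_policy::FailureAccrual;

fn example_policy() -> FailureAccrual {
    // Probe a failed endpoint starting after 1s, backing off up to 60s,
    // with 10% jitter (illustrative values, not defaults).
    let backoff = ExponentialBackoff::try_new(
        Duration::from_secs(1),
        Duration::from_secs(60),
        0.1,
    )
    .expect("backoff parameters are valid");

    // Mark an endpoint as unavailable after 7 consecutive failures.
    FailureAccrual::ConsecutiveFailures {
        max_failures: 7,
        backoff,
    }
}
```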

A new `FailureAccrual::ConsecutiveFailures` variant is added in
`linkerd2-proxy-client-policy` for configuring the consecutive-failures
failure accrual policy. The construction of circuit breakers in the
outbound stack is changed from a closure implementing `ExtractParam` to
a new `breaker::Params` type, so that the same parameter type is
returned regardless of which failure accrual policy is configured.
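
To sketch why this matters (with simplified stand-in types, not the
actual linkerd stack traits): each closure has its own anonymous type, so
returning a different closure per accrual policy yields mismatched types,
while a single concrete extractor type produces the same parameter type
for every policy.

```rust
/// Stand-in for the client policy's accrual configuration.
#[derive(Copy, Clone, Debug)]
enum FailureAccrual {
    None,
    ConsecutiveFailures { max_failures: usize },
}

/// Stand-in for the gate parameters handed to the endpoint stack.
struct GateParams {
    channel_capacity: usize,
}

/// Analogue of the new `breaker::Params` extractor.
#[derive(Copy, Clone, Debug)]
struct Params {
    accrual: FailureAccrual,
    channel_capacity: usize,
}

impl Params {
    fn extract_param(&self) -> GateParams {
        // Both arms produce the same `GateParams` type; the only difference
        // is whether a breaker task would be spawned to drive the gate.
        match self.accrual {
            FailureAccrual::None => GateParams {
                channel_capacity: self.channel_capacity,
            },
            FailureAccrual::ConsecutiveFailures { max_failures } => {
                let _ = max_failures; // would configure the spawned breaker
                GateParams {
                    channel_capacity: self.channel_capacity,
                }
            }
        }
    }
}

fn main() {
    let params = Params {
        accrual: FailureAccrual::ConsecutiveFailures { max_failures: 7 },
        channel_capacity: 100,
    };
    let gate = params.extract_param();
    println!("channel capacity: {}", gate.channel_capacity);
}
```
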
Eliza Weisman authored 2023-03-30 16:06:57 -07:00, committed by GitHub
commit d158a98480 (parent f5daf1f727)
11 changed files with 339 additions and 16 deletions

View File

@@ -1048,6 +1048,7 @@ dependencies = [
"linkerd-meshtls-rustls",
"linkerd-proxy-client-policy",
"linkerd-retry",
"linkerd-stack",
"linkerd-tonic-watch",
"linkerd-tracing",
"linkerd2-proxy-api",
@@ -1455,6 +1456,7 @@ dependencies = [
"http",
"ipnet",
"linkerd-error",
"linkerd-exp-backoff",
"linkerd-http-route",
"linkerd-proxy-api-resolve",
"linkerd-proxy-core",

View File

@@ -50,6 +50,7 @@ linkerd-meshtls = { path = "../../meshtls", features = ["rustls"] }
linkerd-meshtls-rustls = { path = "../../meshtls/rustls", features = [
    "test-util",
] }
linkerd-stack = { path = "../../stack", features = ["test-util"] }
linkerd-tracing = { path = "../../tracing", features = ["ansi"] }
parking_lot = "0.12"
tokio = { version = "1", features = ["macros", "sync", "time"] }

View File

@@ -12,6 +12,7 @@ use linkerd_app_core::{
use std::{fmt::Debug, hash::Hash};
use tokio::sync::watch;

mod breaker;
pub mod concrete;
mod endpoint;
mod handle_proxy_error_headers;

View File

@@ -0,0 +1,53 @@
mod consecutive_failures;

use consecutive_failures::ConsecutiveFailures;
use linkerd_app_core::{classify, proxy::http::classify::gate, svc};
use linkerd_proxy_client_policy::FailureAccrual;
use tracing::{trace_span, Instrument};

/// Params configuring a circuit breaker stack.
#[derive(Copy, Clone, Debug)]
pub(crate) struct Params {
    pub(crate) accrual: FailureAccrual,
    pub(crate) channel_capacity: usize,
}

impl<T> svc::ExtractParam<gate::Params<classify::Class>, T> for Params {
    fn extract_param(&self, _: &T) -> gate::Params<classify::Class> {
        // Create a channel so that we can receive response summaries and
        // control the gate.
        let (prms, gate, rsps) = gate::Params::channel(self.channel_capacity);
        match self.accrual {
            FailureAccrual::None => {
                // No failure accrual for this target; construct a gate
                // that will never close.
                tracing::trace!("No failure accrual policy enabled.");
                prms
            }
            FailureAccrual::ConsecutiveFailures {
                max_failures,
                backoff,
            } => {
                tracing::trace!(
                    max_failures,
                    backoff = ?backoff,
                    "Using consecutive-failures failure accrual policy.",
                );
                // 1. If the configured number of consecutive failures are encountered,
                //    shut the gate.
                // 2. After an ejection timeout, open the gate so that 1 request can be processed.
                // 3. If that request succeeds, open the gate. If it fails, increase the
                //    ejection timeout and repeat.
                let breaker = ConsecutiveFailures::new(max_failures, backoff, gate, rsps);
                tokio::spawn(
                    breaker
                        .run()
                        .instrument(trace_span!("consecutive_failures").or_current()),
                );
                prms
            }
        }
    }
}

View File

@@ -0,0 +1,212 @@
use futures::stream::StreamExt;
use linkerd_app_core::{classify, exp_backoff::ExponentialBackoff, proxy::http::classify::gate};
use std::sync::Arc;
use tokio::sync::{mpsc, Semaphore};

pub struct ConsecutiveFailures {
    max_failures: usize,
    backoff: ExponentialBackoff,
    gate: gate::Tx,
    rsps: mpsc::Receiver<classify::Class>,
    semaphore: Arc<Semaphore>,
}

impl ConsecutiveFailures {
    pub fn new(
        max_failures: usize,
        backoff: ExponentialBackoff,
        gate: gate::Tx,
        rsps: mpsc::Receiver<classify::Class>,
    ) -> Self {
        Self {
            max_failures,
            backoff,
            gate,
            rsps,
            semaphore: Arc::new(Semaphore::new(0)),
        }
    }

    pub(super) async fn run(mut self) {
        loop {
            if self.open().await.is_err() {
                return;
            }
            if self.closed().await.is_err() {
                return;
            }
        }
    }

    /// Keep the breaker open until `max_failures` consecutive failures are
    /// observed.
    async fn open(&mut self) -> Result<(), ()> {
        tracing::debug!("Open");
        self.gate.open();
        let mut failures = 0;
        loop {
            let class = tokio::select! {
                rsp = self.rsps.recv() => rsp.ok_or(())?,
                _ = self.gate.lost() => return Err(()),
            };
            tracing::trace!(?class, %failures, "Response");
            if class.is_success() {
                failures = 0;
            } else {
                failures += 1;
                if failures == self.max_failures {
                    return Ok(());
                }
            }
        }
    }

    /// Keep the breaker closed for at least the initial backoff, and then,
    /// once the timeout expires, go into probation to admit a single request
    /// before reverting to the open state or continuing in the shut state.
    async fn closed(&mut self) -> Result<(), ()> {
        let mut backoff = self.backoff.stream();
        loop {
            // The breaker is shut now. Wait until we can open it again.
            tracing::debug!(backoff = ?backoff.duration(), "Shut");
            self.gate.shut();
            loop {
                tokio::select! {
                    _ = backoff.next() => break,
                    // Ignore responses while the breaker is shut.
                    _ = self.rsps.recv() => continue,
                    _ = self.gate.lost() => return Err(()),
                }
            }
            let class = self.probation().await?;
            tracing::trace!(?class, "Response");
            if class.is_success() {
                // Open!
                return Ok(());
            }
        }
    }

    /// Wait for a response to determine whether the breaker should be opened.
    async fn probation(&mut self) -> Result<classify::Class, ()> {
        tracing::debug!("Probation");
        self.semaphore.add_permits(1);
        self.gate.limit(self.semaphore.clone());
        tokio::select! {
            rsp = self.rsps.recv() => rsp.ok_or(()),
            _ = self.gate.lost() => Err(()),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use tokio::time;
    use tokio_test::{assert_pending, task};

    #[tokio::test(flavor = "current_thread", start_paused = true)]
    async fn transitions() {
        let _trace = linkerd_tracing::test::trace_init();

        let (mut params, gate, rsps) = gate::Params::channel(1);
        let send = |res: Result<http::StatusCode, http::StatusCode>| {
            params
                .responses
                .try_send(classify::Class::Http(res))
                .unwrap()
        };

        let backoff = ExponentialBackoff::try_new(
            time::Duration::from_secs(1),
            time::Duration::from_secs(100),
            // Don't jitter backoffs to ensure tests are deterministic.
            0.0,
        )
        .expect("backoff params are valid");
        let breaker = ConsecutiveFailures::new(2, backoff, gate, rsps);
        let mut task = task::spawn(breaker.run());

        // Start open and failing.
        send(Err(http::StatusCode::BAD_GATEWAY));
        assert_pending!(task.poll());
        assert!(params.gate.is_open());
        send(Err(http::StatusCode::BAD_GATEWAY));
        assert_pending!(task.poll());
        assert!(params.gate.is_shut());

        // After two failures, the breaker is closed.
        time::sleep(time::Duration::from_millis(500)).await;
        assert_pending!(task.poll());
        assert!(params.gate.is_shut());

        // It remains closed until the init ejection backoff elapses. Then it's
        // limited to a single request.
        time::sleep(time::Duration::from_millis(500)).await;
        assert_pending!(task.poll());
        // hold the permit to prevent the breaker from opening
        match params.gate.state() {
            gate::State::Open => panic!("still open"),
            gate::State::Shut => panic!("still shut"),
            gate::State::Limited(sem) => {
                assert_eq!(sem.available_permits(), 1);
                params
                    .gate
                    .acquire_for_test()
                    .await
                    .expect("permit should be acquired")
                    // The `Gate` service would forget this permit when called, so
                    // we must do the same here explicitly.
                    .forget();
                assert_eq!(sem.available_permits(), 0);
            }
        };

        // The first request in probation fails, so the breaker remains closed.
        send(Err(http::StatusCode::BAD_GATEWAY));
        assert_pending!(task.poll());
        assert!(params.gate.is_shut());

        // The breaker goes into closed for 2s now...
        time::sleep(time::Duration::from_secs(1)).await;
        assert_pending!(task.poll());
        assert!(params.gate.is_shut());

        // If some straggling responses are observed while the breaker is
        // closed, they are ignored.
        send(Ok(http::StatusCode::OK));
        assert_pending!(task.poll());
        assert!(params.gate.is_shut());

        // After that timeout elapses, we go into probation again.
        time::sleep(time::Duration::from_secs(1)).await;
        assert_pending!(task.poll());
        match params.gate.state() {
            gate::State::Open => panic!("still open"),
            gate::State::Shut => panic!("still shut"),
            gate::State::Limited(sem) => {
                assert_eq!(sem.available_permits(), 1);
                params
                    .gate
                    .acquire_for_test()
                    .await
                    .expect("permit should be acquired")
                    // The `Gate` service would forget this permit when called, so
                    // we must do the same here explicitly.
                    .forget();
                assert_eq!(sem.available_permits(), 0);
            }
        }

        // And then a single success takes us back into the open state!
        send(Ok(http::StatusCode::OK));
        assert_pending!(task.poll());
        assert!(params.gate.is_open());
    }
}

View File

@@ -1,7 +1,7 @@
//! A stack that (optionally) resolves a service to a set of endpoint replicas
//! and distributes HTTP requests among them.

use super::{balance, client, handle_proxy_error_headers};
use super::{balance, breaker, client, handle_proxy_error_headers};
use crate::{http, stack_labels, Outbound};
use linkerd_app_core::{
    classify, metrics, profiles,
@@ -128,20 +128,6 @@ impl<N> Outbound<N> {
            )
            .instrument(|e: &Endpoint<T>| info_span!("endpoint", addr = %e.addr));

        let mk_breaker = |target: &Balance<T>| {
            match target.parent.param() {
                FailureAccrual::None => |_: &(SocketAddr, Metadata)| {
                    // Construct a gate channel, dropping the controller
                    // side of the channel such that response summaries
                    // are never read. The failure accrual gate never
                    // closes in this configuration.
                    tracing::trace!("No failure accrual policy enabled");
                    let (prms, _, _) = classify::gate::Params::channel(1);
                    prms
                },
            }
        };

        let balance = endpoint
            .push_map_target({
                let inbound_ips = inbound_ips.clone();
@@ -163,7 +149,20 @@ impl<N> Outbound<N> {
            .push_on_service(svc::MapErr::layer_boxed())
            .lift_new_with_target()
            .push(
                http::NewClassifyGateSet::<classify::Response, _, _, _>::layer_via(mk_breaker),
                http::NewClassifyGateSet::<classify::Response, _, _, _>::layer_via({
                    // This sets the capacity of the channel used to send
                    // response classifications to the failure accrual task.
                    // Since the number of messages in this channel should
                    // be roughly the same as the number of requests, size
                    // it to the request queue capacity.
                    // TODO(eliza): when queue capacity is configured
                    // per-target, extract this from the target instead.
                    let channel_capacity = config.http_request_queue.capacity;
                    move |target: &Balance<T>| breaker::Params {
                        accrual: target.parent.param(),
                        channel_capacity,
                    }
                }),
            )
            .push(http::NewBalancePeakEwma::layer(resolve))
            .push(svc::NewMapErr::layer_from_target::<ConcreteError, _>())

View File

@@ -34,12 +34,15 @@ pub struct ExponentialBackoffStream {
    iterations: u32,
    sleeping: bool,
    sleep: Pin<Box<time::Sleep>>,
    duration: time::Duration,
}

#[derive(Clone, Debug, Error)]
#[error("invalid backoff: {0}")]
pub struct InvalidBackoff(&'static str);

// === impl ExponentialBackoff ===

impl ExponentialBackoff {
    pub const fn new_unchecked(min: time::Duration, max: time::Duration, jitter: f64) -> Self {
        Self { min, max, jitter }
@@ -75,6 +78,7 @@ impl ExponentialBackoff {
            iterations: 0,
            sleeping: false,
            sleep: Box::pin(time::sleep(time::Duration::from_secs(0))),
            duration: time::Duration::from_secs(0),
        }
    }
@@ -113,6 +117,28 @@ impl ExponentialBackoff {
    }
}

impl PartialEq for ExponentialBackoff {
    fn eq(&self, other: &Self) -> bool {
        debug_assert!(self.jitter.is_finite());
        debug_assert!(other.jitter.is_finite());
        self.min == other.min && self.max == other.max && self.jitter == other.jitter
    }
}

// It's okay for `ExponentialBackoff`s to be `Eq` because we assert that the
// jitter field (a float) is finite when constructing the backoff.
impl Eq for ExponentialBackoff {}

impl std::hash::Hash for ExponentialBackoff {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        self.min.hash(state);
        self.max.hash(state);
        self.jitter.to_bits().hash(state);
    }
}

// === impl ExponentialBackoffStream ===

impl Stream for ExponentialBackoffStream {
    type Item = ();
@@ -136,12 +162,20 @@ impl Stream for ExponentialBackoffStream {
                let base = this.backoff.base(*this.iterations);
                base + this.backoff.jitter(base, &mut this.rng)
            };
            *this.duration = backoff;
            this.sleep.as_mut().reset(time::Instant::now() + backoff);
            *this.sleeping = true;
        }
    }
}

impl ExponentialBackoffStream {
    /// Returns the duration of the current backoff.
    pub fn duration(&self) -> time::Duration {
        self.duration
    }
}

#[cfg(test)]
mod tests {
    use super::*;

View File

@@ -22,6 +22,7 @@ linkerd2-proxy-api = { version = "0.8", optional = true, features = [
    "outbound",
] }
linkerd-error = { path = "../../error" }
linkerd-exp-backoff = { path = "../../exp-backoff" }
linkerd-http-route = { path = "../../http-route" }
linkerd-proxy-api-resolve = { path = "../api-resolve" }
linkerd-proxy-core = { path = "../core" }

View File

@@ -120,6 +120,15 @@ pub struct PeakEwma {
pub enum FailureAccrual {
    /// Endpoints do not become unavailable due to observed failures.
    None,
    /// Endpoints are marked as unavailable when `max_failures` consecutive
    /// failures are observed.
    ConsecutiveFailures {
        /// The number of consecutive failures after which an endpoint becomes
        /// unavailable.
        max_failures: usize,
        /// Backoff for probing the endpoint when it is in a failed state.
        backoff: linkerd_exp_backoff::ExponentialBackoff,
    },
}

// === impl ClientPolicy ===

View File

@@ -9,6 +9,9 @@ description = """
Utilities for composing Tower services.
"""

[features]
test-util = []

[dependencies]
futures = { version = "0.3", default-features = false }
linkerd-error = { path = "../error" }

View File

@@ -71,6 +71,14 @@ impl Rx {
        self.0.changed().await
    }

    /// Waits for the gate state to change to be open.
    ///
    /// This is intended for testing purposes.
    #[cfg(feature = "test-util")]
    pub async fn acquire_for_test(&mut self) -> Option<OwnedSemaphorePermit> {
        self.acquire().await
    }

    /// Waits for the gate state to change to be open.
    async fn acquire(&mut self) -> Option<OwnedSemaphorePermit> {
        loop {