mirror of https://github.com/tikv/client-rust.git
pd: fix `get_timestamp` potentially hang (#432)
* fix get tso hang
* wake
* polish

Signed-off-by: Ping Yu <yuping@pingcap.com>
Parent: 802b361df7
Commit: b12f95e3b2
@@ -70,18 +70,12 @@ impl<Cl> RetryClient<Cl> {
     }
 }
 
-macro_rules! retry {
-    ($self: ident, $tag: literal, |$cluster: ident| $call: expr) => {{
+macro_rules! retry_core {
+    ($self: ident, $tag: literal, $call: expr) => {{
         let stats = pd_stats($tag);
         let mut last_err = Ok(());
         for _ in 0..LEADER_CHANGE_RETRY {
-            // use the block here to drop the guard of the read lock,
-            // otherwise `reconnect` will try to acquire the write lock and results in a deadlock
-            let res = {
-                let $cluster = &mut $self.cluster.write().await.0;
-                let res = $call.await;
-                res
-            };
+            let res = $call;
 
             match stats.done(res) {
                 Ok(r) => return Ok(r),
@@ -103,6 +97,28 @@ macro_rules! retry {
     }};
 }
 
+macro_rules! retry_mut {
+    ($self: ident, $tag: literal, |$cluster: ident| $call: expr) => {{
+        retry_core!($self, $tag, {
+            // use the block here to drop the guard of the lock,
+            // otherwise `reconnect` will try to acquire the write lock and results in a deadlock
+            let $cluster = &mut $self.cluster.write().await.0;
+            $call.await
+        })
+    }};
+}
+
+macro_rules! retry {
+    ($self: ident, $tag: literal, |$cluster: ident| $call: expr) => {{
+        retry_core!($self, $tag, {
+            // use the block here to drop the guard of the lock,
+            // otherwise `reconnect` will try to acquire the write lock and results in a deadlock
+            let $cluster = &$self.cluster.read().await.0;
+            $call.await
+        })
+    }};
+}
+
 impl RetryClient<Cluster> {
     pub async fn connect(
         endpoints: &[String],
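The hunks above split the old `retry!` macro, which always took the cluster write lock, into a shared `retry_core!` plus two wrappers: `retry_mut!` keeps the exclusive write lock for calls that may mutate the connection, while the new `retry!` only takes a read lock. Read-only requests (per the commit title, `get_timestamp` is the one that could hang) presumably go through the read-lock path and no longer queue up behind exclusive writers. Below is a minimal sketch of the locking pattern; the names (`Cluster`, `get_timestamp`, `reconnect`) are illustrative here, not the crate's actual API:

```rust
// Sketch only: shows why scoping read vs. write locks matters.
use std::sync::Arc;
use tokio::sync::RwLock;

struct Cluster {
    leader: String,
}

impl Cluster {
    // A read-only request, analogous to the calls wrapped by the new `retry!`.
    async fn get_timestamp(&self) -> u64 {
        42
    }

    // A mutating request, analogous to the calls wrapped by `retry_mut!`.
    async fn reconnect(&mut self, leader: String) {
        self.leader = leader;
    }
}

#[tokio::main]
async fn main() {
    let cluster = Arc::new(RwLock::new(Cluster { leader: "pd-0".into() }));

    // Read-lock path: shared access, so concurrent readers do not block each other.
    let ts = {
        let guard = cluster.read().await;
        guard.get_timestamp().await
    }; // guard dropped here, before any write lock is requested

    // Write-lock path: exclusive access, used only when mutation is required.
    {
        let mut guard = cluster.write().await;
        guard.reconnect("pd-1".into()).await;
    }

    println!("ts = {ts}, leader = {}", cluster.read().await.leader);
}
```

The hunks below switch every call site that needs `&mut` access to the cluster over to `retry_mut!`; the tests follow the same rename.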
@@ -127,7 +143,7 @@ impl RetryClientTrait for RetryClient<Cluster> {
     // These get_* functions will try multiple times to make a request, reconnecting as necessary.
     // It does not know about encoding. Caller should take care of it.
     async fn get_region(self: Arc<Self>, key: Vec<u8>) -> Result<RegionWithLeader> {
-        retry!(self, "get_region", |cluster| {
+        retry_mut!(self, "get_region", |cluster| {
             let key = key.clone();
             async {
                 cluster
@@ -141,7 +157,7 @@ impl RetryClientTrait for RetryClient<Cluster> {
     }
 
     async fn get_region_by_id(self: Arc<Self>, region_id: RegionId) -> Result<RegionWithLeader> {
-        retry!(self, "get_region_by_id", |cluster| async {
+        retry_mut!(self, "get_region_by_id", |cluster| async {
             cluster
                 .get_region_by_id(region_id, self.timeout)
                 .await
@@ -152,7 +168,7 @@ impl RetryClientTrait for RetryClient<Cluster> {
     }
 
     async fn get_store(self: Arc<Self>, id: StoreId) -> Result<metapb::Store> {
-        retry!(self, "get_store", |cluster| async {
+        retry_mut!(self, "get_store", |cluster| async {
             cluster
                 .get_store(id, self.timeout)
                 .await
@@ -161,7 +177,7 @@ impl RetryClientTrait for RetryClient<Cluster> {
     }
 
     async fn get_all_stores(self: Arc<Self>) -> Result<Vec<metapb::Store>> {
-        retry!(self, "get_all_stores", |cluster| async {
+        retry_mut!(self, "get_all_stores", |cluster| async {
             cluster
                 .get_all_stores(self.timeout)
                 .await
@@ -174,7 +190,7 @@ impl RetryClientTrait for RetryClient<Cluster> {
     }
 
     async fn update_safepoint(self: Arc<Self>, safepoint: u64) -> Result<bool> {
-        retry!(self, "update_gc_safepoint", |cluster| async {
+        retry_mut!(self, "update_gc_safepoint", |cluster| async {
             cluster
                 .update_safepoint(safepoint, self.timeout)
                 .await
@@ -257,7 +273,7 @@ mod test {
     }
 
     async fn retry_err(client: Arc<MockClient>) -> Result<()> {
-        retry!(client, "test", |_c| ready(Err(internal_err!("whoops"))))
+        retry_mut!(client, "test", |_c| ready(Err(internal_err!("whoops"))))
     }
 
     async fn retry_ok(client: Arc<MockClient>) -> Result<()> {
@@ -310,7 +326,7 @@ mod test {
         client: Arc<MockClient>,
         max_retries: Arc<AtomicUsize>,
     ) -> Result<()> {
-        retry!(client, "test", |c| {
+        retry_mut!(client, "test", |c| {
             c.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
 
             let max_retries = max_retries.fetch_sub(1, Ordering::SeqCst) - 1;
@@ -98,15 +98,13 @@ async fn run_tso(
     let mut responses = pd_client.tso(request_stream).await?.into_inner();
 
     while let Some(Ok(resp)) = responses.next().await {
-        let mut pending_requests = pending_requests.lock().await;
-
-        // Wake up the sending future blocked by too many pending requests as we are consuming
-        // some of them here.
-        if pending_requests.len() == MAX_PENDING_COUNT {
-            sending_future_waker.wake();
-        }
-
-        allocate_timestamps(&resp, &mut pending_requests)?;
+        {
+            let mut pending_requests = pending_requests.lock().await;
+            allocate_timestamps(&resp, &mut pending_requests)?;
+        }
+
+        // Wake up the sending future blocked by too many pending requests or locked.
+        sending_future_waker.wake();
     }
     // TODO: distinguish between unexpected stream termination and expected end of test
     info!("TSO stream terminated");
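In `run_tso`, the response loop now holds the `pending_requests` mutex only inside an inner block, drops the guard before waking the sender, and wakes the sending future after every response instead of only when the queue length happened to equal `MAX_PENDING_COUNT`. The old equality check could miss the wakeup, for example when the sender was parked because the queue was locked rather than full, leaving it blocked forever; that lost wakeup is the hang this commit fixes. A minimal sketch of the "scope the lock, then wake" pattern, with illustrative types (`Shared`, `consume_response`) that are not part of the client:

```rust
use std::sync::Arc;

use futures::task::AtomicWaker;
use tokio::sync::Mutex;

struct Shared {
    pending: Mutex<Vec<u64>>,
    sender_waker: AtomicWaker,
}

async fn consume_response(shared: &Shared) {
    // Hold the lock only for the bookkeeping this response needs ...
    {
        let mut pending = shared.pending.lock().await;
        let n = pending.len().min(4);
        pending.drain(..n);
    } // ... and drop the guard here, before waking the producer.

    // Wake unconditionally: the producer may be parked either because the
    // queue was full or because it previously found the queue locked.
    shared.sender_waker.wake();
}

#[tokio::main]
async fn main() {
    let shared = Arc::new(Shared {
        pending: Mutex::new((0..8).collect()),
        sender_waker: AtomicWaker::new(),
    });
    consume_response(&shared).await;
    println!("left: {}", shared.pending.lock().await.len());
}
```

The final hunks adjust the `TsoRequestStream` poll loop on the sending side.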
@@ -139,6 +137,7 @@ impl Stream for TsoRequestStream {
         {
             pending_requests
         } else {
+            this.self_waker.register(cx.waker());
             return Poll::Pending;
         };
         let mut requests = Vec::new();
@@ -148,8 +147,8 @@ impl Stream for TsoRequestStream {
                 Poll::Ready(Some(sender)) => {
                     requests.push(sender);
                 }
-                Poll::Ready(None) => return Poll::Ready(None),
-                Poll::Pending => break,
+                Poll::Ready(None) if requests.is_empty() => return Poll::Ready(None),
+                _ => break,
             }
         }
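Two changes on the sending side: when the stream cannot make progress it now registers its own waker (`this.self_waker.register(cx.waker())`) before returning `Poll::Pending`, so the unconditional wake issued by the response loop can actually reach it; and `Poll::Ready(None)` terminates the stream only when no request senders have been collected in this poll, so already-batched requests are still flushed first. Below is a minimal, self-contained sketch of the register-before-`Pending` pattern; `Gate` and `WaitForGate` are illustrative names, not part of the client:

```rust
use std::future::Future;
use std::pin::Pin;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::task::{Context, Poll};

use futures::task::AtomicWaker;

#[derive(Default)]
struct Gate {
    open: AtomicBool,
    waker: AtomicWaker,
}

struct WaitForGate(Arc<Gate>);

impl Future for WaitForGate {
    type Output = ();

    fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> {
        if self.0.open.load(Ordering::Acquire) {
            return Poll::Ready(());
        }
        // Register *before* returning Pending, then re-check, so a wake that
        // races with this poll is never lost. This mirrors why the TSO request
        // stream registers `self_waker` before returning Pending.
        self.0.waker.register(cx.waker());
        if self.0.open.load(Ordering::Acquire) {
            return Poll::Ready(());
        }
        Poll::Pending
    }
}

#[tokio::main]
async fn main() {
    let gate = Arc::new(Gate::default());
    let waiter = tokio::spawn(WaitForGate(gate.clone()));

    gate.open.store(true, Ordering::Release);
    gate.waker.wake(); // without the registration above, this wake could be missed

    waiter.await.unwrap();
    println!("woken");
}
```

Registering and then re-checking the condition closes the window where a `wake()` lands between the check and the registration, which is the same lost-wakeup shape as the `get_timestamp` hang.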