feat: add warn logs when level1 tasks are handled slowly (#637)

Signed-off-by: Gaius <gaius.qi@gmail.com>
This commit is contained in:
Gaius 2024-07-30 17:16:00 +08:00 committed by GitHub
parent 434a395493
commit 85c73e3a00
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 35 additions and 11 deletions

View File

@ -25,9 +25,17 @@ use prometheus::{
use std::net::SocketAddr; use std::net::SocketAddr;
use std::time::Duration; use std::time::Duration;
use tokio::sync::mpsc; use tokio::sync::mpsc;
use tracing::{error, info}; use tracing::{error, info, warn};
use warp::{Filter, Rejection, Reply}; use warp::{Filter, Rejection, Reply};
// DOWNLOAD_TASK_LEVEL1_DURATION_THRESHOLD is the duration above which a level1 download
// task is considered slow and is logged as a warning.
const DOWNLOAD_TASK_LEVEL1_DURATION_THRESHOLD: Duration = Duration::from_millis(500);
// UPLOAD_TASK_LEVEL1_DURATION_THRESHOLD is the duration above which a level1 upload
// task is considered slow and is logged as a warning.
const UPLOAD_TASK_LEVEL1_DURATION_THRESHOLD: Duration = Duration::from_millis(500);
lazy_static! { lazy_static! {
// REGISTRY is used to register all metrics. // REGISTRY is used to register all metrics.
pub static ref REGISTRY: Registry = Registry::new(); pub static ref REGISTRY: Registry = Registry::new();
@ -188,6 +196,7 @@ lazy_static! {
} }
// TaskSize represents the size of the task. // TaskSize represents the size of the task.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TaskSize { pub enum TaskSize {
// Level0 represents unknown size. // Level0 represents unknown size.
Level0, Level0,
@ -332,13 +341,19 @@ pub fn collect_upload_task_finished_metrics(
content_length: u64, content_length: u64,
cost: Duration, cost: Duration,
) { ) {
let task_size = TaskSize::calculate_size_level(content_length);
// Log slow Level1 upload tasks so they can be analyzed. The arguments follow the
// placeholder order in the message: elapsed milliseconds first, then size in bytes.
if task_size == TaskSize::Level1 && cost > UPLOAD_TASK_LEVEL1_DURATION_THRESHOLD {
    warn!(
        "upload task cost is too long: {}ms {}bytes",
        cost.as_millis(),
        content_length
    );
}
UPLOAD_TASK_DURATION UPLOAD_TASK_DURATION
.with_label_values(&[ .with_label_values(&[typ.to_string().as_str(), task_size.to_string().as_str()])
typ.to_string().as_str(),
TaskSize::calculate_size_level(content_length)
.to_string()
.as_str(),
])
.observe(cost.as_millis() as f64); .observe(cost.as_millis() as f64);
CONCURRENT_UPLOAD_TASK_GAUGE CONCURRENT_UPLOAD_TASK_GAUGE
@ -383,11 +398,20 @@ pub fn collect_download_task_finished_metrics(
None => content_length, None => content_length,
}; };
let task_size = TaskSize::calculate_size_level(size);
// Nydus will request the small range of the file, so the download task duration
// should be short. Log slow Level1 download tasks so they can be analyzed. The
// arguments follow the placeholder order in the message: elapsed milliseconds
// first, then size in bytes.
if task_size == TaskSize::Level1 && cost > DOWNLOAD_TASK_LEVEL1_DURATION_THRESHOLD {
    warn!(
        "download task cost is too long: {}ms {}bytes",
        cost.as_millis(),
        size
    );
}
DOWNLOAD_TASK_DURATION DOWNLOAD_TASK_DURATION
.with_label_values(&[ .with_label_values(&[typ.to_string().as_str(), task_size.to_string().as_str()])
typ.to_string().as_str(),
TaskSize::calculate_size_level(size).to_string().as_str(),
])
.observe(cost.as_millis() as f64); .observe(cost.as_millis() as f64);
CONCURRENT_DOWNLOAD_TASK_GAUGE CONCURRENT_DOWNLOAD_TASK_GAUGE