feat: add warn logs when level1 tasks are handled slowly (#637)

Signed-off-by: Gaius <gaius.qi@gmail.com>
This commit is contained in:
Gaius 2024-07-30 17:16:00 +08:00 committed by GitHub
parent 434a395493
commit 85c73e3a00
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 35 additions and 11 deletions

View File

@ -25,9 +25,17 @@ use prometheus::{
use std::net::SocketAddr;
use std::time::Duration;
use tokio::sync::mpsc;
use tracing::{error, info};
use tracing::{error, info, warn};
use warp::{Filter, Rejection, Reply};
// DOWNLOAD_TASK_LEVEL1_DURATION_THRESHOLD is the duration threshold (500ms) for Level1
// download tasks. A Level1 download task whose cost is strictly greater than this value
// is reported with a warn log as a slow download task.
const DOWNLOAD_TASK_LEVEL1_DURATION_THRESHOLD: Duration = Duration::from_millis(500);
// UPLOAD_TASK_LEVEL1_DURATION_THRESHOLD is the duration threshold (500ms) for Level1
// upload tasks. A Level1 upload task whose cost is strictly greater than this value
// is reported with a warn log as a slow upload task.
const UPLOAD_TASK_LEVEL1_DURATION_THRESHOLD: Duration = Duration::from_millis(500);
lazy_static! {
// REGISTRY is used to register all metrics.
pub static ref REGISTRY: Registry = Registry::new();
@ -188,6 +196,7 @@ lazy_static! {
}
// TaskSize represents the size of the task.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TaskSize {
// Level0 represents unknown size.
Level0,
@ -332,13 +341,19 @@ pub fn collect_upload_task_finished_metrics(
content_length: u64,
cost: Duration,
) {
let task_size = TaskSize::calculate_size_level(content_length);
// Collect the slow upload Level1 task for analysis. Only Level1 tasks whose cost is
// strictly greater than UPLOAD_TASK_LEVEL1_DURATION_THRESHOLD are logged.
if task_size == TaskSize::Level1 && cost > UPLOAD_TASK_LEVEL1_DURATION_THRESHOLD {
    // The arguments must follow the placeholder order of the message:
    // the duration in milliseconds first, then the size in bytes.
    warn!(
        "upload task cost is too long: {}ms {}bytes",
        cost.as_millis(),
        content_length
    );
}
UPLOAD_TASK_DURATION
.with_label_values(&[
typ.to_string().as_str(),
TaskSize::calculate_size_level(content_length)
.to_string()
.as_str(),
])
.with_label_values(&[typ.to_string().as_str(), task_size.to_string().as_str()])
.observe(cost.as_millis() as f64);
CONCURRENT_UPLOAD_TASK_GAUGE
@ -383,11 +398,20 @@ pub fn collect_download_task_finished_metrics(
None => content_length,
};
let task_size = TaskSize::calculate_size_level(size);
// Nydus will request the small range of the file, so the download task duration
// should be short. Collect the slow download Level1 task for analysis. Only Level1
// tasks whose cost is strictly greater than DOWNLOAD_TASK_LEVEL1_DURATION_THRESHOLD
// are logged.
if task_size == TaskSize::Level1 && cost > DOWNLOAD_TASK_LEVEL1_DURATION_THRESHOLD {
    // The arguments must follow the placeholder order of the message:
    // the duration in milliseconds first, then the size in bytes.
    warn!(
        "download task cost is too long: {}ms {}bytes",
        cost.as_millis(),
        size
    );
}
DOWNLOAD_TASK_DURATION
.with_label_values(&[
typ.to_string().as_str(),
TaskSize::calculate_size_level(size).to_string().as_str(),
])
.with_label_values(&[typ.to_string().as_str(), task_size.to_string().as_str()])
.observe(cost.as_millis() as f64);
CONCURRENT_DOWNLOAD_TASK_GAUGE