diff --git a/scheduler/metrics/metrics.go b/scheduler/metrics/metrics.go index 48d7b0d79..88f035978 100644 --- a/scheduler/metrics/metrics.go +++ b/scheduler/metrics/metrics.go @@ -252,13 +252,13 @@ var ( Help: "Counter of the number of per host traffic.", }, []string{"type", "task_type", "task_tag", "task_app", "host_type", "host_id", "host_ip", "host_name"}) - DownloadPeerDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: types.MetricsNamespace, - Subsystem: types.SchedulerMetricsName, - Name: "download_peer_duration_milliseconds", - Help: "Histogram of the time each peer downloading.", - Buckets: []float64{100, 200, 500, 1000, 1500, 2 * 1000, 3 * 1000, 5 * 1000, 10 * 1000, 20 * 1000, 60 * 1000, 120 * 1000, 300 * 1000}, - }, []string{"priority", "task_type", "task_tag", "task_app", "task_content_length", "host_type"}) + DownloadPeerDuration = promauto.NewSummaryVec(prometheus.SummaryOpts{ + Namespace: types.MetricsNamespace, + Subsystem: types.SchedulerMetricsName, + Name: "download_peer_duration_milliseconds", + Help: "Summary of the time each peer downloading.", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001}, + }, []string{"task_size_level"}) ConcurrentScheduleGauge = promauto.NewGauge(prometheus.GaugeOpts{ Namespace: types.MetricsNamespace, @@ -287,3 +287,168 @@ func New(cfg *config.MetricsConfig, svr *grpc.Server) *http.Server { Handler: mux, } } + +// TaskSizeLevel is the level of the task size. +type TaskSizeLevel int + +// String returns the string representation of the TaskSizeLevel. +func (t TaskSizeLevel) String() string { + switch t { + case TaskSizeLevel0: + return "0" + case TaskSizeLevel1: + return "1" + case TaskSizeLevel2: + return "2" + case TaskSizeLevel3: + return "3" + case TaskSizeLevel4: + return "4" + case TaskSizeLevel5: + return "5" + case TaskSizeLevel6: + return "6" + case TaskSizeLevel7: + return "7" + case TaskSizeLevel8: + return "8" + case TaskSizeLevel9: + return "9" + case TaskSizeLevel10: + return "10" + case TaskSizeLevel11: + return "11" + case TaskSizeLevel12: + return "12" + case TaskSizeLevel13: + return "13" + case TaskSizeLevel14: + return "14" + case TaskSizeLevel15: + return "15" + case TaskSizeLevel16: + return "16" + case TaskSizeLevel17: + return "17" + case TaskSizeLevel18: + return "18" + case TaskSizeLevel19: + return "19" + case TaskSizeLevel20: + return "20" + default: + return "0" + } +} + +const ( + // TaskSizeLevel0 represents unknow size. + TaskSizeLevel0 TaskSizeLevel = iota + + // TaskSizeLevel0 represents size range is from 0 to 1M. + TaskSizeLevel1 + + // TaskSizeLevel1 represents size range is from 1M to 4M. + TaskSizeLevel2 + + // TaskSizeLevel2 represents size range is from 4M to 8M. + TaskSizeLevel3 + + // TaskSizeLevel3 represents size range is from 8M to 16M. + TaskSizeLevel4 + + // TaskSizeLevel4 represents size range is from 16M to 32M. + TaskSizeLevel5 + + // TaskSizeLevel5 represents size range is from 32M to 64M. + TaskSizeLevel6 + + // TaskSizeLevel6 represents size range is from 64M to 128M. + TaskSizeLevel7 + + // TaskSizeLevel7 represents size range is from 128M to 256M. + TaskSizeLevel8 + + // TaskSizeLevel8 represents size range is from 256M to 512M. + TaskSizeLevel9 + + // TaskSizeLevel9 represents size range is from 512M to 1G. + TaskSizeLevel10 + + // TaskSizeLevel10 represents size range is from 1G to 4G. + TaskSizeLevel11 + + // TaskSizeLevel11 represents size range is from 4G to 8G. + TaskSizeLevel12 + + // TaskSizeLevel12 represents size range is from 8G to 16G. + TaskSizeLevel13 + + // TaskSizeLevel13 represents size range is from 16G to 32G. + TaskSizeLevel14 + + // TaskSizeLevel14 represents size range is from 32G to 64G. + TaskSizeLevel15 + + // TaskSizeLevel15 represents size range is from 64G to 128G. + TaskSizeLevel16 + + // TaskSizeLevel16 represents size range is from 128G to 256G. + TaskSizeLevel17 + + // TaskSizeLevel17 represents size range is from 256G to 512G. + TaskSizeLevel18 + + // TaskSizeLevel18 represents size range is from 512G to 1T. + TaskSizeLevel19 + + // TaskSizeLevel20 represents size is greater than 1T. + TaskSizeLevel20 +) + +// CalculateSizeLevel calculates the size level according to the size. +func CalculateSizeLevel(size int64) TaskSizeLevel { + if size <= 0 { + return TaskSizeLevel0 + } else if size < 1024*1024 { + return TaskSizeLevel1 + } else if size < 4*1024*1024 { + return TaskSizeLevel2 + } else if size < 8*1024*1024 { + return TaskSizeLevel3 + } else if size < 16*1024*1024 { + return TaskSizeLevel4 + } else if size < 32*1024*1024 { + return TaskSizeLevel5 + } else if size < 64*1024*1024 { + return TaskSizeLevel6 + } else if size < 128*1024*1024 { + return TaskSizeLevel7 + } else if size < 256*1024*1024 { + return TaskSizeLevel8 + } else if size < 512*1024*1024 { + return TaskSizeLevel9 + } else if size < 1024*1024*1024 { + return TaskSizeLevel10 + } else if size < 4*1024*1024*1024 { + return TaskSizeLevel11 + } else if size < 8*1024*1024*1024 { + return TaskSizeLevel12 + } else if size < 16*1024*1024*1024 { + return TaskSizeLevel13 + } else if size < 32*1024*1024*1024 { + return TaskSizeLevel14 + } else if size < 64*1024*1024*1024 { + return TaskSizeLevel15 + } else if size < 128*1024*1024*1024 { + return TaskSizeLevel16 + } else if size < 256*1024*1024*1024 { + return TaskSizeLevel17 + } else if size < 512*1024*1024*1024 { + return TaskSizeLevel18 + } else if size < 1024*1024*1024*1024 { + return TaskSizeLevel19 + } else { + return TaskSizeLevel20 + } +} diff --git a/scheduler/service/service_v1.go b/scheduler/service/service_v1.go index 987413587..0acbef3c8 100644 --- a/scheduler/service/service_v1.go +++ b/scheduler/service/service_v1.go @@ -323,15 +323,13 @@ func (v *V1) ReportPeerResult(ctx context.Context, req *schedulerv1.PeerResult) go v.createDownloadRecord(peer, parents, req) v.handleTaskSuccess(ctx, peer.Task, req) v.handlePeerSuccess(ctx, peer) - metrics.DownloadPeerDuration.WithLabelValues(priority.String(), peer.Task.Type.String(), - peer.Task.Tag, peer.Task.Application, peer.Task.ContentLength.String(), peer.Host.Type.Name()).Observe(float64(req.GetCost())) + metrics.DownloadPeerDuration.WithLabelValues(metrics.CalculateSizeLevel(peer.Task.ContentLength.Load()).String()).Observe(float64(req.GetCost())) return nil } go v.createDownloadRecord(peer, parents, req) v.handlePeerSuccess(ctx, peer) - metrics.DownloadPeerDuration.WithLabelValues(priority.String(), peer.Task.Type.String(), - peer.Task.Tag, peer.Task.Application, peer.Task.ContentLength.String(), peer.Host.Type.Name()).Observe(float64(req.GetCost())) + metrics.DownloadPeerDuration.WithLabelValues(metrics.CalculateSizeLevel(peer.Task.ContentLength.Load()).String()).Observe(float64(req.GetCost())) return nil } diff --git a/scheduler/service/service_v2.go b/scheduler/service/service_v2.go index f8d2260ce..d39cd964e 100644 --- a/scheduler/service/service_v2.go +++ b/scheduler/service/service_v2.go @@ -967,8 +967,7 @@ func (v *V2) handleDownloadPeerFinishedRequest(ctx context.Context, peerID strin metrics.DownloadPeerCount.WithLabelValues(priority.String(), peer.Task.Type.String(), peer.Task.Tag, peer.Task.Application, peer.Host.Type.Name()).Inc() // TODO to be determined which traffic type to use, temporarily use TrafficType_REMOTE_PEER instead - metrics.DownloadPeerDuration.WithLabelValues(priority.String(), peer.Task.Type.String(), - peer.Task.Tag, peer.Task.Application, peer.Task.ContentLength.String(), peer.Host.Type.Name()).Observe(float64(peer.Cost.Load())) + metrics.DownloadPeerDuration.WithLabelValues(metrics.CalculateSizeLevel(peer.Task.ContentLength.Load()).String()).Observe(float64(peer.Cost.Load())) return nil } @@ -1018,8 +1017,7 @@ func (v *V2) handleDownloadPeerBackToSourceFinishedRequest(ctx context.Context, metrics.DownloadPeerCount.WithLabelValues(priority.String(), peer.Task.Type.String(), peer.Task.Tag, peer.Task.Application, peer.Host.Type.Name()).Inc() // TODO to be determined which traffic type to use, temporarily use TrafficType_REMOTE_PEER instead - metrics.DownloadPeerDuration.WithLabelValues(priority.String(), peer.Task.Type.String(), - peer.Task.Tag, peer.Task.Application, peer.Task.ContentLength.String(), peer.Host.Type.Name()).Observe(float64(peer.Cost.Load())) + metrics.DownloadPeerDuration.WithLabelValues(metrics.CalculateSizeLevel(peer.Task.ContentLength.Load()).String()).Observe(float64(peer.Cost.Load())) return nil }