[to #556] metrics: attach cluster label to metrics (#558) (#579)

Co-authored-by: iosmanthus <myosmanthustree@gmail.com>
Co-authored-by: Jian Zhang <zjsariel@gmail.com>
This commit is contained in:
ti-srebot 2022-03-29 21:17:12 +08:00 committed by GitHub
parent c2b23dbcf8
commit 1dc906bdad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 419 additions and 177 deletions

View File

@ -113,7 +113,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(client_java_raw_requests_latency_count{instance=~\"$instance\"}[$__rate_interval])) by (type)",
"expr": "sum(rate(client_java_raw_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type)",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
@ -214,7 +214,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(client_java_raw_requests_failure_total{instance=~\"$instance\"}[$__rate_interval])) by (type)",
"expr": "sum(rate(client_java_raw_requests_failure_total{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type)",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
@ -315,7 +315,7 @@
"targets": [
{
"exemplar": true,
"expr": "histogram_quantile(0.99, sum(rate(client_java_raw_requests_latency_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le, type))",
"expr": "histogram_quantile(0.99, sum(rate(client_java_raw_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le, type))",
"format": "time_series",
"hide": false,
"interval": "",
@ -325,7 +325,7 @@
},
{
"exemplar": true,
"expr": "histogram_quantile(1, sum(rate(client_java_raw_requests_latency_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le, type))",
"expr": "histogram_quantile(1, sum(rate(client_java_raw_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le, type))",
"hide": false,
"interval": "",
"legendFormat": "{{type}} - max",
@ -435,7 +435,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(client_java_raw_requests_latency_sum{instance=~\"$instance\"}[$__rate_interval])) by (type) / sum(rate(client_java_raw_requests_latency_count{instance=~\"$instance\"}[$__rate_interval])) by (type)",
"expr": "sum(rate(client_java_raw_requests_latency_sum{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type) / sum(rate(client_java_raw_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type)",
"format": "time_series",
"hide": false,
"interval": "",
@ -561,7 +561,7 @@
"targets": [
{
"exemplar": true,
"expr": "histogram_quantile(1, sum(rate(client_java_grpc_raw_requests_latency_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le, type))",
"expr": "histogram_quantile(1, sum(rate(client_java_grpc_raw_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le, type))",
"format": "time_series",
"instant": false,
"interval": "",
@ -572,7 +572,7 @@
},
{
"exemplar": true,
"expr": "histogram_quantile(0.99, sum(rate(client_java_grpc_raw_requests_latency_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le, type))",
"expr": "histogram_quantile(0.99, sum(rate(client_java_grpc_raw_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le, type))",
"format": "time_series",
"hide": false,
"instant": false,
@ -686,7 +686,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(client_java_grpc_raw_requests_latency_sum{instance=~\"$instance\"}[$__rate_interval])) by (type) / sum(rate(client_java_grpc_raw_requests_latency_count{instance=~\"$instance\"}[$__rate_interval])) by (type)",
"expr": "sum(rate(client_java_grpc_raw_requests_latency_sum{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type) / sum(rate(client_java_grpc_raw_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type)",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
@ -796,7 +796,7 @@
"targets": [
{
"exemplar": true,
"expr": "histogram_quantile(1,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", type!=\"pdpb.PD/GetRegion\", type!=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (le, type))",
"expr": "histogram_quantile(1,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\", type!=\"pdpb.PD/GetRegion\", type!=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (le, type))",
"interval": "",
"legendFormat": "{{ type }} -- max",
"queryType": "randomWalk",
@ -804,7 +804,7 @@
},
{
"exemplar": true,
"expr": "histogram_quantile(0.99,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", type!=\"pdpb.PD/GetRegion\", type!=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (le, type))",
"expr": "histogram_quantile(0.99,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\", type!=\"pdpb.PD/GetRegion\", type!=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (le, type))",
"hide": false,
"interval": "",
"legendFormat": "{{ type }} -- 99",
@ -914,7 +914,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(client_java_grpc_single_requests_latency_sum{instance=~\"$instance\", type!=\"pdpb.PD/GetRegion\", type!=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (type) / sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", type!=\"pdpb.PD/GetRegion\", type!=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (type)",
"expr": "sum(rate(client_java_grpc_single_requests_latency_sum{instance=~\"$instance\", cluster=~\"$cluster\", type!=\"pdpb.PD/GetRegion\", type!=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (type) / sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\", type!=\"pdpb.PD/GetRegion\", type!=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (type)",
"interval": "",
"legendFormat": "{{ type }}",
"queryType": "randomWalk",
@ -1023,7 +1023,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(client_java_seek_leader_store_duration_sum{instance=~\"$instance\"}[$__rate_interval])) by (le) / sum(rate(client_java_seek_leader_store_duration_count{instance=~\"$instance\"}[$__rate_interval])) by (le)",
"expr": "sum(rate(client_java_seek_leader_store_duration_sum{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le) / sum(rate(client_java_seek_leader_store_duration_count{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le)",
"interval": "",
"legendFormat": "seek-leader-store-avg",
"queryType": "randomWalk",
@ -1031,7 +1031,7 @@
},
{
"exemplar": true,
"expr": "histogram_quantile(0.99,sum(rate(client_java_seek_leader_store_duration_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le, type))",
"expr": "histogram_quantile(0.99,sum(rate(client_java_seek_leader_store_duration_bucket{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le, type))",
"hide": false,
"interval": "",
"legendFormat": "seek-leader-store-99",
@ -1140,7 +1140,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(client_java_seek_proxy_store_duration_sum{instance=~\"$instance\"}[$__rate_interval])) by (le) / sum(rate(client_java_seek_proxy_store_duration_count{instance=~\"$instance\"}[$__rate_interval])) by (le)",
"expr": "sum(rate(client_java_seek_proxy_store_duration_sum{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le) / sum(rate(client_java_seek_proxy_store_duration_count{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le)",
"interval": "",
"legendFormat": "seek-proxy-store-avg",
"queryType": "randomWalk",
@ -1148,7 +1148,7 @@
},
{
"exemplar": true,
"expr": "histogram_quantile(0.99,sum(rate(client_java_seek_proxy_store_duration_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le, type))",
"expr": "histogram_quantile(0.99,sum(rate(client_java_seek_proxy_store_duration_bucket{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le, type))",
"hide": false,
"interval": "",
"legendFormat": "seek-proxy-store-99",
@ -1259,7 +1259,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(client_java_backoff_duration_sum{instance=~\"$instance\", type!=\"BoPDRPC\"}[$__rate_interval])) by (le, type)",
"expr": "sum(rate(client_java_backoff_duration_sum{instance=~\"$instance\", cluster=~\"$cluster\", type!=\"BoPDRPC\"}[$__rate_interval])) by (le, type)",
"hide": false,
"interval": "",
"legendFormat": "{{type}}-total",
@ -1358,7 +1358,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(client_java_backoff_duration_count{instance=~\"$instance\"}[$__rate_interval])) by (le, type)",
"expr": "sum(rate(client_java_backoff_duration_count{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le, type)",
"interval": "",
"legendFormat": "{{type}}-count",
"queryType": "randomWalk",
@ -1459,7 +1459,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(client_java_backoff_duration_sum{instance=~\"$instance\", type!=\"BoPDRPC\"}[$__rate_interval])) by (le, type) / sum(rate(client_java_backoff_duration_count{instance=~\"$instance\", type!=\"BoPDRPC\"}[$__rate_interval])) by (le, type)",
"expr": "sum(rate(client_java_backoff_duration_sum{instance=~\"$instance\", cluster=~\"$cluster\", type!=\"BoPDRPC\"}[$__rate_interval])) by (le, type) / sum(rate(client_java_backoff_duration_count{instance=~\"$instance\", cluster=~\"$cluster\", type!=\"BoPDRPC\"}[$__rate_interval])) by (le, type)",
"interval": "",
"legendFormat": "{{type}}-avg",
"queryType": "randomWalk",
@ -1573,7 +1573,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(client_java_get_region_by_requests_latency_sum{instance=~\"$instance\"}[$__rate_interval])) / sum(rate(client_java_get_region_by_requests_latency_count{instance=~\"$instance\"}[$__rate_interval]))",
"expr": "sum(rate(client_java_get_region_by_requests_latency_sum{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) / sum(rate(client_java_get_region_by_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval]))",
"interval": "",
"legendFormat": "avg",
"queryType": "randomWalk",
@ -1581,7 +1581,7 @@
},
{
"exemplar": true,
"expr": "histogram_quantile(0.99, sum(rate(client_java_get_region_by_requests_latency_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le))",
"expr": "histogram_quantile(0.99, sum(rate(client_java_get_region_by_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le))",
"hide": false,
"interval": "",
"legendFormat": "99th",
@ -1680,7 +1680,7 @@
"targets": [
{
"exemplar": true,
"expr": "1 - sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", type=\"pdpb.PD/GetRegion\"}[$__rate_interval])) / sum(rate(client_java_get_region_by_requests_latency_count{instance=~\"$instance\"}[$__rate_interval]))",
"expr": "1 - sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\", type=\"pdpb.PD/GetRegion\"}[$__rate_interval])) / sum(rate(client_java_get_region_by_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval]))",
"interval": "",
"legendFormat": "hit ratio",
"queryType": "randomWalk",
@ -1788,7 +1788,7 @@
"targets": [
{
"exemplar": true,
"expr": "histogram_quantile(0.99,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", type=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (le, type))",
"expr": "histogram_quantile(0.99,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\", type=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (le, type))",
"interval": "",
"legendFormat": "{{ type }}-99th",
"queryType": "randomWalk",
@ -1796,7 +1796,7 @@
},
{
"exemplar": true,
"expr": "histogram_quantile(0.99,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", type=\"pdpb.PD/GetRegion\"}[$__rate_interval])) by (le, type))",
"expr": "histogram_quantile(0.99,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\", type=\"pdpb.PD/GetRegion\"}[$__rate_interval])) by (le, type))",
"hide": false,
"interval": "",
"legendFormat": "{{ type }}-99th",
@ -1804,7 +1804,7 @@
},
{
"exemplar": true,
"expr": "histogram_quantile(0.99,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", type=\"pdpb.PD/GetMembers\"}[$__rate_interval])) by (le, type))",
"expr": "histogram_quantile(0.99,sum(rate(client_java_grpc_single_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\", type=\"pdpb.PD/GetMembers\"}[$__rate_interval])) by (le, type))",
"hide": false,
"interval": "",
"legendFormat": "{{ type }}-99th",
@ -1904,7 +1904,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", type=\"pdpb.PD/GetRegion\"}[$__rate_interval])) by (type)",
"expr": "sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\", type=\"pdpb.PD/GetRegion\"}[$__rate_interval])) by (type)",
"hide": false,
"interval": "",
"legendFormat": "{{type}}",
@ -1912,7 +1912,7 @@
},
{
"exemplar": true,
"expr": "sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", type=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (type)",
"expr": "sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\", type=\"pdpb.PD/GetStore\"}[$__rate_interval])) by (type)",
"hide": false,
"interval": "",
"legendFormat": "{{type}}",
@ -1920,7 +1920,7 @@
},
{
"exemplar": true,
"expr": "sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", type=\"pdpb.PD/GetMembers\"}[$__rate_interval])) by (type)",
"expr": "sum(rate(client_java_grpc_single_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\", type=\"pdpb.PD/GetMembers\"}[$__rate_interval])) by (type)",
"hide": false,
"interval": "",
"legendFormat": "{{type}}",
@ -2021,7 +2021,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(client_java_backoff_duration_sum{instance=~\"$instance\", type=\"BoPDRPC\"}[$__rate_interval])) by (le, type) / sum(rate(client_java_backoff_duration_count{instance=~\"$instance\", type=\"BoPDRPC\"}[$__rate_interval])) by (le, type)",
"expr": "sum(rate(client_java_backoff_duration_sum{instance=~\"$instance\", cluster=~\"$cluster\", type=\"BoPDRPC\"}[$__rate_interval])) by (le, type) / sum(rate(client_java_backoff_duration_count{instance=~\"$instance\", cluster=~\"$cluster\", type=\"BoPDRPC\"}[$__rate_interval])) by (le, type)",
"interval": "",
"legendFormat": "{{type}}-avg",
"queryType": "randomWalk",
@ -2029,7 +2029,7 @@
},
{
"exemplar": true,
"expr": "sum(rate(client_java_backoff_duration_sum{instance=~\"$instance\", type=\"BoPDRPC\"}[$__rate_interval])) by (le, type)",
"expr": "sum(rate(client_java_backoff_duration_sum{instance=~\"$instance\", cluster=~\"$cluster\", type=\"BoPDRPC\"}[$__rate_interval])) by (le, type)",
"hide": false,
"interval": "",
"legendFormat": "{{type}}-sum",
@ -2128,7 +2128,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(client_java_backoff_duration_count{instance=~\"$instance\", type=\"BoPDRPC\"}[$__rate_interval])) by (le, type)",
"expr": "sum(rate(client_java_backoff_duration_count{instance=~\"$instance\", cluster=~\"$cluster\", type=\"BoPDRPC\"}[$__rate_interval])) by (le, type)",
"interval": "",
"legendFormat": "{{type}}",
"queryType": "randomWalk",
@ -2350,7 +2350,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(client_java_smart_raw_requests_latency_count{instance=~\"$instance\"}[$__rate_interval])) by (type)",
"expr": "sum(rate(client_java_smart_raw_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type)",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
@ -2451,7 +2451,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(client_java_smart_raw_requests_failure_total{instance=~\"$instance\"}[$__rate_interval])) by (type)",
"expr": "sum(rate(client_java_smart_raw_requests_failure_total{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type)",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
@ -2592,7 +2592,7 @@
"targets": [
{
"exemplar": true,
"expr": "histogram_quantile(0.99, sum(rate(client_java_smart_raw_requests_latency_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le, type))",
"expr": "histogram_quantile(0.99, sum(rate(client_java_smart_raw_requests_latency_bucket{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (le, type))",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
@ -2742,7 +2742,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(client_java_smart_raw_requests_latency_sum{instance=~\"$instance\"}[$__rate_interval])) by (type) / sum(rate(client_java_smart_raw_requests_latency_count{instance=~\"$instance\"}[$__rate_interval])) by (type)",
"expr": "sum(rate(client_java_smart_raw_requests_latency_sum{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type) / sum(rate(client_java_smart_raw_requests_latency_count{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type)",
"format": "time_series",
"hide": false,
"interval": "",
@ -2853,7 +2853,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(client_java_smart_raw_circuit_breaker_opened_total{instance=~\"$instance\"}[$__rate_interval])) by (type)",
"expr": "sum(rate(client_java_smart_raw_circuit_breaker_opened_total{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type)",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
@ -2954,7 +2954,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(client_java_circuit_breaker_attempt_counter_total{instance=~\"$instance\"}[$__rate_interval])) by (type)",
"expr": "sum(rate(client_java_circuit_breaker_attempt_counter_total{instance=~\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])) by (type)",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
@ -4900,6 +4900,103 @@
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_TEST-CLUSTER}",
"fieldConfig": {
"defaults": {},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 33
},
"hiddenSeries": false,
"id": 105,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "7.5.7",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "histogram_quantile(1, sum(rate(grpc_threadless_executor_latency_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le, phase))",
"interval": "",
"legendFormat": "{{ phase }} max",
"queryType": "randomWalk",
"refId": "A"
},
{
"exemplar": true,
"expr": "histogram_quantile(0.99, sum(rate(grpc_threadless_executor_latency_bucket{instance=~\"$instance\"}[$__rate_interval])) by (le, phase))",
"hide": false,
"interval": "",
"legendFormat": "{{ phase }} p99",
"refId": "B"
}
],
"thresholds": [],
"timeRegions": [],
"title": "ThreadlessExecutor Lock",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "gRPC internal",
@ -5054,6 +5151,37 @@
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": ".*",
"current": {
"selected": true,
"text": "All",
"value": "$__all"
},
"datasource": "${DS_TEST-CLUSTER}",
"definition": "label_values(client_java_raw_requests_latency_count, cluster)",
"description": null,
"error": null,
"hide": 0,
"includeAll": true,
"label": "cluster",
"multi": false,
"name": "cluster",
"options": [],
"query": {
"query": "label_values(client_java_raw_requests_latency_count, cluster)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},

View File

@ -65,7 +65,9 @@ public class KVClient implements AutoCloseable {
* @return a ByteString value if key exists, ByteString.EMPTY if key does not exist
*/
public ByteString get(ByteString key, long version) throws GrpcException {
BackOffer backOffer = ConcreteBackOffer.newGetBackOff();
BackOffer backOffer =
ConcreteBackOffer.newGetBackOff(
clientBuilder.getRegionManager().getPDClient().getClusterId());
while (true) {
RegionStoreClient client = clientBuilder.build(key);
try {

View File

@ -113,6 +113,7 @@ public class PDClient extends AbstractGRPCClient<PDBlockingStub, PDFutureStub>
HistogramUtils.buildDuration()
.name("client_java_pd_get_region_by_requests_latency")
.help("pd getRegionByKey request latency.")
.labelNames("cluster")
.register();
private PDClient(TiConfiguration conf, ChannelFactory channelFactory) {
@ -203,7 +204,7 @@ public class PDClient extends AbstractGRPCClient<PDBlockingStub, PDFutureStub>
() -> GetOperatorRequest.newBuilder().setHeader(header).setRegionId(regionId).build();
// get operator no need to handle error and no need back offer.
return callWithRetry(
ConcreteBackOffer.newCustomBackOff(0),
ConcreteBackOffer.newCustomBackOff(0, getClusterId()),
PDGrpc.getGetOperatorMethod(),
request,
new NoopHandler<>());
@ -231,7 +232,8 @@ public class PDClient extends AbstractGRPCClient<PDBlockingStub, PDFutureStub>
@Override
public Pair<Metapb.Region, Metapb.Peer> getRegionByKey(BackOffer backOffer, ByteString key) {
Histogram.Timer requestTimer = PD_GET_REGION_BY_KEY_REQUEST_LATENCY.startTimer();
Histogram.Timer requestTimer =
PD_GET_REGION_BY_KEY_REQUEST_LATENCY.labels(getClusterId().toString()).startTimer();
try {
if (conf.getKvMode() == KVMode.TXN) {
CodecDataOutput cdo = new CodecDataOutput();
@ -745,7 +747,8 @@ public class PDClient extends AbstractGRPCClient<PDBlockingStub, PDFutureStub>
return builder.build();
}
public long getClusterId() {
@Override
public Long getClusterId() {
return header.getClusterId();
}

View File

@ -65,4 +65,6 @@ public interface ReadOnlyPDClient {
List<Store> getAllStores(BackOffer backOffer);
TiConfiguration.ReplicaRead getReplicaRead();
Long getClusterId();
}

View File

@ -78,7 +78,9 @@ public class Snapshot {
try (KVClient client = new KVClient(session, session.getRegionStoreClientBuilder())) {
List<KvPair> kvPairList =
client.batchGet(
ConcreteBackOffer.newCustomBackOff(backOffer), list, timestamp.getVersion());
ConcreteBackOffer.newCustomBackOff(backOffer, session.getPDClient().getClusterId()),
list,
timestamp.getVersion());
return kvPairList
.stream()
.map(

View File

@ -60,7 +60,8 @@ public class StoreVersion {
public static boolean minTiKVVersion(String version, PDClient pdClient) {
StoreVersion storeVersion = new StoreVersion(version);
BackOffer bo = ConcreteBackOffer.newCustomBackOff(BackOffer.PD_INFO_BACKOFF);
BackOffer bo =
ConcreteBackOffer.newCustomBackOff(BackOffer.PD_INFO_BACKOFF, pdClient.getClusterId());
List<Metapb.Store> storeList =
pdClient
.getAllStores(bo)

View File

@ -91,13 +91,13 @@ public class TiSession implements AutoCloseable {
if (conf.isWarmUpEnable() && conf.isRawKVMode()) {
warmUp();
}
this.circuitBreaker = new CircuitBreakerImpl(conf);
this.circuitBreaker = new CircuitBreakerImpl(conf, getPDClient().getClusterId());
logger.info("TiSession initialized in " + conf.getKvMode() + " mode");
}
private synchronized void warmUp() {
long warmUpStartTime = System.nanoTime();
BackOffer backOffer = ConcreteBackOffer.newRawKVBackOff();
BackOffer backOffer = ConcreteBackOffer.newRawKVBackOff(getPDClient().getClusterId());
try {
// let JVM ClassLoader load gRPC error related classes
@ -128,7 +128,9 @@ public class TiSession implements AutoCloseable {
}
for (Pdpb.Region region : regions) {
regionManager.insertRegionToCache(
regionManager.createRegion(region.getRegion(), ConcreteBackOffer.newGetBackOff()));
regionManager.createRegion(
region.getRegion(),
ConcreteBackOffer.newGetBackOff(getPDClient().getClusterId())));
}
startKey = regions.get(regions.size() - 1).getRegion().getEndKey();
} while (!startKey.isEmpty());
@ -226,7 +228,8 @@ public class TiSession implements AutoCloseable {
public TiTimestamp getTimestamp() {
checkIsClosed();
return getPDClient().getTimestamp(ConcreteBackOffer.newTsoBackOff());
return getPDClient()
.getTimestamp(ConcreteBackOffer.newTsoBackOff(getPDClient().getClusterId()));
}
public Snapshot createSnapshot() {
@ -459,7 +462,7 @@ public class TiSession implements AutoCloseable {
.stream()
.map(k -> Key.toRawKey(k).next().toByteString())
.collect(Collectors.toList()),
ConcreteBackOffer.newCustomBackOff(splitRegionBackoffMS));
ConcreteBackOffer.newCustomBackOff(splitRegionBackoffMS, getPDClient().getClusterId()));
// scatter region
for (Metapb.Region newRegion : newRegions) {
@ -482,7 +485,9 @@ public class TiSession implements AutoCloseable {
return;
}
getPDClient()
.waitScatterRegionFinish(newRegion, ConcreteBackOffer.newCustomBackOff((int) remainMS));
.waitScatterRegionFinish(
newRegion,
ConcreteBackOffer.newCustomBackOff((int) remainMS, getPDClient().getClusterId()));
}
} else {
logger.info("skip to wait scatter region finish");

View File

@ -74,7 +74,9 @@ public class ConcreteScanIterator extends ScanIterator {
try (RegionStoreClient client = builder.build(startKey)) {
client.setTimeout(conf.getScanTimeout());
region = client.getRegion();
BackOffer backOffer = ConcreteBackOffer.newScannerNextMaxBackOff();
BackOffer backOffer =
ConcreteBackOffer.newScannerNextMaxBackOff(
builder.getRegionManager().getPDClient().getClusterId());
currentCache = client.scan(backOffer, startKey, version);
return region;
}
@ -86,7 +88,8 @@ public class ConcreteScanIterator extends ScanIterator {
builder.getRegionManager().getRegionStorePairByKey(current.getKey());
TiRegion region = pair.first;
TiStore store = pair.second;
BackOffer backOffer = ConcreteBackOffer.newGetBackOff();
BackOffer backOffer =
ConcreteBackOffer.newGetBackOff(builder.getRegionManager().getPDClient().getClusterId());
try (RegionStoreClient client = builder.build(region, store)) {
return client.get(backOffer, current.getKey(), version);
} catch (Exception e) {

View File

@ -33,19 +33,19 @@ public abstract class RetryPolicy<RespT> {
HistogramUtils.buildDuration()
.name("client_java_grpc_single_requests_latency")
.help("grpc request latency.")
.labelNames("type")
.labelNames("type", "cluster")
.register();
public static final Histogram CALL_WITH_RETRY_DURATION =
HistogramUtils.buildDuration()
.name("client_java_call_with_retry_duration")
.help("callWithRetry duration.")
.labelNames("type")
.labelNames("type", "cluster")
.register();
public static final Counter GRPC_REQUEST_RETRY_NUM =
Counter.build()
.name("client_java_grpc_requests_retry_num")
.help("grpc request retry num.")
.labelNames("type")
.labelNames("type", "cluster")
.register();
// handles PD and TiKV's error.
@ -70,7 +70,8 @@ public abstract class RetryPolicy<RespT> {
}
public RespT callWithRetry(Callable<RespT> proc, String methodName, BackOffer backOffer) {
Histogram.Timer callWithRetryTimer = CALL_WITH_RETRY_DURATION.labels(methodName).startTimer();
String[] labels = new String[] {methodName, backOffer.getClusterId().toString()};
Histogram.Timer callWithRetryTimer = CALL_WITH_RETRY_DURATION.labels(labels).startTimer();
SlowLogSpan callWithRetrySlowLogSpan =
backOffer.getSlowLog().start("callWithRetry " + methodName);
try {
@ -78,8 +79,7 @@ public abstract class RetryPolicy<RespT> {
RespT result = null;
try {
// add single request duration histogram
Histogram.Timer requestTimer =
GRPC_SINGLE_REQUEST_LATENCY.labels(methodName).startTimer();
Histogram.Timer requestTimer = GRPC_SINGLE_REQUEST_LATENCY.labels(labels).startTimer();
SlowLogSpan slowLogSpan = backOffer.getSlowLog().start("gRPC " + methodName);
try {
result = proc.call();
@ -93,7 +93,7 @@ public abstract class RetryPolicy<RespT> {
backOffer.checkTimeout();
boolean retry = handler.handleRequestError(backOffer, e);
if (retry) {
GRPC_REQUEST_RETRY_NUM.labels(methodName).inc();
GRPC_REQUEST_RETRY_NUM.labels(labels).inc();
continue;
} else {
return result;
@ -104,7 +104,7 @@ public abstract class RetryPolicy<RespT> {
if (handler != null) {
boolean retry = handler.handleResponseError(backOffer, result);
if (retry) {
GRPC_REQUEST_RETRY_NUM.labels(methodName).inc();
GRPC_REQUEST_RETRY_NUM.labels(labels).inc();
continue;
}
}

View File

@ -52,12 +52,14 @@ public abstract class AbstractRegionStoreClient
HistogramUtils.buildDuration()
.name("client_java_seek_leader_store_duration")
.help("seek leader store duration.")
.labelNames("cluster")
.register();
public static final Histogram SEEK_PROXY_STORE_DURATION =
HistogramUtils.buildDuration()
.name("client_java_seek_proxy_store_duration")
.help("seek proxy store duration.")
.labelNames("cluster")
.register();
protected final RegionManager regionManager;
@ -181,7 +183,10 @@ public abstract class AbstractRegionStoreClient
}
private Boolean seekLeaderStore(BackOffer backOffer) {
Histogram.Timer switchLeaderDurationTimer = SEEK_LEADER_STORE_DURATION.startTimer();
Histogram.Timer switchLeaderDurationTimer =
SEEK_LEADER_STORE_DURATION
.labels(regionManager.getPDClient().getClusterId().toString())
.startTimer();
SlowLogSpan slowLogSpan = backOffer.getSlowLog().start("seekLeaderStore");
try {
List<Metapb.Peer> peers = region.getFollowerList();
@ -230,7 +235,10 @@ public abstract class AbstractRegionStoreClient
private boolean seekProxyStore(BackOffer backOffer) {
SlowLogSpan slowLogSpan = backOffer.getSlowLog().start("seekProxyStore");
Histogram.Timer grpcForwardDurationTimer = SEEK_PROXY_STORE_DURATION.startTimer();
Histogram.Timer grpcForwardDurationTimer =
SEEK_PROXY_STORE_DURATION
.labels(regionManager.getPDClient().getClusterId().toString())
.startTimer();
try {
logger.info(String.format("try grpc forward: region[%d]", region.getId()));
// when current leader cannot be reached

View File

@ -52,11 +52,13 @@ public class RegionManager {
HistogramUtils.buildDuration()
.name("client_java_get_region_by_requests_latency")
.help("getRegionByKey request latency.")
.labelNames("cluster")
.register();
public static final Histogram SCAN_REGIONS_REQUEST_LATENCY =
HistogramUtils.buildDuration()
.name("client_java_scan_regions_request_latency")
.help("scanRegions request latency.")
.labelNames("cluster")
.register();
// TODO: the region cache logic need rewrite.
@ -101,7 +103,8 @@ public class RegionManager {
public List<Pdpb.Region> scanRegions(
BackOffer backOffer, ByteString startKey, ByteString endKey, int limit) {
Histogram.Timer requestTimer = SCAN_REGIONS_REQUEST_LATENCY.startTimer();
Histogram.Timer requestTimer =
SCAN_REGIONS_REQUEST_LATENCY.labels(getPDClient().getClusterId().toString()).startTimer();
SlowLogSpan slowLogSpan = backOffer.getSlowLog().start("scanRegions");
try {
return pdClient.scanRegions(backOffer, startKey, endKey, limit);
@ -118,7 +121,10 @@ public class RegionManager {
}
public TiRegion getRegionByKey(ByteString key, BackOffer backOffer) {
Histogram.Timer requestTimer = GET_REGION_BY_KEY_REQUEST_LATENCY.startTimer();
Histogram.Timer requestTimer =
GET_REGION_BY_KEY_REQUEST_LATENCY
.labels(getPDClient().getClusterId().toString())
.startTimer();
SlowLogSpan slowLogSpan = backOffer.getSlowLog().start("getRegionByKey");
TiRegion region = cache.getRegionByKey(key, backOffer);
try {
@ -311,6 +317,7 @@ public class RegionManager {
}
private BackOffer defaultBackOff() {
return ConcreteBackOffer.newCustomBackOff(conf.getRawKVDefaultBackoffInMS());
return ConcreteBackOffer.newCustomBackOff(
conf.getRawKVDefaultBackoffInMS(), pdClient.getClusterId());
}
}

View File

@ -76,7 +76,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
HistogramUtils.buildDuration()
.name("client_java_grpc_raw_requests_latency")
.help("grpc raw request latency.")
.labelNames("type")
.labelNames("type", "cluster")
.register();
private synchronized Boolean getIsV4() {
@ -141,6 +141,10 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
this.pdClient = pdClient;
}
private String[] withClusterId(String label) {
return new String[] {label, pdClient.getClusterId().toString()};
}
public synchronized boolean addResolvedLocks(Long version, Set<Long> locks) {
Set<Long> oldList = resolvedLocks.get(version);
if (oldList != null) {
@ -733,7 +737,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
StreamingResponse responseIterator =
this.callServerStreamingWithRetry(
ConcreteBackOffer.newCopNextMaxBackOff(),
ConcreteBackOffer.newCopNextMaxBackOff(pdClient.getClusterId()),
TikvGrpc.getCoprocessorStreamMethod(),
reqToSend,
handler);
@ -768,7 +772,10 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
SplitRegionResponse resp =
callWithRetry(
ConcreteBackOffer.newGetBackOff(), TikvGrpc.getSplitRegionMethod(), request, handler);
ConcreteBackOffer.newGetBackOff(pdClient.getClusterId()),
TikvGrpc.getSplitRegionMethod(),
request,
handler);
if (resp == null) {
this.regionManager.onRequestFail(region);
@ -789,7 +796,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
public ByteString rawGet(BackOffer backOffer, ByteString key) {
Histogram.Timer requestTimer =
GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_get").startTimer();
GRPC_RAW_REQUEST_LATENCY.labels(withClusterId("client_grpc_raw_get")).startTimer();
try {
Supplier<RawGetRequest> factory =
() -> RawGetRequest.newBuilder().setContext(makeContext(storeType)).setKey(key).build();
@ -820,7 +827,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
public Long rawGetKeyTTL(BackOffer backOffer, ByteString key) {
Histogram.Timer requestTimer =
GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_get_key_ttl").startTimer();
GRPC_RAW_REQUEST_LATENCY.labels(withClusterId("client_grpc_raw_get_key_ttl")).startTimer();
try {
Supplier<RawGetKeyTTLRequest> factory =
() ->
@ -859,7 +866,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
public void rawDelete(BackOffer backOffer, ByteString key, boolean atomic) {
Histogram.Timer requestTimer =
GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_delete").startTimer();
GRPC_RAW_REQUEST_LATENCY.labels(withClusterId("client_grpc_raw_delete")).startTimer();
try {
Supplier<RawDeleteRequest> factory =
() ->
@ -897,7 +904,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
public void rawPut(
BackOffer backOffer, ByteString key, ByteString value, long ttl, boolean atomic) {
Histogram.Timer requestTimer =
GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_put").startTimer();
GRPC_RAW_REQUEST_LATENCY.labels(withClusterId("client_grpc_raw_put")).startTimer();
try {
Supplier<RawPutRequest> factory =
() ->
@ -936,7 +943,9 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
public ByteString rawPutIfAbsent(
BackOffer backOffer, ByteString key, ByteString value, long ttl) {
Histogram.Timer requestTimer =
GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_put_if_absent").startTimer();
GRPC_RAW_REQUEST_LATENCY
.labels(withClusterId("client_grpc_raw_put_if_absent"))
.startTimer();
try {
Supplier<RawCASRequest> factory =
() ->
@ -979,7 +988,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
public List<KvPair> rawBatchGet(BackOffer backoffer, List<ByteString> keys) {
Histogram.Timer requestTimer =
GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_batch_get").startTimer();
GRPC_RAW_REQUEST_LATENCY.labels(withClusterId("client_grpc_raw_batch_get")).startTimer();
try {
if (keys.isEmpty()) {
return new ArrayList<>();
@ -1014,7 +1023,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
public void rawBatchPut(BackOffer backOffer, List<KvPair> kvPairs, long ttl, boolean atomic) {
Histogram.Timer requestTimer =
GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_batch_put").startTimer();
GRPC_RAW_REQUEST_LATENCY.labels(withClusterId("client_grpc_raw_batch_put")).startTimer();
try {
if (kvPairs.isEmpty()) {
return;
@ -1066,7 +1075,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
public void rawBatchDelete(BackOffer backoffer, List<ByteString> keys, boolean atomic) {
Histogram.Timer requestTimer =
GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_batch_delete").startTimer();
GRPC_RAW_REQUEST_LATENCY.labels(withClusterId("client_grpc_raw_batch_delete")).startTimer();
try {
if (keys.isEmpty()) {
return;
@ -1114,7 +1123,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
*/
public List<KvPair> rawScan(BackOffer backOffer, ByteString key, int limit, boolean keyOnly) {
Histogram.Timer requestTimer =
GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_scan").startTimer();
GRPC_RAW_REQUEST_LATENCY.labels(withClusterId("client_grpc_raw_scan")).startTimer();
try {
Supplier<RawScanRequest> factory =
() ->
@ -1160,7 +1169,7 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
*/
public void rawDeleteRange(BackOffer backOffer, ByteString startKey, ByteString endKey) {
Histogram.Timer requestTimer =
GRPC_RAW_REQUEST_LATENCY.labels("client_grpc_raw_delete_range").startTimer();
GRPC_RAW_REQUEST_LATENCY.labels(withClusterId("client_grpc_raw_delete_range")).startTimer();
try {
Supplier<RawDeleteRangeRequest> factory =
() ->
@ -1316,7 +1325,8 @@ public class RegionStoreClient extends AbstractRegionStoreClient {
}
private BackOffer defaultBackOff() {
return ConcreteBackOffer.newCustomBackOff(conf.getRawKVDefaultBackoffInMS());
return ConcreteBackOffer.newCustomBackOff(
conf.getRawKVDefaultBackoffInMS(), pdClient.getClusterId());
}
}
}

View File

@ -80,7 +80,9 @@ public class StoreHealthyChecker implements Runnable {
private boolean checkStoreTombstone(TiStore store) {
try {
Metapb.Store newStore = pdClient.getStore(ConcreteBackOffer.newRawKVBackOff(), store.getId());
Metapb.Store newStore =
pdClient.getStore(
ConcreteBackOffer.newRawKVBackOff(pdClient.getClusterId()), store.getId());
if (newStore != null && newStore.getState() == Metapb.StoreState.Tombstone) {
return true;
}

View File

@ -66,4 +66,6 @@ public interface BackOffer {
}
SlowLog getSlowLog();
Long getClusterId();
}

View File

@ -39,6 +39,7 @@ import org.tikv.common.log.SlowLogSpan;
public class ConcreteBackOffer implements BackOffer {
private static final Logger logger = LoggerFactory.getLogger(ConcreteBackOffer.class);
private final int maxSleep;
private final Long clusterId;
@VisibleForTesting
public final Map<BackOffFunction.BackOffFuncType, BackOffFunction> backOffFunctionMap;
@ -52,14 +53,15 @@ public class ConcreteBackOffer implements BackOffer {
HistogramUtils.buildDuration()
.name("client_java_backoff_duration")
.help("backoff duration.")
.labelNames("type")
.labelNames("type", "cluster")
.register();
private ConcreteBackOffer(int maxSleep, long deadline, SlowLog slowLog) {
private ConcreteBackOffer(int maxSleep, long deadline, SlowLog slowLog, long clusterId) {
Preconditions.checkArgument(
maxSleep == 0 || deadline == 0, "Max sleep time should be 0 or Deadline should be 0.");
Preconditions.checkArgument(maxSleep >= 0, "Max sleep time cannot be less than 0.");
Preconditions.checkArgument(deadline >= 0, "Deadline cannot be less than 0.");
this.clusterId = clusterId;
this.maxSleep = maxSleep;
this.errors = Collections.synchronizedList(new ArrayList<>());
this.backOffFunctionMap = new ConcurrentHashMap<>();
@ -68,6 +70,7 @@ public class ConcreteBackOffer implements BackOffer {
}
private ConcreteBackOffer(ConcreteBackOffer source) {
this.clusterId = source.clusterId;
this.maxSleep = source.maxSleep;
this.totalSleep = source.totalSleep;
this.errors = source.errors;
@ -76,37 +79,54 @@ public class ConcreteBackOffer implements BackOffer {
this.slowLog = source.slowLog;
}
public static ConcreteBackOffer newDeadlineBackOff(int timeoutInMs, SlowLog slowLog) {
public static ConcreteBackOffer newDeadlineBackOff(
int timeoutInMs, SlowLog slowLog, long clusterId) {
long deadline = System.currentTimeMillis() + timeoutInMs;
return new ConcreteBackOffer(0, deadline, slowLog);
return new ConcreteBackOffer(0, deadline, slowLog, clusterId);
}
public static ConcreteBackOffer newDeadlineBackOff(int timeoutInMs, SlowLog slowLog) {
return newDeadlineBackOff(timeoutInMs, slowLog, 0);
}
public static ConcreteBackOffer newCustomBackOff(int maxSleep, long clusterId) {
return new ConcreteBackOffer(maxSleep, 0, SlowLogEmptyImpl.INSTANCE, clusterId);
}
public static ConcreteBackOffer newCustomBackOff(int maxSleep) {
return new ConcreteBackOffer(maxSleep, 0, SlowLogEmptyImpl.INSTANCE);
return newCustomBackOff(maxSleep, 0);
}
public static ConcreteBackOffer newScannerNextMaxBackOff() {
return new ConcreteBackOffer(SCANNER_NEXT_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE);
public static ConcreteBackOffer newScannerNextMaxBackOff(long clusterId) {
return new ConcreteBackOffer(SCANNER_NEXT_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE, clusterId);
}
public static ConcreteBackOffer newBatchGetMaxBackOff() {
return new ConcreteBackOffer(BATCH_GET_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE);
return new ConcreteBackOffer(BATCH_GET_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE, 0);
}
public static ConcreteBackOffer newCopNextMaxBackOff() {
return new ConcreteBackOffer(COP_NEXT_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE);
return newCopNextMaxBackOff(0);
}
public static ConcreteBackOffer newGetBackOff() {
return new ConcreteBackOffer(GET_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE);
public static ConcreteBackOffer newCopNextMaxBackOff(long clusterId) {
return new ConcreteBackOffer(COP_NEXT_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE, clusterId);
}
public static ConcreteBackOffer newGetBackOff(long clusterId) {
return new ConcreteBackOffer(GET_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE, clusterId);
}
public static ConcreteBackOffer newRawKVBackOff(long clusterId) {
return new ConcreteBackOffer(RAWKV_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE, clusterId);
}
public static ConcreteBackOffer newRawKVBackOff() {
return new ConcreteBackOffer(RAWKV_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE);
return newRawKVBackOff(0);
}
public static ConcreteBackOffer newTsoBackOff() {
return new ConcreteBackOffer(TSO_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE);
public static ConcreteBackOffer newTsoBackOff(long clusterId) {
return new ConcreteBackOffer(TSO_MAX_BACKOFF, 0, SlowLogEmptyImpl.INSTANCE, clusterId);
}
public static ConcreteBackOffer create(BackOffer source) {
@ -173,7 +193,8 @@ public class ConcreteBackOffer implements BackOffer {
}
public boolean canRetryAfterSleep(BackOffFunction.BackOffFuncType funcType, long maxSleepMs) {
Histogram.Timer backOffTimer = BACKOFF_DURATION.labels(funcType.name()).startTimer();
Histogram.Timer backOffTimer =
BACKOFF_DURATION.labels(funcType.name(), clusterId.toString()).startTimer();
SlowLogSpan slowLogSpan = getSlowLog().start("backoff " + funcType.name());
BackOffFunction backOffFunction =
backOffFunctionMap.computeIfAbsent(funcType, this::createBackOffFunc);
@ -238,4 +259,9 @@ public class ConcreteBackOffer implements BackOffer {
public SlowLog getSlowLog() {
return slowLog;
}
@Override
public Long getClusterId() {
return clusterId;
}
}

View File

@ -66,6 +66,7 @@ import org.tikv.kvproto.Kvrpcpb.KvPair;
public class RawKVClient implements RawKVClientBase {
private final Long clusterId;
private final List<URI> pdAddresses;
private final TiSession session;
private final RegionStoreClientBuilder clientBuilder;
private final TiConfiguration conf;
private final ExecutorService batchGetThreadPool;
@ -79,21 +80,21 @@ public class RawKVClient implements RawKVClientBase {
HistogramUtils.buildDuration()
.name("client_java_raw_requests_latency")
.help("client raw request latency.")
.labelNames("type")
.labelNames("type", "cluster")
.register();
public static final Counter RAW_REQUEST_SUCCESS =
Counter.build()
.name("client_java_raw_requests_success")
.help("client raw request success.")
.labelNames("type")
.labelNames("type", "cluster")
.register();
public static final Counter RAW_REQUEST_FAILURE =
Counter.build()
.name("client_java_raw_requests_failure")
.help("client raw request failure.")
.labelNames("type")
.labelNames("type", "cluster")
.register();
private static final TiKVException ERR_MAX_SCAN_LIMIT_EXCEEDED =
@ -104,6 +105,7 @@ public class RawKVClient implements RawKVClientBase {
Objects.requireNonNull(clientBuilder, "clientBuilder is null");
this.conf = session.getConf();
this.clientBuilder = clientBuilder;
this.session = session;
this.batchGetThreadPool = session.getThreadPoolForBatchGet();
this.batchPutThreadPool = session.getThreadPoolForBatchPut();
this.batchDeleteThreadPool = session.getThreadPoolForBatchDelete();
@ -113,6 +115,10 @@ public class RawKVClient implements RawKVClientBase {
this.pdAddresses = session.getPDClient().getPdAddrs();
}
private String[] withClusterId(String label) {
return new String[] {label, clusterId.toString()};
}
@Override
public void close() {}
@ -132,8 +138,8 @@ public class RawKVClient implements RawKVClientBase {
}
private void put(ByteString key, ByteString value, long ttl, boolean atomic) {
String label = "client_raw_put";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
String[] labels = withClusterId("client_raw_put");
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer();
SlowLog slowLog =
new SlowLogImpl(
conf.getRawKVWriteSlowLogInMS(),
@ -146,13 +152,13 @@ public class RawKVClient implements RawKVClientBase {
}
});
ConcreteBackOffer backOffer =
ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVWriteTimeoutInMS(), slowLog);
ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVWriteTimeoutInMS(), slowLog, clusterId);
try {
while (true) {
try (RegionStoreClient client = clientBuilder.build(key, backOffer)) {
slowLog.addProperty("region", client.getRegion().toString());
client.rawPut(backOffer, key, value, ttl, atomic);
RAW_REQUEST_SUCCESS.labels(label).inc();
RAW_REQUEST_SUCCESS.labels(labels).inc();
return;
} catch (final TiKVException e) {
backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, e);
@ -160,7 +166,7 @@ public class RawKVClient implements RawKVClientBase {
}
}
} catch (Exception e) {
RAW_REQUEST_FAILURE.labels(label).inc();
RAW_REQUEST_FAILURE.labels(labels).inc();
slowLog.setError(e);
throw e;
} finally {
@ -176,8 +182,8 @@ public class RawKVClient implements RawKVClientBase {
@Override
public ByteString putIfAbsent(ByteString key, ByteString value, long ttl) {
String label = "client_raw_put_if_absent";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
String[] labels = withClusterId("client_raw_put_if_absent");
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer();
SlowLog slowLog =
new SlowLogImpl(
conf.getRawKVWriteSlowLogInMS(),
@ -190,13 +196,13 @@ public class RawKVClient implements RawKVClientBase {
}
});
ConcreteBackOffer backOffer =
ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVWriteTimeoutInMS(), slowLog);
ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVWriteTimeoutInMS(), slowLog, clusterId);
try {
while (true) {
try (RegionStoreClient client = clientBuilder.build(key, backOffer)) {
slowLog.addProperty("region", client.getRegion().toString());
ByteString result = client.rawPutIfAbsent(backOffer, key, value, ttl);
RAW_REQUEST_SUCCESS.labels(label).inc();
RAW_REQUEST_SUCCESS.labels(labels).inc();
return result;
} catch (final TiKVException e) {
backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, e);
@ -204,7 +210,7 @@ public class RawKVClient implements RawKVClientBase {
}
}
} catch (Exception e) {
RAW_REQUEST_FAILURE.labels(label).inc();
RAW_REQUEST_FAILURE.labels(labels).inc();
slowLog.setError(e);
throw e;
} finally {
@ -234,8 +240,8 @@ public class RawKVClient implements RawKVClientBase {
}
private void batchPut(Map<ByteString, ByteString> kvPairs, long ttl, boolean atomic) {
String label = "client_raw_batch_put";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
String[] labels = withClusterId("client_raw_batch_put");
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer();
SlowLog slowLog =
new SlowLogImpl(
conf.getRawKVBatchWriteSlowLogInMS(),
@ -248,13 +254,14 @@ public class RawKVClient implements RawKVClientBase {
}
});
ConcreteBackOffer backOffer =
ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVBatchWriteTimeoutInMS(), slowLog);
ConcreteBackOffer.newDeadlineBackOff(
conf.getRawKVBatchWriteTimeoutInMS(), slowLog, clusterId);
try {
long deadline = System.currentTimeMillis() + conf.getRawKVBatchWriteTimeoutInMS();
doSendBatchPut(backOffer, kvPairs, ttl, atomic, deadline);
RAW_REQUEST_SUCCESS.labels(label).inc();
RAW_REQUEST_SUCCESS.labels(labels).inc();
} catch (Exception e) {
RAW_REQUEST_FAILURE.labels(label).inc();
RAW_REQUEST_FAILURE.labels(labels).inc();
slowLog.setError(e);
throw e;
} finally {
@ -265,12 +272,12 @@ public class RawKVClient implements RawKVClientBase {
@Override
public ByteString get(ByteString key) {
String label = "client_raw_get";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
String[] labels = withClusterId("client_raw_get");
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer();
SlowLog slowLog =
new SlowLogImpl(
conf.getRawKVReadSlowLogInMS(),
new HashMap<String, Object>(2) {
new HashMap<String, Object>() {
{
put("cluster_id", clusterId);
put("pd_addresses", pdAddresses);
@ -280,13 +287,13 @@ public class RawKVClient implements RawKVClientBase {
});
ConcreteBackOffer backOffer =
ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVReadTimeoutInMS(), slowLog);
ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVReadTimeoutInMS(), slowLog, clusterId);
try {
while (true) {
try (RegionStoreClient client = clientBuilder.build(key, backOffer)) {
slowLog.addProperty("region", client.getRegion().toString());
ByteString result = client.rawGet(backOffer, key);
RAW_REQUEST_SUCCESS.labels(label).inc();
RAW_REQUEST_SUCCESS.labels(labels).inc();
return result;
} catch (final TiKVException e) {
backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, e);
@ -294,7 +301,7 @@ public class RawKVClient implements RawKVClientBase {
}
}
} catch (Exception e) {
RAW_REQUEST_FAILURE.labels(label).inc();
RAW_REQUEST_FAILURE.labels(labels).inc();
slowLog.setError(e);
throw e;
} finally {
@ -305,12 +312,12 @@ public class RawKVClient implements RawKVClientBase {
@Override
public List<KvPair> batchGet(List<ByteString> keys) {
String label = "client_raw_batch_get";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
String[] labels = withClusterId("client_raw_batch_get");
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer();
SlowLog slowLog =
new SlowLogImpl(
conf.getRawKVBatchReadSlowLogInMS(),
new HashMap<String, Object>(2) {
new HashMap<String, Object>() {
{
put("cluster_id", clusterId);
put("pd_addresses", pdAddresses);
@ -319,14 +326,15 @@ public class RawKVClient implements RawKVClientBase {
}
});
ConcreteBackOffer backOffer =
ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVBatchReadTimeoutInMS(), slowLog);
ConcreteBackOffer.newDeadlineBackOff(
conf.getRawKVBatchReadTimeoutInMS(), slowLog, clusterId);
try {
long deadline = System.currentTimeMillis() + conf.getRawKVBatchReadTimeoutInMS();
List<KvPair> result = doSendBatchGet(backOffer, keys, deadline);
RAW_REQUEST_SUCCESS.labels(label).inc();
RAW_REQUEST_SUCCESS.labels(labels).inc();
return result;
} catch (Exception e) {
RAW_REQUEST_FAILURE.labels(label).inc();
RAW_REQUEST_FAILURE.labels(labels).inc();
slowLog.setError(e);
throw e;
} finally {
@ -346,8 +354,8 @@ public class RawKVClient implements RawKVClientBase {
}
private void batchDelete(List<ByteString> keys, boolean atomic) {
String label = "client_raw_batch_delete";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
String[] labels = withClusterId("client_raw_batch_delete");
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer();
SlowLog slowLog =
new SlowLogImpl(
conf.getRawKVBatchWriteSlowLogInMS(),
@ -360,14 +368,15 @@ public class RawKVClient implements RawKVClientBase {
}
});
ConcreteBackOffer backOffer =
ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVBatchWriteTimeoutInMS(), slowLog);
ConcreteBackOffer.newDeadlineBackOff(
conf.getRawKVBatchWriteTimeoutInMS(), slowLog, clusterId);
try {
long deadline = System.currentTimeMillis() + conf.getRawKVBatchWriteTimeoutInMS();
doSendBatchDelete(backOffer, keys, atomic, deadline);
RAW_REQUEST_SUCCESS.labels(label).inc();
RAW_REQUEST_SUCCESS.labels(labels).inc();
return;
} catch (Exception e) {
RAW_REQUEST_FAILURE.labels(label).inc();
RAW_REQUEST_FAILURE.labels(labels).inc();
slowLog.setError(e);
throw e;
} finally {
@ -378,8 +387,8 @@ public class RawKVClient implements RawKVClientBase {
@Override
public Long getKeyTTL(ByteString key) {
String label = "client_raw_get_key_ttl";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
String[] labels = withClusterId("client_raw_get_key_ttl");
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer();
SlowLog slowLog =
new SlowLogImpl(
conf.getRawKVReadSlowLogInMS(),
@ -392,13 +401,13 @@ public class RawKVClient implements RawKVClientBase {
}
});
ConcreteBackOffer backOffer =
ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVReadTimeoutInMS(), slowLog);
ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVReadTimeoutInMS(), slowLog, clusterId);
try {
while (true) {
try (RegionStoreClient client = clientBuilder.build(key, backOffer)) {
slowLog.addProperty("region", client.getRegion().toString());
Long result = client.rawGetKeyTTL(backOffer, key);
RAW_REQUEST_SUCCESS.labels(label).inc();
RAW_REQUEST_SUCCESS.labels(labels).inc();
return result;
} catch (final TiKVException e) {
backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, e);
@ -406,7 +415,7 @@ public class RawKVClient implements RawKVClientBase {
}
}
} catch (Exception e) {
RAW_REQUEST_FAILURE.labels(label).inc();
RAW_REQUEST_FAILURE.labels(labels).inc();
slowLog.setError(e);
throw e;
} finally {
@ -417,8 +426,8 @@ public class RawKVClient implements RawKVClientBase {
@Override
public List<List<KvPair>> batchScan(List<ScanOption> ranges) {
String label = "client_raw_batch_scan";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
String[] labels = withClusterId("client_raw_batch_scan");
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer();
long deadline = System.currentTimeMillis() + conf.getRawKVScanTimeoutInMS();
List<Future<Pair<Integer, List<KvPair>>>> futureList = new ArrayList<>();
try {
@ -453,10 +462,10 @@ public class RawKVClient implements RawKVClientBase {
throw new TiKVException("Execution exception met.", e);
}
}
RAW_REQUEST_SUCCESS.labels(label).inc();
RAW_REQUEST_SUCCESS.labels(labels).inc();
return scanResults;
} catch (Exception e) {
RAW_REQUEST_FAILURE.labels(label).inc();
RAW_REQUEST_FAILURE.labels(labels).inc();
for (Future<Pair<Integer, List<KvPair>>> future : futureList) {
future.cancel(true);
}
@ -473,8 +482,8 @@ public class RawKVClient implements RawKVClientBase {
@Override
public List<KvPair> scan(ByteString startKey, ByteString endKey, int limit, boolean keyOnly) {
String label = "client_raw_scan";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
String[] labels = withClusterId("client_raw_scan");
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer();
SlowLog slowLog =
new SlowLogImpl(
conf.getRawKVScanSlowLogInMS(),
@ -490,16 +499,16 @@ public class RawKVClient implements RawKVClientBase {
}
});
ConcreteBackOffer backOffer =
ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVScanTimeoutInMS(), slowLog);
ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVScanTimeoutInMS(), slowLog, clusterId);
try {
Iterator<KvPair> iterator =
rawScanIterator(conf, clientBuilder, startKey, endKey, limit, keyOnly, backOffer);
List<KvPair> result = new ArrayList<>();
iterator.forEachRemaining(result::add);
RAW_REQUEST_SUCCESS.labels(label).inc();
RAW_REQUEST_SUCCESS.labels(labels).inc();
return result;
} catch (Exception e) {
RAW_REQUEST_FAILURE.labels(label).inc();
RAW_REQUEST_FAILURE.labels(labels).inc();
slowLog.setError(e);
throw e;
} finally {
@ -525,8 +534,8 @@ public class RawKVClient implements RawKVClientBase {
@Override
public List<KvPair> scan(ByteString startKey, ByteString endKey, boolean keyOnly) {
String label = "client_raw_scan_without_limit";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
String[] labels = withClusterId("client_raw_scan_without_limit");
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer();
SlowLog slowLog =
new SlowLogImpl(
conf.getRawKVScanSlowLogInMS(),
@ -541,7 +550,7 @@ public class RawKVClient implements RawKVClientBase {
}
});
ConcreteBackOffer backOffer =
ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVScanTimeoutInMS(), slowLog);
ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVScanTimeoutInMS(), slowLog, clusterId);
try {
ByteString newStartKey = startKey;
List<KvPair> result = new ArrayList<>();
@ -561,10 +570,10 @@ public class RawKVClient implements RawKVClientBase {
iterator.forEachRemaining(result::add);
newStartKey = Key.toRawKey(result.get(result.size() - 1).getKey()).next().toByteString();
}
RAW_REQUEST_SUCCESS.labels(label).inc();
RAW_REQUEST_SUCCESS.labels(labels).inc();
return result;
} catch (Exception e) {
RAW_REQUEST_FAILURE.labels(label).inc();
RAW_REQUEST_FAILURE.labels(labels).inc();
slowLog.setError(e);
throw e;
} finally {
@ -607,8 +616,8 @@ public class RawKVClient implements RawKVClientBase {
}
private void delete(ByteString key, boolean atomic) {
String label = "client_raw_delete";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
String[] labels = withClusterId("client_raw_delete");
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer();
SlowLog slowLog =
new SlowLogImpl(
conf.getRawKVWriteSlowLogInMS(),
@ -622,13 +631,13 @@ public class RawKVClient implements RawKVClientBase {
}
});
ConcreteBackOffer backOffer =
ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVWriteTimeoutInMS(), slowLog);
ConcreteBackOffer.newDeadlineBackOff(conf.getRawKVWriteTimeoutInMS(), slowLog, clusterId);
try {
while (true) {
try (RegionStoreClient client = clientBuilder.build(key, backOffer)) {
slowLog.addProperty("region", client.getRegion().toString());
client.rawDelete(backOffer, key, atomic);
RAW_REQUEST_SUCCESS.labels(label).inc();
RAW_REQUEST_SUCCESS.labels(labels).inc();
return;
} catch (final TiKVException e) {
backOffer.doBackOff(BackOffFunction.BackOffFuncType.BoRegionMiss, e);
@ -636,7 +645,7 @@ public class RawKVClient implements RawKVClientBase {
}
}
} catch (Exception e) {
RAW_REQUEST_FAILURE.labels(label).inc();
RAW_REQUEST_FAILURE.labels(labels).inc();
slowLog.setError(e);
throw e;
} finally {
@ -647,17 +656,17 @@ public class RawKVClient implements RawKVClientBase {
@Override
public synchronized void deleteRange(ByteString startKey, ByteString endKey) {
String label = "client_raw_delete_range";
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(label).startTimer();
String[] labels = withClusterId("client_raw_delete_range");
Histogram.Timer requestTimer = RAW_REQUEST_LATENCY.labels(labels).startTimer();
ConcreteBackOffer backOffer =
ConcreteBackOffer.newDeadlineBackOff(
conf.getRawKVCleanTimeoutInMS(), SlowLogEmptyImpl.INSTANCE);
conf.getRawKVCleanTimeoutInMS(), SlowLogEmptyImpl.INSTANCE, clusterId);
try {
long deadline = System.currentTimeMillis() + conf.getRawKVCleanTimeoutInMS();
doSendDeleteRange(backOffer, startKey, endKey, deadline);
RAW_REQUEST_SUCCESS.labels(label).inc();
RAW_REQUEST_SUCCESS.labels(labels).inc();
} catch (Exception e) {
RAW_REQUEST_FAILURE.labels(label).inc();
RAW_REQUEST_FAILURE.labels(labels).inc();
throw e;
} finally {
requestTimer.observeDuration();
@ -670,6 +679,11 @@ public class RawKVClient implements RawKVClientBase {
deleteRange(key, endKey);
}
@Override
public TiSession getSession() {
return session;
}
private void doSendBatchPut(
BackOffer backOffer,
Map<ByteString, ByteString> kvPairs,

View File

@ -18,6 +18,7 @@ package org.tikv.raw;
import com.google.protobuf.ByteString;
import java.util.List;
import java.util.Map;
import org.tikv.common.TiSession;
import org.tikv.common.util.ScanOption;
import org.tikv.kvproto.Kvrpcpb;
@ -254,4 +255,6 @@ public interface RawKVClientBase extends AutoCloseable {
* @param key prefix of keys to be deleted
*/
void deletePrefix(ByteString key);
TiSession getSession();
}

View File

@ -22,6 +22,7 @@ import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.tikv.common.TiSession;
import org.tikv.common.exception.CircuitBreakerOpenException;
import org.tikv.common.util.HistogramUtils;
import org.tikv.common.util.ScanOption;
@ -35,28 +36,28 @@ public class SmartRawKVClient implements RawKVClientBase {
HistogramUtils.buildDuration()
.name("client_java_smart_raw_requests_latency")
.help("client smart raw request latency.")
.labelNames("type")
.labelNames("type", "cluster")
.register();
private static final Counter REQUEST_SUCCESS =
Counter.build()
.name("client_java_smart_raw_requests_success")
.help("client smart raw request success.")
.labelNames("type")
.labelNames("type", "cluster")
.register();
private static final Counter REQUEST_FAILURE =
Counter.build()
.name("client_java_smart_raw_requests_failure")
.help("client smart raw request failure.")
.labelNames("type")
.labelNames("type", "cluster")
.register();
private static final Counter CIRCUIT_BREAKER_OPENED =
Counter.build()
.name("client_java_smart_raw_circuit_breaker_opened")
.help("client smart raw circuit breaker opened.")
.labelNames("type")
.labelNames("type", "cluster")
.register();
private final RawKVClientBase client;
@ -208,14 +209,21 @@ public class SmartRawKVClient implements RawKVClientBase {
callWithCircuitBreaker("deletePrefix", () -> client.deletePrefix(key));
}
@Override
public TiSession getSession() {
return client.getSession();
}
<T> T callWithCircuitBreaker(String funcName, Function1<T> func) {
Histogram.Timer requestTimer = REQUEST_LATENCY.labels(funcName).startTimer();
String[] labels = new String[] {funcName, getSession().getPDClient().getClusterId().toString()};
Histogram.Timer requestTimer = REQUEST_LATENCY.labels(labels).startTimer();
try {
T result = callWithCircuitBreaker0(funcName, func);
REQUEST_SUCCESS.labels(funcName).inc();
REQUEST_SUCCESS.labels(labels).inc();
return result;
} catch (Exception e) {
REQUEST_FAILURE.labels(funcName).inc();
REQUEST_FAILURE.labels(labels).inc();
throw e;
} finally {
requestTimer.observeDuration();
@ -248,7 +256,9 @@ public class SmartRawKVClient implements RawKVClientBase {
}
} else {
logger.debug("Circuit Breaker Opened");
CIRCUIT_BREAKER_OPENED.labels(funcName).inc();
CIRCUIT_BREAKER_OPENED
.labels(funcName, getSession().getPDClient().getClusterId().toString())
.inc();
throw new CircuitBreakerOpenException();
}
}

View File

@ -30,9 +30,10 @@ public class CircuitBreakerImpl implements CircuitBreaker {
Counter.build()
.name("client_java_circuit_breaker_attempt_counter")
.help("client circuit breaker attempt counter.")
.labelNames("type")
.labelNames("type", "cluster")
.register();
private final Long clusterId;
private final boolean enable;
private final int windowInSeconds;
private final int errorThresholdPercentage;
@ -47,14 +48,15 @@ public class CircuitBreakerImpl implements CircuitBreaker {
private final CircuitBreakerMetrics metrics;
/**
 * Creates a circuit breaker configured from {@code conf}.
 *
 * <p>Resolves the interleaved old/new diff lines (two signatures and duplicated trailing
 * arguments made this span syntactically invalid) to the post-change two-argument form;
 * the delegated full constructor visibly accepts {@code clusterId} as its last parameter.
 *
 * @param conf source of the circuit-breaker settings (enable flag, availability window,
 *     error threshold, request volume threshold, sleep window, attempt count)
 * @param clusterId TiKV cluster id, attached as the {@code cluster} metrics label
 */
public CircuitBreakerImpl(TiConfiguration conf, long clusterId) {
  this(
      conf.isCircuitBreakEnable(),
      conf.getCircuitBreakAvailabilityWindowInSeconds(),
      conf.getCircuitBreakAvailabilityErrorThresholdPercentage(),
      conf.getCircuitBreakAvailabilityRequestVolumnThreshold(),
      conf.getCircuitBreakSleepWindowInSeconds(),
      conf.getCircuitBreakAttemptRequestCount(),
      clusterId);
}
public CircuitBreakerImpl(
@ -63,8 +65,10 @@ public class CircuitBreakerImpl implements CircuitBreaker {
int errorThresholdPercentage,
int requestVolumeThreshold,
int sleepWindowInSeconds,
int attemptRequestCount) {
int attemptRequestCount,
long clusterId) {
this.enable = enable;
this.clusterId = clusterId;
this.windowInSeconds = windowInSeconds;
this.errorThresholdPercentage = errorThresholdPercentage;
this.requestVolumeThreshold = requestVolumeThreshold;
@ -123,7 +127,7 @@ public class CircuitBreakerImpl implements CircuitBreaker {
@Override
public void recordAttemptSuccess() {
CIRCUIT_BREAKER_ATTEMPT_COUNTER.labels("success").inc();
CIRCUIT_BREAKER_ATTEMPT_COUNTER.labels("success", clusterId.toString()).inc();
if (attemptSuccessCount.incrementAndGet() >= this.attemptRequestCount) {
halfOpen2Close();
}
@ -131,7 +135,7 @@ public class CircuitBreakerImpl implements CircuitBreaker {
/**
 * Records a failed half-open attempt: increments the attempt counter with the
 * {@code failure} type and the cluster id label, then invokes {@code halfOpen2Open()}.
 *
 * <p>The stale one-label {@code labels("failure")} call is removed: the counter is
 * declared with two label names ({@code "type"}, {@code "cluster"}), so a single-label
 * call would throw {@code IllegalArgumentException} at runtime.
 */
@Override
public void recordAttemptFailure() {
  CIRCUIT_BREAKER_ATTEMPT_COUNTER.labels("failure", clusterId.toString()).inc();
  halfOpen2Open();
}

View File

@ -71,7 +71,9 @@ public class KVClient implements AutoCloseable {
* @return a ByteString value if key exists, ByteString.EMPTY if key does not exist
*/
public ByteString get(ByteString key, long version) throws GrpcException {
BackOffer backOffer = ConcreteBackOffer.newGetBackOff();
BackOffer backOffer =
ConcreteBackOffer.newGetBackOff(
clientBuilder.getRegionManager().getPDClient().getClusterId());
while (true) {
RegionStoreClient client = clientBuilder.build(key);
try {

View File

@ -90,7 +90,9 @@ public class TTLManager {
}
private void doKeepAlive() {
BackOffer bo = ConcreteBackOffer.newCustomBackOff(MANAGED_LOCK_TTL);
BackOffer bo =
ConcreteBackOffer.newCustomBackOff(
MANAGED_LOCK_TTL, regionManager.getPDClient().getClusterId());
long uptime = kvClient.getTimestamp().getPhysical() - TiTimestamp.extractPhysical(startTS);
long ttl = uptime + MANAGED_LOCK_TTL;

View File

@ -281,7 +281,9 @@ public class TwoPhaseCommitter {
// consume one task if reaches task limit
completionService.take().get();
}
BackOffer backOffer = ConcreteBackOffer.newCustomBackOff(maxBackOfferMS);
BackOffer backOffer =
ConcreteBackOffer.newCustomBackOff(
maxBackOfferMS, regionManager.getPDClient().getClusterId());
completionService.submit(
() -> {
doPrewriteSecondaryKeysInBatchesWithRetry(
@ -539,7 +541,9 @@ public class TwoPhaseCommitter {
// consume one task if reaches task limit
completionService.take().get();
}
BackOffer backOffer = ConcreteBackOffer.newCustomBackOff(commitBackOfferMS);
BackOffer backOffer =
ConcreteBackOffer.newCustomBackOff(
commitBackOfferMS, regionManager.getPDClient().getClusterId());
completionService.submit(
() -> {
doCommitSecondaryKeysWithRetry(backOffer, keyBytes, curSize, commitTs);

View File

@ -68,7 +68,7 @@ public class TxnKVClient implements AutoCloseable {
}
public TiTimestamp getTimestamp() {
BackOffer bo = ConcreteBackOffer.newTsoBackOff();
BackOffer bo = ConcreteBackOffer.newTsoBackOff(pdClient.getClusterId());
TiTimestamp timestamp = new TiTimestamp(0, 0);
try {
while (true) {

View File

@ -33,6 +33,7 @@ public class MetricsTest extends BaseRawKVTest {
client.put(ByteString.copyFromUtf8("k"), ByteString.copyFromUtf8("v"));
ByteString result = client.get(ByteString.copyFromUtf8("k"));
assertEquals(result.toStringUtf8(), "v");
Thread.sleep(1000);
client.close();
session.close();
}

View File

@ -25,7 +25,8 @@ public class CircuitBreakerTest {
errorThresholdPercentage,
requestVolumeThreshold,
sleepWindowInSeconds,
attemptRequestCount);
attemptRequestCount,
0);
CircuitBreakerMetrics metrics = circuitBreaker.getMetrics();
// initial state: CLOSE