Fix conduit health grafana dashboard (#1086)

* Scope health queries to controller namespace

* Add a prometheus query variable to get the conduit namespace
This commit is contained in:
Risha Mars 2018-06-08 12:57:05 -07:00 committed by GitHub
parent cea37ee48d
commit b930bc6b88
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 29 additions and 9 deletions

View File

@@ -85,7 +85,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(response_total{classification=\"success\", deployment=\"$deployment\", direction=\"inbound\"}[30s])) by (deployment, pod) / sum(irate(response_total{deployment=\"$deployment\", direction=\"inbound\"}[30s])) by (deployment, pod)",
"expr": "sum(irate(response_total{classification=\"success\", deployment=\"$deployment\", namespace=\"$namespace\", direction=\"inbound\"}[30s])) by (deployment, pod) / sum(irate(response_total{deployment=\"$deployment\", namespace=\"$namespace\", direction=\"inbound\"}[30s])) by (deployment, pod)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "po/{{pod}}",
@@ -169,7 +169,7 @@
"steppedLine": false,
"targets": [
{
"expr": "sum(irate(request_total{deployment=\"$deployment\", direction=\"inbound\"}[30s])) by (deployment, pod)",
"expr": "sum(irate(request_total{deployment=\"$deployment\", namespace=\"$namespace\", direction=\"inbound\"}[30s])) by (deployment, pod)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "po/{{pod}}",
@@ -253,21 +253,21 @@
"steppedLine": false,
"targets": [
{
"expr": "histogram_quantile(0.5, sum(rate(response_latency_ms_bucket{deployment=\"$deployment\", direction=\"inbound\"}[30s])) by (le, deployment, pod))",
"expr": "histogram_quantile(0.5, sum(rate(response_latency_ms_bucket{deployment=\"$deployment\", namespace=\"$namespace\", direction=\"inbound\"}[30s])) by (le, deployment, pod))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "p50 po/{{pod}}",
"refId": "A"
},
{
"expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{deployment=\"$deployment\", direction=\"inbound\"}[30s])) by (le, deployment, pod))",
"expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{deployment=\"$deployment\", namespace=\"$namespace\", direction=\"inbound\"}[30s])) by (le, deployment, pod))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "p95 po/{{pod}}",
"refId": "B"
},
{
"expr": "histogram_quantile(0.99, sum(rate(response_latency_ms_bucket{deployment=\"$deployment\", direction=\"inbound\"}[30s])) by (le, deployment, pod))",
"expr": "histogram_quantile(0.99, sum(rate(response_latency_ms_bucket{deployment=\"$deployment\", namespace=\"$namespace\", direction=\"inbound\"}[30s])) by (le, deployment, pod))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "p99 po/{{pod}}",
@@ -405,7 +405,7 @@
"steppedLine": false,
"targets": [
{
"expr": "tcp_close_total{deployment=\"$deployment\", direction=\"inbound\",classification=\"failure\"}",
"expr": "tcp_close_total{deployment=\"$deployment\", namespace=\"$namespace\", direction=\"inbound\",classification=\"failure\"}",
"format": "time_series",
"intervalFactor": 1,
"refId": "A"
@@ -488,7 +488,7 @@
"steppedLine": false,
"targets": [
{
"expr": "tcp_open_connections{deployment=\"$deployment\", direction=\"inbound\"}",
"expr": "tcp_open_connections{deployment=\"$deployment\", namespace=\"$namespace\", direction=\"inbound\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{peer}}",
@@ -571,7 +571,7 @@
},
"targets": [
{
"expr": "tcp_connection_duration_ms_bucket{deployment=\"$deployment\", direction=\"inbound\"}",
"expr": "tcp_connection_duration_ms_bucket{deployment=\"$deployment\", namespace=\"$namespace\", direction=\"inbound\"}",
"format": "time_series",
"intervalFactor": 1,
"refId": "A"
@@ -1446,6 +1446,26 @@
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {},
"datasource": "prometheus",
"hide": 0,
"includeAll": false,
"label": "Conduit Namespace",
"multi": false,
"name": "namespace",
"options": [],
"query": "label_values(request_total{conduit_io_control_plane_ns!=\"\"}, conduit_io_control_plane_ns)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
@@ -1481,5 +1501,5 @@
"timezone": "",
"title": "Conduit Health",
"uid": "Og9nanzmk",
"version": 2
"version": 3
}