From c9cdd838dc1087c1198912ac3a80b994424c1016 Mon Sep 17 00:00:00 2001 From: Andrew Seigner Date: Fri, 13 Apr 2018 18:01:44 -0700 Subject: [PATCH] Standardize and polish Grafana for 0.4.0 release (#766) The top-line, deployments, and health Grafana dashboards had inconsistent layouts and data. This change standardizes our Grafana dashboards. Every row is composed of Success Rate, Request Rate, and Latency. Part of #420. Signed-off-by: Andrew Seigner --- grafana/dashboards/deployment.json | 1847 +++++++++++++--------------- grafana/dashboards/health.json | 1343 ++++++++++---------- grafana/dashboards/top-line.json | 412 +++---- 3 files changed, 1693 insertions(+), 1909 deletions(-) diff --git a/grafana/dashboards/deployment.json b/grafana/dashboards/deployment.json index 3279b7473..713f2d6aa 100644 --- a/grafana/dashboards/deployment.json +++ b/grafana/dashboards/deployment.json @@ -16,11 +16,11 @@ "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1523047156373, + "iteration": 1523660761961, "links": [], "panels": [ { - "content": "
\n  \n $deployment\n
", + "content": "
\n  \n deploy/$deployment\n
", "gridPos": { "h": 2, "w": 24, @@ -34,90 +34,6 @@ "transparent": true, "type": "text" }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "prometheus", - "decimals": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 5, - "x": 0, - "y": 2 - }, - "id": 4, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": " RPS", - "postfixFontSize": "100%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(irate(request_total{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"inbound\"}[20s]))", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "title": "Request rate", - "transparent": true, - "type": "singlestat", - "valueFontSize": "100%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, { "cacheTimeout": null, "colorBackground": false, @@ -138,9 +54,9 @@ "thresholdMarkers": true }, "gridPos": { - "h": 3, - "w": 4, - "x": 5, + "h": 4, + "w": 8, + "x": 0, "y": 2 }, "id": 5, @@ -173,7 +89,7 @@ ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, + "full": true, "lineColor": "rgb(31, 120, 193)", "show": true }, @@ -189,7 +105,7 @@ } ], "thresholds": "0.9,.99", - "title": "Success rate", + "title": "SUCCESS RATE", "transparent": true, "type": "singlestat", "valueFontSize": "80%", @@ -213,7 +129,7 @@ ], "datasource": "prometheus", "decimals": null, - "format": "ms", + "format": "none", "gauge": { "maxValue": 100, "minValue": 0, @@ -222,12 +138,12 @@ "thresholdMarkers": true }, "gridPos": { - "h": 3, - "w": 5, - "x": 9, + "h": 4, + "w": 8, + "x": 8, "y": 2 }, - "id": 6, + "id": 4, "interval": null, "links": [], "mappingType": 1, @@ -244,7 +160,7 @@ "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, - "postfix": "", + "postfix": " RPS", "postfixFontSize": "100%", "prefix": "", "prefixFontSize": "50%", @@ -257,14 +173,14 @@ ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, + "full": true, "lineColor": "rgb(31, 120, 193)", "show": true }, "tableColumn": "", "targets": [ { - "expr": "histogram_quantile(0.5, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"inbound\"}[20s])) by (le))", + "expr": "sum(irate(request_total{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"inbound\"}[20s]))", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -273,175 +189,7 @@ } ], "thresholds": "", - "title": "P50 latency", - "transparent": true, - "type": "singlestat", - "valueFontSize": "100%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "prometheus", - "decimals": null, - "format": "ms", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 5, - "x": 14, - "y": 2 - }, - "id": 7, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "100%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"inbound\"}[20s])) by (le))", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "title": "P95 latency", - "transparent": true, - "type": "singlestat", - "valueFontSize": "100%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "prometheus", - "decimals": null, - "format": "ms", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 5, - "x": 19, - "y": 2 - }, - "id": 8, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "100%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"inbound\"}[20s])) by (le))", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "title": "P99 latency", + "title": "REQUEST RATE", "transparent": true, "type": "singlestat", "valueFontSize": "100%", @@ -474,10 +222,10 @@ "thresholdMarkers": true }, "gridPos": { - "h": 2, - "w": 12, - "x": 0, - "y": 5 + "h": 4, + "w": 4, + "x": 16, + "y": 2 }, "id": 11, "interval": null, @@ -496,9 +244,9 @@ "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, - "postfix": " inbound deployments", + "postfix": "", "postfixFontSize": "100%", - "prefix": "«", + "prefix": "", "prefixFontSize": "100%", "rangeMaps": [ { @@ -524,7 +272,7 @@ } ], "thresholds": "", - "title": "", + "title": "INBOUND DEPLOYMENTS", "transparent": true, "type": "singlestat", "valueFontSize": "100%", @@ -556,10 +304,10 @@ "thresholdMarkers": true }, "gridPos": { - "h": 2, - "w": 12, - "x": 12, - "y": 5 + "h": 4, + "w": 4, + "x": 20, + "y": 2 }, "id": 15, "interval": null, @@ -578,7 +326,7 @@ "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, - "postfix": " outbound deployments »", + "postfix": "", "postfixFontSize": "100%", "prefix": "", "prefixFontSize": "50%", @@ -606,7 +354,7 @@ } ], "thresholds": "", - "title": "", + "title": "OUTBOUND DEPLOYMENTS", "transparent": true, "type": "singlestat", "valueFontSize": "100%", @@ -619,6 +367,21 @@ ], "valueName": "current" }, + { + "content": "
\n INBOUND TRAFFIC\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 17, + "links": [], + "mode": "html", + "title": "", + "transparent": true, + "type": "text" + }, { "aliasColors": {}, "bars": false, @@ -627,10 +390,91 @@ "datasource": "prometheus", "fill": 1, "gridPos": { - "h": 9, - "w": 12, + "h": 7, + "w": 8, "x": 0, - "y": 7 + "y": 8 + }, + "id": 67, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", deployment=\"$deployment\", direction=\"inbound\"}[20s])) by (deployment) / sum(irate(response_total{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"inbound\"}[20s])) by (deployment)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "deploy/{{deployment}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "SUCCESS RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 8 }, "id": 2, "legend": { @@ -652,24 +496,24 @@ "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "sum(irate(request_total{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"inbound\"}[20s]))", + "expr": "sum(irate(request_total{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"inbound\"}[20s])) by (deployment)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "inbound", + "legendFormat": "deploy/{{deployment}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "INBOUND REQUEST RATE", + "title": "REQUEST RATE", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -682,11 +526,12 @@ }, "yaxes": [ { + "decimals": null, "format": "rps", - "label": null, + "label": "", "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { @@ -707,12 +552,12 @@ "datasource": "prometheus", "fill": 1, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 7 + "h": 7, + "w": 8, + "x": 16, + "y": 8 }, - "id": 9, + "id": 68, "legend": { "avg": false, "current": false, @@ -736,20 +581,35 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(request_total{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"outbound\"}[20s]))", + "expr": "histogram_quantile(0.5, sum(irate(response_latency_ms_bucket{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"inbound\"}[20s])) by (le, deployment))", "format": "time_series", "intervalFactor": 1, - "legendFormat": "outbound", + "legendFormat": "p50 deploy/{{deployment}}", "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, sum(irate(response_latency_ms_bucket{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"inbound\"}[20s])) by (le, deployment))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "p95 deploy/{{deployment}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, sum(irate(response_latency_ms_bucket{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"inbound\"}[20s])) by (le, deployment))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "p99 deploy/{{deployment}}", + "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "OUTBOUND REQUEST RATE", + "title": "LATENCY", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -762,8 +622,9 @@ }, "yaxes": [ { - "format": "rps", - "label": null, + "decimals": null, + "format": "ms", + "label": "", "logBase": 1, "max": null, "min": null, @@ -780,20 +641,307 @@ ] }, { - "content": "
\n INBOUND\n
", + "content": "
\n INBOUND DEPLOYMENTS\n
", "gridPos": { "h": 2, "w": 24, "x": 0, - "y": 16 + "y": 15 }, - "id": 17, + "id": 76, "links": [], "mode": "html", "title": "", "transparent": true, "type": "text" }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 59, + "panels": [ + { + "content": "
\n  \n deploy/$inbound\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 39, + "links": [], + "mode": "html", + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 20 + }, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(response_total{classification=\"success\", deployment=\"$inbound\", dst_namespace=\"$namespace\", dst_deployment=\"$deployment\", direction=\"outbound\"}[20s])) by (deployment, pod) / sum(irate(response_total{deployment=\"$inbound\", dst_namespace=\"$namespace\", dst_deployment=\"$deployment\", direction=\"outbound\"}[20s])) by (deployment, pod)", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "po/{{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "SUCCESS RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 20 + }, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(request_total{deployment=\"$inbound\", dst_namespace=\"$namespace\", dst_deployment=\"$deployment\", direction=\"outbound\"}[20s])) by (deployment, pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "po/{{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "REQUEST RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "rps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 20 + }, + "id": 29, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.5, sum(rate(response_latency_ms_bucket{deployment=\"$inbound\", dst_namespace=\"$namespace\", dst_deployment=\"$deployment\", direction=\"outbound\"}[20s])) by (le, deployment))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P50 deploy/{{deployment}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{deployment=\"$inbound\", dst_namespace=\"$namespace\", dst_deployment=\"$deployment\", direction=\"outbound\"}[20s])) by (le, deployment))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P95 deploy/{{deployment}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(response_latency_ms_bucket{deployment=\"$inbound\", dst_namespace=\"$namespace\", dst_deployment=\"$deployment\", direction=\"outbound\"}[20s])) by (le, deployment))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P99 deploy/{{deployment}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "LATENCY", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": "inbound", + "title": "deploy/$inbound", + "type": "row" + }, { "collapsed": false, "gridPos": { @@ -802,435 +950,6 @@ "x": 0, "y": 18 }, - "id": 59, - "panels": [], - "repeat": "inbound", - "title": "", - "type": "row" - }, - { - "content": "
\n  \n $inbound\n
", - "gridPos": { - "h": 2, - "w": 24, - "x": 0, - "y": 19 - }, - "id": 39, - "links": [], - "mode": "html", - "title": "", - "transparent": true, - "type": "text" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 6, - "w": 4, - "x": 0, - "y": 21 - }, - "id": 22, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(irate(request_total{deployment=\"$inbound\", dst_namespace=\"$namespace\", dst_deployment=\"$deployment\", direction=\"outbound\"}[20s])) by (deployment)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{deployment}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "REQUEST RATE", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "rps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 6, - "w": 5, - "x": 4, - "y": 21 - }, - "id": 36, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(irate(response_total{classification=\"success\", deployment=\"$inbound\", dst_namespace=\"$namespace\", dst_deployment=\"$deployment\", direction=\"outbound\"}[20s])) by (deployment) / sum(irate(response_total{deployment=\"$inbound\", dst_namespace=\"$namespace\", dst_deployment=\"$deployment\", direction=\"outbound\"}[20s])) by (deployment)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{deployment}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "SUCCESS RATE", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 6, - "w": 5, - "x": 9, - "y": 21 - }, - "id": 29, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.5, sum(rate(response_latency_ms_bucket{deployment=\"$inbound\", dst_namespace=\"$namespace\", dst_deployment=\"$deployment\", direction=\"outbound\"}[20s])) by (le, deployment))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{deployment}} P50", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "P50", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 6, - "w": 5, - "x": 14, - "y": 21 - }, - "id": 37, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{deployment=\"$inbound\", dst_namespace=\"$namespace\", dst_deployment=\"$deployment\", direction=\"outbound\"}[20s])) by (le, deployment))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{deployment}} P95", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "P95", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 6, - "w": 5, - "x": 19, - "y": 21 - }, - "id": 38, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum(rate(response_latency_ms_bucket{deployment=\"$inbound\", dst_namespace=\"$namespace\", dst_deployment=\"$deployment\", direction=\"outbound\"}[20s])) by (le, deployment))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{deployment}} P99", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "P99", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 27 - }, "id": 34, "panels": [], "repeat": null, @@ -1238,12 +957,12 @@ "type": "row" }, { - "content": "
\n OUTBOUND\n
", + "content": "
\n OUTBOUND TRAFFIC\n
", "gridPos": { "h": 2, "w": 24, "x": 0, - "y": 28 + "y": 19 }, "id": 32, "links": [], @@ -1252,35 +971,6 @@ "transparent": true, "type": "text" }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 30 - }, - "id": 27, - "panels": [], - "repeat": "outbound", - "title": "", - "type": "row" - }, - { - "content": "
\n  \n $outbound\n
", - "gridPos": { - "h": 2, - "w": 24, - "x": 0, - "y": 31 - }, - "id": 40, - "links": [], - "mode": "html", - "title": "", - "transparent": true, - "type": "text" - }, { "aliasColors": {}, "bars": false, @@ -1289,12 +979,12 @@ "datasource": "prometheus", "fill": 1, "gridPos": { - "h": 6, - "w": 4, + "h": 7, + "w": 8, "x": 0, - "y": 33 + "y": 21 }, - "id": 35, + "id": 77, "legend": { "avg": false, "current": false, @@ -1318,10 +1008,91 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(request_total{namespace=\"$namespace\", deployment=\"$deployment\", dst_deployment=\"$outbound\", direction=\"outbound\"}[20s])) by (dst_deployment)", + "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", deployment=\"$deployment\", direction=\"outbound\"}[20s])) by (dst_deployment) / sum(irate(response_total{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"outbound\"}[20s])) by (dst_deployment)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{dst_deployment}}", + "legendFormat": "deploy/{{dst_deployment}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "SUCCESS RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 21 + }, + "id": 78, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(request_total{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"outbound\"}[20s])) by (dst_deployment)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "deploy/{{dst_deployment}}", "refId": "A" } ], @@ -1345,10 +1116,10 @@ "yaxes": [ { "format": "rps", - "label": null, + "label": "", "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { @@ -1369,12 +1140,12 @@ "datasource": "prometheus", "fill": 1, "gridPos": { - "h": 6, - "w": 5, - "x": 4, - "y": 33 + "h": 7, + "w": 8, + "x": 16, + "y": 21 }, - "id": 28, + "id": 79, "legend": { "avg": false, "current": false, @@ -1398,97 +1169,17 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", deployment=\"$deployment\", dst_deployment=\"$outbound\", direction=\"outbound\"}[20s])) by (dst_deployment) / sum(irate(response_total{namespace=\"$namespace\", deployment=\"$deployment\", dst_deployment=\"$outbound\", direction=\"outbound\"}[20s])) by (dst_deployment)", + "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"outbound\"}[20s])) by (le, dst_deployment))", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{dst_deployment}}", + "legendFormat": "P95 deploy/{{dst_deployment}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "SUCCESS RATE", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, - "gridPos": { - "h": 6, - "w": 5, - "x": 9, - "y": 33 - }, - "id": 41, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.5, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", deployment=\"$deployment\", dst_deployment=\"$outbound\", direction=\"outbound\"}[20s])) by (le, dst_deployment))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{dst_deployment}} P50", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "P50", + "title": "P95 LATENCY", "tooltip": { "shared": true, "sort": 2, @@ -1522,164 +1213,304 @@ ] }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, + "content": "
\n OUTBOUND DEPLOYMENTS\n
", "gridPos": { - "h": 6, - "w": 5, - "x": 14, - "y": 33 + "h": 2, + "w": 24, + "x": 0, + "y": 28 }, - "id": 42, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, + "id": 80, "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", deployment=\"$deployment\", dst_deployment=\"$outbound\", direction=\"outbound\"}[20s])) by (le, dst_deployment))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{dst_deployment}} P95", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "P95", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] + "mode": "html", + "title": "", + "transparent": true, + "type": "text" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fill": 1, + "collapsed": true, "gridPos": { - "h": 6, - "w": 5, - "x": 19, - "y": 33 + "h": 1, + "w": 24, + "x": 0, + "y": 30 }, - "id": 43, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ + "id": 27, + "panels": [ { - "expr": "histogram_quantile(0.99, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", deployment=\"$deployment\", dst_deployment=\"$outbound\", direction=\"outbound\"}[20s])) by (le, dst_deployment))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{dst_deployment}} P99", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "P99", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "content": "
\n  \n deploy/$outbound\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 40, + "links": [], + "mode": "html", + "title": "", + "transparent": true, + "type": "text" }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 33 + }, + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", deployment=\"$deployment\", dst_deployment=\"$outbound\", direction=\"outbound\"}[20s])) by (dst_deployment) / sum(irate(response_total{namespace=\"$namespace\", deployment=\"$deployment\", dst_deployment=\"$outbound\", direction=\"outbound\"}[20s])) by (dst_deployment)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "deploy/{{dst_deployment}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "SUCCESS RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 33 + }, + "id": 35, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(request_total{namespace=\"$namespace\", deployment=\"$deployment\", dst_deployment=\"$outbound\", direction=\"outbound\"}[20s])) by (dst_deployment)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "deploy/{{dst_deployment}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "REQUEST RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "rps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 33 + }, + "id": 41, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.5, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", deployment=\"$deployment\", dst_deployment=\"$outbound\", direction=\"outbound\"}[20s])) by (le, dst_deployment))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P50 deploy/{{dst_deployment}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", deployment=\"$deployment\", dst_deployment=\"$outbound\", direction=\"outbound\"}[20s])) by (le, dst_deployment))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P95 deploy/{{dst_deployment}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", deployment=\"$deployment\", dst_deployment=\"$outbound\", direction=\"outbound\"}[20s])) by (le, dst_deployment))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P99 deploy/{{dst_deployment}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "LATENCY", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] } - ] + ], + "repeat": "outbound", + "title": "deploy/$outbound", + "type": "row" } ], "refresh": "5s", diff --git a/grafana/dashboards/health.json b/grafana/dashboards/health.json index 49c49bb83..7925efe53 100644 --- a/grafana/dashboards/health.json +++ b/grafana/dashboards/health.json @@ -15,22 +15,68 @@ "editable": true, "gnetId": null, "graphTooltip": 1, + "id": null, + "iteration": 1523662094618, "links": [], "panels": [ + { + "content": "
\n Control-Plane Proxy Telemetry\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 17, + "links": [], + "mode": "html", + "title": "", + "transparent": true, + "type": "text" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "id": 19, + "panels": [], + "repeat": "deployment", + "title": "", + "type": "row" + }, + { + "content": "
\n  \n deploy/$deployment\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 21, + "links": [], + "mode": "html", + "title": "", + "transparent": true, + "type": "text" + }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": null, + "datasource": "prometheus", "fill": 1, "gridPos": { - "h": 9, - "w": 12, + "h": 7, + "w": 8, "x": 0, - "y": 0 + "y": 5 }, - "id": 10, + "id": 23, "legend": { "avg": false, "current": false, @@ -54,18 +100,17 @@ "steppedLine": false, "targets": [ { - "$$hashKey": "object:172", - "expr": "histogram_quantile(0.5, rate(grpc_server_handling_seconds_bucket{job=\"conduit-controller\"}[20s]))", + "expr": "sum(irate(response_total{classification=\"success\", deployment=\"$deployment\", direction=\"inbound\"}[20s])) by (deployment, pod) / sum(irate(response_total{deployment=\"$deployment\", direction=\"inbound\"}[20s])) by (deployment, pod)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{component}}/{{grpc_method}}", + "legendFormat": "po/{{pod}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "gRPC Latency P50", + "title": "SUCCESS RATE", "tooltip": { "shared": true, "sort": 2, @@ -81,16 +126,14 @@ }, "yaxes": [ { - "$$hashKey": "object:1706", - "format": "s", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": "1", "min": null, "show": true }, { - "$$hashKey": "object:1707", "format": "short", "label": null, "logBase": 1, @@ -105,15 +148,15 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": null, + "datasource": "prometheus", "fill": 1, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 0 + "h": 7, + "w": 8, + "x": 8, + "y": 5 }, - "id": 13, + "id": 24, "legend": { "avg": false, "current": false, @@ -133,22 +176,21 @@ "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "$$hashKey": "object:172", - "expr": "histogram_quantile(0.95, rate(grpc_server_handling_seconds_bucket{job=\"conduit-controller\"}[20s]))", + "expr": "sum(irate(request_total{deployment=\"$deployment\", direction=\"inbound\"}[20s])) by (deployment, pod)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{component}}/{{grpc_method}}", + "legendFormat": "po/{{pod}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "gRPC Latency P95", + "title": "REQUEST RATE", "tooltip": { "shared": true, "sort": 2, @@ -164,99 +206,14 @@ }, "yaxes": [ { - "$$hashKey": "object:1706", - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:1707", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 9 - }, - "id": 15, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "$$hashKey": "object:172", - "expr": "irate(grpc_server_handled_total{job=\"conduit-controller\"}[20s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{component}}/{{grpc_method}}/{{grpc_code}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "gRPC Request Volume", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:1706", "format": "rps", "label": null, "logBase": 1, "max": null, - "min": null, + "min": "0", "show": true }, { - "$$hashKey": "object:1707", "format": "short", "label": null, "logBase": 1, @@ -271,15 +228,15 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": null, + "datasource": "prometheus", "fill": 1, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 9 + "h": 7, + "w": 8, + "x": 16, + "y": 5 }, - "id": 14, + "id": 25, "legend": { "avg": false, "current": false, @@ -303,390 +260,31 @@ "steppedLine": false, "targets": [ { - "$$hashKey": "object:172", - "expr": "irate(grpc_server_msg_sent_total{job=\"conduit-controller\"}[20s])", + "expr": "histogram_quantile(0.5, sum(rate(response_latency_ms_bucket{deployment=\"$deployment\", direction=\"inbound\"}[20s])) by (le, deployment, pod))", "format": "time_series", "intervalFactor": 1, - "legendFormat": "sent/{{component}}/{{grpc_method}}", + "legendFormat": "p50 po/{{pod}}", "refId": "A" }, { - "$$hashKey": "object:3590", - "expr": "irate(grpc_server_msg_received_total{job=\"conduit-controller\"}[20s])", + "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{deployment=\"$deployment\", direction=\"inbound\"}[20s])) by (le, deployment, pod))", "format": "time_series", "intervalFactor": 1, - "legendFormat": "received/{{component}}/{{grpc_method}}", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "gRPC Message Volume", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:1706", - "format": "rps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:1707", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 18 - }, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "$$hashKey": "object:172", - "expr": "histogram_quantile(0.5, rate(http_request_duration_seconds_bucket{job=\"conduit-controller\"}[20s]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{component}}/{{code}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "HTTP Latency P50", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:1706", - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:1707", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 18 - }, - "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "$$hashKey": "object:172", - "expr": "histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{job=\"conduit-controller\"}[20s]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{component}}/{{code}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "HTTP Latency P95", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:1706", - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:1707", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 27 - }, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "$$hashKey": "object:842", - "expr": "process_resident_memory_bytes{job=\"conduit-controller\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "resident/{{component}}", - "refId": "A" - }, - { - "$$hashKey": "object:1111", - "expr": "process_virtual_memory_bytes{job=\"conduit-controller\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "virtual/{{component}}", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Process Memory", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:867", - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:868", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 27 - }, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "$$hashKey": "object:842", - "expr": "go_memstats_alloc_bytes{job=\"conduit-controller\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "alloc/{{component}}", - "refId": "A" - }, - { - "$$hashKey": "object:1111", - "expr": "irate(go_memstats_alloc_bytes_total{job=\"conduit-controller\"}[20s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "alloc rate/{{component}}", + "legendFormat": "p95 po/{{pod}}", "refId": "B" }, { - "$$hashKey": "object:1540", - "expr": "go_memstats_stack_inuse_bytes{job=\"conduit-controller\"}", + "expr": "histogram_quantile(0.99, sum(rate(response_latency_ms_bucket{deployment=\"$deployment\", direction=\"inbound\"}[20s])) by (le, deployment, pod))", "format": "time_series", "intervalFactor": 1, - "legendFormat": "stack/{{component}}", + "legendFormat": "p99 po/{{pod}}", "refId": "C" - }, - { - "$$hashKey": "object:1561", - "expr": "go_memstats_heap_inuse_bytes{job=\"conduit-controller\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "heap/{{component}}", - "refId": "D" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Go Memstats", + "title": "LATENCY", "tooltip": { "shared": true, "sort": 2, @@ -702,8 +300,7 @@ }, "yaxes": [ { - "$$hashKey": "object:867", - "format": "decbytes", + "format": "ms", "label": null, "logBase": 1, "max": null, @@ -711,7 +308,6 @@ "show": true }, { - "$$hashKey": "object:868", "format": "short", "label": null, "logBase": 1, @@ -721,18 +317,46 @@ } ] }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 90, + "panels": [], + "title": "", + "type": "row" + }, + { + "content": "
\n Control-Plane Telemetry\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 27, + "links": [], + "mode": "html", + "title": "", + "transparent": true, + "type": "text" + }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": null, + "datasource": "prometheus", "fill": 1, "gridPos": { - "h": 9, - "w": 12, + "h": 7, + "w": 8, "x": 0, - "y": 36 + "y": 15 }, "id": 2, "legend": { @@ -758,7 +382,6 @@ "steppedLine": false, "targets": [ { - "$$hashKey": "object:172", "expr": "go_goroutines{job=\"conduit-controller\"}", "format": "time_series", "intervalFactor": 1, @@ -807,15 +430,15 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": null, + "datasource": "prometheus", "fill": 1, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 36 + "h": 7, + "w": 8, + "x": 8, + "y": 15 }, - "id": 9, + "id": 5, "legend": { "avg": false, "current": false, @@ -839,18 +462,24 @@ "steppedLine": false, "targets": [ { - "$$hashKey": "object:172", - "expr": "go_threads{job=\"conduit-controller\"}", + "expr": "process_resident_memory_bytes{job=\"conduit-controller\"}", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{component}}", + "legendFormat": "resident/{{component}}", "refId": "A" + }, + { + "expr": "process_virtual_memory_bytes{job=\"conduit-controller\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "virtual/{{component}}", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Threads", + "title": "Process Memory", "tooltip": { "shared": true, "sort": 2, @@ -866,8 +495,7 @@ }, "yaxes": [ { - "$$hashKey": "object:2175", - "format": "short", + "format": "decbytes", "label": null, "logBase": 1, "max": null, @@ -875,7 +503,6 @@ "show": true }, { - "$$hashKey": "object:2176", "format": "short", "label": null, "logBase": 1, @@ -890,179 +517,13 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": null, + "datasource": "prometheus", "fill": 1, "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 45 - }, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "$$hashKey": "object:172", - "expr": "go_gc_duration_seconds{job=\"conduit-controller\", quantile=\"0.5\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{component}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "GC Duration P50", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:711", - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:712", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 45 - }, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "$$hashKey": "object:172", - "expr": "go_gc_duration_seconds{job=\"conduit-controller\", quantile=\"0.75\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{component}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "GC Duration P75", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:711", - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:712", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 54 + "h": 7, + "w": 8, + "x": 16, + "y": 15 }, "id": 7, "legend": { @@ -1088,7 +549,6 @@ "steppedLine": false, "targets": [ { - "$$hashKey": "object:172", "expr": "process_open_fds{job=\"conduit-controller\"}", "format": "time_series", "intervalFactor": 1, @@ -1115,7 +575,6 @@ }, "yaxes": [ { - "$$hashKey": "object:1706", "format": "short", "label": null, "logBase": 1, @@ -1124,7 +583,491 @@ "show": true }, { - "$$hashKey": "object:1707", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 22 + }, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_threads{job=\"conduit-controller\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{component}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Threads", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 22 + }, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.5, sum(rate(http_request_duration_seconds_bucket{job=\"conduit-controller\"}[20s])) by (le, component, code))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P50 {{component}}/{{code}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{job=\"conduit-controller\"}[20s])) by (le, component, code))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P95 {{component}}/{{code}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket{job=\"conduit-controller\"}[20s])) by (le, component, code))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P99 {{component}}/{{code}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "HTTP Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 29, + "panels": [], + "repeat": "component", + "title": "", + "type": "row" + }, + { + "content": "
\n  \n $component\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 30, + "links": [], + "mode": "html", + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 32 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_memstats_alloc_bytes{job=\"conduit-controller\", component=\"$component\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "alloc/{{component}}", + "refId": "A" + }, + { + "expr": "irate(go_memstats_alloc_bytes_total{job=\"conduit-controller\", component=\"$component\"}[20s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "alloc rate/{{component}}", + "refId": "B" + }, + { + "expr": "go_memstats_stack_inuse_bytes{job=\"conduit-controller\", component=\"$component\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "stack/{{component}}", + "refId": "C" + }, + { + "expr": "go_memstats_heap_inuse_bytes{job=\"conduit-controller\", component=\"$component\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "heap/{{component}}", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Go Memstats", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 32 + }, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_gc_duration_seconds{job=\"conduit-controller\", quantile=\"0.5\", component=\"$component\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P50 {{component}}", + "refId": "A" + }, + { + "expr": "go_gc_duration_seconds{job=\"conduit-controller\", quantile=\"0.75\", component=\"$component\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P75 {{component}}", + "refId": "B" + }, + { + "expr": "go_gc_duration_seconds{job=\"conduit-controller\", quantile=\"1\", component=\"$component\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Max {{component}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "GC Duration", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 32 + }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(grpc_server_msg_sent_total{job=\"conduit-controller\", component=\"$component\"}[20s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "sent/{{component}}/{{grpc_method}}", + "refId": "A" + }, + { + "expr": "irate(grpc_server_msg_received_total{job=\"conduit-controller\", component=\"$component\"}[20s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "received/{{component}}/{{grpc_method}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "gRPC Message Volume", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "rps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { "format": "short", "label": null, "logBase": 1, @@ -1140,7 +1083,48 @@ "style": "dark", "tags": [], "templating": { - "list": [] + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "prometheus", + "hide": 2, + "includeAll": true, + "label": "Deployment", + "multi": false, + "name": "deployment", + "options": [], + "query": "label_values(request_total{conduit_io_control_plane_component!=\"\"}, deployment)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "prometheus", + "hide": 2, + "includeAll": true, + "label": "Component", + "multi": false, + "name": "component", + "options": [], + "query": "label_values(component)", + "refresh": 2, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] }, "time": { "from": "now-5m", @@ -1174,6 +1158,5 @@ "timezone": "", "title": "Conduit Health", "uid": "Og9nanzmk", - "version": 3 + "version": 1 } - diff --git a/grafana/dashboards/top-line.json b/grafana/dashboards/top-line.json index 54ab7e5e2..1493cb680 100644 --- a/grafana/dashboards/top-line.json +++ b/grafana/dashboards/top-line.json @@ -16,7 +16,7 @@ "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1523048893004, + "iteration": 1523665070192, "links": [], "panels": [ { @@ -35,6 +35,170 @@ "transparent": true, "type": "text" }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "prometheus", + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 0, + "y": 3 + }, + "height": "", + "id": 28, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(response_total{classification=\"success\", deployment=~\"$deployment\"}[20s])) / sum(irate(response_total{deployment=~\"$deployment\"}[20s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": ".9,.99", + "title": "GLOBAL SUCCESS RATE", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "prometheus", + "format": "rps", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 8, + "y": 3 + }, + "height": "", + "id": 29, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(request_total{deployment=~\"$deployment\"}[20s]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "GLOBAL REQUEST VOLUME", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, { "cacheTimeout": null, "colorBackground": false, @@ -55,8 +219,8 @@ }, "gridPos": { "h": 4, - "w": 6, - "x": 0, + "w": 4, + "x": 16, "y": 3 }, "height": "", @@ -137,8 +301,8 @@ }, "gridPos": { "h": 4, - "w": 6, - "x": 6, + "w": 4, + "x": 20, "y": 3 }, "height": "", @@ -181,7 +345,7 @@ { "expr": "count(count(request_total{namespace=~\"$namespace\"}) by (namespace, deployment))", "format": "time_series", - "intervalFactor": 2, + "intervalFactor": 1, "legendFormat": "", "refId": "A" } @@ -199,170 +363,6 @@ ], "valueName": "current" }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": true, - "colors": [ - "#d44a3a", - "rgba(237, 129, 40, 0.89)", - "#299c46" - ], - "datasource": "prometheus", - "format": "percentunit", - "gauge": { - "maxValue": 1, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 12, - "y": 3 - }, - "height": "", - "id": 28, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": true, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(irate(response_total{classification=\"success\", deployment=~\"$deployment\"}[20s])) / sum(irate(response_total{deployment=~\"$deployment\"}[20s]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": ".9,.99", - "title": "Global Success Rate", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#d44a3a", - "rgba(237, 129, 40, 0.89)", - "#299c46" - ], - "datasource": "prometheus", - "format": "rps", - "gauge": { - "maxValue": 1, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 18, - "y": 3 - }, - "height": "", - "id": 29, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": true, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(irate(request_total{deployment=~\"$deployment\"}[20s]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "", - "title": "Global Request Volume", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, { "content": "
\n TOP LINE\n
", "gridPos": { @@ -419,14 +419,14 @@ "expr": "sum(irate(response_total{classification=\"success\", namespace=~\"$namespace\", direction=\"inbound\"}[20s])) by (namespace) / sum(irate(response_total{namespace=~\"$namespace\", direction=\"inbound\"}[20s])) by (namespace)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{namespace}}", + "legendFormat": "ns/{{namespace}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Success Rate", + "title": "SUCCESS RATE", "tooltip": { "shared": true, "sort": 2, @@ -445,7 +445,7 @@ "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": null, "show": true }, @@ -499,14 +499,14 @@ "expr": "sum(irate(request_total{namespace=~\"$namespace\", direction=\"inbound\"}[20s])) by (namespace)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{namespace}}", + "legendFormat": "ns/{{namespace}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Request Volume", + "title": "REQUEST VOLUME", "tooltip": { "shared": true, "sort": 1, @@ -526,7 +526,7 @@ "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { @@ -575,32 +575,18 @@ "stack": false, "steppedLine": false, "targets": [ - { - "expr": "histogram_quantile(0.5, sum(irate(response_latency_ms_bucket{namespace=~\"$namespace\", direction=\"inbound\"}[20s])) by (le, namespace))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "p50 {{namespace}}", - "refId": "A" - }, { "expr": "histogram_quantile(0.95, sum(irate(response_latency_ms_bucket{namespace=~\"$namespace\", direction=\"inbound\"}[20s])) by (le, namespace))", "format": "time_series", "intervalFactor": 1, - "legendFormat": "p95 {{namespace}}", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.99, sum(irate(response_latency_ms_bucket{namespace=~\"$namespace\", direction=\"inbound\"}[20s])) by (le, namespace))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "p99 {{namespace}}", - "refId": "C" + "legendFormat": "p95 ns/{{namespace}}", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Latency", + "title": "P95 LATENCY", "tooltip": { "shared": true, "sort": 2, @@ -634,7 +620,7 @@ ] }, { - "content": "
\n SERVICE METRICS\n
", + "content": "
\n NAMESPACES\n
", "gridPos": { "h": 2, "w": 24, @@ -664,7 +650,7 @@ "type": "row" }, { - "content": "
\n  \n \n $namespace\n \n
", + "content": "
\n  \n \n ns/$namespace\n \n
", "gridPos": { "h": 2, "w": 24, @@ -719,14 +705,14 @@ "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", direction=\"inbound\"}[20s])) by (deployment) / sum(irate(response_total{namespace=\"$namespace\", direction=\"inbound\"}[20s])) by (deployment)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{deployment}}", + "legendFormat": "deploy/{{deployment}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Success Rate", + "title": "SUCCESS RATE", "tooltip": { "shared": true, "sort": 2, @@ -745,7 +731,7 @@ "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": 1, "min": null, "show": true }, @@ -792,21 +778,21 @@ "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(irate(request_total{namespace=\"$namespace\", direction=\"inbound\"}[20s])) by (deployment)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{deployment}}", + "legendFormat": "deploy/{{deployment}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Request Volume", + "title": "REQUEST VOLUME", "tooltip": { "shared": true, "sort": 2, @@ -826,7 +812,7 @@ "label": null, "logBase": 1, "max": null, - "min": null, + "min": 0, "show": true }, { @@ -875,35 +861,19 @@ "stack": false, "steppedLine": false, "targets": [ - { - "expr": "histogram_quantile(0.5, sum(irate(response_latency_ms_bucket{namespace=\"$namespace\", direction=\"inbound\"}[20s])) by (le, deployment))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "p50 {{deployment}}", - "refId": "A" - }, { "expr": "histogram_quantile(0.95, sum(irate(response_latency_ms_bucket{namespace=\"$namespace\", direction=\"inbound\"}[20s])) by (le, deployment))", "format": "time_series", "hide": false, "intervalFactor": 1, - "legendFormat": "p95 {{deployment}}", - "refId": "B" - }, - { - "expr": "histogram_quantile(0.99, sum(irate(response_latency_ms_bucket{namespace=\"$namespace\", direction=\"inbound\"}[20s])) by (le, deployment))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "p99 {{deployment}}", - "refId": "C" + "legendFormat": "p95 deploy/{{deployment}}", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Latency", + "title": "P95 LATENCY", "tooltip": { "shared": true, "sort": 2,