Add TCP stats to deployment dashboards (#824)

This PR adds the TCP metrics introduced in #785 and #790 to the Grafana deployment dashboards. I've added three new charts under each of the "Inbound Traffic" and "Outbound Traffic" headings:
+ "TCP Connection Failures": plots the number of failed TCP connections over time
+ "TCP Connections Open": shows the number of accepted and opened connections that are currently open
+ "TCP Connection Duration": a heatmap of connection durations over time

I'm planning on adding similar graphs to other dashboards as well in subsequent PRs.
This commit is contained in:
Eliza Weisman 2018-04-25 16:26:43 -07:00 committed by GitHub
parent fbacdd8a05
commit d55e334a42
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 809 additions and 49 deletions

View File

@ -16,7 +16,7 @@
"gnetId": null,
"graphTooltip": 1,
"id": null,
"iteration": 1523660761961,
"iteration": 1524697688078,
"links": [],
"panels": [
{
@ -640,13 +640,259 @@
}
]
},
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 15
},
"id": 148,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"fill": 1,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 16
},
"id": 167,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "tcp_close_total{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"inbound\",classification=\"failure\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{peer}} {{classification}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "TCP CONNECTION FAILURES",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": null,
"format": "none",
"label": "",
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fill": 1,
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 16
},
"id": 168,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "tcp_open_connections{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"inbound\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{peer}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "TCP CONNECTIONS OPEN",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": "",
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"cards": {
"cardPadding": null,
"cardRound": null
},
"color": {
"cardColor": "#b4ff00",
"colorScale": "sqrt",
"colorScheme": "interpolateOranges",
"exponent": 0.5,
"mode": "spectrum"
},
"dataFormat": "timeseries",
"datasource": null,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 16
},
"heatmap": {},
"highlightCards": true,
"id": 169,
"legend": {
"show": false
},
"links": [],
"targets": [
{
"expr": "tcp_connection_duration_ms_bucket{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"inbound\"}",
"format": "time_series",
"intervalFactor": 1,
"refId": "A"
}
],
"title": "TCP CONNECTION DURATION",
"tooltip": {
"show": true,
"showHistogram": true
},
"type": "heatmap",
"xAxis": {
"show": true
},
"xBucketNumber": null,
"xBucketSize": null,
"yAxis": {
"decimals": null,
"format": "dtdurationms",
"logBase": 1,
"max": null,
"min": "0",
"show": true,
"splitFactor": null
},
"yBucketNumber": null,
"yBucketSize": null
}
],
"title": "Inbound TCP Metrics",
"type": "row"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 16
},
"id": 152,
"panels": [],
"title": "",
"type": "row"
},
{
"content": "<div class=\"text-center dashboard-header\">\n <span>INBOUND DEPLOYMENTS</span>\n</div>",
"gridPos": {
"h": 2,
"w": 24,
"x": 0,
"y": 15
"y": 17
},
"id": 76,
"links": [],
@ -661,7 +907,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 17
"y": 19
},
"id": 59,
"panels": [
@ -671,7 +917,7 @@
"h": 2,
"w": 24,
"x": 0,
"y": 18
"y": 43
},
"id": 39,
"links": [],
@ -691,7 +937,7 @@
"h": 7,
"w": 8,
"x": 0,
"y": 20
"y": 45
},
"id": 36,
"legend": {
@ -773,7 +1019,7 @@
"h": 7,
"w": 8,
"x": 8,
"y": 20
"y": 45
},
"id": 22,
"legend": {
@ -854,7 +1100,7 @@
"h": 7,
"w": 8,
"x": 16,
"y": 20
"y": 45
},
"id": 29,
"legend": {
@ -948,7 +1194,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 18
"y": 20
},
"id": 34,
"panels": [],
@ -962,7 +1208,7 @@
"h": 2,
"w": 24,
"x": 0,
"y": 19
"y": 21
},
"id": 32,
"links": [],
@ -982,7 +1228,7 @@
"h": 7,
"w": 8,
"x": 0,
"y": 21
"y": 23
},
"id": 77,
"legend": {
@ -1063,7 +1309,7 @@
"h": 7,
"w": 8,
"x": 8,
"y": 21
"y": 23
},
"id": 78,
"legend": {
@ -1143,7 +1389,7 @@
"h": 7,
"w": 8,
"x": 16,
"y": 21
"y": 23
},
"id": 79,
"legend": {
@ -1212,13 +1458,259 @@
}
]
},
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 30
},
"id": 154,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"fill": 1,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 31
},
"id": 157,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "tcp_close_total{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"outbound\",classification=\"failure\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{peer}} {{classification}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "TCP CONNECTION FAILURES",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": null,
"format": "none",
"label": "",
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fill": 1,
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 31
},
"id": 166,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "tcp_open_connections{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"outbound\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{peer}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "TCP CONNECTIONS OPEN",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": "",
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"cards": {
"cardPadding": null,
"cardRound": null
},
"color": {
"cardColor": "#b4ff00",
"colorScale": "sqrt",
"colorScheme": "interpolateOranges",
"exponent": 0.5,
"mode": "spectrum"
},
"dataFormat": "timeseries",
"datasource": null,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 31
},
"heatmap": {},
"highlightCards": true,
"id": 160,
"legend": {
"show": false
},
"links": [],
"targets": [
{
"expr": "tcp_connection_duration_ms_bucket{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"outbound\"}",
"format": "time_series",
"intervalFactor": 1,
"refId": "A"
}
],
"title": "TCP CONNECTION DURATION",
"tooltip": {
"show": true,
"showHistogram": true
},
"type": "heatmap",
"xAxis": {
"show": true
},
"xBucketNumber": null,
"xBucketSize": null,
"yAxis": {
"decimals": null,
"format": "dtdurationms",
"logBase": 1,
"max": null,
"min": "0",
"show": true,
"splitFactor": null
},
"yBucketNumber": null,
"yBucketSize": null
}
],
"title": "Outbound TCP Metrics",
"type": "row"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 31
},
"id": 156,
"panels": [],
"title": "",
"type": "row"
},
{
"content": "<div class=\"text-center dashboard-header\">\n <span>OUTBOUND DEPLOYMENTS</span>\n</div>",
"gridPos": {
"h": 2,
"w": 24,
"x": 0,
"y": 28
"y": 32
},
"id": 80,
"links": [],
@ -1233,7 +1725,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 30
"y": 34
},
"id": 27,
"panels": [
@ -1633,5 +2125,5 @@
"timezone": "",
"title": "Conduit Deployment",
"uid": "6svnwykmk",
"version": 1
"version": 3
}

View File

@ -16,31 +16,16 @@
"gnetId": null,
"graphTooltip": 1,
"id": null,
"iteration": 1523662094618,
"iteration": 1524697499292,
"links": [],
"panels": [
{
"content": "<div class=\"text-center dashboard-header\">\n <span>Control-Plane Proxy Telemetry</span>\n</div>",
"gridPos": {
"h": 2,
"w": 24,
"x": 0,
"y": 0
},
"id": 17,
"links": [],
"mode": "html",
"title": "",
"transparent": true,
"type": "text"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 2
"y": 0
},
"id": 19,
"panels": [],
@ -54,7 +39,7 @@
"h": 2,
"w": 24,
"x": 0,
"y": 3
"y": 1
},
"id": 21,
"links": [],
@ -74,7 +59,7 @@
"h": 7,
"w": 8,
"x": 0,
"y": 5
"y": 3
},
"id": 23,
"legend": {
@ -154,7 +139,7 @@
"h": 7,
"w": 8,
"x": 8,
"y": 5
"y": 3
},
"id": 24,
"legend": {
@ -234,7 +219,7 @@
"h": 7,
"w": 8,
"x": 16,
"y": 5
"y": 3
},
"id": 25,
"legend": {
@ -323,7 +308,290 @@
"h": 1,
"w": 24,
"x": 0,
"y": 12
"y": 10
},
"id": 339,
"panels": [],
"title": "",
"type": "row"
},
{
"content": "<div class=\"text-center dashboard-header\">\n <span>Control-Plane TCP Metrics</span>\n</div>",
"gridPos": {
"h": 2,
"w": 24,
"x": 0,
"y": 11
},
"id": 340,
"links": [],
"mode": "html",
"title": "",
"transparent": true,
"type": "text"
},
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 13
},
"id": 179,
"panels": [
{
"content": "<div>\n <img src=\"https://conduit.io/favicon.png\" style=\"baseline; height:40px;\"/>&nbsp;\n <span style=\"font-size: 15px; border-image:none\">deploy/$deployment</span>\n</div>",
"gridPos": {
"h": 2,
"w": 24,
"x": 0,
"y": 44
},
"id": 282,
"links": [],
"mode": "html",
"repeatedByRow": true,
"title": "",
"transparent": true,
"type": "text"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fill": 1,
"gridPos": {
"h": 7,
"w": 8,
"x": 0,
"y": 46
},
"id": 227,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "tcp_close_total{deployment=\"$deployment\", direction=\"inbound\",classification=\"failure\"}",
"format": "time_series",
"intervalFactor": 1,
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "TCP CONNECTION FAILURES",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fill": 1,
"gridPos": {
"h": 7,
"w": 8,
"x": 8,
"y": 46
},
"id": 132,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": false,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "tcp_open_connections{deployment=\"$deployment\", direction=\"inbound\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{peer}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "OPEN TCP CONNECTIONS",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"cards": {
"cardPadding": null,
"cardRound": null
},
"color": {
"cardColor": "#b4ff00",
"colorScale": "sqrt",
"colorScheme": "interpolateOranges",
"exponent": 0.5,
"mode": "spectrum"
},
"dataFormat": "timeseries",
"datasource": null,
"gridPos": {
"h": 7,
"w": 8,
"x": 16,
"y": 46
},
"heatmap": {},
"highlightCards": true,
"id": 229,
"legend": {
"show": false
},
"links": [],
"scopedVars": {
"deployment": {
"selected": false,
"text": "controller",
"value": "controller"
}
},
"targets": [
{
"expr": "tcp_connection_duration_ms_bucket{deployment=\"$deployment\", direction=\"inbound\"}",
"format": "time_series",
"intervalFactor": 1,
"refId": "A"
}
],
"title": "TCP CONNECTION DURATION",
"tooltip": {
"show": true,
"showHistogram": true
},
"type": "heatmap",
"xAxis": {
"show": true
},
"xBucketNumber": null,
"xBucketSize": null,
"yAxis": {
"decimals": null,
"format": "short",
"logBase": 1,
"max": null,
"min": null,
"show": true,
"splitFactor": null
},
"yBucketNumber": null,
"yBucketSize": null
}
],
"repeat": "deployment",
"title": "$deployment",
"type": "row"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 14
},
"id": 90,
"panels": [],
@ -336,7 +604,7 @@
"h": 2,
"w": 24,
"x": 0,
"y": 13
"y": 15
},
"id": 27,
"links": [],
@ -356,7 +624,7 @@
"h": 7,
"w": 8,
"x": 0,
"y": 15
"y": 17
},
"id": 2,
"legend": {
@ -436,7 +704,7 @@
"h": 7,
"w": 8,
"x": 8,
"y": 15
"y": 17
},
"id": 5,
"legend": {
@ -523,7 +791,7 @@
"h": 7,
"w": 8,
"x": 16,
"y": 15
"y": 17
},
"id": 7,
"legend": {
@ -603,7 +871,7 @@
"h": 7,
"w": 8,
"x": 0,
"y": 22
"y": 24
},
"id": 9,
"legend": {
@ -683,7 +951,7 @@
"h": 7,
"w": 8,
"x": 8,
"y": 22
"y": 24
},
"id": 12,
"legend": {
@ -772,7 +1040,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 29
"y": 31
},
"id": 29,
"panels": [],
@ -786,7 +1054,7 @@
"h": 2,
"w": 24,
"x": 0,
"y": 30
"y": 32
},
"id": 30,
"links": [],
@ -806,7 +1074,7 @@
"h": 7,
"w": 8,
"x": 0,
"y": 32
"y": 34
},
"id": 6,
"legend": {
@ -907,7 +1175,7 @@
"h": 7,
"w": 8,
"x": 8,
"y": 32
"y": 34
},
"id": 8,
"legend": {
@ -1001,7 +1269,7 @@
"h": 7,
"w": 8,
"x": 16,
"y": 32
"y": 34
},
"id": 14,
"legend": {
@ -1158,5 +1426,5 @@
"timezone": "",
"title": "Conduit Health",
"uid": "Og9nanzmk",
"version": 1
"version": 2
}