Support metrics with multiple TiSessions with the same port (#220) (#222)

Signed-off-by: marsishandsome <marsishandsome@gmail.com>
This commit is contained in:
Liangliang Gu 2021-06-29 15:44:06 +08:00 committed by GitHub
parent ccdce8f247
commit 8210eaf94d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 664 additions and 37 deletions

View File

@ -98,6 +98,16 @@ The following includes JVM related parameters.
- timeout of scan/delete range grpc request
- default: 20s
### Metrics Parameter
#### tikv.metrics.enable
- whether to enable metrics exporting
- default: false
#### tikv.metrics.port
- the metrics exporting http port
- default: 3140
### ThreadPool Parameter
The following includes ThreadPool related parameters, which can be passed in through JVM parameters.
@ -123,5 +133,39 @@ The following includes ThreadPool related parameters, which can be passed in thr
- default: 20
## Metrics
Client Java supports exporting metrics to Prometheus using poll mode and viewing on Grafana. The following steps shows how to enable this function.
### Step 1: Enable metrics exporting
- set the config `tikv.metrics.enable` to `true`
- call TiConfiguration.setMetricsEnable(true)
### Step 2: Set the metrics port
- set the config `tikv.metrics.port`
- call TiConfiguration.setMetricsPort
Default port is 3140.
### Step 3: Config Prometheus
Add the following config to `conf/prometheus.yml` and restart Prometheus.
```yaml
- job_name: "tikv-client"
honor_labels: true
static_configs:
- targets:
- '127.0.0.1:3140'
- '127.0.0.2:3140'
- '127.0.0.3:3140'
```
### Step 4: Config Grafana
Import the [Client-Java-Summary dashboard config](/metrics/grafana/client_java_summary.json) to Grafana.
## License
Apache 2.0 license. See the [LICENSE](./LICENSE) file for details.
Apache 2.0 license. See the [LICENSE](./LICENSE) file for details.

View File

@ -0,0 +1,439 @@
{
"__inputs": [
{
"name": "DS_TEST-CLUSTER",
"label": "test-cluster",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "6.1.6"
},
{
"type": "panel",
"id": "graph",
"name": "Graph",
"version": ""
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": null,
"links": [],
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_TEST-CLUSTER}",
"fill": 1,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 2,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(client_java_raw_requests_latency_count[1m])) by (type)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{type}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Client QPS",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_TEST-CLUSTER}",
"fill": 1,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 4,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(client_java_raw_requests_latency_sum[1m])) by (type) / sum(rate(client_java_raw_requests_latency_count[1m])) by (type)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{type}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Client Avg Latency",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_TEST-CLUSTER}",
"fill": 1,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
},
"id": 8,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(client_java_raw_requests_failure_total[1m])) by (type)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{type}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Client Failures",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "${DS_TEST-CLUSTER}",
"fill": 1,
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 9
},
"id": 7,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(client_java_grpc_raw_requests_latency_sum[1m])) by (type) / sum(rate(client_java_grpc_raw_requests_latency_count[1m])) by (type)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{type}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Client gRPC Avg Latency",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"schemaVersion": 18,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "Client-Java-Summary",
"uid": "000000911",
"version": 1
}

View File

@ -0,0 +1,93 @@
/*
* Copyright 2021 PingCAP, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.tikv.common;
import io.prometheus.client.CollectorRegistry;
import io.prometheus.client.exporter.HTTPServer;
import java.net.InetSocketAddress;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.tikv.common.policy.RetryPolicy;
import org.tikv.common.region.RegionManager;
import org.tikv.common.region.RegionStoreClient;
import org.tikv.raw.RawKVClient;
public class MetricsServer {
private static final Logger logger = LoggerFactory.getLogger(MetricsServer.class);
private static MetricsServer METRICS_SERVER_INSTANCE = null;
private static int metricsServerRefCount = 0;
private int port;
private HTTPServer server;
private CollectorRegistry collectorRegistry;
public static MetricsServer getInstance(TiConfiguration conf) {
if (!conf.isMetricsEnable()) {
return null;
}
synchronized (MetricsServer.class) {
int port = conf.getMetricsPort();
if (METRICS_SERVER_INSTANCE != null) {
if (port != METRICS_SERVER_INSTANCE.port) {
throw new IllegalArgumentException(
String.format(
"Do dot support multiple tikv.metrics.port, which are %d and %d",
port, METRICS_SERVER_INSTANCE.port));
}
} else {
METRICS_SERVER_INSTANCE = new MetricsServer(port);
}
metricsServerRefCount += 1;
return METRICS_SERVER_INSTANCE;
}
}
private MetricsServer(int port) {
try {
this.collectorRegistry = new CollectorRegistry();
this.collectorRegistry.register(RawKVClient.RAW_REQUEST_LATENCY);
this.collectorRegistry.register(RawKVClient.RAW_REQUEST_FAILURE);
this.collectorRegistry.register(RawKVClient.RAW_REQUEST_SUCCESS);
this.collectorRegistry.register(RegionStoreClient.GRPC_RAW_REQUEST_LATENCY);
this.collectorRegistry.register(RetryPolicy.GRPC_SINGLE_REQUEST_LATENCY);
this.collectorRegistry.register(RegionManager.GET_REGION_BY_KEY_REQUEST_LATENCY);
this.collectorRegistry.register(PDClient.PD_GET_REGION_BY_KEY_REQUEST_LATENCY);
this.port = port;
this.server = new HTTPServer(new InetSocketAddress(port), this.collectorRegistry, true);
logger.info("http server is up " + this.server.getPort());
} catch (Exception e) {
logger.error("http server not up");
throw new RuntimeException(e);
}
}
public void close() {
synchronized (MetricsServer.class) {
if (metricsServerRefCount == 1) {
if (server != null) {
server.stop();
logger.info("Metrics server on " + server.getPort() + " is stopped");
}
METRICS_SERVER_INSTANCE = null;
}
if (metricsServerRefCount >= 1) {
metricsServerRefCount -= 1;
}
}
}
}

View File

@ -20,9 +20,6 @@ import static org.tikv.common.util.ClientUtils.groupKeysByRegion;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.google.protobuf.ByteString;
import io.prometheus.client.CollectorRegistry;
import io.prometheus.client.exporter.HTTPServer;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@ -38,7 +35,6 @@ import org.tikv.common.event.CacheInvalidateEvent;
import org.tikv.common.exception.TiKVException;
import org.tikv.common.key.Key;
import org.tikv.common.meta.TiTimestamp;
import org.tikv.common.policy.RetryPolicy;
import org.tikv.common.region.RegionManager;
import org.tikv.common.region.RegionStoreClient;
import org.tikv.common.region.RegionStoreClient.RegionStoreClientBuilder;
@ -74,37 +70,14 @@ public class TiSession implements AutoCloseable {
private volatile boolean enableGrpcForward;
private volatile RegionStoreClient.RegionStoreClientBuilder clientBuilder;
private boolean isClosed = false;
private HTTPServer server;
private CollectorRegistry collectorRegistry;
private MetricsServer metricsServer;
public TiSession(TiConfiguration conf) {
this.conf = conf;
this.channelFactory = new ChannelFactory(conf.getMaxFrameSize());
this.client = PDClient.createRaw(conf, channelFactory);
this.enableGrpcForward = conf.getEnableGrpcForward();
if (conf.isMetricsEnable()) {
try {
this.collectorRegistry = new CollectorRegistry();
this.collectorRegistry.register(RawKVClient.RAW_REQUEST_LATENCY);
this.collectorRegistry.register(RawKVClient.RAW_REQUEST_FAILURE);
this.collectorRegistry.register(RawKVClient.RAW_REQUEST_SUCCESS);
this.collectorRegistry.register(RegionStoreClient.GRPC_RAW_REQUEST_LATENCY);
this.collectorRegistry.register(RetryPolicy.GRPC_SINGLE_REQUEST_LATENCY);
this.collectorRegistry.register(RegionManager.GET_REGION_BY_KEY_REQUEST_LATENCY);
this.collectorRegistry.register(PDClient.PD_GET_REGION_BY_KEY_REQUEST_LATENCY);
this.enableGrpcForward = conf.getEnableGrpcForward();
this.server =
new HTTPServer(
new InetSocketAddress(conf.getMetricsPort()), this.collectorRegistry, true);
logger.info("http server is up " + this.server.getPort());
} catch (Exception e) {
logger.error("http server not up");
throw new RuntimeException(e);
}
}
if (this.enableGrpcForward) {
logger.info("enable grpc forward for high available");
}
this.metricsServer = MetricsServer.getInstance(conf);
logger.info("TiSession initialized in " + conf.getKvMode() + " mode");
}
@ -353,10 +326,6 @@ public class TiSession implements AutoCloseable {
return channelFactory;
}
public CollectorRegistry getCollectorRegistry() {
return collectorRegistry;
}
/**
* This is used for setting call back function to invalidate cache information
*
@ -468,9 +437,8 @@ public class TiSession implements AutoCloseable {
return;
}
if (server != null) {
server.stop();
logger.info("Metrics server on " + server.getPort() + " is stopped");
if (metricsServer != null) {
metricsServer.close();
}
isClosed = true;

View File

@ -0,0 +1,83 @@
package org.tikv.raw;
import static org.junit.Assert.assertEquals;
import com.google.protobuf.ByteString;
import java.util.ArrayList;
import java.util.List;
import org.junit.After;
import org.junit.Test;
import org.tikv.common.TiConfiguration;
import org.tikv.common.TiSession;
public class MetricsTest {
private List<TiSession> sessionList = new ArrayList<>();
@After
public void tearDown() throws Exception {
for (TiSession tiSession : sessionList) {
if (tiSession != null) {
tiSession.close();
}
}
}
@Test
public void oneTiSession() throws Exception {
TiConfiguration conf = TiConfiguration.createRawDefault();
conf.setMetricsEnable(true);
TiSession session = TiSession.create(conf);
sessionList.add(session);
RawKVClient client = session.createRawClient();
client.put(ByteString.copyFromUtf8("k"), ByteString.copyFromUtf8("v"));
ByteString result = client.get(ByteString.copyFromUtf8("k"));
assertEquals(result.toStringUtf8(), "v");
client.close();
session.close();
}
@Test
public void twoTiSession() throws Exception {
TiConfiguration conf = TiConfiguration.createRawDefault();
conf.setMetricsEnable(true);
TiSession session1 = TiSession.create(conf);
sessionList.add(session1);
RawKVClient client1 = session1.createRawClient();
client1.put(ByteString.copyFromUtf8("k1"), ByteString.copyFromUtf8("v1"));
TiSession session2 = TiSession.create(conf);
sessionList.add(session2);
RawKVClient client2 = session2.createRawClient();
client2.put(ByteString.copyFromUtf8("k2"), ByteString.copyFromUtf8("v2"));
client1.close();
session1.close();
ByteString result = client2.get(ByteString.copyFromUtf8("k2"));
assertEquals(result.toStringUtf8(), "v2");
client2.close();
session2.close();
}
@Test
public void twoTiSessionWithDifferentPort() {
TiConfiguration conf1 = TiConfiguration.createRawDefault();
conf1.setMetricsEnable(true);
conf1.setMetricsPort(12345);
TiSession session1 = TiSession.create(conf1);
sessionList.add(session1);
TiConfiguration conf2 = TiConfiguration.createRawDefault();
conf2.setMetricsEnable(true);
conf2.setMetricsPort(54321);
try {
TiSession.create(conf2);
assertEquals(1, 2);
} catch (IllegalArgumentException e) {
assertEquals(
"Do dot support multiple tikv.metrics.port, which are 54321 and 12345", e.getMessage());
}
}
}