client-go/rpc/client.go

583 lines
16 KiB
Go

// Copyright 2016 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
package rpc
import (
"context"
"io"
"strconv"
"sync"
"sync/atomic"
"time"
grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware"
grpc_opentracing "github.com/grpc-ecosystem/go-grpc-middleware/tracing/opentracing"
grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
"github.com/pingcap/kvproto/pkg/coprocessor"
"github.com/pingcap/kvproto/pkg/tikvpb"
"github.com/pkg/errors"
log "github.com/sirupsen/logrus"
"github.com/tikv/client-go/config"
"github.com/tikv/client-go/metrics"
"google.golang.org/grpc"
gcodes "google.golang.org/grpc/codes"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/keepalive"
gstatus "google.golang.org/grpc/status"
)
// Client is a client that sends RPC.
// It should not be used after calling Close().
type Client interface {
// Close should release all data.
Close() error
// SendRequest sends Request.
SendRequest(ctx context.Context, addr string, req *Request, timeout time.Duration) (*Response, error)
}
type connArray struct {
conf *config.RPC
index uint32
conns []*grpc.ClientConn
// Bind with a background goroutine to process coprocessor streaming timeout.
streamTimeout chan *Lease
// For batch commands.
batchCommandsCh chan *batchCommandsEntry
batchCommandsClients []*batchCommandsClient
transportLayerLoad uint64
}
type batchCommandsClient struct {
conf *config.Batch
conn *grpc.ClientConn
client tikvpb.Tikv_BatchCommandsClient
batched sync.Map
idAlloc uint64
transportLayerLoad *uint64
// Indicates the batch client is closed explicitly or not.
closed int32
// Protect client when re-create the streaming.
clientLock sync.Mutex
}
func (c *batchCommandsClient) isStopped() bool {
return atomic.LoadInt32(&c.closed) != 0
}
func (c *batchCommandsClient) failPendingRequests(err error) {
c.batched.Range(func(key, value interface{}) bool {
id, _ := key.(uint64)
entry, _ := value.(*batchCommandsEntry)
entry.err = err
close(entry.res)
c.batched.Delete(id)
return true
})
}
func (c *batchCommandsClient) batchRecvLoop() {
defer func() {
if r := recover(); r != nil {
log.Errorf("batchRecvLoop %v", r)
log.Infof("Restart batchRecvLoop")
go c.batchRecvLoop()
}
}()
for {
// When `conn.Close()` is called, `client.Recv()` will return an error.
resp, err := c.client.Recv()
if err != nil {
if c.isStopped() {
return
}
log.Errorf("batchRecvLoop error when receive: %v", err)
// Hold the lock to forbid batchSendLoop using the old client.
c.clientLock.Lock()
c.failPendingRequests(err) // fail all pending requests.
for { // try to re-create the streaming in the loop.
// Re-establish a application layer stream. TCP layer is handled by gRPC.
tikvClient := tikvpb.NewTikvClient(c.conn)
streamClient, err := tikvClient.BatchCommands(context.TODO())
if err == nil {
log.Infof("batchRecvLoop re-create streaming success")
c.client = streamClient
break
}
log.Errorf("batchRecvLoop re-create streaming fail: %v", err)
// TODO: Use a more smart backoff strategy.
time.Sleep(time.Second)
}
c.clientLock.Unlock()
continue
}
responses := resp.GetResponses()
for i, requestID := range resp.GetRequestIds() {
value, ok := c.batched.Load(requestID)
if !ok {
// There shouldn't be any unknown responses because if the old entries
// are cleaned by `failPendingRequests`, the stream must be re-created
// so that old responses will be never received.
panic("batchRecvLoop receives a unknown response")
}
entry := value.(*batchCommandsEntry)
if atomic.LoadInt32(&entry.canceled) == 0 {
// Put the response only if the request is not canceled.
entry.res <- responses[i]
}
c.batched.Delete(requestID)
}
transportLayerLoad := resp.GetTransportLayerLoad()
if transportLayerLoad > 0.0 && c.conf.MaxWaitTime > 0 {
// We need to consider TiKV load only if batch-wait strategy is enabled.
atomic.StoreUint64(c.transportLayerLoad, transportLayerLoad)
}
}
}
func newConnArray(addr string, conf *config.RPC) (*connArray, error) {
a := &connArray{
conf: conf,
index: 0,
conns: make([]*grpc.ClientConn, conf.GrpcMaxCallMsgSize),
streamTimeout: make(chan *Lease, 1024),
batchCommandsCh: make(chan *batchCommandsEntry, conf.Batch.MaxBatchSize),
batchCommandsClients: make([]*batchCommandsClient, 0, conf.GrpcMaxCallMsgSize),
transportLayerLoad: 0,
}
if err := a.Init(addr); err != nil {
return nil, err
}
return a, nil
}
func (a *connArray) Init(addr string) error {
opt := grpc.WithInsecure()
if len(a.conf.Security.SSLCA) != 0 {
tlsConfig, err := a.conf.Security.ToTLSConfig()
if err != nil {
return err
}
opt = grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))
}
unaryInterceptor := grpc_prometheus.UnaryClientInterceptor
streamInterceptor := grpc_prometheus.StreamClientInterceptor
if a.conf.EnableOpenTracing {
unaryInterceptor = grpc_middleware.ChainUnaryClient(
unaryInterceptor,
grpc_opentracing.UnaryClientInterceptor(),
)
streamInterceptor = grpc_middleware.ChainStreamClient(
streamInterceptor,
grpc_opentracing.StreamClientInterceptor(),
)
}
allowBatch := a.conf.Batch.MaxBatchSize > 0
for i := range a.conns {
ctx, cancel := context.WithTimeout(context.Background(), a.conf.DialTimeout)
conn, err := grpc.DialContext(
ctx,
addr,
opt,
grpc.WithInitialWindowSize(int32(a.conf.GrpcInitialWindowSize)),
grpc.WithInitialConnWindowSize(int32(a.conf.GrpcInitialConnWindowSize)),
grpc.WithUnaryInterceptor(unaryInterceptor),
grpc.WithStreamInterceptor(streamInterceptor),
grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(a.conf.GrpcMaxCallMsgSize)),
grpc.WithDefaultCallOptions(grpc.MaxCallSendMsgSize(a.conf.GrpcMaxSendMsgSize)),
grpc.WithBackoffMaxDelay(time.Second*3),
grpc.WithKeepaliveParams(keepalive.ClientParameters{
Time: a.conf.GrpcKeepAliveTime,
Timeout: a.conf.GrpcKeepAliveTimeout,
PermitWithoutStream: true,
}),
)
cancel()
if err != nil {
// Cleanup if the initialization fails.
a.Close()
return errors.WithStack(err)
}
a.conns[i] = conn
if allowBatch {
// Initialize batch streaming clients.
tikvClient := tikvpb.NewTikvClient(conn)
streamClient, err := tikvClient.BatchCommands(context.TODO())
if err != nil {
a.Close()
return errors.WithStack(err)
}
batchClient := &batchCommandsClient{
conf: &a.conf.Batch,
conn: conn,
client: streamClient,
batched: sync.Map{},
idAlloc: 0,
transportLayerLoad: &a.transportLayerLoad,
closed: 0,
}
a.batchCommandsClients = append(a.batchCommandsClients, batchClient)
go batchClient.batchRecvLoop()
}
}
go CheckStreamTimeoutLoop(a.streamTimeout)
if allowBatch {
go a.batchSendLoop()
}
return nil
}
func (a *connArray) Get() *grpc.ClientConn {
next := atomic.AddUint32(&a.index, 1) % uint32(len(a.conns))
return a.conns[next]
}
func (a *connArray) Close() {
// Close all batchRecvLoop.
for _, c := range a.batchCommandsClients {
// After connections are closed, `batchRecvLoop`s will check the flag.
atomic.StoreInt32(&c.closed, 1)
}
close(a.batchCommandsCh)
for i, c := range a.conns {
if c != nil {
c.Close()
a.conns[i] = nil
}
}
close(a.streamTimeout)
}
type batchCommandsEntry struct {
req *tikvpb.BatchCommandsRequest_Request
res chan *tikvpb.BatchCommandsResponse_Response
// Indicated the request is canceled or not.
canceled int32
err error
}
// fetchAllPendingRequests fetches all pending requests from the channel.
func fetchAllPendingRequests(
ch chan *batchCommandsEntry,
maxBatchSize int,
entries *[]*batchCommandsEntry,
requests *[]*tikvpb.BatchCommandsRequest_Request,
) {
// Block on the first element.
headEntry := <-ch
if headEntry == nil {
return
}
*entries = append(*entries, headEntry)
*requests = append(*requests, headEntry.req)
// This loop is for trying best to collect more requests.
for len(*entries) < maxBatchSize {
select {
case entry := <-ch:
if entry == nil {
return
}
*entries = append(*entries, entry)
*requests = append(*requests, entry.req)
default:
return
}
}
}
// fetchMorePendingRequests fetches more pending requests from the channel.
func fetchMorePendingRequests(
ch chan *batchCommandsEntry,
maxBatchSize int,
batchWaitSize int,
maxWaitTime time.Duration,
entries *[]*batchCommandsEntry,
requests *[]*tikvpb.BatchCommandsRequest_Request,
) {
waitStart := time.Now()
// Try to collect `batchWaitSize` requests, or wait `maxWaitTime`.
after := time.NewTimer(maxWaitTime)
for len(*entries) < batchWaitSize {
select {
case entry := <-ch:
if entry == nil {
return
}
*entries = append(*entries, entry)
*requests = append(*requests, entry.req)
case waitEnd := <-after.C:
metrics.BatchWaitDuration.Observe(float64(waitEnd.Sub(waitStart)))
return
}
}
after.Stop()
// Do an additional non-block try.
for len(*entries) < maxBatchSize {
select {
case entry := <-ch:
if entry == nil {
return
}
*entries = append(*entries, entry)
*requests = append(*requests, entry.req)
default:
metrics.BatchWaitDuration.Observe(float64(time.Since(waitStart)))
return
}
}
}
func (a *connArray) batchSendLoop() {
defer func() {
if r := recover(); r != nil {
log.Errorf("batchSendLoop %v", r)
log.Infof("Restart batchSendLoop")
go a.batchSendLoop()
}
}()
conf := &a.conf.Batch
entries := make([]*batchCommandsEntry, 0, conf.MaxBatchSize)
requests := make([]*tikvpb.BatchCommandsRequest_Request, 0, conf.MaxBatchSize)
requestIDs := make([]uint64, 0, conf.MaxBatchSize)
for {
// Choose a connection by round-robbin.
next := atomic.AddUint32(&a.index, 1) % uint32(len(a.conns))
batchCommandsClient := a.batchCommandsClients[next]
entries = entries[:0]
requests = requests[:0]
requestIDs = requestIDs[:0]
metrics.PendingBatchRequests.Set(float64(len(a.batchCommandsCh)))
fetchAllPendingRequests(a.batchCommandsCh, int(conf.MaxBatchSize), &entries, &requests)
if len(entries) < int(conf.MaxBatchSize) && conf.MaxWaitTime > 0 {
transportLayerLoad := atomic.LoadUint64(batchCommandsClient.transportLayerLoad)
// If the target TiKV is overload, wait a while to collect more requests.
if uint(transportLayerLoad) >= conf.OverloadThreshold {
fetchMorePendingRequests(
a.batchCommandsCh, int(conf.MaxBatchSize), int(conf.MaxWaitSize),
conf.MaxWaitTime, &entries, &requests,
)
}
}
length := len(requests)
maxBatchID := atomic.AddUint64(&batchCommandsClient.idAlloc, uint64(length))
for i := 0; i < length; i++ {
requestID := uint64(i) + maxBatchID - uint64(length)
requestIDs = append(requestIDs, requestID)
}
request := &tikvpb.BatchCommandsRequest{
Requests: requests,
RequestIds: requestIDs,
}
// Use the lock to protect the stream client won't be replaced by RecvLoop,
// and new added request won't be removed by `failPendingRequests`.
batchCommandsClient.clientLock.Lock()
for i, requestID := range request.RequestIds {
batchCommandsClient.batched.Store(requestID, entries[i])
}
err := batchCommandsClient.client.Send(request)
batchCommandsClient.clientLock.Unlock()
if err != nil {
log.Errorf("batch commands send error: %v", err)
batchCommandsClient.failPendingRequests(err)
}
}
}
// rpcClient is RPC client struct.
// TODO: Add flow control between RPC clients in TiDB ond RPC servers in TiKV.
// Since we use shared client connection to communicate to the same TiKV, it's possible
// that there are too many concurrent requests which overload the service of TiKV.
// TODO: Implement background cleanup. It adds a background goroutine to periodically check
// whether there is any connection is idle and then close and remove these idle connections.
type rpcClient struct {
sync.RWMutex
isClosed bool
conns map[string]*connArray
conf *config.RPC
}
// NewRPCClient manages connections and rpc calls with tikv-servers.
func NewRPCClient(conf *config.RPC) Client {
return &rpcClient{
conns: make(map[string]*connArray),
conf: conf,
}
}
func (c *rpcClient) getConnArray(addr string) (*connArray, error) {
c.RLock()
if c.isClosed {
c.RUnlock()
return nil, errors.Errorf("rpcClient is closed")
}
array, ok := c.conns[addr]
c.RUnlock()
if !ok {
var err error
array, err = c.createConnArray(addr)
if err != nil {
return nil, err
}
}
return array, nil
}
func (c *rpcClient) createConnArray(addr string) (*connArray, error) {
c.Lock()
defer c.Unlock()
array, ok := c.conns[addr]
if !ok {
var err error
array, err = newConnArray(addr, c.conf)
if err != nil {
return nil, err
}
c.conns[addr] = array
}
return array, nil
}
func (c *rpcClient) closeConns() {
c.Lock()
if !c.isClosed {
c.isClosed = true
// close all connections
for _, array := range c.conns {
array.Close()
}
}
c.Unlock()
}
func sendBatchRequest(
ctx context.Context,
addr string,
connArray *connArray,
req *tikvpb.BatchCommandsRequest_Request,
timeout time.Duration,
) (*Response, error) {
entry := &batchCommandsEntry{
req: req,
res: make(chan *tikvpb.BatchCommandsResponse_Response, 1),
canceled: 0,
err: nil,
}
ctx1, cancel := context.WithTimeout(ctx, timeout)
defer cancel()
select {
case connArray.batchCommandsCh <- entry:
case <-ctx1.Done():
log.Warnf("SendRequest to %s is timeout", addr)
return nil, errors.WithStack(gstatus.Error(gcodes.DeadlineExceeded, "Canceled or timeout"))
}
select {
case res, ok := <-entry.res:
if !ok {
return nil, errors.WithStack(entry.err)
}
return FromBatchCommandsResponse(res), nil
case <-ctx1.Done():
atomic.StoreInt32(&entry.canceled, 1)
log.Warnf("SendRequest to %s is canceled", addr)
return nil, errors.WithStack(gstatus.Error(gcodes.DeadlineExceeded, "Canceled or timeout"))
}
}
// SendRequest sends a Request to server and receives Response.
func (c *rpcClient) SendRequest(ctx context.Context, addr string, req *Request, timeout time.Duration) (*Response, error) {
start := time.Now()
reqType := req.Type.String()
storeID := strconv.FormatUint(req.Context.GetPeer().GetStoreId(), 10)
defer func() {
metrics.SendReqHistogram.WithLabelValues(reqType, storeID).Observe(time.Since(start).Seconds())
}()
connArray, err := c.getConnArray(addr)
if err != nil {
return nil, err
}
if c.conf.Batch.MaxBatchSize > 0 {
if batchReq := req.ToBatchCommandsRequest(); batchReq != nil {
return sendBatchRequest(ctx, addr, connArray, batchReq, timeout)
}
}
client := tikvpb.NewTikvClient(connArray.Get())
if req.Type != CmdCopStream {
ctx1, cancel := context.WithTimeout(ctx, timeout)
defer cancel()
return CallRPC(ctx1, client, req)
}
// Coprocessor streaming request.
// Use context to support timeout for grpc streaming client.
ctx1, cancel := context.WithCancel(ctx)
defer cancel()
resp, err := CallRPC(ctx1, client, req)
if err != nil {
return nil, err
}
// Put the lease object to the timeout channel, so it would be checked periodically.
copStream := resp.CopStream
copStream.Timeout = timeout
copStream.Lease.Cancel = cancel
connArray.streamTimeout <- &copStream.Lease
// Read the first streaming response to get CopStreamResponse.
// This can make error handling much easier, because SendReq() retry on
// region error automatically.
var first *coprocessor.Response
first, err = copStream.Recv()
if err != nil {
if errors.Cause(err) != io.EOF {
return nil, err
}
log.Debug("copstream returns nothing for the request.")
}
copStream.Response = first
return resp, nil
}
func (c *rpcClient) Close() error {
c.closeConns()
return nil
}