/*
 *
 * Copyright 2020 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package controller

import (
	"context"
	"fmt"
	"time"

	"github.com/golang/protobuf/proto"
	"google.golang.org/grpc"
	controllerversion "google.golang.org/grpc/xds/internal/xdsclient/controller/version"
	xdsresourceversion "google.golang.org/grpc/xds/internal/xdsclient/controller/version"
	"google.golang.org/grpc/xds/internal/xdsclient/load"
	"google.golang.org/grpc/xds/internal/xdsclient/xdsresource"
)

// AddWatch adds a watch for an xDS resource given its type and name.
func (t *Controller) AddWatch(rType xdsresource.ResourceType, resourceName string) {
	t.sendCh.Put(&watchAction{
		rType:    rType,
		remove:   false,
		resource: resourceName,
	})
}

// RemoveWatch cancels an already registered watch for an xDS resource
// given its type and name.
func (t *Controller) RemoveWatch(rType xdsresource.ResourceType, resourceName string) {
	t.sendCh.Put(&watchAction{
		rType:    rType,
		remove:   true,
		resource: resourceName,
	})
}
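
// Usage sketch (hypothetical caller, for illustration only; the resource
// type constants are defined in the xdsresource package):
//
//	ctrl.AddWatch(xdsresource.ListenerResource, "example-listener")
//	defer ctrl.RemoveWatch(xdsresource.ListenerResource, "example-listener")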

// run starts an ADS stream (and backs off exponentially if the previous
// stream failed without receiving a single reply) and runs the sender and
// receiver routines to send and receive data from the stream respectively.
func (t *Controller) run(ctx context.Context) {
	sendDoneCh := make(chan struct{})
	defer func() {
		<-sendDoneCh
		close(t.runDoneCh)
	}()
	go t.send(ctx, sendDoneCh)

	// TODO: start a goroutine monitoring ClientConn's connectivity state,
	// and report an error (and log) when the state is transient failure.

	retries := 0
	lastStreamStartTime := time.Time{}
	for ctx.Err() == nil {
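		// Sleep out the remainder of the backoff period, measured from
		// when the previous stream was started. On the first attempt
		// lastStreamStartTime is the zero time, so dur is negative and
		// the stream is created immediately.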
		dur := time.Until(lastStreamStartTime.Add(t.backoff(retries)))
		if dur > 0 {
			timer := time.NewTimer(dur)
			select {
			case <-timer.C:
			case <-ctx.Done():
				timer.Stop()
				return
			}
		}

		retries++
		lastStreamStartTime = time.Now()
		stream, err := t.vClient.NewStream(ctx, t.cc)
		if err != nil {
			t.updateHandler.NewConnectionError(err)
			t.logger.Warningf("xds: ADS stream creation failed: %v", err)
			continue
		}
		t.logger.Infof("ADS stream created")

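		// Hand the new stream to the send() goroutine: first drain any
		// stale stream left over from a previous iteration, so that the
		// send below cannot block (this pattern assumes streamCh is a
		// buffered channel with capacity one).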
		select {
		case <-t.streamCh:
		default:
		}
		t.streamCh <- stream
		if t.recv(stream) {
			retries = 0
		}
	}
}

// send is a separate goroutine for sending watch requests on the xDS stream.
//
// It watches the stream channel for new streams, and the request channel for
// new requests to send on the stream.
//
// Each new request (watchAction) is
// - processed and added to the watch map, so that it will be resent when a
//   new stream is created
// - sent on the current stream, if one exists; the current stream is cleared
//   when any send on it fails
//
// For each new stream, all of the existing requests are resent.
//
// Note that this goroutine doesn't do anything to the old stream when there's
// a new one. In fact, there should be only one stream in progress, and a new
// one should only be created when the old one fails (i.e. recv returns an
// error).
func (t *Controller) send(ctx context.Context, doneCh chan struct{}) {
	defer func() { close(doneCh) }()

	var stream grpc.ClientStream
	for {
		select {
		case <-ctx.Done():
			return
		case stream = <-t.streamCh:
			if !t.sendExisting(stream) {
				// Send failed; clear the current stream.
				stream = nil
			}
		case u := <-t.sendCh.Get():
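			// Load() tells sendCh's unbounded buffer that the value
			// received from Get() has been consumed, so the next queued
			// item can be made available.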
			t.sendCh.Load()

			var (
				target                 []string
				rType                  xdsresource.ResourceType
				version, nonce, errMsg string
				send                   bool
			)
			switch update := u.(type) {
			case *watchAction:
				target, rType, version, nonce = t.processWatchInfo(update)
			case *ackAction:
				target, rType, version, nonce, send = t.processAckInfo(update, stream)
				if !send {
					continue
				}
				errMsg = update.errMsg
			}
			if stream == nil {
				// There's no stream yet. Skip the request. It will be
				// resent on a new stream; if no stream is ever created,
				// the watcher will time out (same as the server not
				// sending a response back).
				continue
			}
			if err := t.vClient.SendRequest(stream, target, rType, version, nonce, errMsg); err != nil {
				t.logger.Warningf("ADS request for {target: %q, type: %v, version: %q, nonce: %q} failed: %v", target, rType, version, nonce, err)
				// Send failed; clear the current stream.
				stream = nil
			}
		}
	}
}

// sendExisting sends out xDS requests for registered watchers when recovering
// from a broken stream.
//
// We call stream.Send() here with the lock being held. It should be OK to do
// that here because the stream has just started and Send() usually returns
// quickly (once it pushes the message onto the transport layer) and is only
// ever blocked if we don't have enough flow control quota.
func (t *Controller) sendExisting(stream grpc.ClientStream) bool {
	t.mu.Lock()
	defer t.mu.Unlock()

	// Reset only the nonce when the stream restarts.
	//
	// The xDS spec says the following (see section
	// https://www.envoyproxy.io/docs/envoy/latest/api-docs/xds_protocol#ack-nack-and-resource-type-instance-version):
	//
	// Note that the version for a resource type is not a property of an
	// individual xDS stream but rather a property of the resources themselves.
	// If the stream becomes broken and the client creates a new stream, the
	// client's initial request on the new stream should indicate the most
	// recent version seen by the client on the previous stream.
	t.nonceMap = make(map[xdsresource.ResourceType]string)

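	// Resend a request for every watched resource type, carrying the most
	// recent version previously ACKed for that type and an empty nonce.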
	for rType, s := range t.watchMap {
		if err := t.vClient.SendRequest(stream, mapToSlice(s), rType, t.versionMap[rType], "", ""); err != nil {
			t.logger.Warningf("ADS request failed: %v", err)
			return false
		}
	}

	return true
}

// recv receives xDS responses on the provided ADS stream and branches out to
// message specific handlers.
func (t *Controller) recv(stream grpc.ClientStream) bool {
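	// msgReceived tracks whether at least one message was received on this
	// stream; run() uses the return value to decide whether to reset the
	// retry counter for backoff purposes.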
	msgReceived := false
	for {
		resp, err := t.vClient.RecvResponse(stream)
		if err != nil {
			t.updateHandler.NewConnectionError(err)
			t.logger.Warningf("ADS stream is closed with error: %v", err)
			return msgReceived
		}
		msgReceived = true

		rType, version, nonce, err := t.handleResponse(resp)
		if e, ok := err.(xdsresourceversion.ErrResourceTypeUnsupported); ok {
			t.logger.Warningf("%s", e.ErrStr)
			continue
		}
		if err != nil {
			t.sendCh.Put(&ackAction{
				rType:   rType,
				version: "",
				nonce:   nonce,
				errMsg:  err.Error(),
				stream:  stream,
			})
			t.logger.Warningf("Sending NACK for response type: %v, version: %v, nonce: %v, reason: %v", rType, version, nonce, err)
			continue
		}
		t.sendCh.Put(&ackAction{
			rType:   rType,
			version: version,
			nonce:   nonce,
			stream:  stream,
		})
		t.logger.Infof("Sending ACK for response type: %v, version: %v, nonce: %v", rType, version, nonce)
	}
}

func (t *Controller) handleResponse(resp proto.Message) (xdsresource.ResourceType, string, string, error) {
	rType, resource, version, nonce, err := t.vClient.ParseResponse(resp)
	if err != nil {
		return rType, version, nonce, err
	}
	opts := &xdsresource.UnmarshalOptions{
		Version:         version,
		Resources:       resource,
		Logger:          t.logger,
		UpdateValidator: t.updateValidator,
	}
	var md xdsresource.UpdateMetadata
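	// Note that each Unmarshal* call below can return both a non-empty update
	// map and a non-nil error (e.g. when only some resources in the response
	// fail validation). The update handler is invoked either way; a non-nil
	// error propagated to recv() results in a NACK.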
	switch rType {
	case xdsresource.ListenerResource:
		var update map[string]xdsresource.ListenerUpdateErrTuple
		update, md, err = xdsresource.UnmarshalListener(opts)
		t.updateHandler.NewListeners(update, md)
	case xdsresource.RouteConfigResource:
		var update map[string]xdsresource.RouteConfigUpdateErrTuple
		update, md, err = xdsresource.UnmarshalRouteConfig(opts)
		t.updateHandler.NewRouteConfigs(update, md)
	case xdsresource.ClusterResource:
		var update map[string]xdsresource.ClusterUpdateErrTuple
		update, md, err = xdsresource.UnmarshalCluster(opts)
		t.updateHandler.NewClusters(update, md)
	case xdsresource.EndpointsResource:
		var update map[string]xdsresource.EndpointsUpdateErrTuple
		update, md, err = xdsresource.UnmarshalEndpoints(opts)
		t.updateHandler.NewEndpoints(update, md)
	default:
		return rType, "", "", xdsresourceversion.ErrResourceTypeUnsupported{
			ErrStr: fmt.Sprintf("Resource type %v unknown in response from server", rType),
		}
	}
	return rType, version, nonce, err
}

func mapToSlice(m map[string]bool) []string {
	ret := make([]string, 0, len(m))
	for i := range m {
		ret = append(ret, i)
	}
	return ret
}

type watchAction struct {
	rType    xdsresource.ResourceType
	remove   bool // Whether this action removes the watch for the resource.
	resource string
}

// processWatchInfo pulls the fields needed by the request from a watchAction.
//
// It also updates the watch map.
func (t *Controller) processWatchInfo(w *watchAction) (target []string, rType xdsresource.ResourceType, ver, nonce string) {
	t.mu.Lock()
	defer t.mu.Unlock()

	var current map[string]bool
	current, ok := t.watchMap[w.rType]
	if !ok {
		current = make(map[string]bool)
		t.watchMap[w.rType] = current
	}

	if w.remove {
		delete(current, w.resource)
		if len(current) == 0 {
			delete(t.watchMap, w.rType)
		}
	} else {
		current[w.resource] = true
	}

	rType = w.rType
	target = mapToSlice(current)
	// We don't reset the version or the nonce when a new watch is started.
	// The version and nonce from the previous response are carried in the
	// request. Only the nonce is reset when the stream is recreated.
	ver = t.versionMap[rType]
	nonce = t.nonceMap[rType]
	return target, rType, ver, nonce
}

type ackAction struct {
	rType   xdsresource.ResourceType
	version string // NACK if version is an empty string.
	nonce   string
	errMsg  string // Empty unless it's a NACK.
	// Each ACK/NACK is tagged with the stream it is for. When the stream
	// goes down, all ACKs/NACKs for that stream are dropped, and the
	// version/nonce won't be updated.
	stream grpc.ClientStream
}

// processAckInfo pulls the fields needed by the ACK request from an ackAction.
//
// If no active watch is found for this ACK, it returns false for send.
func (t *Controller) processAckInfo(ack *ackAction, stream grpc.ClientStream) (target []string, rType xdsresource.ResourceType, version, nonce string, send bool) {
	if ack.stream != stream {
		// If the ACK's stream isn't the current sending stream, the ACK
		// was pushed to the queue before the old stream broke, and a new
		// stream has been started since. Return immediately here so we
		// don't update the nonce for the new stream.
		return nil, xdsresource.UnknownResource, "", "", false
	}
	rType = ack.rType

	t.mu.Lock()
	defer t.mu.Unlock()

	// Update the nonce whether or not we are going to send the ACK request
	// on the wire. We may not send the request if the watch is canceled, but
	// the nonce needs to be updated so the next request will have the right
	// nonce.
	nonce = ack.nonce
	t.nonceMap[rType] = nonce

	s, ok := t.watchMap[rType]
	if !ok || len(s) == 0 {
		// We don't send the ACK request if there's no active watch (either
		// the server sent a response before any request, or the watch was
		// canceled while the ackAction was in the queue), because there's no
		// resource name to include. If we sent a request with an empty
		// resource name list, the server might treat it as a wildcard and
		// send us everything.
		return nil, xdsresource.UnknownResource, "", "", false
	}
	send = true
	target = mapToSlice(s)

	version = ack.version
	if version == "" {
		// This is a NACK; use the most recently ACKed version instead.
		version = t.versionMap[rType]
		// version will still be an empty string if rType isn't found in
		// versionMap; this can happen if there wasn't any ACK before.
	} else {
		t.versionMap[rType] = version
	}
	return target, rType, version, nonce, send
}

// reportLoad starts an LRS stream to report load data to the management
// server. It blocks until the context is cancelled.
func (t *Controller) reportLoad(ctx context.Context, cc *grpc.ClientConn, opts controllerversion.LoadReportingOptions) {
	retries := 0
	lastStreamStartTime := time.Time{}
	for ctx.Err() == nil {
		dur := time.Until(lastStreamStartTime.Add(t.backoff(retries)))
		if dur > 0 {
			timer := time.NewTimer(dur)
			select {
			case <-timer.C:
			case <-ctx.Done():
				timer.Stop()
				return
			}
		}

		retries++
		lastStreamStartTime = time.Now()
		func() {
			// streamCtx is created and canceled in case we terminate the
			// stream early for any reason, to avoid gRPC-Go leaking the
			// RPC's monitoring goroutine.
			streamCtx, cancel := context.WithCancel(ctx)
			defer cancel()
			stream, err := t.vClient.NewLoadStatsStream(streamCtx, cc)
			if err != nil {
				t.logger.Warningf("lrs: failed to create stream: %v", err)
				return
			}
			t.logger.Infof("lrs: created LRS stream")

			if err := t.vClient.SendFirstLoadStatsRequest(stream); err != nil {
				t.logger.Warningf("lrs: failed to send first request: %v", err)
				return
			}

			clusters, interval, err := t.vClient.HandleLoadStatsResponse(stream)
			if err != nil {
				t.logger.Warningf("lrs: error from stream: %v", err)
				return
			}

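			// A full handshake (stream created, first request sent, first
			// response handled) succeeded; reset the backoff counter before
			// entering the long-running load-reporting loop.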
			retries = 0
			t.sendLoads(streamCtx, stream, opts.LoadStore, clusters, interval)
		}()
	}
}

func (t *Controller) sendLoads(ctx context.Context, stream grpc.ClientStream, store *load.Store, clusterNames []string, interval time.Duration) {
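	// Push a load report on every tick; the reporting interval comes from
	// the server's initial LRS response.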
	tick := time.NewTicker(interval)
	defer tick.Stop()
	for {
		select {
		case <-tick.C:
		case <-ctx.Done():
			return
		}
		if err := t.vClient.SendLoadStatsRequest(stream, store.Stats(clusterNames)); err != nil {
			t.logger.Warningf("lrs: error from stream: %v", err)
			return
		}
	}
}