circuit breaking: keep max_count per picker, instead of globally, and add support in cluster_impl balancer (#4203)

Also changed the circuit breaking counter implementation to move max_count into
the picker, because that is how cluster_impl is designed. The EDS implementation
is also modified to keep max_count in the picker.
Menghan Li 2021-02-17 10:46:07 -08:00 committed by GitHub
parent 425d405f39
commit 1b75f7144d
13 changed files with 310 additions and 114 deletions
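
Before the per-file diff, a minimal, self-contained sketch of the counting pattern this commit adopts (names simplified from xds/internal/client; this is an illustration, not the committed code): the counter only tracks in-flight requests per service, and every picker passes its own max_count on each StartRequest call.

    package main

    import (
        "fmt"
        "sync/atomic"
    )

    // serviceRequestsCounter tracks in-flight requests for one service. It no
    // longer stores a max; that value now belongs to each picker.
    type serviceRequestsCounter struct {
        numRequests uint32
    }

    // StartRequest admits a request only if the caller-supplied max has not
    // been reached. Different pickers may pass different maxes to the same
    // shared counter.
    func (c *serviceRequestsCounter) StartRequest(max uint32) error {
        if atomic.LoadUint32(&c.numRequests) >= max {
            return fmt.Errorf("max requests %d exceeded", max)
        }
        atomic.AddUint32(&c.numRequests, 1)
        return nil
    }

    // EndRequest releases a slot when the RPC finishes.
    func (c *serviceRequestsCounter) EndRequest() {
        atomic.AddUint32(&c.numRequests, ^uint32(0)) // atomic decrement
    }

    func main() {
        c := &serviceRequestsCounter{}
        const pickerMax = 1 // this picker's max_count
        fmt.Println(c.StartRequest(pickerMax)) // <nil>: first request admitted
        fmt.Println(c.StartRequest(pickerMax)) // error: dropped by circuit breaking
        c.EndRequest()
    }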

View File

@ -34,7 +34,6 @@ import (
"google.golang.org/grpc/resolver"
"google.golang.org/grpc/serviceconfig"
"google.golang.org/grpc/xds/internal/balancer/edsbalancer"
"google.golang.org/grpc/xds/internal/client"
xdsclient "google.golang.org/grpc/xds/internal/client"
"google.golang.org/grpc/xds/internal/client/bootstrap"
)
@ -328,8 +327,6 @@ func (b *cdsBalancer) handleWatchUpdate(update *watchUpdate) {
return
}
client.SetMaxRequests(update.cds.ServiceName, update.cds.MaxRequests)
// The first good update from the watch API leads to the instantiation of an
// edsBalancer. Further updates/errors are propagated to the existing
// edsBalancer.
@ -342,7 +339,10 @@ func (b *cdsBalancer) handleWatchUpdate(update *watchUpdate) {
b.edsLB = edsLB
b.logger.Infof("Created child policy %p of type %s", b.edsLB, edsName)
}
lbCfg := &edsbalancer.EDSConfig{EDSServiceName: update.cds.ServiceName}
lbCfg := &edsbalancer.EDSConfig{
EDSServiceName: update.cds.ServiceName,
MaxConcurrentRequests: update.cds.MaxRequests,
}
if update.cds.EnableLRS {
// An empty string here indicates that the edsBalancer should use the
// same xDS server for load reporting as it does for EDS
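
With this change the CDS balancer no longer calls client.SetMaxRequests; it threads the cluster's MaxRequests into the child policy's config instead. A hedged sketch of that data flow, with both structs reduced to the fields relevant here (the real types are xdsclient.ClusterUpdate and edsbalancer.EDSConfig):

    package sketch

    // Reduced shapes, for illustration only.
    type clusterUpdate struct {
        ServiceName string
        MaxRequests *uint32 // nil when the CDS resource sets no threshold
    }

    type edsConfig struct {
        EDSServiceName        string
        MaxConcurrentRequests *uint32
    }

    // toEDSConfig mirrors what handleWatchUpdate does above: the pointer is
    // passed through unchanged, and a nil value lets the EDS balancer fall
    // back to its default max of 1024.
    func toEDSConfig(u clusterUpdate) *edsConfig {
        return &edsConfig{
            EDSServiceName:        u.ServiceName,
            MaxConcurrentRequests: u.MaxRequests,
        }
    }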

View File

@ -233,7 +233,7 @@ func (s) TestSecurityConfigWithoutXDSCreds(t *testing.T) {
// returned to the CDS balancer, because we have overridden the
// newEDSBalancer function as part of test setup.
cdsUpdate := xdsclient.ClusterUpdate{ServiceName: serviceName}
wantCCS := edsCCS(serviceName, false)
wantCCS := edsCCS(serviceName, nil, false)
ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer ctxCancel()
if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil {
@ -289,7 +289,7 @@ func (s) TestNoSecurityConfigWithXDSCreds(t *testing.T) {
// newEDSBalancer function as part of test setup. No security config is
// passed to the CDS balancer as part of this update.
cdsUpdate := xdsclient.ClusterUpdate{ServiceName: serviceName}
wantCCS := edsCCS(serviceName, false)
wantCCS := edsCCS(serviceName, nil, false)
ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer ctxCancel()
if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil {
@ -445,7 +445,7 @@ func (s) TestSecurityConfigUpdate_BadToGood(t *testing.T) {
// create a new EDS balancer. The fake EDS balancer created above will be
// returned to the CDS balancer, because we have overridden the
// newEDSBalancer function as part of test setup.
wantCCS := edsCCS(serviceName, false)
wantCCS := edsCCS(serviceName, nil, false)
if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdateWithGoodSecurityCfg, nil}, wantCCS, edsB); err != nil {
t.Fatal(err)
}
@ -479,7 +479,7 @@ func (s) TestGoodSecurityConfig(t *testing.T) {
// create a new EDS balancer. The fake EDS balancer created above will be
// returned to the CDS balancer, because we have overridden the
// newEDSBalancer function as part of test setup.
wantCCS := edsCCS(serviceName, false)
wantCCS := edsCCS(serviceName, nil, false)
ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer ctxCancel()
if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdateWithGoodSecurityCfg, nil}, wantCCS, edsB); err != nil {
@ -510,7 +510,7 @@ func (s) TestSecurityConfigUpdate_GoodToFallback(t *testing.T) {
// create a new EDS balancer. The fake EDS balancer created above will be
// returned to the CDS balancer, because we have overridden the
// newEDSBalancer function as part of test setup.
wantCCS := edsCCS(serviceName, false)
wantCCS := edsCCS(serviceName, nil, false)
ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer ctxCancel()
if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdateWithGoodSecurityCfg, nil}, wantCCS, edsB); err != nil {
@ -560,7 +560,7 @@ func (s) TestSecurityConfigUpdate_GoodToBad(t *testing.T) {
// create a new EDS balancer. The fake EDS balancer created above will be
// returned to the CDS balancer, because we have overridden the
// newEDSBalancer function as part of test setup.
wantCCS := edsCCS(serviceName, false)
wantCCS := edsCCS(serviceName, nil, false)
ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer ctxCancel()
if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdateWithGoodSecurityCfg, nil}, wantCCS, edsB); err != nil {
@ -637,7 +637,7 @@ func (s) TestSecurityConfigUpdate_GoodToGood(t *testing.T) {
RootInstanceName: "default1",
},
}
wantCCS := edsCCS(serviceName, false)
wantCCS := edsCCS(serviceName, nil, false)
ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer ctxCancel()
if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil {

View File

@ -193,8 +193,11 @@ func cdsCCS(cluster string) balancer.ClientConnState {
// edsCCS is a helper function to construct a good update passed from the
// cdsBalancer to the edsBalancer.
func edsCCS(service string, enableLRS bool) balancer.ClientConnState {
lbCfg := &edsbalancer.EDSConfig{EDSServiceName: service}
func edsCCS(service string, countMax *uint32, enableLRS bool) balancer.ClientConnState {
lbCfg := &edsbalancer.EDSConfig{
EDSServiceName: service,
MaxConcurrentRequests: countMax,
}
if enableLRS {
lbCfg.LrsLoadReportingServerName = new(string)
}
@ -350,12 +353,12 @@ func (s) TestHandleClusterUpdate(t *testing.T) {
{
name: "happy-case-with-lrs",
cdsUpdate: xdsclient.ClusterUpdate{ServiceName: serviceName, EnableLRS: true},
wantCCS: edsCCS(serviceName, true),
wantCCS: edsCCS(serviceName, nil, true),
},
{
name: "happy-case-without-lrs",
cdsUpdate: xdsclient.ClusterUpdate{ServiceName: serviceName},
wantCCS: edsCCS(serviceName, false),
wantCCS: edsCCS(serviceName, nil, false),
},
}
@ -423,7 +426,7 @@ func (s) TestHandleClusterUpdateError(t *testing.T) {
// returned to the CDS balancer, because we have overridden the
// newEDSBalancer function as part of test setup.
cdsUpdate := xdsclient.ClusterUpdate{ServiceName: serviceName}
wantCCS := edsCCS(serviceName, false)
wantCCS := edsCCS(serviceName, nil, false)
if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil {
t.Fatal(err)
}
@ -508,7 +511,7 @@ func (s) TestResolverError(t *testing.T) {
// returned to the CDS balancer, because we have overridden the
// newEDSBalancer function as part of test setup.
cdsUpdate := xdsclient.ClusterUpdate{ServiceName: serviceName}
wantCCS := edsCCS(serviceName, false)
wantCCS := edsCCS(serviceName, nil, false)
if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil {
t.Fatal(err)
}
@ -557,7 +560,7 @@ func (s) TestUpdateSubConnState(t *testing.T) {
// returned to the CDS balancer, because we have overridden the
// newEDSBalancer function as part of test setup.
cdsUpdate := xdsclient.ClusterUpdate{ServiceName: serviceName}
wantCCS := edsCCS(serviceName, false)
wantCCS := edsCCS(serviceName, nil, false)
ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer ctxCancel()
if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil {
@ -592,7 +595,7 @@ func (s) TestCircuitBreaking(t *testing.T) {
// the service's counter with the new max requests.
var maxRequests uint32 = 1
cdsUpdate := xdsclient.ClusterUpdate{ServiceName: serviceName, MaxRequests: &maxRequests}
wantCCS := edsCCS(serviceName, false)
wantCCS := edsCCS(serviceName, &maxRequests, false)
ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer ctxCancel()
if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil {
@ -602,10 +605,10 @@ func (s) TestCircuitBreaking(t *testing.T) {
// Since the counter's max requests was set to 1, the first request should
// succeed and the second should fail.
counter := client.GetServiceRequestsCounter(serviceName)
if err := counter.StartRequest(); err != nil {
if err := counter.StartRequest(maxRequests); err != nil {
t.Fatal(err)
}
if err := counter.StartRequest(); err == nil {
if err := counter.StartRequest(maxRequests); err == nil {
t.Fatal("unexpected success on start request over max")
}
counter.EndRequest()
@ -625,7 +628,7 @@ func (s) TestClose(t *testing.T) {
// returned to the CDS balancer, because we have overridden the
// newEDSBalancer function as part of test setup.
cdsUpdate := xdsclient.ClusterUpdate{ServiceName: serviceName}
wantCCS := edsCCS(serviceName, false)
wantCCS := edsCCS(serviceName, nil, false)
ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer ctxCancel()
if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil {

View File

@ -58,9 +58,9 @@ func init() {
newRandomWRR = testutils.NewTestWRR
}
// TestDrop verifies that the balancer correctly drops the picks, and that
// the drops are reported.
func TestDrop(t *testing.T) {
// TestDropByCategory verifies that the balancer correctly drops the picks, and
// that the drops are reported.
func TestDropByCategory(t *testing.T) {
xdsC := fakeclient.NewClient()
oldNewXDSClient := newXDSClient
newXDSClient = func() (xdsClientInterface, error) { return xdsC, nil }
@ -214,3 +214,113 @@ func TestDrop(t *testing.T) {
t.Fatalf("got unexpected reports, diff (-got, +want): %v", diff)
}
}
// TestDropCircuitBreaking verifies that the balancer correctly drops the picks
// due to circuit breaking, and that the drops are reported.
func TestDropCircuitBreaking(t *testing.T) {
xdsC := fakeclient.NewClient()
oldNewXDSClient := newXDSClient
newXDSClient = func() (xdsClientInterface, error) { return xdsC, nil }
defer func() { newXDSClient = oldNewXDSClient }()
builder := balancer.Get(clusterImplName)
cc := testutils.NewTestClientConn(t)
b := builder.Build(cc, balancer.BuildOptions{})
defer b.Close()
var maxRequest uint32 = 50
if err := b.UpdateClientConnState(balancer.ClientConnState{
ResolverState: resolver.State{
Addresses: testBackendAddrs,
},
BalancerConfig: &lbConfig{
Cluster: testClusterName,
EDSServiceName: testServiceName,
LRSLoadReportingServerName: newString(testLRSServerName),
MaxConcurrentRequests: &maxRequest,
ChildPolicy: &internalserviceconfig.BalancerConfig{
Name: roundrobin.Name,
},
},
}); err != nil {
t.Fatalf("unexpected error from UpdateClientConnState: %v", err)
}
ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer cancel()
got, err := xdsC.WaitForReportLoad(ctx)
if err != nil {
t.Fatalf("xdsClient.ReportLoad failed with error: %v", err)
}
if got.Server != testLRSServerName {
t.Fatalf("xdsClient.ReportLoad called with {%q}: want {%q}", got.Server, testLRSServerName)
}
sc1 := <-cc.NewSubConnCh
b.UpdateSubConnState(sc1, balancer.SubConnState{ConnectivityState: connectivity.Connecting})
// This should get the connecting picker.
p0 := <-cc.NewPickerCh
for i := 0; i < 10; i++ {
_, err := p0.Pick(balancer.PickInfo{})
if err != balancer.ErrNoSubConnAvailable {
t.Fatalf("picker.Pick, got _,%v, want Err=%v", err, balancer.ErrNoSubConnAvailable)
}
}
b.UpdateSubConnState(sc1, balancer.SubConnState{ConnectivityState: connectivity.Ready})
// Test pick with one backend.
dones := []func(){}
p1 := <-cc.NewPickerCh
const rpcCount = 100
for i := 0; i < rpcCount; i++ {
gotSCSt, err := p1.Pick(balancer.PickInfo{})
if i < 50 && err != nil {
t.Errorf("The first 50%% picks should be non-drops, got error %v", err)
} else if i > 50 && err == nil {
t.Errorf("The second 50%% picks should be drops, got error <nil>")
}
dones = append(dones, func() {
if gotSCSt.Done != nil {
gotSCSt.Done(balancer.DoneInfo{})
}
})
}
for _, done := range dones {
done()
}
dones = []func(){}
// Pick without drops.
for i := 0; i < 50; i++ {
gotSCSt, err := p1.Pick(balancer.PickInfo{})
if err != nil {
t.Errorf("The third 50%% picks should be non-drops, got error %v", err)
}
dones = append(dones, func() {
if gotSCSt.Done != nil {
gotSCSt.Done(balancer.DoneInfo{})
}
})
}
for _, done := range dones {
done()
}
// Dump load data from the store and compare with expected counts.
loadStore := xdsC.LoadStore()
if loadStore == nil {
t.Fatal("loadStore is nil in xdsClient")
}
wantStatsData0 := []*load.Data{{
Cluster: testClusterName,
Service: testServiceName,
TotalDrops: uint64(maxRequest),
}}
gotStatsData0 := loadStore.Stats([]string{testClusterName})
if diff := cmp.Diff(gotStatsData0, wantStatsData0, cmpOpts); diff != "" {
t.Fatalf("got unexpected drop reports, diff (-got, +want): %v", diff)
}
}

View File

@ -38,8 +38,8 @@ import (
)
const (
clusterImplName = "xds_cluster_impl_experimental"
// TODO: define defaultRequestCountMax = 1024
clusterImplName = "xds_cluster_impl_experimental"
defaultRequestCountMax = 1024
)
func init() {
@ -52,11 +52,12 @@ type clusterImplBB struct{}
func (clusterImplBB) Build(cc balancer.ClientConn, bOpts balancer.BuildOptions) balancer.Balancer {
b := &clusterImplBalancer{
ClientConn: cc,
bOpts: bOpts,
closed: grpcsync.NewEvent(),
loadWrapper: loadstore.NewWrapper(),
pickerUpdateCh: buffer.NewUnbounded(),
ClientConn: cc,
bOpts: bOpts,
closed: grpcsync.NewEvent(),
loadWrapper: loadstore.NewWrapper(),
pickerUpdateCh: buffer.NewUnbounded(),
requestCountMax: defaultRequestCountMax,
}
b.logger = prefixLogger(b)
@ -105,10 +106,11 @@ type clusterImplBalancer struct {
// childState/drops/requestCounter can only be accessed in run(). And run()
// is the only goroutine that sends picker to the parent ClientConn. All
// requests to update picker need to be sent to pickerUpdateCh.
childState balancer.State
drops []*dropper
// TODO: add serviceRequestCount and maxRequestCount for circuit breaking.
pickerUpdateCh *buffer.Unbounded
childState balancer.State
drops []*dropper
requestCounter *xdsclient.ServiceRequestsCounter
requestCountMax uint32
pickerUpdateCh *buffer.Unbounded
}
// updateLoadStore checks the config for load store, and decides whether it
@ -198,19 +200,28 @@ func (cib *clusterImplBalancer) UpdateClientConnState(s balancer.ClientConnState
updatePicker = true
}
// TODO: compare cluster name. And update picker if it's changed, because
// circuit breaking's stream counter will be different.
//
// Set `updatePicker` to manually update the picker.
// TODO: compare upper bound of stream count. And update picker if it's
// changed. This is also for circuit breaking.
//
// Set `updatePicker` to manually update the picker.
// Compare cluster name. And update picker if it's changed, because circuit
// breaking's stream counter will be different.
if cib.config == nil || cib.config.Cluster != newConfig.Cluster {
cib.requestCounter = xdsclient.GetServiceRequestsCounter(newConfig.Cluster)
updatePicker = true
}
// Compare upper bound of stream count. And update picker if it's changed.
// This is also for circuit breaking.
var newRequestCountMax uint32 = 1024
if newConfig.MaxConcurrentRequests != nil {
newRequestCountMax = *newConfig.MaxConcurrentRequests
}
if cib.requestCountMax != newRequestCountMax {
cib.requestCountMax = newRequestCountMax
updatePicker = true
}
if updatePicker {
cib.pickerUpdateCh.Put(&dropConfigs{
drops: cib.drops,
drops: cib.drops,
requestCounter: cib.requestCounter,
requestCountMax: cib.requestCountMax,
})
}
@ -280,7 +291,9 @@ func (cib *clusterImplBalancer) UpdateState(state balancer.State) {
}
type dropConfigs struct {
drops []*dropper
drops []*dropper
requestCounter *xdsclient.ServiceRequestsCounter
requestCountMax uint32
}
func (cib *clusterImplBalancer) run() {
@ -293,15 +306,19 @@ func (cib *clusterImplBalancer) run() {
cib.childState = u
cib.ClientConn.UpdateState(balancer.State{
ConnectivityState: cib.childState.ConnectivityState,
Picker: newDropPicker(cib.childState, cib.drops, cib.loadWrapper),
Picker: newDropPicker(cib.childState, &dropConfigs{
drops: cib.drops,
requestCounter: cib.requestCounter,
requestCountMax: cib.requestCountMax,
}, cib.loadWrapper),
})
case *dropConfigs:
cib.drops = u.drops
// cib.requestCounter = u.requestCounter
cib.requestCounter = u.requestCounter
if cib.childState.Picker != nil {
cib.ClientConn.UpdateState(balancer.State{
ConnectivityState: cib.childState.ConnectivityState,
Picker: newDropPicker(cib.childState, cib.drops, cib.loadWrapper),
Picker: newDropPicker(cib.childState, u, cib.loadWrapper),
})
}
}
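
The comment in this file ("childState/drops/requestCounter can only be accessed in run()") describes the balancer's single-goroutine update model: all picker-affecting state is mutated only inside run(), and every change arrives as a message on pickerUpdateCh. A minimal sketch of that pattern, assuming a plain channel in place of buffer.Unbounded:

    package sketch

    import "fmt"

    type dropConfigs struct {
        requestCountMax uint32
    }

    type clusterImplSketch struct {
        pickerUpdateCh  chan interface{} // stands in for *buffer.Unbounded
        requestCountMax uint32           // mutated only inside run()
    }

    // run is the only goroutine that touches picker-affecting state, so no
    // mutex is needed; callers such as UpdateClientConnState enqueue a
    // *dropConfigs instead of writing the fields directly.
    func (b *clusterImplSketch) run() {
        for u := range b.pickerUpdateCh {
            switch u := u.(type) {
            case *dropConfigs:
                b.requestCountMax = u.requestCountMax
                fmt.Println("rebuild picker with max =", b.requestCountMax)
            }
        }
    }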

View File

@ -24,6 +24,7 @@ import (
"google.golang.org/grpc/connectivity"
"google.golang.org/grpc/internal/wrr"
"google.golang.org/grpc/status"
"google.golang.org/grpc/xds/internal/client"
"google.golang.org/grpc/xds/internal/client/load"
)
@ -72,14 +73,17 @@ type dropPicker struct {
drops []*dropper
s balancer.State
loadStore loadReporter
// TODO: add serviceRequestCount and maxRequestCount for circuit breaking.
counter *client.ServiceRequestsCounter
countMax uint32
}
func newDropPicker(s balancer.State, drops []*dropper, loadStore load.PerClusterReporter) *dropPicker {
func newDropPicker(s balancer.State, config *dropConfigs, loadStore load.PerClusterReporter) *dropPicker {
return &dropPicker{
drops: drops,
drops: config.drops,
s: s,
loadStore: loadStore,
counter: config.requestCounter,
countMax: config.requestCountMax,
}
}
@ -98,7 +102,30 @@ func (d *dropPicker) Pick(info balancer.PickInfo) (balancer.PickResult, error) {
return balancer.PickResult{}, status.Errorf(codes.Unavailable, "RPC is dropped")
}
}
// TODO: support circuit breaking, check if d.maxRequestCount >=
// d.counter.StartRequestWithMax().
if d.counter != nil {
if err := d.counter.StartRequest(d.countMax); err != nil {
// Drops by circuit breaking are reported with an empty category. They
// will be reported only in total drops, not in any per-category count.
if d.loadStore != nil {
d.loadStore.CallDropped("")
}
return balancer.PickResult{}, status.Errorf(codes.Unavailable, err.Error())
}
pr, err := d.s.Picker.Pick(info)
if err != nil {
d.counter.EndRequest()
return pr, err
}
oldDone := pr.Done
pr.Done = func(doneInfo balancer.DoneInfo) {
d.counter.EndRequest()
if oldDone != nil {
oldDone(doneInfo)
}
}
return pr, err
}
return d.s.Picker.Pick(info)
}
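
The Done wrapping above is what guarantees EndRequest runs exactly once per admitted RPC, whether or not the child picker set its own callback. As a standalone illustration (wrapDone is a hypothetical helper, not part of this commit):

    package sketch

    import "google.golang.org/grpc/balancer"

    // wrapDone decorates a pick result so release fires when the RPC
    // completes, while still honoring any Done callback the child picker set.
    func wrapDone(pr balancer.PickResult, release func()) balancer.PickResult {
        oldDone := pr.Done
        pr.Done = func(di balancer.DoneInfo) {
            release() // frees the circuit-breaking slot, e.g. counter.EndRequest
            if oldDone != nil {
                oldDone(di)
            }
        }
        return pr
    }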

View File

@ -38,6 +38,15 @@ type EDSConfig struct {
// Name to use in EDS query. If not present, defaults to the server
// name from the target URI.
EDSServiceName string
// MaxConcurrentRequests is the max number of concurrent requests allowed
// for this service. If unset, the default value 1024 is used.
//
// Note that this is not defined in the service config proto, because we
// are dropping EDS and moving its features into cluster_impl. In the
// meantime, to keep things working, we need to add this field. It should
// be fine to add it here, because EDS is only used by CDS today, so we
// have full control.
MaxConcurrentRequests *uint32
// LRS server to send load reports to. If not present, load reporting
// will be disabled. If set to the empty string, load reporting will
// be sent to the same server that we obtained CDS data from.
@ -51,6 +60,7 @@ type edsConfigJSON struct {
ChildPolicy []*loadBalancingConfig
FallbackPolicy []*loadBalancingConfig
EDSServiceName string
MaxConcurrentRequests *uint32
LRSLoadReportingServerName *string
}
@ -64,6 +74,7 @@ func (l *EDSConfig) UnmarshalJSON(data []byte) error {
}
l.EDSServiceName = configJSON.EDSServiceName
l.MaxConcurrentRequests = configJSON.MaxConcurrentRequests
l.LrsLoadReportingServerName = configJSON.LRSLoadReportingServerName
for _, lbcfg := range configJSON.ChildPolicy {
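
To see the new field in action, a hedged round-trip sketch using a local mirror of edsConfigJSON rather than the real internal type (Go's encoding/json matches these field names case-insensitively, which is why the lowercase JSON keys below work):

    package main

    import (
        "encoding/json"
        "fmt"
    )

    // Local mirror of the JSON shape, for illustration only.
    type edsConfigJSON struct {
        EDSServiceName             string
        MaxConcurrentRequests      *uint32
        LRSLoadReportingServerName *string
    }

    func main() {
        js := []byte(`{"edsServiceName": "eds.service", "maxConcurrentRequests": 123}`)
        var cfg edsConfigJSON
        if err := json.Unmarshal(js, &cfg); err != nil {
            panic(err)
        }
        // A nil pointer would mean "unset", in which case the default 1024 applies.
        fmt.Println(cfg.EDSServiceName, *cfg.MaxConcurrentRequests) // eds.service 123
    }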

View File

@ -113,9 +113,9 @@ type edsBalancerImplInterface interface {
handleSubConnStateChange(sc balancer.SubConn, state connectivity.State)
// updateState handles a balancer state update from the priority.
updateState(priority priorityType, s balancer.State)
// updateServiceRequestsCounter updates the service requests counter to the
// updateServiceRequestsConfig updates the service requests counter to the
// one for the given service name.
updateServiceRequestsCounter(serviceName string)
updateServiceRequestsConfig(serviceName string, max *uint32)
// close closes the eds balancer.
close()
}
@ -215,7 +215,7 @@ func (x *edsBalancer) handleGRPCUpdate(update interface{}) {
x.logger.Warningf("failed to update xDS client: %v", err)
}
x.edsImpl.updateServiceRequestsCounter(cfg.EDSServiceName)
x.edsImpl.updateServiceRequestsConfig(cfg.EDSServiceName, cfg.MaxConcurrentRequests)
// We will update the edsImpl with the new child policy, if we got a
// different one.

View File

@ -45,6 +45,8 @@ import (
// TODO: make this an environment variable?
var defaultPriorityInitTimeout = 10 * time.Second
const defaultServiceRequestCountMax = 1024
type localityConfig struct {
weight uint32
addrs []resolver.Address
@ -101,6 +103,7 @@ type edsBalancerImpl struct {
drops []*dropper
innerState balancer.State // The state of the picker without drop support.
serviceRequestsCounter *client.ServiceRequestsCounter
serviceRequestCountMax uint32
}
// newEDSBalancerImpl creates a new edsBalancerImpl.
@ -114,9 +117,10 @@ func newEDSBalancerImpl(cc balancer.ClientConn, bOpts balancer.BuildOptions, enq
enqueueChildBalancerStateUpdate: enqueueState,
priorityToLocalities: make(map[priorityType]*balancerGroupWithConfig),
priorityToState: make(map[priorityType]*balancer.State),
subConnToPriority: make(map[balancer.SubConn]priorityType),
priorityToLocalities: make(map[priorityType]*balancerGroupWithConfig),
priorityToState: make(map[priorityType]*balancer.State),
subConnToPriority: make(map[balancer.SubConn]priorityType),
serviceRequestCountMax: defaultServiceRequestCountMax,
}
// Don't start balancer group here. Start it when handling the first EDS
// response. Otherwise the balancer group will be started with round-robin,
@ -181,7 +185,7 @@ func (edsImpl *edsBalancerImpl) updateDrops(dropConfig []xdsclient.OverloadDropC
// Update picker with old inner picker, new drops.
edsImpl.cc.UpdateState(balancer.State{
ConnectivityState: edsImpl.innerState.ConnectivityState,
Picker: newDropPicker(edsImpl.innerState.Picker, newDrops, edsImpl.loadReporter, edsImpl.serviceRequestsCounter)},
Picker: newDropPicker(edsImpl.innerState.Picker, newDrops, edsImpl.loadReporter, edsImpl.serviceRequestsCounter, edsImpl.serviceRequestCountMax)},
)
}
edsImpl.pickerMu.Unlock()
@ -410,14 +414,17 @@ func (edsImpl *edsBalancerImpl) handleSubConnStateChange(sc balancer.SubConn, s
}
}
// updateConfig handles changes to the circuit breaking configuration.
func (edsImpl *edsBalancerImpl) updateServiceRequestsCounter(serviceName string) {
// updateServiceRequestsConfig handles changes to the circuit breaking configuration.
func (edsImpl *edsBalancerImpl) updateServiceRequestsConfig(serviceName string, max *uint32) {
if !env.CircuitBreakingSupport {
return
}
if edsImpl.serviceRequestsCounter == nil || edsImpl.serviceRequestsCounter.ServiceName != serviceName {
edsImpl.serviceRequestsCounter = client.GetServiceRequestsCounter(serviceName)
}
if max != nil {
edsImpl.serviceRequestCountMax = *max
}
}
// updateState first handles priority, and then wraps picker in a drop picker
@ -434,7 +441,7 @@ func (edsImpl *edsBalancerImpl) updateState(priority priorityType, s balancer.St
defer edsImpl.pickerMu.Unlock()
edsImpl.innerState = s
// Don't reset drops when it's a state change.
edsImpl.cc.UpdateState(balancer.State{ConnectivityState: s.ConnectivityState, Picker: newDropPicker(s.Picker, edsImpl.drops, edsImpl.loadReporter, edsImpl.serviceRequestsCounter)})
edsImpl.cc.UpdateState(balancer.State{ConnectivityState: s.ConnectivityState, Picker: newDropPicker(s.Picker, edsImpl.drops, edsImpl.loadReporter, edsImpl.serviceRequestsCounter, edsImpl.serviceRequestCountMax)})
}
}
@ -487,14 +494,16 @@ type dropPicker struct {
p balancer.Picker
loadStore load.PerClusterReporter
counter *client.ServiceRequestsCounter
countMax uint32
}
func newDropPicker(p balancer.Picker, drops []*dropper, loadStore load.PerClusterReporter, counter *client.ServiceRequestsCounter) *dropPicker {
func newDropPicker(p balancer.Picker, drops []*dropper, loadStore load.PerClusterReporter, counter *client.ServiceRequestsCounter, countMax uint32) *dropPicker {
return &dropPicker{
drops: drops,
p: p,
loadStore: loadStore,
counter: counter,
countMax: countMax,
}
}
@ -517,7 +526,7 @@ func (d *dropPicker) Pick(info balancer.PickInfo) (balancer.PickResult, error) {
return balancer.PickResult{}, status.Errorf(codes.Unavailable, "RPC is dropped")
}
if d.counter != nil {
if err := d.counter.StartRequest(); err != nil {
if err := d.counter.StartRequest(d.countMax); err != nil {
// Drops by circuit breaking are reported with an empty category. They
// will be reported only in total drops, not in any per-category count.
if d.loadStore != nil {
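
One subtlety in updateServiceRequestsConfig above: a nil max does not reset the limit to the default, it leaves the currently stored value (initially defaultServiceRequestCountMax) untouched. A small sketch of that rule:

    package sketch

    const defaultServiceRequestCountMax = 1024

    type edsImplSketch struct {
        serviceRequestCountMax uint32 // starts at the default, 1024
    }

    // updateMax mirrors the nil handling above: only a non-nil max from the
    // config overwrites the stored value; nil keeps whatever is in effect.
    func (e *edsImplSketch) updateMax(max *uint32) {
        if max != nil {
            e.serviceRequestCountMax = *max
        }
    }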

View File

@ -579,9 +579,8 @@ func (s) TestEDS_CircuitBreaking(t *testing.T) {
cc := testutils.NewTestClientConn(t)
edsb := newEDSBalancerImpl(cc, balancer.BuildOptions{}, nil, nil, nil)
edsb.enqueueChildBalancerStateUpdate = edsb.updateState
edsb.updateServiceRequestsCounter("test")
var maxRequests uint32 = 50
client.SetMaxRequests("test", &maxRequests)
edsb.updateServiceRequestsConfig("test", &maxRequests)
// One locality with one backend.
clab1 := testutils.NewClusterLoadAssignmentBuilder(testClusterNames[0], nil)
@ -738,7 +737,7 @@ func (s) TestDropPicker(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
p := newDropPicker(constPicker, tt.drops, nil, nil)
p := newDropPicker(constPicker, tt.drops, nil, nil, defaultServiceRequestCountMax)
// scCount is the number of sc's returned by pick. The opposite of
// drop-count.
@ -786,9 +785,8 @@ func (s) TestEDS_LoadReport(t *testing.T) {
cbMaxRequests = 20
)
var maxRequestsTemp uint32 = cbMaxRequests
client.SetMaxRequests(testServiceName, &maxRequestsTemp)
edsb.updateServiceRequestsConfig(testServiceName, &maxRequestsTemp)
defer client.ClearCounterForTesting(testServiceName)
edsb.updateServiceRequestsCounter(testServiceName)
backendToBalancerID := make(map[balancer.SubConn]internal.LocalityID)

View File

@ -116,6 +116,7 @@ type fakeEDSBalancer struct {
subconnStateChange *testutils.Channel
edsUpdate *testutils.Channel
serviceName *testutils.Channel
serviceRequestMax *testutils.Channel
}
func (f *fakeEDSBalancer) handleSubConnStateChange(sc balancer.SubConn, state connectivity.State) {
@ -132,8 +133,9 @@ func (f *fakeEDSBalancer) handleEDSResponse(edsResp xdsclient.EndpointsUpdate) {
func (f *fakeEDSBalancer) updateState(priority priorityType, s balancer.State) {}
func (f *fakeEDSBalancer) updateServiceRequestsCounter(serviceName string) {
func (f *fakeEDSBalancer) updateServiceRequestsConfig(serviceName string, max *uint32) {
f.serviceName.Send(serviceName)
f.serviceRequestMax.Send(max)
}
func (f *fakeEDSBalancer) close() {}
@ -186,6 +188,25 @@ func (f *fakeEDSBalancer) waitForCounterUpdate(ctx context.Context, wantServiceN
return nil
}
func (f *fakeEDSBalancer) waitForCountMaxUpdate(ctx context.Context, want *uint32) error {
val, err := f.serviceRequestMax.Receive(ctx)
if err != nil {
return err
}
got := val.(*uint32)
if got == nil && want == nil {
return nil
}
if got != nil && want != nil {
if *got != *want {
return fmt.Errorf("got countMax %v, want %v", *got, *want)
}
return nil
}
return fmt.Errorf("got countMax %+v, want %+v", got, want)
}
func newFakeEDSBalancer(cc balancer.ClientConn) edsBalancerImplInterface {
return &fakeEDSBalancer{
cc: cc,
@ -193,6 +214,7 @@ func newFakeEDSBalancer(cc balancer.ClientConn) edsBalancerImplInterface {
subconnStateChange: testutils.NewChannelWithSize(10),
edsUpdate: testutils.NewChannelWithSize(10),
serviceName: testutils.NewChannelWithSize(10),
serviceRequestMax: testutils.NewChannelWithSize(10),
}
}
@ -328,15 +350,20 @@ func (s) TestConfigChildPolicyUpdate(t *testing.T) {
if err := edsLB.waitForCounterUpdate(ctx, testServiceName); err != nil {
t.Fatal(err)
}
if err := edsLB.waitForCountMaxUpdate(ctx, nil); err != nil {
t.Fatal(err)
}
var testCountMax uint32 = 100
lbCfgB := &loadBalancingConfig{
Name: fakeBalancerB,
Config: json.RawMessage("{}"),
}
if err := edsB.UpdateClientConnState(balancer.ClientConnState{
BalancerConfig: &EDSConfig{
ChildPolicy: lbCfgB,
EDSServiceName: testServiceName,
ChildPolicy: lbCfgB,
EDSServiceName: testServiceName,
MaxConcurrentRequests: &testCountMax,
},
}); err != nil {
t.Fatalf("edsB.UpdateClientConnState() failed: %v", err)
@ -349,6 +376,9 @@ func (s) TestConfigChildPolicyUpdate(t *testing.T) {
// eds_impl will compare the service names, and skip if it didn't change.
t.Fatal(err)
}
if err := edsLB.waitForCountMaxUpdate(ctx, &testCountMax); err != nil {
t.Fatal(err)
}
}
// TestSubConnStateChange verifies if the top-level edsBalancer passes on
@ -606,15 +636,23 @@ func (s) TestCounterUpdate(t *testing.T) {
}
defer edsB.Close()
var testCountMax uint32 = 100
// Update should trigger counter update with provided service name.
if err := edsB.UpdateClientConnState(balancer.ClientConnState{
BalancerConfig: &EDSConfig{EDSServiceName: "foobar-1"},
BalancerConfig: &EDSConfig{
EDSServiceName: "foobar-1",
MaxConcurrentRequests: &testCountMax,
},
}); err != nil {
t.Fatal(err)
}
ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer cancel()
if err := edsB.(*edsBalancer).edsImpl.(*fakeEDSBalancer).waitForCounterUpdate(ctx, "foobar-1"); err != nil {
edsI := edsB.(*edsBalancer).edsImpl.(*fakeEDSBalancer)
if err := edsI.waitForCounterUpdate(ctx, "foobar-1"); err != nil {
t.Fatal(err)
}
if err := edsI.waitForCountMaxUpdate(ctx, &testCountMax); err != nil {
t.Fatal(err)
}
}
@ -642,6 +680,7 @@ func (s) TestBalancerConfigParsing(t *testing.T) {
t.Fatalf("%v", err)
}
var testMaxConcurrentRequests uint32 = 123
tests := []struct {
name string
js json.RawMessage
@ -690,6 +729,7 @@ func (s) TestBalancerConfigParsing(t *testing.T) {
{"fake_balancer_A": {}}
],
"edsServiceName": "eds.service",
"maxConcurrentRequests": 123,
"lrsLoadReportingServerName": "lrs.server"
}`),
want: &EDSConfig{
@ -702,6 +742,7 @@ func (s) TestBalancerConfigParsing(t *testing.T) {
Config: json.RawMessage("{}"),
},
EDSServiceName: testEDSName,
MaxConcurrentRequests: &testMaxConcurrentRequests,
LrsLoadReportingServerName: &testLRSName,
},
},

View File

@ -24,8 +24,6 @@ import (
"sync/atomic"
)
const defaultMaxRequests uint32 = 1024
type servicesRequestsCounter struct {
mu sync.Mutex
services map[string]*ServiceRequestsCounter
@ -39,7 +37,6 @@ var src = &servicesRequestsCounter{
// service with the provided name.
type ServiceRequestsCounter struct {
ServiceName string
maxRequests uint32
numRequests uint32
}
@ -50,33 +47,17 @@ func GetServiceRequestsCounter(serviceName string) *ServiceRequestsCounter {
defer src.mu.Unlock()
c, ok := src.services[serviceName]
if !ok {
c = &ServiceRequestsCounter{ServiceName: serviceName, maxRequests: defaultMaxRequests}
c = &ServiceRequestsCounter{ServiceName: serviceName}
src.services[serviceName] = c
}
return c
}
// SetMaxRequests updates the max requests for a service's counter.
func SetMaxRequests(serviceName string, maxRequests *uint32) {
src.mu.Lock()
defer src.mu.Unlock()
c, ok := src.services[serviceName]
if !ok {
c = &ServiceRequestsCounter{ServiceName: serviceName}
src.services[serviceName] = c
}
if maxRequests != nil {
c.maxRequests = *maxRequests
} else {
c.maxRequests = defaultMaxRequests
}
}
// StartRequest starts a request for a service, incrementing its number of
// requests by 1. Returns an error if the max number of requests is exceeded.
func (c *ServiceRequestsCounter) StartRequest() error {
if atomic.LoadUint32(&c.numRequests) >= atomic.LoadUint32(&c.maxRequests) {
return fmt.Errorf("max requests %v exceeded on service %v", c.maxRequests, c.ServiceName)
func (c *ServiceRequestsCounter) StartRequest(max uint32) error {
if atomic.LoadUint32(&c.numRequests) >= max {
return fmt.Errorf("max requests %v exceeded on service %v", max, c.ServiceName)
}
atomic.AddUint32(&c.numRequests, 1)
return nil
@ -97,6 +78,5 @@ func ClearCounterForTesting(serviceName string) {
if !ok {
return
}
c.maxRequests = defaultMaxRequests
c.numRequests = 0
}
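
Because the max is now an argument rather than counter state, a config update takes effect as soon as a new picker starts passing the new value; the in-flight count itself stays shared. A self-contained sketch mirroring the TestSetMaxRequestsIncreased scenario in the test file below:

    package main

    import (
        "fmt"
        "sync/atomic"
    )

    func main() {
        var inFlight uint32 // the shared, per-service state
        start := func(max uint32) bool { // each picker passes its own max
            if atomic.LoadUint32(&inFlight) >= max {
                return false
            }
            atomic.AddUint32(&inFlight, 1)
            return true
        }
        const oldMax, newMax = 16, 17
        for i := uint32(0); i < oldMax; i++ {
            start(oldMax) // fill to the old picker's limit
        }
        fmt.Println(start(oldMax)) // false: the old picker would drop this RPC
        fmt.Println(start(newMax)) // true: a new picker's higher max admits it
    }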

View File

@ -56,7 +56,6 @@ func resetServiceRequestsCounter() {
}
func testCounter(t *testing.T, test counterTest) {
SetMaxRequests(test.name, &test.maxRequests)
requestsStarted := make(chan struct{})
requestsSent := sync.WaitGroup{}
requestsSent.Add(int(test.numRequests))
@ -68,7 +67,7 @@ func testCounter(t *testing.T, test counterTest) {
go func() {
counter := GetServiceRequestsCounter(test.name)
defer requestsDone.Done()
err := counter.StartRequest()
err := counter.StartRequest(test.maxRequests)
if err == nil {
atomic.AddUint32(&successes, 1)
} else {
@ -98,7 +97,7 @@ func testCounter(t *testing.T, test counterTest) {
}
func (s) TestRequestsCounter(t *testing.T) {
resetServiceRequestsCounter()
defer resetServiceRequestsCounter()
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
testCounter(t, test)
@ -107,7 +106,7 @@ func (s) TestRequestsCounter(t *testing.T) {
}
func (s) TestGetServiceRequestsCounter(t *testing.T) {
resetServiceRequestsCounter()
defer resetServiceRequestsCounter()
for _, test := range tests {
counterA := GetServiceRequestsCounter(test.name)
counterB := GetServiceRequestsCounter(test.name)
@ -118,39 +117,40 @@ func (s) TestGetServiceRequestsCounter(t *testing.T) {
}
func startRequests(t *testing.T, n uint32, max uint32, counter *ServiceRequestsCounter) {
SetMaxRequests(counter.ServiceName, &max)
for i := uint32(0); i < n; i++ {
if err := counter.StartRequest(); err != nil {
if err := counter.StartRequest(max); err != nil {
t.Fatalf("error starting initial request: %v", err)
}
}
}
func (s) TestSetMaxRequestsIncreased(t *testing.T) {
resetServiceRequestsCounter()
defer resetServiceRequestsCounter()
const serviceName string = "set-max-requests-increased"
var initialMax uint32 = 16
counter := GetServiceRequestsCounter(serviceName)
startRequests(t, initialMax, initialMax, counter)
if err := counter.StartRequest(); err == nil {
if err := counter.StartRequest(initialMax); err == nil {
t.Fatal("unexpected success on start request after max met")
}
newMax := initialMax + 1
SetMaxRequests(counter.ServiceName, &newMax)
if err := counter.StartRequest(); err != nil {
if err := counter.StartRequest(newMax); err != nil {
t.Fatalf("unexpected error on start request after max increased: %v", err)
}
}
func (s) TestSetMaxRequestsDecreased(t *testing.T) {
resetServiceRequestsCounter()
defer resetServiceRequestsCounter()
const serviceName string = "set-max-requests-decreased"
var initialMax uint32 = 16
counter := GetServiceRequestsCounter(serviceName)
startRequests(t, initialMax-1, initialMax, counter)
newMax := initialMax - 1
SetMaxRequests(counter.ServiceName, &newMax)
if err := counter.StartRequest(); err == nil {
if err := counter.StartRequest(newMax); err == nil {
t.Fatalf("unexpected success on start request after max decreased: %v", err)
}
}