test: update client state subscriber test to be not flaky and more stressful about rapid updates (#6512)

Doug Fawley 2023-08-10 15:12:06 -07:00 committed by GitHub
parent f3e94ec13b
commit 879faf6bb2
9 changed files with 156 additions and 176 deletions

View File

@@ -209,7 +209,7 @@ func (ccb *ccBalancerWrapper) closeBalancer(m ccbMode) {
}
ccb.mode = m
done := ccb.serializer.Done
done := ccb.serializer.Done()
b := ccb.balancer
ok := ccb.serializer.Schedule(func(_ context.Context) {
// Close the serializer to ensure that no more calls from gRPC are sent

View File

@@ -189,7 +189,7 @@ func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *
// Register ClientConn with channelz.
cc.channelzRegistration(target)
cc.csMgr = newConnectivityStateManager(cc.channelzID)
cc.csMgr = newConnectivityStateManager(cc.ctx, cc.channelzID)
if err := cc.validateTransportCredentials(); err != nil {
return nil, err
@@ -541,10 +541,10 @@ func getChainStreamer(interceptors []StreamClientInterceptor, curr int, finalStr
// newConnectivityStateManager creates a connectivityStateManager with
// the specified id.
func newConnectivityStateManager(id *channelz.Identifier) *connectivityStateManager {
func newConnectivityStateManager(ctx context.Context, id *channelz.Identifier) *connectivityStateManager {
return &connectivityStateManager{
channelzID: id,
pubSub: grpcsync.NewPubSub(),
pubSub: grpcsync.NewPubSub(ctx),
}
}
@@ -600,10 +600,6 @@ func (csm *connectivityStateManager) getNotifyChan() <-chan struct{} {
return csm.notifyChan
}
func (csm *connectivityStateManager) close() {
csm.pubSub.Stop()
}
// ClientConnInterface defines the functions clients need to perform unary and
// streaming RPCs. It is implemented by *ClientConn, and is only intended to
// be referenced by generated code.
@@ -1234,7 +1230,10 @@ func (cc *ClientConn) ResetConnectBackoff() {
// Close tears down the ClientConn and all underlying connections.
func (cc *ClientConn) Close() error {
defer cc.cancel()
defer func() {
cc.cancel()
<-cc.csMgr.pubSub.Done()
}()
cc.mu.Lock()
if cc.conns == nil {
@@ -1249,7 +1248,6 @@ func (cc *ClientConn) Close() error {
conns := cc.conns
cc.conns = nil
cc.csMgr.updateState(connectivity.Shutdown)
cc.csMgr.close()
pWrapper := cc.blockingpicker
rWrapper := cc.resolverWrapper
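Taken together, the Close() hunks above switch the ClientConn to a cancel-then-wait shutdown: the deferred function cancels the ClientConn's context, which stops the connectivity state manager's PubSub, and then blocks on the PubSub's Done channel so the final Shutdown update reaches every subscriber before Close returns. A minimal standalone sketch of that ordering, with illustrative names (closeAndDrain is not grpc-go API):

```go
package main

import "context"

// closeAndDrain mirrors the ordering ClientConn.Close uses after this
// change: cancel the context that feeds the worker, then block until its
// done channel closes, so pending deliveries finish before returning.
func closeAndDrain(cancel context.CancelFunc, done <-chan struct{}) {
	cancel() // stop accepting new updates
	<-done   // wait for in-flight deliveries to drain
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	done := make(chan struct{})
	go func() {
		<-ctx.Done() // stand-in for a worker draining its backlog
		close(done)
	}()
	closeAndDrain(cancel, done)
}
```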

View File

@@ -32,10 +32,10 @@ import (
//
// This type is safe for concurrent access.
type CallbackSerializer struct {
// Done is closed once the serializer is shut down completely, i.e all
// done is closed once the serializer is shut down completely, i.e. all
// scheduled callbacks are executed and the serializer has deallocated all
// its resources.
Done chan struct{}
done chan struct{}
callbacks *buffer.Unbounded
closedMu sync.Mutex
@@ -48,12 +48,12 @@ type CallbackSerializer struct {
// callbacks will be added once this context is canceled, and any pending un-run
// callbacks will be executed before the serializer is shut down.
func NewCallbackSerializer(ctx context.Context) *CallbackSerializer {
t := &CallbackSerializer{
Done: make(chan struct{}),
cs := &CallbackSerializer{
done: make(chan struct{}),
callbacks: buffer.NewUnbounded(),
}
go t.run(ctx)
return t
go cs.run(ctx)
return cs
}
// Schedule adds a callback to be scheduled after existing callbacks are run.
@@ -64,56 +64,62 @@ func NewCallbackSerializer(ctx context.Context) *CallbackSerializer {
// Return value indicates if the callback was successfully added to the list of
// callbacks to be executed by the serializer. It is not possible to add
// callbacks once the context passed to NewCallbackSerializer is cancelled.
func (t *CallbackSerializer) Schedule(f func(ctx context.Context)) bool {
t.closedMu.Lock()
defer t.closedMu.Unlock()
func (cs *CallbackSerializer) Schedule(f func(ctx context.Context)) bool {
cs.closedMu.Lock()
defer cs.closedMu.Unlock()
if t.closed {
if cs.closed {
return false
}
t.callbacks.Put(f)
cs.callbacks.Put(f)
return true
}
func (t *CallbackSerializer) run(ctx context.Context) {
func (cs *CallbackSerializer) run(ctx context.Context) {
var backlog []func(context.Context)
defer close(t.Done)
defer close(cs.done)
for ctx.Err() == nil {
select {
case <-ctx.Done():
// Do nothing here. Next iteration of the for loop will not happen,
// since ctx.Err() would be non-nil.
case callback, ok := <-t.callbacks.Get():
case callback, ok := <-cs.callbacks.Get():
if !ok {
return
}
t.callbacks.Load()
cs.callbacks.Load()
callback.(func(ctx context.Context))(ctx)
}
}
// Fetch pending callbacks if any, and execute them before returning from
// this method and closing t.Done.
t.closedMu.Lock()
t.closed = true
backlog = t.fetchPendingCallbacks()
t.callbacks.Close()
t.closedMu.Unlock()
// this method and closing cs.done.
cs.closedMu.Lock()
cs.closed = true
backlog = cs.fetchPendingCallbacks()
cs.callbacks.Close()
cs.closedMu.Unlock()
for _, b := range backlog {
b(ctx)
}
}
func (t *CallbackSerializer) fetchPendingCallbacks() []func(context.Context) {
func (cs *CallbackSerializer) fetchPendingCallbacks() []func(context.Context) {
var backlog []func(context.Context)
for {
select {
case b := <-t.callbacks.Get():
case b := <-cs.callbacks.Get():
backlog = append(backlog, b.(func(context.Context)))
t.callbacks.Load()
cs.callbacks.Load()
default:
return backlog
}
}
}
// Done returns a channel that is closed after the context passed to
// NewCallbackSerializer is canceled and all callbacks have been executed.
func (cs *CallbackSerializer) Done() <-chan struct{} {
return cs.done
}
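With this hunk the serializer's termination channel is unexported and exposed through the new Done() accessor. A minimal usage sketch under the assumption that it lives inside grpc-go itself (the grpcsync package is internal and cannot be imported from other modules); the Example name is illustrative:

```go
// Hypothetical example file placed next to the grpcsync package.
package grpcsync_test

import (
	"context"
	"fmt"

	"google.golang.org/grpc/internal/grpcsync"
)

func ExampleCallbackSerializer() {
	ctx, cancel := context.WithCancel(context.Background())
	cs := grpcsync.NewCallbackSerializer(ctx)

	// Callbacks run one at a time, in the order they were scheduled.
	cs.Schedule(func(context.Context) { fmt.Println("first") })
	cs.Schedule(func(context.Context) { fmt.Println("second") })

	// Canceling the context begins shutdown; callbacks that were already
	// scheduled still run before the serializer exits.
	cancel()

	// Done(), replacing the old exported Done field, is closed only after
	// the backlog has fully drained.
	<-cs.Done()

	// Output:
	// first
	// second
}
```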

View File

@@ -190,7 +190,7 @@ func (s) TestCallbackSerializer_Schedule_Close(t *testing.T) {
}
}
}
<-cs.Done
<-cs.Done()
done := make(chan struct{})
if cs.Schedule(func(context.Context) { close(done) }) {

View File

@@ -40,25 +40,23 @@ type Subscriber interface {
// subscribers interested in receiving these messages register a callback
// via the Subscribe() method.
//
// Once a PubSub is stopped, no more messages can be published, and
// it is guaranteed that no more subscriber callback will be invoked.
// Once a PubSub is stopped, no more messages can be published, but any pending
// published messages will be delivered to the subscribers. Done may be used
// to determine when all published messages have been delivered.
type PubSub struct {
cs *CallbackSerializer
cancel context.CancelFunc
cs *CallbackSerializer
// Access to the below fields are guarded by this mutex.
mu sync.Mutex
msg interface{}
subscribers map[Subscriber]bool
stopped bool
}
// NewPubSub returns a new PubSub instance.
func NewPubSub() *PubSub {
ctx, cancel := context.WithCancel(context.Background())
// NewPubSub returns a new PubSub instance. Users should cancel the
// provided context to shut down the PubSub.
func NewPubSub(ctx context.Context) *PubSub {
return &PubSub{
cs: NewCallbackSerializer(ctx),
cancel: cancel,
subscribers: map[Subscriber]bool{},
}
}
@@ -75,10 +73,6 @@ func (ps *PubSub) Subscribe(sub Subscriber) (cancel func()) {
ps.mu.Lock()
defer ps.mu.Unlock()
if ps.stopped {
return func() {}
}
ps.subscribers[sub] = true
if ps.msg != nil {
@@ -106,10 +100,6 @@ func (ps *PubSub) Publish(msg interface{}) {
ps.mu.Lock()
defer ps.mu.Unlock()
if ps.stopped {
return
}
ps.msg = msg
for sub := range ps.subscribers {
s := sub
@@ -124,13 +114,8 @@ func (ps *PubSub) Publish(msg interface{}) {
}
}
// Stop shuts down the PubSub and releases any resources allocated by it.
// It is guaranteed that no subscriber callbacks would be invoked once this
// method returns.
func (ps *PubSub) Stop() {
ps.mu.Lock()
defer ps.mu.Unlock()
ps.stopped = true
ps.cancel()
// Done returns a channel that is closed after the context passed to NewPubSub
// is canceled and all updates have been sent to subscribers.
func (ps *PubSub) Done() <-chan struct{} {
return ps.cs.Done()
}
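The PubSub lifecycle is now owned by the caller's context: cancel it to stop publishing, then wait on Done() for pending deliveries to drain, where previously Stop() was called. A hedged sketch of the new flow, again assuming placement inside grpc-go; logSubscriber is illustrative:

```go
// Hypothetical example file placed next to the grpcsync package.
package grpcsync_test

import (
	"context"
	"fmt"

	"google.golang.org/grpc/internal/grpcsync"
)

type logSubscriber struct{}

func (logSubscriber) OnMessage(msg interface{}) { fmt.Println("got:", msg) }

func ExamplePubSub() {
	ctx, cancel := context.WithCancel(context.Background())
	ps := grpcsync.NewPubSub(ctx)

	// A subscriber immediately receives the most recently published
	// message, if any, plus every message published afterwards.
	unsub := ps.Subscribe(logSubscriber{})
	defer unsub()

	ps.Publish("hello")

	// Cancel the context to stop the PubSub. Messages published before
	// cancellation are still delivered; Done() closes once they are.
	cancel()
	<-ps.Done()

	// Output:
	// got: hello
}
```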

View File

@@ -19,6 +19,7 @@
package grpcsync
import (
"context"
"sync"
"testing"
"time"
@@ -40,8 +41,9 @@ func (ts *testSubscriber) OnMessage(msg interface{}) {
}
func (s) TestPubSub_PublishNoMsg(t *testing.T) {
pubsub := NewPubSub()
defer pubsub.Stop()
ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer cancel()
pubsub := NewPubSub(ctx)
ts := newTestSubscriber(1)
pubsub.Subscribe(ts)
@@ -54,7 +56,9 @@ func (s) TestPubSub_PublishNoMsg(t *testing.T) {
}
func (s) TestPubSub_PublishMsgs_RegisterSubs_And_Stop(t *testing.T) {
pubsub := NewPubSub()
ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer cancel()
pubsub := NewPubSub(ctx)
const numPublished = 10
@@ -148,7 +152,8 @@ func (s) TestPubSub_PublishMsgs_RegisterSubs_And_Stop(t *testing.T) {
t.FailNow()
}
pubsub.Stop()
cancel()
<-pubsub.Done()
go func() {
pubsub.Publish(99)
@@ -165,8 +170,9 @@ func (s) TestPubSub_PublishMsgs_RegisterSubs_And_Stop(t *testing.T) {
}
func (s) TestPubSub_PublishMsgs_BeforeRegisterSub(t *testing.T) {
pubsub := NewPubSub()
defer pubsub.Stop()
ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer cancel()
pubsub := NewPubSub(ctx)
const numPublished = 3
for i := 0; i < numPublished; i++ {

View File

@@ -133,7 +133,7 @@ func (ccr *ccResolverWrapper) close() {
ccr.mu.Unlock()
// Give enqueued callbacks a chance to finish.
<-ccr.serializer.Done
<-ccr.serializer.Done()
// Spawn a goroutine to close the resolver (since it may block trying to
// cleanup all allocated resources) and return early.

View File

@@ -32,6 +32,9 @@ import (
"google.golang.org/grpc/balancer"
"google.golang.org/grpc/connectivity"
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/grpc/internal"
"google.golang.org/grpc/internal/balancer/stub"
"google.golang.org/grpc/internal/grpcsync"
"google.golang.org/grpc/internal/testutils"
"google.golang.org/grpc/resolver"
"google.golang.org/grpc/resolver/manual"
@@ -547,3 +550,93 @@ func awaitNoStateChange(ctx context.Context, t *testing.T, cc *grpc.ClientConn,
t.Fatalf("State changed from %q to %q when no state change was expected", currState, cc.GetState())
}
}
type funcConnectivityStateSubscriber struct {
onMsg func(connectivity.State)
}
func (f *funcConnectivityStateSubscriber) OnMessage(msg interface{}) {
f.onMsg(msg.(connectivity.State))
}
// TestConnectivityStateSubscriber confirms updates sent by the balancer in
// rapid succession are not missed by the subscriber.
func (s) TestConnectivityStateSubscriber(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer cancel()
sendStates := []connectivity.State{
connectivity.Connecting,
connectivity.Ready,
connectivity.Idle,
connectivity.Connecting,
connectivity.Idle,
connectivity.Connecting,
connectivity.Ready,
}
wantStates := append(sendStates, connectivity.Shutdown)
const testBalName = "any"
bf := stub.BalancerFuncs{
UpdateClientConnState: func(bd *stub.BalancerData, _ balancer.ClientConnState) error {
// Send the expected states in rapid succession.
for _, s := range sendStates {
t.Logf("Sending state update %s", s)
bd.ClientConn.UpdateState(balancer.State{ConnectivityState: s})
}
return nil
},
}
stub.Register(testBalName, bf)
// Create the ClientConn.
const testResName = "any"
rb := manual.NewBuilderWithScheme(testResName)
cc, err := grpc.Dial(testResName+":///",
grpc.WithResolvers(rb),
grpc.WithDefaultServiceConfig(fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, testBalName)),
grpc.WithTransportCredentials(insecure.NewCredentials()),
)
if err != nil {
t.Fatalf("Unexpected error from grpc.Dial: %v", err)
}
// Subscribe to state updates. Use a buffer size of 1 to allow the
// Shutdown state to go into the channel when Close()ing.
connCh := make(chan connectivity.State, 1)
s := &funcConnectivityStateSubscriber{
onMsg: func(s connectivity.State) {
select {
case connCh <- s:
case <-ctx.Done():
}
if s == connectivity.Shutdown {
close(connCh)
}
},
}
internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, s)
// Send an update from the resolver that will trigger the LB policy's UpdateClientConnState.
go rb.UpdateState(resolver.State{})
// Verify the resulting states.
for i, want := range wantStates {
if i == len(sendStates) {
// Trigger Shutdown to be sent by the channel. Use a goroutine to
// ensure the operation does not block.
cc.Close()
}
select {
case got := <-connCh:
if got != want {
t.Errorf("Update %v was %s; want %s", i, got, want)
} else {
t.Logf("Update %v was %s as expected", i, got)
}
case <-ctx.Done():
t.Fatalf("Timed out waiting for state update %v: %s", i, want)
}
}
}

View File

@@ -1,108 +0,0 @@
/*
*
* Copyright 2023 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package test
import (
"context"
"testing"
"google.golang.org/grpc"
"google.golang.org/grpc/connectivity"
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/grpc/internal"
"google.golang.org/grpc/internal/grpcsync"
"google.golang.org/grpc/internal/stubserver"
"google.golang.org/grpc/resolver"
"google.golang.org/grpc/resolver/manual"
)
type testSubscriber struct {
onMsgCh chan connectivity.State
}
func newTestSubscriber() *testSubscriber {
return &testSubscriber{onMsgCh: make(chan connectivity.State, 1)}
}
func (ts *testSubscriber) OnMessage(msg interface{}) {
select {
case ts.onMsgCh <- msg.(connectivity.State):
default:
}
}
func (s) TestConnectivityStateUpdates(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
defer cancel()
// Create a ClientConn with a short idle_timeout.
r := manual.NewBuilderWithScheme("whatever")
dopts := []grpc.DialOption{
grpc.WithTransportCredentials(insecure.NewCredentials()),
grpc.WithResolvers(r),
grpc.WithIdleTimeout(defaultTestShortIdleTimeout),
}
cc, err := grpc.Dial(r.Scheme()+":///test.server", dopts...)
if err != nil {
t.Fatalf("grpc.Dial() failed: %v", err)
}
t.Cleanup(func() { cc.Close() })
s := newTestSubscriber()
internal.SubscribeToConnectivityStateChanges.(func(cc *grpc.ClientConn, s grpcsync.Subscriber) func())(cc, s)
backend := stubserver.StartTestService(t, nil)
t.Cleanup(backend.Stop)
wantStates := []connectivity.State{
connectivity.Connecting,
connectivity.Ready,
connectivity.Idle,
connectivity.Shutdown,
}
doneCh := make(chan struct{})
go func() {
defer close(doneCh)
for _, wantState := range wantStates {
select {
case gotState := <-s.onMsgCh:
if gotState != wantState {
t.Errorf("Received unexpected state: %q; want: %q", gotState, wantState)
}
case <-ctx.Done():
t.Error("Timeout when expecting the onMessage() callback to be invoked")
}
if t.Failed() {
break
}
}
}()
// Verify that the ClientConn moves to READY.
r.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: backend.Address}}})
// Verify that the ClientConn moves to IDLE as there is no activity.
awaitState(ctx, t, cc, connectivity.Idle)
cc.Close()
awaitState(ctx, t, cc, connectivity.Shutdown)
<-doneCh
}