balancer: fix aggregated state to not report idle with zero subconns (#4756)

This commit is contained in:
Doug Fawley 2021-09-10 16:25:09 -07:00 committed by GitHub
parent d25e31e741
commit 03268c8ed2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 17 additions and 6 deletions

View File

@ -372,6 +372,7 @@ type ConnectivityStateEvaluator struct {
numReady uint64 // Number of addrConns in ready state.
numConnecting uint64 // Number of addrConns in connecting state.
numTransientFailure uint64 // Number of addrConns in transient failure state.
numIdle uint64 // Number of addrConns in idle state.
}
// RecordTransition records state change happening in subConn and based on that
@ -380,7 +381,8 @@ type ConnectivityStateEvaluator struct {
// - If at least one SubConn in Ready, the aggregated state is Ready;
// - Else if at least one SubConn in Connecting, the aggregated state is Connecting;
// - Else if at least one SubConn is TransientFailure, the aggregated state is Transient Failure;
// - Else the aggregated state is Idle
// - Else if at least one SubConn is Idle, the aggregated state is Idle;
// - Else there are no subconns and the aggregated state is Transient Failure
//
// Shutdown is not considered.
func (cse *ConnectivityStateEvaluator) RecordTransition(oldState, newState connectivity.State) connectivity.State {
@ -394,6 +396,8 @@ func (cse *ConnectivityStateEvaluator) RecordTransition(oldState, newState conne
cse.numConnecting += updateVal
case connectivity.TransientFailure:
cse.numTransientFailure += updateVal
case connectivity.Idle:
cse.numIdle += updateVal
}
}
@ -407,5 +411,8 @@ func (cse *ConnectivityStateEvaluator) RecordTransition(oldState, newState conne
if cse.numTransientFailure > 0 {
return connectivity.TransientFailure
}
return connectivity.Idle
if cse.numIdle > 0 {
return connectivity.Idle
}
return connectivity.TransientFailure
}

View File

@ -133,6 +133,7 @@ func (b *baseBalancer) UpdateClientConnState(s balancer.ClientConnState) error {
}
b.subConns[aNoAttrs] = subConnInfo{subConn: sc, attrs: a.Attributes}
b.scStates[sc] = connectivity.Idle
b.csEvltr.RecordTransition(connectivity.Shutdown, connectivity.Idle)
sc.Connect()
} else {
// Always update the subconn's address in case the attributes
@ -213,10 +214,14 @@ func (b *baseBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.Su
}
return
}
if oldS == connectivity.TransientFailure && s == connectivity.Connecting {
// Once a subconn enters TRANSIENT_FAILURE, ignore subsequent
if oldS == connectivity.TransientFailure &&
(s == connectivity.Connecting || s == connectivity.Idle) {
// Once a subconn enters TRANSIENT_FAILURE, ignore subsequent IDLE or
// CONNECTING transitions to prevent the aggregated state from being
// always CONNECTING when many backends exist but are all down.
if s == connectivity.Idle {
sc.Connect()
}
return
}
b.scStates[sc] = s
@ -242,7 +247,6 @@ func (b *baseBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.Su
b.state == connectivity.TransientFailure {
b.regeneratePicker()
}
b.cc.UpdateState(balancer.State{ConnectivityState: b.state, Picker: b.picker})
}

View File

@ -47,7 +47,7 @@ func init() {
type rrPickerBuilder struct{}
func (*rrPickerBuilder) Build(info base.PickerBuildInfo) balancer.Picker {
logger.Infof("roundrobinPicker: newPicker called with info: %v", info)
logger.Infof("roundrobinPicker: Build called with info: %v", info)
if len(info.ReadySCs) == 0 {
return base.NewErrPicker(balancer.ErrNoSubConnAvailable)
}