Merge pull request #131162 from wojtek-t/simplify_etcd3_watcher

Simplify etcd3 watcher

Kubernetes-commit: db21f3df3f44b5b4545c8164d0d6030b01db2ed0
This commit is contained in: commit c160237b46

go.mod (2 changes)
@@ -50,7 +50,7 @@ require (
 	gopkg.in/natefinch/lumberjack.v2 v2.2.1
 	k8s.io/api v0.0.0-20250423231958-d18a46229505
 	k8s.io/apimachinery v0.0.0-20250423231524-954960919938
-	k8s.io/client-go v0.0.0-20250423232513-451ac0fcb5bd
+	k8s.io/client-go v0.0.0-20250424032238-82c2b6c71657
 	k8s.io/component-base v0.0.0-20250423233653-88ca0abd8349
 	k8s.io/klog/v2 v2.130.1
 	k8s.io/kms v0.0.0-20250401105328-de9f6e9dd930
go.sum (4 changes)
@@ -371,8 +371,8 @@ k8s.io/api v0.0.0-20250423231958-d18a46229505 h1:CzcGzeX7BpE8DYfaOipLutwMTAZHcii
 k8s.io/api v0.0.0-20250423231958-d18a46229505/go.mod h1:PEZtCkQxQ/XohDih7vofdKAdIIiS9kFruj1lF/EUMLs=
 k8s.io/apimachinery v0.0.0-20250423231524-954960919938 h1:yoIMbzO4of8M4auqFKjNsbFlHJG9jCuoD+4sUJUPdn4=
 k8s.io/apimachinery v0.0.0-20250423231524-954960919938/go.mod h1:tJ77gZ1upNffdrQVxg+oIoEmvSIyTbz3RIPi9HKw+nw=
-k8s.io/client-go v0.0.0-20250423232513-451ac0fcb5bd h1:rR8S6D2gcZ8zGdIobs1g8HLnwbSVvnTOXaI9LaS/TR4=
-k8s.io/client-go v0.0.0-20250423232513-451ac0fcb5bd/go.mod h1:NKnTaoOD5AOCgg0t9ZlglMkD6GXYAdkN5RfI1BYA4lo=
+k8s.io/client-go v0.0.0-20250424032238-82c2b6c71657 h1:qBJHoi86zzpRsCHtDZnj0bccNrz7PaAwrdZB1uGT3xY=
+k8s.io/client-go v0.0.0-20250424032238-82c2b6c71657/go.mod h1:NKnTaoOD5AOCgg0t9ZlglMkD6GXYAdkN5RfI1BYA4lo=
 k8s.io/component-base v0.0.0-20250423233653-88ca0abd8349 h1:TdFXFBAukREkHLOoqrybaIpwLRgmdc4ntO4nqP9ly3A=
 k8s.io/component-base v0.0.0-20250423233653-88ca0abd8349/go.mod h1:d9qYA5xnPzVV2QGRVxRQkFCFhomjWs865vF8Nvfvhf8=
 k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
pkg/storage/etcd3/watcher.go

@@ -91,7 +91,6 @@ type watchChan struct {
 	cancel            context.CancelFunc
 	incomingEventChan chan *event
 	resultChan        chan watch.Event
-	errChan           chan error
 }
 
 // Watch watches on a key and returns a watch.Interface that transfers relevant notifications.
@@ -135,7 +134,6 @@ func (w *watcher) createWatchChan(ctx context.Context, key string, rev int64, re
 		internalPred:      pred,
 		incomingEventChan: make(chan *event, incomingBufSize),
 		resultChan:        make(chan watch.Event, outgoingBufSize),
-		errChan:           make(chan error, 1),
 	}
 	if pred.Empty() {
 		// The filter doesn't filter out any object.
@@ -234,18 +232,6 @@ func (wc *watchChan) run(initialEventsEndBookmarkRequired, forceInitialEvents bo
 	wc.processEvents(&resultChanWG)
 
 	select {
-	case err := <-wc.errChan:
-		if isCancelError(err) {
-			break
-		}
-		errResult := transformErrorToEvent(err)
-		if errResult != nil {
-			// error result is guaranteed to be received by user before closing ResultChan.
-			select {
-			case wc.resultChan <- *errResult:
-			case <-wc.ctx.Done(): // user has given up all results
-			}
-		}
 	case <-watchClosedCh:
 	case <-wc.ctx.Done(): // user cancel
 	}
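Note on this hunk: it is the heart of the simplification. run() no longer drains a dedicated errChan and converts the error into a watch.Event itself; the new sendError() (defined further down in this diff) writes the error event straight into resultChan and cancels the shared context. Below is a minimal, self-contained Go sketch of that single-channel pattern; the names (result, produce) are illustrative only and not part of the apiserver code.

package main

import (
	"context"
	"errors"
	"fmt"
)

type result struct {
	err   error
	value int
}

// produce emits a few values and then a terminal error, all through the same
// channel, mirroring how sendError now reuses resultChan instead of errChan.
func produce(ctx context.Context, cancel context.CancelFunc, out chan<- result) {
	defer close(out) // the consumer ranges until close, so the error is delivered first
	defer cancel()   // fine to double cancel; reaps everything on any exit path
	for i := 0; i < 3; i++ {
		select {
		case out <- result{value: i}:
		case <-ctx.Done(): // consumer gave up; stop blocking
			return
		}
	}
	select {
	case out <- result{err: errors.New("compacted")}: // terminal error as a normal item
	case <-ctx.Done():
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	out := make(chan result, 4)
	go produce(ctx, cancel, out)
	for r := range out {
		if r.err != nil {
			fmt.Println("terminal error:", r.err)
			return
		}
		fmt.Println("value:", r.value)
	}
}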
@@ -309,7 +295,7 @@ func (wc *watchChan) sync() error {
 	// send items from the response until no more results
 	for i, kv := range getResp.Kvs {
 		lastKey = kv.Key
-		wc.sendEvent(parseKV(kv))
+		wc.queueEvent(parseKV(kv))
 		// free kv early. Long lists can take O(seconds) to decode.
 		getResp.Kvs[i] = nil
 	}
@@ -378,7 +364,7 @@ func (wc *watchChan) startWatching(watchClosedCh chan struct{}, initialEventsEnd
 		}
 	}
 	if initialEventsEndBookmarkRequired {
-		wc.sendEvent(func() *event {
+		wc.queueEvent(func() *event {
 			e := progressNotifyEvent(wc.initialRev)
 			e.isInitialEventsEndBookmark = true
 			return e
@@ -397,11 +383,16 @@ func (wc *watchChan) startWatching(watchClosedCh chan struct{}, initialEventsEnd
 			err := wres.Err()
 			// If there is an error on server (e.g. compaction), the channel will return it before closed.
 			logWatchChannelErr(err)
+			// sendError doesn't guarantee that no more items will be put into resultChan.
+			// However, by returning from startWatching here, we guarantee that events
+			// with a higher resourceVersion than the error will not be queued, and thus
+			// also not processed and sent to the user.
+			// TODO(wojtek-t): Figure out if we can synchronously prevent more events.
 			wc.sendError(err)
 			return
 		}
 		if wres.IsProgressNotify() {
-			wc.sendEvent(progressNotifyEvent(wres.Header.GetRevision()))
+			wc.queueEvent(progressNotifyEvent(wres.Header.GetRevision()))
 			metrics.RecordEtcdBookmark(wc.watcher.groupResource.String())
 			continue
 		}
@@ -411,10 +402,15 @@ func (wc *watchChan) startWatching(watchClosedCh chan struct{}, initialEventsEnd
 			parsedEvent, err := parseEvent(e)
 			if err != nil {
 				logWatchChannelErr(err)
+				// sendError doesn't guarantee that no more items will be put into resultChan.
+				// However, by returning from startWatching here, we guarantee that events
+				// with a higher resourceVersion than the error will not be queued, and thus
+				// also not processed and sent to the user.
+				// TODO(wojtek-t): Figure out if we can synchronously prevent more events.
 				wc.sendError(err)
 				return
 			}
-			wc.sendEvent(parsedEvent)
+			wc.queueEvent(parsedEvent)
 		}
 	}
 	// When we come to this point, it's only possible that client side ends the watch.
@@ -447,15 +443,7 @@ func (wc *watchChan) serialProcessEvents(wg *sync.WaitGroup) {
 			if res == nil {
 				continue
 			}
-			if len(wc.resultChan) == cap(wc.resultChan) {
-				klog.V(3).InfoS("Fast watcher, slow processing. Probably caused by slow dispatching events to watchers", "outgoingEvents", outgoingBufSize, "objectType", wc.watcher.objectType, "groupResource", wc.watcher.groupResource)
-			}
-			// If user couldn't receive results fast enough, we also block incoming events from watcher.
-			// Because storing events in local will cause more memory usage.
-			// The worst case would be closing the fast watcher.
-			select {
-			case wc.resultChan <- *res:
-			case <-wc.ctx.Done():
+			if !wc.sendEvent(res) {
 				return
 			}
 		case <-wc.ctx.Done():
@@ -545,15 +533,7 @@ func (p *concurrentOrderedEventProcessing) collectEventProcessing(ctx context.Co
 		if r.event == nil {
 			continue
 		}
-		if len(p.wc.resultChan) == cap(p.wc.resultChan) {
-			klog.V(3).InfoS("Fast watcher, slow processing. Probably caused by slow dispatching events to watchers", "outgoingEvents", outgoingBufSize, "objectType", p.wc.watcher.objectType, "groupResource", p.wc.watcher.groupResource)
-		}
-		// If user couldn't receive results fast enough, we also block incoming events from watcher.
-		// Because storing events in local will cause more memory usage.
-		// The worst case would be closing the fast watcher.
-		select {
-		case p.wc.resultChan <- *r.event:
-		case <-p.wc.ctx.Done():
+		if !p.wc.sendEvent(r.event) {
 			return
 		}
 	}
@@ -654,14 +634,44 @@ func transformErrorToEvent(err error) *watch.Event {
 	}
 }
 
+// sendError synchronously puts an error event into resultChan and
+// triggers cancelling all goroutines.
 func (wc *watchChan) sendError(err error) {
-	select {
-	case wc.errChan <- err:
-	case <-wc.ctx.Done():
-	}
+	// We use wc.ctx to reap all goroutines. Under whatever condition, we should stop them all.
+	// It's fine to double cancel.
+	defer wc.cancel()
+
+	if isCancelError(err) {
+		return
+	}
+	errResult := transformErrorToEvent(err)
+	if errResult != nil {
+		// error result is guaranteed to be received by user before closing ResultChan.
+		select {
+		case wc.resultChan <- *errResult:
+		case <-wc.ctx.Done(): // user has given up all results
+		}
+	}
 }
 
-func (wc *watchChan) sendEvent(e *event) {
+// sendEvent synchronously puts an event into resultChan.
+// Returns true if it was successful.
+func (wc *watchChan) sendEvent(event *watch.Event) bool {
+	if len(wc.resultChan) == cap(wc.resultChan) {
+		klog.V(3).InfoS("Fast watcher, slow processing. Probably caused by slow dispatching events to watchers", "outgoingEvents", outgoingBufSize, "objectType", wc.watcher.objectType, "groupResource", wc.watcher.groupResource)
+	}
+	// If user couldn't receive results fast enough, we also block incoming events from watcher.
+	// Because storing events in local will cause more memory usage.
+	// The worst case would be closing the fast watcher.
+	select {
+	case wc.resultChan <- *event:
+		return true
+	case <-wc.ctx.Done():
+		return false
+	}
+}
+
+func (wc *watchChan) queueEvent(e *event) {
 	if len(wc.incomingEventChan) == incomingBufSize {
 		klog.V(3).InfoS("Fast watcher, slow processing. Probably caused by slow decoding, user not receiving fast, or other processing logic", "incomingEvents", incomingBufSize, "objectType", wc.watcher.objectType, "groupResource", wc.watcher.groupResource)
 	}
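With errChan gone, the watcher reduces to three small primitives: queueEvent puts raw etcd events into the bounded incoming buffer, the processing goroutines transform them, and sendEvent is the single writer into resultChan, returning false once the consumer's context is done. The following standalone sketch shows that shape under assumed names (pipe, and a trivial ToUpper transform standing in for the real event processing):

package main

import (
	"context"
	"fmt"
	"strings"
)

const (
	incomingBufSize = 4 // stands in for the watcher's incoming buffer size
	outgoingBufSize = 4 // stands in for the buffer on resultChan
)

type pipe struct {
	ctx      context.Context
	incoming chan string
	results  chan string
}

// queueEvent mirrors the renamed producer-side helper: it only feeds the
// bounded incoming buffer and warns when the buffer is full.
func (p *pipe) queueEvent(e string) {
	if len(p.incoming) == cap(p.incoming) {
		fmt.Println("fast watcher, slow processing: incoming buffer full") // the klog.V(3) warning, simplified
	}
	select {
	case p.incoming <- e:
	case <-p.ctx.Done():
	}
}

// sendEvent is the single place that writes to results; false means the
// consumer has gone away and the caller should unwind.
func (p *pipe) sendEvent(e string) bool {
	select {
	case p.results <- e:
		return true
	case <-p.ctx.Done():
		return false
	}
}

// processEvents stands in for the serial/concurrent processing loops, which
// after this change share sendEvent instead of each duplicating the select.
func (p *pipe) processEvents() {
	for {
		select {
		case e := <-p.incoming:
			if !p.sendEvent(strings.ToUpper(e)) {
				return
			}
		case <-p.ctx.Done():
			return
		}
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	p := &pipe{
		ctx:      ctx,
		incoming: make(chan string, incomingBufSize),
		results:  make(chan string, outgoingBufSize),
	}
	go p.processEvents()
	for _, e := range []string{"added", "modified", "deleted"} {
		p.queueEvent(e)
	}
	for i := 0; i < 3; i++ {
		fmt.Println(<-p.results)
	}
}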
pkg/storage/etcd3/watcher_test.go

@@ -174,7 +174,6 @@ func TestWatchErrResultNotBlockAfterCancel(t *testing.T) {
 	w := store.watcher.createWatchChan(ctx, "/abc", 0, false, false, storage.Everything)
 	// make resultChan and errChan blocking to ensure ordering.
 	w.resultChan = make(chan watch.Event)
-	w.errChan = make(chan error)
 	// The event flow goes like:
 	// - first we send an error, it should block on resultChan.
 	// - Then we cancel ctx. The blocking on resultChan should be freed up
@@ -185,8 +184,13 @@ func TestWatchErrResultNotBlockAfterCancel(t *testing.T) {
 		w.run(false, true)
 		wg.Done()
 	}()
-	w.errChan <- fmt.Errorf("some error")
+	wg.Add(1)
+	go func() {
+		w.sendError(fmt.Errorf("some error"))
+		wg.Done()
+	}()
 	cancel()
+	// Ensure that both run() and sendError() don't hang forever.
 	wg.Wait()
 }
 
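The reworked test boils down to a generic synchronization pattern: goroutines block on an unbuffered channel, cancel() is the only thing that can free them, and wg.Wait() proves neither hangs. A standalone sketch of that pattern (blockingOp is hypothetical, not a watcher API):

package main

import (
	"context"
	"fmt"
	"sync"
)

// blockingOp blocks on an unbuffered channel that nobody reads, so context
// cancellation is its only way out, just like run() and sendError() above.
func blockingOp(ctx context.Context, out chan<- string, msg string) {
	select {
	case out <- msg:
	case <-ctx.Done():
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	out := make(chan string) // unbuffered, like the test's resultChan
	var wg sync.WaitGroup
	for _, msg := range []string{"run", "sendError"} {
		wg.Add(1)
		go func(m string) {
			defer wg.Done()
			blockingOp(ctx, out, m)
		}(msg)
	}
	cancel()  // frees both goroutines
	wg.Wait() // a real test would time out here if either of them hung
	fmt.Println("no goroutine hung")
}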