From f634ab90c09dc18fd6d7caaefbc5bbe29827428c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roland=20Hu=C3=9F?= Date: Fri, 7 Feb 2020 21:06:14 +0100 Subject: [PATCH] fix(serving): Add an error window for ReadyCondition (#644) * fix(serving): Add an error window for ReadyCondition A default error window of 2 seconds (not sure if this should be made configurable as it is hard to explain/document) cause return an error during sync operations only, if a ReadyCondition == false stays for that long in false. This is needed to compensate situation where the condition is false when a race between revision and route creation occurse during a service creation. This should fix the flakes we have in our E2E tests as described in #544. * fix: words, words * fix(serving): More words + a check --- pkg/wait/wait_for_ready.go | 33 +++++++++++++++++++++++++++++---- pkg/wait/wait_for_ready_test.go | 9 +++++++++ 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/pkg/wait/wait_for_ready.go b/pkg/wait/wait_for_ready.go index 957fa0f49..d5a5c5637 100644 --- a/pkg/wait/wait_for_ready.go +++ b/pkg/wait/wait_for_ready.go @@ -25,6 +25,10 @@ import ( "knative.dev/pkg/apis" ) +// Window for how long a ReadyCondition == false has to stay +// for being considered as an error +var ErrorWindow = 2 * time.Second + // Callbacks and configuration used while waiting type waitForReadyConfig struct { watchMaker WatchMaker @@ -87,7 +91,7 @@ func (w *waitForReadyConfig) Wait(name string, timeout time.Duration, msgCallbac floatingTimeout := timeout for { start := time.Now() - retry, timeoutReached, err := w.waitForReadyCondition(start, name, floatingTimeout, msgCallback) + retry, timeoutReached, err := w.waitForReadyCondition(start, name, floatingTimeout, ErrorWindow, msgCallback) if err != nil { return err, time.Since(start) } @@ -104,18 +108,31 @@ func (w *waitForReadyConfig) Wait(name string, timeout time.Duration, msgCallbac } } -func (w *waitForReadyConfig) waitForReadyCondition(start time.Time, name string, timeout time.Duration, msgCallback MessageCallback) (retry bool, timeoutReached bool, err error) { +func (w *waitForReadyConfig) waitForReadyCondition(start time.Time, name string, timeout time.Duration, errorWindow time.Duration, msgCallback MessageCallback) (retry bool, timeoutReached bool, err error) { watcher, err := w.watchMaker(name, timeout) if err != nil { return false, false, err } - defer watcher.Stop() + + // channel used to transport the error + errChan := make(chan error) + var errorTimer *time.Timer + // Stop error timer if it has been started because of + // a ConditionReady has been set to false + defer (func() { + if errorTimer != nil { + errorTimer.Stop() + } + })() + for { select { case <-time.After(timeout): return false, true, nil + case err = <-errChan: + return false, false, err case event, ok := <-watcher.ResultChan(): if !ok || event.Object == nil { return true, false, nil @@ -140,7 +157,15 @@ func (w *waitForReadyConfig) waitForReadyCondition(start time.Time, name string, case corev1.ConditionTrue: return false, false, nil case corev1.ConditionFalse: - return false, false, fmt.Errorf("%s: %s", cond.Reason, cond.Message) + // Fire up a timer waiting for the error window duration to still allow to reconcile + // to a true condition even after the condition went to false. If this is not the case within + // this window, then an error is returned. + // If there is already a timer running, we just log. + if errorTimer != nil { + errorTimer = time.AfterFunc(errorWindow, func() { + errChan <- fmt.Errorf("%s: %s", cond.Reason, cond.Message) + }) + } } if cond.Message != "" { msgCallback(time.Since(start), cond.Message) diff --git a/pkg/wait/wait_for_ready_test.go b/pkg/wait/wait_for_ready_test.go index fb20f43df..ad80ad19f 100644 --- a/pkg/wait/wait_for_ready_test.go +++ b/pkg/wait/wait_for_ready_test.go @@ -79,6 +79,7 @@ func prepareTestCases(name string) []waitForReadyTestCase { tc(peError, name, true), tc(peWrongGeneration, name, true), tc(peTimeout, name, true), + tc(peReadyFalseWithinErrorWindow, name, false), } } @@ -131,3 +132,11 @@ func peWrongGeneration(name string) ([]watch.Event, int) { {watch.Modified, CreateTestServiceWithConditions(name, corev1.ConditionTrue, corev1.ConditionTrue, "", "", 1, 2)}, }, len(messages) } + +func peReadyFalseWithinErrorWindow(name string) ([]watch.Event, int) { + messages := pMessages(1) + return []watch.Event{ + {watch.Added, CreateTestServiceWithConditions(name, corev1.ConditionFalse, corev1.ConditionFalse, "Route not ready", messages[0])}, + {watch.Modified, CreateTestServiceWithConditions(name, corev1.ConditionTrue, corev1.ConditionTrue, "Route ready", "")}, + }, len(messages) +}