client/pkg/wait/wait_for_ready.go

301 lines
10 KiB
Go

// Copyright © 2019 The Knative Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package wait
import (
"context"
"fmt"
"io"
"time"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/watch"
"knative.dev/pkg/apis"
)
// waitForReadyConfig holds the callbacks and configuration used while waiting
// for the "Ready" condition of a resource. It is the Wait implementation
// returned by NewWaitForReady.
type waitForReadyConfig struct {
// conditionsExtractor pulls the status conditions out of a watched object
conditionsExtractor ConditionsExtractor
// kind names the type of resource being watched; it is used in the timeout error message
kind string
}
// waitForEvent holds the callbacks and configuration used while waiting for a
// specific event. It is the Wait implementation returned by NewWaitForEvent.
type waitForEvent struct {
// eventDone decides whether a received event ends the wait
eventDone EventDone
// kind names the type of resource being watched; it is used in the timeout error message
kind string
}
// EventDone is a predicate over watch events: returning true marks the given
// event as the one that has been waited for and stops the waiting.
type EventDone func(ev *watch.Event) bool
// Wait is the interface used for waiting until a resource with a given name has
// reached a definitive state, e.g. in its "Ready" condition.
type Wait interface {
// Wait waits on the resource with the given name, using the events delivered
// by the given watcher, and passes event messages to msgCallback.
// Returns an error (if any) and the overall time it took to wait
Wait(ctx context.Context, watcher watch.Interface, name string, options Options, msgCallback MessageCallback) (error, time.Duration)
}
// Options configures how long a Wait implementation waits. Both fields are
// optional; a nil pointer selects the built-in default.
type Options struct {
// ErrorWindow is the window for how long a ReadyCondition == false has to stay
// for being considered as an error (useful for flaky reconciliation).
// Defaults to 2 seconds when nil.
ErrorWindow *time.Duration
// Timeout is how long to wait at maximum. Defaults to 60 seconds when nil.
Timeout *time.Duration
}
// WatchMaker creates the watch which is used when waiting for the Ready condition.
// It receives the name of the resource and a timeout and returns the watch
// or an error if the watch could not be created.
type WatchMaker func(name string, timeout time.Duration) (watch.Interface, error)
// ConditionsExtractor extracts the status conditions from a runtime object,
// or returns an error if they cannot be obtained.
type ConditionsExtractor func(obj runtime.Object) (apis.Conditions, error)
// MessageCallback is a callback for event messages. It receives a duration
// (the waiters in this file pass the time elapsed since their start time)
// together with the message of the event's condition.
type MessageCallback func(durationSinceState time.Duration, message string)
// NewWaitForReady creates a Wait object which waits until the "Ready" condition
// of a resource is set to "True". kind names the type of resource and shows up
// in error messages, extractor is used to obtain the conditions from every
// watched object.
func NewWaitForReady(kind string, extractor ConditionsExtractor) Wait {
	waiter := new(waitForReadyConfig)
	waiter.conditionsExtractor = extractor
	waiter.kind = kind
	return waiter
}
// NewWaitForEvent creates a Wait object which waits until a specific event has
// been seen, i.e. until the given eventDone function returns true for an event.
// kind names the type of resource and shows up in error messages.
func NewWaitForEvent(kind string, eventDone EventDone) Wait {
	waiter := new(waitForEvent)
	waiter.eventDone = eventDone
	waiter.kind = kind
	return waiter
}
// SimpleMessageCallback returns a callback which prints a simple event message to the
// given writer. Every line consists of the duration in seconds (millisecond precision)
// followed by the message. A message identical to the one of the previous call is
// abbreviated to "...".
func SimpleMessageCallback(out io.Writer) MessageCallback {
	// Message seen by the previous invocation; starts out empty, so an empty
	// first message is abbreviated as well.
	var previous string
	return func(duration time.Duration, message string) {
		shown := "..."
		if message != previous {
			shown = message
		}
		seconds := float64(duration.Round(time.Millisecond)) / float64(time.Second)
		fmt.Fprintf(out, "%7.3fs %s\n", seconds, shown)
		previous = message
	}
}
// NoopMessageCallback returns a callback which ignores its arguments and does nothing.
func NoopMessageCallback() MessageCallback {
	var discard MessageCallback = func(time.Duration, string) {}
	return discard
}
// Wait blocks until the watched resource reports a definitive "Ready" condition
// ("True", or a "False" that outlasts the error window), the timeout taken from
// options expires, or ctx is cancelled.
//
// watcher delivers the events to inspect, name is only used for the timeout error
// message, options provides the timeout and the error window (defaults apply for
// unset values) and msgCallback receives the message of every "Ready" condition
// that is not (yet) "True".
//
// It returns the error (nil on success) and the overall time spent waiting,
// measured from the call to Wait across all retries.
func (w *waitForReadyConfig) Wait(ctx context.Context, watcher watch.Interface, name string, options Options, msgCallback MessageCallback) (error, time.Duration) {
	// Taken once, outside of the retry loop, so that the reported duration and the
	// durations handed to msgCallback cover the whole wait, not only the last retry.
	start := time.Now()

	timeout := options.timeoutWithDefault()
	errorWindow := options.errorWindowWithDefault()
	timeoutTimer := time.NewTimer(timeout)
	defer timeoutTimer.Stop()

	timeoutErr := func() error {
		return fmt.Errorf("timeout: %s '%s' not ready after %d seconds", w.kind, name, int(timeout/time.Second))
	}

	for {
		retry, timeoutReached, err := w.waitForReadyCondition(ctx, watcher, start, timeoutTimer, errorWindow, msgCallback)
		if err != nil {
			return err, time.Since(start)
		}
		if timeoutReached {
			return timeoutErr(), time.Since(start)
		}
		if !retry {
			return nil, time.Since(start)
		}

		// Pause before restarting the loop to prevent CPU pegging, but stay responsive
		// to cancellation and to the overall timeout while pausing.
		// pollInterval is declared outside of this block.
		pause := time.NewTimer(pollInterval)
		select {
		case <-ctx.Done():
			pause.Stop()
			return ctx.Err(), time.Since(start)
		case <-timeoutTimer.C:
			pause.Stop()
			return timeoutErr(), time.Since(start)
		case <-pause.C:
		}
	}
}
// waitForReadyCondition consumes events from the watcher until the "Ready" condition
// has reached a definitive state.
//
// An errorWindow can be specified which takes intermediate "False" ready conditions into
// account: before an error is returned, this method waits for the errorWindow duration, and
// if a "True" or "Unknown" event arrives for the "Ready" condition in the meantime, the
// pending error is dropped ("Unknown") or the wait ends successfully ("True").
//
// start is the reference time for the durations passed to msgCallback, timeoutTimer is the
// overall timeout owned by the caller.
//
// Return values:
//   - (false, false, nil): "Ready" became "True", or the watch channel has been closed
//   - (true, false, nil): an event without an object was received, the caller should retry
//   - (false, true, nil): timeoutTimer fired before a definitive state was reached
//   - (false, false, err): ctx was cancelled, an event could not be evaluated, or "Ready"
//     stayed "False" for the whole errorWindow
func (w *waitForReadyConfig) waitForReadyCondition(
	ctx context.Context, watcher watch.Interface, start time.Time, timeoutTimer *time.Timer, errorWindow time.Duration, msgCallback MessageCallback,
) (retry bool, timeoutReached bool, err error) {
	// Channel used to transport the error from the error timer to the select loop.
	// It is buffered and only written to with a non-blocking send, so that the timer's
	// goroutine can never block (and leak) when this method has already returned or
	// when an error is already pending: Timer.Stop does not cancel a callback that has
	// already been started.
	errChan := make(chan error, 1)

	var errorTimer *time.Timer
	// Stop the error timer if it has been started because
	// a ConditionReady has been set to false
	defer func() {
		if errorTimer != nil {
			errorTimer.Stop()
		}
	}()

	for {
		select {
		case <-ctx.Done():
			return false, false, ctx.Err()
		case <-timeoutTimer.C:
			// We reached a timeout without receiving a "Ready" == "True" event
			return false, true, nil
		case err = <-errChan:
			// The error timer fired and we have not received a recovery event ("True" / "Unknown") in the
			// meantime. So the error status is considered to be final.
			return false, false, err
		case event, ok := <-watcher.ResultChan():
			if !ok || event.Object == nil {
				// retry only if the channel is still open
				return ok, false, nil
			}

			// Check whether resource is in sync already (meta.generation == status.observedGeneration)
			inSync, genErr := generationCheck(event.Object)
			if genErr != nil {
				return false, false, genErr
			}

			// Skip events if generations has not yet been consolidated, regardless of type.
			// Wait for the next event to come in until the generations align
			if !inSync {
				continue
			}

			// Skip event if its not a MODIFIED event, as only MODIFIED events update the condition
			// we are looking for.
			// This will filter out all synthetic ADDED events that are created by the API server for
			// the initial state. See https://kubernetes.io/docs/reference/using-api/api-concepts/#the-resourceversion-parameter
			// for details:
			// "Get State and Start at Most Recent: Start a watch at the most recent resource version,
			// which must be consistent (i.e. served from etcd via a quorum read). To establish initial state,
			// the watch begins with synthetic “Added” events of all resources instances that exist at the starting
			// resource version. All following watch events are for all changes that occurred after the resource
			// version the watch started at."
			if event.Type != watch.Modified {
				continue
			}

			conditions, extractErr := w.conditionsExtractor(event.Object)
			if extractErr != nil {
				return false, false, extractErr
			}
			for _, cond := range conditions {
				if cond.Type != apis.ConditionReady {
					continue
				}
				switch cond.Status {
				case corev1.ConditionTrue:
					// Any error timer running will be cancelled by the defer method that has been set above
					return false, false, nil
				case corev1.ConditionFalse:
					// Fire up a timer waiting for the error window duration to still allow to reconcile
					// to a true condition even after the condition went to false. If this is not the case within
					// this window, then an error is returned.
					// If there is already a timer running, it is kept and this event does not restart it.
					if errorTimer == nil {
						readyErr := fmt.Errorf("%s: %s", cond.Reason, cond.Message)
						errorTimer = time.AfterFunc(errorWindow, func() {
							select {
							case errChan <- readyErr:
							default:
								// An error is already pending, which is sufficient to end the wait
							}
						})
					}
				case corev1.ConditionUnknown:
					// If an errorTimer is triggered because of a previous "False" event, but now
					// we received an "Unknown" event during the error window, cancel the error timer
					// to avoid receiving an error signal.
					if errorTimer != nil {
						errorTimer.Stop()
						errorTimer = nil
					}
				}
				if cond.Message != "" {
					msgCallback(time.Since(start), cond.Message)
				}
			}
		}
	}
}
// Wait blocks until the expected EventDone is satisfied, i.e. until eventDone returns true
// for an event received from the watcher, the timeout taken from options expires, or ctx
// is cancelled.
//
// name is only used for the timeout error message; msgCallback is not used by this
// implementation. It returns the error (nil on success) and the overall time spent waiting.
func (w *waitForEvent) Wait(ctx context.Context, watcher watch.Interface, name string, options Options, msgCallback MessageCallback) (error, time.Duration) {
	timeout := options.timeoutWithDefault()
	start := time.Now()

	timer := time.NewTimer(timeout)
	defer timer.Stop()

	events := watcher.ResultChan()
	for {
		select {
		case <-ctx.Done():
			return ctx.Err(), time.Since(start)
		case <-timer.C:
			return fmt.Errorf("timeout: %s '%s' not ready after %d seconds", w.kind, name, int(timeout/time.Second)), time.Since(start)
		case event, ok := <-events:
			if !ok {
				// The watch has been closed, no further event can arrive. A closed channel
				// is always ready to receive from, so continuing to select on it would spin
				// at full CPU load and feed zero-value events to eventDone. A nil channel
				// never becomes ready, which leaves the timeout and ctx as the way out.
				events = nil
				continue
			}
			if w.eventDone(&event) {
				return nil, time.Since(start)
			}
		}
	}
}
// generationCheck reports whether the given object is in sync, i.e. whether its
// metadata "generation" equals its status "observedGeneration".
//
// An object whose status carries no "observedGeneration" is reported as not in sync
// without an error. An error is returned if the object cannot be converted to its
// unstructured form, if "metadata" or "status" cannot be extracted, or if the
// metadata has no "generation" field.
func generationCheck(object runtime.Object) (bool, error) {
	content, convErr := runtime.DefaultUnstructuredConverter.ToUnstructured(object)
	if convErr != nil {
		return false, convErr
	}

	metadata, hasMetadata := content["metadata"].(map[string]interface{})
	if !hasMetadata {
		return false, fmt.Errorf("cannot extract metadata from %v", object)
	}
	statusFields, hasStatus := content["status"].(map[string]interface{})
	if !hasStatus {
		return false, fmt.Errorf("cannot extract status from %v", object)
	}

	observed, found := statusFields["observedGeneration"]
	if !found {
		// Can be the case if no observed generation has been recorded in the status yet
		return false, nil
	}
	given, found := metadata["generation"]
	if !found {
		return false, fmt.Errorf("no field 'generation' in metadata of %v", object)
	}
	return observed == given, nil
}
// timeoutWithDefault returns the configured Timeout, or 60 seconds if none has been set.
func (o Options) timeoutWithDefault() time.Duration {
	if o.Timeout == nil {
		return 60 * time.Second
	}
	return *o.Timeout
}
// errorWindowWithDefault returns the configured ErrorWindow, or 2 seconds if none has been set.
func (o Options) errorWindowWithDefault() time.Duration {
	if o.ErrorWindow == nil {
		return 2 * time.Second
	}
	return *o.ErrorWindow
}