Add retries to certain `linkerd check` checkers (#4171)

## Motivation

Testing #4167 revealed several `linkerd check` failures that occur only
because the checks run too soon after cluster creation or install. When
retried, they pass on the second attempt.

Some checkers already handle this with the `retryDeadline` field. If a checker
does not set this field, there is no retry.
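
For context, the sketch below illustrates the general retry-until-deadline
pattern that a `retryDeadline`-style field enables. The helper name
`runWithRetry`, the retry interval, and the example check are invented for
illustration; this is not the actual linkerd health check runner.

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// runWithRetry re-runs check until it succeeds or the deadline passes,
// pausing retryWindow between attempts. On a missed deadline, the last
// error is surfaced to the caller.
func runWithRetry(ctx context.Context, check func(context.Context) error, deadline time.Time, retryWindow time.Duration) error {
	for {
		err := check(ctx)
		if err == nil {
			return nil
		}
		if time.Now().After(deadline) {
			// Out of time: report the most recent failure.
			return err
		}
		time.Sleep(retryWindow)
	}
}

func main() {
	attempts := 0
	// A check that fails twice before succeeding, mimicking a cluster that
	// needs a few more seconds after install before replica sets are ready.
	check := func(context.Context) error {
		attempts++
		if attempts < 3 {
			return errors.New("replica sets not ready yet")
		}
		return nil
	}

	// Retry for up to 30 seconds, pausing 1 second between attempts.
	err := runWithRetry(context.Background(), check, time.Now().Add(30*time.Second), time.Second)
	fmt.Printf("result: %v after %d attempts\n", err, attempts)
}
```

The key property is that a transiently failing check (for example, replica
sets that are not yet ready right after install) succeeds on a later attempt
instead of failing the whole `linkerd check` run.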

## Solution

Add retries to the `l5d-existence-replicasets` and
`l5d-existence-unschedulable-pods` checks by setting their `retryDeadline`
field to `hc.RetryDeadline`, so that they do not fail during a chained
cluster creation > install > check workflow.
Commit e5b0ea28d4 (parent 18b6e4a723), authored by Kevin Leimkuhler on 2020-03-16 13:15:42 -07:00 and committed by GitHub.
1 changed file with 8 additions and 6 deletions.

```diff
@@ -600,9 +600,10 @@ func (hc *HealthChecker) allCategories() []category {
 				},
 			},
 			{
-				description: "control plane replica sets are ready",
-				hintAnchor:  "l5d-existence-replicasets",
-				fatal:       true,
+				description:   "control plane replica sets are ready",
+				hintAnchor:    "l5d-existence-replicasets",
+				retryDeadline: hc.RetryDeadline,
+				fatal:         true,
 				check: func(context.Context) error {
 					controlPlaneReplicaSet, err := hc.kubeAPI.GetReplicaSets(hc.ControlPlaneNamespace)
 					if err != nil {
@@ -612,9 +613,10 @@ func (hc *HealthChecker) allCategories() []category {
 				},
 			},
 			{
-				description: "no unschedulable pods",
-				hintAnchor:  "l5d-existence-unschedulable-pods",
-				fatal:       true,
+				description:   "no unschedulable pods",
+				hintAnchor:    "l5d-existence-unschedulable-pods",
+				retryDeadline: hc.RetryDeadline,
+				fatal:         true,
 				check: func(context.Context) error {
 					// do not save this into hc.controlPlanePods, as this check may
 					// succeed prior to all expected control plane pods being up
```