Don't update health check status during initialDelaySeconds

When InitialDelaySeconds in the kube yaml is set for a healthcheck,
don't update the healthcheck status till those initial delay seconds are over.
We were waiting to update for a failing healthcheck, but when the healthcheck
was successful during the initial delay time, the status was being updated as healthy
immediately.
This is misleading to users, who wonder why their healthcheck takes
much longer to fail in the failing case while it is quick to succeed in
the healthy case. It also doesn't match what the k8s InitialDelaySeconds
does. This change is only for kube play; podman healthcheck run is
unaffected.

Signed-off-by: Urvashi Mohnani <umohnani@redhat.com>
This commit is contained in:
Urvashi Mohnani 2023-11-27 10:07:31 -05:00
parent e0a524f630
commit f35d1c1c25
4 changed files with 121 additions and 2 deletions

View File

@ -153,6 +153,10 @@ const (
// of the container
UlimitAnnotation = "io.podman.annotations.ulimit"
// KubeHealthCheckAnnotation is used by kube play to tell podman that any health checks should follow
// the k8s behavior of waiting for the initialDelaySeconds to be over before updating the status
KubeHealthCheckAnnotation = "io.podman.annotations.kube.health.check"
// MaxKubeAnnotation is the max length of annotations allowed by Kubernetes.
MaxKubeAnnotation = 63
)

View File

@ -167,7 +167,7 @@ func (c *Container) runHealthCheck(ctx context.Context, isStartup bool) (define.
}
hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog)
logStatus, err := c.updateHealthCheckLog(hcl, inStartPeriod)
logStatus, err := c.updateHealthCheckLog(hcl, inStartPeriod, isStartup)
if err != nil {
return hcResult, "", fmt.Errorf("unable to update health check log %s for %s: %w", c.healthCheckLogPath(), c.ID(), err)
}
@ -375,10 +375,17 @@ func (c *Container) isUnhealthy() (bool, error) {
}
// updateHealthCheckLog parses the health check results and writes the log
func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, inStartPeriod bool) (string, error) {
func (c *Container) updateHealthCheckLog(hcl define.HealthCheckLog, inStartPeriod, isStartup bool) (string, error) {
c.lock.Lock()
defer c.lock.Unlock()
// If we are playing a kube yaml then let's honor the start period time for
// both failing and succeeding cases to match kube behavior.
// So don't update the health check log till the start period is over
if _, ok := c.config.Spec.Annotations[define.KubeHealthCheckAnnotation]; ok && inStartPeriod && !isStartup {
return "", nil
}
healthCheck, err := c.getHealthCheckLog()
if err != nil {
return "", err

View File

@ -431,6 +431,8 @@ func ToSpecGen(ctx context.Context, opts *CtrSpecGenOptions) (*specgen.SpecGener
s.Annotations[define.InspectAnnotationPublishAll] = publishAll
}
s.Annotations[define.KubeHealthCheckAnnotation] = "true"
// Environment Variables
envs := map[string]string{}
for _, env := range imageData.Config.Env {

View File

@ -821,3 +821,109 @@ EOF
run_podman rmi $local_image
}
# Checks that, under kube play, a liveness probe whose command succeeds keeps
# the container's health status at 'starting' while initialDelaySeconds (3s in
# the manifest below) is still running, and reports 'healthy' only afterwards.
@test "podman kube play healthcheck should wait initialDelaySeconds before updating status (healthy)" {
    fname="$PODMAN_TMPDIR/play_kube_healthy_$(random_string 6).yaml"
    # The probe cats /tmp/healthy, the same file the container command touches.
    # The manifest text is reproduced verbatim; do not reformat it here.
    echo "
apiVersion: v1
kind: Pod
metadata:
labels:
name: liveness-exec
spec:
containers:
- name: liveness
image: $IMAGE
args:
- /bin/sh
- -c
- touch /tmp/healthy && sleep 100
livenessProbe:
exec:
command:
- cat
- /tmp/healthy
initialDelaySeconds: 3
failureThreshold: 1
periodSeconds: 1
" > $fname
    run_podman kube play $fname
    ctrName="liveness-exec-liveness"

    # Keep checking status. For the first 2 seconds it must be 'starting'.
    # The "1-" prefix in the format string tags output from this first phase.
    t0=$SECONDS
    while [[ $SECONDS -le $((t0 + 2)) ]]; do
        run_podman inspect $ctrName --format "1-{{.State.Health.Status}}"
        assert "$output" == "1-starting" "Health.Status at $((SECONDS - t0))"
        sleep 0.5
    done

    # After 3 seconds it may take another second to go healthy. Wait.
    # Poll every half second and stop as soon as the status flips; the
    # "2-" prefix tags output from this second phase.
    t0=$SECONDS
    while [[ $SECONDS -le $((t0 + 3)) ]]; do
        run_podman inspect $ctrName --format "2-{{.State.Health.Status}}"
        if [[ "$output" = "2-healthy" ]]; then
            break;
        fi
        sleep 0.5
    done
    # Quote $output so an empty or multi-word value is still passed to assert
    # as a single argument (matches the quoted form used in the first loop).
    assert "$output" == "2-healthy" "After 3 seconds"

    # Tear down everything this test created.
    run_podman kube down $fname
    run_podman pod rm -a
    run_podman rm -a
}
# Checks that, under kube play, a liveness probe whose command fails keeps the
# container's health status at 'starting' while initialDelaySeconds (3s in the
# manifest below) is still running, and reports 'unhealthy' only afterwards.
@test "podman kube play healthcheck should wait initialDelaySeconds before updating status (unhealthy)" {
    fname="$PODMAN_TMPDIR/play_kube_unhealthy_$(random_string 6).yaml"
    # The probe cats /tmp/randomfile, which the container command never creates
    # (it only touches /tmp/healthy). failureThreshold is 1 in the manifest.
    # The manifest text is reproduced verbatim; do not reformat it here.
    echo "
apiVersion: v1
kind: Pod
metadata:
labels:
name: liveness-exec
spec:
containers:
- name: liveness
image: $IMAGE
args:
- /bin/sh
- -c
- touch /tmp/healthy && sleep 100
livenessProbe:
exec:
command:
- cat
- /tmp/randomfile
initialDelaySeconds: 3
failureThreshold: 1
periodSeconds: 1
" > $fname
    run_podman kube play $fname
    ctrName="liveness-exec-liveness"

    # Keep checking status. For the first 2 seconds it must be 'starting'.
    # The "1-" prefix in the format string tags output from this first phase.
    t0=$SECONDS
    while [[ $SECONDS -le $((t0 + 2)) ]]; do
        run_podman inspect $ctrName --format "1-{{.State.Health.Status}}"
        assert "$output" == "1-starting" "Health.Status at $((SECONDS - t0))"
        sleep 0.5
    done

    # After 3 seconds it may take another second to go unhealthy. Wait.
    # Poll every half second and stop as soon as the status flips; the
    # "2-" prefix tags output from this second phase.
    t0=$SECONDS
    while [[ $SECONDS -le $((t0 + 3)) ]]; do
        run_podman inspect $ctrName --format "2-{{.State.Health.Status}}"
        if [[ "$output" = "2-unhealthy" ]]; then
            break;
        fi
        sleep 0.5
    done
    # Quote $output so an empty or multi-word value is still passed to assert
    # as a single argument (matches the quoted form used in the first loop).
    assert "$output" == "2-unhealthy" "After 3 seconds"

    # Tear down everything this test created.
    run_podman kube down $fname
    run_podman pod rm -a
    run_podman rm -a
}