mirror of https://github.com/kubernetes/kops.git
Fix docker-healthcheck to work around Docker bug.
This is a workaround to better detect moby/moby#38642, where Docker starts up but remains stuck. In this case `docker ps` returns nothing and exits 0, but no container can actually start. A better (but more expensive and more intrusive) test would be to `docker run --rm` some cheap test container to confirm that we can actually start a container.
This commit is contained in:
parent
aa87f63b1a
commit
b67cf8fbcf
|
|
@ -17,7 +17,19 @@
|
|||
# This script is intended to be run periodically, to check the health
|
||||
# of docker. If it detects a failure, it will restart docker using systemctl.
|
||||
|
||||
if timeout 60 docker ps > /dev/null; then
|
||||
#######################################
# Check whether the docker daemon looks healthy.
#
# `docker ps` alone is not a sufficient probe: with moby/moby#38642 docker
# can start up, stay stuck, and still answer `docker ps` with exit code 0.
# This check instead lists the docker networks and requires that a network
# named exactly "host" is present.
#
# Outputs: a one-line reason on stdout when the check fails.
# Returns: 0 if docker answered and the "host" network exists, 1 otherwise.
#######################################
healthcheck() {
  local networks

  # Ask for network names only, one per line, so the match below cannot be
  # satisfied by another column or by a name that merely contains "host".
  # The 60 second timeout guards against a hung docker CLI.
  networks=$(timeout 60 docker network ls --format '{{.Name}}') || {
    # $? is still the exit status of the failed command substitution here.
    echo "docker returned $?"
    return 1
  }

  # -x: whole-line match, -F: fixed string (replaces the obsolescent fgrep).
  if ! printf '%s\n' "$networks" | grep -qxF host; then
    echo "docker 'host' network missing"
    return 1
  fi

  return 0
}
|
||||
|
||||
if healthcheck; then
|
||||
echo "docker healthy"
|
||||
exit 0
|
||||
fi
|
||||
|
|
@ -26,7 +38,7 @@ echo "docker failed"
|
|||
echo "Giving docker 30 seconds grace before restarting"
|
||||
sleep 30
|
||||
|
||||
if timeout 60 docker ps > /dev/null; then
|
||||
if healthcheck; then
|
||||
echo "docker recovered"
|
||||
exit 0
|
||||
fi
|
||||
|
|
@ -43,7 +55,7 @@ systemctl start docker
|
|||
echo "Waiting 120 seconds to give docker time to start"
|
||||
sleep 60
|
||||
|
||||
if timeout 60 docker ps > /dev/null; then
|
||||
if healthcheck; then
|
||||
echo "docker recovered"
|
||||
exit 0
|
||||
fi
|
||||
|
|
|
|||
Loading…
Reference in New Issue