Move Docker "health-check" to DockerBuilder

This commit is contained in:
Ciprian Hacman 2019-12-29 18:42:59 +02:00
parent 88600407f4
commit 507230fe75
11 changed files with 392 additions and 28 deletions

View File

@ -1017,6 +1017,12 @@ func (b *DockerBuilder) Build(c *fi.ModelBuilderContext) error {
return err return err
} }
if b.Distribution.IsDebianFamily() {
c.AddTask(b.buildSystemdHealthCheckScript())
c.AddTask(b.buildSystemdHealthCheckService())
c.AddTask(b.buildSystemdHealthCheckTimer())
}
return nil return nil
} }
@ -1147,6 +1153,60 @@ func (b *DockerBuilder) buildSystemdService(dockerVersionMajor int, dockerVersio
return service return service
} }
// buildSystemdHealthCheckScript returns the file task that installs the
// docker-healthcheck script (resources.DockerHealthCheck) at
// /opt/kops/bin/docker-healthcheck with mode 0755.
func (b *DockerBuilder) buildSystemdHealthCheckScript() *nodetasks.File {
	return &nodetasks.File{
		Path:     "/opt/kops/bin/docker-healthcheck",
		Contents: fi.NewStringResource(resources.DockerHealthCheck),
		Type:     nodetasks.FileType_File,
		Mode:     s("0755"),
	}
}
// buildSystemdHealthCheckService returns the service task for
// docker-healthcheck.service: a oneshot systemd unit whose ExecStart is
// /opt/kops/bin/docker-healthcheck. The rendered manifest is logged at
// verbosity 8 and the task's defaults are initialised before it is returned.
func (b *DockerBuilder) buildSystemdHealthCheckService() *nodetasks.Service {
	const unitName = "docker-healthcheck.service"

	// Keys are set in the same order as they should appear in the rendered unit.
	unit := &systemd.Manifest{}
	unit.Set("Unit", "Description", "Run docker-healthcheck once")
	unit.Set("Unit", "Documentation", "https://kops.sigs.k8s.io")
	unit.Set("Service", "Type", "oneshot")
	unit.Set("Service", "ExecStart", "/opt/kops/bin/docker-healthcheck")
	unit.Set("Install", "WantedBy", "multi-user.target")

	rendered := unit.Render()
	klog.V(8).Infof("Built service manifest %q\n%s", unitName, rendered)

	task := &nodetasks.Service{
		Name:       unitName,
		Definition: s(rendered),
	}
	task.InitDefaults()

	return task
}
// buildSystemdHealthCheckTimer returns the service task for
// docker-healthcheck.timer: a systemd timer unit with OnUnitInactiveSec=10s
// that targets docker-healthcheck.service. The rendered manifest is logged at
// verbosity 8 and the task's defaults are initialised before it is returned.
func (b *DockerBuilder) buildSystemdHealthCheckTimer() *nodetasks.Service {
	const unitName = "docker-healthcheck.timer"

	// Keys are set in the same order as they should appear in the rendered unit.
	unit := &systemd.Manifest{}
	unit.Set("Unit", "Description", "Trigger docker-healthcheck periodically")
	unit.Set("Unit", "Documentation", "https://kops.sigs.k8s.io")
	unit.Set("Timer", "OnUnitInactiveSec", "10s")
	unit.Set("Timer", "Unit", "docker-healthcheck.service")
	unit.Set("Install", "WantedBy", "multi-user.target")

	rendered := unit.Render()
	klog.V(8).Infof("Built timer manifest %q\n%s", unitName, rendered)

	task := &nodetasks.Service{
		Name:       unitName,
		Definition: s(rendered),
	}
	task.InitDefaults()

	return task
}
// buildContainerOSConfigurationDropIn is responsible for configuring the docker daemon options // buildContainerOSConfigurationDropIn is responsible for configuring the docker daemon options
func (b *DockerBuilder) buildContainerOSConfigurationDropIn(c *fi.ModelBuilderContext) error { func (b *DockerBuilder) buildContainerOSConfigurationDropIn(c *fi.ModelBuilderContext) error {
lines := []string{ lines := []string{

View File

@ -3,8 +3,9 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library( go_library(
name = "go_default_library", name = "go_default_library",
srcs = [ srcs = [
"containerd.go", "containerd_license.go",
"docker.go", "docker_healthcheck.go",
"docker_license.go",
], ],
importpath = "k8s.io/kops/nodeup/pkg/model/resources", importpath = "k8s.io/kops/nodeup/pkg/model/resources",
visibility = ["//visibility:public"], visibility = ["//visibility:public"],

View File

@ -1,6 +1,24 @@
#!/bin/bash /*
Copyright 2019 The Kubernetes Authors.
# Copyright 2015 The Kubernetes Authors All rights reserved. Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package resources
var DockerHealthCheck = `#!/bin/bash
# Copyright 2019 The Kubernetes Authors All rights reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
@ -18,7 +36,7 @@
# of docker. If it detects a failure, it will restart docker using systemctl. # of docker. If it detects a failure, it will restart docker using systemctl.
healthcheck() { healthcheck() {
if output=`timeout 60 docker network ls`; then if output=` + "`timeout 60 docker network ls`" + `; then
echo "$output" | fgrep -qw host || { echo "$output" | fgrep -qw host || {
echo "docker 'host' network missing" echo "docker 'host' network missing"
return 1 return 1
@ -47,7 +65,7 @@ echo "docker still unresponsive; triggering docker restart"
systemctl stop docker systemctl stop docker
echo "wait all tcp sockets to close" echo "wait all tcp sockets to close"
sleep `cat /proc/sys/net/ipv4/tcp_fin_timeout` sleep ` + "`cat /proc/sys/net/ipv4/tcp_fin_timeout`" + `
sleep 10 sleep 10
systemctl start docker systemctl start docker
@ -61,3 +79,4 @@ if healthcheck; then
fi fi
echo "docker still failing" echo "docker still failing"
`

View File

@ -4,6 +4,74 @@ contents: |-
path: /etc/sysconfig/docker path: /etc/sysconfig/docker
type: file type: file
--- ---
contents: |
#!/bin/bash
# Copyright 2019 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script is intended to be run periodically, to check the health
# of docker. If it detects a failure, it will restart docker using systemctl.
healthcheck() {
if output=`timeout 60 docker network ls`; then
echo "$output" | fgrep -qw host || {
echo "docker 'host' network missing"
return 1
}
else
echo "docker returned $?"
return 1
fi
}
if healthcheck; then
echo "docker healthy"
exit 0
fi
echo "docker failed"
echo "Giving docker 30 seconds grace before restarting"
sleep 30
if healthcheck; then
echo "docker recovered"
exit 0
fi
echo "docker still unresponsive; triggering docker restart"
systemctl stop docker
echo "wait all tcp sockets to close"
sleep `cat /proc/sys/net/ipv4/tcp_fin_timeout`
sleep 10
systemctl start docker
echo "Waiting 120 seconds to give docker time to start"
sleep 60
if healthcheck; then
echo "docker recovered"
exit 0
fi
echo "docker still failing"
mode: "0755"
path: /opt/kops/bin/docker-healthcheck
type: file
---
contents: |2 contents: |2
@ -217,6 +285,40 @@ preventStart: true
source: http://apt.dockerproject.org/repo/pool/main/d/docker-engine/docker-engine_1.12.1-0~xenial_amd64.deb source: http://apt.dockerproject.org/repo/pool/main/d/docker-engine/docker-engine_1.12.1-0~xenial_amd64.deb
version: 1.12.1-0~xenial version: 1.12.1-0~xenial
--- ---
Name: docker-healthcheck.service
definition: |
[Unit]
Description=Run docker-healthcheck once
Documentation=https://kops.sigs.k8s.io
[Service]
Type=oneshot
ExecStart=/opt/kops/bin/docker-healthcheck
[Install]
WantedBy=multi-user.target
enabled: true
manageState: true
running: true
smartRestart: true
---
Name: docker-healthcheck.timer
definition: |
[Unit]
Description=Trigger docker-healthcheck periodically
Documentation=https://kops.sigs.k8s.io
[Timer]
OnUnitInactiveSec=10s
Unit=docker-healthcheck.service
[Install]
WantedBy=multi-user.target
enabled: true
manageState: true
running: true
smartRestart: true
---
Name: docker.service Name: docker.service
definition: | definition: |
[Unit] [Unit]

View File

@ -4,6 +4,74 @@ contents: |-
path: /etc/sysconfig/docker path: /etc/sysconfig/docker
type: file type: file
--- ---
contents: |
#!/bin/bash
# Copyright 2019 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script is intended to be run periodically, to check the health
# of docker. If it detects a failure, it will restart docker using systemctl.
healthcheck() {
if output=`timeout 60 docker network ls`; then
echo "$output" | fgrep -qw host || {
echo "docker 'host' network missing"
return 1
}
else
echo "docker returned $?"
return 1
fi
}
if healthcheck; then
echo "docker healthy"
exit 0
fi
echo "docker failed"
echo "Giving docker 30 seconds grace before restarting"
sleep 30
if healthcheck; then
echo "docker recovered"
exit 0
fi
echo "docker still unresponsive; triggering docker restart"
systemctl stop docker
echo "wait all tcp sockets to close"
sleep `cat /proc/sys/net/ipv4/tcp_fin_timeout`
sleep 10
systemctl start docker
echo "Waiting 120 seconds to give docker time to start"
sleep 60
if healthcheck; then
echo "docker recovered"
exit 0
fi
echo "docker still failing"
mode: "0755"
path: /opt/kops/bin/docker-healthcheck
type: file
---
contents: |2 contents: |2
@ -217,6 +285,40 @@ preventStart: true
source: http://apt.dockerproject.org/repo/pool/main/d/docker-engine/docker-engine_1.12.3-0~xenial_amd64.deb source: http://apt.dockerproject.org/repo/pool/main/d/docker-engine/docker-engine_1.12.3-0~xenial_amd64.deb
version: 1.12.3-0~xenial version: 1.12.3-0~xenial
--- ---
Name: docker-healthcheck.service
definition: |
[Unit]
Description=Run docker-healthcheck once
Documentation=https://kops.sigs.k8s.io
[Service]
Type=oneshot
ExecStart=/opt/kops/bin/docker-healthcheck
[Install]
WantedBy=multi-user.target
enabled: true
manageState: true
running: true
smartRestart: true
---
Name: docker-healthcheck.timer
definition: |
[Unit]
Description=Trigger docker-healthcheck periodically
Documentation=https://kops.sigs.k8s.io
[Timer]
OnUnitInactiveSec=10s
Unit=docker-healthcheck.service
[Install]
WantedBy=multi-user.target
enabled: true
manageState: true
running: true
smartRestart: true
---
Name: docker.service Name: docker.service
definition: | definition: |
[Unit] [Unit]
@ -236,7 +338,6 @@ definition: |
LimitNOFILE=1048576 LimitNOFILE=1048576
LimitNPROC=1048576 LimitNPROC=1048576
LimitCORE=infinity LimitCORE=infinity
TasksMax=infinity
Restart=always Restart=always
RestartSec=2s RestartSec=2s
StartLimitInterval=0 StartLimitInterval=0

View File

@ -4,6 +4,74 @@ contents: |-
path: /etc/sysconfig/docker path: /etc/sysconfig/docker
type: file type: file
--- ---
contents: |
#!/bin/bash
# Copyright 2019 The Kubernetes Authors All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script is intended to be run periodically, to check the health
# of docker. If it detects a failure, it will restart docker using systemctl.
healthcheck() {
if output=`timeout 60 docker network ls`; then
echo "$output" | fgrep -qw host || {
echo "docker 'host' network missing"
return 1
}
else
echo "docker returned $?"
return 1
fi
}
if healthcheck; then
echo "docker healthy"
exit 0
fi
echo "docker failed"
echo "Giving docker 30 seconds grace before restarting"
sleep 30
if healthcheck; then
echo "docker recovered"
exit 0
fi
echo "docker still unresponsive; triggering docker restart"
systemctl stop docker
echo "wait all tcp sockets to close"
sleep `cat /proc/sys/net/ipv4/tcp_fin_timeout`
sleep 10
systemctl start docker
echo "Waiting 120 seconds to give docker time to start"
sleep 60
if healthcheck; then
echo "docker recovered"
exit 0
fi
echo "docker still failing"
mode: "0755"
path: /opt/kops/bin/docker-healthcheck
type: file
---
contents: |2 contents: |2
@ -217,6 +285,40 @@ preventStart: true
source: http://apt.dockerproject.org/repo/pool/main/d/docker-engine/docker-engine_1.12.3-0~xenial_amd64.deb source: http://apt.dockerproject.org/repo/pool/main/d/docker-engine/docker-engine_1.12.3-0~xenial_amd64.deb
version: 1.12.3-0~xenial version: 1.12.3-0~xenial
--- ---
Name: docker-healthcheck.service
definition: |
[Unit]
Description=Run docker-healthcheck once
Documentation=https://kops.sigs.k8s.io
[Service]
Type=oneshot
ExecStart=/opt/kops/bin/docker-healthcheck
[Install]
WantedBy=multi-user.target
enabled: true
manageState: true
running: true
smartRestart: true
---
Name: docker-healthcheck.timer
definition: |
[Unit]
Description=Trigger docker-healthcheck periodically
Documentation=https://kops.sigs.k8s.io
[Timer]
OnUnitInactiveSec=10s
Unit=docker-healthcheck.service
[Install]
WantedBy=multi-user.target
enabled: true
manageState: true
running: true
smartRestart: true
---
Name: docker.service Name: docker.service
definition: | definition: |
[Unit] [Unit]

View File

@ -1,9 +0,0 @@
[Unit]
Description=Run docker-healthcheck once
[Service]
Type=oneshot
ExecStart=/opt/kubernetes/helpers/docker-healthcheck
[Install]
WantedBy=multi-user.target

View File

@ -1,9 +0,0 @@
[Unit]
Description=Trigger docker-healthcheck periodically
[Timer]
OnUnitInactiveSec=10s
Unit=docker-healthcheck.service
[Install]
WantedBy=multi-user.target