healthcheck phase 2

integration of healthcheck into create and run as well as inspect.
healthcheck enhancements are as follows:

* add the following options to create|run so that non-docker images can
define healthchecks at the container level.
  * --healthcheck-command
  * --healthcheck-retries
  * --healthcheck-interval
  * --healthcheck-start-period
  * --healthcheck-timeout

* podman create|run --healthcheck-command=none disables healthcheck as
described by an image.
* the healthcheck itself and the healthcheck "history" can now be
observed in podman inspect
* added the wiring for healthcheck history which logs the health history
of the container, the current failed streak attempts, and log entries
for the last five attempts which themselves have start and stop times,
result, and a 500 character truncated (if needed) log of stderr/stdout.

The timings themselves are not implemented in this PR but will be in
future enablement (i.e. next).

Signed-off-by: baude <bbaude@redhat.com>
This commit is contained in:
baude 2019-03-06 12:12:35 -06:00
parent 7038cac53c
commit 03716cf7f3
12 changed files with 351 additions and 61 deletions

View File

@ -287,7 +287,26 @@ func getCreateFlags(c *cliconfig.PodmanCommand) {
createFlags.Bool(
"help", false, "",
)
createFlags.String(
"healthcheck-command", "",
"set a healthcheck command for the container ('none' disables the existing healthcheck)",
)
createFlags.String(
"healthcheck-interval", "30s",
"set an interval for the healthchecks",
)
createFlags.Uint(
"healthcheck-retries", 3,
"the number of retries allowed before a healthcheck is considered to be unhealthy",
)
createFlags.String(
"healthcheck-start-period", "0s",
"the initialization time needed for a container to bootstrap",
)
createFlags.String(
"healthcheck-timeout", "30s",
"the maximum time allowed to complete the healthcheck before an interval is considered failed",
)
createFlags.StringP(
"hostname", "h", "",
"Set container hostname",

View File

@ -48,6 +48,6 @@ func healthCheckCmd(c *cliconfig.HealthCheckValues) error {
}
return err
}
fmt.Println("\nhealthy")
fmt.Println("healthy")
return nil
}

View File

@ -523,6 +523,7 @@ func GetCtrInspectInfo(config *libpod.ContainerConfig, ctrInspectData *inspect.C
StopSignal: config.StopSignal,
Cmd: config.Spec.Process.Args,
Entrypoint: strings.Join(createArtifact.Entrypoint, " "),
Healthcheck: config.HealthCheckConfig,
},
}
return data, nil

View File

@ -11,6 +11,7 @@ import (
"strconv"
"strings"
"syscall"
"time"
"github.com/containers/image/manifest"
"github.com/containers/libpod/cmd/podman/cliconfig"
@ -26,6 +27,7 @@ import (
"github.com/docker/docker/pkg/signal"
"github.com/docker/go-connections/nat"
"github.com/docker/go-units"
"github.com/google/shlex"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/opentracing/opentracing-go"
@ -40,8 +42,7 @@ func getContext() context.Context {
func CreateContainer(ctx context.Context, c *cliconfig.PodmanCommand, runtime *libpod.Runtime) (*libpod.Container, *cc.CreateConfig, error) {
var (
hasHealthCheck bool
healthCheck *manifest.Schema2HealthConfig
healthCheck *manifest.Schema2HealthConfig
)
if c.Bool("trace") {
span, _ := opentracing.StartSpanFromContext(ctx, "createContainer")
@ -89,18 +90,31 @@ func CreateContainer(ctx context.Context, c *cliconfig.PodmanCommand, runtime *l
imageName = newImage.ID()
}
// add healthcheck if it exists AND is correct mediatype
_, mediaType, err := newImage.Manifest(ctx)
if err != nil {
return nil, nil, errors.Wrapf(err, "unable to determine mediatype of image %s", newImage.ID())
}
if mediaType == manifest.DockerV2Schema2MediaType {
healthCheck, err = newImage.GetHealthCheck(ctx)
if err != nil {
return nil, nil, errors.Wrapf(err, "unable to get healthcheck for %s", c.InputArgs[0])
}
if healthCheck != nil {
hasHealthCheck = true
var healthCheckCommandInput string
// if the user disabled the healthcheck with "none", we skip adding it
healthCheckCommandInput = c.String("healthcheck-command")
// the user didnt disable the healthcheck but did pass in a healthcheck command
// now we need to make a healthcheck from the commandline input
if healthCheckCommandInput != "none" {
if len(healthCheckCommandInput) > 0 {
healthCheck, err = makeHealthCheckFromCli(c)
if err != nil {
return nil, nil, errors.Wrapf(err, "unable to create healthcheck")
}
} else {
// the user did not disable the health check and did not pass in a healthcheck
// command as input. so now we add healthcheck if it exists AND is correct mediatype
_, mediaType, err := newImage.Manifest(ctx)
if err != nil {
return nil, nil, errors.Wrapf(err, "unable to determine mediatype of image %s", newImage.ID())
}
if mediaType == manifest.DockerV2Schema2MediaType {
healthCheck, err = newImage.GetHealthCheck(ctx)
if err != nil {
return nil, nil, errors.Wrapf(err, "unable to get healthcheck for %s", c.InputArgs[0])
}
}
}
}
}
@ -111,7 +125,6 @@ func CreateContainer(ctx context.Context, c *cliconfig.PodmanCommand, runtime *l
// Because parseCreateOpts does derive anything from the image, we add health check
// at this point. The rest is done by WithOptions.
createConfig.HasHealthCheck = hasHealthCheck
createConfig.HealthCheck = healthCheck
ctr, err := CreateContainerFromCreateConfig(runtime, createConfig, ctx, nil)
@ -835,3 +848,58 @@ var defaultEnvVariables = map[string]string{
"PATH": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"TERM": "xterm",
}
// makeHealthCheckFromCli builds a healthcheck configuration from the
// --healthcheck-* command line flags.
//
// The command string is tokenized with shlex.Split and becomes the Test of
// the returned config. The interval and timeout must each be at least one
// second, the start period must not be negative, and retries must be at
// least one. An error is returned when the command is empty or when any of
// the values fails to parse or validate.
func makeHealthCheckFromCli(c *cliconfig.PodmanCommand) (*manifest.Schema2HealthConfig, error) {
	inCommand := c.String("healthcheck-command")
	inInterval := c.String("healthcheck-interval")
	inRetries := c.Uint("healthcheck-retries")
	inTimeout := c.String("healthcheck-timeout")
	inStartPeriod := c.String("healthcheck-start-period")

	// Every healthcheck requires a command
	if len(inCommand) == 0 {
		return nil, errors.New("Must define a healthcheck command for all healthchecks")
	}

	cmd, err := shlex.Split(inCommand)
	if err != nil {
		return nil, errors.Wrap(err, "failed to parse healthcheck command")
	}
	hc := manifest.Schema2HealthConfig{
		Test: cmd,
	}

	intervalDuration, err := time.ParseDuration(inInterval)
	if err != nil {
		return nil, errors.Wrapf(err, "invalid healthcheck-interval %s ", inInterval)
	}
	if intervalDuration < time.Second {
		return nil, errors.New("healthcheck-interval must be at least 1 second")
	}
	hc.Interval = intervalDuration

	if inRetries < 1 {
		return nil, errors.New("healthcheck-retries must be greater than 0.")
	}
	// Store the validated retry count. Without this assignment the config
	// keeps the zero value and the --healthcheck-retries flag has no effect.
	hc.Retries = int(inRetries)

	timeoutDuration, err := time.ParseDuration(inTimeout)
	if err != nil {
		return nil, errors.Wrapf(err, "invalid healthcheck-timeout %s", inTimeout)
	}
	if timeoutDuration < time.Second {
		return nil, errors.New("healthcheck-timeout must be at least 1 second")
	}
	hc.Timeout = timeoutDuration

	startPeriodDuration, err := time.ParseDuration(inStartPeriod)
	if err != nil {
		return nil, errors.Wrapf(err, "invalid healthcheck-start-period %s", inStartPeriod)
	}
	if startPeriodDuration < 0 {
		return nil, errors.New("healthcheck-start-period must be a 0 seconds or greater")
	}
	hc.StartPeriod = startPeriodDuration

	return &hc, nil
}

View File

@ -256,6 +256,23 @@ The following example maps uids 0-2000 in the container to the uids 30000-31999
Add additional groups to run as
**--healthcheck**=""
Set or alter a healthcheck for a container. The value must be of the format of:
`[OPTIONS] CMD command`
where options can be any of the following:
* --interval=DURATION (default: 30s)
* --timeout=DURATION (default: 30s)
* --start-period=DURATION (default: 0s)
* --retries=N (default: 3)
Note: options are *not* required.
The command is a command to be executed inside your container that determines your container health. The
command is required.
**--hostname**=""
Container host name

View File

@ -264,6 +264,23 @@ The example maps gids 0-2000 in the container to the gids 30000-31999 on the hos
Add additional groups to run as
**--healthcheck**=""
Set or alter a healthcheck for a container. The value must be of the format of:
`[OPTIONS] CMD command`
where options can be any of the following:
* --interval=DURATION (default: 30s)
* --timeout=DURATION (default: 30s)
* --start-period=DURATION (default: 0s)
* --retries=N (default: 3)
Note: options are *not* required.
The command is a command to be executed inside your container that determines your container health. The
command is required.
**--hostname**=""
Container host name

View File

@ -368,7 +368,7 @@ type ContainerConfig struct {
Systemd bool `json:"systemd"`
// HealthCheckConfig has the health check command and related timings
HealthCheckConfig *manifest.Schema2HealthConfig
HealthCheckConfig *manifest.Schema2HealthConfig `json:"healthcheck"`
}
// ContainerStatus returns a string representation for users

View File

@ -127,6 +127,17 @@ func (c *Container) getContainerInspectData(size bool, driverData *inspect.Data)
IsInfra: c.IsInfra(),
}
if c.config.HealthCheckConfig != nil {
// This container has a healthcheck defined in it; we need to add its state
healthCheckState, err := c.GetHealthCheckLog()
if err != nil {
// An error here is not considered fatal; no health state will be displayed
logrus.Error(err)
} else {
data.State.Healthcheck = healthCheckState
}
}
// Copy port mappings into network settings
if config.PortMappings != nil {
data.NetworkSettings.Ports = config.PortMappings

View File

@ -1,9 +1,15 @@
package libpod
import (
"bufio"
"bytes"
"io/ioutil"
"os"
"path/filepath"
"strings"
"time"
"github.com/containers/libpod/pkg/inspect"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
@ -30,8 +36,29 @@ const (
HealthCheckInternalError HealthCheckStatus = iota
// HealthCheckDefined means the healthcheck was found on the container
HealthCheckDefined HealthCheckStatus = iota
// MaxHealthCheckNumberLogs is the maximum number of attempts we keep
// in the healthcheck history file
MaxHealthCheckNumberLogs int = 5
// MaxHealthCheckLogLength in characters
MaxHealthCheckLogLength = 500
// HealthCheckHealthy describes a healthy container
HealthCheckHealthy string = "healthy"
// HealthCheckUnhealthy describes an unhealthy container
HealthCheckUnhealthy string = "unhealthy"
)
// hcWriteCloser wraps a bufio.Writer and adds a Close method so that the
// buffered writer can be used where a write-closer is expected.
type hcWriteCloser struct {
	*bufio.Writer
}

// Close flushes any data still held in the buffer to the underlying writer.
// There is no other resource to release, so the only error it can return is
// a failed flush. Closing a zero-value hcWriteCloser is a no-op.
func (hcwc hcWriteCloser) Close() error {
	if hcwc.Writer == nil {
		return nil
	}
	// bufio.Writer holds small writes in memory; without a flush they would
	// never reach the destination.
	return hcwc.Flush()
}
// HealthCheck verifies the state and validity of the healthcheck configuration
// on the container and then executes the healthcheck
func (r *Runtime) HealthCheck(name string) (HealthCheckStatus, error) {
@ -48,33 +75,51 @@ func (r *Runtime) HealthCheck(name string) (HealthCheckStatus, error) {
// RunHealthCheck runs the health check as defined by the container
func (c *Container) RunHealthCheck() (HealthCheckStatus, error) {
var newCommand []string
var (
newCommand []string
returnCode int
capture bytes.Buffer
)
hcStatus, err := checkHealthCheckCanBeRun(c)
if err != nil {
return hcStatus, err
}
hcCommand := c.HealthCheckConfig().Test
if len(hcCommand) > 0 && hcCommand[0] == "CMD-SHELL" {
newCommand = []string{"sh", "-c"}
newCommand = append(newCommand, hcCommand[1:]...)
newCommand = []string{"sh", "-c", strings.Join(hcCommand[1:], " ")}
} else {
newCommand = hcCommand
}
// TODO when history/logging is implemented for healthcheck, we need to change the output streams
// so we can capture i/o
captureBuffer := bufio.NewWriter(&capture)
hcw := hcWriteCloser{
captureBuffer,
}
streams := new(AttachStreams)
streams.OutputStream = os.Stdout
streams.ErrorStream = os.Stderr
streams.OutputStream = hcw
streams.ErrorStream = hcw
streams.InputStream = os.Stdin
streams.AttachOutput = true
streams.AttachError = true
streams.AttachInput = true
logrus.Debugf("executing health check command %s for %s", strings.Join(newCommand, " "), c.ID())
if err := c.Exec(false, false, []string{}, newCommand, "", "", streams, 0); err != nil {
return HealthCheckFailure, err
timeStart := time.Now()
hcResult := HealthCheckSuccess
hcErr := c.Exec(false, false, []string{}, newCommand, "", "", streams, 0)
if hcErr != nil {
hcResult = HealthCheckFailure
returnCode = 1
}
return HealthCheckSuccess, nil
timeEnd := time.Now()
eventLog := capture.String()
if len(eventLog) > MaxHealthCheckLogLength {
eventLog = eventLog[:MaxHealthCheckLogLength]
}
hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog)
if err := c.updateHealthCheckLog(hcl); err != nil {
return hcResult, errors.Wrapf(err, "unable to update health check log %s for %s", c.healthCheckLogPath(), c.ID())
}
return hcResult, hcErr
}
func checkHealthCheckCanBeRun(c *Container) (HealthCheckStatus, error) {
@ -90,3 +135,67 @@ func checkHealthCheckCanBeRun(c *Container) (HealthCheckStatus, error) {
}
return HealthCheckDefined, nil
}
// newHealthCheckLog assembles the log entry for a single healthcheck attempt.
// The start and end times are stored as strings formatted with
// time.RFC3339Nano; the exit code and output are stored as given.
func newHealthCheckLog(start, end time.Time, exitCode int, log string) inspect.HealthCheckLog {
	var entry inspect.HealthCheckLog
	entry.Start = start.Format(time.RFC3339Nano)
	entry.End = end.Format(time.RFC3339Nano)
	entry.ExitCode = exitCode
	entry.Output = log
	return entry
}
// updateHealthCheckLog folds the result of one healthcheck attempt into the
// container's stored healthcheck results and writes them back to the
// healthcheck log file.
//
// A zero exit code marks the container healthy and resets the failing streak.
// Any other exit code extends the streak; once the streak reaches the
// configured number of retries the container is marked unhealthy. Only the
// most recent MaxHealthCheckNumberLogs attempts are kept.
func (c *Container) updateHealthCheckLog(hcl inspect.HealthCheckLog) error {
	healthCheck, err := c.GetHealthCheckLog()
	if err != nil {
		return err
	}
	if hcl.ExitCode == 0 {
		// set status to healthy, reset failing state to 0
		healthCheck.Status = HealthCheckHealthy
		healthCheck.FailingStreak = 0
	} else {
		// A container with no recorded status yet starts out healthy and
		// only becomes unhealthy once the retry budget is used up.
		if len(healthCheck.Status) < 1 {
			healthCheck.Status = HealthCheckHealthy
		}
		healthCheck.FailingStreak++
		// "retries" consecutive failures make the container unhealthy, so
		// the comparison must include equality.
		if healthCheck.FailingStreak >= c.HealthCheckConfig().Retries {
			healthCheck.Status = HealthCheckUnhealthy
		}
	}
	healthCheck.Log = append(healthCheck.Log, hcl)
	if len(healthCheck.Log) > MaxHealthCheckNumberLogs {
		// Drop the oldest entry to stay within the history limit.
		healthCheck.Log = healthCheck.Log[1:]
	}
	newResults, err := json.Marshal(healthCheck)
	if err != nil {
		return errors.Wrapf(err, "unable to marshall healthchecks for writing")
	}
	// NOTE(review): 0700 sets the execute bit on a data file; 0600 is
	// probably sufficient - confirm before changing.
	return ioutil.WriteFile(c.healthCheckLogPath(), newResults, 0700)
}
// healthCheckLogPath returns the location of the container's healthcheck log:
// a file named healthcheck.log in the same directory as the container's log.
func (c *Container) healthCheckLogPath() string {
	logDir := filepath.Dir(c.LogPath())
	return filepath.Join(logDir, "healthcheck.log")
}
// GetHealthCheckLog returns HealthCheck results by reading the container's
// health check log file. If the health check log file does not exist, then
// an empty healthcheck struct is returned with a nil error. Any other read
// failure, or a file that cannot be unmarshalled, is returned as an error.
func (c *Container) GetHealthCheckLog() (inspect.HealthCheckResults, error) {
	var healthCheck inspect.HealthCheckResults
	logPath := c.healthCheckLogPath()
	// Read the file directly instead of stat-ing it first: this saves a
	// syscall and avoids a race if the file goes away between the two calls.
	b, err := ioutil.ReadFile(logPath)
	if err != nil {
		if os.IsNotExist(err) {
			return healthCheck, nil
		}
		return healthCheck, errors.Wrapf(err, "failed to read health check log file %s", logPath)
	}
	if err := json.Unmarshal(b, &healthCheck); err != nil {
		return healthCheck, errors.Wrapf(err, "failed to unmarshal existing healthcheck results in %s", logPath)
	}
	return healthCheck, nil
}

View File

@ -3,11 +3,12 @@ package inspect
import (
"time"
"github.com/containers/image/manifest"
"github.com/cri-o/ocicni/pkg/ocicni"
"github.com/docker/go-connections/nat"
"github.com/opencontainers/go-digest"
"github.com/opencontainers/image-spec/specs-go/v1"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-spec/specs-go"
)
// ContainerData holds the podman inspect data for a container
@ -78,24 +79,25 @@ type HostConfig struct {
// CtrConfig holds information about the container configuration
type CtrConfig struct {
Hostname string `json:"Hostname"`
DomainName string `json:"Domainname"` //TODO
User specs.User `json:"User"`
AttachStdin bool `json:"AttachStdin"` //TODO
AttachStdout bool `json:"AttachStdout"` //TODO
AttachStderr bool `json:"AttachStderr"` //TODO
Tty bool `json:"Tty"`
OpenStdin bool `json:"OpenStdin"`
StdinOnce bool `json:"StdinOnce"` //TODO
Env []string `json:"Env"`
Cmd []string `json:"Cmd"`
Image string `json:"Image"`
Volumes map[string]struct{} `json:"Volumes"`
WorkingDir string `json:"WorkingDir"`
Entrypoint string `json:"Entrypoint"`
Labels map[string]string `json:"Labels"`
Annotations map[string]string `json:"Annotations"`
StopSignal uint `json:"StopSignal"`
Hostname string `json:"Hostname"`
DomainName string `json:"Domainname"` //TODO
User specs.User `json:"User"`
AttachStdin bool `json:"AttachStdin"` //TODO
AttachStdout bool `json:"AttachStdout"` //TODO
AttachStderr bool `json:"AttachStderr"` //TODO
Tty bool `json:"Tty"`
OpenStdin bool `json:"OpenStdin"`
StdinOnce bool `json:"StdinOnce"` //TODO
Env []string `json:"Env"`
Cmd []string `json:"Cmd"`
Image string `json:"Image"`
Volumes map[string]struct{} `json:"Volumes"`
WorkingDir string `json:"WorkingDir"`
Entrypoint string `json:"Entrypoint"`
Labels map[string]string `json:"Labels"`
Annotations map[string]string `json:"Annotations"`
StopSignal uint `json:"StopSignal"`
Healthcheck *manifest.Schema2HealthConfig `json:"Healthcheck,omitempty"`
}
// LogConfig holds the log information for a container
@ -178,18 +180,19 @@ type ContainerInspectData struct {
// ContainerInspectState represents the state of a container.
type ContainerInspectState struct {
OciVersion string `json:"OciVersion"`
Status string `json:"Status"`
Running bool `json:"Running"`
Paused bool `json:"Paused"`
Restarting bool `json:"Restarting"` // TODO
OOMKilled bool `json:"OOMKilled"`
Dead bool `json:"Dead"`
Pid int `json:"Pid"`
ExitCode int32 `json:"ExitCode"`
Error string `json:"Error"` // TODO
StartedAt time.Time `json:"StartedAt"`
FinishedAt time.Time `json:"FinishedAt"`
OciVersion string `json:"OciVersion"`
Status string `json:"Status"`
Running bool `json:"Running"`
Paused bool `json:"Paused"`
Restarting bool `json:"Restarting"` // TODO
OOMKilled bool `json:"OOMKilled"`
Dead bool `json:"Dead"`
Pid int `json:"Pid"`
ExitCode int32 `json:"ExitCode"`
Error string `json:"Error"` // TODO
StartedAt time.Time `json:"StartedAt"`
FinishedAt time.Time `json:"FinishedAt"`
Healthcheck HealthCheckResults `json:"Healthcheck,omitempty"`
}
// NetworkSettings holds information about the network settings of the container
@ -227,3 +230,25 @@ type ImageResult struct {
Labels map[string]string
Dangling bool
}
// HealthCheckResults describes the accumulated results/logs from a
// container's healthcheck: its current status, how many checks have failed
// in a row, and a history of recent attempts.
type HealthCheckResults struct {
// Status is the current health of the container: healthy or unhealthy
Status string `json:"Status"`
// FailingStreak is the number of consecutive failed healthchecks
FailingStreak int `json:"FailingStreak"`
// Log describes healthcheck attempts and results, one entry per attempt
Log []HealthCheckLog `json:"Log"`
}
// HealthCheckLog describes the results of a single healthcheck attempt.
type HealthCheckLog struct {
// Start is the time the attempt began, stored as a formatted string
Start string `json:"Start"`
// End is the time the attempt finished, stored as a formatted string
End string `json:"End"`
// ExitCode is 0 or 1
ExitCode int `json:"ExitCode"`
// Output is the stdout/stderr from the healthcheck command
Output string `json:"Output"`
}

View File

@ -87,7 +87,6 @@ type CreateConfig struct {
Env map[string]string //env
ExposedPorts map[nat.Port]struct{}
GroupAdd []string // group-add
HasHealthCheck bool
HealthCheck *manifest.Schema2HealthConfig
HostAdd []string //add-host
Hostname string //hostname
@ -562,7 +561,7 @@ func (c *CreateConfig) GetContainerCreateOptions(runtime *libpod.Runtime, pod *l
// Always use a cleanup process to clean up Podman after termination
options = append(options, libpod.WithExitCommand(c.createExitCommand()))
if c.HasHealthCheck {
if c.HealthCheck != nil {
options = append(options, libpod.WithHealthCheck(c.HealthCheck))
logrus.Debugf("New container has a health check")
}

View File

@ -719,4 +719,28 @@ USER mail`
Expect(session.OutputToString()).To(Not(ContainSubstring("/dev/shm type tmpfs (ro,")))
})
// The specs below pass --healthcheck-command (the flag that create/run
// actually registers) so that the non-zero exit comes from validation of the
// accompanying value rather than from an unknown-flag error.

// An interval shorter than one second must be rejected.
It("podman run with bad healthcheck interval", func() {
	session := podmanTest.Podman([]string{"run", "-dt", "--healthcheck-command", "foo", "--healthcheck-interval", "0.5s", ALPINE, "top"})
	session.Wait()
	Expect(session.ExitCode()).ToNot(Equal(0))
})

// Zero retries must be rejected.
It("podman run with bad healthcheck retries", func() {
	session := podmanTest.Podman([]string{"run", "-dt", "--healthcheck-command", "foo", "--healthcheck-retries", "0", ALPINE, "top"})
	session.Wait()
	Expect(session.ExitCode()).ToNot(Equal(0))
})

// A timeout shorter than one second must be rejected.
It("podman run with bad healthcheck timeout", func() {
	session := podmanTest.Podman([]string{"run", "-dt", "--healthcheck-command", "foo", "--healthcheck-timeout", "0s", ALPINE, "top"})
	session.Wait()
	Expect(session.ExitCode()).ToNot(Equal(0))
})

// A negative start period must be rejected.
It("podman run with bad healthcheck start-period", func() {
	session := podmanTest.Podman([]string{"run", "-dt", "--healthcheck-command", "foo", "--healthcheck-start-period", "-1s", ALPINE, "top"})
	session.Wait()
	Expect(session.ExitCode()).ToNot(Equal(0))
})
})