healthcheck phase 2

integration of healthcheck into create and run as well as inspect.
healthcheck enhancements are as follows:

* add the following options to create|run so that non-docker images can
define healthchecks at the container level.
  * --healthcheck-command
  * --healthcheck-retries
  * --healthcheck-interval
  * --healthcheck-start-period

* podman create|run --healthcheck-command=none disables healthcheck as
described by an image.
* the healthcheck itself and the healthcheck "history" can now be
observed in podman inspect
* added the wiring for healthcheck history which logs the health history
of the container, the current failed streak attempts, and log entries
for the last five attempts which themselves have start and stop times,
result, and a 500 character truncated (if needed) log of stderr/stdout.

The timings themselves are not implemented in this PR but will be in
future enablement (i.e. next).

Signed-off-by: baude <bbaude@redhat.com>
This commit is contained in:
baude 2019-03-06 12:12:35 -06:00
parent 7038cac53c
commit 03716cf7f3
12 changed files with 351 additions and 61 deletions

View File

@ -287,7 +287,26 @@ func getCreateFlags(c *cliconfig.PodmanCommand) {
createFlags.Bool( createFlags.Bool(
"help", false, "", "help", false, "",
) )
createFlags.String(
"healthcheck-command", "",
"set a healthcheck command for the container ('none' disables the existing healthcheck)",
)
createFlags.String(
"healthcheck-interval", "30s",
"set an interval for the healthchecks",
)
createFlags.Uint(
"healthcheck-retries", 3,
"the number of retries allowed before a healthcheck is considered to be unhealthy",
)
createFlags.String(
"healthcheck-start-period", "0s",
"the initialization time needed for a container to bootstrap",
)
createFlags.String(
"healthcheck-timeout", "30s",
"the maximum time allowed to complete the healthcheck before an interval is considered failed",
)
createFlags.StringP( createFlags.StringP(
"hostname", "h", "", "hostname", "h", "",
"Set container hostname", "Set container hostname",

View File

@ -48,6 +48,6 @@ func healthCheckCmd(c *cliconfig.HealthCheckValues) error {
} }
return err return err
} }
fmt.Println("\nhealthy") fmt.Println("healthy")
return nil return nil
} }

View File

@ -523,6 +523,7 @@ func GetCtrInspectInfo(config *libpod.ContainerConfig, ctrInspectData *inspect.C
StopSignal: config.StopSignal, StopSignal: config.StopSignal,
Cmd: config.Spec.Process.Args, Cmd: config.Spec.Process.Args,
Entrypoint: strings.Join(createArtifact.Entrypoint, " "), Entrypoint: strings.Join(createArtifact.Entrypoint, " "),
Healthcheck: config.HealthCheckConfig,
}, },
} }
return data, nil return data, nil

View File

@ -11,6 +11,7 @@ import (
"strconv" "strconv"
"strings" "strings"
"syscall" "syscall"
"time"
"github.com/containers/image/manifest" "github.com/containers/image/manifest"
"github.com/containers/libpod/cmd/podman/cliconfig" "github.com/containers/libpod/cmd/podman/cliconfig"
@ -26,6 +27,7 @@ import (
"github.com/docker/docker/pkg/signal" "github.com/docker/docker/pkg/signal"
"github.com/docker/go-connections/nat" "github.com/docker/go-connections/nat"
"github.com/docker/go-units" "github.com/docker/go-units"
"github.com/google/shlex"
spec "github.com/opencontainers/runtime-spec/specs-go" spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux/label" "github.com/opencontainers/selinux/go-selinux/label"
"github.com/opentracing/opentracing-go" "github.com/opentracing/opentracing-go"
@ -40,7 +42,6 @@ func getContext() context.Context {
func CreateContainer(ctx context.Context, c *cliconfig.PodmanCommand, runtime *libpod.Runtime) (*libpod.Container, *cc.CreateConfig, error) { func CreateContainer(ctx context.Context, c *cliconfig.PodmanCommand, runtime *libpod.Runtime) (*libpod.Container, *cc.CreateConfig, error) {
var ( var (
hasHealthCheck bool
healthCheck *manifest.Schema2HealthConfig healthCheck *manifest.Schema2HealthConfig
) )
if c.Bool("trace") { if c.Bool("trace") {
@ -89,7 +90,21 @@ func CreateContainer(ctx context.Context, c *cliconfig.PodmanCommand, runtime *l
imageName = newImage.ID() imageName = newImage.ID()
} }
// add healthcheck if it exists AND is correct mediatype var healthCheckCommandInput string
// if the user disabled the healthcheck with "none", we skip adding it
healthCheckCommandInput = c.String("healthcheck-command")
// the user didnt disable the healthcheck but did pass in a healthcheck command
// now we need to make a healthcheck from the commandline input
if healthCheckCommandInput != "none" {
if len(healthCheckCommandInput) > 0 {
healthCheck, err = makeHealthCheckFromCli(c)
if err != nil {
return nil, nil, errors.Wrapf(err, "unable to create healthcheck")
}
} else {
// the user did not disable the health check and did not pass in a healthcheck
// command as input. so now we add healthcheck if it exists AND is correct mediatype
_, mediaType, err := newImage.Manifest(ctx) _, mediaType, err := newImage.Manifest(ctx)
if err != nil { if err != nil {
return nil, nil, errors.Wrapf(err, "unable to determine mediatype of image %s", newImage.ID()) return nil, nil, errors.Wrapf(err, "unable to determine mediatype of image %s", newImage.ID())
@ -99,8 +114,7 @@ func CreateContainer(ctx context.Context, c *cliconfig.PodmanCommand, runtime *l
if err != nil { if err != nil {
return nil, nil, errors.Wrapf(err, "unable to get healthcheck for %s", c.InputArgs[0]) return nil, nil, errors.Wrapf(err, "unable to get healthcheck for %s", c.InputArgs[0])
} }
if healthCheck != nil { }
hasHealthCheck = true
} }
} }
} }
@ -111,7 +125,6 @@ func CreateContainer(ctx context.Context, c *cliconfig.PodmanCommand, runtime *l
// Because parseCreateOpts does derive anything from the image, we add health check // Because parseCreateOpts does derive anything from the image, we add health check
// at this point. The rest is done by WithOptions. // at this point. The rest is done by WithOptions.
createConfig.HasHealthCheck = hasHealthCheck
createConfig.HealthCheck = healthCheck createConfig.HealthCheck = healthCheck
ctr, err := CreateContainerFromCreateConfig(runtime, createConfig, ctx, nil) ctr, err := CreateContainerFromCreateConfig(runtime, createConfig, ctx, nil)
@ -835,3 +848,58 @@ var defaultEnvVariables = map[string]string{
"PATH": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "PATH": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"TERM": "xterm", "TERM": "xterm",
} }
// makeHealthCheckFromCli builds a healthcheck configuration from the
// --healthcheck-* command line options of create|run.
//
// The command is split into arguments with shell-like lexing and stored as
// the healthcheck test. The interval and timeout must each be at least one
// second, retries must be at least one, and the start period must not be
// negative. An error is returned when the command is empty, cannot be parsed,
// or any of the other options is invalid.
func makeHealthCheckFromCli(c *cliconfig.PodmanCommand) (*manifest.Schema2HealthConfig, error) {
	inCommand := c.String("healthcheck-command")
	inInterval := c.String("healthcheck-interval")
	inRetries := c.Uint("healthcheck-retries")
	inTimeout := c.String("healthcheck-timeout")
	inStartPeriod := c.String("healthcheck-start-period")

	// Every healthcheck requires a command
	if len(inCommand) == 0 {
		return nil, errors.New("Must define a healthcheck command for all healthchecks")
	}

	cmd, err := shlex.Split(inCommand)
	if err != nil {
		return nil, errors.Wrap(err, "failed to parse healthcheck command")
	}
	hc := manifest.Schema2HealthConfig{
		Test: cmd,
	}

	intervalDuration, err := time.ParseDuration(inInterval)
	if err != nil {
		return nil, errors.Wrapf(err, "invalid healthcheck-interval %s ", inInterval)
	}
	if intervalDuration < time.Second {
		return nil, errors.New("healthcheck-interval must be at least 1 second")
	}
	hc.Interval = intervalDuration

	// Validate after converting to int: this rejects 0 and also any value so
	// large that it would wrap to a negative int.
	retries := int(inRetries)
	if retries < 1 {
		return nil, errors.New("healthcheck-retries must be greater than 0.")
	}
	// Store the validated value; without this the user's retries setting is
	// silently dropped and the config keeps the zero value.
	hc.Retries = retries

	timeoutDuration, err := time.ParseDuration(inTimeout)
	if err != nil {
		return nil, errors.Wrapf(err, "invalid healthcheck-timeout %s", inTimeout)
	}
	if timeoutDuration < time.Second {
		return nil, errors.New("healthcheck-timeout must be at least 1 second")
	}
	hc.Timeout = timeoutDuration

	startPeriodDuration, err := time.ParseDuration(inStartPeriod)
	if err != nil {
		return nil, errors.Wrapf(err, "invalid healthcheck-start-period %s", inStartPeriod)
	}
	if startPeriodDuration < 0 {
		return nil, errors.New("healthcheck-start-period must be a 0 seconds or greater")
	}
	hc.StartPeriod = startPeriodDuration

	return &hc, nil
}

View File

@ -256,6 +256,23 @@ The following example maps uids 0-2000 in the container to the uids 30000-31999
Add additional groups to run as Add additional groups to run as
**--healthcheck**=""
Set or alter a healthcheck for a container. The value must be of the format of:
`[OPTIONS] CMD command`
where options can be any of the following:
* --interval=DURATION (default: 30s)
* --timeout=DURATION (default: 30s)
* --start-period=DURATION (default: 0s)
* --retries=N (default: 3)
Note: options are *not* required.
The command is a command to be executed inside your container that determines your container health. The
command is required.
**--hostname**="" **--hostname**=""
Container host name Container host name

View File

@ -264,6 +264,23 @@ The example maps gids 0-2000 in the container to the gids 30000-31999 on the hos
Add additional groups to run as Add additional groups to run as
**--healthcheck**=""
Set or alter a healthcheck for a container. The value must be of the format of:
`[OPTIONS] CMD command`
where options can be any of the following:
* --interval=DURATION (default: 30s)
* --timeout=DURATION (default: 30s)
* --start-period=DURATION (default: 0s)
* --retries=N (default: 3)
Note: options are *not* required.
The command is a command to be executed inside your container that determines your container health. The
command is required.
**--hostname**="" **--hostname**=""
Container host name Container host name

View File

@ -368,7 +368,7 @@ type ContainerConfig struct {
Systemd bool `json:"systemd"` Systemd bool `json:"systemd"`
// HealtchCheckConfig has the health check command and related timings // HealtchCheckConfig has the health check command and related timings
HealthCheckConfig *manifest.Schema2HealthConfig HealthCheckConfig *manifest.Schema2HealthConfig `json:"healthcheck"`
} }
// ContainerStatus returns a string representation for users // ContainerStatus returns a string representation for users

View File

@ -127,6 +127,17 @@ func (c *Container) getContainerInspectData(size bool, driverData *inspect.Data)
IsInfra: c.IsInfra(), IsInfra: c.IsInfra(),
} }
if c.config.HealthCheckConfig != nil {
// This container has a healthcheck defined in it; we need to add it's state
healthCheckState, err := c.GetHealthCheckLog()
if err != nil {
// An error here is not considered fatal; no health state will be displayed
logrus.Error(err)
} else {
data.State.Healthcheck = healthCheckState
}
}
// Copy port mappings into network settings // Copy port mappings into network settings
if config.PortMappings != nil { if config.PortMappings != nil {
data.NetworkSettings.Ports = config.PortMappings data.NetworkSettings.Ports = config.PortMappings

View File

@ -1,9 +1,15 @@
package libpod package libpod
import ( import (
"bufio"
"bytes"
"io/ioutil"
"os" "os"
"path/filepath"
"strings" "strings"
"time"
"github.com/containers/libpod/pkg/inspect"
"github.com/pkg/errors" "github.com/pkg/errors"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
) )
@ -30,8 +36,29 @@ const (
HealthCheckInternalError HealthCheckStatus = iota HealthCheckInternalError HealthCheckStatus = iota
// HealthCheckDefined means the healthcheck was found on the container // HealthCheckDefined means the healthcheck was found on the container
HealthCheckDefined HealthCheckStatus = iota HealthCheckDefined HealthCheckStatus = iota
// MaxHealthCheckNumberLogs is the maximum number of attempts we keep
// in the healtcheck history file
MaxHealthCheckNumberLogs int = 5
// MaxHealthCheckLogLength in characters
MaxHealthCheckLogLength = 500
// HealthCheckHealthy describes a healthy container
HealthCheckHealthy string = "healthy"
// HealthCheckUnhealthy describes an unhealthy container
HealthCheckUnhealthy string = "unhealthy"
) )
// hcWriteCloser wraps a *bufio.Writer and adds a Close method so the
// buffered writer can be used where a WriteCloser is required.
type hcWriteCloser struct {
	*bufio.Writer
}

// Close flushes any data still held in the buffer to the underlying writer
// and returns the flush error, if any. There is no underlying resource to
// release. Without the flush, output smaller than the buffer size would
// never reach the destination. Closing a wrapper with no writer is a no-op.
func (hcwc hcWriteCloser) Close() error {
	if hcwc.Writer == nil {
		return nil
	}
	return hcwc.Flush()
}
// HealthCheck verifies the state and validity of the healthcheck configuration // HealthCheck verifies the state and validity of the healthcheck configuration
// on the container and then executes the healthcheck // on the container and then executes the healthcheck
func (r *Runtime) HealthCheck(name string) (HealthCheckStatus, error) { func (r *Runtime) HealthCheck(name string) (HealthCheckStatus, error) {
@ -48,33 +75,51 @@ func (r *Runtime) HealthCheck(name string) (HealthCheckStatus, error) {
// RunHealthCheck runs the health check as defined by the container // RunHealthCheck runs the health check as defined by the container
func (c *Container) RunHealthCheck() (HealthCheckStatus, error) { func (c *Container) RunHealthCheck() (HealthCheckStatus, error) {
var newCommand []string var (
newCommand []string
returnCode int
capture bytes.Buffer
)
hcStatus, err := checkHealthCheckCanBeRun(c) hcStatus, err := checkHealthCheckCanBeRun(c)
if err != nil { if err != nil {
return hcStatus, err return hcStatus, err
} }
hcCommand := c.HealthCheckConfig().Test hcCommand := c.HealthCheckConfig().Test
if len(hcCommand) > 0 && hcCommand[0] == "CMD-SHELL" { if len(hcCommand) > 0 && hcCommand[0] == "CMD-SHELL" {
newCommand = []string{"sh", "-c"} newCommand = []string{"sh", "-c", strings.Join(hcCommand[1:], " ")}
newCommand = append(newCommand, hcCommand[1:]...)
} else { } else {
newCommand = hcCommand newCommand = hcCommand
} }
// TODO when history/logging is implemented for healthcheck, we need to change the output streams captureBuffer := bufio.NewWriter(&capture)
// so we can capture i/o hcw := hcWriteCloser{
captureBuffer,
}
streams := new(AttachStreams) streams := new(AttachStreams)
streams.OutputStream = os.Stdout streams.OutputStream = hcw
streams.ErrorStream = os.Stderr streams.ErrorStream = hcw
streams.InputStream = os.Stdin streams.InputStream = os.Stdin
streams.AttachOutput = true streams.AttachOutput = true
streams.AttachError = true streams.AttachError = true
streams.AttachInput = true streams.AttachInput = true
logrus.Debugf("executing health check command %s for %s", strings.Join(newCommand, " "), c.ID()) logrus.Debugf("executing health check command %s for %s", strings.Join(newCommand, " "), c.ID())
if err := c.Exec(false, false, []string{}, newCommand, "", "", streams, 0); err != nil { timeStart := time.Now()
return HealthCheckFailure, err hcResult := HealthCheckSuccess
hcErr := c.Exec(false, false, []string{}, newCommand, "", "", streams, 0)
if hcErr != nil {
hcResult = HealthCheckFailure
returnCode = 1
} }
return HealthCheckSuccess, nil timeEnd := time.Now()
eventLog := capture.String()
if len(eventLog) > MaxHealthCheckLogLength {
eventLog = eventLog[:MaxHealthCheckLogLength]
}
hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog)
if err := c.updateHealthCheckLog(hcl); err != nil {
return hcResult, errors.Wrapf(err, "unable to update health check log %s for %s", c.healthCheckLogPath(), c.ID())
}
return hcResult, hcErr
} }
func checkHealthCheckCanBeRun(c *Container) (HealthCheckStatus, error) { func checkHealthCheckCanBeRun(c *Container) (HealthCheckStatus, error) {
@ -90,3 +135,67 @@ func checkHealthCheckCanBeRun(c *Container) (HealthCheckStatus, error) {
} }
return HealthCheckDefined, nil return HealthCheckDefined, nil
} }
// newHealthCheckLog builds the log entry for a single healthcheck attempt.
// The start and end times are stored as RFC3339Nano formatted strings; the
// exit code and output are stored as given.
func newHealthCheckLog(start, end time.Time, exitCode int, log string) inspect.HealthCheckLog {
	var entry inspect.HealthCheckLog
	entry.Start = start.Format(time.RFC3339Nano)
	entry.End = end.Format(time.RFC3339Nano)
	entry.ExitCode = exitCode
	entry.Output = log
	return entry
}
// updateHealthCheckLog folds the result of one healthcheck attempt into the
// container's stored healthcheck results and writes them back to the
// healthcheck log file.
//
// A zero exit code marks the container healthy and resets the failing streak.
// Any other exit code increments the failing streak; once the streak reaches
// the configured number of retries the container is marked unhealthy. Only
// the most recent MaxHealthCheckNumberLogs attempts are kept.
func (c *Container) updateHealthCheckLog(hcl inspect.HealthCheckLog) error {
	healthCheck, err := c.GetHealthCheckLog()
	if err != nil {
		return err
	}
	if hcl.ExitCode == 0 {
		// set status to healthy, reset failing state to 0
		healthCheck.Status = HealthCheckHealthy
		healthCheck.FailingStreak = 0
	} else {
		// A container with no recorded status yet starts out healthy.
		if len(healthCheck.Status) < 1 {
			healthCheck.Status = HealthCheckHealthy
		}
		// increment failing streak
		healthCheck.FailingStreak++
		// "retries" consecutive failures make the container unhealthy, so
		// the comparison must be >=; using > would need one extra failure.
		if healthCheck.FailingStreak >= c.HealthCheckConfig().Retries {
			healthCheck.Status = HealthCheckUnhealthy
		}
	}
	healthCheck.Log = append(healthCheck.Log, hcl)
	// Keep only the newest entries, even if the stored history was already
	// longer than the limit.
	if extra := len(healthCheck.Log) - MaxHealthCheckNumberLogs; extra > 0 {
		healthCheck.Log = healthCheck.Log[extra:]
	}
	newResults, err := json.Marshal(healthCheck)
	if err != nil {
		return errors.Wrapf(err, "unable to marshall healthchecks for writing")
	}
	return ioutil.WriteFile(c.healthCheckLogPath(), newResults, 0700)
}
// healthCheckLogPath returns the path of the container's healthcheck log: a
// file named "healthcheck.log" in the same directory as the container's log
// path.
func (c *Container) healthCheckLogPath() string {
	logDir := filepath.Dir(c.LogPath())
	return filepath.Join(logDir, "healthcheck.log")
}
// GetHealthCheckLog returns HealthCheck results by reading the container's
// health check log file. If the health check log file does not exist, then
// an empty healthcheck struct is returned with a nil error. Any other read
// failure, or a file that cannot be decoded, is returned as an error together
// with an empty struct.
func (c *Container) GetHealthCheckLog() (inspect.HealthCheckResults, error) {
	var healthCheck inspect.HealthCheckResults
	logPath := c.healthCheckLogPath()

	// Read directly instead of stat-then-read: this avoids a second syscall
	// and the window in which the file could disappear between the two.
	b, err := ioutil.ReadFile(logPath)
	if err != nil {
		if os.IsNotExist(err) {
			return healthCheck, nil
		}
		return healthCheck, errors.Wrapf(err, "failed to read health check log file %s", logPath)
	}
	if err := json.Unmarshal(b, &healthCheck); err != nil {
		// Do not hand back a partially decoded value alongside the error.
		return inspect.HealthCheckResults{}, errors.Wrapf(err, "failed to unmarshal existing healthcheck results in %s", logPath)
	}
	return healthCheck, nil
}

View File

@ -3,11 +3,12 @@ package inspect
import ( import (
"time" "time"
"github.com/containers/image/manifest"
"github.com/cri-o/ocicni/pkg/ocicni" "github.com/cri-o/ocicni/pkg/ocicni"
"github.com/docker/go-connections/nat" "github.com/docker/go-connections/nat"
"github.com/opencontainers/go-digest" "github.com/opencontainers/go-digest"
"github.com/opencontainers/image-spec/specs-go/v1" "github.com/opencontainers/image-spec/specs-go/v1"
specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-spec/specs-go"
) )
// ContainerData holds the podman inspect data for a container // ContainerData holds the podman inspect data for a container
@ -96,6 +97,7 @@ type CtrConfig struct {
Labels map[string]string `json:"Labels"` Labels map[string]string `json:"Labels"`
Annotations map[string]string `json:"Annotations"` Annotations map[string]string `json:"Annotations"`
StopSignal uint `json:"StopSignal"` StopSignal uint `json:"StopSignal"`
Healthcheck *manifest.Schema2HealthConfig `json:"Healthcheck,omitempty"`
} }
// LogConfig holds the log information for a container // LogConfig holds the log information for a container
@ -190,6 +192,7 @@ type ContainerInspectState struct {
Error string `json:"Error"` // TODO Error string `json:"Error"` // TODO
StartedAt time.Time `json:"StartedAt"` StartedAt time.Time `json:"StartedAt"`
FinishedAt time.Time `json:"FinishedAt"` FinishedAt time.Time `json:"FinishedAt"`
Healthcheck HealthCheckResults `json:"Healthcheck,omitempty"`
} }
// NetworkSettings holds information about the newtwork settings of the container // NetworkSettings holds information about the newtwork settings of the container
@ -227,3 +230,25 @@ type ImageResult struct {
Labels map[string]string Labels map[string]string
Dangling bool Dangling bool
} }
// HealthCheckResults describes the results/logs from a healthcheck. It holds
// the overall status, the current run of consecutive failures, and the log
// of individual attempts.
type HealthCheckResults struct {
// Status is the overall health state, healthy or unhealthy
Status string `json:"Status"`
// FailingStreak is the number of consecutive failed healthchecks
FailingStreak int `json:"FailingStreak"`
// Log describes healthcheck attempts and results, one entry per attempt
Log []HealthCheckLog `json:"Log"`
}
// HealthCheckLog describes the results of a single healthcheck attempt:
// when it started and ended, how it exited, and what it printed.
type HealthCheckLog struct {
// Start time as string
Start string `json:"Start"`
// End time as a string
End string `json:"End"`
// Exitcode is 0 or 1
ExitCode int `json:"ExitCode"`
// Output is the stdout/stderr from the healthcheck command
Output string `json:"Output"`
}

View File

@ -87,7 +87,6 @@ type CreateConfig struct {
Env map[string]string //env Env map[string]string //env
ExposedPorts map[nat.Port]struct{} ExposedPorts map[nat.Port]struct{}
GroupAdd []string // group-add GroupAdd []string // group-add
HasHealthCheck bool
HealthCheck *manifest.Schema2HealthConfig HealthCheck *manifest.Schema2HealthConfig
HostAdd []string //add-host HostAdd []string //add-host
Hostname string //hostname Hostname string //hostname
@ -562,7 +561,7 @@ func (c *CreateConfig) GetContainerCreateOptions(runtime *libpod.Runtime, pod *l
// Always use a cleanup process to clean up Podman after termination // Always use a cleanup process to clean up Podman after termination
options = append(options, libpod.WithExitCommand(c.createExitCommand())) options = append(options, libpod.WithExitCommand(c.createExitCommand()))
if c.HasHealthCheck { if c.HealthCheck != nil {
options = append(options, libpod.WithHealthCheck(c.HealthCheck)) options = append(options, libpod.WithHealthCheck(c.HealthCheck))
logrus.Debugf("New container has a health check") logrus.Debugf("New container has a health check")
} }

View File

@ -719,4 +719,28 @@ USER mail`
Expect(session.OutputToString()).To(Not(ContainSubstring("/dev/shm type tmpfs (ro,"))) Expect(session.OutputToString()).To(Not(ContainSubstring("/dev/shm type tmpfs (ro,")))
}) })
// The healthcheck command flag is registered as --healthcheck-command. Using
// any other spelling makes podman fail on an unknown flag, which would let
// these tests pass without ever exercising the option validation.
It("podman run with bad healthcheck interval", func() {
	session := podmanTest.Podman([]string{"run", "-dt", "--healthcheck-command", "foo", "--healthcheck-interval", "0.5s", ALPINE, "top"})
	session.Wait()
	Expect(session.ExitCode()).ToNot(Equal(0))
})

It("podman run with bad healthcheck retries", func() {
	session := podmanTest.Podman([]string{"run", "-dt", "--healthcheck-command", "foo", "--healthcheck-retries", "0", ALPINE, "top"})
	session.Wait()
	Expect(session.ExitCode()).ToNot(Equal(0))
})

It("podman run with bad healthcheck timeout", func() {
	session := podmanTest.Podman([]string{"run", "-dt", "--healthcheck-command", "foo", "--healthcheck-timeout", "0s", ALPINE, "top"})
	session.Wait()
	Expect(session.ExitCode()).ToNot(Equal(0))
})

It("podman run with bad healthcheck start-period", func() {
	session := podmanTest.Podman([]string{"run", "-dt", "--healthcheck-command", "foo", "--healthcheck-start-period", "-1s", ALPINE, "top"})
	session.Wait()
	Expect(session.ExitCode()).ToNot(Equal(0))
})
}) })