Merge pull request #2562 from baude/healtcheckphase2

healthcheck phase 2
This commit is contained in:
OpenShift Merge Robot 2019-03-12 13:09:13 -07:00 committed by GitHub
commit 883566fbc0
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 351 additions and 61 deletions

View File

@ -287,7 +287,26 @@ func getCreateFlags(c *cliconfig.PodmanCommand) {
createFlags.Bool(
"help", false, "",
)
createFlags.String(
"healthcheck-command", "",
"set a healthcheck command for the container ('none' disables the existing healthcheck)",
)
createFlags.String(
"healthcheck-interval", "30s",
"set an interval for the healthchecks",
)
createFlags.Uint(
"healthcheck-retries", 3,
"the number of retries allowed before a healthcheck is considered to be unhealthy",
)
createFlags.String(
"healthcheck-start-period", "0s",
"the initialization time needed for a container to bootstrap",
)
createFlags.String(
"healthcheck-timeout", "30s",
"the maximum time allowed to complete the healthcheck before an interval is considered failed",
)
createFlags.StringP(
"hostname", "h", "",
"Set container hostname",

View File

@ -48,6 +48,6 @@ func healthCheckCmd(c *cliconfig.HealthCheckValues) error {
}
return err
}
fmt.Println("\nhealthy")
fmt.Println("healthy")
return nil
}

View File

@ -523,6 +523,7 @@ func GetCtrInspectInfo(config *libpod.ContainerConfig, ctrInspectData *inspect.C
StopSignal: config.StopSignal,
Cmd: config.Spec.Process.Args,
Entrypoint: strings.Join(createArtifact.Entrypoint, " "),
Healthcheck: config.HealthCheckConfig,
},
}
return data, nil

View File

@ -11,6 +11,7 @@ import (
"strconv"
"strings"
"syscall"
"time"
"github.com/containers/image/manifest"
"github.com/containers/libpod/cmd/podman/cliconfig"
@ -26,6 +27,7 @@ import (
"github.com/docker/docker/pkg/signal"
"github.com/docker/go-connections/nat"
"github.com/docker/go-units"
"github.com/google/shlex"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/opentracing/opentracing-go"
@ -40,8 +42,7 @@ func getContext() context.Context {
func CreateContainer(ctx context.Context, c *cliconfig.PodmanCommand, runtime *libpod.Runtime) (*libpod.Container, *cc.CreateConfig, error) {
var (
hasHealthCheck bool
healthCheck *manifest.Schema2HealthConfig
healthCheck *manifest.Schema2HealthConfig
)
if c.Bool("trace") {
span, _ := opentracing.StartSpanFromContext(ctx, "createContainer")
@ -89,18 +90,31 @@ func CreateContainer(ctx context.Context, c *cliconfig.PodmanCommand, runtime *l
imageName = newImage.ID()
}
// add healthcheck if it exists AND is correct mediatype
_, mediaType, err := newImage.Manifest(ctx)
if err != nil {
return nil, nil, errors.Wrapf(err, "unable to determine mediatype of image %s", newImage.ID())
}
if mediaType == manifest.DockerV2Schema2MediaType {
healthCheck, err = newImage.GetHealthCheck(ctx)
if err != nil {
return nil, nil, errors.Wrapf(err, "unable to get healthcheck for %s", c.InputArgs[0])
}
if healthCheck != nil {
hasHealthCheck = true
var healthCheckCommandInput string
// if the user disabled the healthcheck with "none", we skip adding it
healthCheckCommandInput = c.String("healthcheck-command")
// the user didn't disable the healthcheck but did pass in a healthcheck command
// now we need to make a healthcheck from the commandline input
if healthCheckCommandInput != "none" {
if len(healthCheckCommandInput) > 0 {
healthCheck, err = makeHealthCheckFromCli(c)
if err != nil {
return nil, nil, errors.Wrapf(err, "unable to create healthcheck")
}
} else {
// the user did not disable the health check and did not pass in a healthcheck
// command as input, so we add the image's healthcheck if it exists AND is the correct mediatype
_, mediaType, err := newImage.Manifest(ctx)
if err != nil {
return nil, nil, errors.Wrapf(err, "unable to determine mediatype of image %s", newImage.ID())
}
if mediaType == manifest.DockerV2Schema2MediaType {
healthCheck, err = newImage.GetHealthCheck(ctx)
if err != nil {
return nil, nil, errors.Wrapf(err, "unable to get healthcheck for %s", c.InputArgs[0])
}
}
}
}
}
@ -111,7 +125,6 @@ func CreateContainer(ctx context.Context, c *cliconfig.PodmanCommand, runtime *l
// Because parseCreateOpts does not derive anything from the image, we add the health check
// at this point. The rest is done by WithOptions.
createConfig.HasHealthCheck = hasHealthCheck
createConfig.HealthCheck = healthCheck
ctr, err := CreateContainerFromCreateConfig(runtime, createConfig, ctx, nil)
@ -835,3 +848,58 @@ var defaultEnvVariables = map[string]string{
"PATH": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"TERM": "xterm",
}
func makeHealthCheckFromCli(c *cliconfig.PodmanCommand) (*manifest.Schema2HealthConfig, error) {
inCommand := c.String("healthcheck-command")
inInterval := c.String("healthcheck-interval")
inRetries := c.Uint("healthcheck-retries")
inTimeout := c.String("healthcheck-timeout")
inStartPeriod := c.String("healthcheck-start-period")
// Every healthcheck requires a command
if len(inCommand) == 0 {
return nil, errors.New("Must define a healthcheck command for all healthchecks")
}
cmd, err := shlex.Split(inCommand)
if err != nil {
return nil, errors.Wrap(err, "failed to parse healthcheck command")
}
hc := manifest.Schema2HealthConfig{
Test: cmd,
}
intervalDuration, err := time.ParseDuration(inInterval)
if err != nil {
return nil, errors.Wrapf(err, "invalid healthcheck-interval %s ", inInterval)
}
if intervalDuration < time.Duration(time.Second*1) {
return nil, errors.New("healthcheck-interval must be at least 1 second")
}
hc.Interval = intervalDuration
if inRetries < 1 {
return nil, errors.New("healthcheck-retries must be greater than 0.")
}
timeoutDuration, err := time.ParseDuration(inTimeout)
if err != nil {
return nil, errors.Wrapf(err, "invalid healthcheck-timeout %s", inTimeout)
}
if timeoutDuration < time.Duration(time.Second*1) {
return nil, errors.New("healthcheck-timeout must be at least 1 second")
}
hc.Timeout = timeoutDuration
startPeriodDuration, err := time.ParseDuration(inStartPeriod)
if err != nil {
return nil, errors.Wrapf(err, "invalid healthcheck-start-period %s", inStartPeriod)
}
if startPeriodDuration < time.Duration(0) {
return nil, errors.New("healthcheck-start-period must be a 0 seconds or greater")
}
hc.StartPeriod = startPeriodDuration
return &hc, nil
}
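As an aside, the `--healthcheck-command` value is tokenized by `shlex.Split`, so shell-style quoting is respected before the tokens become the healthcheck `Test` entry. A minimal, standalone sketch of that behavior (illustrative, not part of the change):

package main

import (
    "fmt"

    "github.com/google/shlex"
)

func main() {
    // Quoted arguments survive as single tokens, much like a POSIX shell would split them.
    cmd, err := shlex.Split(`curl -f "http://localhost:8080/health"`)
    if err != nil {
        panic(err)
    }
    fmt.Printf("%q\n", cmd) // ["curl" "-f" "http://localhost:8080/health"]
}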

View File

@ -256,6 +256,23 @@ The following example maps uids 0-2000 in the container to the uids 30000-31999
Add additional groups to run as
**--healthcheck**=""
Set or alter a healthcheck for a container. The value must be in the format:
`[OPTIONS] CMD command`
where the options can be any of the following:
* --interval=DURATION (default: 30s)
* --timeout=DURATION (default: 30s)
* --start-period=DURATION (default: 0s)
* --retries=N (default: 3)
Note: the options are *not* required.
The command is executed inside the container and determines the container's health. The command is required.
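For example, a periodic `curl` probe could be requested with the separate `--healthcheck-*` options (illustrative only; the image name is arbitrary):
`podman create --healthcheck-command "curl -f http://localhost || exit 1" --healthcheck-interval 1m --healthcheck-retries 5 fedora`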
**--hostname**=""
Container host name

View File

@ -264,6 +264,23 @@ The example maps gids 0-2000 in the container to the gids 30000-31999 on the hos
Add additional groups to run as
**--healthcheck**=""
Set or alter a healthcheck for a container. The value must be in the format:
`[OPTIONS] CMD command`
where the options can be any of the following:
* --interval=DURATION (default: 30s)
* --timeout=DURATION (default: 30s)
* --start-period=DURATION (default: 0s)
* --retries=N (default: 3)
Note: the options are *not* required.
The command is executed inside the container and determines the container's health. The command is required.
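For example (illustrative only; the image name is arbitrary):
`podman run -dt --healthcheck-command "curl -f http://localhost || exit 1" --healthcheck-interval 30s fedora top`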
**--hostname**=""
Container host name

View File

@ -368,7 +368,7 @@ type ContainerConfig struct {
Systemd bool `json:"systemd"`
// HealthCheckConfig has the health check command and related timings
HealthCheckConfig *manifest.Schema2HealthConfig
HealthCheckConfig *manifest.Schema2HealthConfig `json:"healthcheck"`
}
// ContainerStatus returns a string representation for users

View File

@ -127,6 +127,17 @@ func (c *Container) getContainerInspectData(size bool, driverData *inspect.Data)
IsInfra: c.IsInfra(),
}
if c.config.HealthCheckConfig != nil {
// This container has a healthcheck defined in it; we need to add its state
healthCheckState, err := c.GetHealthCheckLog()
if err != nil {
// An error here is not considered fatal; no health state will be displayed
logrus.Error(err)
} else {
data.State.Healthcheck = healthCheckState
}
}
// Copy port mappings into network settings
if config.PortMappings != nil {
data.NetworkSettings.Ports = config.PortMappings

View File

@ -1,9 +1,15 @@
package libpod
import (
"bufio"
"bytes"
"io/ioutil"
"os"
"path/filepath"
"strings"
"time"
"github.com/containers/libpod/pkg/inspect"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
@ -30,8 +36,29 @@ const (
HealthCheckInternalError HealthCheckStatus = iota
// HealthCheckDefined means the healthcheck was found on the container
HealthCheckDefined HealthCheckStatus = iota
// MaxHealthCheckNumberLogs is the maximum number of attempts we keep
// in the healthcheck history file
MaxHealthCheckNumberLogs int = 5
// MaxHealthCheckLogLength is the maximum length, in characters, of the stored output of each healthcheck attempt
MaxHealthCheckLogLength = 500
// HealthCheckHealthy describes a healthy container
HealthCheckHealthy string = "healthy"
// HealthCheckUnhealthy describes an unhealthy container
HealthCheckUnhealthy string = "unhealthy"
)
// hcWriteCloser allows us to use bufio as a WriteCloser
type hcWriteCloser struct {
*bufio.Writer
}
// Close satisfies io.WriteCloser; the wrapped bufio.Writer has nothing to close
func (hcwc hcWriteCloser) Close() error {
return nil
}
// HealthCheck verifies the state and validity of the healthcheck configuration
// on the container and then executes the healthcheck
func (r *Runtime) HealthCheck(name string) (HealthCheckStatus, error) {
@ -48,33 +75,51 @@ func (r *Runtime) HealthCheck(name string) (HealthCheckStatus, error) {
// RunHealthCheck runs the health check as defined by the container
func (c *Container) RunHealthCheck() (HealthCheckStatus, error) {
var newCommand []string
var (
newCommand []string
returnCode int
capture bytes.Buffer
)
hcStatus, err := checkHealthCheckCanBeRun(c)
if err != nil {
return hcStatus, err
}
hcCommand := c.HealthCheckConfig().Test
if len(hcCommand) > 0 && hcCommand[0] == "CMD-SHELL" {
newCommand = []string{"sh", "-c"}
newCommand = append(newCommand, hcCommand[1:]...)
newCommand = []string{"sh", "-c", strings.Join(hcCommand[1:], " ")}
} else {
newCommand = hcCommand
}
// TODO when history/logging is implemented for healthcheck, we need to change the output streams
// so we can capture i/o
captureBuffer := bufio.NewWriter(&capture)
hcw := hcWriteCloser{
captureBuffer,
}
streams := new(AttachStreams)
streams.OutputStream = os.Stdout
streams.ErrorStream = os.Stderr
streams.OutputStream = hcw
streams.ErrorStream = hcw
streams.InputStream = os.Stdin
streams.AttachOutput = true
streams.AttachError = true
streams.AttachInput = true
logrus.Debugf("executing health check command %s for %s", strings.Join(newCommand, " "), c.ID())
if err := c.Exec(false, false, []string{}, newCommand, "", "", streams, 0); err != nil {
return HealthCheckFailure, err
timeStart := time.Now()
hcResult := HealthCheckSuccess
hcErr := c.Exec(false, false, []string{}, newCommand, "", "", streams, 0)
if hcErr != nil {
hcResult = HealthCheckFailure
returnCode = 1
}
return HealthCheckSuccess, nil
timeEnd := time.Now()
eventLog := capture.String()
if len(eventLog) > MaxHealthCheckLogLength {
eventLog = eventLog[:MaxHealthCheckLogLength]
}
hcl := newHealthCheckLog(timeStart, timeEnd, returnCode, eventLog)
if err := c.updateHealthCheckLog(hcl); err != nil {
return hcResult, errors.Wrapf(err, "unable to update health check log %s for %s", c.healthCheckLogPath(), c.ID())
}
return hcResult, hcErr
}
func checkHealthCheckCanBeRun(c *Container) (HealthCheckStatus, error) {
@ -90,3 +135,67 @@ func checkHealthCheckCanBeRun(c *Container) (HealthCheckStatus, error) {
}
return HealthCheckDefined, nil
}
func newHealthCheckLog(start, end time.Time, exitCode int, log string) inspect.HealthCheckLog {
return inspect.HealthCheckLog{
Start: start.Format(time.RFC3339Nano),
End: end.Format(time.RFC3339Nano),
ExitCode: exitCode,
Output: log,
}
}
// updateHealthCheckLog parses the health check results and writes the log
func (c *Container) updateHealthCheckLog(hcl inspect.HealthCheckLog) error {
healthCheck, err := c.GetHealthCheckLog()
if err != nil {
return err
}
if hcl.ExitCode == 0 {
// set status to healthy, reset failing state to 0
healthCheck.Status = HealthCheckHealthy
healthCheck.FailingStreak = 0
} else {
if len(healthCheck.Status) < 1 {
// the container has no recorded status yet; report healthy until the failing streak exceeds the retry count
healthCheck.Status = HealthCheckHealthy
}
// increment failing streak
healthCheck.FailingStreak = healthCheck.FailingStreak + 1
// if failing streak > retries, then status to unhealthy
if int(healthCheck.FailingStreak) > c.HealthCheckConfig().Retries {
healthCheck.Status = HealthCheckUnhealthy
}
}
healthCheck.Log = append(healthCheck.Log, hcl)
if len(healthCheck.Log) > MaxHealthCheckNumberLogs {
healthCheck.Log = healthCheck.Log[1:]
}
newResults, err := json.Marshal(healthCheck)
if err != nil {
return errors.Wrapf(err, "unable to marshall healthchecks for writing")
}
return ioutil.WriteFile(c.healthCheckLogPath(), newResults, 0700)
}
// healthCheckLogPath returns the path to the container's health check log
func (c *Container) healthCheckLogPath() string {
return filepath.Join(filepath.Dir(c.LogPath()), "healthcheck.log")
}
// GetHealthCheckLog returns HealthCheck results by reading the container's
// health check log file. If the health check log file does not exist, then
// an empty healthcheck struct is returned
func (c *Container) GetHealthCheckLog() (inspect.HealthCheckResults, error) {
var healthCheck inspect.HealthCheckResults
if _, err := os.Stat(c.healthCheckLogPath()); os.IsNotExist(err) {
return healthCheck, nil
}
b, err := ioutil.ReadFile(c.healthCheckLogPath())
if err != nil {
return healthCheck, errors.Wrapf(err, "failed to read health check log file %s", c.healthCheckLogPath())
}
if err := json.Unmarshal(b, &healthCheck); err != nil {
return healthCheck, errors.Wrapf(err, "failed to unmarshal existing healthcheck results in %s", c.healthCheckLogPath())
}
return healthCheck, nil
}
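The hcWriteCloser type above exists only to give a bufio.Writer a no-op Close so it can be attached where an io.WriteCloser is expected. A self-contained sketch of that pattern, with hypothetical names, is below; note the explicit Flush before the captured output is read, since buffered writes are not visible in the backing buffer until flushed:

package main

import (
    "bufio"
    "bytes"
    "fmt"
    "io"
)

// nopWriteCloser wraps a bufio.Writer and adds a Close that does nothing.
type nopWriteCloser struct {
    *bufio.Writer
}

func (n nopWriteCloser) Close() error { return nil }

func main() {
    var capture bytes.Buffer
    bw := bufio.NewWriter(&capture)
    var wc io.WriteCloser = nopWriteCloser{bw}

    fmt.Fprintln(wc, "healthy")
    _ = wc.Close() // no-op by design

    // Flush before reading; otherwise short writes may still sit in the bufio buffer.
    _ = bw.Flush()
    fmt.Print(capture.String()) // prints "healthy"
}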

View File

@ -3,11 +3,12 @@ package inspect
import (
"time"
"github.com/containers/image/manifest"
"github.com/cri-o/ocicni/pkg/ocicni"
"github.com/docker/go-connections/nat"
"github.com/opencontainers/go-digest"
"github.com/opencontainers/image-spec/specs-go/v1"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-spec/specs-go"
)
// ContainerData holds the podman inspect data for a container
@ -78,24 +79,25 @@ type HostConfig struct {
// CtrConfig holds information about the container configuration
type CtrConfig struct {
Hostname string `json:"Hostname"`
DomainName string `json:"Domainname"` //TODO
User specs.User `json:"User"`
AttachStdin bool `json:"AttachStdin"` //TODO
AttachStdout bool `json:"AttachStdout"` //TODO
AttachStderr bool `json:"AttachStderr"` //TODO
Tty bool `json:"Tty"`
OpenStdin bool `json:"OpenStdin"`
StdinOnce bool `json:"StdinOnce"` //TODO
Env []string `json:"Env"`
Cmd []string `json:"Cmd"`
Image string `json:"Image"`
Volumes map[string]struct{} `json:"Volumes"`
WorkingDir string `json:"WorkingDir"`
Entrypoint string `json:"Entrypoint"`
Labels map[string]string `json:"Labels"`
Annotations map[string]string `json:"Annotations"`
StopSignal uint `json:"StopSignal"`
Hostname string `json:"Hostname"`
DomainName string `json:"Domainname"` //TODO
User specs.User `json:"User"`
AttachStdin bool `json:"AttachStdin"` //TODO
AttachStdout bool `json:"AttachStdout"` //TODO
AttachStderr bool `json:"AttachStderr"` //TODO
Tty bool `json:"Tty"`
OpenStdin bool `json:"OpenStdin"`
StdinOnce bool `json:"StdinOnce"` //TODO
Env []string `json:"Env"`
Cmd []string `json:"Cmd"`
Image string `json:"Image"`
Volumes map[string]struct{} `json:"Volumes"`
WorkingDir string `json:"WorkingDir"`
Entrypoint string `json:"Entrypoint"`
Labels map[string]string `json:"Labels"`
Annotations map[string]string `json:"Annotations"`
StopSignal uint `json:"StopSignal"`
Healthcheck *manifest.Schema2HealthConfig `json:"Healthcheck,omitempty"`
}
// LogConfig holds the log information for a container
@ -178,18 +180,19 @@ type ContainerInspectData struct {
// ContainerInspectState represents the state of a container.
type ContainerInspectState struct {
OciVersion string `json:"OciVersion"`
Status string `json:"Status"`
Running bool `json:"Running"`
Paused bool `json:"Paused"`
Restarting bool `json:"Restarting"` // TODO
OOMKilled bool `json:"OOMKilled"`
Dead bool `json:"Dead"`
Pid int `json:"Pid"`
ExitCode int32 `json:"ExitCode"`
Error string `json:"Error"` // TODO
StartedAt time.Time `json:"StartedAt"`
FinishedAt time.Time `json:"FinishedAt"`
OciVersion string `json:"OciVersion"`
Status string `json:"Status"`
Running bool `json:"Running"`
Paused bool `json:"Paused"`
Restarting bool `json:"Restarting"` // TODO
OOMKilled bool `json:"OOMKilled"`
Dead bool `json:"Dead"`
Pid int `json:"Pid"`
ExitCode int32 `json:"ExitCode"`
Error string `json:"Error"` // TODO
StartedAt time.Time `json:"StartedAt"`
FinishedAt time.Time `json:"FinishedAt"`
Healthcheck HealthCheckResults `json:"Healthcheck,omitempty"`
}
// NetworkSettings holds information about the network settings of the container
@ -227,3 +230,25 @@ type ImageResult struct {
Labels map[string]string
Dangling bool
}
// HealthCheckResults describes the results/logs from a healthcheck
type HealthCheckResults struct {
// Status is either healthy or unhealthy
Status string `json:"Status"`
// FailingStreak is the number of consecutive failed healthchecks
FailingStreak int `json:"FailingStreak"`
// Log describes healthcheck attempts and results
Log []HealthCheckLog `json:"Log"`
}
// HealthCheckLog describes the results of a single healthcheck
type HealthCheckLog struct {
// Start time as string
Start string `json:"Start"`
// End time as a string
End string `json:"End"`
// ExitCode is 0 or 1
ExitCode int `json:"ExitCode"`
// Output is the stdout/stderr from the healthcheck command
Output string `json:"Output"`
}
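To illustrate the on-disk shape these two types produce, here is a small sketch that builds and marshals a result like updateHealthCheckLog writes (indented here for readability; it assumes the import path shown earlier in this change, and the timestamp values are invented):

package main

import (
    "encoding/json"
    "fmt"
    "time"

    "github.com/containers/libpod/pkg/inspect"
)

func main() {
    now := time.Now()
    res := inspect.HealthCheckResults{
        Status:        "healthy",
        FailingStreak: 0,
        Log: []inspect.HealthCheckLog{{
            Start:    now.Format(time.RFC3339Nano),
            End:      now.Add(time.Second).Format(time.RFC3339Nano),
            ExitCode: 0,
            Output:   "",
        }},
    }
    b, err := json.MarshalIndent(res, "", "  ")
    if err != nil {
        panic(err)
    }
    // Prints an object with "Status", "FailingStreak" and a "Log" array of attempts.
    fmt.Println(string(b))
}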

View File

@ -87,7 +87,6 @@ type CreateConfig struct {
Env map[string]string //env
ExposedPorts map[nat.Port]struct{}
GroupAdd []string // group-add
HasHealthCheck bool
HealthCheck *manifest.Schema2HealthConfig
HostAdd []string //add-host
Hostname string //hostname
@ -562,7 +561,7 @@ func (c *CreateConfig) GetContainerCreateOptions(runtime *libpod.Runtime, pod *l
// Always use a cleanup process to clean up Podman after termination
options = append(options, libpod.WithExitCommand(c.createExitCommand()))
if c.HasHealthCheck {
if c.HealthCheck != nil {
options = append(options, libpod.WithHealthCheck(c.HealthCheck))
logrus.Debugf("New container has a health check")
}
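WithHealthCheck is consumed here as one of libpod's container-create options. A generic, self-contained sketch of that functional-option pattern (hypothetical names; not libpod's actual implementation):

package main

import "fmt"

// container stands in for the real libpod container; the fields are illustrative.
type container struct {
    healthCheckCmd []string
}

// ctrCreateOption mirrors the shape of a functional option: it mutates a
// container being created and may return an error.
type ctrCreateOption func(*container) error

// withHealthCheck returns an option that records the healthcheck command.
func withHealthCheck(cmd []string) ctrCreateOption {
    return func(c *container) error {
        c.healthCheckCmd = cmd
        return nil
    }
}

func newContainer(opts ...ctrCreateOption) (*container, error) {
    c := &container{}
    for _, opt := range opts {
        if err := opt(c); err != nil {
            return nil, err
        }
    }
    return c, nil
}

func main() {
    c, _ := newContainer(withHealthCheck([]string{"CMD-SHELL", "curl -f http://localhost"}))
    fmt.Println(c.healthCheckCmd)
}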

View File

@ -719,4 +719,28 @@ USER mail`
Expect(session.OutputToString()).To(Not(ContainSubstring("/dev/shm type tmpfs (ro,")))
})
It("podman run with bad healthcheck interval", func() {
session := podmanTest.Podman([]string{"run", "-dt", "--healthcheck-command", "foo", "--healthcheck-interval", "0.5s", ALPINE, "top"})
session.Wait()
Expect(session.ExitCode()).ToNot(Equal(0))
})
It("podman run with bad healthcheck retries", func() {
session := podmanTest.Podman([]string{"run", "-dt", "--healthcheck-command", "foo", "--healthcheck-retries", "0", ALPINE, "top"})
session.Wait()
Expect(session.ExitCode()).ToNot(Equal(0))
})
It("podman run with bad healthcheck timeout", func() {
session := podmanTest.Podman([]string{"run", "-dt", "--healthcheck-command", "foo", "--healthcheck-timeout", "0s", ALPINE, "top"})
session.Wait()
Expect(session.ExitCode()).ToNot(Equal(0))
})
It("podman run with bad healthcheck start-period", func() {
session := podmanTest.Podman([]string{"run", "-dt", "--healthcheck-command", "foo", "--healthcheck-start-period", "-1s", ALPINE, "top"})
session.Wait()
Expect(session.ExitCode()).ToNot(Equal(0))
})
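// A complementary happy-path case (a sketch, not part of the original change) could assert
// that a valid healthcheck configuration is accepted:
It("podman run with valid healthcheck options", func() {
    session := podmanTest.Podman([]string{"run", "-dt", "--healthcheck-command", "ls", "--healthcheck-interval", "30s", ALPINE, "top"})
    session.Wait()
    Expect(session.ExitCode()).To(Equal(0))
})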
})