diff --git a/cmd/podman/kube/play.go b/cmd/podman/kube/play.go index 2292899744..e8bcc29cf6 100644 --- a/cmd/podman/kube/play.go +++ b/cmd/podman/kube/play.go @@ -181,18 +181,19 @@ func playFlags(cmd *cobra.Command) { flags.StringVar(&playOptions.ContextDir, contextDirFlagName, "", "Path to top level of context directory") _ = cmd.RegisterFlagCompletionFunc(contextDirFlagName, completion.AutocompleteDefault) - // NOTE: The service-container flag is marked as hidden as it - // is purely designed for running kube-play or play-kube in systemd units. - // It is not something users should need to know or care about. - // - // Having a flag rather than an env variable is cleaner. - serviceFlagName := "service-container" - flags.BoolVar(&playOptions.ServiceContainer, serviceFlagName, false, "Starts a service container before all pods") - _ = flags.MarkHidden("service-container") - flags.StringVar(&playOptions.SignaturePolicy, "signature-policy", "", "`Pathname` of signature policy file (not usually used)") _ = flags.MarkHidden("signature-policy") + + // Below flags are local-only and hidden since they are used in + // kube-play's systemd integration only and hence hidden from + // users. + serviceFlagName := "service-container" + flags.BoolVar(&playOptions.ServiceContainer, serviceFlagName, false, "Starts a service container before all pods") + _ = flags.MarkHidden(serviceFlagName) + exitFlagName := "service-exit-code-propagation" + flags.StringVar(&playOptions.ExitCodePropagation, exitFlagName, "", "Exit-code propagation of the service container") + _ = flags.MarkHidden(exitFlagName) } } @@ -450,6 +451,9 @@ func kubeplay(body io.Reader) error { if err != nil { return err } + if report.ExitCode != nil { + registry.SetExitCode(int(*report.ExitCode)) + } if err := printPlayReport(report); err != nil { return err } diff --git a/docs/source/markdown/podman-container-inspect.1.md.in b/docs/source/markdown/podman-container-inspect.1.md.in index a8b1a8bb1c..b9e650690a 100644 --- a/docs/source/markdown/podman-container-inspect.1.md.in +++ b/docs/source/markdown/podman-container-inspect.1.md.in @@ -20,46 +20,47 @@ The keys of the returned JSON can be used as the values for the --format flag (s Valid placeholders for the Go template are listed below: -| **Placeholder** | **Description** | -| ----------------- | ------------------ | -| .AppArmorProfile | AppArmor profile (string) | -| .Args | Command-line arguments (array of strings) | -| .BoundingCaps | Bounding capability set (array of strings) | -| .Config ... | Structure with config info | -| .ConmonPidFile | Path to file containing conmon pid (string) | -| .Created | Container creation time (string, ISO3601) | -| .Dependencies | Dependencies (array of strings) | -| .Driver | Storage driver (string) | -| .EffectiveCaps | Effective capability set (array of strings) | -| .ExecIDs | Exec IDs (array of strings) | -| .GraphDriver ... | Further details of graph driver (struct) | -| .HostConfig ... | Host config details (struct) | -| .HostnamePath | Path to file containing hostname (string) | -| .HostsPath | Path to container /etc/hosts file (string) | -| .ID | Container ID (full 64-char hash) | -| .Image | Container image ID (64-char hash) | -| .ImageDigest | Container image digest (sha256:+64-char hash) | -| .ImageName | Container image name (string) | -| .IsInfra | Is this an infra container? (string: true/false) | -| .IsService | Is this a service container? 
(string: true/false) | -| .MountLabel | SELinux label of mount (string) | -| .Mounts | Mounts (array of strings) | -| .Name | Container name (string) | -| .Namespace | Container namespace (string) | -| .NetworkSettings ... | Network settings (struct) | -| .OCIConfigPath | Path to OCI config file (string) | -| .OCIRuntime | OCI runtime name (string) | -| .Path | Path to container command (string) | -| .PidFile | Path to file containing container PID (string) | -| .Pod | Parent pod (string) | -| .ProcessLabel | SELinux label of process (string) | -| .ResolvConfPath | Path to container's resolv.conf file (string) | -| .RestartCount | Number of times container has been restarted (int) | -| .Rootfs | Container rootfs (string) | -| .SizeRootFs | Size of rootfs, in bytes [1] | -| .SizeRw | Size of upper (R/W) container layer, in bytes [1] | -| .State ... | Container state info (struct) | -| .StaticDir | Path to container metadata dir (string) | +| **Placeholder** | **Description** | +| ------------------------ | -------------------------------------------------- | +| .AppArmorProfile | AppArmor profile (string) | +| .Args | Command-line arguments (array of strings) | +| .BoundingCaps | Bounding capability set (array of strings) | +| .Config ... | Structure with config info | +| .ConmonPidFile | Path to file containing conmon pid (string) | +| .Created | Container creation time (string, ISO3601) | +| .Dependencies | Dependencies (array of strings) | +| .Driver | Storage driver (string) | +| .EffectiveCaps | Effective capability set (array of strings) | +| .ExecIDs | Exec IDs (array of strings) | +| .GraphDriver ... | Further details of graph driver (struct) | +| .HostConfig ... | Host config details (struct) | +| .HostnamePath | Path to file containing hostname (string) | +| .HostsPath | Path to container /etc/hosts file (string) | +| .ID | Container ID (full 64-char hash) | +| .Image | Container image ID (64-char hash) | +| .ImageDigest | Container image digest (sha256:+64-char hash) | +| .ImageName | Container image name (string) | +| .IsInfra | Is this an infra container? (string: true/false) | +| .IsService | Is this a service container? (string: true/false) | +| .KubeExitCodePropagation | Kube exit-code propagation (string) | +| .MountLabel | SELinux label of mount (string) | +| .Mounts | Mounts (array of strings) | +| .Name | Container name (string) | +| .Namespace | Container namespace (string) | +| .NetworkSettings ... | Network settings (struct) | +| .OCIConfigPath | Path to OCI config file (string) | +| .OCIRuntime | OCI runtime name (string) | +| .Path | Path to container command (string) | +| .PidFile | Path to file containing container PID (string) | +| .Pod | Parent pod (string) | +| .ProcessLabel | SELinux label of process (string) | +| .ResolvConfPath | Path to container's resolv.conf file (string) | +| .RestartCount | Number of times container has been restarted (int) | +| .Rootfs | Container rootfs (string) | +| .SizeRootFs | Size of rootfs, in bytes [1] | +| .SizeRw | Size of upper (R/W) container layer, in bytes [1] | +| .State ... 
| Container state info (struct) | +| .StaticDir | Path to container metadata dir (string) | [1] This format specifier requires the **--size** option diff --git a/docs/source/markdown/podman-systemd.unit.5.md b/docs/source/markdown/podman-systemd.unit.5.md index 78ecc26ef5..c7b615c7da 100644 --- a/docs/source/markdown/podman-systemd.unit.5.md +++ b/docs/source/markdown/podman-systemd.unit.5.md @@ -481,15 +481,15 @@ There is only one required key, `Yaml`, which defines the path to the Kubernetes Valid options for `[Kube]` are listed below: -| **[Kube] options** | **podman kube play equivalent** | -| ----------------- | ------------------ | -| ConfigMap=/tmp/config.map | --config-map /tmp/config.map | -| LogDriver=journald | --log-driver journald | -| Network=host | --net host | -| PodmanArgs=--annotation=key=value | --annotation=key=value | -| PublishPort=59-60 | --publish=59-60 | -| UserNS=keep-id:uid=200,gid=210 | --userns keep-id:uid=200,gid=210 | -| Yaml=/tmp/kube.yaml | podman kube play /tmp/kube.yaml | +| **[Kube] options** | **podman kube play equivalent** | +| ----------------------------------- | ------------------------------------------- | +| ConfigMap=/tmp/config.map | --config-map /tmp/config.map | +| LogDriver=journald | --log-driver journald | +| Network=host | --net host | +| PodmanArgs=\-\-annotation=key=value | --annotation=key=value | +| PublishPort=59-60 | --publish=59-60 | +| UserNS=keep-id:uid=200,gid=210 | --userns keep-id:uid=200,gid=210 | +| Yaml=/tmp/kube.yaml | podman kube play /tmp/kube.yaml | Supported keys in the `[Kube]` section are: @@ -501,6 +501,15 @@ it may be absolute or relative to the location of the unit file. This key may be used multiple times +### `ExitCodePropagation=` + +Control how the main PID of the systemd service should exit. The following values are supported: +- `all`: exit non-zero if all containers have failed (i.e., exited non-zero) +- `any`: exit non-zero if any container has failed +- `none`: exit zero and ignore failed containers + +The current default value is `none`. + ### `LogDriver=` Set the log-driver Podman uses when running the container. diff --git a/libpod/boltdb_state.go b/libpod/boltdb_state.go index 174dffeeb0..3edd6e6d47 100644 --- a/libpod/boltdb_state.go +++ b/libpod/boltdb_state.go @@ -1451,7 +1451,8 @@ func (s *BoltState) GetContainerExitCodeTimeStamp(id string) (*time.Time, error) }) } -// PruneExitCodes removes exit codes older than 5 minutes. +// PruneExitCodes removes exit codes older than 5 minutes unless the associated +// container still exists. func (s *BoltState) PruneContainerExitCodes() error { if !s.valid { return define.ErrDBClosed @@ -1472,7 +1473,17 @@ func (s *BoltState) PruneContainerExitCodes() error { return err } + ctrsBucket, err := getCtrBucket(tx) + if err != nil { + return err + } + return timeStampBucket.ForEach(func(rawID, rawTimeStamp []byte) error { + if ctrsBucket.Bucket(rawID) != nil { + // If the container still exists, don't prune + // its exit code since we may still need it. 
+ return nil + } var timeStamp time.Time if err := timeStamp.UnmarshalText(rawTimeStamp); err != nil { return fmt.Errorf("converting raw time stamp %v of container %s from DB: %w", rawTimeStamp, string(rawID), err) diff --git a/libpod/container_config.go b/libpod/container_config.go index 6aabc817ac..42b565dbb4 100644 --- a/libpod/container_config.go +++ b/libpod/container_config.go @@ -360,6 +360,8 @@ type ContainerMiscConfig struct { CgroupParent string `json:"cgroupParent"` // GroupEntry specifies arbitrary data to append to a file. GroupEntry string `json:"group_entry,omitempty"` + // KubeExitCodePropagation of the service container. + KubeExitCodePropagation define.KubeExitCodePropagation `json:"kubeExitCodePropagation"` // LogPath log location LogPath string `json:"logPath"` // LogTag is the tag used for logging diff --git a/libpod/container_inspect.go b/libpod/container_inspect.go index 0da2a4d059..d6f404c701 100644 --- a/libpod/container_inspect.go +++ b/libpod/container_inspect.go @@ -141,30 +141,31 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver CheckpointLog: runtimeInfo.CheckpointLog, RestoreLog: runtimeInfo.RestoreLog, }, - Image: config.RootfsImageID, - ImageName: config.RootfsImageName, - Namespace: config.Namespace, - Rootfs: config.Rootfs, - Pod: config.Pod, - ResolvConfPath: resolvPath, - HostnamePath: hostnamePath, - HostsPath: hostsPath, - StaticDir: config.StaticDir, - OCIRuntime: config.OCIRuntime, - ConmonPidFile: config.ConmonPidFile, - PidFile: config.PidFile, - Name: config.Name, - RestartCount: int32(runtimeInfo.RestartCount), - Driver: driverData.Name, - MountLabel: config.MountLabel, - ProcessLabel: config.ProcessLabel, - AppArmorProfile: ctrSpec.Process.ApparmorProfile, - ExecIDs: execIDs, - GraphDriver: driverData, - Mounts: inspectMounts, - Dependencies: c.Dependencies(), - IsInfra: c.IsInfra(), - IsService: c.IsService(), + Image: config.RootfsImageID, + ImageName: config.RootfsImageName, + Namespace: config.Namespace, + Rootfs: config.Rootfs, + Pod: config.Pod, + ResolvConfPath: resolvPath, + HostnamePath: hostnamePath, + HostsPath: hostsPath, + StaticDir: config.StaticDir, + OCIRuntime: config.OCIRuntime, + ConmonPidFile: config.ConmonPidFile, + PidFile: config.PidFile, + Name: config.Name, + RestartCount: int32(runtimeInfo.RestartCount), + Driver: driverData.Name, + MountLabel: config.MountLabel, + ProcessLabel: config.ProcessLabel, + AppArmorProfile: ctrSpec.Process.ApparmorProfile, + ExecIDs: execIDs, + GraphDriver: driverData, + Mounts: inspectMounts, + Dependencies: c.Dependencies(), + IsInfra: c.IsInfra(), + IsService: c.IsService(), + KubeExitCodePropagation: config.KubeExitCodePropagation.String(), } if config.RootfsImageID != "" { // May not be set if the container was created with --rootfs diff --git a/libpod/define/container_inspect.go b/libpod/define/container_inspect.go index 750ddf1aeb..0309a8dde0 100644 --- a/libpod/define/container_inspect.go +++ b/libpod/define/container_inspect.go @@ -654,44 +654,45 @@ type InspectNetworkSettings struct { // compatible with `docker inspect` JSON, but additional fields have been added // as required to share information not in the original output. 
type InspectContainerData struct { - ID string `json:"Id"` - Created time.Time `json:"Created"` - Path string `json:"Path"` - Args []string `json:"Args"` - State *InspectContainerState `json:"State"` - Image string `json:"Image"` - ImageDigest string `json:"ImageDigest"` - ImageName string `json:"ImageName"` - Rootfs string `json:"Rootfs"` - Pod string `json:"Pod"` - ResolvConfPath string `json:"ResolvConfPath"` - HostnamePath string `json:"HostnamePath"` - HostsPath string `json:"HostsPath"` - StaticDir string `json:"StaticDir"` - OCIConfigPath string `json:"OCIConfigPath,omitempty"` - OCIRuntime string `json:"OCIRuntime,omitempty"` - ConmonPidFile string `json:"ConmonPidFile"` - PidFile string `json:"PidFile"` - Name string `json:"Name"` - RestartCount int32 `json:"RestartCount"` - Driver string `json:"Driver"` - MountLabel string `json:"MountLabel"` - ProcessLabel string `json:"ProcessLabel"` - AppArmorProfile string `json:"AppArmorProfile"` - EffectiveCaps []string `json:"EffectiveCaps"` - BoundingCaps []string `json:"BoundingCaps"` - ExecIDs []string `json:"ExecIDs"` - GraphDriver *DriverData `json:"GraphDriver"` - SizeRw *int64 `json:"SizeRw,omitempty"` - SizeRootFs int64 `json:"SizeRootFs,omitempty"` - Mounts []InspectMount `json:"Mounts"` - Dependencies []string `json:"Dependencies"` - NetworkSettings *InspectNetworkSettings `json:"NetworkSettings"` - Namespace string `json:"Namespace"` - IsInfra bool `json:"IsInfra"` - IsService bool `json:"IsService"` - Config *InspectContainerConfig `json:"Config"` - HostConfig *InspectContainerHostConfig `json:"HostConfig"` + ID string `json:"Id"` + Created time.Time `json:"Created"` + Path string `json:"Path"` + Args []string `json:"Args"` + State *InspectContainerState `json:"State"` + Image string `json:"Image"` + ImageDigest string `json:"ImageDigest"` + ImageName string `json:"ImageName"` + Rootfs string `json:"Rootfs"` + Pod string `json:"Pod"` + ResolvConfPath string `json:"ResolvConfPath"` + HostnamePath string `json:"HostnamePath"` + HostsPath string `json:"HostsPath"` + StaticDir string `json:"StaticDir"` + OCIConfigPath string `json:"OCIConfigPath,omitempty"` + OCIRuntime string `json:"OCIRuntime,omitempty"` + ConmonPidFile string `json:"ConmonPidFile"` + PidFile string `json:"PidFile"` + Name string `json:"Name"` + RestartCount int32 `json:"RestartCount"` + Driver string `json:"Driver"` + MountLabel string `json:"MountLabel"` + ProcessLabel string `json:"ProcessLabel"` + AppArmorProfile string `json:"AppArmorProfile"` + EffectiveCaps []string `json:"EffectiveCaps"` + BoundingCaps []string `json:"BoundingCaps"` + ExecIDs []string `json:"ExecIDs"` + GraphDriver *DriverData `json:"GraphDriver"` + SizeRw *int64 `json:"SizeRw,omitempty"` + SizeRootFs int64 `json:"SizeRootFs,omitempty"` + Mounts []InspectMount `json:"Mounts"` + Dependencies []string `json:"Dependencies"` + NetworkSettings *InspectNetworkSettings `json:"NetworkSettings"` + Namespace string `json:"Namespace"` + IsInfra bool `json:"IsInfra"` + IsService bool `json:"IsService"` + KubeExitCodePropagation string `json:"KubeExitCodePropagation"` + Config *InspectContainerConfig `json:"Config"` + HostConfig *InspectContainerHostConfig `json:"HostConfig"` } // InspectExecSession contains information about a given exec session. 
diff --git a/libpod/define/exit_code_propagation.go b/libpod/define/exit_code_propagation.go new file mode 100644 index 0000000000..eff69b1c0e --- /dev/null +++ b/libpod/define/exit_code_propagation.go @@ -0,0 +1,54 @@ +package define + +import "fmt" + +// KubeExitCodePropagation defines an exit policy of kube workloads. +type KubeExitCodePropagation int + +const ( + // Invalid exit policy for a proper type system. + KubeExitCodePropagationInvalid KubeExitCodePropagation = iota + // Exit 0 regardless of any failed containers. + KubeExitCodePropagationNone + // Exit non-zero if all containers failed. + KubeExitCodePropagationAll + // Exit non-zero if any container failed. + KubeExitCodePropagationAny + + // String representations. + strKubeECPInvalid = "invalid" + strKubeECPNone = "none" + strKubeECPAll = "all" + strKubeECPAny = "any" +) + +// Parse the specified kube exit-code propagation. Return an error if an +// unsupported value is specified. +func ParseKubeExitCodePropagation(value string) (KubeExitCodePropagation, error) { + switch value { + case strKubeECPNone, "": + return KubeExitCodePropagationNone, nil + case strKubeECPAll: + return KubeExitCodePropagationAll, nil + case strKubeECPAny: + return KubeExitCodePropagationAny, nil + default: + return KubeExitCodePropagationInvalid, fmt.Errorf("unsupported exit-code propagation %q", value) + } +} + +// Return the string representation of the KubeExitCodePropagation. +func (k KubeExitCodePropagation) String() string { + switch k { + case KubeExitCodePropagationNone: + return strKubeECPNone + case KubeExitCodePropagationAll: + return strKubeECPAll + case KubeExitCodePropagationAny: + return strKubeECPAny + case KubeExitCodePropagationInvalid: + return strKubeECPInvalid + default: + return "unknown value" + } +} diff --git a/libpod/options.go b/libpod/options.go index a974caeebf..54cc5b0ae6 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -1579,15 +1579,16 @@ func withIsInfra() CtrCreateOption { } // WithIsService allows us to differentiate between service containers and other container -// within the container config -func WithIsService() CtrCreateOption { +// within the container config. It also sets the exit-code propagation of the +// service container. +func WithIsService(ecp define.KubeExitCodePropagation) CtrCreateOption { return func(ctr *Container) error { if ctr.valid { return define.ErrCtrFinalized } ctr.config.IsService = true - + ctr.config.KubeExitCodePropagation = ecp return nil } } diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index e13fb66654..563832f457 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -957,11 +957,11 @@ func (r *Runtime) evictContainer(ctx context.Context, idOrName string, removeVol } if c.IsService() { - canStop, err := c.canStopServiceContainer() + report, err := c.canStopServiceContainer() if err != nil { return id, err } - if !canStop { + if !report.canBeStopped { return id, fmt.Errorf("container %s is the service container of pod(s) %s and cannot be removed without removing the pod(s)", c.ID(), strings.Join(c.state.Service.Pods, ",")) } } diff --git a/libpod/service.go b/libpod/service.go index 976ccd88bf..a4b66eae9f 100644 --- a/libpod/service.go +++ b/libpod/service.go @@ -7,6 +7,7 @@ import ( "github.com/containers/podman/v4/libpod/define" "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" ) // A service consists of one or more pods. 
The service container is started @@ -59,17 +60,29 @@ func (c *Container) IsService() bool { return c.config.IsService } +// serviceContainerReport bundles information when checking whether a service +// container can be stopped. +type serviceContainerReport struct { + // Indicates whether the service container can be stopped or not. + canBeStopped bool + // Number of all known containers below the service container. + numContainers int + // Number of containers below the service containers that exited + // non-zero. + failedContainers int +} + // canStopServiceContainerLocked returns true if all pods of the service are stopped. // Note that the method acquires the container lock. -func (c *Container) canStopServiceContainerLocked() (bool, error) { +func (c *Container) canStopServiceContainerLocked() (*serviceContainerReport, error) { c.lock.Lock() defer c.lock.Unlock() if err := c.syncContainer(); err != nil { - return false, err + return nil, err } if !c.IsService() { - return false, fmt.Errorf("internal error: checking service: container %s is not a service container", c.ID()) + return nil, fmt.Errorf("internal error: checking service: container %s is not a service container", c.ID()) } return c.canStopServiceContainer() @@ -77,14 +90,15 @@ func (c *Container) canStopServiceContainerLocked() (bool, error) { // canStopServiceContainer returns true if all pods of the service are stopped. // Note that the method expects the container to be locked. -func (c *Container) canStopServiceContainer() (bool, error) { +func (c *Container) canStopServiceContainer() (*serviceContainerReport, error) { + report := serviceContainerReport{canBeStopped: true} for _, id := range c.state.Service.Pods { pod, err := c.runtime.LookupPod(id) if err != nil { if errors.Is(err, define.ErrNoSuchPod) { continue } - return false, err + return nil, err } status, err := pod.GetPodStatus() @@ -92,19 +106,37 @@ func (c *Container) canStopServiceContainer() (bool, error) { if errors.Is(err, define.ErrNoSuchPod) { continue } - return false, err + return nil, err } - // We can only stop the service if all pods are done. switch status { case define.PodStateStopped, define.PodStateExited, define.PodStateErrored: - continue + podCtrs, err := c.runtime.state.PodContainers(pod) + if err != nil { + return nil, err + } + for _, pc := range podCtrs { + if pc.IsInfra() { + continue // ignore infra containers + } + exitCode, err := c.runtime.state.GetContainerExitCode(pc.ID()) + if err != nil { + return nil, err + } + if exitCode != 0 { + report.failedContainers++ + } + report.numContainers++ + } default: - return false, nil + // Service container cannot be stopped, so we can + // return early. + report.canBeStopped = false + return &report, nil } } - return true, nil + return &report, nil } // Checks whether the service container can be stopped and does so. @@ -125,21 +157,49 @@ func (p *Pod) maybeStopServiceContainer() error { // pod->container->servicePods hierarchy. 
p.runtime.queueWork(func() { logrus.Debugf("Pod %s has a service %s: checking if it can be stopped", p.ID(), serviceCtr.ID()) - canStop, err := serviceCtr.canStopServiceContainerLocked() + report, err := serviceCtr.canStopServiceContainerLocked() if err != nil { logrus.Errorf("Checking whether service of container %s can be stopped: %v", serviceCtr.ID(), err) return } - if !canStop { + if !report.canBeStopped { return } - logrus.Debugf("Stopping service container %s", serviceCtr.ID()) - if err := serviceCtr.Stop(); err != nil && !errors.Is(err, define.ErrCtrStopped) { - // Log this in debug mode so that we don't print out an error and confuse the user - // when the service container can't be stopped because it is in created state - // This can happen when an error happens during kube play and we are trying to - // clean up after the error. - logrus.Debugf("Error stopping service container %s: %v", serviceCtr.ID(), err) + + // Now either kill or stop the service container, depending on the configured exit policy. + stop := func() { + // Note that the service container runs catatonit which + // will exit gracefully on SIGINT. + logrus.Debugf("Stopping service container %s", serviceCtr.ID()) + if err := serviceCtr.Kill(uint(unix.SIGINT)); err != nil && !errors.Is(err, define.ErrCtrStateInvalid) { + logrus.Debugf("Error stopping service container %s: %v", serviceCtr.ID(), err) + } + } + + kill := func() { + logrus.Debugf("Killing service container %s", serviceCtr.ID()) + if err := serviceCtr.Kill(uint(unix.SIGKILL)); err != nil && !errors.Is(err, define.ErrCtrStateInvalid) { + logrus.Debugf("Error killing service container %s: %v", serviceCtr.ID(), err) + } + } + + switch serviceCtr.config.KubeExitCodePropagation { + case define.KubeExitCodePropagationNone: + stop() + case define.KubeExitCodePropagationAny: + if report.failedContainers > 0 { + kill() + } else { + stop() + } + case define.KubeExitCodePropagationAll: + if report.failedContainers == report.numContainers { + kill() + } else { + stop() + } + default: + logrus.Errorf("Internal error: cannot stop service container %s: unknown exit policy %q", serviceCtr.ID(), serviceCtr.config.KubeExitCodePropagation.String()) } }) return nil @@ -240,9 +300,8 @@ func (p *Pod) maybeRemoveServiceContainer() error { if !canRemove { return } - timeout := uint(0) logrus.Debugf("Removing service container %s", serviceCtr.ID()) - if err := p.runtime.RemoveContainer(context.Background(), serviceCtr, true, false, &timeout); err != nil { + if err := p.runtime.RemoveContainer(context.Background(), serviceCtr, true, false, nil); err != nil { if !errors.Is(err, define.ErrNoSuchCtr) { logrus.Errorf("Removing service container %s: %v", serviceCtr.ID(), err) } diff --git a/libpod/sqlite_state.go b/libpod/sqlite_state.go index 7c94c6f208..fd207acb36 100644 --- a/libpod/sqlite_state.go +++ b/libpod/sqlite_state.go @@ -943,7 +943,8 @@ func (s *SQLiteState) GetContainerExitCodeTimeStamp(id string) (*time.Time, erro return &result, nil } -// PruneExitCodes removes exit codes older than 5 minutes. +// PruneExitCodes removes exit codes older than 5 minutes unless the associated +// container still exists. 
func (s *SQLiteState) PruneContainerExitCodes() (defErr error) { if !s.valid { return define.ErrDBClosed @@ -963,7 +964,7 @@ func (s *SQLiteState) PruneContainerExitCodes() (defErr error) { } }() - if _, err := tx.Exec("DELETE FROM ContainerExitCode WHERE (Timestamp <= ?);", fiveMinsAgo); err != nil { + if _, err := tx.Exec("DELETE FROM ContainerExitCode WHERE (Timestamp <= ?) AND (ID NOT IN (SELECT ID FROM ContainerConfig))", fiveMinsAgo); err != nil { return fmt.Errorf("removing exit codes with timestamps older than 5 minutes: %w", err) } diff --git a/pkg/domain/entities/play.go b/pkg/domain/entities/play.go index 3989f96a68..184062278e 100644 --- a/pkg/domain/entities/play.go +++ b/pkg/domain/entities/play.go @@ -21,6 +21,9 @@ type PlayKubeOptions struct { // Down indicates whether to bring contents of a yaml file "down" // as in stop Down bool + // ExitCodePropagation decides how the main PID of the Kube service + // should exit depending on the containers' exit codes. + ExitCodePropagation string // Replace indicates whether to delete and recreate a yaml file Replace bool // Do not create /etc/hosts within the pod's containers, @@ -100,6 +103,8 @@ type PlayKubeReport struct { Secrets []PlaySecret // ServiceContainerID - ID of the service container if one is created ServiceContainerID string + // If set, exit with the specified exit code. + ExitCode *int32 } type KubePlayReport = PlayKubeReport diff --git a/pkg/domain/infra/abi/play.go b/pkg/domain/infra/abi/play.go index 420c9ebd30..c19178b68e 100644 --- a/pkg/domain/infra/abi/play.go +++ b/pkg/domain/infra/abi/play.go @@ -86,7 +86,12 @@ func (ic *ContainerEngine) createServiceContainer(ctx context.Context, name stri if err != nil { return nil, fmt.Errorf("creating runtime spec for service container: %w", err) } - opts = append(opts, libpod.WithIsService()) + + ecp, err := define.ParseKubeExitCodePropagation(options.ExitCodePropagation) + if err != nil { + return nil, err + } + opts = append(opts, libpod.WithIsService(ecp)) // Set the sd-notify mode to "ignore". Podman is responsible for // sending the notify messages when all containers are ready. 
@@ -348,9 +353,11 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options if err := notifyproxy.SendMessage("", message); err != nil { return nil, err } - if _, err := serviceContainer.Wait(ctx); err != nil { + exitCode, err := serviceContainer.Wait(ctx) + if err != nil { return nil, fmt.Errorf("waiting for service container: %w", err) } + report.ExitCode = &exitCode } report.ServiceContainerID = serviceContainer.ID() diff --git a/pkg/systemd/quadlet/quadlet.go b/pkg/systemd/quadlet/quadlet.go index ac45f6bd75..27f337aaeb 100644 --- a/pkg/systemd/quadlet/quadlet.go +++ b/pkg/systemd/quadlet/quadlet.go @@ -62,6 +62,7 @@ const ( KeyImage = "Image" KeyIP = "IP" KeyIP6 = "IP6" + KeyExitCodePropagation = "ExitCodePropagation" KeyLabel = "Label" KeyLogDriver = "LogDriver" KeyMount = "Mount" @@ -192,17 +193,18 @@ var ( // Supported keys in "Kube" group supportedKubeKeys = map[string]bool{ - KeyConfigMap: true, - KeyLogDriver: true, - KeyNetwork: true, - KeyPodmanArgs: true, - KeyPublishPort: true, - KeyRemapGID: true, - KeyRemapUID: true, - KeyRemapUIDSize: true, - KeyRemapUsers: true, - KeyUserNS: true, - KeyYaml: true, + KeyConfigMap: true, + KeyExitCodePropagation: true, + KeyLogDriver: true, + KeyNetwork: true, + KeyPodmanArgs: true, + KeyPublishPort: true, + KeyRemapGID: true, + KeyRemapUID: true, + KeyRemapUIDSize: true, + KeyRemapUsers: true, + KeyUserNS: true, + KeyYaml: true, } ) @@ -895,6 +897,10 @@ func ConvertKube(kube *parser.UnitFile, isUser bool) (*parser.UnitFile, error) { "--service-container=true", ) + if ecp, ok := kube.Lookup(KubeGroup, KeyExitCodePropagation); ok && len(ecp) > 0 { + execStart.addf("--service-exit-code-propagation=%s", ecp) + } + handleLogDriver(kube, KubeGroup, execStart) if err := handleUserRemap(kube, KubeGroup, execStart, isUser, false); err != nil { @@ -924,9 +930,11 @@ func ConvertKube(kube *parser.UnitFile, isUser bool) (*parser.UnitFile, error) { service.AddCmdline(ServiceGroup, "ExecStart", execStart.Args) + // Use `ExecStopPost` to make sure cleanup happens even in case of + // errors; otherwise containers, pods, etc. would be left behind. 
execStop := NewPodmanCmdline("kube", "down") execStop.add(yamlPath) - service.AddCmdline(ServiceGroup, "ExecStop", execStop.Args) + service.AddCmdline(ServiceGroup, "ExecStopPost", execStop.Args) return service, nil } diff --git a/test/e2e/quadlet/absolute.path.kube b/test/e2e/quadlet/absolute.path.kube index 88e8c7d8f2..2d389e7cee 100644 --- a/test/e2e/quadlet/absolute.path.kube +++ b/test/e2e/quadlet/absolute.path.kube @@ -3,9 +3,9 @@ ## assert-podman-final-args /opt/k8s/deployment.yml ## assert-podman-args "--replace" ## assert-podman-args "--service-container=true" -## assert-podman-stop-args "kube" -## assert-podman-stop-args "down" -## assert-podman-stop-final-args /opt/k8s/deployment.yml +## assert-podman-stop-post-args "kube" +## assert-podman-stop-post-args "down" +## assert-podman-stop-post-final-args /opt/k8s/deployment.yml ## assert-key-is "Unit" "RequiresMountsFor" "%t/containers" ## assert-key-is "Service" "KillMode" "mixed" ## assert-key-is "Service" "Type" "notify" diff --git a/test/e2e/quadlet/basic.kube b/test/e2e/quadlet/basic.kube index b186efc538..1f2bc16dc6 100644 --- a/test/e2e/quadlet/basic.kube +++ b/test/e2e/quadlet/basic.kube @@ -3,9 +3,9 @@ ## assert-podman-final-args-regex .*/podman_test.*/quadlet/deployment.yml ## assert-podman-args "--replace" ## assert-podman-args "--service-container=true" -## assert-podman-stop-args "kube" -## assert-podman-stop-args "down" -## assert-podman-stop-final-args-regex .*/podman_test.*/quadlet/deployment.yml +## assert-podman-stop-post-args "kube" +## assert-podman-stop-post-args "down" +## assert-podman-stop-post-final-args-regex .*/podman_test.*/quadlet/deployment.yml ## assert-key-is "Unit" "RequiresMountsFor" "%t/containers" ## assert-key-is "Service" "KillMode" "mixed" ## assert-key-is "Service" "Type" "notify" diff --git a/test/e2e/quadlet/exit_code_propagation.kube b/test/e2e/quadlet/exit_code_propagation.kube new file mode 100644 index 0000000000..150563b2b1 --- /dev/null +++ b/test/e2e/quadlet/exit_code_propagation.kube @@ -0,0 +1,5 @@ +[Kube] +Yaml=/opt/k8s/deployment.yml + +## assert-podman-args "--service-exit-code-propagation=all" +ExitCodePropagation=all diff --git a/test/e2e/quadlet_test.go b/test/e2e/quadlet_test.go index 4d6400c443..4eb059e6a5 100644 --- a/test/e2e/quadlet_test.go +++ b/test/e2e/quadlet_test.go @@ -286,6 +286,18 @@ func (t *quadletTestcase) assertStopPodmanFinalArgsRegex(args []string, unit *pa return t.assertPodmanFinalArgsRegex(args, unit, "ExecStop") } +func (t *quadletTestcase) assertStopPostPodmanArgs(args []string, unit *parser.UnitFile) bool { + return t.assertPodmanArgs(args, unit, "ExecStopPost") +} + +func (t *quadletTestcase) assertStopPostPodmanFinalArgs(args []string, unit *parser.UnitFile) bool { + return t.assertPodmanFinalArgs(args, unit, "ExecStopPost") +} + +func (t *quadletTestcase) assertStopPostPodmanFinalArgsRegex(args []string, unit *parser.UnitFile) bool { + return t.assertPodmanFinalArgsRegex(args, unit, "ExecStopPost") +} + func (t *quadletTestcase) assertSymlink(args []string, unit *parser.UnitFile) bool { Expect(args).To(HaveLen(2)) symlink := args[0] @@ -347,6 +359,13 @@ func (t *quadletTestcase) doAssert(check []string, unit *parser.UnitFile, sessio ok = t.assertStopPodmanFinalArgs(args, unit) case "assert-podman-stop-final-args-regex": ok = t.assertStopPodmanFinalArgsRegex(args, unit) + case "assert-podman-stop-post-args": + ok = t.assertStopPostPodmanArgs(args, unit) + case "assert-podman-stop-post-final-args": + ok = t.assertStopPostPodmanFinalArgs(args, unit) + 
case "assert-podman-stop-post-final-args-regex": + ok = t.assertStopPostPodmanFinalArgsRegex(args, unit) + default: return fmt.Errorf("Unsupported assertion %s", op) } @@ -463,9 +482,9 @@ var _ = Describe("quadlet system generator", func() { "## assert-podman-final-args-regex .*/podman_test.*/quadlet/deployment.yml", "## assert-podman-args \"--replace\"", "## assert-podman-args \"--service-container=true\"", - "## assert-podman-stop-args \"kube\"", - "## assert-podman-stop-args \"down\"", - "## assert-podman-stop-final-args-regex .*/podman_test.*/quadlet/deployment.yml", + "## assert-podman-stop-post-args \"kube\"", + "## assert-podman-stop-post-args \"down\"", + "## assert-podman-stop-post-final-args-regex .*/podman_test.*/quadlet/deployment.yml", "## assert-key-is \"Unit\" \"RequiresMountsFor\" \"%t/containers\"", "## assert-key-is \"Service\" \"KillMode\" \"mixed\"", "## assert-key-is \"Service\" \"Type\" \"notify\"", @@ -484,7 +503,7 @@ var _ = Describe("quadlet system generator", func() { "NotifyAccess=all", "SyslogIdentifier=%N", fmt.Sprintf("ExecStart=%s kube play --replace --service-container=true %s/deployment.yml", podmanPath, quadletDir), - fmt.Sprintf("ExecStop=%s kube down %s/deployment.yml", podmanPath, quadletDir), + fmt.Sprintf("ExecStopPost=%s kube down %s/deployment.yml", podmanPath, quadletDir), } Expect(current).To(Equal(expected)) @@ -580,6 +599,7 @@ var _ = Describe("quadlet system generator", func() { Entry("Kube - Publish IPv6 ports", "ports_ipv6.kube"), Entry("Kube - Logdriver", "logdriver.kube"), Entry("Kube - PodmanArgs", "podmanargs.kube"), + Entry("Kube - Exit Code Propagation", "exit_code_propagation.kube"), Entry("Network - Basic", "basic.network"), Entry("Network - Label", "label.network"), diff --git a/test/system/250-systemd.bats b/test/system/250-systemd.bats index f051b2146d..8e24440228 100644 --- a/test/system/250-systemd.bats +++ b/test/system/250-systemd.bats @@ -459,8 +459,7 @@ $name stderr" "logs work with passthrough" fi sleep 0.5 done - # The service is marked as failed as the service container exits non-zero. - is "$output" "failed" "systemd service transitioned to 'inactive' state: $service_name" + is "$output" "inactive" "systemd service transitioned to 'inactive' state: $service_name" # Now stop and start the service again. systemctl stop $service_name diff --git a/test/system/252-quadlet.bats b/test/system/252-quadlet.bats index 96b8f7b08f..26e8322f4f 100644 --- a/test/system/252-quadlet.bats +++ b/test/system/252-quadlet.bats @@ -439,8 +439,7 @@ EOF run_podman container inspect --format "{{.State.Status}}" test_pod-test is "$output" "running" "container should be started by systemd and hence be running" - # The service is marked as failed as the service container exits non-zero. 
- service_cleanup $QUADLET_SERVICE_NAME failed + service_cleanup $QUADLET_SERVICE_NAME inactive run_podman rmi $(pause_image) } @@ -638,4 +637,62 @@ EOF service_cleanup $QUADLET_SERVICE_NAME failed } +@test "quadlet - exit-code propagation" { + local quadlet_file=$PODMAN_TMPDIR/basic_$(random_string).kube + local yaml_file=$PODMAN_TMPDIR/$(random_string).yaml + + exit_tests=" +all | true | 0 | inactive +all | false | 137 | failed +none | false | 0 | inactive +" + while read exit_code_prop cmd exit_code service_state; do + cat > $yaml_file < $quadlet_file < $fname +} + +@test "podman kube play - exit-code propagation" { + fname=$PODMAN_TMPDIR/$(random_string).yaml + + # Create a test matrix with the following arguments: + # exit-code propagation | ctr1 command | ctr2 command | service-container exit code + exit_tests=" +all | true | true | 0 +all | true | false | 0 +all | false | false | 137 +any | true | true | 0 +any | false | true | 137 +any | false | false | 137 +none | true | true | 0 +none | true | false | 0 +none | false | false | 0 +" + + # I am sorry, this is a long test as we need to test the upper matrix + # twice. The first run is using the default sdnotify policy of "ignore". + # In this case, the service container serves as the main PID of the service + # to have a minimal resource footprint. The second run is using the + # "conmon" sdnotify policy in which case Podman needs to serve as the main + # PID to act as an sdnotify proxy; there Podman will wait for the service + # container to exit and reflects its exit code. + while read exit_code_prop cmd1 cmd2 exit_code; do + for sdnotify_policy in ignore conmon; do + generate_exit_code_yaml $fname $cmd1 $cmd2 $sdnotify_policy + yaml_sha=$(sha256sum $fname) + service_container="${yaml_sha:0:12}-service" + podman_exit=$exit_code + if [[ $sdnotify_policy == "ignore" ]];then + podman_exit=0 + fi + run_podman $podman_exit kube play --service-exit-code-propagation="$exit_code_prop" --service-container $fname + run_podman container inspect --format '{{.KubeExitCodePropagation}}' $service_container + is "$output" "$exit_code_prop" "service container has the expected policy set in its annotations" + run_podman wait $service_container + is "$output" "$exit_code" "service container reflects expected exit code $exit_code (policy: $policy, cmd1: $cmd1, cmd2: $cmd2)" + run_podman kube down $fname + done + done < <(parse_table "$exit_tests") + + # A final smoke test to make sure bogus policies lead to an error + run_podman 125 kube play --service-exit-code-propagation=bogus --service-container $fname + is "$output" "Error: unsupported exit-code propagation \"bogus\"" "error on unsupported exit-code propagation" + + run_podman rmi $(pause_image) +} # vim: filetype=sh
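
For reference, a minimal usage sketch of the new ExitCodePropagation= key. The unit path, YAML path, and podman binary path below are illustrative, and the generated argument order is approximate; it follows the ConvertKube change and the quadlet e2e expectations above:

    # /etc/containers/systemd/my-app.kube  (hypothetical unit)
    [Kube]
    Yaml=/opt/k8s/deployment.yml
    ExitCodePropagation=all

    # Quadlet then generates, roughly:
    #   ExecStart=/usr/bin/podman kube play --replace --service-container=true --service-exit-code-propagation=all /opt/k8s/deployment.yml
    #   ExecStopPost=/usr/bin/podman kube down /opt/k8s/deployment.yml

With `all`, once the pods have stopped and every non-infra container has exited non-zero, the service container is killed instead of stopped, exits 137, and the unit is marked failed (the `all | false | 137 | failed` row in the quadlet matrix above). Outside of quadlet, the same behavior is reachable with `podman kube play --service-container --service-exit-code-propagation=all <yaml>`, and the configured policy can be read back with `podman container inspect --format '{{.KubeExitCodePropagation}}' <service-container>`, as exercised in the kube-play system test.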