Merge pull request #25789 from jankaluza/23292

Replace podman pause image with rootfs.
openshift-merge-bot[bot] 2025-04-17 08:47:30 +00:00 committed by GitHub
commit 51c4df1316
17 changed files with 174 additions and 174 deletions
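
In short: podman previously built a local pause image (localhost/podman-pause:&lt;version&gt;-&lt;built&gt;) with Buildah the first time a pod was created, and used it as the infra container's image. This PR removes that build step. Default infra containers and service containers now get an empty overlay rootfs under the runtime's tmp dir, with the catatonit init binary bind-mounted in read-only and used as the entrypoint. A minimal sketch of the idea using the OCI runtime-spec types (the catatonit path below is illustrative; podman resolves the real one via FindInitBinary):

    package main

    import (
        "fmt"

        spec "github.com/opencontainers/runtime-spec/specs-go"
    )

    func main() {
        // Illustrative path; distributions install catatonit in different places.
        catatonit := "/usr/libexec/podman/catatonit"

        // The pause "image" reduces to one read-only bind mount plus an entrypoint.
        m := spec.Mount{
            Type:        "bind",
            Source:      catatonit,
            Destination: "/catatonit",
            Options:     []string{"bind", "ro", "nosuid", "nodev"},
        }
        entrypoint := []string{"/catatonit", "-P"}

        fmt.Println(m, entrypoint)
    }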

View File

@@ -1266,6 +1266,11 @@ func (c *Container) IsInfra() bool {
 	return c.config.IsInfra
 }
 
+// IsDefaultInfra returns whether the container is a default infra container generated directly by podman
+func (c *Container) IsDefaultInfra() bool {
+	return c.config.IsDefaultInfra
+}
+
 // IsInitCtr returns whether the container is an init container
 func (c *Container) IsInitCtr() bool {
 	return len(c.config.InitContainerType) > 0

View File

@@ -404,6 +404,9 @@ type ContainerMiscConfig struct {
 	// IsInfra is a bool indicating whether this container is an infra container used for
 	// sharing kernel namespaces in a pod
 	IsInfra bool `json:"pause"`
+	// IsDefaultInfra is a bool indicating whether this container is a default infra container
+	// using the default rootfs with catatonit bind-mounted into it.
+	IsDefaultInfra bool `json:"defaultPause"`
 	// IsService is a bool indicating whether this container is a service container used for
 	// tracking the life cycle of K8s service.
 	IsService bool `json:"isService"`

View File

@@ -178,6 +178,51 @@ func getOverlayUpperAndWorkDir(options []string) (string, string, error) {
 	return upperDir, workDir, nil
 }
 
+// Internal only function which creates the Rootfs for default internal
+// pause image, configures the Rootfs in the Container and returns
+// the mount-point for the /catatonit. This mount-point should be added
+// to the Container spec.
+func (c *Container) prepareInitRootfs() (spec.Mount, error) {
+	newMount := spec.Mount{
+		Type:        define.TypeBind,
+		Source:      "",
+		Destination: "",
+		Options:     append(bindOptions, "ro", "nosuid", "nodev"),
+	}
+
+	tmpDir, err := c.runtime.TmpDir()
+	if err != nil {
+		return newMount, fmt.Errorf("getting runtime temporary directory: %w", err)
+	}
+	tmpDir = filepath.Join(tmpDir, "infra-container")
+	err = os.MkdirAll(tmpDir, 0755)
+	if err != nil {
+		return newMount, fmt.Errorf("creating infra container temporary directory: %w", err)
+	}
+
+	// Also look into the path as some distributions install catatonit in
+	// /usr/bin.
+	catatonitPath, err := c.runtime.config.FindInitBinary()
+	if err != nil {
+		return newMount, fmt.Errorf("finding catatonit binary: %w", err)
+	}
+	catatonitPath, err = filepath.EvalSymlinks(catatonitPath)
+	if err != nil {
+		return newMount, fmt.Errorf("follow symlink to catatonit binary: %w", err)
+	}
+
+	newMount.Source = catatonitPath
+	newMount.Destination = "/" + filepath.Base(catatonitPath)
+
+	c.config.Rootfs = tmpDir
+	c.config.RootfsOverlay = true
+	if len(c.config.Entrypoint) == 0 {
+		c.config.Entrypoint = []string{"/" + filepath.Base(catatonitPath), "-P"}
+		c.config.Spec.Process.Args = c.config.Entrypoint
+	}
+
+	return newMount, nil
+}
+
 // Generate spec for a container
 // Accepts a map of the container's dependencies
 func (c *Container) generateSpec(ctx context.Context) (s *spec.Spec, cleanupFuncRet func(), err error) {
@@ -380,6 +425,14 @@ func (c *Container) generateSpec(ctx context.Context) (s *spec.Spec, cleanupFunc
 	c.setProcessLabel(&g)
 	c.setMountLabel(&g)
 
+	if c.IsDefaultInfra() || c.IsService() {
+		newMount, err := c.prepareInitRootfs()
+		if err != nil {
+			return nil, nil, err
+		}
+		g.AddMount(newMount)
+	}
+
 	// Add bind mounts to container
 	for dstPath, srcPath := range c.state.BindMounts {
 		newMount := spec.Mount{
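
Taken together, prepareInitRootfs leaves a default infra container configured roughly as below. A sketch only; the real values come from c.runtime.TmpDir() and FindInitBinary(), and the paths here are illustrative:

    package main

    import "fmt"

    func main() {
        rootfs := "/run/libpod/infra-container"    // empty dir used as the root FS
        rootfsOverlay := true                      // writable overlay on top of it
        entrypoint := []string{"/catatonit", "-P"} // set only when none was given

        // plus one read-only bind mount injected into the generated OCI spec:
        // {Type: "bind", Source: "/usr/libexec/podman/catatonit",
        //  Destination: "/catatonit", Options: ["bind", "ro", "nosuid", "nodev"]}
        fmt.Println(rootfs, rootfsOverlay, entrypoint)
    }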

View File

@@ -183,6 +183,10 @@ func (c *Container) validate() error {
 		}
 	}
 
+	if c.config.IsDefaultInfra && !c.config.IsInfra {
+		return fmt.Errorf("default rootfs-based infra container is set for non-infra container")
+	}
+
 	return nil
 }

View File

@@ -925,6 +925,10 @@ func containerToV1Container(ctx context.Context, c *Container, getService bool)
 		return kubeContainer, kubeVolumes, nil, annotations, fmt.Errorf("linux devices: %w", define.ErrNotImplemented)
 	}
 
+	if !c.IsInfra() && len(c.config.Rootfs) > 0 {
+		return kubeContainer, kubeVolumes, nil, annotations, fmt.Errorf("k8s does not support Rootfs")
+	}
+
 	if len(c.config.UserVolumes) > 0 {
 		volumeMounts, volumes, localAnnotations, err := libpodMountsToKubeVolumeMounts(c)
 		if err != nil {
@@ -957,53 +961,44 @@ func containerToV1Container(ctx context.Context, c *Container, getService bool)
 	kubeContainer.Name = removeUnderscores(c.Name())
 	_, image := c.Image()
 
+	// The infra container may have been created with an overlay root FS
+	// instead of an infra image. If so, set the image to the default K8s
+	// pause one and make sure it's in the storage by pulling it down if
+	// missing.
+	if image == "" && c.IsInfra() {
+		image = c.runtime.config.Engine.InfraImage
+		if _, err := c.runtime.libimageRuntime.Pull(ctx, image, config.PullPolicyMissing, nil); err != nil {
+			return kubeContainer, nil, nil, nil, err
+		}
+	}
+
 	kubeContainer.Image = image
 	kubeContainer.Stdin = c.Stdin()
-	img, _, err := c.runtime.libimageRuntime.LookupImage(image, nil)
-	if err != nil {
-		return kubeContainer, kubeVolumes, nil, annotations, fmt.Errorf("looking up image %q of container %q: %w", image, c.ID(), err)
-	}
-	imgData, err := img.Inspect(ctx, nil)
-	if err != nil {
-		return kubeContainer, kubeVolumes, nil, annotations, err
-	}
-	// If the user doesn't set a command/entrypoint when creating the container with podman and
-	// is using the image command or entrypoint from the image, don't add it to the generated kube yaml
-	if reflect.DeepEqual(imgData.Config.Cmd, kubeContainer.Command) || reflect.DeepEqual(imgData.Config.Entrypoint, kubeContainer.Command) {
-		kubeContainer.Command = nil
-	}
-	if c.WorkingDir() != "/" && imgData.Config.WorkingDir != c.WorkingDir() {
-		kubeContainer.WorkingDir = c.WorkingDir()
-	}
-	if imgData.User == c.User() && hasSecData {
-		kubeSec.RunAsGroup, kubeSec.RunAsUser = nil, nil
-	}
-	// If the image has user set as a positive integer value, then set runAsNonRoot to true
-	// in the kube yaml
-	imgUserID, err := strconv.Atoi(imgData.User)
-	if err == nil && imgUserID > 0 {
-		trueBool := true
-		kubeSec.RunAsNonRoot = &trueBool
-	}
-	envVariables, err := libpodEnvVarsToKubeEnvVars(c.config.Spec.Process.Env, imgData.Config.Env)
-	if err != nil {
-		return kubeContainer, kubeVolumes, nil, annotations, err
-	}
-	kubeContainer.Env = envVariables
+	if len(image) > 0 {
+		img, _, err := c.runtime.libimageRuntime.LookupImage(image, nil)
+		if err != nil {
+			return kubeContainer, kubeVolumes, nil, annotations, fmt.Errorf("looking up image %q of container %q: %w", image, c.ID(), err)
+		}
+		imgData, err := img.Inspect(ctx, nil)
+		if err != nil {
+			return kubeContainer, kubeVolumes, nil, annotations, err
+		}
+		// If the user doesn't set a command/entrypoint when creating the container with podman and
+		// is using the image command or entrypoint from the image, don't add it to the generated kube yaml
+		if reflect.DeepEqual(imgData.Config.Cmd, kubeContainer.Command) || reflect.DeepEqual(imgData.Config.Entrypoint, kubeContainer.Command) {
+			kubeContainer.Command = nil
+		}
+
+		if c.WorkingDir() != "/" && imgData.Config.WorkingDir != c.WorkingDir() {
+			kubeContainer.WorkingDir = c.WorkingDir()
+		}
+
+		if imgData.User == c.User() && hasSecData {
+			kubeSec.RunAsGroup, kubeSec.RunAsUser = nil, nil
+		}
+
+		// If the image has user set as a positive integer value, then set runAsNonRoot to true
+		// in the kube yaml
+		imgUserID, err := strconv.Atoi(imgData.User)
+		if err == nil && imgUserID > 0 {
+			trueBool := true
+			kubeSec.RunAsNonRoot = &trueBool
+		}
+
+		envVariables, err := libpodEnvVarsToKubeEnvVars(c.config.Spec.Process.Env, imgData.Config.Env)
+		if err != nil {
+			return kubeContainer, kubeVolumes, nil, annotations, err
+		}
+		kubeContainer.Env = envVariables
+	}
 
 	kubeContainer.Ports = ports
 	// This should not be applicable
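
Rootfs-based infra and service containers carry no image name at all, but a Kubernetes manifest must reference one, so generate kube falls back to the infra image from containers.conf and pulls it only when it is missing from storage. The pull-if-missing pattern, as a standalone sketch with containers/common (the function name here is hypothetical):

    package main

    import (
        "context"

        "github.com/containers/common/libimage"
        "github.com/containers/common/pkg/config"
    )

    // ensureImage pulls name only if it is not already in local storage.
    func ensureImage(rt *libimage.Runtime, name string) error {
        _, err := rt.Pull(context.Background(), name, config.PullPolicyMissing, nil)
        return err
    }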

View File

@@ -1648,6 +1648,20 @@ func withIsInfra() CtrCreateOption {
 	}
 }
 
+// withIsDefaultInfra allows us to differentiate between the default infra containers generated
+// directly by podman and custom infra containers within the container config
+func withIsDefaultInfra() CtrCreateOption {
+	return func(ctr *Container) error {
+		if ctr.valid {
+			return define.ErrCtrFinalized
+		}
+
+		ctr.config.IsDefaultInfra = true
+
+		return nil
+	}
+}
+
 // WithIsService allows us to differentiate between service containers and other container
 // within the container config. It also sets the exit-code propagation of the
 // service container.

View File

@@ -51,6 +51,9 @@ func (r *Runtime) NewContainer(ctx context.Context, rSpec *spec.Spec, spec *spec
 	}
 	if infra {
 		options = append(options, withIsInfra())
+		if len(spec.RawImageName) == 0 {
+			options = append(options, withIsDefaultInfra())
+		}
 	}
 	return r.newContainer(ctx, rSpec, options...)
 }
@@ -246,6 +249,13 @@ func (r *Runtime) newContainer(ctx context.Context, rSpec *spec.Spec, options ...
 }
 
 func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Container, retErr error) {
+	if ctr.IsDefaultInfra() || ctr.IsService() {
+		_, err := ctr.prepareInitRootfs()
+		if err != nil {
+			return nil, err
+		}
+	}
+
 	// normalize the networks to names
 	// the db backend only knows about network names so we have to make
 	// sure we do not use ids internally
@@ -422,7 +432,6 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
 	if ctr.restoreFromCheckpoint {
 		// Remove information about bind mount
 		// for new container from imported checkpoint
 		// NewFromSpec() is deprecated according to its comment
 		// however the recommended replace just causes a nil map panic
 		g := generate.NewFromSpec(ctr.config.Spec)
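
Note the two call sites for prepareInitRootfs wired up by this PR: setupContainer calls it at creation time, so the rootfs path, overlay flag, and entrypoint are persisted in the container config, and generateSpec calls it again at start time to recreate the temporary directory and inject the catatonit mount into the OCI spec. A sketch of the flow:

    // create: NewContainer -> setupContainer -> prepareInitRootfs
    //         (persists Rootfs, RootfsOverlay, Entrypoint in the config)
    // start:  generateSpec -> prepareInitRootfs -> g.AddMount(newMount)
    //         (recreates the tmp dir, adds the read-only catatonit mount)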

View File

@@ -67,12 +67,6 @@ func (ic *ContainerEngine) createServiceContainer(ctx context.Context, name stri
 		}
 	}
 
-	// Similar to infra containers, a service container is using the pause image.
-	image, err := generate.PullOrBuildInfraImage(ic.Libpod, "")
-	if err != nil {
-		return nil, fmt.Errorf("image for service container: %w", err)
-	}
-
 	rtc, err := ic.Libpod.GetConfigNoCopy()
 	if err != nil {
 		return nil, err
@@ -92,7 +86,7 @@ func (ic *ContainerEngine) createServiceContainer(ctx context.Context, name stri
 	}
 
 	// Create and fill out the runtime spec.
-	s := specgen.NewSpecGenerator(image, false)
+	s := specgen.NewSpecGenerator("", true)
 	if err := specgenutil.FillOutSpecGen(s, &ctrOpts, []string{}); err != nil {
 		return nil, fmt.Errorf("completing spec for service container: %w", err)
 	}
@@ -1314,6 +1308,10 @@ func (ic *ContainerEngine) getImageAndLabelInfo(ctx context.Context, cwd string,
 	// Contains all labels obtained from kube
 	labels := make(map[string]string)
 
+	if len(container.Image) == 0 {
+		return nil, labels, nil
+	}
+
 	pulledImage, err := ic.buildOrPullImage(ctx, cwd, writer, container.Image, container.ImagePullPolicy, options)
 	if err != nil {
 		return nil, labels, err
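
The one-line change from NewSpecGenerator(image, false) to NewSpecGenerator("", true) is what switches service containers over: the second parameter marks the first as a rootfs path rather than an image name, and the empty path tells libpod to supply the catatonit rootfs itself. A minimal sketch, assuming podman's pkg/specgen (image name and path are illustrative):

    package main

    import "github.com/containers/podman/v5/pkg/specgen"

    func main() {
        // Image-backed container: arg is an image name.
        _ = specgen.NewSpecGenerator("quay.io/libpod/testimage:latest", false)

        // Rootfs-backed container: arg is a rootfs path.
        _ = specgen.NewSpecGenerator("/var/tmp/myrootfs", true)

        // Rootfs-backed with no path: libpod falls back to the
        // catatonit-based rootfs prepared by prepareInitRootfs.
        _ = specgen.NewSpecGenerator("", true)
    }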

View File

@@ -301,9 +301,13 @@ func ToSpecGen(ctx context.Context, opts *CtrSpecGenOptions) (*specgen.SpecGener
 	// TODO: We don't understand why specgen does not take of this, but
 	// integration tests clearly pointed out that it was required.
-	imageData, err := opts.Image.Inspect(ctx, nil)
-	if err != nil {
-		return nil, err
-	}
+	var imageData *libimage.ImageData
+	if opts.Image != nil {
+		var err error
+		imageData, err = opts.Image.Inspect(ctx, nil)
+		if err != nil {
+			return nil, err
+		}
+	}
 	s.WorkDir = "/"
 	// Entrypoint/Command handling is based off of
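
Because imageData may now stay nil (service and default infra containers carry no image), later uses of it inside ToSpecGen need a nil guard. Sketched below; the WorkingDir access is illustrative, not a specific line from this PR:

    // Before: imageData fields could be read unconditionally.
    // After: guard each access, e.g.
    if imageData != nil && imageData.Config.WorkingDir != "" {
        s.WorkDir = imageData.Config.WorkingDir
    }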

View File

@@ -4,18 +4,15 @@ package generate
 import (
 	"context"
 	"fmt"
-	"os"
 
-	buildahDefine "github.com/containers/buildah/define"
 	"github.com/containers/common/pkg/config"
 	"github.com/containers/podman/v5/libpod"
-	"github.com/containers/podman/v5/libpod/define"
 )
 
-// PullOrBuildInfraImage pulls down the specified image or the one set in
-// containers.conf. If none is set, it builds a local pause image.
-func PullOrBuildInfraImage(rt *libpod.Runtime, imageName string) (string, error) {
+// PullInfraImage pulls down the specified image or the one set in
+// containers.conf. If none is set, it returns an empty string. In this
+// case, the rootfs-based pause image is used by libpod.
+func PullInfraImage(rt *libpod.Runtime, imageName string) (string, error) {
 	rtConfig, err := rt.GetConfigNoCopy()
 	if err != nil {
 		return "", err
@@ -33,64 +30,5 @@ func PullOrBuildInfraImage(rt *libpod.Runtime, imageName string) (string, error)
 		return imageName, nil
 	}
 
-	name, err := buildPauseImage(rt, rtConfig)
-	if err != nil {
-		return "", fmt.Errorf("building local pause image: %w", err)
-	}
-	return name, nil
-}
-
-func buildPauseImage(rt *libpod.Runtime, rtConfig *config.Config) (string, error) {
-	version, err := define.GetVersion()
-	if err != nil {
-		return "", err
-	}
-	imageName := fmt.Sprintf("localhost/podman-pause:%s-%d", version.Version, version.Built)
-
-	// First check if the image has already been built.
-	if _, _, err := rt.LibimageRuntime().LookupImage(imageName, nil); err == nil {
-		return imageName, nil
-	}
-
-	// Also look into the path as some distributions install catatonit in
-	// /usr/bin.
-	catatonitPath, err := rtConfig.FindInitBinary()
-	if err != nil {
-		return "", fmt.Errorf("finding pause binary: %w", err)
-	}
-
-	buildContent := fmt.Sprintf(`FROM scratch
-COPY %s /catatonit
-ENTRYPOINT ["/catatonit", "-P"]`, catatonitPath)
-
-	tmpF, err := os.CreateTemp("", "pause.containerfile")
-	if err != nil {
-		return "", err
-	}
-	if _, err := tmpF.WriteString(buildContent); err != nil {
-		return "", err
-	}
-	if err := tmpF.Close(); err != nil {
-		return "", err
-	}
-	defer os.Remove(tmpF.Name())
-
-	buildOptions := buildahDefine.BuildOptions{
-		CommonBuildOpts: &buildahDefine.CommonBuildOptions{},
-		Output:          imageName,
-		Quiet:           true,
-		IgnoreFile:      "/dev/null", // makes sure to not read a local .ignorefile (see #13529)
-		IIDFile:         "/dev/null", // prevents Buildah from writing the ID on stdout
-		IDMappingOptions: &buildahDefine.IDMappingOptions{
-			// Use the host UID/GID mappings for the build to avoid issues when
-			// running with a custom mapping (BZ #2083997).
-			HostUIDMapping: true,
-			HostGIDMapping: true,
-		},
-	}
-	if _, _, err := rt.Build(context.Background(), buildOptions, tmpF.Name()); err != nil {
-		return "", err
-	}
-
-	return imageName, nil
+	return "", nil
 }
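
The new contract: an empty return from PullInfraImage is not an error, it means "no infra image; use the rootfs-based pause container". A hypothetical caller, mirroring MakePod in the next file:

    imageName, err := generate.PullInfraImage(rt, requestedImage) // requestedImage may be ""
    if err != nil {
        return nil, err
    }
    if imageName != "" {
        // A real image was requested or configured; record it.
        podSpec.InfraImage = imageName
    }
    // Otherwise leave the image fields empty; libpod builds the
    // catatonit rootfs for the infra container instead.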

View File

@@ -38,12 +38,14 @@ func MakePod(p *entities.PodSpec, rt *libpod.Runtime) (_ *libpod.Pod, finalErr e
 	}
 
 	if !p.PodSpecGen.NoInfra {
-		imageName, err := PullOrBuildInfraImage(rt, p.PodSpecGen.InfraImage)
+		imageName, err := PullInfraImage(rt, p.PodSpecGen.InfraImage)
 		if err != nil {
 			return nil, err
 		}
-		p.PodSpecGen.InfraImage = imageName
-		p.PodSpecGen.InfraContainerSpec.RawImageName = imageName
+		if len(imageName) > 0 {
+			p.PodSpecGen.InfraImage = imageName
+			p.PodSpecGen.InfraContainerSpec.RawImageName = imageName
+		}
 	}
 
 	spec, err := MapSpec(&p.PodSpecGen)

View File

@@ -259,41 +259,45 @@ Labels.created_at | 20[0-9-]\\\+T[0-9:]\\\+Z
     run_podman inspect --format '{{.ID}}' $IMAGE
     imageID=$output
 
-    pauseImage=$(pause_image)
-    run_podman inspect --format '{{.ID}}' $pauseImage
-    pauseID=$output
+    run_podman pod inspect --format "{{.InfraContainerID}}" $pname
+    infra_ID="$output"
 
     run_podman 2 rmi -a
-    is "$output" "Error: 2 errors occurred:
-.** image used by .*: image is in use by a container: consider listing external containers and force-removing image
-.** image used by .*: image is in use by a container: consider listing external containers and force-removing image"
+    is "$output" "Error: image used by .*: image is in use by a container: consider listing external containers and force-removing image"
 
     run_podman rmi -af
     is "$output" "Untagged: $IMAGE
-Untagged: $pauseImage
-Deleted: $imageID
-Deleted: $pauseID" "infra images gets removed as well"
+Deleted: $imageID" "image gets removed"
 
     run_podman images --noheading
     is "$output" ""
 
-    run_podman ps --all --noheading
-    is "$output" ""
+    run_podman ps --all --noheading --no-trunc
+    assert "$output" =~ ".*$infra_ID.*" "infra container still running"
 
     run_podman pod ps --noheading
-    is "$output" ""
+    assert "$output" =~ ".*$pname.*" "pod still running"
 
     run_podman create --pod new:$pname $IMAGE
 
     # Clean up
+    run_podman rm "${lines[-1]}"
     run_podman pod rm -a
-    run_podman rmi $pauseImage
 }
 # CANNOT BE PARALLELIZED: relies on exact output from podman images
 @test "podman images - rmi -f can remove infra images" {
     pname=p_$(safename)
-    run_podman create --pod new:$pname $IMAGE
 
-    pauseImage=$(pause_image)
+    # Create a custom image so we can test --infra-image and -command.
+    # It will have a randomly generated infra command, using the
+    # existing 'pause' script in our testimage. We assign a bogus
+    # entrypoint to confirm that --infra-command will override.
+    local pauseImage="infra_image_$(safename)"
+
+    # --layers=false needed to work around buildah#5674 parallel flake
+    run_podman build -t $pauseImage --layers=false - << EOF
+FROM $IMAGE
+ENTRYPOINT ["/home/podman/pause"]
+EOF
+
+    run_podman --noout pod create --name $pname --infra-image "$pauseImage"
+    run_podman create --pod $pname $IMAGE
 
     run_podman inspect --format '{{.ID}}' $pauseImage
     pauseID=$output
@@ -301,7 +305,7 @@ Deleted: $pauseID" "infra images gets removed as well"
     is "$output" "Error: image used by .* image is in use by a container: consider listing external containers and force-removing image"
 
     run_podman rmi -f $pauseImage
-    is "$output" "Untagged: $pauseImage
+    is "$output" "Untagged: localhost/$pauseImage:latest
 Deleted: $pauseID"
 
     # Force-removing the infra container removes the pod and all its containers.
@@ -330,6 +334,7 @@ Deleted: $pauseID"
     run_podman image rm --force bogus
     is "$output" "" "Should print no output"
 
+    _prefetch $IMAGE
     random_image_name=i_$(safename)
     run_podman image tag $IMAGE $random_image_name
     run_podman image rm --force bogus $random_image_name
@@ -386,6 +391,7 @@ EOF
                 | grep -vF '[storage.options]' >>$sconf
     fi
 
+    _prefetch $IMAGE
     skopeo copy containers-storage:$IMAGE \
            containers-storage:\[${storagedriver}@${imstore}/root+${imstore}/runroot\]$IMAGE

View File

@@ -57,6 +57,7 @@ function _tag_and_check() {
 # CANNOT BE PARALLELIZED: temporarily removes $IMAGE
 @test "podman untag all" {
+    _prefetch $IMAGE
     # First get the image ID
     run_podman inspect --format '{{.ID}}' $IMAGE
     iid=$output

View File

@@ -237,7 +237,6 @@ load helpers
     is "$output" "$rand_value"
 
     run_podman pod rm -t 0 -f test
-    run_podman rmi $(pause_image)
 }
 
 @test "podman ps --format PodName" {
@@ -252,7 +251,6 @@ load helpers
     run_podman rm -t 0 -f $cid
     run_podman pod rm -t 0 -f $rand_value
-    run_podman rmi $(pause_image)
 }
 
 # vim: filetype=sh

View File

@@ -563,7 +563,7 @@ EOF
     # Clean up
     systemctl stop $service_name
-    run_podman rmi -f $(pause_image) $local_image $newID $oldID
+    run_podman rmi -f $local_image $newID $oldID
     run_podman network rm podman-default-kube-network
     rm -f $UNIT_DIR/$unit_name
 }
@@ -630,7 +630,7 @@ EOF
     assert $status -eq 0 "Error stopping pod systemd unit: $output"
 
     run_podman pod rm -f $podname
-    run_podman rmi $local_image $(pause_image)
+    run_podman rmi $local_image
     rm -f $podunit $ctrunit
     systemctl daemon-reload
 }

View File

@@ -117,12 +117,6 @@ RELABEL="system_u:object_r:container_file_t:s0"
         is "$output" "${RELABEL} $TESTDIR" "selinux relabel should have happened"
     fi
 
-    # Make sure that the K8s pause image isn't pulled but the local podman-pause is built.
-    run_podman images
-    run_podman 1 image exists k8s.gcr.io/pause
-    run_podman 1 image exists registry.k8s.io/pause
-    run_podman image exists $(pause_image)
-
     run_podman pod rm -t 0 -f $PODNAME
 }

View File

@@ -452,21 +452,6 @@ function clean_setup() {
     if [[ -z "$found_needed_image" ]]; then
         _prefetch $PODMAN_TEST_IMAGE_FQN
     fi
-
-    # Load (create, actually) the pause image. This way, all pod tests will
-    # have it available. Without this, pod tests run in parallel will leave
-    # behind <none>:<none> images.
-    # FIXME: only do this when running parallel! Otherwise, we may break
-    # test expectations.
-    # SUB-FIXME: there's no actual way to tell if we're running bats
-    # in parallel (see bats-core#998). Use undocumented hack.
-    # FIXME: #23292 -- this should not be necessary.
-    if [[ -n "$BATS_SEMAPHORE_DIR" ]]; then
-        run_podman pod create mypod
-        run_podman pod rm mypod
-        # And now, we have a pause image, and each test does not
-        # need to build their own.
-    fi
 }
 
 # END setup/teardown tools
@@ -812,15 +797,6 @@ function journald_unavailable() {
     return 1
 }
 
-# Returns the name of the local pause image.
-function pause_image() {
-    # This function is intended to be used as '$(pause_image)', i.e.
-    # our caller wants our output. run_podman() messes with output because
-    # it emits the command invocation to stdout, hence the redirection.
-    run_podman version --format "{{.Server.Version}}-{{.Server.Built}}" >/dev/null
-    echo "localhost/podman-pause:$output"
-}
-
 # Wait for the pod (1st arg) to transition into the state (2nd arg)
 function _ensure_pod_state() {
     for i in {0..5}; do