Merge pull request #21523 from umohnani8/memory-final

Use persist dir for oom file
This commit is contained in:
openshift-merge-bot[bot] 2024-02-12 21:38:43 +00:00 committed by GitHub
commit 01bd79b371
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 113 additions and 14 deletions

View File

@ -890,6 +890,13 @@ func (c *Container) execExitFileDir(sessionID string) string {
return filepath.Join(c.execBundlePath(sessionID), "exit")
}
// execPersistDir gets the path to the container's persist directory
// The persist directory container the exit file and oom file (if oomkilled)
// of a container
func (c *Container) execPersistDir(sessionID string) string {
return filepath.Join(c.execBundlePath(sessionID), "persist", c.ID())
}
// execOCILog returns the file path for the exec sessions oci log
func (c *Container) execOCILog(sessionID string) string {
if !c.ociRuntime.SupportsJSONErrors() {
@ -917,6 +924,9 @@ func (c *Container) createExecBundle(sessionID string) (retErr error) {
return fmt.Errorf("creating OCI runtime exit file path %s: %w", c.execExitFileDir(sessionID), err)
}
}
if err := os.MkdirAll(c.execPersistDir(sessionID), execDirPermission); err != nil {
return fmt.Errorf("creating OCI runtime persist directory path %s: %w", c.execPersistDir(sessionID), err)
}
return nil
}

View File

@ -8,6 +8,7 @@ import (
"errors"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"strconv"
@ -145,6 +146,10 @@ func (c *Container) exitFilePath() (string, error) {
return c.ociRuntime.ExitFilePath(c)
}
func (c *Container) oomFilePath() (string, error) {
return c.ociRuntime.OOMFilePath(c)
}
// Wait for the container's exit file to appear.
// When it does, update our state based on it.
func (c *Container) waitForExitFileAndSync() error {
@ -181,6 +186,7 @@ func (c *Container) waitForExitFileAndSync() error {
// Handle the container exit file.
// The exit file is used to supply container exit time and exit code.
// This assumes the exit file already exists.
// Also check for an oom file to determine if the container was oom killed or not.
func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
c.state.FinishedTime = ctime.Created(fi)
statusCodeStr, err := os.ReadFile(exitFile)
@ -194,7 +200,10 @@ func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
}
c.state.ExitCode = int32(statusCode)
oomFilePath := filepath.Join(c.bundlePath(), "oom")
oomFilePath, err := c.oomFilePath()
if err != nil {
return err
}
if _, err = os.Stat(oomFilePath); err == nil {
c.state.OOMKilled = true
}
@ -758,11 +767,6 @@ func (c *Container) removeConmonFiles() error {
return fmt.Errorf("removing container %s winsz file: %w", c.ID(), err)
}
oomFile := filepath.Join(c.bundlePath(), "oom")
if err := os.Remove(oomFile); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("removing container %s OOM file: %w", c.ID(), err)
}
// Remove the exit file so we don't leak memory in tmpfs
exitFile, err := c.exitFilePath()
if err != nil {
@ -772,6 +776,15 @@ func (c *Container) removeConmonFiles() error {
return fmt.Errorf("removing container %s exit file: %w", c.ID(), err)
}
// Remove the oom file
oomFile, err := c.oomFilePath()
if err != nil {
return err
}
if err := os.Remove(oomFile); err != nil && !errors.Is(err, fs.ErrNotExist) {
return fmt.Errorf("removing container %s oom file: %w", c.ID(), err)
}
return nil
}

View File

@ -149,6 +149,12 @@ type OCIRuntime interface { //nolint:interfacebloat
// This is the path to that file for a given container.
ExitFilePath(ctr *Container) (string, error)
// OOMFilePath is the path to a container's oom file if it was oom killed.
// An oom file is only created when the container is oom killed. The existence
// of this file means that the container was oom killed.
// This is the path to that file for a given container.
OOMFilePath(ctr *Container) (string, error)
// RuntimeInfo returns verbose information about the runtime.
RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error)

View File

@ -64,6 +64,7 @@ type ConmonOCIRuntime struct {
supportsKVM bool
supportsNoCgroups bool
enableKeyring bool
persistDir string
}
// Make a new Conmon-based OCI runtime with the given options.
@ -143,13 +144,15 @@ func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtime
}
runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits")
// The persist-dir is where conmon writes the exit file and oom file (if oom killed), we join the container ID to this path later on
runtime.persistDir = filepath.Join(runtime.tmpDir, "persist")
// Create the exit files and attach sockets directories
if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil {
// The directory is allowed to exist
if !os.IsExist(err) {
return nil, fmt.Errorf("creating OCI runtime exit files directory: %w", err)
}
return nil, fmt.Errorf("creating OCI runtime exit files directory: %w", err)
}
if err := os.MkdirAll(runtime.persistDir, 0750); err != nil {
return nil, fmt.Errorf("creating OCI runtime persist directory: %w", err)
}
return runtime, nil
}
@ -940,6 +943,12 @@ func (r *ConmonOCIRuntime) ExitFilePath(ctr *Container) (string, error) {
return filepath.Join(r.exitsDir, ctr.ID()), nil
}
// OOMFilePath is the path to a container's oom file.
// The oom file will only exist if the container was oom killed.
func (r *ConmonOCIRuntime) OOMFilePath(ctr *Container) (string, error) {
return filepath.Join(r.persistDir, ctr.ID(), "oom"), nil
}
// RuntimeInfo provides information on the runtime.
func (r *ConmonOCIRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) {
runtimePackage := version.Package(r.path)
@ -1107,7 +1116,11 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
pidfile = filepath.Join(ctr.state.RunDir, "pidfile")
}
args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag)
persistDir := filepath.Join(r.persistDir, ctr.ID())
args, err := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, persistDir, ociLog, ctr.LogDriver(), logTag)
if err != nil {
return 0, err
}
if ctr.config.SdNotifyMode == define.SdNotifyModeContainer && ctr.config.SdNotifySocket != "" {
args = append(args, fmt.Sprintf("--sdnotify-socket=%s", ctr.config.SdNotifySocket))
@ -1363,7 +1376,16 @@ func (r *ConmonOCIRuntime) configureConmonEnv() ([]string, error) {
}
// sharedConmonArgs takes common arguments for exec and create/restore and formats them for the conmon CLI
func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, ociLogPath, logDriver, logTag string) []string {
// func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, persistDir, ociLogPath, logDriver, logTag string) ([]string, error) {
func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, persistDir, ociLogPath, logDriver, logTag string) ([]string, error) {
// Make the persists directory for the container after the ctr ID is appended to it in the caller
// This is needed as conmon writes the exit and oom file in the given persist directory path as just "exit" and "oom"
// So creating a directory with the container ID under the persist dir will help keep track of which container the
// exit and oom files belong to.
if err := os.MkdirAll(persistDir, 0750); err != nil {
return nil, fmt.Errorf("creating OCI runtime oom files directory for ctr %q: %w", ctr.ID(), err)
}
// set the conmon API version to be able to use the correct sync struct keys
args := []string{
"--api-version", "1",
@ -1374,6 +1396,7 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
"-p", pidPath,
"-n", ctr.Name(),
"--exit-dir", exitDir,
"--persist-dir", persistDir,
"--full-attach",
}
if len(r.runtimeFlags) > 0 {
@ -1436,7 +1459,7 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
logrus.Debugf("Running with no Cgroups")
args = append(args, "--runtime-arg", "--cgroup-manager", "--runtime-arg", "disabled")
}
return args
return args, nil
}
// newPipe creates a unix socket pair for communication.

View File

@ -387,7 +387,10 @@ func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *Ex
}
defer processFile.Close()
args := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execExitFileDir(sessionID), ociLog, define.NoLogging, c.config.LogTag)
args, err := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execExitFileDir(sessionID), c.execPersistDir(sessionID), ociLog, define.NoLogging, c.config.LogTag)
if err != nil {
return nil, nil, err
}
preserveFDs, filesToClose, extraFiles, err := getPreserveFdExtraFiles(options.PreserveFD, options.PreserveFDs)
if err != nil {

View File

@ -29,6 +29,8 @@ type MissingRuntime struct {
name string
// exitsDir is the directory for exit files.
exitsDir string
// persistDir is the directory for exit and oom files.
persistDir string
}
// Get a new MissingRuntime for the given name.
@ -52,6 +54,7 @@ func getMissingRuntime(name string, r *Runtime) OCIRuntime {
newRuntime := new(MissingRuntime)
newRuntime.name = name
newRuntime.exitsDir = filepath.Join(r.config.Engine.TmpDir, "exits")
newRuntime.persistDir = filepath.Join(r.config.Engine.TmpDir, "persist")
missingRuntimes[name] = newRuntime
@ -222,6 +225,12 @@ func (r *MissingRuntime) ExitFilePath(ctr *Container) (string, error) {
return filepath.Join(r.exitsDir, ctr.ID()), nil
}
// OOMFilePath returns the oom file path for a container.
// The oom file will only exist if the container was oom killed.
func (r *MissingRuntime) OOMFilePath(ctr *Container) (string, error) {
return filepath.Join(r.persistDir, ctr.ID(), "oom"), nil
}
// RuntimeInfo returns information on the missing runtime
func (r *MissingRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) {
ocirt := define.OCIRuntimeInfo{

View File

@ -70,4 +70,39 @@ var _ = Describe("Podman run memory", func() {
Expect(session.OutputToString()).To(Equal(limit))
})
}
It("podman run memory test on oomkilled container", func() {
mem := SystemExec("cat", []string{"/proc/sys/vm/overcommit_memory"})
mem.WaitWithDefaultTimeout()
if mem.OutputToString() != "0" {
Skip("overcommit memory is not set to 0")
}
ctrName := "oomkilled-ctr"
// create a container that gets oomkilled
session := podmanTest.Podman([]string{"run", "--name", ctrName, "--read-only", "--memory-swap=20m", "--memory=20m", "--oom-score-adj=1000", ALPINE, "sort", "/dev/urandom"})
session.WaitWithDefaultTimeout()
Expect(session).Should(ExitWithError())
inspect := podmanTest.Podman(([]string{"inspect", "--format", "{{.State.OOMKilled}} {{.State.ExitCode}}", ctrName}))
inspect.WaitWithDefaultTimeout()
Expect(inspect).Should(ExitCleanly())
// Check oomkilled and exit code values
Expect(inspect.OutputToString()).Should(ContainSubstring("true"))
Expect(inspect.OutputToString()).Should(ContainSubstring("137"))
})
It("podman run memory test on successfully exited container", func() {
ctrName := "success-ctr"
session := podmanTest.Podman([]string{"run", "--name", ctrName, "--memory=40m", ALPINE, "echo", "hello"})
session.WaitWithDefaultTimeout()
Expect(session).Should(ExitCleanly())
inspect := podmanTest.Podman(([]string{"inspect", "--format", "{{.State.OOMKilled}} {{.State.ExitCode}}", ctrName}))
inspect.WaitWithDefaultTimeout()
Expect(inspect).Should(ExitCleanly())
// Check oomkilled and exit code values
Expect(inspect.OutputToString()).Should(ContainSubstring("false"))
Expect(inspect.OutputToString()).Should(ContainSubstring("0"))
})
})