Merge pull request #21523 from umohnani8/memory-final

Use persist dir for oom file
This commit is contained in:
openshift-merge-bot[bot] 2024-02-12 21:38:43 +00:00 committed by GitHub
commit 01bd79b371
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 113 additions and 14 deletions

View File

@ -890,6 +890,13 @@ func (c *Container) execExitFileDir(sessionID string) string {
return filepath.Join(c.execBundlePath(sessionID), "exit") return filepath.Join(c.execBundlePath(sessionID), "exit")
} }
// execPersistDir gets the path to the container's persist directory
// The persist directory container the exit file and oom file (if oomkilled)
// of a container
func (c *Container) execPersistDir(sessionID string) string {
return filepath.Join(c.execBundlePath(sessionID), "persist", c.ID())
}
// execOCILog returns the file path for the exec sessions oci log // execOCILog returns the file path for the exec sessions oci log
func (c *Container) execOCILog(sessionID string) string { func (c *Container) execOCILog(sessionID string) string {
if !c.ociRuntime.SupportsJSONErrors() { if !c.ociRuntime.SupportsJSONErrors() {
@ -917,6 +924,9 @@ func (c *Container) createExecBundle(sessionID string) (retErr error) {
return fmt.Errorf("creating OCI runtime exit file path %s: %w", c.execExitFileDir(sessionID), err) return fmt.Errorf("creating OCI runtime exit file path %s: %w", c.execExitFileDir(sessionID), err)
} }
} }
if err := os.MkdirAll(c.execPersistDir(sessionID), execDirPermission); err != nil {
return fmt.Errorf("creating OCI runtime persist directory path %s: %w", c.execPersistDir(sessionID), err)
}
return nil return nil
} }

View File

@ -8,6 +8,7 @@ import (
"errors" "errors"
"fmt" "fmt"
"io" "io"
"io/fs"
"os" "os"
"path/filepath" "path/filepath"
"strconv" "strconv"
@ -145,6 +146,10 @@ func (c *Container) exitFilePath() (string, error) {
return c.ociRuntime.ExitFilePath(c) return c.ociRuntime.ExitFilePath(c)
} }
func (c *Container) oomFilePath() (string, error) {
return c.ociRuntime.OOMFilePath(c)
}
// Wait for the container's exit file to appear. // Wait for the container's exit file to appear.
// When it does, update our state based on it. // When it does, update our state based on it.
func (c *Container) waitForExitFileAndSync() error { func (c *Container) waitForExitFileAndSync() error {
@ -181,6 +186,7 @@ func (c *Container) waitForExitFileAndSync() error {
// Handle the container exit file. // Handle the container exit file.
// The exit file is used to supply container exit time and exit code. // The exit file is used to supply container exit time and exit code.
// This assumes the exit file already exists. // This assumes the exit file already exists.
// Also check for an oom file to determine if the container was oom killed or not.
func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error { func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
c.state.FinishedTime = ctime.Created(fi) c.state.FinishedTime = ctime.Created(fi)
statusCodeStr, err := os.ReadFile(exitFile) statusCodeStr, err := os.ReadFile(exitFile)
@ -194,7 +200,10 @@ func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
} }
c.state.ExitCode = int32(statusCode) c.state.ExitCode = int32(statusCode)
oomFilePath := filepath.Join(c.bundlePath(), "oom") oomFilePath, err := c.oomFilePath()
if err != nil {
return err
}
if _, err = os.Stat(oomFilePath); err == nil { if _, err = os.Stat(oomFilePath); err == nil {
c.state.OOMKilled = true c.state.OOMKilled = true
} }
@ -758,11 +767,6 @@ func (c *Container) removeConmonFiles() error {
return fmt.Errorf("removing container %s winsz file: %w", c.ID(), err) return fmt.Errorf("removing container %s winsz file: %w", c.ID(), err)
} }
oomFile := filepath.Join(c.bundlePath(), "oom")
if err := os.Remove(oomFile); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("removing container %s OOM file: %w", c.ID(), err)
}
// Remove the exit file so we don't leak memory in tmpfs // Remove the exit file so we don't leak memory in tmpfs
exitFile, err := c.exitFilePath() exitFile, err := c.exitFilePath()
if err != nil { if err != nil {
@ -772,6 +776,15 @@ func (c *Container) removeConmonFiles() error {
return fmt.Errorf("removing container %s exit file: %w", c.ID(), err) return fmt.Errorf("removing container %s exit file: %w", c.ID(), err)
} }
// Remove the oom file
oomFile, err := c.oomFilePath()
if err != nil {
return err
}
if err := os.Remove(oomFile); err != nil && !errors.Is(err, fs.ErrNotExist) {
return fmt.Errorf("removing container %s oom file: %w", c.ID(), err)
}
return nil return nil
} }

View File

@ -149,6 +149,12 @@ type OCIRuntime interface { //nolint:interfacebloat
// This is the path to that file for a given container. // This is the path to that file for a given container.
ExitFilePath(ctr *Container) (string, error) ExitFilePath(ctr *Container) (string, error)
// OOMFilePath is the path to a container's oom file if it was oom killed.
// An oom file is only created when the container is oom killed. The existence
// of this file means that the container was oom killed.
// This is the path to that file for a given container.
OOMFilePath(ctr *Container) (string, error)
// RuntimeInfo returns verbose information about the runtime. // RuntimeInfo returns verbose information about the runtime.
RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error)

View File

@ -64,6 +64,7 @@ type ConmonOCIRuntime struct {
supportsKVM bool supportsKVM bool
supportsNoCgroups bool supportsNoCgroups bool
enableKeyring bool enableKeyring bool
persistDir string
} }
// Make a new Conmon-based OCI runtime with the given options. // Make a new Conmon-based OCI runtime with the given options.
@ -143,13 +144,15 @@ func newConmonOCIRuntime(name string, paths []string, conmonPath string, runtime
} }
runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits") runtime.exitsDir = filepath.Join(runtime.tmpDir, "exits")
// The persist-dir is where conmon writes the exit file and oom file (if oom killed), we join the container ID to this path later on
runtime.persistDir = filepath.Join(runtime.tmpDir, "persist")
// Create the exit files and attach sockets directories // Create the exit files and attach sockets directories
if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil { if err := os.MkdirAll(runtime.exitsDir, 0750); err != nil {
// The directory is allowed to exist return nil, fmt.Errorf("creating OCI runtime exit files directory: %w", err)
if !os.IsExist(err) { }
return nil, fmt.Errorf("creating OCI runtime exit files directory: %w", err) if err := os.MkdirAll(runtime.persistDir, 0750); err != nil {
} return nil, fmt.Errorf("creating OCI runtime persist directory: %w", err)
} }
return runtime, nil return runtime, nil
} }
@ -940,6 +943,12 @@ func (r *ConmonOCIRuntime) ExitFilePath(ctr *Container) (string, error) {
return filepath.Join(r.exitsDir, ctr.ID()), nil return filepath.Join(r.exitsDir, ctr.ID()), nil
} }
// OOMFilePath is the path to a container's oom file.
// The oom file will only exist if the container was oom killed.
func (r *ConmonOCIRuntime) OOMFilePath(ctr *Container) (string, error) {
return filepath.Join(r.persistDir, ctr.ID(), "oom"), nil
}
// RuntimeInfo provides information on the runtime. // RuntimeInfo provides information on the runtime.
func (r *ConmonOCIRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) { func (r *ConmonOCIRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) {
runtimePackage := version.Package(r.path) runtimePackage := version.Package(r.path)
@ -1107,7 +1116,11 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
pidfile = filepath.Join(ctr.state.RunDir, "pidfile") pidfile = filepath.Join(ctr.state.RunDir, "pidfile")
} }
args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag) persistDir := filepath.Join(r.persistDir, ctr.ID())
args, err := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), pidfile, ctr.LogPath(), r.exitsDir, persistDir, ociLog, ctr.LogDriver(), logTag)
if err != nil {
return 0, err
}
if ctr.config.SdNotifyMode == define.SdNotifyModeContainer && ctr.config.SdNotifySocket != "" { if ctr.config.SdNotifyMode == define.SdNotifyModeContainer && ctr.config.SdNotifySocket != "" {
args = append(args, fmt.Sprintf("--sdnotify-socket=%s", ctr.config.SdNotifySocket)) args = append(args, fmt.Sprintf("--sdnotify-socket=%s", ctr.config.SdNotifySocket))
@ -1363,7 +1376,16 @@ func (r *ConmonOCIRuntime) configureConmonEnv() ([]string, error) {
} }
// sharedConmonArgs takes common arguments for exec and create/restore and formats them for the conmon CLI // sharedConmonArgs takes common arguments for exec and create/restore and formats them for the conmon CLI
func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, ociLogPath, logDriver, logTag string) []string { // func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, persistDir, ociLogPath, logDriver, logTag string) ([]string, error) {
func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, pidPath, logPath, exitDir, persistDir, ociLogPath, logDriver, logTag string) ([]string, error) {
// Make the persists directory for the container after the ctr ID is appended to it in the caller
// This is needed as conmon writes the exit and oom file in the given persist directory path as just "exit" and "oom"
// So creating a directory with the container ID under the persist dir will help keep track of which container the
// exit and oom files belong to.
if err := os.MkdirAll(persistDir, 0750); err != nil {
return nil, fmt.Errorf("creating OCI runtime oom files directory for ctr %q: %w", ctr.ID(), err)
}
// set the conmon API version to be able to use the correct sync struct keys // set the conmon API version to be able to use the correct sync struct keys
args := []string{ args := []string{
"--api-version", "1", "--api-version", "1",
@ -1374,6 +1396,7 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
"-p", pidPath, "-p", pidPath,
"-n", ctr.Name(), "-n", ctr.Name(),
"--exit-dir", exitDir, "--exit-dir", exitDir,
"--persist-dir", persistDir,
"--full-attach", "--full-attach",
} }
if len(r.runtimeFlags) > 0 { if len(r.runtimeFlags) > 0 {
@ -1436,7 +1459,7 @@ func (r *ConmonOCIRuntime) sharedConmonArgs(ctr *Container, cuuid, bundlePath, p
logrus.Debugf("Running with no Cgroups") logrus.Debugf("Running with no Cgroups")
args = append(args, "--runtime-arg", "--cgroup-manager", "--runtime-arg", "disabled") args = append(args, "--runtime-arg", "--cgroup-manager", "--runtime-arg", "disabled")
} }
return args return args, nil
} }
// newPipe creates a unix socket pair for communication. // newPipe creates a unix socket pair for communication.

View File

@ -387,7 +387,10 @@ func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *Ex
} }
defer processFile.Close() defer processFile.Close()
args := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execExitFileDir(sessionID), ociLog, define.NoLogging, c.config.LogTag) args, err := r.sharedConmonArgs(c, sessionID, c.execBundlePath(sessionID), c.execPidPath(sessionID), c.execLogPath(sessionID), c.execExitFileDir(sessionID), c.execPersistDir(sessionID), ociLog, define.NoLogging, c.config.LogTag)
if err != nil {
return nil, nil, err
}
preserveFDs, filesToClose, extraFiles, err := getPreserveFdExtraFiles(options.PreserveFD, options.PreserveFDs) preserveFDs, filesToClose, extraFiles, err := getPreserveFdExtraFiles(options.PreserveFD, options.PreserveFDs)
if err != nil { if err != nil {

View File

@ -29,6 +29,8 @@ type MissingRuntime struct {
name string name string
// exitsDir is the directory for exit files. // exitsDir is the directory for exit files.
exitsDir string exitsDir string
// persistDir is the directory for exit and oom files.
persistDir string
} }
// Get a new MissingRuntime for the given name. // Get a new MissingRuntime for the given name.
@ -52,6 +54,7 @@ func getMissingRuntime(name string, r *Runtime) OCIRuntime {
newRuntime := new(MissingRuntime) newRuntime := new(MissingRuntime)
newRuntime.name = name newRuntime.name = name
newRuntime.exitsDir = filepath.Join(r.config.Engine.TmpDir, "exits") newRuntime.exitsDir = filepath.Join(r.config.Engine.TmpDir, "exits")
newRuntime.persistDir = filepath.Join(r.config.Engine.TmpDir, "persist")
missingRuntimes[name] = newRuntime missingRuntimes[name] = newRuntime
@ -222,6 +225,12 @@ func (r *MissingRuntime) ExitFilePath(ctr *Container) (string, error) {
return filepath.Join(r.exitsDir, ctr.ID()), nil return filepath.Join(r.exitsDir, ctr.ID()), nil
} }
// OOMFilePath returns the oom file path for a container.
// The oom file will only exist if the container was oom killed.
func (r *MissingRuntime) OOMFilePath(ctr *Container) (string, error) {
return filepath.Join(r.persistDir, ctr.ID(), "oom"), nil
}
// RuntimeInfo returns information on the missing runtime // RuntimeInfo returns information on the missing runtime
func (r *MissingRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) { func (r *MissingRuntime) RuntimeInfo() (*define.ConmonInfo, *define.OCIRuntimeInfo, error) {
ocirt := define.OCIRuntimeInfo{ ocirt := define.OCIRuntimeInfo{

View File

@ -70,4 +70,39 @@ var _ = Describe("Podman run memory", func() {
Expect(session.OutputToString()).To(Equal(limit)) Expect(session.OutputToString()).To(Equal(limit))
}) })
} }
It("podman run memory test on oomkilled container", func() {
mem := SystemExec("cat", []string{"/proc/sys/vm/overcommit_memory"})
mem.WaitWithDefaultTimeout()
if mem.OutputToString() != "0" {
Skip("overcommit memory is not set to 0")
}
ctrName := "oomkilled-ctr"
// create a container that gets oomkilled
session := podmanTest.Podman([]string{"run", "--name", ctrName, "--read-only", "--memory-swap=20m", "--memory=20m", "--oom-score-adj=1000", ALPINE, "sort", "/dev/urandom"})
session.WaitWithDefaultTimeout()
Expect(session).Should(ExitWithError())
inspect := podmanTest.Podman(([]string{"inspect", "--format", "{{.State.OOMKilled}} {{.State.ExitCode}}", ctrName}))
inspect.WaitWithDefaultTimeout()
Expect(inspect).Should(ExitCleanly())
// Check oomkilled and exit code values
Expect(inspect.OutputToString()).Should(ContainSubstring("true"))
Expect(inspect.OutputToString()).Should(ContainSubstring("137"))
})
It("podman run memory test on successfully exited container", func() {
ctrName := "success-ctr"
session := podmanTest.Podman([]string{"run", "--name", ctrName, "--memory=40m", ALPINE, "echo", "hello"})
session.WaitWithDefaultTimeout()
Expect(session).Should(ExitCleanly())
inspect := podmanTest.Podman(([]string{"inspect", "--format", "{{.State.OOMKilled}} {{.State.ExitCode}}", ctrName}))
inspect.WaitWithDefaultTimeout()
Expect(inspect).Should(ExitCleanly())
// Check oomkilled and exit code values
Expect(inspect.OutputToString()).Should(ContainSubstring("false"))
Expect(inspect.OutputToString()).Should(ContainSubstring("0"))
})
}) })