libpod: improve conmon error handling

When conmon is started, it blocks and waits for us to signal it to start
via a pipe. This works, but when conmon exits before it waits for the start
message, it causes podman to fail with `write child: broken pipe`. This
error is meaningless to podman users.

The real error is that conmon failed, so we should not return early if we
fail to send the start message to conmon. Instead, ignore the EPIPE error
case, as it is safe to assume that conmon died; for other errors, we
make sure to kill conmon so that the following wait() call does not hang
forever. This also fixes the problem of leaked conmon zombie processes,
which occurred because wait() was never called.

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
This commit is contained in:
Paul Holzinger 2023-08-17 10:40:02 +02:00
parent ed1f514d55
commit c726cf8106
No known key found for this signature in database
GPG Key ID: EB145DD938A3CAF2
2 changed files with 10 additions and 5 deletions

View File

@ -1261,8 +1261,16 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co
return 0, err return 0, err
} }
if err := r.moveConmonToCgroupAndSignal(ctr, cmd, parentStartPipe); err != nil { if err := r.moveConmonToCgroupAndSignal(ctr, cmd, parentStartPipe); err != nil {
return 0, err // The child likely already exited in which case the cmd.Wait() below should return the proper error.
// EPIPE is expected if the child already exited so not worth to log and kill the process.
if !errors.Is(err, syscall.EPIPE) {
logrus.Errorf("Failed to signal conmon to start: %v", err)
if err := cmd.Process.Kill(); err != nil && !errors.Is(err, syscall.ESRCH) {
logrus.Errorf("Failed to kill conmon after error: %v", err)
} }
}
}
/* Wait for initial setup and fork, and reap child */ /* Wait for initial setup and fork, and reap child */
err = cmd.Wait() err = cmd.Wait()
if err != nil { if err != nil {

View File

@ -162,10 +162,7 @@ func (r *ConmonOCIRuntime) moveConmonToCgroupAndSignal(ctr *Container, cmd *exec
} }
/* We set the cgroup, now the child can start creating children */ /* We set the cgroup, now the child can start creating children */
if err := writeConmonPipeData(startFd); err != nil { return writeConmonPipeData(startFd)
return err
}
return nil
} }
// GetLimits converts spec resource limits to cgroup consumable limits // GetLimits converts spec resource limits to cgroup consumable limits