Add a new hidden command, podman system locks

This is a general debug command that identifies any lock
conflicts that could lead to a deadlock. It's only intended for
Libpod developers (while it does tell you if you need to run
`podman system renumber`, you should never have to do that
anyways, and the next commit will include a lot more technical
info in the output that no one except a Libpod dev will want).
Hence, hidden command, and only implemented for the local driver
(recommend just running it by SSHing into a `podman machine` VM
in the unlikely case it's needed by remote Podman).

These conflicts should normally never happen, but having a
command like this is useful for debugging deadlock conditions
when they do occur.

Signed-off-by: Matt Heon <mheon@redhat.com>
This commit is contained in:
Matt Heon 2023-06-05 14:47:12 -04:00
parent 1013696ad2
commit 0948c078c2
6 changed files with 139 additions and 0 deletions

View File

@ -0,0 +1,48 @@
package system
import (
"fmt"
"github.com/containers/podman/v4/cmd/podman/registry"
"github.com/containers/podman/v4/cmd/podman/validate"
"github.com/spf13/cobra"
)
// locksCommand is the hidden `podman system locks` debug command. It accepts
// no arguments and hands all of its work to runLocks.
var locksCommand = &cobra.Command{
	Use:     "locks",
	Short:   "Debug Libpod's use of locks, identifying any potential conflicts",
	Example: "podman system locks",
	Hidden:  true,
	Args:    validate.NoArgs,
	RunE: func(_ *cobra.Command, _ []string) error {
		return runLocks()
	},
}
// init registers locksCommand in the CLI registry with systemCmd as its parent.
func init() {
	locksCLI := registry.CliCommand{
		Command: locksCommand,
		Parent:  systemCmd,
	}
	registry.Commands = append(registry.Commands, locksCLI)
}
// runLocks asks the container engine for its lock-conflict report and prints
// it to stdout: one header line per conflicting lock number followed by a
// tab-indented line for every object sharing that lock. If any conflict was
// reported, a closing recommendation to run `podman system renumber` is
// printed. Locks are listed in map iteration order, which is unspecified.
//
// It returns the engine's error unchanged if the report cannot be obtained.
func runLocks() error {
	report, err := registry.ContainerEngine().Locks(registry.Context())
	if err != nil {
		return err
	}

	for lockNum, objects := range report.LockConflicts {
		// The colon must come before the newline; otherwise it ends up at the
		// start of the next line, glued to the first listed object.
		fmt.Printf("Lock %d is in use by the following:\n", lockNum)
		for _, obj := range objects {
			fmt.Printf("\t%s\n", obj)
		}
	}

	if len(report.LockConflicts) > 0 {
		fmt.Printf("\nLock conflicts have been detected. Recommend immediate use of `podman system renumber` to resolve.\n")
	}

	return nil
}

View File

@ -1188,3 +1188,73 @@ func (r *Runtime) RemoteURI() string {
func (r *Runtime) SetRemoteURI(uri string) {
// Overwrite the RemoteURI field in the Engine section of the runtime config.
r.config.Engine.RemoteURI = uri
}
// LockConflicts gets information on potential lock conflicts.
// It walks every container, pod and volume known to the state and returns a
// map of lock number to the object(s) using that lock, each formatted as
// "container <id>", "pod <id>" or "volume <name>". Only locks used by more
// than one object are included in the result.
// If the map returned is not empty, you should immediately renumber locks on
// the runtime, because you have a deadlock waiting to happen.
// An error from any of the state queries is returned unchanged, with a nil map.
func (r *Runtime) LockConflicts() (map[uint32][]string, error) {
	// Internal map recording which objects are associated with which lock.
	locksInUse := make(map[uint32][]string)

	// record notes that the described object holds lock lockNum. Indexing a
	// missing key yields a nil slice and append allocates it, so no separate
	// presence check is required.
	record := func(lockNum uint32, desc string) {
		locksInUse[lockNum] = append(locksInUse[lockNum], desc)
	}

	ctrs, err := r.state.AllContainers(false)
	if err != nil {
		return nil, err
	}
	for _, ctr := range ctrs {
		record(ctr.lock.ID(), fmt.Sprintf("container %s", ctr.ID()))
	}

	pods, err := r.state.AllPods()
	if err != nil {
		return nil, err
	}
	for _, pod := range pods {
		record(pod.lock.ID(), fmt.Sprintf("pod %s", pod.ID()))
	}

	volumes, err := r.state.AllVolumes()
	if err != nil {
		return nil, err
	}
	for _, vol := range volumes {
		// Volumes are identified by name rather than by ID.
		record(vol.lock.ID(), fmt.Sprintf("volume %s", vol.Name()))
	}

	// Now go through and keep only the entries with >1 object associated.
	toReturn := make(map[uint32][]string)
	for lockNum, objects := range locksInUse {
		// If debug logging is requested, just spit out *every* lock in
		// use, conflicting or not.
		logrus.Debugf("Lock number %d is in use by %v", lockNum, objects)

		if len(objects) > 1 {
			toReturn[lockNum] = objects
		}
	}

	return toReturn, nil
}

View File

@ -62,6 +62,7 @@ type ContainerEngine interface { //nolint:interfacebloat
HealthCheckRun(ctx context.Context, nameOrID string, options HealthCheckOptions) (*define.HealthCheckResults, error)
Info(ctx context.Context) (*define.Info, error)
KubeApply(ctx context.Context, body io.Reader, opts ApplyOptions) error
Locks(ctx context.Context) (*LocksReport, error)
NetworkConnect(ctx context.Context, networkname string, options NetworkConnectOptions) error
NetworkCreate(ctx context.Context, network types.Network, createOptions *types.NetworkCreateOptions) (*types.Network, error)
NetworkUpdate(ctx context.Context, networkname string, options NetworkUpdateOptions) error

View File

@ -120,3 +120,9 @@ type AuthReport struct {
IdentityToken string
Status string
}
// LocksReport describes any conflicts in Libpod's lock allocations that could
// lead to deadlocks.
type LocksReport struct {
// LockConflicts maps a lock number to descriptions of the objects that
// share that lock.
LockConflicts map[uint32][]string
}

View File

@ -429,3 +429,13 @@ func (ic ContainerEngine) Version(ctx context.Context) (*entities.SystemVersionR
report.Client = &v
return &report, err
}
// Locks wraps the result of the Libpod runtime's LockConflicts call in an
// entities.LocksReport. On failure the runtime's error is returned unchanged
// together with a nil report.
func (ic ContainerEngine) Locks(ctx context.Context) (*entities.LocksReport, error) {
	lockConflicts, err := ic.Libpod.LockConflicts()
	if err != nil {
		return nil, err
	}
	return &entities.LocksReport{LockConflicts: lockConflicts}, nil
}

View File

@ -34,3 +34,7 @@ func (ic *ContainerEngine) Unshare(ctx context.Context, args []string, options e
func (ic ContainerEngine) Version(ctx context.Context) (*entities.SystemVersionReport, error) {
// Delegates to system.Version using the engine's ClientCtx; the ctx
// argument is not used here.
return system.Version(ic.ClientCtx, nil)
}
func (ic ContainerEngine) Locks(ctx context.Context) (*entities.LocksReport, error) {
// This implementation never produces a report: it always returns a nil
// report and a fixed "not supported" error.
return nil, errors.New("locks is not supported on remote clients")
}