mirror of https://github.com/containers/podman.git
Add a new hidden command, podman system locks
This is a general debug command that identifies any lock conflicts that could lead to a deadlock. It's only intended for Libpod developers (while it does tell you if you need to run `podman system renumber`, you should never have to do that anyway, and the next commit will include a lot more technical info in the output that no one except a Libpod dev will want). Hence, hidden command, and only implemented for the local driver (recommend just running it by SSHing into a `podman machine` VM in the unlikely case it's needed by remote Podman).

These conflicts should normally never happen, but having a command like this is useful for debugging deadlock conditions when they do occur.

Signed-off-by: Matt Heon <mheon@redhat.com>
This commit is contained in:
parent 1013696ad2
commit 0948c078c2
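For illustration only (not part of the commit): with the command wired up as below, a run that detects a conflict would print something like the following. The lock number and object names are made up; the message format is taken from the runLocks implementation added in this commit.

    $ podman system locks
    Lock 42 is in use by the following:
    	container 4f8a6bb1
    	volume testvol

    Lock conflicts have been detected. Recommend immediate use of `podman system renumber` to resolve.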
@@ -0,0 +1,48 @@
package system

import (
	"fmt"

	"github.com/containers/podman/v4/cmd/podman/registry"
	"github.com/containers/podman/v4/cmd/podman/validate"
	"github.com/spf13/cobra"
)

var (
	locksCommand = &cobra.Command{
		Use:    "locks",
		Short:  "Debug Libpod's use of locks, identifying any potential conflicts",
		Args:   validate.NoArgs,
		Hidden: true,
		RunE: func(cmd *cobra.Command, args []string) error {
			return runLocks()
		},
		Example: "podman system locks",
	}
)

func init() {
	registry.Commands = append(registry.Commands, registry.CliCommand{
		Command: locksCommand,
		Parent:  systemCmd,
	})
}
func runLocks() error {
	report, err := registry.ContainerEngine().Locks(registry.Context())
	if err != nil {
		return err
	}

	for lockNum, objects := range report.LockConflicts {
		fmt.Printf("Lock %d is in use by the following:\n", lockNum)
		for _, obj := range objects {
			fmt.Printf("\t%s\n", obj)
		}
	}

	if len(report.LockConflicts) > 0 {
		fmt.Printf("\nLock conflicts have been detected. Recommend immediate use of `podman system renumber` to resolve.\n")
	}

	return nil
}
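The commit message calls this a hidden command; the Hidden: true field above is what keeps it out of the parent command's help output while leaving it invocable by name. A minimal, self-contained cobra sketch of that behavior (the command names "demo" and "secret" are invented for this example and are not part of Podman):

package main

import (
	"fmt"
	"os"

	"github.com/spf13/cobra"
)

func main() {
	root := &cobra.Command{Use: "demo"}
	root.AddCommand(&cobra.Command{
		Use:    "secret",
		Hidden: true, // omitted from `demo --help`, but `demo secret` still runs
		RunE: func(cmd *cobra.Command, args []string) error {
			fmt.Println("hidden subcommand ran")
			return nil
		},
	})
	if err := root.Execute(); err != nil {
		os.Exit(1)
	}
}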
@@ -1188,3 +1188,73 @@ func (r *Runtime) RemoteURI() string {
func (r *Runtime) SetRemoteURI(uri string) {
	r.config.Engine.RemoteURI = uri
}


// Get information on potential lock conflicts.
// Returns a map of lock number to object(s) using the lock, formatted as
// "container <id>" or "volume <id>" or "pod <id>".
// If the map returned is not empty, you should immediately renumber locks on
// the runtime, because you have a deadlock waiting to happen.
func (r *Runtime) LockConflicts() (map[uint32][]string, error) {
	// Make an internal map to store what lock is associated with what
	locksInUse := make(map[uint32][]string)

	ctrs, err := r.state.AllContainers(false)
	if err != nil {
		return nil, err
	}
	for _, ctr := range ctrs {
		lockNum := ctr.lock.ID()
		ctrString := fmt.Sprintf("container %s", ctr.ID())
		locksArr, ok := locksInUse[lockNum]
		if ok {
			locksInUse[lockNum] = append(locksArr, ctrString)
		} else {
			locksInUse[lockNum] = []string{ctrString}
		}
	}

	pods, err := r.state.AllPods()
	if err != nil {
		return nil, err
	}
	for _, pod := range pods {
		lockNum := pod.lock.ID()
		podString := fmt.Sprintf("pod %s", pod.ID())
		locksArr, ok := locksInUse[lockNum]
		if ok {
			locksInUse[lockNum] = append(locksArr, podString)
		} else {
			locksInUse[lockNum] = []string{podString}
		}
	}

	volumes, err := r.state.AllVolumes()
	if err != nil {
		return nil, err
	}
	for _, vol := range volumes {
		lockNum := vol.lock.ID()
		volString := fmt.Sprintf("volume %s", vol.Name())
		locksArr, ok := locksInUse[lockNum]
		if ok {
			locksInUse[lockNum] = append(locksArr, volString)
		} else {
			locksInUse[lockNum] = []string{volString}
		}
	}

	// Now go through and find any entries with >1 item associated
	toReturn := make(map[uint32][]string)
	for lockNum, objects := range locksInUse {
		// If debug logging is requested, just spit out *every* lock in
		// use.
		logrus.Debugf("Lock number %d is in use by %v", lockNum, objects)

		if len(objects) > 1 {
			toReturn[lockNum] = objects
		}
	}

	return toReturn, nil
}
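To make the shape of the data concrete, here is a standalone sketch (not part of the commit) of the bookkeeping LockConflicts performs: collect (lock ID, object description) pairs, group them by lock ID, and report only the IDs claimed by more than one object. All lock numbers and object names below are invented for the example.

package main

import "fmt"

func main() {
	// (lock ID, object description) pairs as LockConflicts would gather them
	// from containers, pods, and volumes.
	allocations := []struct {
		lockID uint32
		object string
	}{
		{0, "container aaa"},
		{1, "pod bbb"},
		{1, "volume ccc"}, // shares lock 1 with the pod above: a conflict
		{2, "container ddd"},
	}

	// Group objects by the lock they hold.
	locksInUse := make(map[uint32][]string)
	for _, a := range allocations {
		locksInUse[a.lockID] = append(locksInUse[a.lockID], a.object)
	}

	// Only locks held by more than one object are conflicts.
	for lockID, objects := range locksInUse {
		if len(objects) > 1 {
			fmt.Printf("Lock %d is in use by the following:\n", lockID)
			for _, obj := range objects {
				fmt.Printf("\t%s\n", obj)
			}
		}
	}
}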
@@ -62,6 +62,7 @@ type ContainerEngine interface { //nolint:interfacebloat
	HealthCheckRun(ctx context.Context, nameOrID string, options HealthCheckOptions) (*define.HealthCheckResults, error)
	Info(ctx context.Context) (*define.Info, error)
	KubeApply(ctx context.Context, body io.Reader, opts ApplyOptions) error
	Locks(ctx context.Context) (*LocksReport, error)
	NetworkConnect(ctx context.Context, networkname string, options NetworkConnectOptions) error
	NetworkCreate(ctx context.Context, network types.Network, createOptions *types.NetworkCreateOptions) (*types.Network, error)
	NetworkUpdate(ctx context.Context, networkname string, options NetworkUpdateOptions) error
@@ -120,3 +120,9 @@ type AuthReport struct {
	IdentityToken string
	Status        string
}

// LocksReport describes any conflicts in Libpod's lock allocations that could
// lead to deadlocks.
type LocksReport struct {
	LockConflicts map[uint32][]string
}
@@ -429,3 +429,13 @@ func (ic ContainerEngine) Version(ctx context.Context) (*entities.SystemVersionR
	report.Client = &v
	return &report, err
}

func (ic ContainerEngine) Locks(ctx context.Context) (*entities.LocksReport, error) {
	var report entities.LocksReport
	conflicts, err := ic.Libpod.LockConflicts()
	if err != nil {
		return nil, err
	}
	report.LockConflicts = conflicts
	return &report, nil
}
@@ -34,3 +34,7 @@ func (ic *ContainerEngine) Unshare(ctx context.Context, args []string, options e
func (ic ContainerEngine) Version(ctx context.Context) (*entities.SystemVersionReport, error) {
	return system.Version(ic.ClientCtx, nil)
}

func (ic ContainerEngine) Locks(ctx context.Context) (*entities.LocksReport, error) {
	return nil, errors.New("locks is not supported on remote clients")
}