Merge pull request #5305 from mheon/check_for_common_deadlocks
Add basic deadlock detection for container start/remove
This commit is contained in:
		
						commit
						6babc9f20c
					
				|  | @ -4,8 +4,10 @@ import ( | ||||||
| 	"fmt" | 	"fmt" | ||||||
| 
 | 
 | ||||||
| 	"github.com/containers/libpod/cmd/podman/cliconfig" | 	"github.com/containers/libpod/cmd/podman/cliconfig" | ||||||
|  | 	"github.com/containers/libpod/libpod/define" | ||||||
| 	"github.com/containers/libpod/pkg/adapter" | 	"github.com/containers/libpod/pkg/adapter" | ||||||
| 	"github.com/pkg/errors" | 	"github.com/pkg/errors" | ||||||
|  | 	"github.com/sirupsen/logrus" | ||||||
| 	"github.com/spf13/cobra" | 	"github.com/spf13/cobra" | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
|  | @ -77,6 +79,9 @@ func rmCmd(c *cliconfig.RmValues) error { | ||||||
| 
 | 
 | ||||||
| 	if len(failures) > 0 { | 	if len(failures) > 0 { | ||||||
| 		for _, err := range failures { | 		for _, err := range failures { | ||||||
|  | 			if errors.Cause(err) == define.ErrWillDeadlock { | ||||||
|  | 				logrus.Errorf("Potential deadlock detected - please run 'podman system renumber' to resolve") | ||||||
|  | 			} | ||||||
| 			exitCode = setExitCode(err) | 			exitCode = setExitCode(err) | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | @ -1401,6 +1401,9 @@ func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string) | ||||||
| 		return nil, errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID()) | 		return nil, errors.Wrapf(err, "error retrieving named volume %s for container %s", v.Name, c.ID()) | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	if vol.config.LockID == c.config.LockID { | ||||||
|  | 		return nil, errors.Wrapf(define.ErrWillDeadlock, "container %s and volume %s share lock ID %d", c.ID(), vol.Name(), c.config.LockID) | ||||||
|  | 	} | ||||||
| 	vol.lock.Lock() | 	vol.lock.Lock() | ||||||
| 	defer vol.lock.Unlock() | 	defer vol.lock.Unlock() | ||||||
| 	if vol.needsMount() { | 	if vol.needsMount() { | ||||||
|  |  | ||||||
|  | @ -61,6 +61,11 @@ var ( | ||||||
| 	// the user.
 | 	// the user.
 | ||||||
| 	ErrDetach = utils.ErrDetach | 	ErrDetach = utils.ErrDetach | ||||||
| 
 | 
 | ||||||
|  | 	// ErrWillDeadlock indicates that the requested operation will cause a
 | ||||||
|  | 	// deadlock. This is usually caused by upgrade issues, and is resolved
 | ||||||
|  | 	// by renumbering the locks.
 | ||||||
|  | 	ErrWillDeadlock = errors.New("deadlock due to lock mismatch") | ||||||
|  | 
 | ||||||
| 	// ErrNoCgroups indicates that the container does not have its own
 | 	// ErrNoCgroups indicates that the container does not have its own
 | ||||||
| 	// CGroup.
 | 	// CGroup.
 | ||||||
| 	ErrNoCgroups = errors.New("this container does not have a cgroup") | 	ErrNoCgroups = errors.New("this container does not have a cgroup") | ||||||
|  |  | ||||||
|  | @ -412,6 +412,9 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force bool, | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		// Lock the pod while we're removing container
 | 		// Lock the pod while we're removing container
 | ||||||
|  | 		if pod.config.LockID == c.config.LockID { | ||||||
|  | 			return errors.Wrapf(define.ErrWillDeadlock, "container %s and pod %s share lock ID %d", c.ID(), pod.ID(), c.config.LockID) | ||||||
|  | 		} | ||||||
| 		pod.lock.Lock() | 		pod.lock.Lock() | ||||||
| 		defer pod.lock.Unlock() | 		defer pod.lock.Unlock() | ||||||
| 		if err := pod.updatePod(); err != nil { | 		if err := pod.updatePod(); err != nil { | ||||||
|  |  | ||||||
|  | @ -36,9 +36,6 @@ func (r *Runtime) RemoveVolume(ctx context.Context, v *Volume, force bool) error | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	v.lock.Lock() |  | ||||||
| 	defer v.lock.Unlock() |  | ||||||
| 
 |  | ||||||
| 	return r.removeVolume(ctx, v, force) | 	return r.removeVolume(ctx, v, force) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -124,6 +124,9 @@ func (r *Runtime) removeVolume(ctx context.Context, v *Volume, force bool) error | ||||||
| 		return define.ErrVolumeRemoved | 		return define.ErrVolumeRemoved | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	v.lock.Lock() | ||||||
|  | 	defer v.lock.Unlock() | ||||||
|  | 
 | ||||||
| 	// Update volume status to pick up a potential removal from state
 | 	// Update volume status to pick up a potential removal from state
 | ||||||
| 	if err := v.update(); err != nil { | 	if err := v.update(); err != nil { | ||||||
| 		return err | 		return err | ||||||
|  |  | ||||||
|  | @ -469,6 +469,10 @@ func (r *LocalRuntime) Run(ctx context.Context, c *cliconfig.RunValues, exitCode | ||||||
| 				logrus.Debugf("unable to remove container %s after failing to start and attach to it", ctr.ID()) | 				logrus.Debugf("unable to remove container %s after failing to start and attach to it", ctr.ID()) | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
|  | 		if errors.Cause(err) == define.ErrWillDeadlock { | ||||||
|  | 			logrus.Debugf("Deadlock error: %v", err) | ||||||
|  | 			return define.ExitCode(err), errors.Errorf("attempting to start container %s would cause a deadlock; please run 'podman system renumber' to resolve", ctr.ID()) | ||||||
|  | 		} | ||||||
| 		return define.ExitCode(err), err | 		return define.ExitCode(err), err | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | @ -702,6 +706,11 @@ func (r *LocalRuntime) Start(ctx context.Context, c *cliconfig.StartValues, sigP | ||||||
| 				return exitCode, nil | 				return exitCode, nil | ||||||
| 			} | 			} | ||||||
| 
 | 
 | ||||||
|  | 			if errors.Cause(err) == define.ErrWillDeadlock { | ||||||
|  | 				logrus.Debugf("Deadlock error: %v", err) | ||||||
|  | 				return define.ExitCode(err), errors.Errorf("attempting to start container %s would cause a deadlock; please run 'podman system renumber' to resolve", ctr.ID()) | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
| 			if ctrRunning { | 			if ctrRunning { | ||||||
| 				return 0, err | 				return 0, err | ||||||
| 			} | 			} | ||||||
|  | @ -735,6 +744,10 @@ func (r *LocalRuntime) Start(ctx context.Context, c *cliconfig.StartValues, sigP | ||||||
| 				if lastError != nil { | 				if lastError != nil { | ||||||
| 					fmt.Fprintln(os.Stderr, lastError) | 					fmt.Fprintln(os.Stderr, lastError) | ||||||
| 				} | 				} | ||||||
|  | 				if errors.Cause(err) == define.ErrWillDeadlock { | ||||||
|  | 					lastError = errors.Wrapf(err, "please run 'podman system renumber' to resolve deadlocks") | ||||||
|  | 					continue | ||||||
|  | 				} | ||||||
| 				lastError = errors.Wrapf(err, "unable to start container %q", container) | 				lastError = errors.Wrapf(err, "unable to start container %q", container) | ||||||
| 				continue | 				continue | ||||||
| 			} | 			} | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue