Ensure volumes reacquire locks on state refresh

After a reboot, pods and containers both run a refresh()
function to prepare them to run again. Until now, volumes have
not had a similar function, because they had no per-boot setup
to perform.

Unfortunately, this omission went unnoticed when in-memory
locking was introduced to volumes. The refresh() routine is,
among other things, responsible for ensuring that locks are
reserved after a reboot, so they cannot be handed out to a
freshly-created container, pod, or volume. If this reservation
is not done, we can end up with two objects using the same lock,
each potentially needing to lock the other for some operations -
a classic recipe for deadlocks.
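The failure mode is easy to reproduce in miniature. Below is a
minimal sketch, using toy types rather than libpod code, of two
objects that resolve to the same non-reentrant lock:

package main

import "sync"

// Toy stand-ins for two libpod objects (say, a container and a
// volume) that were wrongly handed the same lock after a reboot.
// These names and types are illustrative only, not libpod code.
type object struct {
	name string
	lock *sync.Mutex
}

func main() {
	shared := &sync.Mutex{}
	ctr := &object{name: "container", lock: shared}
	vol := &object{name: "volume", lock: shared}

	// An operation on the container that must also lock "its" volume:
	// the second Lock() blocks forever, because both objects resolve
	// to the same underlying, non-reentrant lock.
	ctr.lock.Lock()
	defer ctr.lock.Unlock()
	vol.lock.Lock() // fatal error: all goroutines are asleep - deadlock!
	defer vol.lock.Unlock()
}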

Add a refresh() function to volumes to perform lock reservation,
and ensure it is called as part of the overall runtime refresh().

Fixes #4605
Fixes #4621

Signed-off-by: Matthew Heon <matthew.heon@pm.me>
commit 689329f749
parent c9696c451d
Matthew Heon, 2019-12-02 23:06:00 -05:00
2 changed files with 26 additions and 4 deletions

libpod/runtime.go

@@ -625,7 +625,8 @@ func (r *Runtime) refresh(alivePath string) error {
 	}

 	// Next refresh the state of all containers to recreate dirs and
-	// namespaces, and all the pods to recreate cgroups
+	// namespaces, and all the pods to recreate cgroups.
+	// Containers, pods, and volumes must also reacquire their locks.
 	ctrs, err := r.state.AllContainers()
 	if err != nil {
 		return errors.Wrapf(err, "error retrieving all containers from state")
@@ -634,10 +635,14 @@ func (r *Runtime) refresh(alivePath string) error {
 	if err != nil {
 		return errors.Wrapf(err, "error retrieving all pods from state")
 	}
-	// No locks are taken during pod and container refresh.
-	// Furthermore, the pod and container refresh() functions are not
+	vols, err := r.state.AllVolumes()
+	if err != nil {
+		return errors.Wrapf(err, "error retrieving all volumes from state")
+	}
+	// No locks are taken during pod, volume, and container refresh.
+	// Furthermore, the pod/volume/container refresh() functions are not
 	// allowed to take locks themselves.
-	// We cannot assume that any pod or container has a valid lock until
+	// We cannot assume that any pod/volume/container has a valid lock until
 	// after this function has returned.
 	// The runtime alive lock should suffice to provide mutual exclusion
 	// until this has run.
@@ -651,6 +656,11 @@ func (r *Runtime) refresh(alivePath string) error {
 			logrus.Errorf("Error refreshing pod %s: %v", pod.ID(), err)
 		}
 	}
+	for _, vol := range vols {
+		if err := vol.refresh(); err != nil {
+			logrus.Errorf("Error refreshing volume %s: %v", vol.Name(), err)
+		}
+	}

 	// Create a file indicating the runtime is alive and ready
 	file, err := os.OpenFile(alivePath, os.O_RDONLY|os.O_CREATE, 0644)
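Note the error handling in the loops above: refresh failures are
logged and skipped rather than returned, so a single corrupt
object cannot keep the runtime from coming back up after a
reboot. A minimal sketch of that pattern, with illustrative
types that are not libpod's:

package main

import (
	"errors"
	"log"
)

// refresher is an illustrative stand-in for libpod's containers,
// pods, and volumes, each of which has a refresh() method.
type refresher interface {
	refresh() error
	name() string
}

type fakeVolume struct {
	id     string
	broken bool
}

func (v fakeVolume) name() string { return v.id }

func (v fakeVolume) refresh() error {
	if v.broken {
		return errors.New("stale lock")
	}
	return nil
}

// refreshAll mirrors the loops above: per-object failures are
// logged and skipped rather than returned, so one corrupt object
// cannot block the others from refreshing.
func refreshAll(objs []refresher) {
	for _, o := range objs {
		if err := o.refresh(); err != nil {
			log.Printf("Error refreshing %s: %v", o.name(), err)
		}
	}
}

func main() {
	refreshAll([]refresher{
		fakeVolume{id: "vol1"},
		fakeVolume{id: "vol2", broken: true}, // logged, not fatal
		fakeVolume{id: "vol3"},
	})
}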

libpod/volume_internal.go

@@ -5,6 +5,7 @@ import (
 	"path/filepath"

 	"github.com/containers/libpod/libpod/define"
+	"github.com/pkg/errors"
 )

 // Creates a new volume
@@ -46,3 +47,14 @@ func (v *Volume) update() error {
 func (v *Volume) save() error {
 	return v.runtime.state.SaveVolume(v)
 }
+
+// Refresh volume state after a restart.
+func (v *Volume) refresh() error {
+	lock, err := v.runtime.lockManager.AllocateAndRetrieveLock(v.config.LockID)
+	if err != nil {
+		return errors.Wrapf(err, "error acquiring lock %d for volume %s", v.config.LockID, v.Name())
+	}
+	v.lock = lock
+
+	return nil
+}
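For context, AllocateAndRetrieveLock both marks the stored lock
ID as allocated in the lock manager's bookkeeping and returns a
handle to it; the allocation step is what prevents a
freshly-created container, pod, or volume from being handed the
same ID. A toy in-memory manager, purely illustrative and much
simpler than libpod's SHM-backed one, makes the idea concrete:

package main

import (
	"fmt"
	"sync"
)

// toyManager is a purely illustrative, in-memory lock manager;
// it only shows why re-reserving IDs on refresh matters.
type toyManager struct {
	mu        sync.Mutex
	allocated map[uint32]bool
	next      uint32
}

// NewLock hands out the lowest free lock ID, as a fresh object would get.
func (m *toyManager) NewLock() uint32 {
	m.mu.Lock()
	defer m.mu.Unlock()
	for m.allocated[m.next] {
		m.next++
	}
	id := m.next
	m.allocated[id] = true
	return id
}

// AllocateAndRetrieveLock re-reserves a stored ID after a reboot
// so that NewLock can never hand the same ID to a new object.
func (m *toyManager) AllocateAndRetrieveLock(id uint32) error {
	m.mu.Lock()
	defer m.mu.Unlock()
	if m.allocated[id] {
		return fmt.Errorf("lock %d is already in use", id)
	}
	m.allocated[id] = true
	return nil
}

func main() {
	m := &toyManager{allocated: make(map[uint32]bool)}
	// On refresh: re-reserve the volume's stored lock ID first...
	if err := m.AllocateAndRetrieveLock(3); err != nil {
		panic(err)
	}
	// ...so a freshly-created container cannot be handed ID 3.
	fmt.Println("new object's lock ID:", m.NewLock()) // 0, not 3
}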