pkg/machine: make checkExclusiveActiveVM race free

We need to take another lock to prevent concurrent starts from different
machines.

I manually tested it by starting three VM in parallel with:
podman machine start & podman machine start test1 & podman machine start test2

I also added a CI test that seems to work as expected (failed with the
old binary, worked with the new)

Before this patch I was able to start more than VM, with this patch it
now only starts one of them and the other ones will fail to start with
a proper error.

Signed-off-by: Paul Holzinger <pholzing@redhat.com>
This commit is contained in:
Paul Holzinger 2024-03-13 15:30:32 +01:00
parent b238303a40
commit e82d196269
No known key found for this signature in database
GPG Key ID: EB145DD938A3CAF2
4 changed files with 100 additions and 21 deletions

View File

@ -8,7 +8,6 @@ import (
"github.com/containers/podman/v5/cmd/podman/registry"
"github.com/containers/podman/v5/libpod/events"
"github.com/containers/podman/v5/pkg/machine"
"github.com/containers/podman/v5/pkg/machine/define"
"github.com/containers/podman/v5/pkg/machine/env"
"github.com/containers/podman/v5/pkg/machine/shim"
"github.com/containers/podman/v5/pkg/machine/vmconfigs"
@ -64,19 +63,6 @@ func start(_ *cobra.Command, args []string) error {
return err
}
state, err := provider.State(mc, false)
if err != nil {
return err
}
if state == define.Running {
return define.ErrVMAlreadyRunning
}
if err := shim.CheckExclusiveActiveVM(provider, mc); err != nil {
return err
}
if !startOpts.Quiet {
fmt.Printf("Starting machine %q\n", vmName)
}

View File

@ -1,6 +1,7 @@
package e2e_test
import (
"sync"
"time"
"github.com/containers/podman/v5/pkg/machine/define"
@ -119,4 +120,58 @@ var _ = Describe("podman machine start", func() {
Expect(inspectSession2).To(Exit(0))
Expect(inspectSession2.outputToString()).To(Not(Equal(define.Running)))
})
It("start two machines in parallel", func() {
i := initMachine{}
machine1 := "m1-" + randomString()
session, err := mb.setName(machine1).setCmd(i.withImage(mb.imagePath)).run()
Expect(err).ToNot(HaveOccurred())
Expect(session).To(Exit(0))
machine2 := "m2-" + randomString()
session, err = mb.setName(machine2).setCmd(i.withImage(mb.imagePath)).run()
Expect(session).To(Exit(0))
var startSession1, startSession2 *machineSession
wg := sync.WaitGroup{}
wg.Add(2)
// now start two machine start process in parallel
go func() {
defer GinkgoRecover()
defer wg.Done()
s := startMachine{}
startSession1, err = mb.setName(machine1).setCmd(s).setTimeout(time.Minute * 10).run()
Expect(err).ToNot(HaveOccurred())
}()
go func() {
defer GinkgoRecover()
defer wg.Done()
s := startMachine{}
// ok this is a hack and should not be needed but the way these test are setup they all
// share "mb" which stores the name that is used for the VM, thus running two parallel
// can overwrite the name from the other, work around that by creating a new mb for the
// second run.
nmb, err := newMB()
Expect(err).ToNot(HaveOccurred())
startSession2, err = nmb.setName(machine2).setCmd(s).setTimeout(time.Minute * 10).run()
Expect(err).ToNot(HaveOccurred())
}()
wg.Wait()
// WSL can start in parallel so just check both command exit 0 there
if testProvider.VMType() == define.WSLVirt {
Expect(startSession1).To(Exit(0))
Expect(startSession2).To(Exit(0))
return
}
// other providers have a check that only one VM can be running at any given time so make sure our check is race free
Expect(startSession1).To(Or(Exit(0), Exit(125)), "start command should succeed or fail with 125")
if startSession1.ExitCode() == 0 {
Expect(startSession2).To(Exit(125), "first start worked, second start must fail")
Expect(startSession2.errorToString()).To(ContainSubstring("machine %s: VM already running or starting", machine1))
} else {
Expect(startSession2).To(Exit(0), "first start failed, second start succeed")
Expect(startSession1.errorToString()).To(ContainSubstring("machine %s: VM already running or starting", machine2))
}
})
})

View File

@ -4,6 +4,7 @@ import (
"fmt"
"path/filepath"
"github.com/containers/podman/v5/pkg/machine/env"
"github.com/containers/storage/pkg/lockfile"
)
@ -15,3 +16,21 @@ func GetMachineLock(name string, machineConfigDir string) (*lockfile.LockFile, e
}
return lock, nil
}
const machineStartLockName = "machine-start.lock"
// GetMachineStartLock is a lock only used to prevent starting different machines at the same time,
// This is required as most provides support at max 1 running VM and to check this race free we
// cannot allows starting two machine.
func GetMachineStartLock() (*lockfile.LockFile, error) {
lockDir, err := env.GetGlobalDataDir()
if err != nil {
return nil, err
}
lock, err := lockfile.GetLockFile(filepath.Join(lockDir, machineStartLockName))
if err != nil {
return nil, err
}
return lock, nil
}

View File

@ -268,13 +268,8 @@ func VMExists(name string, vmstubbers []vmconfigs.VMProvider) (*vmconfigs.Machin
return nil, false, nil
}
// CheckExclusiveActiveVM checks if any of the machines are already running
func CheckExclusiveActiveVM(provider vmconfigs.VMProvider, mc *vmconfigs.MachineConfig) error {
// Don't check if provider supports parallel running machines
if !provider.RequireExclusiveActive() {
return nil
}
// checkExclusiveActiveVM checks if any of the machines are already running
func checkExclusiveActiveVM(provider vmconfigs.VMProvider, mc *vmconfigs.MachineConfig) error {
// Check if any other machines are running; if so, we error
localMachines, err := getMCsOverProviders([]vmconfigs.VMProvider{provider})
if err != nil {
@ -384,6 +379,30 @@ func Start(mc *vmconfigs.MachineConfig, mp vmconfigs.VMProvider, dirs *machineDe
return fmt.Errorf("reload config: %w", err)
}
// Don't check if provider supports parallel running machines
if mp.RequireExclusiveActive() {
startLock, err := lock.GetMachineStartLock()
if err != nil {
return err
}
startLock.Lock()
defer startLock.Unlock()
if err := checkExclusiveActiveVM(mp, mc); err != nil {
return err
}
} else {
// still should make sure we do not start the same machine twice
state, err := mp.State(mc, false)
if err != nil {
return err
}
if state == machineDefine.Running || state == machineDefine.Starting {
return fmt.Errorf("machine %s: %w", mc.Name, machineDefine.ErrVMAlreadyRunning)
}
}
// Set starting to true
mc.Starting = true
if err := mc.Write(); err != nil {