From dfb5b6e1154f7ea6356464f1b9eb554802ad30c9 Mon Sep 17 00:00:00 2001
From: Debarshi Ray
Date: Sun, 6 Jul 2025 21:50:15 +0200
Subject: [PATCH] cmd, pkg/utils: Stop the container once the last session
 finishes

Currently, once a Toolbx container gets started with 'podman start', as part of the 'enter' or 'run' commands, it doesn't stop unless the host is shut down or someone explicitly calls 'podman stop'. This becomes annoying if someone tries to remove the container, because commands like 'podman rm' don't work without the '--force' flag even after all active 'enter' and 'run' sessions have ended, and the lingering entry points of those containers can be considered a waste of resources.

A system of reference counting based on advisory file locks is used to automatically exit the container's entry point once all the active sessions have ended. Two locks are used - a global lock that's common to all containers, and a local lock that's specific to each container. The initialization stamp file is conveniently used as the local lock. The 'enter' and 'run' sessions acquire shared file locks, and the container's entry point acquires exclusive ones. All attempts at acquiring the locks are blocking unless otherwise noted.

The global lock is acquired at the beginning of 'enter' and 'run', before they inspect the container and negotiate the path to the local lock (ie., the initialization stamp file) with the entry point, which creates the local lock. Once 'enter' and 'run' know the path to the local lock, they acquire it, and only then release the global lock.

The Toolbx container's entry point tries to acquire the global lock as it creates the initialization stamp file (ie., the local lock). This waits for the 'enter' and 'run' invocations to receive the location of the local lock, acquire it, and release the global lock. Once the entry point acquires the global lock, it releases it, and then blocks trying to acquire the local lock.

This sequence of acquiring and releasing the locks lets the entry point track the state of the 'enter' and 'run' invocations. It ensures that the entry point only tries to acquire the local lock after the 'enter' and 'run' invocations have acquired it, which they do before invoking 'podman exec'.

The entry point is able to acquire the local lock after all the 'enter' and 'run' sessions have ended and released the local lock. At this point, a new 'enter' or 'run' invocation might be in the process of starting. Both sides need to be careful not to race against each other and end up in an invalid state - eg., a 'podman start' being invoked against a container whose entry point is just about to exit, or a 'podman exec' being invoked against a container whose entry point is about to exit or has already exited.

Therefore, the entry point makes a non-blocking attempt to acquire the global lock while holding the local one. If it fails, then it's because a new 'enter' or 'run' was invoked and is in the process of negotiating the path to the local lock with the entry point. In this case, the entry point releases the local lock and goes back to trying to acquire the global lock, just as it did when creating the initialization stamp file (ie., the local lock). If it succeeds, then no new 'enter' or 'run' is in the process of starting, and the entry point can exit.

If this system of reference counting were simplified to use just the global lock, then the entry points of all Toolbx containers would exit only after the 'enter' and 'run' sessions across all Toolbx containers had ended. The local lock makes it possible to do this for each container separately.
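To make the handshake concrete, here is a minimal, self-contained sketch of the session side, ie., what 'enter' and 'run' do. It is an illustration only: it calls flock(2) through the syscall package instead of the utils.Flock() helper added by this patch, and the lock paths are hypothetical:

    package main

    import (
        "fmt"
        "os"
        "syscall"
    )

    // flock opens path, creating it if necessary, and acquires an
    // advisory lock on it with the given operation. Closing the
    // returned file releases the lock.
    func flock(path string, how int) (*os.File, error) {
        file, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o600)
        if err != nil {
            return nil, err
        }

        if err := syscall.Flock(int(file.Fd()), how); err != nil {
            file.Close()
            return nil, err
        }

        return file, nil
    }

    // enterOrRun mimics the locking sequence of 'enter' and 'run':
    // take a shared lock on the global lock, then a shared lock on the
    // local lock (ie., the initialization stamp file), and only then
    // release the global lock. The local lock is held for as long as
    // the session runs.
    func enterOrRun(globalLock, localLock string) (*os.File, error) {
        globalLockFile, err := flock(globalLock, syscall.LOCK_SH)
        if err != nil {
            return nil, err
        }

        localLockFile, err := flock(localLock, syscall.LOCK_SH)
        if err != nil {
            globalLockFile.Close()
            return nil, err
        }

        if err := globalLockFile.Close(); err != nil {
            localLockFile.Close()
            return nil, err
        }

        return localLockFile, nil
    }

    func main() {
        // Hypothetical paths; the real locks live under the user's
        // runtime directory.
        localLockFile, err := enterOrRun("/tmp/global.lock", "/tmp/local.lock")
        if err != nil {
            fmt.Fprintf(os.Stderr, "%s\n", err)
            os.Exit(1)
        }
        defer localLockFile.Close() // releases the lock when the session ends

        fmt.Println("session running; the entry point will not exit")
    }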
This system will not work without the global lock. Without it, there would be races whenever a new 'enter' or 'run' is invoked just as the last of the previous batch of sessions ends, letting the entry point acquire the local lock and prepare to exit.

Sometimes, a Toolbx container's entry point is started directly with 'podman start', without going through the 'enter' or 'run' commands, for debugging. Care was taken to detect this case by making a non-blocking attempt to acquire the global lock from the entry point before creating the initialization stamp file (ie., the local lock). If it fails, then it's because an 'enter' or 'run' is waiting for the container to get initialized by the entry point, and things proceed as described above. If it succeeds, then it's because the entry point was started directly. In this case, the entry point releases the global lock, and adds a timeout after creating the initialization stamp file, before trying to acquire any other locks, to give the user time to invoke 'enter' or 'run'. A timeout of 25 seconds is used, because that's the default for D-Bus method calls [1], and also how long 'enter' and 'run' wait for the entry point to initialize the container.

A variation of this system of reference counting could use the advisory file locks only in the 'enter' and 'run' commands, and invoke 'podman inspect --format {{.ExecIDs}} ...' after each 'podman exec' to find out if there are any remaining sessions [2]. This was not done because each podman(1) invocation is sufficiently expensive that there is a desire to keep them to a minimum in the 'enter' and 'run' commands. These are the most frequently used commands, and users expect them to be as lean as possible [3,4].

A totally different approach could be to pass an AF_UNIX socket to the Toolbx container through the NOTIFY_SOCKET environment variable and 'podman create --sdnotify container ...', and to do the reference counting by sending messages from the host to the entry point before and after each 'podman exec' [2]. One downside is that the reference counting would break if the host process crashed before sending the message to deduct the count after a 'podman exec' ends. Another downside is that it becomes complicated to directly call 'podman start', without going through the 'enter' or 'run' commands, for debugging.
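For symmetry with the earlier sketch, this is roughly what the entry point's side of the chosen approach looks like, including the detection of a direct 'podman start' and the 25 second grace period. The same assumptions apply: bare flock(2) through the syscall package, a flock() helper identical to the one in the previous sketch, and hypothetical lock paths.

    package main

    import (
        "errors"
        "fmt"
        "os"
        "syscall"
        "time"
    )

    // flock is the same helper as in the previous sketch.
    func flock(path string, how int) (*os.File, error) {
        file, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o600)
        if err != nil {
            return nil, err
        }

        if err := syscall.Flock(int(file.Fd()), how); err != nil {
            file.Close()
            return nil, err
        }

        return file, nil
    }

    // entryPoint blocks until the container is unused, then returns.
    func entryPoint(globalLock, localLock string) error {
        // If this non-blocking attempt succeeds, no 'enter' or 'run'
        // is waiting on the global lock, ie., the entry point was
        // started directly with 'podman start'. Give the user time to
        // invoke 'enter' or 'run'.
        if file, err := flock(globalLock, syscall.LOCK_EX|syscall.LOCK_NB); err == nil {
            file.Close()
            time.Sleep(25 * time.Second)
        }

        for {
            // Wait until 'enter' or 'run' has acquired the local lock
            // and released the global lock, ie., 'podman exec' began.
            globalLockFile, err := flock(globalLock, syscall.LOCK_EX)
            if err != nil {
                return err
            }
            globalLockFile.Close()

            // Wait until every session has released the local lock.
            localLockFile, err := flock(localLock, syscall.LOCK_EX)
            if err != nil {
                return err
            }

            // If a new 'enter' or 'run' is already holding the global
            // lock, step aside and wait for its 'podman exec' to
            // begin.
            if _, err := flock(globalLock, syscall.LOCK_EX|syscall.LOCK_NB); err != nil {
                localLockFile.Close()
                if errors.Is(err, syscall.EWOULDBLOCK) {
                    continue
                }

                return err
            }

            // Both locks are still held; they are released when the
            // process exits.
            return nil
        }
    }

    func main() {
        if err := entryPoint("/tmp/global.lock", "/tmp/local.lock"); err != nil {
            fmt.Fprintf(os.Stderr, "%s\n", err)
            os.Exit(1)
        }

        fmt.Println("container is unused; exiting")
    }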
[1] https://docs.gtk.org/gio/property.DBusProxy.g-default-timeout.html
[2] https://github.com/containers/podman/discussions/26589
[3] Commit 4536e2c8c28f6c4f
    https://github.com/containers/toolbox/commit/4536e2c8c28f6c4f
    https://github.com/containers/toolbox/pull/813
    https://github.com/containers/toolbox/issues/654
[4] Commit 74d4fcf00c6ec3d1
    https://github.com/containers/toolbox/commit/74d4fcf00c6ec3d1
    https://github.com/containers/toolbox/pull/1491
    https://github.com/containers/toolbox/issues/1070
    https://github.com/containers/toolbox/issues/114
---
 src/cmd/initContainer.go | 113 +++++++++++++++++++++++++++++++++++++++
 src/cmd/run.go           |  58 ++++++++++++++++++++
 src/pkg/utils/utils.go   |  12 +++++
 3 files changed, 183 insertions(+)

diff --git a/src/cmd/initContainer.go b/src/cmd/initContainer.go
index cac4ddf..5dbe087 100644
--- a/src/cmd/initContainer.go
+++ b/src/cmd/initContainer.go
@@ -17,6 +17,7 @@
 package cmd
 
 import (
+    "context"
     "errors"
     "fmt"
     "io/ioutil"
@@ -353,6 +354,28 @@ func initContainer(cmd *cobra.Command, args []string) error {
         return err
     }
 
+    referenceCountGlobalLock, err := utils.GetReferenceCountGlobalLock(targetUser)
+    if err != nil {
+        return err
+    }
+
+    var waitForRun bool
+    if referenceCountGlobalLockFile, err := utils.Flock(referenceCountGlobalLock,
+        syscall.LOCK_EX|syscall.LOCK_NB); err == nil {
+        waitForRun = true
+        if err := referenceCountGlobalLockFile.Close(); err != nil {
+            logrus.Debugf("Releasing global reference count lock: %s", err)
+            return utils.ErrFlockRelease
+        }
+    }
+
+    parentCtx := context.Background()
+    waitForExitCtx, waitForExitCancel := context.WithCancelCause(parentCtx)
+    defer waitForExitCancel(errors.New("clean-up"))
+
+    detectWhenContainerIsUnusedAsync(waitForExitCancel, initializedStamp, referenceCountGlobalLock, waitForRun)
+    done := waitForExitCtx.Done()
+
     logrus.Debugf("Creating initialization stamp %s", initializedStamp)
 
     initializedStampFile, err := os.Create(initializedStamp)
@@ -372,6 +395,10 @@ func initContainer(cmd *cobra.Command, args []string) error {
 
     for {
         select {
+        case <-done:
+            cause := context.Cause(waitForExitCtx)
+            logrus.Debugf("Exiting entry point: %s", cause)
+            return nil
         case event := <-tickerDaily.C:
             handleDailyTick(event)
         case event := <-watcherForHostEvents:
@@ -788,6 +815,55 @@ func createSymbolicLink(existingTarget, newLink string) error {
     return nil
 }
 
+func detectWhenContainerIsUnusedAsync(cancel context.CancelCauseFunc,
+    initializedStamp, referenceCountGlobalLock string,
+    waitForRun bool) {
+
+    go func() {
+        if waitForRun {
+            logrus.Debug("This entry point was not started by 'toolbox enter' or 'toolbox run'")
+            logrus.Debug("Waiting for 'toolbox enter' or 'toolbox run'")
+            time.Sleep(25 * time.Second)
+        }
+
+        for {
+            logrus.Debug("Waiting for 'podman exec' to begin")
+            if err := waitForExecToBegin(referenceCountGlobalLock); err != nil {
+                if errors.Is(err, utils.ErrFlockRelease) {
+                    cancel(err)
+                } else {
+                    logrus.Debugf("Waiting for 'podman exec' to begin: %s", err)
+                    logrus.Debug("This entry point will not exit when the container is unused")
+                }
+
+                return
+            }
+
+            logrus.Debug("Waiting for the container to be unused")
+            if err := waitForContainerToBeUnused(initializedStamp,
+                referenceCountGlobalLock); err != nil {
+                if errors.Is(err, syscall.EWOULDBLOCK) {
+                    logrus.Debug("Detected potentially new use of the container")
+                    continue
+                }
+
+                if errors.Is(err, utils.ErrFlockRelease) {
+                    cancel(err)
+                } else {
+                    logrus.Debugf("Waiting for the container to be unused: %s", err)
+                    logrus.Debug("This entry point will not exit when the container is unused")
+                }
+
+                return
+            }
+
+            cause := errors.New("all 'podman exec' sessions exited")
+            cancel(cause)
+            return
+        }
+    }()
+}
+
 func getDelayEntryPoint() (time.Duration, bool) {
     valueString := os.Getenv("TOOLBX_DELAY_ENTRY_POINT")
     if valueString == "" {
@@ -1123,6 +1199,43 @@ func updateTimeZoneFromLocalTime() error {
     return nil
 }
 
+func waitForExecToBegin(referenceCountGlobalLock string) error {
+    referenceCountGlobalLockFile, err := utils.Flock(referenceCountGlobalLock, syscall.LOCK_EX)
+    if err != nil {
+        return err
+    }
+
+    if err := referenceCountGlobalLockFile.Close(); err != nil {
+        logrus.Debugf("Releasing global reference count lock: %s", err)
+        return utils.ErrFlockRelease
+    }
+
+    return nil
+}
+
+func waitForContainerToBeUnused(initializedStamp, referenceCountGlobalLock string) error {
+    referenceCountLocalLockFile, err := utils.Flock(initializedStamp, syscall.LOCK_EX)
+    if err != nil {
+        if errors.Is(err, syscall.EWOULDBLOCK) {
+            panicMsg := fmt.Sprintf("unexpected %T: %s", err, err)
+            panic(panicMsg)
+        }
+
+        return err
+    }
+
+    if _, err := utils.Flock(referenceCountGlobalLock, syscall.LOCK_EX|syscall.LOCK_NB); err != nil {
+        if err := referenceCountLocalLockFile.Close(); err != nil {
+            logrus.Debugf("Releasing local reference count lock: %s", err)
+            return utils.ErrFlockRelease
+        }
+
+        return err
+    }
+
+    return nil
+}
+
 func writeTimeZone(timeZone string) error {
     const etcTimeZone = "/etc/timezone"
 
diff --git a/src/cmd/run.go b/src/cmd/run.go
index d50c910..dde645a 100644
--- a/src/cmd/run.go
+++ b/src/cmd/run.go
@@ -243,6 +243,35 @@ func runCommand(container string,
         }
     }
 
+    logrus.Debug("Acquiring global reference count lock")
+
+    referenceCountGlobalLock, err := utils.GetReferenceCountGlobalLock(currentUser)
+    if err != nil {
+        return err
+    }
+
+    referenceCountGlobalLockFile, err := utils.Flock(referenceCountGlobalLock, syscall.LOCK_SH)
+    if err != nil {
+        logrus.Debugf("Acquiring global reference count lock: %s", err)
+
+        var errFlock *utils.FlockError
+
+        if errors.As(err, &errFlock) {
+            if errors.Is(err, utils.ErrFlockAcquire) {
+                err = utils.ErrFlockAcquire
+            } else if errors.Is(err, utils.ErrFlockCreate) {
+                err = utils.ErrFlockCreate
+            } else {
+                panicMsg := fmt.Sprintf("unexpected %T: %s", err, err)
+                panic(panicMsg)
+            }
+        }
+
+        return err
+    }
+
+    defer referenceCountGlobalLockFile.Close()
+
     logrus.Debugf("Inspecting container %s", container)
     containerObj, err := podman.InspectContainer(container)
     if err != nil {
@@ -345,6 +374,35 @@ func runCommand(container string,
     }
     logrus.Debugf("Container %s is initialized", container)
 
+    logrus.Debug("Acquiring local reference count lock")
+
+    referenceCountLocalLockFile, err := utils.Flock(initializedStamp, syscall.LOCK_SH)
+    if err != nil {
+        logrus.Debugf("Acquiring local reference count lock: %s", err)
+
+        var errFlock *utils.FlockError
+
+        if errors.As(err, &errFlock) {
+            if errors.Is(err, utils.ErrFlockAcquire) {
+                err = utils.ErrFlockAcquire
+            } else if errors.Is(err, utils.ErrFlockCreate) {
+                err = utils.ErrFlockCreate
+            } else {
+                panicMsg := fmt.Sprintf("unexpected %T: %s", err, err)
+                panic(panicMsg)
+            }
+        }
+
+        return err
+    }
+
+    defer referenceCountLocalLockFile.Close()
+
+    logrus.Debug("Releasing global reference count lock")
+    if err := referenceCountGlobalLockFile.Close(); err != nil {
+        logrus.Debugf("Releasing global reference count lock: %s", err)
+        return utils.ErrFlockRelease
+    }
 
     environ := append(cdiEnviron, p11KitServerEnviron...)
     if err := runCommandWithFallbacks(container,
diff --git a/src/pkg/utils/utils.go b/src/pkg/utils/utils.go
index 627bdeb..10481fd 100644
--- a/src/pkg/utils/utils.go
+++ b/src/pkg/utils/utils.go
@@ -184,6 +184,8 @@ var (
 
     ErrFlockCreate = errors.New("failed to create lock file")
 
+    ErrFlockRelease = errors.New("failed to release lock")
+
     ErrImageWithoutBasename = errors.New("image does not have a basename")
 )
 
@@ -498,6 +500,16 @@ func GetP11KitServerSocketLock(targetUser *user.User) (string, error) {
     return p11KitServerSocketLock, nil
 }
 
+func GetReferenceCountGlobalLock(targetUser *user.User) (string, error) {
+    toolbxRuntimeDirectory, err := GetRuntimeDirectory(targetUser)
+    if err != nil {
+        return "", err
+    }
+
+    referenceCountGlobalLock := filepath.Join(toolbxRuntimeDirectory, "container-reference-count.lock")
+    return referenceCountGlobalLock, nil
+}
+
 func GetRuntimeDirectory(targetUser *user.User) (string, error) {
     if runtimeDirectories == nil {
         runtimeDirectories = make(map[string]string)