diff --git a/cmd/git-sync/main.go b/cmd/git-sync/main.go
index eb1e649..acc0b9f 100644
--- a/cmd/git-sync/main.go
+++ b/cmd/git-sync/main.go
@@ -319,7 +319,7 @@ func main() {
 	}
 
 	// From here on, output goes through logging.
-	log.V(0).Info("starting up", "args", os.Args)
+	log.V(0).Info("starting up", "pid", os.Getpid(), "args", os.Args)
 
 	// Startup webhooks goroutine
 	var webhook *Webhook
diff --git a/pkg/pid1/pid1.go b/pkg/pid1/pid1.go
index 19cc84f..5421b91 100644
--- a/pkg/pid1/pid1.go
+++ b/pkg/pid1/pid1.go
@@ -25,27 +25,30 @@ func ReRun() error {
 	if err := cmd.Start(); err != nil {
 		return err
 	}
-	go runInit(cmd.Process.Pid)
-	return cmd.Wait()
+	runInit(cmd.Process.Pid)
+	return nil
 }
 
-// runInit runs a bare-bones init process. This will never return. In case of
-// truly unknown errors it will panic.
-func runInit(pid int) {
+// runInit runs a bare-bones init process: every signal other than SIGCHLD is forwarded to firstborn, and children are
+// reaped after each signal. It returns once sigchld reports that firstborn exited; sigchld panics on unknown errors.
+func runInit(firstborn int) {
 	sigs := make(chan os.Signal, 8)
 	signal.Notify(sigs)
 	for sig := range sigs {
-		if sig == syscall.SIGCHLD {
-			sigchld()
-		} else {
+		if sig != syscall.SIGCHLD {
 			// Pass it on to the real process.
-			syscall.Kill(pid, sig.(syscall.Signal))
+			syscall.Kill(firstborn, sig.(syscall.Signal))
+		}
+		// Always try to reap a child - empirically, sometimes this gets missed.
+		if sigchld(firstborn) {
+			return
 		}
 	}
 }
 
-// sigchld handles a SIGCHLD.
-func sigchld() {
+// sigchld handles a SIGCHLD by reaping children in a loop. It returns true as soon as the reaped pid is firstborn,
+// and false once no more children are left to reap. If waiting fails with an error it panics.
+func sigchld(firstborn int) bool {
 	// Loop to handle multiple child processes.
 	for {
 		var status syscall.WaitStatus
@@ -53,10 +56,15 @@ func sigchld() {
 		if err != nil {
 			panic(fmt.Sprintf("failed to wait4(): %v\n", err))
 		}
+
+		if pid == firstborn {
+			return true
+		}
 		if pid <= 0 {
 			// No more children to reap.
 			break
 		}
 		// Must have found one, see if there are more.
 	}
+	return false
 }
diff --git a/pkg/pid1/test/fast-exit/.gitignore b/pkg/pid1/test/fast-exit/.gitignore
new file mode 100644
index 0000000..3adacd9
--- /dev/null
+++ b/pkg/pid1/test/fast-exit/.gitignore
@@ -0,0 +1 @@
+fast-exit
diff --git a/pkg/pid1/test/fast-exit/Dockerfile b/pkg/pid1/test/fast-exit/Dockerfile
new file mode 100644
index 0000000..305261b
--- /dev/null
+++ b/pkg/pid1/test/fast-exit/Dockerfile
@@ -0,0 +1,4 @@
+FROM debian
+RUN apt-get update && apt-get install -y bash procps psmisc psutils
+COPY fast-exit /fast-exit
+ENTRYPOINT ["/fast-exit"]
diff --git a/pkg/pid1/test/fast-exit/main.go b/pkg/pid1/test/fast-exit/main.go
new file mode 100644
index 0000000..3f51a81
--- /dev/null
+++ b/pkg/pid1/test/fast-exit/main.go
@@ -0,0 +1,28 @@
+// A do-nothing app to test pid1.ReRun().
+package main
+
+import (
+	"fmt"
+	"os"
+	"os/exec"
+
+	"k8s.io/git-sync/pkg/pid1"
+)
+
+func main() {
+	// In case we come up as pid 1, act as init.
+	if os.Getpid() == 1 {
+		fmt.Printf("detected pid 1, running as init\n")
+		err := pid1.ReRun()
+		if err == nil {
+			os.Exit(0)
+		}
+		if exerr, ok := err.(*exec.ExitError); ok {
+			os.Exit(exerr.ExitCode())
+		}
+		fmt.Printf("unhandled pid1 error: %v\n", err)
+		os.Exit(127)
+	}
+	fmt.Printf("main app\n")
+	os.Exit(42)
+}
diff --git a/pkg/pid1/test/fast-exit/test.sh b/pkg/pid1/test/fast-exit/test.sh
new file mode 100755
index 0000000..a86630e
--- /dev/null
+++ b/pkg/pid1/test/fast-exit/test.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+go build
+docker build -t example.com/fast-exit .
+
+# In the past we have observed hangs and missed signals. This *should* run
+# forever.
+while true; do
+    docker run -ti --rm example.com/fast-exit
+done