Fix 2 bugs in pid1
1) ReRun was calling Wait() on the child process while the init loop was also calling wait4(), so the two would race, causing an occasional error or panic. 2) While testing (1), I observed occasional hangs, which I traced to a SIGWINCH that masked a SIGCHLD, causing the init loop to hang. Both seem fixed. Added a manual test script.
This commit is contained in:
parent
8015d4b24e
commit
95a1690e6f
|
|
@ -319,7 +319,7 @@ func main() {
|
|||
}
|
||||
|
||||
// From here on, output goes through logging.
|
||||
log.V(0).Info("starting up", "args", os.Args)
|
||||
log.V(0).Info("starting up", "pid", os.Getpid(), "args", os.Args)
|
||||
|
||||
// Startup webhooks goroutine
|
||||
var webhook *Webhook
|
||||
|
|
|
|||
|
|
@ -25,27 +25,30 @@ func ReRun() error {
|
|||
if err := cmd.Start(); err != nil {
|
||||
return err
|
||||
}
|
||||
go runInit(cmd.Process.Pid)
|
||||
return cmd.Wait()
|
||||
runInit(cmd.Process.Pid)
|
||||
return nil
|
||||
}
|
||||
|
||||
// runInit runs a bare-bones init process. This will never return. In case of
|
||||
// truly unknown errors it will panic.
|
||||
func runInit(pid int) {
|
||||
// runInit runs a bare-bones init process. This will return when firstborn
|
||||
// exits. In case of truly unknown errors it will panic.
|
||||
func runInit(firstborn int) {
|
||||
sigs := make(chan os.Signal, 8)
|
||||
signal.Notify(sigs)
|
||||
for sig := range sigs {
|
||||
if sig == syscall.SIGCHLD {
|
||||
sigchld()
|
||||
} else {
|
||||
if sig != syscall.SIGCHLD {
|
||||
// Pass it on to the real process.
|
||||
syscall.Kill(pid, sig.(syscall.Signal))
|
||||
syscall.Kill(firstborn, sig.(syscall.Signal))
|
||||
}
|
||||
// Always try to reap a child - empirically, sometimes this gets missed.
|
||||
if sigchld(firstborn) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sigchld handles a SIGCHLD.
|
||||
func sigchld() {
|
||||
// sigchld handles a SIGCHLD. This will return true when firstborn exits. In
|
||||
// case of truly unknown errors it will panic.
|
||||
func sigchld(firstborn int) bool {
|
||||
// Loop to handle multiple child processes.
|
||||
for {
|
||||
var status syscall.WaitStatus
|
||||
|
|
@ -53,10 +56,15 @@ func sigchld() {
|
|||
if err != nil {
|
||||
panic(fmt.Sprintf("failed to wait4(): %v\n", err))
|
||||
}
|
||||
|
||||
if pid == firstborn {
|
||||
return true
|
||||
}
|
||||
if pid <= 0 {
|
||||
// No more children to reap.
|
||||
break
|
||||
}
|
||||
// Must have found one, see if there are more.
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
fast-exit
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
FROM debian
|
||||
RUN apt-get update && apt-get install -y bash procps psmisc psutils
|
||||
COPY fast-exit /fast-exit
|
||||
ENTRYPOINT ["/fast-exit"]
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
// A do-nothing app to test pid1.ReRun().
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
|
||||
"k8s.io/git-sync/pkg/pid1"
|
||||
)
|
||||
|
||||
func main() {
|
||||
// In case we come up as pid 1, act as init.
|
||||
if os.Getpid() == 1 {
|
||||
fmt.Printf("detected pid 1, running as init\n")
|
||||
err := pid1.ReRun()
|
||||
if err == nil {
|
||||
os.Exit(0)
|
||||
}
|
||||
if exerr, ok := err.(*exec.ExitError); ok {
|
||||
os.Exit(exerr.ExitCode())
|
||||
}
|
||||
fmt.Printf("unhandled pid1 error: %v\n", err)
|
||||
os.Exit(127)
|
||||
}
|
||||
fmt.Printf("main app\n")
|
||||
os.Exit(42)
|
||||
}
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
#!/bin/sh
|
||||
|
||||
go build
|
||||
docker build -t example.com/fast-exit .
|
||||
|
||||
# In the past we have observed hangs and missed signals. This *should* run
|
||||
# forever.
|
||||
while true; do
|
||||
docker run -ti --rm example.com/fast-exit
|
||||
done
|
||||
Loading…
Reference in New Issue