Fix exit non-zero exit codes when running as pid1

(v4 branch)

Prior to this we would swallow the exit code and always exit(0).
This commit is contained in:
Tim Hockin 2021-03-11 12:55:42 -08:00
parent 6f5c1a9a22
commit 45b28530a7
5 changed files with 71 additions and 30 deletions

View File

@ -269,12 +269,9 @@ func main() {
// In case we come up as pid 1, act as init.
if os.Getpid() == 1 {
fmt.Fprintf(os.Stderr, "INFO: detected pid 1, running init handler\n")
err := pid1.ReRun()
code, err := pid1.ReRun()
if err == nil {
os.Exit(0)
}
if exerr, ok := err.(*exec.ExitError); ok {
os.Exit(exerr.ExitCode())
os.Exit(code)
}
fmt.Fprintf(os.Stderr, "ERROR: unhandled pid1 error: %v\n", err)
os.Exit(127)

View File

@ -11,54 +11,67 @@ import (
// ReRun converts the current process into a bare-bones init, runs the current
// commandline as a child process, and waits for it to complete. The new child
// process shares stdin/stdout/stderr with the parent. When the child exits,
// this will return the same value as exec.Command.Run(). If there is an error
// in reaping children that this can not handle, it will panic.
func ReRun() error {
// this will return the exit code. If this returns an error, the child process
// may not be terminated.
func ReRun() (int, error) {
bin, err := os.Readlink("/proc/self/exe")
if err != nil {
return err
return 0, err
}
cmd := exec.Command(bin, os.Args[1:]...)
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if err := cmd.Start(); err != nil {
return err
return 0, err
}
runInit(cmd.Process.Pid)
return nil
return runInit(cmd.Process.Pid)
}
// runInit runs a bare-bones init process. This will return when firstborn
// exits. In case of truly unknown errors it will panic.
func runInit(firstborn int) {
// runInit runs a bare-bones init process. When firstborn exits, this will
// return the exit code. If this returns an error, the child process may not
// be terminated.
func runInit(firstborn int) (int, error) {
sigs := make(chan os.Signal, 8)
signal.Notify(sigs)
for sig := range sigs {
if sig != syscall.SIGCHLD {
// Pass it on to the real process.
syscall.Kill(firstborn, sig.(syscall.Signal))
if err := syscall.Kill(firstborn, sig.(syscall.Signal)); err != nil {
return 0, err
}
}
// Always try to reap a child - empirically, sometimes this gets missed.
if sigchld(firstborn) {
return
die, status, err := sigchld(firstborn)
if err != nil {
return 0, err
}
if die {
if status.Signaled() {
return 128 + int(status.Signal()), nil
}
if status.Exited() {
return status.ExitStatus(), nil
}
return 0, fmt.Errorf("unhandled exit status: 0x%x\n", status)
}
}
return 0, fmt.Errorf("signal handler terminated unexpectedly")
}
// sigchld handles a SIGCHLD. This will return true when firstborn exits. In
// case of truly unknown errors it will panic.
func sigchld(firstborn int) bool {
// sigchld handles a SIGCHLD. This will return true only when firstborn exits.
// For any other process this will return false and a 0 status.
func sigchld(firstborn int) (bool, syscall.WaitStatus, error) {
// Loop to handle multiple child processes.
for {
var status syscall.WaitStatus
pid, err := syscall.Wait4(-1, &status, syscall.WNOHANG, nil)
if err != nil {
panic(fmt.Sprintf("failed to wait4(): %v\n", err))
return false, 0, fmt.Errorf("wait4(): %v\n", err)
}
if pid == firstborn {
return true
return true, status, nil
}
if pid <= 0 {
// No more children to reap.
@ -66,5 +79,5 @@ func sigchld(firstborn int) bool {
}
// Must have found one, see if there are more.
}
return false
return false, 0, nil
}

View File

@ -4,7 +4,6 @@ package main
import (
"fmt"
"os"
"os/exec"
"k8s.io/git-sync/pkg/pid1"
)
@ -13,12 +12,9 @@ func main() {
// In case we come up as pid 1, act as init.
if os.Getpid() == 1 {
fmt.Printf("detected pid 1, running as init\n")
err := pid1.ReRun()
code, err := pid1.ReRun()
if err == nil {
os.Exit(0)
}
if exerr, ok := err.(*exec.ExitError); ok {
os.Exit(exerr.ExitCode())
os.Exit(code)
}
fmt.Printf("unhandled pid1 error: %v\n", err)
os.Exit(127)

View File

@ -3,6 +3,14 @@
go build
docker build -t example.com/fast-exit .
# This should exit(42)
docker run -ti --rm example.com/fast-exit
RET=$?
if [ "$RET" != 42 ]; then
echo "FAIL: exit code was not preserved: $RET"
exit 1
fi
# In the past we have observed hangs and missed signals. This *should* run
# forever.
while true; do

View File

@ -254,6 +254,33 @@ assert_file_eq "$ROOT"/link/file "$TESTCASE"
# Wrap up
pass
##############################################
# Test non-zero exit
##############################################
testcase "non-zero-exit"
echo "$TESTCASE" > "$REPO"/file
git -C "$REPO" commit -qam "$TESTCASE"
ln -s "$ROOT" "$DIR/rootlink" # symlink to test
(
set +o errexit
GIT_SYNC \
--one-time \
--repo="file://$REPO" \
--branch=e2e-branch \
--rev=does-not-exit \
--root="$DIR/rootlink" \
--link="link" \
> "$DIR"/log."$TESTCASE" 2>&1
RET=$?
if [[ "$RET" != 1 ]]; then
fail "expected exit code 1, got $RET"
fi
assert_file_absent "$ROOT"/link
assert_file_absent "$ROOT"/link/file
)
# Wrap up
pass
##############################################
# Test default syncing (master)
##############################################