Skip to content

Commit

Permalink
virt-launcher: fix exit pod race condition
Browse files Browse the repository at this point in the history
This commit fixes a race condition that would cause a VMI compute
pod to exit and end up in an erroneous 'Failed' state even if actually
the pod terminated gracefully.

The race condition is caused by having 2 goroutines doing a wait() on
the same PID, while a fix existed already it was not covering the whole
critical section causing to the race to persist (just with a lower
failure rate).

One example of the race is the following:

The goroutine executing on the SIGCHLD signal handler completes the
Wait4() and is about to write the exit status on the channel

goroutine 1
```
case syscall.SIGCHLD:
	var wstatus syscall.WaitStatus
	wpid, err := syscall.Wait4(-1, &wstatus, syscall.WNOHANG, nil)
	if err != nil {
		log.Log.Reason(err).Errorf("Failed to reap process %d", wpid)
	}

        **EXECUTION HERE**

	if wpid == cmd.Process.Pid {
		exitStatus <- wstatus
	}
```

While a second goroutine gets unblocked on the Wait() and continues its
execution after not finding anything inside the exitStatus channel
goroutine 2
```
exitCode := 0
if err := cmd.Wait(); err != nil {
select {
	case status := <-exitStatus:
		exitCode = int(status)
	default:
                **EXECUTION HERE**
		exitCode = 1
		if exiterr, ok := err.(*exec.ExitError); ok {
```

This would cause the VMI pod to report an `exitCode=1` thus ending
up in `Failed` state because internally the `Wait()` function
actually calls first `waitid()` and then `wait4()`, both these functions
can fail if the SIGCHLD handler reaps the child process first leaving no
waitable pid in the system.

The fix has been tested with the following script:

```

function drain {
    NODE=$1

    kubectl drain $NODE --ignore-daemonsets --delete-local-data &> /dev/null
    sleep 0.5
    kubectl uncordon $NODE
}

function check-failed-pod {
    pods=$(kubectl get pod --field-selector 'status.phase==Failed' -o json | jq '.items | length')
    if [[ $pods -gt 0 ]]; then
        return 0
    else
        return 1
    fi
}

nodes=(node01 node02)
i=0

while true; do
    for node in "${nodes[@]}"; do
        echo "iteration $i"
        drain $node
        if check-failed-pod; then
            echo "Failed pod detected!"
            exit 1
        fi
        i=$((i + 1))
    done
done
```
without this commit: 1 failure every ~7 node drains
with this commit: no failures in over 200 iterations

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1943164
Signed-off-by: Antonio Cardace <[email protected]>
  • Loading branch information
acardace committed Jul 30, 2021
1 parent d4c02b3 commit c183cd5
Showing 1 changed file with 5 additions and 22 deletions.
27 changes: 5 additions & 22 deletions cmd/virt-launcher/virt-launcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ func ForkAndMonitor(containerDiskDir string) (int, error) {
return 1, err
}

exitStatus := make(chan syscall.WaitStatus, 10)
exitStatus := make(chan int, 10)
sigs := make(chan os.Signal, 10)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT, syscall.SIGCHLD)
go func() {
Expand All @@ -549,12 +549,8 @@ func ForkAndMonitor(containerDiskDir string) (int, error) {
}

log.Log.Infof("Reaped pid %d with status %d", wpid, int(wstatus))
// there's a race between cmd.Wait() and syscall.Wait4 when
// cleaning up the cmd's pid after it exits. This allows us
// to detect the correct exit code regardless of which wait
// wins the race.
if wpid == cmd.Process.Pid {
exitStatus <- wstatus
exitStatus <- wstatus.ExitStatus()
}

default:
Expand All @@ -568,22 +564,9 @@ func ForkAndMonitor(containerDiskDir string) (int, error) {
}
}()

// wait for virt-launcher and collect the exit code
exitCode := 0
if err := cmd.Wait(); err != nil {
select {
case status := <-exitStatus:
exitCode = int(status)
default:
exitCode = 1
if exiterr, ok := err.(*exec.ExitError); ok {
if status, ok := exiterr.Sys().(syscall.WaitStatus); ok {
exitCode = status.ExitStatus()
}
}
log.Log.Reason(err).Error("dirty virt-launcher shutdown")
}

exitCode := <-exitStatus
if exitCode != 0 {
log.Log.Errorf("dirty virt-launcher shutdown: exit-code %d", exitCode)
}

// give qemu some time to shut down in case it survived virt-handler
Expand Down

0 comments on commit c183cd5

Please sign in to comment.