Skip to content

Commit

Permalink
runtime: log all thread stack traces during GOTRACEBACK=crash on Unix
Browse files Browse the repository at this point in the history
This extends https://golang.org/cl/2811, which only applied to Darwin
and GNU/Linux, to all Unix systems.

Fixes golang#9591.

Change-Id: Iec3fb438564ba2924b15b447c0480f87c0bfd009
Reviewed-on: https://go-review.googlesource.com/12661
Run-TryBot: Ian Lance Taylor <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Matthew Dempsky <[email protected]>
Reviewed-by: Russ Cox <[email protected]>
  • Loading branch information
ianlancetaylor committed Jul 27, 2015
1 parent a2cf056 commit f0876a1
Show file tree
Hide file tree
Showing 19 changed files with 380 additions and 51 deletions.
135 changes: 135 additions & 0 deletions src/runtime/crash_unix_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build darwin dragonfly freebsd linux netbsd openbsd solaris

package runtime_test

import (
"bytes"
"internal/testenv"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"runtime"
"syscall"
"testing"
)

// TestCrashDumpsAllThreads checks that a program started with
// GOTRACEBACK=crash prints a stack trace for every thread that is
// running Go code when the program receives SIGQUIT.
//
// The test builds crashDumpsAllThreadsSource in a temporary directory,
// starts it with the write end of a pipe passed as an extra file, waits
// for the child to write a byte to that pipe (its signal that all of its
// goroutines are running), sends SIGQUIT, and then counts how many
// "main.loop(" frames appear in the combined stdout/stderr output.
func TestCrashDumpsAllThreads(t *testing.T) {
	switch runtime.GOOS {
	case "darwin", "dragonfly", "freebsd", "linux", "netbsd", "openbsd", "solaris":
	default:
		t.Skipf("skipping; not supported on %v", runtime.GOOS)
	}

	// We don't use executeTest because we need to kill the
	// program while it is running.

	testenv.MustHaveGoBuild(t)

	checkStaleRuntime(t)

	dir, err := ioutil.TempDir("", "go-build")
	if err != nil {
		t.Fatalf("failed to create temp directory: %v", err)
	}
	defer os.RemoveAll(dir)

	if err := ioutil.WriteFile(filepath.Join(dir, "main.go"), []byte(crashDumpsAllThreadsSource), 0666); err != nil {
		t.Fatalf("failed to create Go file: %v", err)
	}

	cmd := exec.Command("go", "build", "-o", "a.exe")
	cmd.Dir = dir
	out, err := testEnv(cmd).CombinedOutput()
	if err != nil {
		t.Fatalf("building source: %v\n%s", err, out)
	}

	cmd = exec.Command(filepath.Join(dir, "a.exe"))
	cmd = testEnv(cmd)
	cmd.Env = append(cmd.Env, "GOTRACEBACK=crash")
	var outbuf bytes.Buffer
	cmd.Stdout = &outbuf
	cmd.Stderr = &outbuf

	rp, wp, err := os.Pipe()
	if err != nil {
		t.Fatal(err)
	}
	// The read end stays open for the rest of the test; make sure it
	// is released on every return path.
	defer rp.Close()
	// The first extra file becomes descriptor 3 in the child, which is
	// the descriptor the child program writes its ready byte to.
	cmd.ExtraFiles = []*os.File{wp}

	if err := cmd.Start(); err != nil {
		// The child never inherited the write end, so close it here.
		wp.Close()
		t.Fatalf("starting program: %v", err)
	}

	// Close our copy of the write end so that a read on rp reports
	// EOF if the child exits without writing.
	if err := wp.Close(); err != nil {
		t.Logf("closing write pipe: %v", err)
	}
	if _, err := rp.Read(make([]byte, 1)); err != nil {
		// Best effort: don't leave the child spinning or unreaped.
		// Errors are ignored because the child may already be gone.
		cmd.Process.Kill()
		cmd.Wait()
		t.Fatalf("reading from pipe: %v", err)
	}

	if err := cmd.Process.Signal(syscall.SIGQUIT); err != nil {
		// The child is still busy-looping; stop and reap it before
		// failing. Errors are ignored for the same reason as above.
		cmd.Process.Kill()
		cmd.Wait()
		t.Fatalf("signal: %v", err)
	}

	// No point in checking the error return from Wait--we expect
	// it to fail.
	cmd.Wait()

	// We want to see a stack trace for each thread.
	// Before https://golang.org/cl/2811 running threads would say
	// "goroutine running on other thread; stack unavailable".
	// The child starts four goroutines running main.loop, so four
	// frames are expected.
	out = outbuf.Bytes()
	n := bytes.Count(out, []byte("main.loop("))
	if n != 4 {
		t.Errorf("found %d instances of main.loop; expected 4", n)
		t.Logf("%s", out)
	}
}

// crashDumpsAllThreadsSource is the source of the helper program that
// TestCrashDumpsAllThreads builds and runs. The program starts four
// goroutines that each close their channel and then spin in loop forever,
// waits until every channel has been closed, writes a single "x" to file
// descriptor 3 to tell the parent that all goroutines are executing, and
// then blocks in an empty select until it is killed by a signal.
const crashDumpsAllThreadsSource = `
package main
import (
"fmt"
"os"
"runtime"
)
func main() {
const count = 4
runtime.GOMAXPROCS(count + 1)
chans := make([]chan bool, count)
for i := range chans {
chans[i] = make(chan bool)
go loop(i, chans[i])
}
// Wait for all the goroutines to start executing.
for _, c := range chans {
<-c
}
// Tell our parent that all the goroutines are executing.
if _, err := os.NewFile(3, "pipe").WriteString("x"); err != nil {
fmt.Fprintf(os.Stderr, "write to pipe failed: %v\n", err)
os.Exit(2)
}
select {}
}
func loop(i int, c chan bool) {
close(c)
for {
for j := 0; j < 0x7fffffff; j++ {
}
}
}
`
34 changes: 15 additions & 19 deletions src/runtime/signal_386.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,25 +170,21 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
}

if docrash {
// TODO(rsc): Implement raiseproc on other systems
// and then add to this if condition.
if GOOS == "linux" {
crashing++
if crashing < sched.mcount {
// There are other m's that need to dump their stacks.
// Relay SIGQUIT to the next m by sending it to the current process.
// All m's that have already received SIGQUIT have signal masks blocking
// receipt of any signals, so the SIGQUIT will go to an m that hasn't seen it yet.
// When the last m receives the SIGQUIT, it will fall through to the call to
// crash below. Just in case the relaying gets botched, each m involved in
// the relay sleeps for 5 seconds and then does the crash/exit itself.
// In expected operation, the last m has received the SIGQUIT and run
// crash/exit and the process is gone, all long before any of the
// 5-second sleeps have finished.
print("\n-----\n\n")
raiseproc(_SIGQUIT)
usleep(5 * 1000 * 1000)
}
crashing++
if crashing < sched.mcount {
// There are other m's that need to dump their stacks.
// Relay SIGQUIT to the next m by sending it to the current process.
// All m's that have already received SIGQUIT have signal masks blocking
// receipt of any signals, so the SIGQUIT will go to an m that hasn't seen it yet.
// When the last m receives the SIGQUIT, it will fall through to the call to
// crash below. Just in case the relaying gets botched, each m involved in
// the relay sleeps for 5 seconds and then does the crash/exit itself.
// In expected operation, the last m has received the SIGQUIT and run
// crash/exit and the process is gone, all long before any of the
// 5-second sleeps have finished.
print("\n-----\n\n")
raiseproc(_SIGQUIT)
usleep(5 * 1000 * 1000)
}
crash()
}
Expand Down
34 changes: 15 additions & 19 deletions src/runtime/signal_amd64x.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,25 +181,21 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
}

if docrash {
// TODO(rsc): Implement raiseproc on other systems
// and then add to this if condition.
if GOOS == "darwin" || GOOS == "linux" {
crashing++
if crashing < sched.mcount {
// There are other m's that need to dump their stacks.
// Relay SIGQUIT to the next m by sending it to the current process.
// All m's that have already received SIGQUIT have signal masks blocking
// receipt of any signals, so the SIGQUIT will go to an m that hasn't seen it yet.
// When the last m receives the SIGQUIT, it will fall through to the call to
// crash below. Just in case the relaying gets botched, each m involved in
// the relay sleeps for 5 seconds and then does the crash/exit itself.
// In expected operation, the last m has received the SIGQUIT and run
// crash/exit and the process is gone, all long before any of the
// 5-second sleeps have finished.
print("\n-----\n\n")
raiseproc(_SIGQUIT)
usleep(5 * 1000 * 1000)
}
crashing++
if crashing < sched.mcount {
// There are other m's that need to dump their stacks.
// Relay SIGQUIT to the next m by sending it to the current process.
// All m's that have already received SIGQUIT have signal masks blocking
// receipt of any signals, so the SIGQUIT will go to an m that hasn't seen it yet.
// When the last m receives the SIGQUIT, it will fall through to the call to
// crash below. Just in case the relaying gets botched, each m involved in
// the relay sleeps for 5 seconds and then does the crash/exit itself.
// In expected operation, the last m has received the SIGQUIT and run
// crash/exit and the process is gone, all long before any of the
// 5-second sleeps have finished.
print("\n-----\n\n")
raiseproc(_SIGQUIT)
usleep(5 * 1000 * 1000)
}
crash()
}
Expand Down
35 changes: 31 additions & 4 deletions src/runtime/signal_arm.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ func dumpregs(c *sigctxt) {
print("fault ", hex(c.fault()), "\n")
}

// crashing is incremented by sighandler each time it reaches its docrash
// path. In the visible part of sighandler it is used to call startpanic and
// tracebackothers only while it is still zero, and it is compared with
// sched.mcount to decide whether SIGQUIT should be relayed to another m.
var crashing int32

// May run during STW, so write barriers are not allowed.
//go:nowritebarrier
func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
Expand Down Expand Up @@ -106,15 +108,18 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {

_g_.m.throwing = 1
_g_.m.caughtsig.set(gp)
startpanic()

if crashing == 0 {
startpanic()
}

if sig < uint32(len(sigtable)) {
print(sigtable[sig].name, "\n")
} else {
print("Signal ", sig, "\n")
}

print("PC=", hex(c.pc()), "\n")
print("PC=", hex(c.pc()), " m=", _g_.m.id, "\n")
if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
print("signal arrived during cgo execution\n")
gp = _g_.m.lockedg
Expand All @@ -125,12 +130,34 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
if gotraceback(&docrash) > 0 {
goroutineheader(gp)
tracebacktrap(uintptr(c.pc()), uintptr(c.sp()), uintptr(c.lr()), gp)
tracebackothers(gp)
print("\n")
if crashing > 0 && gp != _g_.m.curg && _g_.m.curg != nil && readgstatus(_g_.m.curg)&^_Gscan == _Grunning {
// tracebackothers on original m skipped this one; trace it now.
goroutineheader(_g_.m.curg)
traceback(^uintptr(0), ^uintptr(0), 0, gp)
} else if crashing == 0 {
tracebackothers(gp)
print("\n")
}
dumpregs(c)
}

if docrash {
crashing++
if crashing < sched.mcount {
// There are other m's that need to dump their stacks.
// Relay SIGQUIT to the next m by sending it to the current process.
// All m's that have already received SIGQUIT have signal masks blocking
// receipt of any signals, so the SIGQUIT will go to an m that hasn't seen it yet.
// When the last m receives the SIGQUIT, it will fall through to the call to
// crash below. Just in case the relaying gets botched, each m involved in
// the relay sleeps for 5 seconds and then does the crash/exit itself.
// In expected operation, the last m has received the SIGQUIT and run
// crash/exit and the process is gone, all long before any of the
// 5-second sleeps have finished.
print("\n-----\n\n")
raiseproc(_SIGQUIT)
usleep(5 * 1000 * 1000)
}
crash()
}

Expand Down
35 changes: 31 additions & 4 deletions src/runtime/signal_arm64.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ func dumpregs(c *sigctxt) {
print("fault ", hex(c.fault()), "\n")
}

// crashing is incremented by sighandler each time it reaches its docrash
// path. In the visible part of sighandler it is used to call startpanic and
// tracebackothers only while it is still zero, and it is compared with
// sched.mcount to decide whether SIGQUIT should be relayed to another m.
var crashing int32

// May run during STW, so write barriers are not allowed.
//go:nowritebarrier
func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
Expand Down Expand Up @@ -119,15 +121,18 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {

_g_.m.throwing = 1
_g_.m.caughtsig.set(gp)
startpanic()

if crashing == 0 {
startpanic()
}

if sig < uint32(len(sigtable)) {
print(sigtable[sig].name, "\n")
} else {
print("Signal ", sig, "\n")
}

print("PC=", hex(c.pc()), "\n")
print("PC=", hex(c.pc()), " m=", _g_.m.id, "\n")
if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
print("signal arrived during cgo execution\n")
gp = _g_.m.lockedg
Expand All @@ -138,12 +143,34 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
if gotraceback(&docrash) > 0 {
goroutineheader(gp)
tracebacktrap(uintptr(c.pc()), uintptr(c.sp()), uintptr(c.lr()), gp)
tracebackothers(gp)
print("\n")
if crashing > 0 && gp != _g_.m.curg && _g_.m.curg != nil && readgstatus(_g_.m.curg)&^_Gscan == _Grunning {
// tracebackothers on original m skipped this one; trace it now.
goroutineheader(_g_.m.curg)
traceback(^uintptr(0), ^uintptr(0), 0, gp)
} else if crashing == 0 {
tracebackothers(gp)
print("\n")
}
dumpregs(c)
}

if docrash {
crashing++
if crashing < sched.mcount {
// There are other m's that need to dump their stacks.
// Relay SIGQUIT to the next m by sending it to the current process.
// All m's that have already received SIGQUIT have signal masks blocking
// receipt of any signals, so the SIGQUIT will go to an m that hasn't seen it yet.
// When the last m receives the SIGQUIT, it will fall through to the call to
// crash below. Just in case the relaying gets botched, each m involved in
// the relay sleeps for 5 seconds and then does the crash/exit itself.
// In expected operation, the last m has received the SIGQUIT and run
// crash/exit and the process is gone, all long before any of the
// 5-second sleeps have finished.
print("\n-----\n\n")
raiseproc(_SIGQUIT)
usleep(5 * 1000 * 1000)
}
crash()
}

Expand Down
Loading

0 comments on commit f0876a1

Please sign in to comment.