Skip to content

Commit

Permalink
syz-ci: switch to using syz-manager for smoke testing
Browse files Browse the repository at this point in the history
Add a smoke testing mode to the manager and use it in syz-ci
instead of pkg/instance, which uses the syz-fuzzer binary.
  • Loading branch information
dvyukov committed May 21, 2024
1 parent 5546e69 commit 1014eca
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 51 deletions.
2 changes: 2 additions & 0 deletions pkg/report/report.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ type Report struct {
Recipients vcs.Recipients
// GuiltyFile is the source file that we think is to blame for the crash (filled in by Symbolize).
GuiltyFile string
// Arbitrary information about the test VM, may be attached to the report by users of the package.
MachineInfo []byte
// reportPrefixLen is length of additional prefix lines that we added before actual crash report.
reportPrefixLen int
// symbolized is set if the report is symbolized.
Expand Down
94 changes: 50 additions & 44 deletions syz-ci/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -445,60 +445,63 @@ func (mgr *Manager) testImage(imageDir string, info *BuildInfo) error {
if err != nil {
return fmt.Errorf("failed to create manager config: %w", err)
}
defer os.RemoveAll(mgrcfg.Workdir)
if !vm.AllowsOvercommit(mgrcfg.Type) {
return nil // No support for creating machines out of thin air.
}
env, err := instance.NewEnv(mgrcfg, buildSem, testSem)
if err != nil {
osutil.MkdirAll(mgrcfg.Workdir)
configFile := filepath.Join(mgrcfg.Workdir, "manager.cfg")
if err := config.SaveFile(configFile, mgrcfg); err != nil {
return err
}
const (
testVMs = 3
maxFailures = 1
)
results, err := env.Test(testVMs, nil, nil, nil)
if err != nil {
return err

testSem.Wait()
defer testSem.Signal()

timeout := 30 * time.Minute * mgrcfg.Timeouts.Scale
bin := filepath.Join(mgrcfg.Syzkaller, "bin", "syz-manager")
output, retErr := osutil.RunCmd(timeout, "", bin, "-config", configFile, "-mode=smoke-test")
if retErr == nil {
return nil
}
failures := 0
var failureErr error
for _, res := range results {
if res.Error == nil {
continue
}
failures++
var err *instance.TestError
switch {
case errors.As(res.Error, &err):
if rep := err.Report; rep != nil {
what := "test"
if err.Boot {
what = "boot"
}
rep.Title = fmt.Sprintf("%v %v error: %v",
mgr.mgrcfg.RepoAlias, what, rep.Title)
// There are usually no duplicates for boot errors, so we reset AltTitles.
// But if we pass them, we would need to add the same prefix as for Title
// in order to avoid duping boot bugs with non-boot bugs.
rep.AltTitles = nil
if err := mgr.reportBuildError(rep, info, imageDir); err != nil {
mgr.Errorf("failed to report image error: %v", err)
}
}
if err.Boot {
failureErr = fmt.Errorf("VM boot failed with: %w", err)

var verboseErr *osutil.VerboseError
if errors.As(retErr, &verboseErr) {
// Caller will log the error, so don't include full output.
retErr = errors.New(verboseErr.Title)
}
// If there was a kernel bug, report it to dashboard.
// Otherwise just save the output in a temp file and log an error, unclear what else we can do.
reportData, err := os.ReadFile(filepath.Join(mgrcfg.Workdir, "report.json"))
if err != nil {
if os.IsNotExist(err) {
mgr.Errorf("image testing failed w/o kernel bug")
tmp, err := os.CreateTemp(mgr.workDir, "smoke-test-error")
if err != nil {
mgr.Errorf("failed to create smoke test error file: %v", err)
} else {
failureErr = fmt.Errorf("VM testing failed with: %w", err)
tmp.Write(output)
tmp.Close()
}
} else {
mgr.Errorf("failed to read smoke test report: %v", err)
}
} else {
rep := new(report.Report)
if err := json.Unmarshal(reportData, rep); err != nil {
mgr.Errorf("failed to unmarshal smoke test report: %v", err)
} else {
rep.Title = fmt.Sprintf("%v test error: %v", mgr.mgrcfg.RepoAlias, rep.Title)
retErr = errors.New(rep.Title)
// There are usually no duplicates for boot errors, so we reset AltTitles.
// But if we pass them, we would need to add the same prefix as for Title
// in order to avoid duping boot bugs with non-boot bugs.
rep.AltTitles = nil
if err := mgr.reportBuildError(rep, info, imageDir); err != nil {
mgr.Errorf("failed to report image error: %v", err)
}
default:
failureErr = res.Error
}
}
if failures > maxFailures {
return failureErr
}
return nil
return retErr
}

func (mgr *Manager) reportBuildError(rep *report.Report, info *BuildInfo, imageDir string) error {
Expand Down Expand Up @@ -550,6 +553,9 @@ func (mgr *Manager) createTestConfig(imageDir string, info *BuildInfo) (*mgrconf
if err := instance.SetConfigImage(mgrcfg, imageDir, true); err != nil {
return nil, err
}
if err := instance.OverrideVMCount(mgrcfg, 3); err != nil {
return nil, err
}
mgrcfg.KernelSrc = mgr.kernelSrcDir
if err := mgrconfig.Complete(mgrcfg); err != nil {
return nil, fmt.Errorf("bad manager config: %w", err)
Expand Down
83 changes: 76 additions & 7 deletions syz-manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"flag"
"fmt"
"io"
Expand Down Expand Up @@ -47,10 +48,19 @@ var (
flagConfig = flag.String("config", "", "configuration file")
flagDebug = flag.Bool("debug", false, "dump all VM output to console")
flagBench = flag.String("bench", "", "write execution statistics into this file periodically")

flagMode = flag.String("mode", "fuzzing", "mode of operation, one of:\n"+
" - fuzzing: the default continuous fuzzing mode\n"+
" - smoke-test: run smoke test for syzkaller+kernel\n"+
" The test consists of booting VMs and running some simple test programs\n"+
" to ensure that fuzzing can proceed in general. After completing the test\n"+
" the process exits and the exit status indicates success/failure.\n"+
" If the kernel oopses during testing, the report is saved to workdir/report.json.\n")
)

type Manager struct {
cfg *mgrconfig.Config
mode Mode
vmPool *vm.Pool
target *prog.Target
sysTarget *targets.Target
Expand Down Expand Up @@ -100,6 +110,14 @@ type Manager struct {
Stats
}

// Mode selects the manager's mode of operation.
// RunManager derives it from the -mode command line flag (flagMode)
// and stores it in Manager.mode.
type Mode int

// For description of modes see flagMode help.
const (
// ModeFuzzing is the default continuous fuzzing mode (-mode=fuzzing).
ModeFuzzing Mode = iota
// ModeSmokeTest runs a smoke test for syzkaller+kernel (-mode=smoke-test):
// VMs are booted and some simple test programs are run, then the process exits
// and the exit status indicates success/failure. If the kernel oopses during
// the test, the report is saved to workdir/report.json.
// RunManager also clears the dashboard and hub clients in this mode.
ModeSmokeTest
)

const (
// Just started, nothing done yet.
phaseInit = iota
Expand All @@ -122,7 +140,6 @@ type Crash struct {
fromHub bool // this crash was created based on a repro from syz-hub
fromDashboard bool // .. or from dashboard
*report.Report
machineInfo []byte
}

func main() {
Expand All @@ -143,6 +160,19 @@ func main() {
}

func RunManager(cfg *mgrconfig.Config) {
var mode Mode
switch *flagMode {
case "fuzzing":
mode = ModeFuzzing
case "smoke-test":
mode = ModeSmokeTest
cfg.DashboardClient = ""
cfg.HubClient = ""
default:
flag.PrintDefaults()
log.Fatalf("unknown mode: %v", *flagMode)
}

var vmPool *vm.Pool
// Type "none" is a special case for debugging/development when manager
// does not start any VMs, but instead you start them manually
Expand All @@ -166,6 +196,7 @@ func RunManager(cfg *mgrconfig.Config) {
corpusUpdates := make(chan corpus.NewItemEvent, 32)
mgr := &Manager{
cfg: cfg,
mode: mode,
vmPool: vmPool,
corpus: corpus.NewMonitoredCorpus(context.Background(), corpusUpdates),
corpusPreloaded: make(chan bool),
Expand Down Expand Up @@ -217,7 +248,9 @@ func RunManager(cfg *mgrconfig.Config) {
}

go mgr.heartbeatLoop()
osutil.HandleInterrupts(vm.Shutdown)
if mgr.mode != ModeSmokeTest {
osutil.HandleInterrupts(vm.Shutdown)
}
if mgr.vmPool == nil {
log.Logf(0, "no VMs started (type=none)")
log.Logf(0, "you are supposed to start syz-fuzzer manually as:")
Expand Down Expand Up @@ -730,8 +763,11 @@ func (mgr *Manager) runInstance(index int) (*Crash, error) {

rep, vmInfo, err := mgr.runInstanceInner(index, instanceName, injectLog)
machineInfo := mgr.serv.shutdownInstance(instanceName, rep != nil)
if len(vmInfo) != 0 {
machineInfo = append(append(vmInfo, '\n'), machineInfo...)
if rep != nil {
if len(vmInfo) != 0 {
machineInfo = append(append(vmInfo, '\n'), machineInfo...)
}
rep.MachineInfo = machineInfo
}

// Error that is not a VM crash.
Expand All @@ -745,7 +781,6 @@ func (mgr *Manager) runInstance(index int) (*Crash, error) {
crash := &Crash{
instanceName: instanceName,
Report: rep,
machineInfo: machineInfo,
}
return crash, nil
}
Expand All @@ -756,6 +791,22 @@ func (mgr *Manager) runInstanceInner(index int, instanceName string, injectLog <

inst, err := mgr.vmPool.Create(index)
if err != nil {
var bootErr vm.BootErrorer
if errors.As(err, &bootErr) {
title, output := bootErr.BootError()
rep := mgr.reporter.Parse(output)
if rep != nil && rep.Type == crash_pkg.UnexpectedReboot {
// Avoid detecting any boot crash as "unexpected kernel reboot".
rep = mgr.reporter.ParseFrom(output, rep.SkipPos)
}
if rep == nil {
rep = &report.Report{
Title: title,
Output: output,
}
}
return rep, nil, nil
}
return nil, nil, fmt.Errorf("failed to create instance: %w", err)
}
defer inst.Close()
Expand Down Expand Up @@ -873,6 +924,17 @@ func (mgr *Manager) saveCrash(crash *Crash) bool {
}
log.Logf(0, "%s: crash: %v%v", crash.instanceName, crash.Title, flags)

if mgr.mode == ModeSmokeTest {
data, err := json.Marshal(crash.Report)
if err != nil {
log.Fatalf("failed to serialize crash report: %v", err)
}
if err := osutil.WriteFile(filepath.Join(mgr.cfg.Workdir, "report.json"), data); err != nil {
log.Fatal(err)
}
log.Fatalf("kernel crashed in smoke testing mode, exiting")
}

if crash.Suppressed {
// Collect all of them into a single bucket so that it's possible to control and assess them,
// e.g. if there are some spikes in suppressed reports.
Expand Down Expand Up @@ -901,7 +963,7 @@ func (mgr *Manager) saveCrash(crash *Crash) bool {
Recipients: crash.Recipients.ToDash(),
Log: crash.Output,
Report: crash.Report.Report,
MachineInfo: crash.machineInfo,
MachineInfo: crash.MachineInfo,
}
setGuiltyFiles(dc, crash.Report)
resp, err := mgr.dash.ReportCrash(dc)
Expand Down Expand Up @@ -952,7 +1014,7 @@ func (mgr *Manager) saveCrash(crash *Crash) bool {
writeOrRemove("log", crash.Output)
writeOrRemove("tag", []byte(mgr.cfg.Tag))
writeOrRemove("report", crash.Report.Report)
writeOrRemove("machineInfo", crash.machineInfo)
writeOrRemove("machineInfo", crash.MachineInfo)
return mgr.needLocalRepro(crash)
}

Expand Down Expand Up @@ -1335,6 +1397,13 @@ func (mgr *Manager) currentBugFrames() BugFrames {

func (mgr *Manager) machineChecked(features flatrpc.Feature, enabledSyscalls map[*prog.Syscall]bool,
opts flatrpc.ExecOpts) queue.Source {
if mgr.mode == ModeSmokeTest {
log.Logf(0, "smoke test succeeded, shutting down...")
close(vm.Shutdown)
time.Sleep(10 * time.Second)
os.Exit(0)
}

mgr.mu.Lock()
defer mgr.mu.Unlock()
if mgr.checkDone {
Expand Down

0 comments on commit 1014eca

Please sign in to comment.