Skip to content
This repository has been archived by the owner on Dec 14, 2022. It is now read-only.

Commit

Permalink
perf record: Add --tail-synthesize option
Browse files Browse the repository at this point in the history
When working with overwritable ring buffer there's a inconvenience
problem: if perf dumps data after a long period after it starts,
non-sample events may lost, which makes following 'perf report' unable
to identify proc name and mmap layout. For example:

 # perf record -m 4 -e raw_syscalls:* -g --overwrite --switch-output \
        dd if=/dev/zero of=/dev/null

send SIGUSR2 after dd runs long enough. The resuling perf.data lost
correct comm and mmap events:

 # perf script -i perf.data.2016061522374354
 perf 24478 [004] 2581325.601789:  raw_syscalls:sys_exit: NR 0 = 512
 ^^^^
 Should be 'dd'
                   27b2e8 syscall_slow_exit_work+0xfe2000e3 (/lib/modules/4.6.0-rc3+/build/vmlinux)
                   203cc7 do_syscall_64+0xfe200117 (/lib/modules/4.6.0-rc3+/build/vmlinux)
                   b18d83 return_from_SYSCALL_64+0xfe200000 (/lib/modules/4.6.0-rc3+/build/vmlinux)
             7f47c417edf0 [unknown] ([unknown])
             ^^^^^^^^^^^^
             Fail to unwind

This patch provides a '--tail-synthesize' option, allows perf to collect
system status when finalizing output file. In resuling output file, the
non-sample events reflect system status when dumping data.

After this patch:
 # perf record -m 4 -e raw_syscalls:* -g --overwrite --switch-output --tail-synthesize \
        dd if=/dev/zero of=/dev/null

 # perf script -i perf.data.2016061600544998
 dd 27364 [004] 2583244.994464: raw_syscalls:sys_enter: NR 1 (1, ...
 ^^
 Correct comm
                   203a18 syscall_trace_enter_phase2+0xfe2001a8 ([kernel.kallsyms])
                   203aa5 syscall_trace_enter+0xfe200055 ([kernel.kallsyms])
                   203caa do_syscall_64+0xfe2000fa ([kernel.kallsyms])
                   b18d83 return_from_SYSCALL_64+0xfe200000 ([kernel.kallsyms])
                    d8e50 __GI___libc_write+0xffff01d9639f4010 (/tmp/oxygen_root-w00229757/lib64/libc-2.18.so)
                    ^^^^^
                    Correct unwind

This option doesn't aim to solve this problem completely. If a process
terminates before SIGUSR2, we still lost its COMM and MMAP events. For
example, we can't unwind correctly from the final perf.data we get from
the previous example, because when perf collects the final output file
(when we press C-c), 'dd' has been terminated so its '/proc/<pid>/mmap'
becomes empty.

However, this is a cheaper choice. To completely solve this problem we
need to continously output non-sample events. To satisify the
requirement of daemonization, we need to merge them periodically. It is
possible but requires much more code and cycles.

Automatically select --tail-synthesize when --overwrite is provided.

Signed-off-by: Wang Nan <[email protected]>
Cc: He Kuang <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Nilay Vaish <[email protected]>
Cc: Zefan Li <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
  • Loading branch information
WangNan0 authored and acmel committed Jul 15, 2016
1 parent f06149c commit 4ea648a
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 6 deletions.
8 changes: 8 additions & 0 deletions tools/perf/Documentation/perf-record.txt
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,12 @@ options.
'perf record --dry-run -e' can act as a BPF script compiler if llvm.dump-obj
in config file is set to true.

--tail-synthesize::
Instead of collecting non-sample events (for example, fork, comm, mmap) at
the beginning of record, collect them during finalizing an output file.
The collected non-sample events reflects the status of the system when
record is finished.

--overwrite::
Makes all events use an overwritable ring buffer. An overwritable ring
buffer works like a flight recorder: when it gets full, the kernel will
Expand All @@ -381,6 +387,8 @@ those fitting in the ring buffer at that moment.
'overwrite' attribute can also be set or canceled for an event using
config terms. For example: 'cycles/overwrite/' and 'instructions/no-overwrite/'.

Implies --tail-synthesize.

SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]
31 changes: 25 additions & 6 deletions tools/perf/builtin-record.c
Original file line number Diff line number Diff line change
Expand Up @@ -604,13 +604,16 @@ record__finish_output(struct record *rec)
return;
}

static int record__synthesize_workload(struct record *rec)
static int record__synthesize_workload(struct record *rec, bool tail)
{
struct {
struct thread_map map;
struct thread_map_data map_data;
} thread_map;

if (rec->opts.tail_synthesize != tail)
return 0;

thread_map.map.nr = 1;
thread_map.map.map[0].pid = rec->evlist->workload.pid;
thread_map.map.map[0].comm = NULL;
Expand All @@ -621,7 +624,7 @@ static int record__synthesize_workload(struct record *rec)
rec->opts.proc_map_timeout);
}

static int record__synthesize(struct record *rec);
static int record__synthesize(struct record *rec, bool tail);

static int
record__switch_output(struct record *rec, bool at_exit)
Expand All @@ -632,6 +635,10 @@ record__switch_output(struct record *rec, bool at_exit)
/* Same Size: "2015122520103046"*/
char timestamp[] = "InvalidTimestamp";

record__synthesize(rec, true);
if (target__none(&rec->opts.target))
record__synthesize_workload(rec, true);

rec->samples = 0;
record__finish_output(rec);
err = fetch_current_timestamp(timestamp, sizeof(timestamp));
Expand All @@ -654,7 +661,7 @@ record__switch_output(struct record *rec, bool at_exit)

/* Output tracking events */
if (!at_exit) {
record__synthesize(rec);
record__synthesize(rec, false);

/*
* In 'perf record --switch-output' without -a,
Expand All @@ -666,7 +673,7 @@ record__switch_output(struct record *rec, bool at_exit)
* perf_event__synthesize_thread_map() for those events.
*/
if (target__none(&rec->opts.target))
record__synthesize_workload(rec);
record__synthesize_workload(rec, false);
}
return fd;
}
Expand Down Expand Up @@ -720,7 +727,7 @@ static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
return NULL;
}

static int record__synthesize(struct record *rec)
static int record__synthesize(struct record *rec, bool tail)
{
struct perf_session *session = rec->session;
struct machine *machine = &session->machines.host;
Expand All @@ -730,6 +737,9 @@ static int record__synthesize(struct record *rec)
int fd = perf_data_file__fd(file);
int err = 0;

if (rec->opts.tail_synthesize != tail)
return 0;

if (file->is_pipe) {
err = perf_event__synthesize_attrs(tool, session,
process_synthesized_event);
Expand Down Expand Up @@ -893,7 +903,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)

machine = &session->machines.host;

err = record__synthesize(rec);
err = record__synthesize(rec, false);
if (err < 0)
goto out_child;

Expand Down Expand Up @@ -1057,6 +1067,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
if (!quiet)
fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

if (target__none(&rec->opts.target))
record__synthesize_workload(rec, true);

out_child:
if (forks) {
int exit_status;
Expand All @@ -1075,6 +1088,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
} else
status = err;

record__synthesize(rec, true);
/* this will be recalculated during process_buildids() */
rec->samples = 0;

Expand Down Expand Up @@ -1399,6 +1413,8 @@ struct option __record_options[] = {
OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
&record.opts.no_inherit_set,
"child tasks do not inherit counters"),
OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
"synthesize non-sample events at the end of output"),
OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
Expand Down Expand Up @@ -1610,6 +1626,9 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
}
}

if (record.opts.overwrite)
record.opts.tail_synthesize = true;

if (rec->evlist->nr_entries == 0 &&
perf_evlist__add_default(rec->evlist) < 0) {
pr_err("Not enough memory for event selector list\n");
Expand Down
1 change: 1 addition & 0 deletions tools/perf/perf.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ struct record_opts {
bool record_switch_events;
bool all_kernel;
bool all_user;
bool tail_synthesize;
bool overwrite;
unsigned int freq;
unsigned int mmap_pages;
Expand Down

0 comments on commit 4ea648a

Please sign in to comment.