Skip to content

Commit

Permalink
tools/offcputime Warn user when an event skipped by negative duration
Browse files Browse the repository at this point in the history
  • Loading branch information
kawamuray authored and yonghong-song committed Jan 21, 2021
1 parent 9712f9e commit 0d86893
Showing 1 changed file with 32 additions and 2 deletions.
34 changes: 32 additions & 2 deletions tools/offcputime.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from __future__ import print_function
from bcc import BPF
from sys import stderr
from time import sleep, strftime
from time import strftime
import argparse
import errno
import signal
Expand Down Expand Up @@ -126,6 +126,14 @@ def signal_ignore(signal, frame):
BPF_HASH(start, u32);
BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE);
struct warn_event_t {
u32 pid;
u32 tgid;
u32 t_start;
u32 t_end;
};
BPF_PERF_OUTPUT(warn_events);
int oncpu(struct pt_regs *ctx, struct task_struct *prev) {
u32 pid = prev->pid;
u32 tgid = prev->tgid;
Expand All @@ -150,6 +158,13 @@ def signal_ignore(signal, frame):
u64 t_end = bpf_ktime_get_ns();
start.delete(&pid);
if (t_start > t_end) {
struct warn_event_t event = {
.pid = pid,
.tgid = tgid,
.t_start = t_start,
.t_end = t_end,
};
warn_events.perf_submit(ctx, &event, sizeof(event));
return 0;
}
u64 delta = t_end - t_start;
Expand Down Expand Up @@ -251,8 +266,23 @@ def signal_ignore(signal, frame):
else:
print("... Hit Ctrl-C to end.")


def print_warn_event(cpu, data, size):
event = b["warn_events"].event(data)
# See https://github.com/iovisor/bcc/pull/3227 for those wondering how can this happen.
print("WARN: Skipped an event with negative duration: pid:%d, tgid:%d, off-cpu:%d, on-cpu:%d"
% (event.pid, event.tgid, event.t_start, event.t_end),
file=stderr)

b["warn_events"].open_perf_buffer(print_warn_event)
try:
sleep(duration)
duration_ms = duration * 1000
start_time_ms = int(BPF.monotonic_time() / 1000000)
while True:
elapsed_ms = int(BPF.monotonic_time() / 1000000) - start_time_ms
if elapsed_ms >= duration_ms:
break
b.perf_buffer_poll(timeout=duration_ms - elapsed_ms)
except KeyboardInterrupt:
# as cleanup can take many seconds, trap Ctrl-C:
signal.signal(signal.SIGINT, signal_ignore)
Expand Down

0 comments on commit 0d86893

Please sign in to comment.