x86, perf_counter, bts: Optimize BTS overflow handling
Draining the BTS buffer on a buffer overflow interrupt takes too
long, resulting in a kernel lockup when tracing the kernel.

Restructure perf_counter sampling into sample creation and sample
output.

Prepare a single reference sample for BTS sampling and update the
from and to address fields when draining the BTS buffer. Drain the
entire BTS buffer between a single perf_output_begin() /
perf_output_end() pair.

Signed-off-by: Markus Metzger <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
markus-metzger authored and Ingo Molnar committed Sep 18, 2009
1 parent 4b77a72 commit 5622f29
Showing 3 changed files with 266 additions and 174 deletions.
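
The heart of the patch is a control-flow change in the drain path: the old
code called perf_counter_output() once per BTS record, paying the full
output-setup cost (including locking the output buffer) for every record,
while the new code prepares one reference sample, reserves
header.size * (top - at) bytes with a single perf_output_begin(), rewrites
only the from/to addresses per record, and commits everything with one
perf_output_end(). The userspace model below is a minimal sketch of that
batching idea; the fake_* names are stand-ins invented here, not the kernel
API (which appears in the diff that follows).

	#include <stdio.h>
	#include <stddef.h>
	#include <stdint.h>

	struct bts_record { uint64_t from, to, flags; };	/* 24 bytes, as BTS_RECORD_SIZE */

	static int transactions;	/* counts output begin/end pairs */

	/* Stand-in for perf_output_begin(): take the lock, reserve space once. */
	static int fake_output_begin(size_t bytes)
	{
		(void)bytes;
		transactions++;
		return 0;		/* 0 = success, as in the kernel API */
	}

	/* Stand-in for perf_output_sample(): emit one already-prepared sample. */
	static void fake_output_sample(const struct bts_record *rec)
	{
		(void)rec;		/* only rec->from / rec->to vary per record */
	}

	/* Stand-in for perf_output_end(): release the buffer. */
	static void fake_output_end(void) { }

	static void drain_batched(struct bts_record *at, struct bts_record *top)
	{
		if (top <= at)		/* nothing buffered; mirrors the new early return */
			return;

		/* one reservation for the whole batch: header.size * (top - at) */
		if (fake_output_begin(sizeof(*at) * (size_t)(top - at)))
			return;

		for (; at < top; at++)
			fake_output_sample(at);

		fake_output_end();
	}

	int main(void)
	{
		static struct bts_record buf[2048];	/* BTS_BUFFER_SIZE / BTS_RECORD_SIZE */

		drain_batched(buf, buf + 2048);
		/* prints 1: one begin/end pair for all 2048 records */
		printf("%d begin/end pair(s) for 2048 records\n", transactions);
		return 0;
	}

Compiled as plain C99 this prints one begin/end pair for the whole batch;
before the patch every record paid for its own pair.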
60 changes: 37 additions & 23 deletions arch/x86/kernel/cpu/perf_counter.c
@@ -36,10 +36,10 @@ static u64 perf_counter_mask __read_mostly;
 #define BTS_RECORD_SIZE		24
 
 /* The size of a per-cpu BTS buffer in bytes: */
-#define BTS_BUFFER_SIZE		(BTS_RECORD_SIZE * 1024)
+#define BTS_BUFFER_SIZE		(BTS_RECORD_SIZE * 2048)
 
 /* The BTS overflow threshold in bytes from the end of the buffer: */
-#define BTS_OVFL_TH		(BTS_RECORD_SIZE * 64)
+#define BTS_OVFL_TH		(BTS_RECORD_SIZE * 128)
 
 
 /*
@@ -1488,8 +1488,7 @@ void perf_counter_print_debug(void)
 	local_irq_restore(flags);
 }
 
-static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
-				       struct perf_sample_data *data)
+static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc)
 {
 	struct debug_store *ds = cpuc->ds;
 	struct bts_record {
@@ -1498,8 +1497,11 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
 		u64 flags;
 	};
 	struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS];
-	unsigned long orig_ip = data->regs->ip;
 	struct bts_record *at, *top;
+	struct perf_output_handle handle;
+	struct perf_event_header header;
+	struct perf_sample_data data;
+	struct pt_regs regs;
 
 	if (!counter)
 		return;
@@ -1510,19 +1512,38 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
 	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
 	top = (struct bts_record *)(unsigned long)ds->bts_index;
 
+	if (top <= at)
+		return;
+
 	ds->bts_index = ds->bts_buffer_base;
 
 
+	data.period	= counter->hw.last_period;
+	data.addr	= 0;
+	regs.ip		= 0;
+
+	/*
+	 * Prepare a generic sample, i.e. fill in the invariant fields.
+	 * We will overwrite the from and to address before we output
+	 * the sample.
+	 */
+	perf_prepare_sample(&header, &data, counter, &regs);
+
+	if (perf_output_begin(&handle, counter,
+			      header.size * (top - at), 1, 1))
+		return;
+
 	for (; at < top; at++) {
-		data->regs->ip	= at->from;
-		data->addr	= at->to;
+		data.ip		= at->from;
+		data.addr	= at->to;
 
-		perf_counter_output(counter, 1, data);
+		perf_output_sample(&handle, &header, &data, counter);
 	}
 
-	data->regs->ip	= orig_ip;
-	data->addr	= 0;
+	perf_output_end(&handle);
 
+	/* There's new data available. */
+	counter->hw.interrupts++;
+	counter->pending_kill = POLL_IN;
 }

@@ -1552,13 +1573,9 @@ static void x86_pmu_disable(struct perf_counter *counter)
 	x86_perf_counter_update(counter, hwc, idx);
 
 	/* Drain the remaining BTS records. */
-	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
-		struct perf_sample_data data;
-		struct pt_regs regs;
+	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
+		intel_pmu_drain_bts_buffer(cpuc);
 
-		data.regs = &regs;
-		intel_pmu_drain_bts_buffer(cpuc, &data);
-	}
 	cpuc->counters[idx] = NULL;
 	clear_bit(idx, cpuc->used_mask);
 
@@ -1619,7 +1636,6 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)
 	int idx, handled = 0;
 	u64 val;
 
-	data.regs = regs;
 	data.addr = 0;
 
 	cpuc = &__get_cpu_var(cpu_hw_counters);
@@ -1644,7 +1660,7 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)
 		if (!x86_perf_counter_set_period(counter, hwc, idx))
 			continue;
 
-		if (perf_counter_overflow(counter, 1, &data))
+		if (perf_counter_overflow(counter, 1, &data, regs))
 			p6_pmu_disable_counter(hwc, idx);
 	}
 
@@ -1665,13 +1681,12 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	int bit, loops;
 	u64 ack, status;
 
-	data.regs = regs;
 	data.addr = 0;
 
 	cpuc = &__get_cpu_var(cpu_hw_counters);
 
 	perf_disable();
-	intel_pmu_drain_bts_buffer(cpuc, &data);
+	intel_pmu_drain_bts_buffer(cpuc);
 	status = intel_pmu_get_status();
 	if (!status) {
 		perf_enable();
@@ -1702,7 +1717,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 
 		data.period = counter->hw.last_period;
 
-		if (perf_counter_overflow(counter, 1, &data))
+		if (perf_counter_overflow(counter, 1, &data, regs))
 			intel_pmu_disable_counter(&counter->hw, bit);
 	}
 
@@ -1729,7 +1744,6 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
 	int idx, handled = 0;
 	u64 val;
 
-	data.regs = regs;
 	data.addr = 0;
 
 	cpuc = &__get_cpu_var(cpu_hw_counters);
@@ -1754,7 +1768,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
 		if (!x86_perf_counter_set_period(counter, hwc, idx))
 			continue;
 
-		if (perf_counter_overflow(counter, 1, &data))
+		if (perf_counter_overflow(counter, 1, &data, regs))
 			amd_pmu_disable_counter(hwc, idx);
 	}
 
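For scale, the resized constants in the first hunk: a BTS record is 24
bytes, so the doubled buffer now holds 2048 records (48 KiB) and the
overflow interrupt is requested 128 records (3 KiB) before the end of the
buffer. A trivial stand-alone check of that arithmetic, using the new
values:

	#include <stdio.h>

	#define BTS_RECORD_SIZE	24
	#define BTS_BUFFER_SIZE	(BTS_RECORD_SIZE * 2048)	/* new value */
	#define BTS_OVFL_TH	(BTS_RECORD_SIZE * 128)		/* new value */

	int main(void)
	{
		/* prints: buffer 49152 bytes (48 KiB), 2048 records */
		printf("buffer %d bytes (%d KiB), %d records\n",
		       BTS_BUFFER_SIZE, BTS_BUFFER_SIZE / 1024,
		       BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
		/* prints: threshold 3072 bytes, 128 records before the end */
		printf("threshold %d bytes, %d records before the end\n",
		       BTS_OVFL_TH, BTS_OVFL_TH / BTS_RECORD_SIZE);
		return 0;
	}
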
68 changes: 64 additions & 4 deletions include/linux/perf_counter.h
@@ -691,6 +691,17 @@ struct perf_cpu_context {
 	int			recursion[4];
 };
 
+struct perf_output_handle {
+	struct perf_counter	*counter;
+	struct perf_mmap_data	*data;
+	unsigned long		head;
+	unsigned long		offset;
+	int			nmi;
+	int			sample;
+	int			locked;
+	unsigned long		flags;
+};
+
 #ifdef CONFIG_PERF_COUNTERS
 
 /*
@@ -722,16 +733,38 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 extern void perf_counter_update_userpage(struct perf_counter *counter);
 
 struct perf_sample_data {
-	struct pt_regs			*regs;
+	u64				type;
+
+	u64				ip;
+	struct {
+		u32	pid;
+		u32	tid;
+	}				tid_entry;
+	u64				time;
 	u64				addr;
+	u64				id;
+	u64				stream_id;
+	struct {
+		u32	cpu;
+		u32	reserved;
+	}				cpu_entry;
 	u64				period;
+	struct perf_callchain_entry	*callchain;
 	struct perf_raw_record		*raw;
 };
 
+extern void perf_output_sample(struct perf_output_handle *handle,
+			       struct perf_event_header *header,
+			       struct perf_sample_data *data,
+			       struct perf_counter *counter);
+extern void perf_prepare_sample(struct perf_event_header *header,
+				struct perf_sample_data *data,
+				struct perf_counter *counter,
+				struct pt_regs *regs);
+
 extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
-				 struct perf_sample_data *data);
-extern void perf_counter_output(struct perf_counter *counter, int nmi,
-				struct perf_sample_data *data);
+				 struct perf_sample_data *data,
+				 struct pt_regs *regs);
 
 /*
  * Return 1 for a software counter, 0 for a hardware counter
@@ -781,6 +814,12 @@ extern void perf_tpcounter_event(int event_id, u64 addr, u64 count,
 #define perf_instruction_pointer(regs)	instruction_pointer(regs)
 #endif
 
+extern int perf_output_begin(struct perf_output_handle *handle,
+			     struct perf_counter *counter, unsigned int size,
+			     int nmi, int sample);
+extern void perf_output_end(struct perf_output_handle *handle);
+extern void perf_output_copy(struct perf_output_handle *handle,
+			     const void *buf, unsigned int len);
 #else
 static inline void
 perf_counter_task_sched_in(struct task_struct *task, int cpu) { }
@@ -807,7 +846,28 @@ static inline void perf_counter_mmap(struct vm_area_struct *vma)	{ }
 static inline void perf_counter_comm(struct task_struct *tsk)		{ }
 static inline void perf_counter_fork(struct task_struct *tsk)		{ }
 static inline void perf_counter_init(void)				{ }
+
+static inline int
+perf_output_begin(struct perf_output_handle *handle, struct perf_counter *c,
+		  unsigned int size, int nmi, int sample)		{ }
+static inline void perf_output_end(struct perf_output_handle *handle)	{ }
+static inline void
+perf_output_copy(struct perf_output_handle *handle,
+		 const void *buf, unsigned int len)			{ }
+static inline void
+perf_output_sample(struct perf_output_handle *handle,
+		   struct perf_event_header *header,
+		   struct perf_sample_data *data,
+		   struct perf_counter *counter)			{ }
+static inline void
+perf_prepare_sample(struct perf_event_header *header,
+		    struct perf_sample_data *data,
+		    struct perf_counter *counter,
+		    struct pt_regs *regs)				{ }
 #endif
 
+#define perf_output_put(handle, x) \
+	perf_output_copy((handle), &(x), sizeof(x))
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_PERF_COUNTER_H */
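
The header now also exports the low-level output API (implemented in the
third changed file, presumably kernel/perf_counter.c), and perf_output_put()
is defined right here as perf_output_copy() of a fixed-size object. The
sketch below is a userspace model of that begin/put/end discipline — the
model_* names and the toy byte-buffer handle are invented for illustration,
not the kernel's ring-buffer implementation:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* Toy analogue of struct perf_output_handle: a cursor into a buffer. */
	struct model_handle {
		uint8_t	*buf;
		size_t	offset;
		size_t	size;
	};

	static int model_output_begin(struct model_handle *h, uint8_t *buf,
				      size_t reserve, size_t size)
	{
		if (reserve > size)
			return -1;	/* would overflow: caller sizes the batch up front */
		h->buf = buf;
		h->offset = 0;
		h->size = size;
		return 0;
	}

	static void model_output_copy(struct model_handle *h,
				      const void *src, size_t len)
	{
		memcpy(h->buf + h->offset, src, len);
		h->offset += len;
	}

	/* Same shape as the new perf_output_put(handle, x) macro. */
	#define model_output_put(h, x) model_output_copy((h), &(x), sizeof(x))

	static void model_output_end(struct model_handle *h)
	{
		(void)h;	/* kernel: publish the new head, drop the lock */
	}

	int main(void)
	{
		/* 8-byte header; size counts header plus payload, kernel-style */
		struct { uint32_t type; uint16_t misc, size; } header = { 9, 0, 24 };
		uint64_t ip = 0xffffffff81000000ull, addr = 0xffffffff81000040ull;
		uint8_t ring[64];
		struct model_handle h;

		if (model_output_begin(&h, ring, header.size, sizeof(ring)))
			return 1;
		model_output_put(&h, header);	/* like perf_output_put(&handle, header) */
		model_output_put(&h, ip);	/* then one u64 per sampled field */
		model_output_put(&h, addr);
		model_output_end(&h);

		printf("wrote %zu bytes\n", h.offset);	/* 8 + 8 + 8 = 24 */
		return 0;
	}

The point mirrored from the kernel code: the caller computes the total size
before reserving (perf_output_begin() can fail), after which each put is a
simple bounded copy.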
312 changes: 165 additions & 147 deletions kernel/perf_counter.c (diff not loaded on this page)
