Skip to content

Commit

Permalink
Merge tag 'perf_urgent_for_v5.12-rc3' of git://git.kernel.org/pub/scm…
Browse files Browse the repository at this point in the history
…/linux/kernel/git/tip/tip

Pull perf fixes from Borislav Petkov:

 - Make sure PMU internal buffers are flushed for per-CPU events too and
   properly handle PID/TID for large PEBS.

 - Properly handle the case where there is no PMU: return an empty list
   of perf MSRs for VMX to switch instead of reading random garbage from
   the stack.

* tag 'perf_urgent_for_v5.12-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/perf: Use RET0 as default for guest_get_msrs to handle "no PMU" case
  perf/x86/intel: Set PERF_ATTACH_SCHED_CB for large PEBS and LBR
  perf/core: Flush PMU internal buffers for per-CPU events
  • Loading branch information
torvalds committed Mar 14, 2021
2 parents 836d7f0 + c8e2fe1 commit 75013c6
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 15 deletions.
15 changes: 6 additions & 9 deletions arch/x86/events/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,11 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);

DEFINE_STATIC_CALL_NULL(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs);
/*
* This one is magic, it will get called even when PMU init fails (because
* there is no PMU), in which case it should simply return NULL.
*/
DEFINE_STATIC_CALL_RET0(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs);

u64 __read_mostly hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
Expand Down Expand Up @@ -1944,13 +1948,6 @@ static void _x86_pmu_read(struct perf_event *event)
x86_perf_event_update(event);
}

/*
 * Stub used as x86_pmu.guest_get_msrs when no other callback is set:
 * stores 0 through @nr and returns NULL, i.e. reports an empty MSR list.
 */
static inline struct perf_guest_switch_msr *
perf_guest_get_msrs_nop(int *nr)
{
*nr = 0;
return NULL;
}

static int __init init_hw_perf_events(void)
{
struct x86_pmu_quirk *quirk;
Expand Down Expand Up @@ -2025,7 +2022,7 @@ static int __init init_hw_perf_events(void)
x86_pmu.read = _x86_pmu_read;

if (!x86_pmu.guest_get_msrs)
x86_pmu.guest_get_msrs = perf_guest_get_msrs_nop;
x86_pmu.guest_get_msrs = (void *)&__static_call_return0;

x86_pmu_static_call_update();

Expand Down
5 changes: 4 additions & 1 deletion arch/x86/events/intel/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -3662,8 +3662,10 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) {
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
if (!(event->attr.sample_type &
~intel_pmu_large_pebs_flags(event)))
~intel_pmu_large_pebs_flags(event))) {
event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS;
event->attach_state |= PERF_ATTACH_SCHED_CB;
}
}
if (x86_pmu.pebs_aliases)
x86_pmu.pebs_aliases(event);
Expand All @@ -3676,6 +3678,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
ret = intel_pmu_setup_lbr_filter(event);
if (ret)
return ret;
event->attach_state |= PERF_ATTACH_SCHED_CB;

/*
* BTS is set up earlier in this path, so don't account twice
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/kvm/vmx/vmx.c
Original file line number Diff line number Diff line change
Expand Up @@ -6580,8 +6580,8 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
int i, nr_msrs;
struct perf_guest_switch_msr *msrs;

/* Note, nr_msrs may be garbage if perf_guest_get_msrs() returns NULL. */
msrs = perf_guest_get_msrs(&nr_msrs);

if (!msrs)
return;

Expand Down
2 changes: 2 additions & 0 deletions include/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,7 @@ struct swevent_hlist {
#define PERF_ATTACH_TASK 0x04
#define PERF_ATTACH_TASK_DATA 0x08
#define PERF_ATTACH_ITRACE 0x10
#define PERF_ATTACH_SCHED_CB 0x20

struct perf_cgroup;
struct perf_buffer;
Expand Down Expand Up @@ -872,6 +873,7 @@ struct perf_cpu_context {
struct list_head cgrp_cpuctx_entry;
#endif

struct list_head sched_cb_entry;
int sched_cb_usage;

int online;
Expand Down
42 changes: 38 additions & 4 deletions kernel/events/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,7 @@ static DEFINE_MUTEX(perf_sched_mutex);
static atomic_t perf_sched_count;

static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(int, perf_sched_cb_usages);
static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);

static atomic_t nr_mmap_events __read_mostly;
Expand Down Expand Up @@ -3461,19 +3462,27 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
}
}

static DEFINE_PER_CPU(struct list_head, sched_cb_list);

/*
 * Drop one sched-callback user of @pmu on the current CPU.
 *
 * NOTE(review): this text is a rendered diff without +/- markers.  The bare
 * "--cpuctx->sched_cb_usage;" and the decrement inside the if () look like
 * the old and the new form of the same statement, not two decrements that
 * both run -- confirm against the real tree before acting on it.
 */
void perf_sched_cb_dec(struct pmu *pmu)
{
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);

--cpuctx->sched_cb_usage;
/*
 * Per-CPU counter that __perf_event_task_sched_in/out() test before
 * calling perf_pmu_sched_task().
 */
this_cpu_dec(perf_sched_cb_usages);

/* Usage count reached zero: unlink this context's sched_cb_entry. */
if (!--cpuctx->sched_cb_usage)
list_del(&cpuctx->sched_cb_entry);
}


/*
 * Register one more sched-callback user of @pmu on the current CPU.
 *
 * NOTE(review): this text is a rendered diff without +/- markers.  The bare
 * "cpuctx->sched_cb_usage++;" and the increment inside the if () look like
 * the old and the new form of the same statement, not two increments that
 * both run -- confirm against the real tree before acting on it.
 */
void perf_sched_cb_inc(struct pmu *pmu)
{
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);

cpuctx->sched_cb_usage++;
/* Usage count was zero: link this context onto this CPU's sched_cb_list. */
if (!cpuctx->sched_cb_usage++)
list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));

/*
 * Per-CPU counter that __perf_event_task_sched_in/out() test before
 * calling perf_pmu_sched_task().
 */
this_cpu_inc(perf_sched_cb_usages);
}

/*
Expand Down Expand Up @@ -3502,6 +3511,24 @@ static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}

/*
 * Call __perf_pmu_sched_task() for each perf_cpu_context linked on the
 * current CPU's sched_cb_list.
 *
 * @prev:     task being switched away from
 * @next:     task being switched to
 * @sched_in: passed through unchanged to __perf_pmu_sched_task()
 *
 * Does nothing when @prev and @next are the same task.
 */
static void perf_pmu_sched_task(struct task_struct *prev,
struct task_struct *next,
bool sched_in)
{
struct perf_cpu_context *cpuctx;

/* No actual task change, so there is nothing to do. */
if (prev == next)
return;

list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
/*
 * Contexts that have a task_ctx are skipped here; they will be
 * handled in perf_event_context_sched_in/out.
 */
if (cpuctx->task_ctx)
continue;

__perf_pmu_sched_task(cpuctx, sched_in);
}
}

static void perf_event_switch(struct task_struct *task,
struct task_struct *next_prev, bool sched_in);

Expand All @@ -3524,6 +3551,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
{
int ctxn;

if (__this_cpu_read(perf_sched_cb_usages))
perf_pmu_sched_task(task, next, false);

if (atomic_read(&nr_switch_events))
perf_event_switch(task, next, false);

Expand Down Expand Up @@ -3832,6 +3862,9 @@ void __perf_event_task_sched_in(struct task_struct *prev,

if (atomic_read(&nr_switch_events))
perf_event_switch(task, prev, true);

if (__this_cpu_read(perf_sched_cb_usages))
perf_pmu_sched_task(prev, task, true);
}

static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
Expand Down Expand Up @@ -4656,7 +4689,7 @@ static void unaccount_event(struct perf_event *event)
if (event->parent)
return;

if (event->attach_state & PERF_ATTACH_TASK)
if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB))
dec = true;
if (event->attr.mmap || event->attr.mmap_data)
atomic_dec(&nr_mmap_events);
Expand Down Expand Up @@ -11175,7 +11208,7 @@ static void account_event(struct perf_event *event)
if (event->parent)
return;

if (event->attach_state & PERF_ATTACH_TASK)
if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB))
inc = true;
if (event->attr.mmap || event->attr.mmap_data)
atomic_inc(&nr_mmap_events);
Expand Down Expand Up @@ -12972,6 +13005,7 @@ static void __init perf_event_init_all_cpus(void)
#ifdef CONFIG_CGROUP_PERF
INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
#endif
INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
}
}

Expand Down

0 comments on commit 75013c6

Please sign in to comment.