Merge tag 'perf-core-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf event updates from Ingo Molnar:
 "HW support updates:

   - Add uncore support for Intel Comet Lake

   - Add RAPL support for Hygon Fam18h

   - Add Intel "IIO stack to PMON mapping" support on Skylake-SP CPUs,
     which enumerates per device performance counters via sysfs and
     enables the perf stat --iiostat functionality

   - Add support for Intel "Architectural LBRs", which generalizes the
     model-specific LBR hardware tracing feature into a
     model-independent, architected performance monitoring feature.

     Usage is mostly seamless to tooling, as the pre-existing LBR
     features are kept, but there are a couple of advantages under the
     hood, such as faster context switching, faster LBR reads, cleaner
     exposure of LBR features to guest kernels, etc.

     ( Since architectural LBRs are supported via XSAVE, there are
       related changes to the x86 FPU code as well. )

  ftrace/perf updates:

   - Add a "text poke" event to record changes to kernel text (i.e.
     self-modifying code), so that tracers like Intel PT can decode
     through jump labels, kprobes and ftrace trampolines.

  Misc cleanups, smaller fixes..."
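
The "Architectural LBRs" item above notes that usage stays seamless for tooling: branch records are still requested through the ordinary perf_event_open() branch-stack interface, whether model-specific or architectural LBRs back them. The sketch below is a minimal, hypothetical userspace illustration of that unchanged interface (the event choice, sample period and error handling are arbitrary); it is not code from this series.

/*
 * Hypothetical sketch, not part of this merge: sample branch records
 * (LBRs) for the current thread via perf_event_open().  Whether the
 * kernel services this with model-specific or architectural LBRs is
 * invisible at this level.
 */
#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;			/* arbitrary */
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
				  PERF_SAMPLE_BRANCH_USER;
	attr.exclude_kernel = 1;

	/* Monitor the calling thread on any CPU. */
	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	/*
	 * A real tool would mmap() the ring buffer here and parse
	 * PERF_RECORD_SAMPLE records carrying the branch stack.
	 */
	close(fd);
	return 0;
}

With architectural LBRs, the kernel can save and restore this state through XSAVES/XRSTORS on context switch, which is where the x86 FPU changes mentioned above come in.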

* tag 'perf-core-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (47 commits)
  perf/x86/rapl: Add Hygon Fam18h RAPL support
  kprobes: Remove unnecessary module_mutex locking from kprobe_optimizer()
  x86/perf: Fix a typo
  perf: <linux/perf_event.h>: drop a duplicated word
  perf/x86/intel/lbr: Support XSAVES for arch LBR read
  perf/x86/intel/lbr: Support XSAVES/XRSTORS for LBR context switch
  x86/fpu/xstate: Add helpers for LBR dynamic supervisor feature
  x86/fpu/xstate: Support dynamic supervisor feature for LBR
  x86/fpu: Use proper mask to replace full instruction mask
  perf/x86: Remove task_ctx_size
  perf/x86/intel/lbr: Create kmem_cache for the LBR context data
  perf/core: Use kmem_cache to allocate the PMU specific data
  perf/core: Factor out functions to allocate/free the task_ctx_data
  perf/x86/intel/lbr: Support Architectural LBR
  perf/x86/intel/lbr: Factor out intel_pmu_store_lbr
  perf/x86/intel/lbr: Factor out rdlbr_all() and wrlbr_all()
  perf/x86/intel/lbr: Mark the {rd,wr}lbr_{to,from} wrappers __always_inline
  perf/x86/intel/lbr: Unify the stored format of LBR information
  perf/x86/intel/lbr: Support LBR_CTL
  perf/x86: Expose CPUID enumeration bits for arch LBR
  ...
torvalds committed Aug 3, 2020
2 parents 9dee868 + d903b6d commit b34133f
Showing 32 changed files with 1,943 additions and 280 deletions.
33 changes: 33 additions & 0 deletions Documentation/ABI/testing/sysfs-devices-mapping
@@ -0,0 +1,33 @@
What: /sys/devices/uncore_iio_x/dieX
Date: February 2020
Contact: Roman Sudarikov <[email protected]>
Description:
Each IIO stack (PCIe root port) has its own IIO PMON block, so
each dieX file (where X is the die number) holds the "Segment:Root
Bus" of the PCIe root port that can be monitored by that IIO PMON
block.
For example, on a 4-die Xeon platform with up to 6 IIO stacks per
die (and therefore 6 IIO PMON blocks per die), the mapping of
IIO PMON block 0 is exposed as follows:

$ ls /sys/devices/uncore_iio_0/die*
-r--r--r-- /sys/devices/uncore_iio_0/die0
-r--r--r-- /sys/devices/uncore_iio_0/die1
-r--r--r-- /sys/devices/uncore_iio_0/die2
-r--r--r-- /sys/devices/uncore_iio_0/die3

$ tail /sys/devices/uncore_iio_0/die*
==> /sys/devices/uncore_iio_0/die0 <==
0000:00
==> /sys/devices/uncore_iio_0/die1 <==
0000:40
==> /sys/devices/uncore_iio_0/die2 <==
0000:80
==> /sys/devices/uncore_iio_0/die3 <==
0000:c0

Which means:
IIO PMU 0 on die 0 belongs to PCI RP on bus 0x00, domain 0x0000
IIO PMU 0 on die 1 belongs to PCI RP on bus 0x40, domain 0x0000
IIO PMU 0 on die 2 belongs to PCI RP on bus 0x80, domain 0x0000
IIO PMU 0 on die 3 belongs to PCI RP on bus 0xc0, domain 0x0000
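
As a rough illustration of consuming the mapping documented above, the hypothetical snippet below reads the dieX files of uncore_iio_0 and prints the "Segment:Root Bus" each die maps to; the fixed PMU index, the 4-die loop bound and the minimal error handling are assumptions made for brevity, not part of the interface.

/*
 * Hypothetical reader of the sysfs mapping above: print which PCIe
 * root bus IIO PMU 0 monitors on each die.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char path[64], buf[32];
	int die;

	for (die = 0; die < 4; die++) {		/* 4-die example platform */
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/devices/uncore_iio_0/die%d", die);
		f = fopen(path, "r");
		if (!f)
			continue;		/* die not present */
		if (fgets(buf, sizeof(buf), f)) {
			buf[strcspn(buf, "\n")] = '\0';
			printf("IIO PMU 0 on die %d -> %s\n", die, buf);
		}
		fclose(f);
	}
	return 0;
}
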
28 changes: 18 additions & 10 deletions arch/x86/events/core.c
@@ -71,10 +71,9 @@ u64 x86_perf_event_update(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
int shift = 64 - x86_pmu.cntval_bits;
u64 prev_raw_count, new_raw_count;
int idx = hwc->idx;
u64 delta;

if (idx == INTEL_PMC_IDX_FIXED_BTS)
if (unlikely(!hwc->event_base))
return 0;

/*
@@ -359,6 +358,7 @@ void x86_release_hardware(void)
if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
release_pmc_hardware();
release_ds_buffers();
release_lbr_buffers();
mutex_unlock(&pmc_reserve_mutex);
}
}
@@ -1097,22 +1097,31 @@ static inline void x86_assign_hw_event(struct perf_event *event,
struct cpu_hw_events *cpuc, int i)
{
struct hw_perf_event *hwc = &event->hw;
int idx;

hwc->idx = cpuc->assign[i];
idx = hwc->idx = cpuc->assign[i];
hwc->last_cpu = smp_processor_id();
hwc->last_tag = ++cpuc->tags[i];

if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {
switch (hwc->idx) {
case INTEL_PMC_IDX_FIXED_BTS:
case INTEL_PMC_IDX_FIXED_VLBR:
hwc->config_base = 0;
hwc->event_base = 0;
} else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
break;

case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1:
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED);
hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30;
} else {
hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +
(idx - INTEL_PMC_IDX_FIXED);
hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) | 1<<30;
break;

default:
hwc->config_base = x86_pmu_config_addr(hwc->idx);
hwc->event_base = x86_pmu_event_addr(hwc->idx);
hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
break;
}
}

@@ -1233,7 +1242,7 @@ int x86_perf_event_set_period(struct perf_event *event)
s64 period = hwc->sample_period;
int ret = 0, idx = hwc->idx;

if (idx == INTEL_PMC_IDX_FIXED_BTS)
if (unlikely(!hwc->event_base))
return 0;

/*
@@ -2363,7 +2372,6 @@ static struct pmu pmu = {

.event_idx = x86_pmu_event_idx,
.sched_task = x86_pmu_sched_task,
.task_ctx_size = sizeof(struct x86_perf_task_context),
.swap_task_ctx = x86_pmu_swap_task_ctx,
.check_period = x86_pmu_check_period,

127 changes: 86 additions & 41 deletions arch/x86/events/intel/core.c
@@ -2136,8 +2136,35 @@ static inline void intel_pmu_ack_status(u64 ack)
wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}

static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
static inline bool event_is_checkpointed(struct perf_event *event)
{
return unlikely(event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
}

static inline void intel_set_masks(struct perf_event *event, int idx)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

if (event->attr.exclude_host)
__set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask);
if (event->attr.exclude_guest)
__set_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask);
if (event_is_checkpointed(event))
__set_bit(idx, (unsigned long *)&cpuc->intel_cp_status);
}

static inline void intel_clear_masks(struct perf_event *event, int idx)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

__clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_guest_mask);
__clear_bit(idx, (unsigned long *)&cpuc->intel_ctrl_host_mask);
__clear_bit(idx, (unsigned long *)&cpuc->intel_cp_status);
}

static void intel_pmu_disable_fixed(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
u64 ctrl_val, mask;

@@ -2148,30 +2175,22 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
wrmsrl(hwc->config_base, ctrl_val);
}

static inline bool event_is_checkpointed(struct perf_event *event)
{
return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
}

static void intel_pmu_disable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int idx = hwc->idx;

if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
if (idx < INTEL_PMC_IDX_FIXED) {
intel_clear_masks(event, idx);
x86_pmu_disable_event(event);
} else if (idx < INTEL_PMC_IDX_FIXED_BTS) {
intel_clear_masks(event, idx);
intel_pmu_disable_fixed(event);
} else if (idx == INTEL_PMC_IDX_FIXED_BTS) {
intel_pmu_disable_bts();
intel_pmu_drain_bts_buffer();
return;
}

cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
cpuc->intel_cp_status &= ~(1ull << hwc->idx);

if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
intel_pmu_disable_fixed(hwc);
else
x86_pmu_disable_event(event);
} else if (idx == INTEL_PMC_IDX_FIXED_VLBR)
intel_clear_masks(event, idx);

/*
* Needs to be called after x86_pmu_disable_event,
@@ -2238,33 +2257,23 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
static void intel_pmu_enable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
if (!__this_cpu_read(cpu_hw_events.enabled))
return;

intel_pmu_enable_bts(hwc->config);
return;
}

if (event->attr.exclude_host)
cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
if (event->attr.exclude_guest)
cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);

if (unlikely(event_is_checkpointed(event)))
cpuc->intel_cp_status |= (1ull << hwc->idx);
int idx = hwc->idx;

if (unlikely(event->attr.precise_ip))
intel_pmu_pebs_enable(event);

if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
if (idx < INTEL_PMC_IDX_FIXED) {
intel_set_masks(event, idx);
__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
} else if (idx < INTEL_PMC_IDX_FIXED_BTS) {
intel_set_masks(event, idx);
intel_pmu_enable_fixed(event);
return;
}

__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
} else if (idx == INTEL_PMC_IDX_FIXED_BTS) {
if (!__this_cpu_read(cpu_hw_events.enabled))
return;
intel_pmu_enable_bts(hwc->config);
} else if (idx == INTEL_PMC_IDX_FIXED_VLBR)
intel_set_masks(event, idx);
}

static void intel_pmu_add_event(struct perf_event *event)
@@ -2614,6 +2623,20 @@ intel_bts_constraints(struct perf_event *event)
return NULL;
}

/*
* Note: matches a fake event, like Fixed2.
*/
static struct event_constraint *
intel_vlbr_constraints(struct perf_event *event)
{
struct event_constraint *c = &vlbr_constraint;

if (unlikely(constraint_match(c, event->hw.config)))
return c;

return NULL;
}

static int intel_alt_er(int idx, u64 config)
{
int alt_idx = idx;
@@ -2804,6 +2827,10 @@ __intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
{
struct event_constraint *c;

c = intel_vlbr_constraints(event);
if (c)
return c;

c = intel_bts_constraints(event);
if (c)
return c;
@@ -3951,6 +3978,11 @@ static __initconst const struct x86_pmu core_pmu = {
.cpu_dead = intel_pmu_cpu_dead,

.check_period = intel_pmu_check_period,

.lbr_reset = intel_pmu_lbr_reset_64,
.lbr_read = intel_pmu_lbr_read_64,
.lbr_save = intel_pmu_lbr_save,
.lbr_restore = intel_pmu_lbr_restore,
};

static __initconst const struct x86_pmu intel_pmu = {
@@ -3996,6 +4028,11 @@ static __initconst const struct x86_pmu intel_pmu = {
.check_period = intel_pmu_check_period,

.aux_output_match = intel_pmu_aux_output_match,

.lbr_reset = intel_pmu_lbr_reset_64,
.lbr_read = intel_pmu_lbr_read_64,
.lbr_save = intel_pmu_lbr_save,
.lbr_restore = intel_pmu_lbr_restore,
};

static __init void intel_clovertown_quirk(void)
@@ -4622,6 +4659,14 @@ __init int intel_pmu_init(void)
x86_pmu.intel_cap.capabilities = capabilities;
}

if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) {
x86_pmu.lbr_reset = intel_pmu_lbr_reset_32;
x86_pmu.lbr_read = intel_pmu_lbr_read_32;
}

if (boot_cpu_has(X86_FEATURE_ARCH_LBR))
intel_pmu_arch_lbr_init();

intel_ds_init();

x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
6 changes: 3 additions & 3 deletions arch/x86/events/intel/ds.c
@@ -954,7 +954,7 @@ static void adaptive_pebs_record_size_update(void)
if (pebs_data_cfg & PEBS_DATACFG_XMMS)
sz += sizeof(struct pebs_xmm);
if (pebs_data_cfg & PEBS_DATACFG_LBRS)
sz += x86_pmu.lbr_nr * sizeof(struct pebs_lbr_entry);
sz += x86_pmu.lbr_nr * sizeof(struct lbr_entry);

cpuc->pebs_record_size = sz;
}
@@ -1595,10 +1595,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
}

if (format_size & PEBS_DATACFG_LBRS) {
struct pebs_lbr *lbr = next_record;
struct lbr_entry *lbr = next_record;
int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
& 0xff) + 1;
next_record = next_record + num_lbr*sizeof(struct pebs_lbr_entry);
next_record = next_record + num_lbr * sizeof(struct lbr_entry);

if (has_branch_stack(event)) {
intel_pmu_store_pebs_lbrs(lbr);