Skip to content

Commit

Permalink
powerpc/perf: Expose processor pipeline stage cycles using PERF_SAMPL…
Browse files Browse the repository at this point in the history
…E_WEIGHT_STRUCT

Performance Monitoring Unit (PMU) registers in powerpc provide
information on cycles elapsed between different stages in the
pipeline. This can be used for application tuning. On ISA v3.1
platforms, this information is exposed by sampling registers.
This patch adds kernel support to capture two of the cycle counters
as part of a perf sample using the sample type:
PERF_SAMPLE_WEIGHT_STRUCT.

The power PMU function 'get_mem_weight' currently uses the 64-bit weight
field of perf_sample_data to capture memory latency. But following the
introduction of PERF_SAMPLE_WEIGHT_TYPE, the weight field could contain
a 64-bit or 32-bit value depending on the architecture support for
PERF_SAMPLE_WEIGHT_STRUCT. This patch uses WEIGHT_STRUCT to expose the
pipeline stage cycles info. Hence update the ppmu functions to work for
64-bit and 32-bit weight values.

If the sample type is PERF_SAMPLE_WEIGHT, use the 64-bit weight field.
If the sample type is PERF_SAMPLE_WEIGHT_STRUCT, memory subsystem
latency is stored in the low 32 bits of the perf_sample_weight structure.
Also, for CPU_FTR_ARCH_31, capture the two cycle counter values in
the two 16-bit fields of the perf_sample_weight structure.

Signed-off-by: Athira Rajeev <[email protected]>
Reviewed-by: Madhavan Srinivasan <[email protected]>
Signed-off-by: Michael Ellerman <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
  • Loading branch information
athira-rajeev authored and mpe committed Apr 20, 2021
1 parent 2886e2d commit af31fd0
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 7 deletions.
2 changes: 1 addition & 1 deletion arch/powerpc/include/asm/perf_event_server.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ struct power_pmu {
u64 alt[]);
void (*get_mem_data_src)(union perf_mem_data_src *dsrc,
u32 flags, struct pt_regs *regs);
void (*get_mem_weight)(u64 *weight);
void (*get_mem_weight)(u64 *weight, u64 type);
unsigned long group_constraint_mask;
unsigned long group_constraint_val;
u64 (*bhrb_filter_map)(u64 branch_sample_type);
Expand Down
4 changes: 2 additions & 2 deletions arch/powerpc/perf/core-book3s.c
Original file line number Diff line number Diff line change
Expand Up @@ -2218,9 +2218,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
ppmu->get_mem_data_src)
ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs);

if (event->attr.sample_type & PERF_SAMPLE_WEIGHT &&
if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
ppmu->get_mem_weight)
ppmu->get_mem_weight(&data.weight.full);
ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);

if (perf_event_overflow(event, &data, regs))
power_pmu_stop(event, 0);
Expand Down
29 changes: 26 additions & 3 deletions arch/powerpc/perf/isa207-common.c
Original file line number Diff line number Diff line change
Expand Up @@ -284,8 +284,10 @@ void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
}
}

void isa207_get_mem_weight(u64 *weight)
void isa207_get_mem_weight(u64 *weight, u64 type)
{
union perf_sample_weight *weight_fields;
u64 weight_lat;
u64 mmcra = mfspr(SPRN_MMCRA);
u64 exp = MMCRA_THR_CTR_EXP(mmcra);
u64 mantissa = MMCRA_THR_CTR_MANT(mmcra);
Expand All @@ -296,9 +298,30 @@ void isa207_get_mem_weight(u64 *weight)
mantissa = P10_MMCRA_THR_CTR_MANT(mmcra);

if (val == 0 || val == 7)
*weight = 0;
weight_lat = 0;
else
*weight = mantissa << (2 * exp);
weight_lat = mantissa << (2 * exp);

/*
* Use 64 bit weight field (full) if sample type is
* WEIGHT.
*
* if sample type is WEIGHT_STRUCT:
* - store memory latency in the lower 32 bits.
* - For ISA v3.1, use remaining two 16 bit fields of
* perf_sample_weight to store cycle counter values
* from sier2.
*/
weight_fields = (union perf_sample_weight *)weight;
if (type & PERF_SAMPLE_WEIGHT)
weight_fields->full = weight_lat;
else {
weight_fields->var1_dw = (u32)weight_lat;
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
weight_fields->var2_w = P10_SIER2_FINISH_CYC(mfspr(SPRN_SIER2));
weight_fields->var3_w = P10_SIER2_DISPATCH_CYC(mfspr(SPRN_SIER2));
}
}
}

int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1)
Expand Down
6 changes: 5 additions & 1 deletion arch/powerpc/perf/isa207-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,10 @@
#define ISA207_SIER_DATA_SRC_SHIFT 53
#define ISA207_SIER_DATA_SRC_MASK (0x7ull << ISA207_SIER_DATA_SRC_SHIFT)

/* Bits in SIER2/SIER3 for Power10 */
#define P10_SIER2_FINISH_CYC(sier2) (((sier2) >> (63 - 37)) & 0x7fful)
#define P10_SIER2_DISPATCH_CYC(sier2) (((sier2) >> (63 - 13)) & 0x7fful)

#define P(a, b) PERF_MEM_S(a, b)
#define PH(a, b) (P(LVL, HIT) | P(a, b))
#define PM(a, b) (P(LVL, MISS) | P(a, b))
Expand All @@ -278,7 +282,7 @@ int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
const unsigned int ev_alt[][MAX_ALT]);
void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
struct pt_regs *regs);
void isa207_get_mem_weight(u64 *weight);
void isa207_get_mem_weight(u64 *weight, u64 type);

int isa3XX_check_attr_config(struct perf_event *ev);

Expand Down

0 comments on commit af31fd0

Please sign in to comment.