Skip to content

Commit

Permalink
drm/i915/perf: Add support for OA media units
Browse files Browse the repository at this point in the history
MTL introduces additional OA units dedicated to media use cases. Add
support for programming these OA units by passing the media engine class
and instance parameters.

UMD specific changes for GPUvis support:
https://patchwork.freedesktop.org/patch/522827/?series=114023
https://patchwork.freedesktop.org/patch/522822/?series=114023
https://patchwork.freedesktop.org/patch/522826/?series=114023
https://patchwork.freedesktop.org/patch/522828/?series=114023
https://patchwork.freedesktop.org/patch/522816/?series=114023
https://patchwork.freedesktop.org/patch/522825/?series=114023

v2: (Ashutosh)
- check for IP_VER(12, 70) instead of MTL
- remove PERF_GROUP_OAG comment in mtl_oa_base
- remove oa_buffer.group
- use engine->oa_group->type in engine_supports_oa_format
- remove fw_domains and use FORCEWAKE_ALL
- remove MPES/MPEC comment
- s/xehp/mtl/ in b counter validation function name
- remove engine_supports_oa in __oa_engine_group
- remove warn_ON from __oam_engine_group
- refactor oa_init_groups and oa_init_regs
- assign g->type correctly
- use enum oa_type definition

v3: (Ashutosh)
- Drop oa_unit_functional as engine_supports_oa is enough

v4:
- s/DRM_DEBUG/drm_dbg/

Signed-off-by: Umesh Nerlige Ramappa <[email protected]>
Reviewed-by: Ashutosh Dixit <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
  • Loading branch information
unerlige committed Mar 24, 2023
1 parent c61d04c commit 1cc064d
Show file tree
Hide file tree
Showing 7 changed files with 278 additions and 22 deletions.
2 changes: 2 additions & 0 deletions drivers/gpu/drm/i915/i915_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -905,6 +905,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
(INTEL_INFO(dev_priv)->has_oa_bpc_reporting)
#define HAS_OA_SLICE_CONTRIB_LIMITS(dev_priv) \
(INTEL_INFO(dev_priv)->has_oa_slice_contrib_limits)
#define HAS_OAM(dev_priv) \
(INTEL_INFO(dev_priv)->has_oam)

/*
* Set this flag, when platform requires 64K GTT page sizes or larger for
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/i915/i915_pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -1027,6 +1027,7 @@ static const struct intel_device_info adl_p_info = {
.has_mslice_steering = 1, \
.has_oa_bpc_reporting = 1, \
.has_oa_slice_contrib_limits = 1, \
.has_oam = 1, \
.has_rc6 = 1, \
.has_reset_engine = 1, \
.has_rps = 1, \
Expand Down
184 changes: 162 additions & 22 deletions drivers/gpu/drm/i915/i915_perf.c
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@
*/

#include <linux/anon_inodes.h>
#include <linux/nospec.h>
#include <linux/sizes.h>
#include <linux/uuid.h>

Expand Down Expand Up @@ -326,6 +327,12 @@ static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
[I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
[I915_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256 },
[I915_OAM_FORMAT_MPEC8u64_B8_C8] = { 1, 192, TYPE_OAM, HDR_64_BIT },
[I915_OAM_FORMAT_MPEC8u32_B8_C8] = { 2, 128, TYPE_OAM, HDR_64_BIT },
};

static const u32 mtl_oa_base[] = {
[PERF_GROUP_OAM_SAMEDIA_0] = 0x393000,
};

#define SAMPLE_OA_REPORT (1<<0)
Expand Down Expand Up @@ -418,11 +425,17 @@ static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
kfree(oa_bo);
}

static inline const
struct i915_perf_regs *__oa_regs(struct i915_perf_stream *stream)
{
return &stream->engine->oa_group->regs;
}

static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;

return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) &
return intel_uncore_read(uncore, __oa_regs(stream)->oa_tail_ptr) &
GEN12_OAG_OATAILPTR_MASK;
}

Expand Down Expand Up @@ -875,7 +888,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
i915_reg_t oaheadptr;

oaheadptr = GRAPHICS_VER(stream->perf->i915) == 12 ?
GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR;
__oa_regs(stream)->oa_head_ptr :
GEN8_OAHEADPTR;

spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

Expand Down Expand Up @@ -928,7 +942,8 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
return -EIO;

oastatus_reg = GRAPHICS_VER(stream->perf->i915) == 12 ?
GEN12_OAG_OASTATUS : GEN8_OASTATUS;
__oa_regs(stream)->oa_status :
GEN8_OASTATUS;

oastatus = intel_uncore_read(uncore, oastatus_reg);

Expand Down Expand Up @@ -1637,6 +1652,11 @@ static bool engine_supports_oa(const struct intel_engine_cs *engine)
return engine->oa_group;
}

static bool engine_supports_oa_format(struct intel_engine_cs *engine, int type)
{
return engine->oa_group && engine->oa_group->type == type;
}

static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
struct i915_perf *perf = stream->perf;
Expand Down Expand Up @@ -1788,8 +1808,8 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)

spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0);
intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR,
intel_uncore_write(uncore, __oa_regs(stream)->oa_status, 0);
intel_uncore_write(uncore, __oa_regs(stream)->oa_head_ptr,
gtt_offset & GEN12_OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = gtt_offset;

Expand All @@ -1801,9 +1821,9 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
* to enable proper functionality of the overflow
* bit."
*/
intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset |
intel_uncore_write(uncore, __oa_regs(stream)->oa_buffer, gtt_offset |
OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
intel_uncore_write(uncore, GEN12_OAG_OATAILPTR,
intel_uncore_write(uncore, __oa_regs(stream)->oa_tail_ptr,
gtt_offset & GEN12_OAG_OATAILPTR_MASK);

/* Mark that we need updated tail pointers to read from... */
Expand Down Expand Up @@ -2563,7 +2583,8 @@ gen8_modify_self(struct intel_context *ce,
return err;
}

static int gen8_configure_context(struct i915_gem_context *ctx,
static int gen8_configure_context(struct i915_perf_stream *stream,
struct i915_gem_context *ctx,
struct flex *flex, unsigned int count)
{
struct i915_gem_engines_iter it;
Expand Down Expand Up @@ -2704,7 +2725,7 @@ oa_configure_all_contexts(struct i915_perf_stream *stream,

spin_unlock(&i915->gem.contexts.lock);

err = gen8_configure_context(ctx, regs, num_regs);
err = gen8_configure_context(stream, ctx, regs, num_regs);
if (err) {
i915_gem_context_put(ctx);
return err;
Expand Down Expand Up @@ -2749,6 +2770,9 @@ gen12_configure_all_contexts(struct i915_perf_stream *stream,
},
};

if (stream->engine->class != RENDER_CLASS)
return 0;

return oa_configure_all_contexts(stream,
regs, ARRAY_SIZE(regs),
active);
Expand Down Expand Up @@ -2878,7 +2902,7 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
_MASKED_BIT_ENABLE(GEN12_DISABLE_DOP_GATING));
}

intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
intel_uncore_write(uncore, __oa_regs(stream)->oa_debug,
/* Disable clk ratio reports, like previous Gens. */
_MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) |
Expand All @@ -2888,7 +2912,7 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
*/
oag_report_ctx_switches(stream));

intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ?
intel_uncore_write(uncore, __oa_regs(stream)->oa_ctx_ctrl, periodic ?
(GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME |
GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE |
(period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
Expand Down Expand Up @@ -3042,8 +3066,8 @@ static void gen8_oa_enable(struct i915_perf_stream *stream)

static void gen12_oa_enable(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
u32 report_format = stream->oa_buffer.format->format;
const struct i915_perf_regs *regs;
u32 val;

/*
* If we don't want OA reports from the OA buffer, then we don't even
Expand All @@ -3054,9 +3078,11 @@ static void gen12_oa_enable(struct i915_perf_stream *stream)

gen12_init_oa_buffer(stream);

intel_uncore_write(uncore, GEN12_OAG_OACONTROL,
(report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) |
GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE);
regs = __oa_regs(stream);
val = (stream->oa_buffer.format->format << regs->oa_ctrl_counter_format_shift) |
GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE;

intel_uncore_write(stream->uncore, regs->oa_ctrl, val);
}

/**
Expand Down Expand Up @@ -3108,9 +3134,9 @@ static void gen12_oa_disable(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;

intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0);
intel_uncore_write(uncore, __oa_regs(stream)->oa_ctrl, 0);
if (intel_wait_for_register(uncore,
GEN12_OAG_OACONTROL,
__oa_regs(stream)->oa_ctrl,
GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0,
50))
drm_err(&stream->perf->i915->drm,
Expand Down Expand Up @@ -4011,6 +4037,7 @@ static int read_properties_unlocked(struct i915_perf *perf,
struct perf_open_properties *props)
{
struct drm_i915_gem_context_param_sseu user_sseu;
const struct i915_oa_format *f;
u64 __user *uprop = uprops;
bool config_instance = false;
bool config_class = false;
Expand Down Expand Up @@ -4196,6 +4223,15 @@ static int read_properties_unlocked(struct i915_perf *perf,
return -EINVAL;
}

i = array_index_nospec(props->oa_format, I915_OA_FORMAT_MAX);
f = &perf->oa_formats[i];
if (!engine_supports_oa_format(props->engine, f->type)) {
drm_dbg(&perf->i915->drm,
"Invalid OA format %d for class %d\n",
f->type, props->engine->class);
return -EINVAL;
}

if (config_sseu) {
ret = get_sseu_config(&props->sseu, props->engine, &user_sseu);
if (ret) {
Expand Down Expand Up @@ -4376,6 +4412,14 @@ static const struct i915_range gen12_oa_b_counters[] = {
{}
};

/*
 * Address ranges checked by mtl_is_valid_oam_b_counter_addr().
 *
 * All ranges start at or above 0x393000, which is the value mtl_oa_base[]
 * holds for PERF_GROUP_OAM_SAMEDIA_0. The final empty entry is presumably
 * the terminator that reg_in_range_table() stops at.
 */
static const struct i915_range mtl_oam_b_counters[] = {
	/* GEN12_OAM_STARTTRIG1[1-8] */
	{ .start = 0x393000, .end = 0x39301c },
	/* GEN12_OAM_REPORTTRIG1[1-8] */
	{ .start = 0x393020, .end = 0x39303c },
	/* GEN12_OAM_CEC[0-7][0-1] */
	{ .start = 0x393040, .end = 0x39307c },
	/* MPES[0-7] */
	{ .start = 0x393200, .end = 0x39323c },
	{}
};

static const struct i915_range xehp_oa_b_counters[] = {
{ .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */
{ .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */
Expand Down Expand Up @@ -4429,6 +4473,8 @@ static const struct i915_range mtl_oa_mux_regs[] = {
{ .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */
{ .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */
{ .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */
{ .start = 0x38d100, .end = 0x38d114}, /* VISACTL */
{}
};

static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
Expand Down Expand Up @@ -4466,10 +4512,20 @@ static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
return reg_in_range_table(addr, gen12_oa_b_counters);
}

/*
 * Check whether @addr is an allowed OAM B-counter register.
 *
 * The address is rejected outright unless the device reports HAS_OAM() and
 * its graphics IP version is at least 12.70; otherwise the answer comes from
 * looking @addr up in mtl_oam_b_counters.
 */
static bool mtl_is_valid_oam_b_counter_addr(struct i915_perf *perf, u32 addr)
{
	if (!HAS_OAM(perf->i915) ||
	    GRAPHICS_VER_FULL(perf->i915) < IP_VER(12, 70))
		return false;

	return reg_in_range_table(addr, mtl_oam_b_counters);
}

static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
{
return reg_in_range_table(addr, xehp_oa_b_counters) ||
reg_in_range_table(addr, gen12_oa_b_counters);
reg_in_range_table(addr, gen12_oa_b_counters) ||
mtl_is_valid_oam_b_counter_addr(perf, addr);
}

static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
Expand Down Expand Up @@ -4839,12 +4895,86 @@ static u32 num_perf_groups_per_gt(struct intel_gt *gt)
return 1;
}

/*
 * Map a media @engine to its OAM perf group index.
 *
 * Returns PERF_GROUP_OAM_SAMEDIA_0 when the graphics IP version is 12.70 or
 * newer, and PERF_GROUP_INVALID for anything older.
 */
static u32 __oam_engine_group(struct intel_engine_cs *engine)
{
	if (GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))
		return PERF_GROUP_INVALID;

	/*
	 * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices
	 * within the gt use the same OAM. All MTL SKUs list 1 SA MEDIA.
	 * Warn if we are asked about an engine that is not on the media gt.
	 */
	drm_WARN_ON(&engine->i915->drm, engine->gt->type != GT_MEDIA);

	return PERF_GROUP_OAM_SAMEDIA_0;
}

static u32 __oa_engine_group(struct intel_engine_cs *engine)
{
if (engine->class == RENDER_CLASS)
switch (engine->class) {
case RENDER_CLASS:
return PERF_GROUP_OAG;
else

case VIDEO_DECODE_CLASS:
case VIDEO_ENHANCEMENT_CLASS:
return __oam_engine_group(engine);

default:
return PERF_GROUP_INVALID;
}
}

/*
 * Build the register set for an OAM unit whose registers are derived from
 * @base.
 *
 * The initializers are positional, so their order has to follow the member
 * order of struct i915_perf_regs (declared outside this view); keep the two
 * in sync when either changes.
 */
static struct i915_perf_regs __oam_regs(u32 base)
{
	struct i915_perf_regs regs = {
		base,
		GEN12_OAM_HEAD_POINTER(base),
		GEN12_OAM_TAIL_POINTER(base),
		GEN12_OAM_BUFFER(base),
		GEN12_OAM_CONTEXT_CONTROL(base),
		GEN12_OAM_CONTROL(base),
		GEN12_OAM_DEBUG(base),
		GEN12_OAM_STATUS(base),
		GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT,
	};

	return regs;
}

static struct i915_perf_regs __oag_regs(void)
{
return (struct i915_perf_regs) {
0,
GEN12_OAG_OAHEADPTR,
GEN12_OAG_OATAILPTR,
GEN12_OAG_OABUFFER,
GEN12_OAG_OAGLBCTXCTRL,
GEN12_OAG_OACONTROL,
GEN12_OAG_OA_DEBUG,
GEN12_OAG_OASTATUS,
GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT,
};
}

static void oa_init_groups(struct intel_gt *gt)
{
int i, num_groups = gt->perf.num_perf_groups;

for (i = 0; i < num_groups; i++) {
struct i915_perf_group *g = &gt->perf.group[i];

/* Fused off engines can result in a group with num_engines == 0 */
if (g->num_engines == 0)
continue;

if (i == PERF_GROUP_OAG && gt->type != GT_MEDIA) {
g->regs = __oag_regs();
g->type = TYPE_OAG;
} else if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) {
g->regs = __oam_regs(mtl_oa_base[i]);
g->type = TYPE_OAM;
}
}
}

static int oa_init_gt(struct intel_gt *gt)
Expand All @@ -4871,6 +5001,8 @@ static int oa_init_gt(struct intel_gt *gt)
gt->perf.num_perf_groups = num_groups;
gt->perf.group = g;

oa_init_groups(gt);

return 0;
}

Expand Down Expand Up @@ -4928,9 +5060,15 @@ static void oa_init_supported_formats(struct i915_perf *perf)
break;

case INTEL_DG2:
oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
break;

case INTEL_METEORLAKE:
oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
oa_format_add(perf, I915_OAM_FORMAT_MPEC8u64_B8_C8);
oa_format_add(perf, I915_OAM_FORMAT_MPEC8u32_B8_C8);
break;

default:
Expand Down Expand Up @@ -5175,8 +5313,10 @@ int i915_perf_ioctl_version(void)
*
* 6: Add DRM_I915_PERF_PROP_OA_ENGINE_CLASS and
* DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE
*
* 7: Add support for video decode and enhancement classes.
*/
return 6;
return 7;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
Expand Down
Loading

0 comments on commit 1cc064d

Please sign in to comment.