From 57e92d991e31ee237774aa9390586fad83630634 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 8 Oct 2024 17:23:22 +0200 Subject: [PATCH 01/78] drm/amdgpu: drop volatile from ring buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Volatile only prevents the compiler from re-ordering reads and writes. Since we always only modify the ring buffer from one CPU thread and have an explicit barrier before signaling the HW this should have no effect at all and just prevents compiler optimisations. While at it drop the local variables as well. Signed-off-by: Christian König Reviewed-by: Sunil Khatri Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 10 +++------- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 11 ++++------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 42f616c05f504e..a6e28fe3f8d66b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -109,21 +109,17 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw) void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { uint32_t occupied, chunk1, chunk2; - uint32_t *dst; occupied = ring->wptr & ring->buf_mask; - dst = (void *)&ring->ring[occupied]; chunk1 = ring->buf_mask + 1 - occupied; chunk1 = (chunk1 >= count) ? count : chunk1; chunk2 = count - chunk1; if (chunk1) - memset32(dst, ring->funcs->nop, chunk1); + memset32(&ring->ring[occupied], ring->funcs->nop, chunk1); - if (chunk2) { - dst = (void *)ring->ring; - memset32(dst, ring->funcs->nop, chunk2); - } + if (chunk2) + memset32(ring->ring, ring->funcs->nop, chunk2); ring->wptr += count; ring->wptr &= ring->ptr_mask; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 574336d6714af3..36fc9578c53c03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -246,7 +246,7 @@ struct amdgpu_ring { struct drm_gpu_scheduler sched; struct amdgpu_bo *ring_obj; - volatile uint32_t *ring; + uint32_t *ring; unsigned rptr_offs; u64 rptr_gpu_addr; volatile u32 *rptr_cpu_addr; @@ -288,7 +288,7 @@ struct amdgpu_ring { u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; unsigned int set_q_mode_offs; - volatile u32 *set_q_mode_ptr; + u32 *set_q_mode_ptr; u64 set_q_mode_token; unsigned vm_hub; unsigned vm_inv_eng; @@ -386,10 +386,8 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *src, int count_dw) { unsigned occupied, chunk1, chunk2; - void *dst; occupied = ring->wptr & ring->buf_mask; - dst = (void *)&ring->ring[occupied]; chunk1 = ring->buf_mask + 1 - occupied; chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1; chunk2 = count_dw - chunk1; @@ -397,12 +395,11 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, chunk2 <<= 2; if (chunk1) - memcpy(dst, src, chunk1); + memcpy(&ring->ring[occupied], src, chunk1); if (chunk2) { src += chunk1; - dst = (void *)ring->ring; - memcpy(dst, src, chunk2); + memcpy(ring->ring, src, chunk2); } ring->wptr += count_dw; From 7a65e88f13b1294a41814a6b679fbc3e3fedb68b Mon Sep 17 00:00:00 2001 From: Ovidiu Bunea Date: Fri, 11 Oct 2024 14:55:52 -0400 Subject: [PATCH 02/78] drm/amd/display: Optimize power up sequence for specific OLED [why & how] OLED power up sequence takes an extra 150ms via hardcoded delay, but there is a strict requirement on DisplayOn resume time. For customer panel, remove these delays to meet target until a cleaner solution is can be put in place. Reviewed-by: Charlene Liu Signed-off-by: Ovidiu Bunea Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dc_types.h | 1 + .../drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 3 ++- drivers/gpu/drm/amd/display/dc/link/link_dpms.c | 15 +++++++++++---- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 6d76dc110d387c..412cdb01a61a60 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1067,6 +1067,7 @@ struct dc_debug_options { unsigned int sharpen_policy; unsigned int scale_to_sharpness_policy; bool skip_full_updated_if_possible; + unsigned int enable_oled_edp_power_up_opt; }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 3401f4c9fb10e9..f0776484abb752 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -180,6 +180,7 @@ struct dc_panel_patch { unsigned int remove_sink_ext_caps; unsigned int disable_colorimetry; uint8_t blankstream_before_otg_off; + bool oled_optimize_display_on; }; struct dc_edid_caps { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index c31ec44ccd8c04..427fd6ea062a19 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1039,7 +1039,8 @@ void dce110_edp_backlight_control( link_transmitter_control(ctx->dc_bios, &cntl); if (enable && link->dpcd_sink_ext_caps.bits.oled && - !link->dc->config.edp_no_power_sequencing) { + !link->dc->config.edp_no_power_sequencing && + !link->local_sink->edid_caps.panel_patch.oled_optimize_display_on) { post_T7_delay += link->panel_config.pps.extra_post_t7_ms; msleep(post_T7_delay); } diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index c4e03482ba9ae4..41cab9ad6885ac 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -2082,6 +2082,9 @@ static enum dc_status enable_link_dp(struct dc_state *state, if (link_settings->link_rate == LINK_RATE_LOW) skip_video_pattern = false; + if (stream->sink_patches.oled_optimize_display_on) + set_default_brightness_aux(link); + if (perform_link_training_with_retries(link_settings, skip_video_pattern, lt_attempts, @@ -2105,10 +2108,14 @@ static enum dc_status enable_link_dp(struct dc_state *state, if (link->dpcd_sink_ext_caps.bits.oled == 1 || link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1 || link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1) { - set_default_brightness_aux(link); - if (link->dpcd_sink_ext_caps.bits.oled == 1) - msleep(bl_oled_enable_delay); - edp_backlight_enable_aux(link, true); + if (!stream->sink_patches.oled_optimize_display_on) { + set_default_brightness_aux(link); + if (link->dpcd_sink_ext_caps.bits.oled == 1) + msleep(bl_oled_enable_delay); + edp_backlight_enable_aux(link, true); + } else { + edp_backlight_enable_aux(link, true); + } } return status; From 69f22c5b454f7a3d77f323ed96b4ad6ac7bbe378 Mon Sep 17 00:00:00 2001 From: Hansen Dsouza Date: Tue, 15 Oct 2024 17:33:15 -0400 Subject: [PATCH 03/78] drm/amd/display: Add a boot option to reduce phy ssc for HBR3 [Why] Spread on DPREFCLK by 0.3 percent can have a negative effect on sink when PHY SSC is also spread by 0.3 percent [How] Add boot option for DMU to lower PHY SSC Reviewed-by: Nicholas Kazlauskas Signed-off-by: Hansen Dsouza Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 1 + drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 3 ++- drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index ff27229cc3a47d..b353c4ceb60dce 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -301,6 +301,7 @@ struct dmub_srv_hw_params { bool disallow_phy_access; bool disable_sldo_opt; bool enable_non_transparent_setconfig; + bool lower_hbr3_phy_ssc; }; /** diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 6edd3d34c7b59c..6d96a840d24d6a 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -694,7 +694,8 @@ union dmub_fw_boot_options { uint32_t ips_disable: 3; /* options to disable ips support*/ uint32_t ips_sequential_ono: 1; /**< 1 to enable sequential ONO IPS sequence */ uint32_t disable_sldo_opt: 1; /**< 1 to disable SLDO optimizations */ - uint32_t reserved : 7; /**< reserved */ + uint32_t lower_hbr3_phy_ssc: 1; /**< 1 to lower hbr3 phy ssc to 0.125 percent */ + uint32_t reserved : 6; /**< reserved */ } bits; /**< boot bits */ uint32_t all; /**< 32-bit access to bits */ }; diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c index 2ccad79053c586..3be315f1a44348 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c @@ -426,6 +426,7 @@ void dmub_dcn35_enable_dmub_boot_options(struct dmub_srv *dmub, const struct dmu boot_options.bits.ips_sequential_ono = params->ips_sequential_ono; boot_options.bits.disable_sldo_opt = params->disable_sldo_opt; boot_options.bits.enable_non_transparent_setconfig = params->enable_non_transparent_setconfig; + boot_options.bits.lower_hbr3_phy_ssc = params->lower_hbr3_phy_ssc; REG_WRITE(DMCUB_SCRATCH14, boot_options.all); } From c6df6213a95fa9674cc48d77042141942dd0809b Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Fri, 11 Oct 2024 13:51:11 -0400 Subject: [PATCH 04/78] drm/amd/display: Add P-State Stall Timeout Recovery Support for dcn401 [WHY&HOW] Adds support for P-State stall timeout detection in DCHUBBUB. Reviewed-by: Alvin Lee Signed-off-by: Dillon Varone Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dc/dml2/dml21/inc/dml_top_dchub_registers.h | 1 + .../dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 3 +++ .../drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h | 9 ++++++++- .../drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c | 12 ++++++++++++ .../drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h | 8 ++++++-- .../drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 5 +++++ drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h | 1 + .../amd/display/dc/resource/dcn401/dcn401_resource.h | 4 +++- 8 files changed, 39 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h index 83fc15bf13cf7c..25b607e7b726e9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_dchub_registers.h @@ -88,6 +88,7 @@ struct dml2_display_arb_regs { uint32_t sdpif_request_rate_limit; uint32_t allow_sdpif_rate_limit_when_cstate_req; uint32_t dcfclk_deep_sleep_hysteresis; + uint32_t pstate_stall_threshold; }; struct dml2_cursor_dlg_regs{ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 3ea54fd52e4683..92e43a1e4dd462 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -12236,6 +12236,8 @@ static void rq_dlg_get_dlg_reg( static void rq_dlg_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *arb_param) { + double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz; + arb_param->max_req_outstanding = mode_lib->soc.max_outstanding_reqs; arb_param->min_req_outstanding = mode_lib->soc.max_outstanding_reqs; // turn off the sat level feature if this set to max arb_param->sdpif_request_rate_limit = (3 * mode_lib->ip.words_per_channel * mode_lib->soc.clk_table.dram_config.channel_count) / 4; @@ -12247,6 +12249,7 @@ static void rq_dlg_get_arb_params(const struct dml2_display_cfg *display_cfg, co arb_param->compbuf_size = mode_lib->mp.CompressedBufferSizeInkByte / mode_lib->ip.compressed_buffer_segment_size_in_kbytes; arb_param->allow_sdpif_rate_limit_when_cstate_req = dml_get_hw_debug5(mode_lib); arb_param->dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib); + arb_param->pstate_stall_threshold = (unsigned int)(mode_lib->ip_caps.fams2.max_allow_delay_us * refclk_freq_in_mhz); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding); diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h index a1e2cde9c4cca3..4bd1dda0771961 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn10/dcn10_hubbub.h @@ -198,6 +198,8 @@ struct dcn_hubbub_registers { uint32_t DCHUBBUB_ARB_REFCYC_PER_META_TRIP_B; uint32_t DCHUBBUB_ARB_FRAC_URG_BW_MALL_A; uint32_t DCHUBBUB_ARB_FRAC_URG_BW_MALL_B; + uint32_t DCHUBBUB_TIMEOUT_DETECTION_CTRL1; + uint32_t DCHUBBUB_TIMEOUT_DETECTION_CTRL2; }; #define HUBBUB_REG_FIELD_LIST_DCN32(type) \ @@ -313,7 +315,12 @@ struct dcn_hubbub_registers { type DCN_VM_ERROR_VMID;\ type DCN_VM_ERROR_TABLE_LEVEL;\ type DCN_VM_ERROR_PIPE;\ - type DCN_VM_ERROR_INTERRUPT_STATUS + type DCN_VM_ERROR_INTERRUPT_STATUS;\ + type DCHUBBUB_TIMEOUT_ERROR_STATUS;\ + type DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD;\ + type DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD;\ + type DCHUBBUB_TIMEOUT_DETECTION_EN;\ + type DCHUBBUB_TIMEOUT_TIMER_RESET #define HUBBUB_STUTTER_REG_FIELD_LIST(type) \ type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A;\ diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c index 37d26fa0b6fbb9..5d658e9bef6401 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.c @@ -1192,6 +1192,17 @@ static void dcn401_wait_for_det_update(struct hubbub *hubbub, int hubp_inst) } } +static void dcn401_program_timeout_thresholds(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs) +{ + struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub); + + /* request backpressure and outstanding return threshold (unused)*/ + //REG_UPDATE(DCHUBBUB_TIMEOUT_DETECTION_CTRL1, DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD, arb_regs->req_stall_threshold); + + /* P-State stall threshold */ + REG_UPDATE(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD, arb_regs->pstate_stall_threshold); +} + static const struct hubbub_funcs hubbub4_01_funcs = { .update_dchub = hubbub2_update_dchub, .init_dchub_sys_ctx = hubbub3_init_dchub_sys_ctx, @@ -1215,6 +1226,7 @@ static const struct hubbub_funcs hubbub4_01_funcs = { .program_det_segments = dcn401_program_det_segments, .program_compbuf_segments = dcn401_program_compbuf_segments, .wait_for_det_update = dcn401_wait_for_det_update, + .program_timeout_thresholds = dcn401_program_timeout_thresholds, }; void hubbub401_construct(struct dcn20_hubbub *hubbub2, diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h index f35f19ba3e18b8..5f1960722ebddb 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn401/dcn401_hubbub.h @@ -123,8 +123,12 @@ HUBBUB_SF(DCHUBBUB_CLOCK_CNTL, DCFCLK_R_DCHUBBUB_GATE_DIS, mask_sh),\ HUBBUB_SF(DCHUBBUB_SDPIF_CFG0, SDPIF_PORT_CONTROL, mask_sh),\ HUBBUB_SF(DCHUBBUB_SDPIF_CFG1, SDPIF_MAX_NUM_OUTSTANDING, mask_sh),\ - HUBBUB_SF(DCHUBBUB_MEM_PWR_MODE_CTRL, DET_MEM_PWR_LS_MODE, mask_sh) - + HUBBUB_SF(DCHUBBUB_MEM_PWR_MODE_CTRL, DET_MEM_PWR_LS_MODE, mask_sh),\ + HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL1, DCHUBBUB_TIMEOUT_ERROR_STATUS, mask_sh),\ + HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL1, DCHUBBUB_TIMEOUT_REQ_STALL_THRESHOLD, mask_sh),\ + HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_PSTATE_STALL_THRESHOLD, mask_sh),\ + HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_DETECTION_EN, mask_sh),\ + HUBBUB_SF(DCHUBBUB_TIMEOUT_DETECTION_CTRL2, DCHUBBUB_TIMEOUT_TIMER_RESET, mask_sh) bool hubbub401_program_urgent_watermarks( struct hubbub *hubbub, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 3c70f40bf0475d..e8cc1bfa73f31a 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -1554,6 +1554,11 @@ void dcn401_optimize_bandwidth( pipe_ctx->dlg_regs.min_dst_y_next_start); } } + + /* update timeout thresholds */ + if (hubbub->funcs->program_timeout_thresholds) { + hubbub->funcs->program_timeout_thresholds(hubbub, &context->bw_ctx.bw.dcn.arb_regs); + } } void dcn401_fams2_global_control_lock(struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h index 67c32401893e86..6c1d41c0f09926 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h @@ -228,6 +228,7 @@ struct hubbub_funcs { void (*program_det_segments)(struct hubbub *hubbub, int hubp_inst, unsigned det_buffer_size_seg); void (*program_compbuf_segments)(struct hubbub *hubbub, unsigned compbuf_size_seg, bool safe_to_increase); void (*wait_for_det_update)(struct hubbub *hubbub, int hubp_inst); + void (*program_timeout_thresholds)(struct hubbub *hubbub, struct dml2_display_arb_regs *arb_regs); }; struct hubbub { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index bdafa7496ceae7..7c8d61db153d38 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -610,7 +610,9 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); SR(DCHUBBUB_CLOCK_CNTL), \ SR(DCHUBBUB_SDPIF_CFG0), \ SR(DCHUBBUB_SDPIF_CFG1), \ - SR(DCHUBBUB_MEM_PWR_MODE_CTRL) + SR(DCHUBBUB_MEM_PWR_MODE_CTRL), \ + SR(DCHUBBUB_TIMEOUT_DETECTION_CTRL1), \ + SR(DCHUBBUB_TIMEOUT_DETECTION_CTRL2) /* DCCG */ From 68bf95317ebf2cfa7105251e4279e951daceefb7 Mon Sep 17 00:00:00 2001 From: Ovidiu Bunea Date: Fri, 11 Oct 2024 11:12:19 -0400 Subject: [PATCH 05/78] Revert "drm/amd/display: update DML2 policy EnhancedPrefetchScheduleAccelerationFinal DCN35" This reverts commit 9dad21f910fc ("drm/amd/display: update DML2 policy EnhancedPrefetchScheduleAccelerationFinal DCN35") [why & how] The offending commit exposes a hang with lid close/open behavior. Both issues seem to be related to ODM 2:1 mode switching, so there is another issue generic to that sequence that needs to be investigated. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Nicholas Kazlauskas Signed-off-by: Ovidiu Bunea Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c index 11c904ae29586d..c4c52173ef2240 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_policy.c @@ -303,6 +303,7 @@ void build_unoptimized_policy_settings(enum dml_project_id project, struct dml_m if (project == dml_project_dcn35 || project == dml_project_dcn351) { policy->DCCProgrammingAssumesScanDirectionUnknownFinal = false; + policy->EnhancedPrefetchScheduleAccelerationFinal = 0; policy->AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter_if_possible; /*new*/ policy->UseOnlyMaxPrefetchModes = 1; } From a88b19b13fb41a3fa03ec67b5f57cc267fbfb160 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Tue, 15 Oct 2024 14:22:32 -0400 Subject: [PATCH 06/78] drm/amd/display: Reduce HPD Detection Interval for IPS Fix DP Compliance test 4.2.1.3, 4.2.2.8, 4.3.1.12, 4.3.1.13 when IPS enabled. Original HPD detection interval is set to 5s which violates DP compliance. Reduce the interval parameter, such that link training can be finished within 5 seconds. Fixes: afca033f10d3 ("drm/amd/display: Add periodic detection for IPS") Reviewed-by: Roman Li Signed-off-by: Fangzhi Zuo Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index ffa4d3965b4b02..171099a3ea05a0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -35,7 +35,7 @@ #include "amdgpu_dm_trace.h" #include "amdgpu_dm_debugfs.h" -#define HPD_DETECTION_PERIOD_uS 5000000 +#define HPD_DETECTION_PERIOD_uS 2000000 #define HPD_DETECTION_TIME_uS 100000 void amdgpu_dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc) From b04200432c4730c9bb730a66be46551c83d60263 Mon Sep 17 00:00:00 2001 From: Lohita Mudimela Date: Wed, 28 Aug 2024 17:04:04 +0530 Subject: [PATCH 07/78] drm/amd/display: Refactoring if and endif statements to enable DC_LOGGER [Why] For Header related changes for core [How] Refactoring if and endif statements to enable DC_LOGGER Reviewed-by: Mounika Adhuri Reviewed-by: Alvin Lee Signed-off-by: Lohita Mudimela Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 5 +++-- .../gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c | 6 +++--- .../gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 1 + .../drm/amd/display/dc/link/protocols/link_dp_capability.c | 3 ++- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index e93df3d6222e68..bc123f1884da32 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -50,12 +50,13 @@ #include "link.h" #include "logger_types.h" + + +#include "yellow_carp_offset.h" #undef DC_LOGGER #define DC_LOGGER \ clk_mgr->base.base.ctx->logger -#include "yellow_carp_offset.h" - #define regCLK1_CLK_PLL_REQ 0x0237 #define regCLK1_CLK_PLL_REQ_BASE_IDX 0 diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c index 29eff386505ab5..91d872d6d392b1 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c @@ -53,9 +53,6 @@ #include "logger_types.h" -#undef DC_LOGGER -#define DC_LOGGER \ - clk_mgr->base.base.ctx->logger #define MAX_INSTANCE 7 @@ -77,6 +74,9 @@ static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0, { { 0x0001B200, 0x0242DC00, 0, 0, 0, 0, 0, 0 } }, { { 0x0001B400, 0x0242E000, 0, 0, 0, 0, 0, 0 } } } }; +#undef DC_LOGGER +#define DC_LOGGER \ + clk_mgr->base.base.ctx->logger #define regCLK1_CLK_PLL_REQ 0x0237 #define regCLK1_CLK_PLL_REQ_BASE_IDX 0 diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 7d68006137a97f..90208a5096c033 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -55,6 +55,7 @@ #define DC_LOGGER \ clk_mgr->base.base.ctx->logger + #define regCLK1_CLK_PLL_REQ 0x0237 #define regCLK1_CLK_PLL_REQ_BASE_IDX 0 diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index e05b8fddf2af6e..93b81918216dd1 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -51,9 +51,10 @@ #include "dc_dmub_srv.h" #include "gpio_service_interface.h" +#define DC_TRACE_LEVEL_MESSAGE(...) /* do nothing */ + #define DC_LOGGER \ link->ctx->logger -#define DC_TRACE_LEVEL_MESSAGE(...) /* do nothing */ #ifndef MAX #define MAX(X, Y) ((X) > (Y) ? (X) : (Y)) From 4007f07a47de4a277f4760cac3aed1b31d973eea Mon Sep 17 00:00:00 2001 From: Leo Ma Date: Fri, 11 Oct 2024 14:08:34 -0400 Subject: [PATCH 08/78] drm/amd/display: Fix underflow when playing 8K video in full screen mode [Why&How] Flickering observed while playing 8k HEVC-10 bit video in full screen mode with black border. We didn't support this case for subvp. Make change to the existing check to disable subvp for this corner case. Reviewed-by: Alvin Lee Signed-off-by: Leo Ma Signed-off-by: Dillon Varone Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index 7a01a956e4bbdb..138b4b1e42ed7c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -859,7 +859,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm plane->immediate_flip = plane_state->flip_immediate; plane->composition.rect_out_height_spans_vactive = - plane_state->dst_rect.height >= stream->timing.v_addressable && + plane_state->dst_rect.height >= stream->src.height && stream->dst.height >= stream->timing.v_addressable; } From c56c0aca0a0ebb67cc9a609b4361b36dc2adb7c3 Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Wed, 16 Oct 2024 14:11:35 -0400 Subject: [PATCH 09/78] drm/amd/display: fix handling of max_downscale_src_width fail check in SPL [Why] If max_downscale_src_width check fails, we exit early from spl_calculate_scaler_params but dscl_prog_data is not fully populated. If viewport is left at 0, it can cause crash in dml. [How] Call spl_set_dscl_prog_data before we exit early from spl_calculate_scaler_params to populate dscl_prog_data Populate taps in spl_get_optimal_number_of_taps Reviewed-by: Alvin Lee Signed-off-by: Samson Tam Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/spl/dc_spl.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index f043c7e32e169e..403fd1221803bf 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -882,8 +882,13 @@ static bool spl_get_optimal_number_of_taps( if (spl_scratch->scl_data.viewport.width > spl_scratch->scl_data.h_active && max_downscale_src_width != 0 && - spl_scratch->scl_data.viewport.width > max_downscale_src_width) + spl_scratch->scl_data.viewport.width > max_downscale_src_width) { + memcpy(&spl_scratch->scl_data.taps, in_taps, sizeof(struct spl_taps)); + *enable_easf_v = false; + *enable_easf_h = false; + *enable_isharp = false; return false; + } /* Disable adaptive scaler and sharpener when integer scaling is enabled */ if (spl_in->scaling_quality.integer_scaling) { @@ -1765,12 +1770,12 @@ bool spl_calculate_scaler_params(struct spl_in *spl_in, struct spl_out *spl_out) // Clamp spl_clamp_viewport(&spl_scratch.scl_data.viewport); - if (!res) - return res; - // Save all calculated parameters in dscl_prog_data structure to program hw registers spl_set_dscl_prog_data(spl_in, &spl_scratch, spl_out, enable_easf_v, enable_easf_h, enable_isharp); + if (!res) + return res; + if (spl_in->lls_pref == LLS_PREF_YES) { if (spl_in->is_hdr_on) setup = HDR_L; From b0814fa3be76a8c62cbb9e02bb851b0ec234037d Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Wed, 16 Oct 2024 12:18:39 -0600 Subject: [PATCH 10/78] drm/amd/display: Remove useless assignments and variables [WHAT & HOW] misc0, temp and split_pipe are assigned but immediately re-assigned to other values. The early assignments are useless and are removed. Unused variables are removed as well. This fixes 5 UNUSED_VALUE issues reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Hung Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 1 - drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c | 2 -- drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c | 2 -- drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c | 2 -- .../drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c | 2 -- 5 files changed, 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index f0417ee6fcf8e8..f90fc154549a80 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1013,7 +1013,6 @@ static bool dc_can_pipe_disable_cursor(struct pipe_ctx *pipe_ctx) r2 = test_pipe->plane_res.scl_data.recout; r2_r = r2.x + r2.width; r2_b = r2.y + r2.height; - split_pipe = test_pipe; /** * There is another half plane on same layer because of diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index 5c2825bc9a8766..d199e4ed2e59e6 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -277,7 +277,6 @@ static void dce110_stream_encoder_dp_set_stream_attribute( uint32_t misc1 = 0; uint32_t h_blank; uint32_t h_back_porch; - uint8_t synchronous_clock = 0; /* asynchronous mode */ uint8_t colorimetry_bpc; uint8_t dynamic_range_rgb = 0; /*full range*/ uint8_t dynamic_range_ycbcr = 1; /*bt709*/ @@ -380,7 +379,6 @@ static void dce110_stream_encoder_dp_set_stream_attribute( break; } - misc0 = misc0 | synchronous_clock; misc0 = colorimetry_bpc << 5; if (REG(DP_MSA_TIMING_PARAM1)) { diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c index db7557a1c61347..8a3fbf95c48f2a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c @@ -76,7 +76,6 @@ UNP_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH_C__GRPH_PRIMARY_SURFACE_ADDRESS_HIGH_C_MAS mmUNP_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH_C, value); - temp = 0; value = 0; temp = address.low_part >> UNP_GRPH_PRIMARY_SURFACE_ADDRESS_C__GRPH_PRIMARY_SURFACE_ADDRESS_C__SHIFT; @@ -112,7 +111,6 @@ UNP_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH_L__GRPH_PRIMARY_SURFACE_ADDRESS_HIGH_L_MAS mmUNP_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH_L, value); - temp = 0; value = 0; temp = address.low_part >> UNP_GRPH_PRIMARY_SURFACE_ADDRESS_L__GRPH_PRIMARY_SURFACE_ADDRESS_L__SHIFT; diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c index f496e952ceecb8..d01a8b8f95954e 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn10/dcn10_stream_encoder.c @@ -255,7 +255,6 @@ void enc1_stream_encoder_dp_set_stream_attribute( uint32_t misc1 = 0; uint32_t h_blank; uint32_t h_back_porch; - uint8_t synchronous_clock = 0; /* asynchronous mode */ uint8_t colorimetry_bpc; uint8_t dp_pixel_encoding = 0; uint8_t dp_component_depth = 0; @@ -362,7 +361,6 @@ void enc1_stream_encoder_dp_set_stream_attribute( break; } - misc0 = misc0 | synchronous_clock; misc0 = colorimetry_bpc << 5; switch (output_color_space) { diff --git a/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c index 0a27e0942a1234..098c2a01a85099 100644 --- a/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dio/dcn401/dcn401_dio_stream_encoder.c @@ -447,7 +447,6 @@ void enc401_stream_encoder_dp_set_stream_attribute( uint32_t misc1 = 0; uint32_t h_blank; uint32_t h_back_porch; - uint8_t synchronous_clock = 0; /* asynchronous mode */ uint8_t colorimetry_bpc; uint8_t dp_pixel_encoding = 0; uint8_t dp_component_depth = 0; @@ -603,7 +602,6 @@ void enc401_stream_encoder_dp_set_stream_attribute( break; } - misc0 = misc0 | synchronous_clock; misc0 = colorimetry_bpc << 5; switch (output_color_space) { From d2bf27be839e89c6fd24b3ad3a2b38dcbfbf378a Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Wed, 16 Oct 2024 12:23:58 -0600 Subject: [PATCH 11/78] drm/amd/display: Simplify dcn35_is_ips_supported() [WHAT & HOW] The variable "ips_supported" is redundant and we can return from dcn35_smu_get_ips_supported directly. This fixes 1 UNUSED_VALUE issue reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Hung Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 90208a5096c033..1d4fe0de0f672d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -978,11 +978,8 @@ static void dcn35_exit_low_power_state(struct clk_mgr *clk_mgr_base) static bool dcn35_is_ips_supported(struct clk_mgr *clk_mgr_base) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); - bool ips_supported = true; - ips_supported = dcn35_smu_get_ips_supported(clk_mgr) ? true : false; - - return ips_supported; + return dcn35_smu_get_ips_supported(clk_mgr) ? true : false; } static void dcn35_init_clocks_fpga(struct clk_mgr *clk_mgr) From 7ef6f3ae4cd21a4ab86e04c7f11a6bdd92332b60 Mon Sep 17 00:00:00 2001 From: Joshua Aberback Date: Thu, 17 Oct 2024 15:56:30 -0400 Subject: [PATCH 12/78] drm/amd/display: Change MPC Tree visual confirm colours [Why] MPC background colours that use fractional components look different if MPC OGAM is in use vs in bypass mode. The current red and orange colours look very similar when OGAM is in bypass, so the colours need to change to be consistently very easy to tell apart. [How] Use colours that only have 0 or MAX values in each component Reviewed-by: Alvin Lee Signed-off-by: Joshua Aberback Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 2fdcf8d59b9f53..0419ee7f22a53b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -312,11 +312,11 @@ void get_mpctree_visual_confirm_color( { const struct tg_color pipe_colors[6] = { {MAX_TG_COLOR_VALUE, 0, 0}, /* red */ - {MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE / 4, 0}, /* orange */ {MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE, 0}, /* yellow */ {0, MAX_TG_COLOR_VALUE, 0}, /* green */ + {0, MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE}, /* cyan */ {0, 0, MAX_TG_COLOR_VALUE}, /* blue */ - {MAX_TG_COLOR_VALUE / 2, 0, MAX_TG_COLOR_VALUE / 2}, /* purple */ + {MAX_TG_COLOR_VALUE, 0, MAX_TG_COLOR_VALUE}, /* magenta */ }; struct pipe_ctx *top_pipe = pipe_ctx; From 1b7ac448cc544f6a4f8543423d9c2b726f3313fd Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Wed, 16 Oct 2024 13:08:02 -0400 Subject: [PATCH 13/78] drm/amd/display: Fix idle optimizations entry log [Why & How] Whether we really enter idle optimizations are decided within DC. Printing into dmesg before calling the DC API gives an incorrect indication that we are entering idle optimization in cases where its disabled manually. To fix this, remove the print in DM and add them in DC Reviewed-by: Alvin Lee Signed-off-by: Aurabindo Pillai Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 8 ++------ drivers/gpu/drm/amd/display/dc/core/dc.c | 8 ++++++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 171099a3ea05a0..8b5bea799a2413 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -252,10 +252,8 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) else if (dm->active_vblank_irq_count) dm->active_vblank_irq_count--; - if (dm->active_vblank_irq_count > 0) { - DRM_DEBUG_KMS("Allow idle optimizations (MALL): false\n"); + if (dm->active_vblank_irq_count > 0) dc_allow_idle_optimizations(dm->dc, false); - } /* * Control PSR based on vblank requirements from OS @@ -274,10 +272,8 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) vblank_work->stream->link->replay_settings.replay_feature_enabled); } - if (dm->active_vblank_irq_count == 0) { - DRM_DEBUG_KMS("Allow idle optimizations (MALL): true\n"); + if (dm->active_vblank_irq_count == 0) dc_allow_idle_optimizations(dm->dc, true); - } mutex_unlock(&dm->dc_lock); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 5a12fc75f97ff0..8a52fef46785c0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5429,8 +5429,10 @@ bool dc_set_ips_disable(struct dc *dc, unsigned int disable_ips) void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, char const *caller_name) { - if (dc->debug.disable_idle_power_optimizations) + if (dc->debug.disable_idle_power_optimizations) { + DC_LOG_DEBUG("%s: disabled\n", __func__); return; + } if (allow != dc->idle_optimizations_allowed) DC_LOG_IPS("%s: allow_idle old=%d new=%d (caller=%s)\n", __func__, @@ -5447,8 +5449,10 @@ void dc_allow_idle_optimizations_internal(struct dc *dc, bool allow, char const return; if (dc->hwss.apply_idle_power_optimizations && dc->clk_mgr != NULL && - dc->hwss.apply_idle_power_optimizations(dc, allow)) + dc->hwss.apply_idle_power_optimizations(dc, allow)) { dc->idle_optimizations_allowed = allow; + DC_LOG_DEBUG("%s: %s\n", __func__, allow ? "enabled" : "disabled"); + } } void dc_exit_ips_for_hw_access_internal(struct dc *dc, const char *caller_name) From 558cec793e73e5d22c96c56b1f70c83a8ce4b672 Mon Sep 17 00:00:00 2001 From: Ovidiu Bunea Date: Tue, 15 Oct 2024 18:20:54 -0400 Subject: [PATCH 14/78] drm/amd/display: Do not read DSC state if not in use [why & how] DSC may be power gated when coming out of S0i3, so avoid polling DSC registers since it will fail anyways. Only read if it is known that DSC is in use. Reviewed-by: Charlene Liu Signed-off-by: Ovidiu Bunea Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index bd309dbdf7b2a7..3cb4e99074113e 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -841,6 +841,7 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context) uint32_t num_opps = 0; uint32_t opp_id_src0 = OPP_ID_INVALID; uint32_t opp_id_src1 = OPP_ID_INVALID; + uint32_t optc_dsc_state = 0; // Step 1: To find out which OPTC is running & OPTC DSC is ON // We can't use res_pool->res_cap->num_timing_generator to check @@ -849,7 +850,6 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context) // Some ASICs would be fused display pipes less than the default setting. // In dcnxx_resource_construct function, driver would obatin real information. for (i = 0; i < dc->res_pool->timing_generator_count; i++) { - uint32_t optc_dsc_state = 0; struct timing_generator *tg = dc->res_pool->timing_generators[i]; if (tg->funcs->is_tg_enabled(tg)) { @@ -864,15 +864,18 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context) } } - // Step 2: To power down DSC but skip DSC of running OPTC + // Step 2: To power down DSC but skip DSC of running OPTC for (i = 0; i < dc->res_pool->res_cap->num_dsc; i++) { struct dcn_dsc_state s = {0}; - dc->res_pool->dscs[i]->funcs->dsc_read_state(dc->res_pool->dscs[i], &s); + /* avoid reading DSC state when it is not in use as it may be power gated */ + if (optc_dsc_state) { + dc->res_pool->dscs[i]->funcs->dsc_read_state(dc->res_pool->dscs[i], &s); - if ((s.dsc_opp_source == opp_id_src0 || s.dsc_opp_source == opp_id_src1) && - s.dsc_clock_en && s.dsc_fw_en) - continue; + if ((s.dsc_opp_source == opp_id_src0 || s.dsc_opp_source == opp_id_src1) && + s.dsc_clock_en && s.dsc_fw_en) + continue; + } pg_cntl->funcs->dsc_pg_control(pg_cntl, dc->res_pool->dscs[i]->inst, false); } From f3928f3d481920c748328192ec2ed4ab5d125d6b Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Fri, 18 Oct 2024 00:26:41 -0400 Subject: [PATCH 15/78] drm/amd/display: store sharpness 1dlut table in dscl_prog_data [Why] Previously dscl_prog_data stored pointer to sharpness 1dlut table. SPL had four pre-generated tables, one for each setup. This allowed us to minimize number of times we had to recalculate table when switching between setups. However, with dual display, this becomes an issue because for a given setup, we could have a different per app sharpness value than the global sharpness value. So the pre-generated table will change but both displays may point to the same table and one of them will have the wrong sharpness setting. [How] Store the sharpness 1dlut table in dscl_prog_data. This ensures that each display can have its own sharpness setting. Reviewed-by: Ilya Bakoulin Signed-off-by: Samson Tam Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c | 3 ++- drivers/gpu/drm/amd/display/dc/spl/dc_spl.c | 3 ++- drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h | 1 - drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h | 3 ++- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c index 5105fd580017c3..2f92e7d4981bae 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c @@ -1091,7 +1091,8 @@ void dpp401_dscl_set_scaler_manual_scale(struct dpp *dpp_base, /* ISHARP_DELTA_LUT */ dpp401_dscl_set_isharp_filter(dpp, scl_data->dscl_prog_data.isharp_delta); dpp->scl_data.dscl_prog_data.sharpness_level = scl_data->dscl_prog_data.sharpness_level; - dpp->scl_data.dscl_prog_data.isharp_delta = scl_data->dscl_prog_data.isharp_delta; + memcpy(dpp->scl_data.dscl_prog_data.isharp_delta, scl_data->dscl_prog_data.isharp_delta, + sizeof(uint32_t) * ISHARP_LUT_TABLE_SIZE); if (memcmp(&dpp->scl_data, scl_data, sizeof(*scl_data)) == 0) return; diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index 403fd1221803bf..133906e73a65c0 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -1607,7 +1607,8 @@ static void spl_set_isharp_data(struct dscl_prog_data *dscl_prog_data, spl_build_isharp_1dlut_from_reference_curve(ratio, setup, adp_sharpness, scale_to_sharpness_policy); - dscl_prog_data->isharp_delta = spl_get_pregen_filter_isharp_1D_lut(setup); + memcpy(dscl_prog_data->isharp_delta, spl_get_pregen_filter_isharp_1D_lut(setup), + sizeof(uint32_t) * ISHARP_LUT_TABLE_SIZE); dscl_prog_data->sharpness_level = adp_sharpness.sharpness_level; dscl_prog_data->isharp_en = 1; // ISHARP_EN diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h index afcc66206ca2a7..89af91e19b6ce8 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_isharp_filters.h @@ -7,7 +7,6 @@ #include "dc_spl_types.h" -#define ISHARP_LUT_TABLE_SIZE 32 const uint32_t *spl_get_filter_isharp_1D_lut_0(void); const uint32_t *spl_get_filter_isharp_1D_lut_0p5x(void); const uint32_t *spl_get_filter_isharp_1D_lut_1p0x(void); diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index fcb5d389592bef..8b00ccb1dfdad0 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -250,6 +250,7 @@ enum isharp_en { ISHARP_DISABLE, ISHARP_ENABLE }; +#define ISHARP_LUT_TABLE_SIZE 32 // Below struct holds values that can be directly used to program // hardware registers. No conversion/clamping is required struct dscl_prog_data { @@ -400,7 +401,7 @@ struct dscl_prog_data { uint32_t isharp_nl_en; // ISHARP_NL_EN ? TODO:check this struct isharp_lba isharp_lba; // ISHARP_LBA struct isharp_fmt isharp_fmt; // ISHARP_FMT - const uint32_t *isharp_delta; + uint32_t isharp_delta[ISHARP_LUT_TABLE_SIZE]; struct isharp_nldelta_sclip isharp_nldelta_sclip; // ISHARP_NLDELTA_SCLIP /* blur and scale filter */ const uint16_t *filter_blur_scale_v; From 12cfb5d8eaefbb594dbb0a5a58874e8c5aefba13 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Sun, 20 Oct 2024 03:35:34 -0400 Subject: [PATCH 16/78] drm/amd/display: [FW Promotion] Release 0.0.240.0 Add some scruct for secure display. Acked-by: Wayne Lin Signed-off-by: Taimur Hassan Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 53 +++++++++++++++---- 1 file changed, 44 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 6d96a840d24d6a..3aa6c60588b5f5 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -727,6 +727,7 @@ enum dmub_shared_state_feature_id { DMUB_SHARED_SHARE_FEATURE__INVALID = 0, DMUB_SHARED_SHARE_FEATURE__IPS_FW = 1, DMUB_SHARED_SHARE_FEATURE__IPS_DRIVER = 2, + DMUB_SHARED_SHARE_FEATURE__DEBUG_SETUP = 3, DMUB_SHARED_STATE_FEATURE__LAST, /* Total number of features. */ }; @@ -764,6 +765,14 @@ union dmub_shared_state_ips_driver_signals { */ #define DMUB_SHARED_STATE__IPS_FW_VERSION 1 +struct dmub_shared_state_debug_setup { + union { + struct { + uint32_t exclude_points[62]; + } profile_mode; + }; +}; + /** * struct dmub_shared_state_ips_fw - Firmware state for IPS. */ @@ -816,6 +825,7 @@ struct dmub_shared_state_feature_block { struct dmub_shared_state_feature_common common; /**< Generic data */ struct dmub_shared_state_ips_fw ips_fw; /**< IPS firmware state */ struct dmub_shared_state_ips_driver ips_driver; /**< IPS driver state */ + struct dmub_shared_state_debug_setup debug_setup; /**< Debug setup */ } data; /**< Shared state data. */ }; /* 256-bytes, fixed */ @@ -1158,6 +1168,10 @@ enum dmub_gpint_command { * RETURN: Total residency in microseconds - upper 32 bits */ DMUB_GPINT__GET_IPS_RESIDENCY_DURATION_US_HI = 133, + /** + * DESC: Setup debug configs. + */ + DMUB_GPINT__SETUP_DEBUG_MODE = 136, }; /** @@ -5172,7 +5186,34 @@ struct dmub_rb_cmd_get_usbc_cable_id { enum dmub_cmd_secure_display_type { DMUB_CMD__SECURE_DISPLAY_TEST_CMD = 0, /* test command to only check if inbox message works */ DMUB_CMD__SECURE_DISPLAY_CRC_STOP_UPDATE, - DMUB_CMD__SECURE_DISPLAY_CRC_WIN_NOTIFY + DMUB_CMD__SECURE_DISPLAY_CRC_WIN_NOTIFY, + DMUB_CMD__SECURE_DISPLAY_MULTIPLE_CRC_STOP_UPDATE, + DMUB_CMD__SECURE_DISPLAY_MULTIPLE_CRC_WIN_NOTIFY +}; + +#define MAX_ROI_NUM 2 + +struct dmub_cmd_roi_info { + uint16_t x_start; + uint16_t x_end; + uint16_t y_start; + uint16_t y_end; + uint8_t otg_id; + uint8_t phy_id; +}; + +struct dmub_cmd_roi_window_ctl { + uint16_t x_start; + uint16_t x_end; + uint16_t y_start; + uint16_t y_end; + bool enable; +}; + +struct dmub_cmd_roi_ctl_info { + uint8_t otg_id; + uint8_t phy_id; + struct dmub_cmd_roi_window_ctl roi_ctl[MAX_ROI_NUM]; }; /** @@ -5183,14 +5224,8 @@ struct dmub_rb_cmd_secure_display { /** * Data passed from driver to dmub firmware. */ - struct dmub_cmd_roi_info { - uint16_t x_start; - uint16_t x_end; - uint16_t y_start; - uint16_t y_end; - uint8_t otg_id; - uint8_t phy_id; - } roi_info; + struct dmub_cmd_roi_info roi_info; + struct dmub_cmd_roi_ctl_info mul_roi_ctl; }; /** From cc1977d86e0109de03efe02682faf3775af56fb8 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sun, 20 Oct 2024 21:41:45 -0400 Subject: [PATCH 17/78] drm/amd/display: 3.2.307 This version brings along following fixes: - Fix polling DSC registers during S0i3 - Fix idle optimizations entry log - Change MPC Tree visual confirm colours - Fix underflow when playing 8K video in full screen mode - Optimize power up sequence for specific OLED Acked-by: Wayne Lin Signed-off-by: Aric Cyr Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 412cdb01a61a60..72adbab589f5c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.306" +#define DC_VER "3.2.307" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 0880f58f9609f0200483a49429af0f050d281703 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 25 Oct 2024 15:56:39 +0100 Subject: [PATCH 18/78] drm/amd/pm: Vangogh: Fix kernel memory out of bounds write KASAN reports that the GPU metrics table allocated in vangogh_tables_init() is not large enough for the memset done in smu_cmn_init_soft_gpu_metrics(). Condensed report follows: [ 33.861314] BUG: KASAN: slab-out-of-bounds in smu_cmn_init_soft_gpu_metrics+0x73/0x200 [amdgpu] [ 33.861799] Write of size 168 at addr ffff888129f59500 by task mangoapp/1067 ... [ 33.861808] CPU: 6 UID: 1000 PID: 1067 Comm: mangoapp Tainted: G W 6.12.0-rc4 #356 1a56f59a8b5182eeaf67eb7cb8b13594dd23b544 [ 33.861816] Tainted: [W]=WARN [ 33.861818] Hardware name: Valve Galileo/Galileo, BIOS F7G0107 12/01/2023 [ 33.861822] Call Trace: [ 33.861826] [ 33.861829] dump_stack_lvl+0x66/0x90 [ 33.861838] print_report+0xce/0x620 [ 33.861853] kasan_report+0xda/0x110 [ 33.862794] kasan_check_range+0xfd/0x1a0 [ 33.862799] __asan_memset+0x23/0x40 [ 33.862803] smu_cmn_init_soft_gpu_metrics+0x73/0x200 [amdgpu 13b1bc364ec578808f676eba412c20eaab792779] [ 33.863306] vangogh_get_gpu_metrics_v2_4+0x123/0xad0 [amdgpu 13b1bc364ec578808f676eba412c20eaab792779] [ 33.864257] vangogh_common_get_gpu_metrics+0xb0c/0xbc0 [amdgpu 13b1bc364ec578808f676eba412c20eaab792779] [ 33.865682] amdgpu_dpm_get_gpu_metrics+0xcc/0x110 [amdgpu 13b1bc364ec578808f676eba412c20eaab792779] [ 33.866160] amdgpu_get_gpu_metrics+0x154/0x2d0 [amdgpu 13b1bc364ec578808f676eba412c20eaab792779] [ 33.867135] dev_attr_show+0x43/0xc0 [ 33.867147] sysfs_kf_seq_show+0x1f1/0x3b0 [ 33.867155] seq_read_iter+0x3f8/0x1140 [ 33.867173] vfs_read+0x76c/0xc50 [ 33.867198] ksys_read+0xfb/0x1d0 [ 33.867214] do_syscall_64+0x90/0x160 ... [ 33.867353] Allocated by task 378 on cpu 7 at 22.794876s: [ 33.867358] kasan_save_stack+0x33/0x50 [ 33.867364] kasan_save_track+0x17/0x60 [ 33.867367] __kasan_kmalloc+0x87/0x90 [ 33.867371] vangogh_init_smc_tables+0x3f9/0x840 [amdgpu] [ 33.867835] smu_sw_init+0xa32/0x1850 [amdgpu] [ 33.868299] amdgpu_device_init+0x467b/0x8d90 [amdgpu] [ 33.868733] amdgpu_driver_load_kms+0x19/0xf0 [amdgpu] [ 33.869167] amdgpu_pci_probe+0x2d6/0xcd0 [amdgpu] [ 33.869608] local_pci_probe+0xda/0x180 [ 33.869614] pci_device_probe+0x43f/0x6b0 Empirically we can confirm that the former allocates 152 bytes for the table, while the latter memsets the 168 large block. Root cause appears that when GPU metrics tables for v2_4 parts were added it was not considered to enlarge the table to fit. The fix in this patch is rather "brute force" and perhaps later should be done in a smarter way, by extracting and consolidating the part version to size logic to a common helper, instead of brute forcing the largest possible allocation. Nevertheless, for now this works and fixes the out of bounds write. v2: * Drop impossible v3_0 case. (Mario) Signed-off-by: Tvrtko Ursulin Fixes: 41cec40bc9ba ("drm/amd/pm: Vangogh: Add new gpu_metrics_v2_4 to acquire gpu_metrics") Cc: Mario Limonciello Cc: Evan Quan Cc: Wenyou Yang Cc: Alex Deucher Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20241025145639.19124-1-tursulin@igalia.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index a333ab827f48aa..6c43724c01dd7a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -242,7 +242,9 @@ static int vangogh_tables_init(struct smu_context *smu) goto err0_out; smu_table->metrics_time = 0; - smu_table->gpu_metrics_table_size = max(sizeof(struct gpu_metrics_v2_3), sizeof(struct gpu_metrics_v2_2)); + smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_2); + smu_table->gpu_metrics_table_size = max(smu_table->gpu_metrics_table_size, sizeof(struct gpu_metrics_v2_3)); + smu_table->gpu_metrics_table_size = max(smu_table->gpu_metrics_table_size, sizeof(struct gpu_metrics_v2_4)); smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL); if (!smu_table->gpu_metrics_table) goto err1_out; From 58a8c756fc4ca243fb5c070e1b9e0970f00757d9 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Fri, 27 Sep 2024 16:05:21 +0800 Subject: [PATCH 19/78] drm/amdgpu: correct the S3 abort check condition In the normal S3 entry, the TOS cycle counter is not reset during BIOS execution the _S3 method, so it doesn't determine whether the _S3 method is executed exactly. Howerver, the PM core performs the S3 suspend will set the PM_SUSPEND_FLAG_FW_RESUME bit if all the devices suspend successfully. Therefore, drivers can check the pm_suspend_global_flags bit(1) to detect the S3 suspend abort event. Fixes: 6704dbf71928 ("drm/amdgpu: update suspend status for aborting from deeper suspend") Signed-off-by: Prike Liang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 93e44e7ee3fabe..af739b13b6b597 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -578,16 +578,13 @@ soc15_asic_reset_method(struct amdgpu_device *adev) static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) { - u32 sol_reg; - - sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); - /* Will reset for the following suspend abort cases. - * 1) Only reset limit on APU side, dGPU hasn't checked yet. - * 2) S3 suspend abort and TOS already launched. + * 1) Only reset on APU side, dGPU hasn't checked yet. + * 2) S3 suspend aborted in the normal S3 suspend or + * performing pm core test. */ if (adev->flags & AMD_IS_APU && adev->in_s3 && - sol_reg) { + !pm_resume_via_firmware()) { adev->suspend_complete = false; return true; } else { @@ -603,11 +600,17 @@ static int soc15_asic_reset(struct amdgpu_device *adev) * successfully. So now, temporarily enable it for the * S3 suspend abort case. */ - if (((adev->apu_flags & AMD_APU_IS_RAVEN) || - (adev->apu_flags & AMD_APU_IS_RAVEN2)) && - !soc15_need_reset_on_resume(adev)) + + if ((adev->apu_flags & AMD_APU_IS_PICASSO || + !(adev->apu_flags & AMD_APU_IS_RAVEN)) && + soc15_need_reset_on_resume(adev)) + goto asic_reset; + + if ((adev->apu_flags & AMD_APU_IS_RAVEN) || + (adev->apu_flags & AMD_APU_IS_RAVEN2)) return 0; +asic_reset: switch (soc15_asic_reset_method(adev)) { case AMD_RESET_METHOD_PCI: dev_info(adev->dev, "PCI reset\n"); From d5e3d8a2a6cb8b8c8678e60ae8067c18ffbc2da2 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Mon, 14 Oct 2024 15:25:35 +0800 Subject: [PATCH 20/78] drm/amdgpu: clean up the suspend_complete To check the status of S3 suspend completion, use the PM core pm_suspend_global_flags bit(1) to detect S3 abort events. Therefore, clean up the AMDGPU driver's private flag suspend_complete. Signed-off-by: Prike Liang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 -- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/soc15.c | 7 ++----- drivers/gpu/drm/amd/amdgpu/soc21.c | 5 +++-- 5 files changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3af5acff8518fe..607998d8b1d52e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1111,8 +1111,6 @@ struct amdgpu_device { bool in_s3; bool in_s4; bool in_s0ix; - /* indicate amdgpu suspension status */ - bool suspend_complete; enum pp_mp1_state mp1_state; struct amdgpu_doorbell_index doorbell_index; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 46f75651394882..ff3ac30744dca9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2500,7 +2500,6 @@ static int amdgpu_pmops_suspend(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); - adev->suspend_complete = false; if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; else if (amdgpu_acpi_is_s3_active(adev)) @@ -2515,7 +2514,6 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); - adev->suspend_complete = true; if (amdgpu_acpi_should_gpu_reset(adev)) return amdgpu_asic_reset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 66947850d7e4e5..e9248a855ba770 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3288,8 +3288,8 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) * confirmed that the APU gfx10/gfx11 needn't such update. */ if (adev->flags & AMD_IS_APU && - adev->in_s3 && !adev->suspend_complete) { - DRM_INFO(" Will skip the CSB packet resubmit\n"); + adev->in_s3 && !pm_resume_via_firmware()) { + DRM_INFO("Will skip the CSB packet resubmit\n"); return 0; } r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index af739b13b6b597..533b4b2b432d07 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -584,13 +584,10 @@ static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) * performing pm core test. */ if (adev->flags & AMD_IS_APU && adev->in_s3 && - !pm_resume_via_firmware()) { - adev->suspend_complete = false; + !pm_resume_via_firmware()) return true; - } else { - adev->suspend_complete = true; + else return false; - } } static int soc15_asic_reset(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 1c07ebdc0d1fa3..93fbb3354720dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -897,9 +897,10 @@ static bool soc21_need_reset_on_resume(struct amdgpu_device *adev) /* Will reset for the following suspend abort cases. * 1) Only reset dGPU side. * 2) S3 suspend got aborted and TOS is active. + * As for dGPU suspend abort cases the SOL value + * will be kept as zero at this resume point. */ - if (!(adev->flags & AMD_IS_APU) && adev->in_s3 && - !adev->suspend_complete) { + if (!(adev->flags & AMD_IS_APU) && adev->in_s3) { sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); msleep(100); sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); From ea9d8863daa93f2bfd39ce820254a788b1fe0c1f Mon Sep 17 00:00:00 2001 From: Le Ma Date: Fri, 25 Oct 2024 17:43:57 +0800 Subject: [PATCH 21/78] drm/amdgpu: add generic func to check if ta fw is applicable Separated xgmi ta is required for specific APU, and driver needs parse the ta binary properly with aux xgmi ta packed. v2: make the check function more generic (Lijo) Signed-off-by: Le Ma Reviewed-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 34 +++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 1 + 2 files changed, 35 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index abd5e980c9c74e..ae24df65a3df7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -3563,6 +3563,36 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) return err; } +static bool is_ta_fw_applicable(struct psp_context *psp, + const struct psp_fw_bin_desc *desc) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t fw_version; + + switch (desc->fw_type) { + case TA_FW_TYPE_PSP_XGMI: + case TA_FW_TYPE_PSP_XGMI_AUX: + /* for now, AUX TA only exists on 13.0.6 ta bin, + * from v20.00.0x.14 + */ + if (amdgpu_ip_version(adev, MP0_HWIP, 0) == + IP_VERSION(13, 0, 6)) { + fw_version = le32_to_cpu(desc->fw_version); + + if (adev->flags & AMD_IS_APU && + (fw_version & 0xff) >= 0x14) + return desc->fw_type == TA_FW_TYPE_PSP_XGMI_AUX; + else + return desc->fw_type == TA_FW_TYPE_PSP_XGMI; + } + break; + default: + break; + } + + return true; +} + static int parse_ta_bin_descriptor(struct psp_context *psp, const struct psp_fw_bin_desc *desc, const struct ta_firmware_header_v2_0 *ta_hdr) @@ -3572,6 +3602,9 @@ static int parse_ta_bin_descriptor(struct psp_context *psp, if (!psp || !desc || !ta_hdr) return -EINVAL; + if (!is_ta_fw_applicable(psp, desc)) + return 0; + ucode_start_addr = (uint8_t *)ta_hdr + le32_to_cpu(desc->offset_bytes) + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); @@ -3584,6 +3617,7 @@ static int parse_ta_bin_descriptor(struct psp_context *psp, psp->asd_context.bin_desc.start_addr = ucode_start_addr; break; case TA_FW_TYPE_PSP_XGMI: + case TA_FW_TYPE_PSP_XGMI_AUX: psp->xgmi_context.context.bin_desc.fw_version = le32_to_cpu(desc->fw_version); psp->xgmi_context.context.bin_desc.size_bytes = le32_to_cpu(desc->size_bytes); psp->xgmi_context.context.bin_desc.start_addr = ucode_start_addr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 4e23419b92d4eb..4150ec0aa10d65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -163,6 +163,7 @@ enum ta_fw_type { TA_FW_TYPE_PSP_DTM, TA_FW_TYPE_PSP_RAP, TA_FW_TYPE_PSP_SECUREDISPLAY, + TA_FW_TYPE_PSP_XGMI_AUX, TA_FW_TYPE_MAX_INDEX, }; From 7daa0f6b2859201a851f4553bea755cec14acb41 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 25 Oct 2024 13:56:03 +0800 Subject: [PATCH 22/78] drm/amdgpu: optimize ACA log print - skip to print CE ACA log. - optimize ACA log print for MCA. Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 18ee60378727fc..3ca03b5e0f9139 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -348,6 +348,24 @@ static bool amdgpu_mca_bank_should_update(struct amdgpu_device *adev, enum amdgp return ret; } +static bool amdgpu_mca_bank_should_dump(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, + struct mca_bank_entry *entry) +{ + bool ret; + + switch (type) { + case AMDGPU_MCA_ERROR_TYPE_CE: + ret = amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS]); + break; + case AMDGPU_MCA_ERROR_TYPE_UE: + default: + ret = true; + break; + } + + return ret; +} + static int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set, struct ras_query_context *qctx) { @@ -373,7 +391,8 @@ static int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_mc amdgpu_mca_bank_set_add_entry(mca_set, &entry); - amdgpu_mca_smu_mca_bank_dump(adev, i, &entry, qctx); + if (amdgpu_mca_bank_should_dump(adev, type, &entry)) + amdgpu_mca_smu_mca_bank_dump(adev, i, &entry, qctx); } return 0; From cb67ff6272eceb5fcb2fe3b74f0293fa0706841a Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 22 Oct 2024 12:30:50 -0400 Subject: [PATCH 23/78] drm/amdkfd: flag per-queue reset support for gfx9 Flag KFD support for per-queue reset on GFX9 devices. Signed-off-by: Jonathan Kim Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 ++ include/uapi/linux/kfd_sysfs.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 3871591c9aec98..9476e30d6baa1b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1998,6 +1998,8 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 4, 2)) dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; + + dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED; } else { dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | HSA_DBG_WATCH_ADDR_MASK_HI_BIT; diff --git a/include/uapi/linux/kfd_sysfs.h b/include/uapi/linux/kfd_sysfs.h index 5e8d28617efad4..859b8e91d4d302 100644 --- a/include/uapi/linux/kfd_sysfs.h +++ b/include/uapi/linux/kfd_sysfs.h @@ -60,7 +60,8 @@ #define HSA_CAP_FLAGS_COHERENTHOSTACCESS 0x10000000 #define HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED 0x20000000 #define HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED 0x40000000 -#define HSA_CAP_RESERVED 0x800f8000 +#define HSA_CAP_PER_QUEUE_RESET_SUPPORTED 0x80000000 +#define HSA_CAP_RESERVED 0x000f8000 /* debug_prop bits in node properties */ #define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_MASK 0x0000000f From ecfe9b237687a55d596fff0650ccc8cc455edd3f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 23 Oct 2024 09:13:21 -0400 Subject: [PATCH 24/78] drm/amdgpu/smu13: fix profile reporting The following 3 commits landed in parallel: commit d7d2688bf4ea ("drm/amd/pm: update workload mask after the setting") commit 7a1613e47e65 ("drm/amdgpu/smu13: always apply the powersave optimization") commit 7c210ca5a2d7 ("drm/amdgpu: handle default profile on on devices without fullscreen 3D") While everything is set correctly, this caused the profile to be reported incorrectly because both the powersave and fullscreen3d bits were set in the mask and when the driver prints the profile, it looks for the first bit set. Fixes: d7d2688bf4ea ("drm/amd/pm: update workload mask after the setting") Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 3e2277abc75468..8d25cc1f218f10 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2473,7 +2473,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, DpmActivityMonitorCoeffInt_t *activity_monitor = &(activity_monitor_external.DpmActivityMonitorCoeffInt); int workload_type, ret = 0; - u32 workload_mask; + u32 workload_mask, selected_workload_mask; smu->power_profile_mode = input[size]; @@ -2540,7 +2540,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, if (workload_type < 0) return -EINVAL; - workload_mask = 1 << workload_type; + selected_workload_mask = workload_mask = 1 << workload_type; /* Add optimizations for SMU13.0.0/10. Reuse the power saving profile */ if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && @@ -2560,7 +2560,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, workload_mask, NULL); if (!ret) - smu->workload_mask = workload_mask; + smu->workload_mask = selected_workload_mask; return ret; } From 8fe7cf58ff0e46769b86b3890d657c8996b86bc6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 14 Oct 2024 10:51:10 -0400 Subject: [PATCH 25/78] drm/amdkfd: add an interface to query whether is KFD is active Add an interface to query whether KFD has any active queues. v2: fix build issues Acked-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 9 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 7 ++++++ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 25 ++++++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index b545940e512bbf..24343c312480a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -890,6 +890,15 @@ int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id) return kgd2kfd_start_sched(adev->kfd.dev, node_id); } +/* check if there are KFD queues active */ +bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id) +{ + if (!adev->kfd.init_complete) + return false; + + return kgd2kfd_compute_active(adev->kfd.dev, node_id); +} + /* Config CGTT_SQ_CLK_CTRL */ int amdgpu_amdkfd_config_sq_perfmon(struct amdgpu_device *adev, uint32_t xcp_id, bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 7e0a220725369c..4b80ad860639c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -268,6 +268,7 @@ int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id); int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id); int amdgpu_amdkfd_config_sq_perfmon(struct amdgpu_device *adev, uint32_t xcp_id, bool core_override_enable, bool reg_override_enable, bool perfmon_override_enable); +bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id); /* Read user wptr from a specified user address space with page fault @@ -431,6 +432,7 @@ int kgd2kfd_check_and_lock_kfd(void); void kgd2kfd_unlock_kfd(void); int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id); int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id); +bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id); #else static inline int kgd2kfd_init(void) { @@ -511,5 +513,10 @@ static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) { return 0; } + +static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) +{ + return false; +} #endif #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index fad1c8f2bc8334..348925254bff9c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -1392,6 +1392,13 @@ void kfd_dec_compute_active(struct kfd_node *node) WARN_ONCE(count < 0, "Compute profile ref. count error"); } +static bool kfd_compute_active(struct kfd_node *node) +{ + if (atomic_read(&node->kfd->compute_profile)) + return true; + return false; +} + void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) { /* @@ -1485,6 +1492,24 @@ int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id) return node->dqm->ops.halt(node->dqm); } +bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) +{ + struct kfd_node *node; + + if (!kfd->init_complete) + return false; + + if (node_id >= kfd->num_nodes) { + dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n", + node_id, kfd->num_nodes - 1); + return false; + } + + node = kfd->nodes[node_id]; + + return kfd_compute_active(node); +} + #if defined(CONFIG_DEBUG_FS) /* This function will send a package to HIQ to hang the HWS From 370e8fdbb09a4c60d355abd622a9be85428cf0b1 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 4 Oct 2024 23:56:42 +0100 Subject: [PATCH 26/78] drm/amd/display: Remove unused regamma functions calculate_user_regamma_coeff() and calculate_user_regamma_ramp() were added in 2018 in commit 55a01d4023ce ("drm/amd/display: Add user_regamma to color module") but never used. Remove them and their helpers. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Alex Deucher --- .../amd/display/modules/color/color_gamma.c | 307 ------------------ .../amd/display/modules/color/color_gamma.h | 11 - 2 files changed, 318 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index 3699e633801d2e..a71df052cf2523 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -1399,71 +1399,6 @@ static void scale_gamma_dx(struct pwl_float_data *pwl_rgb, pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b); } -/* todo: all these scale_gamma functions are inherently the same but - * take different structures as params or different format for ramp - * values. We could probably implement it in a more generic fashion - */ -static void scale_user_regamma_ramp(struct pwl_float_data *pwl_rgb, - const struct regamma_ramp *ramp, - struct dividers dividers) -{ - unsigned short max_driver = 0xFFFF; - unsigned short max_os = 0xFF00; - unsigned short scaler = max_os; - uint32_t i; - struct pwl_float_data *rgb = pwl_rgb; - struct pwl_float_data *rgb_last = rgb + GAMMA_RGB_256_ENTRIES - 1; - - i = 0; - do { - if (ramp->gamma[i] > max_os || - ramp->gamma[i + 256] > max_os || - ramp->gamma[i + 512] > max_os) { - scaler = max_driver; - break; - } - i++; - } while (i != GAMMA_RGB_256_ENTRIES); - - i = 0; - do { - rgb->r = dc_fixpt_from_fraction( - ramp->gamma[i], scaler); - rgb->g = dc_fixpt_from_fraction( - ramp->gamma[i + 256], scaler); - rgb->b = dc_fixpt_from_fraction( - ramp->gamma[i + 512], scaler); - - ++rgb; - ++i; - } while (i != GAMMA_RGB_256_ENTRIES); - - rgb->r = dc_fixpt_mul(rgb_last->r, - dividers.divider1); - rgb->g = dc_fixpt_mul(rgb_last->g, - dividers.divider1); - rgb->b = dc_fixpt_mul(rgb_last->b, - dividers.divider1); - - ++rgb; - - rgb->r = dc_fixpt_mul(rgb_last->r, - dividers.divider2); - rgb->g = dc_fixpt_mul(rgb_last->g, - dividers.divider2); - rgb->b = dc_fixpt_mul(rgb_last->b, - dividers.divider2); - - ++rgb; - - rgb->r = dc_fixpt_mul(rgb_last->r, - dividers.divider3); - rgb->g = dc_fixpt_mul(rgb_last->g, - dividers.divider3); - rgb->b = dc_fixpt_mul(rgb_last->b, - dividers.divider3); -} - /* * RS3+ color transform DDI - 1D LUT adjustment is composed with regamma here * Input is evenly distributed in the output color space as specified in @@ -1663,106 +1598,6 @@ static bool calculate_interpolated_hardware_curve( return true; } -/* The "old" interpolation uses a complicated scheme to build an array of - * coefficients while also using an array of 0-255 normalized to 0-1 - * Then there's another loop using both of the above + new scaled user ramp - * and we concatenate them. It also searches for points of interpolation and - * uses enums for positions. - * - * This function uses a different approach: - * user ramp is always applied on X with 0/255, 1/255, 2/255, ..., 255/255 - * To find index for hwX , we notice the following: - * i/255 <= hwX < (i+1)/255 <=> i <= 255*hwX < i+1 - * See apply_lut_1d which is the same principle, but on 4K entry 1D LUT - * - * Once the index is known, combined Y is simply: - * user_ramp(index) + (hwX-index/255)*(user_ramp(index+1) - user_ramp(index) - * - * We should switch to this method in all cases, it's simpler and faster - * ToDo one day - for now this only applies to ADL regamma to avoid regression - * for regular use cases (sRGB and PQ) - */ -static void interpolate_user_regamma(uint32_t hw_points_num, - struct pwl_float_data *rgb_user, - bool apply_degamma, - struct dc_transfer_func_distributed_points *tf_pts) -{ - uint32_t i; - uint32_t color = 0; - int32_t index; - int32_t index_next; - struct fixed31_32 *tf_point; - struct fixed31_32 hw_x; - struct fixed31_32 norm_factor = - dc_fixpt_from_int(255); - struct fixed31_32 norm_x; - struct fixed31_32 index_f; - struct fixed31_32 lut1; - struct fixed31_32 lut2; - struct fixed31_32 delta_lut; - struct fixed31_32 delta_index; - const struct fixed31_32 one = dc_fixpt_from_int(1); - - i = 0; - /* fixed_pt library has problems handling too small values */ - while (i != 32) { - tf_pts->red[i] = dc_fixpt_zero; - tf_pts->green[i] = dc_fixpt_zero; - tf_pts->blue[i] = dc_fixpt_zero; - ++i; - } - while (i <= hw_points_num + 1) { - for (color = 0; color < 3; color++) { - if (color == 0) - tf_point = &tf_pts->red[i]; - else if (color == 1) - tf_point = &tf_pts->green[i]; - else - tf_point = &tf_pts->blue[i]; - - if (apply_degamma) { - if (color == 0) - hw_x = coordinates_x[i].regamma_y_red; - else if (color == 1) - hw_x = coordinates_x[i].regamma_y_green; - else - hw_x = coordinates_x[i].regamma_y_blue; - } else - hw_x = coordinates_x[i].x; - - if (dc_fixpt_le(one, hw_x)) - hw_x = one; - - norm_x = dc_fixpt_mul(norm_factor, hw_x); - index = dc_fixpt_floor(norm_x); - if (index < 0 || index > 255) - continue; - - index_f = dc_fixpt_from_int(index); - index_next = (index == 255) ? index : index + 1; - - if (color == 0) { - lut1 = rgb_user[index].r; - lut2 = rgb_user[index_next].r; - } else if (color == 1) { - lut1 = rgb_user[index].g; - lut2 = rgb_user[index_next].g; - } else { - lut1 = rgb_user[index].b; - lut2 = rgb_user[index_next].b; - } - - // we have everything now, so interpolate - delta_lut = dc_fixpt_sub(lut2, lut1); - delta_index = dc_fixpt_sub(norm_x, index_f); - - *tf_point = dc_fixpt_add(lut1, - dc_fixpt_mul(delta_index, delta_lut)); - } - ++i; - } -} - static void build_new_custom_resulted_curve( uint32_t hw_points_num, struct dc_transfer_func_distributed_points *tf_pts) @@ -1784,29 +1619,6 @@ static void build_new_custom_resulted_curve( } } -static void apply_degamma_for_user_regamma(struct pwl_float_data_ex *rgb_regamma, - uint32_t hw_points_num, struct calculate_buffer *cal_buffer) -{ - uint32_t i; - - struct gamma_coefficients coeff; - struct pwl_float_data_ex *rgb = rgb_regamma; - const struct hw_x_point *coord_x = coordinates_x; - - build_coefficients(&coeff, TRANSFER_FUNCTION_SRGB); - - i = 0; - while (i != hw_points_num + 1) { - rgb->r = translate_from_linear_space_ex( - coord_x->x, &coeff, 0, cal_buffer); - rgb->g = rgb->r; - rgb->b = rgb->r; - ++coord_x; - ++rgb; - ++i; - } -} - static bool map_regamma_hw_to_x_user( const struct dc_gamma *ramp, struct pixel_gamma_point *coeff128, @@ -1855,125 +1667,6 @@ static bool map_regamma_hw_to_x_user( #define _EXTRA_POINTS 3 -bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf, - const struct regamma_lut *regamma, - struct calculate_buffer *cal_buffer, - const struct dc_gamma *ramp) -{ - struct gamma_coefficients coeff; - const struct hw_x_point *coord_x = coordinates_x; - uint32_t i = 0; - - do { - coeff.a0[i] = dc_fixpt_from_fraction( - regamma->coeff.A0[i], 10000000); - coeff.a1[i] = dc_fixpt_from_fraction( - regamma->coeff.A1[i], 1000); - coeff.a2[i] = dc_fixpt_from_fraction( - regamma->coeff.A2[i], 1000); - coeff.a3[i] = dc_fixpt_from_fraction( - regamma->coeff.A3[i], 1000); - coeff.user_gamma[i] = dc_fixpt_from_fraction( - regamma->coeff.gamma[i], 1000); - - ++i; - } while (i != 3); - - i = 0; - /* fixed_pt library has problems handling too small values */ - while (i != 32) { - output_tf->tf_pts.red[i] = dc_fixpt_zero; - output_tf->tf_pts.green[i] = dc_fixpt_zero; - output_tf->tf_pts.blue[i] = dc_fixpt_zero; - ++coord_x; - ++i; - } - while (i != MAX_HW_POINTS + 1) { - output_tf->tf_pts.red[i] = translate_from_linear_space_ex( - coord_x->x, &coeff, 0, cal_buffer); - output_tf->tf_pts.green[i] = translate_from_linear_space_ex( - coord_x->x, &coeff, 1, cal_buffer); - output_tf->tf_pts.blue[i] = translate_from_linear_space_ex( - coord_x->x, &coeff, 2, cal_buffer); - ++coord_x; - ++i; - } - - if (ramp && ramp->type == GAMMA_CS_TFM_1D) - apply_lut_1d(ramp, MAX_HW_POINTS, &output_tf->tf_pts); - - // this function just clamps output to 0-1 - build_new_custom_resulted_curve(MAX_HW_POINTS, &output_tf->tf_pts); - output_tf->type = TF_TYPE_DISTRIBUTED_POINTS; - - return true; -} - -bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf, - const struct regamma_lut *regamma, - struct calculate_buffer *cal_buffer, - const struct dc_gamma *ramp) -{ - struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts; - struct dividers dividers; - - struct pwl_float_data *rgb_user = NULL; - struct pwl_float_data_ex *rgb_regamma = NULL; - bool ret = false; - - if (regamma == NULL) - return false; - - output_tf->type = TF_TYPE_DISTRIBUTED_POINTS; - - rgb_user = kcalloc(GAMMA_RGB_256_ENTRIES + _EXTRA_POINTS, - sizeof(*rgb_user), - GFP_KERNEL); - if (!rgb_user) - goto rgb_user_alloc_fail; - - rgb_regamma = kcalloc(MAX_HW_POINTS + _EXTRA_POINTS, - sizeof(*rgb_regamma), - GFP_KERNEL); - if (!rgb_regamma) - goto rgb_regamma_alloc_fail; - - dividers.divider1 = dc_fixpt_from_fraction(3, 2); - dividers.divider2 = dc_fixpt_from_int(2); - dividers.divider3 = dc_fixpt_from_fraction(5, 2); - - scale_user_regamma_ramp(rgb_user, ®amma->ramp, dividers); - - if (regamma->flags.bits.applyDegamma == 1) { - apply_degamma_for_user_regamma(rgb_regamma, MAX_HW_POINTS, cal_buffer); - copy_rgb_regamma_to_coordinates_x(coordinates_x, - MAX_HW_POINTS, rgb_regamma); - } - - interpolate_user_regamma(MAX_HW_POINTS, rgb_user, - regamma->flags.bits.applyDegamma, tf_pts); - - // no custom HDR curves! - tf_pts->end_exponent = 0; - tf_pts->x_point_at_y1_red = 1; - tf_pts->x_point_at_y1_green = 1; - tf_pts->x_point_at_y1_blue = 1; - - if (ramp && ramp->type == GAMMA_CS_TFM_1D) - apply_lut_1d(ramp, MAX_HW_POINTS, &output_tf->tf_pts); - - // this function just clamps output to 0-1 - build_new_custom_resulted_curve(MAX_HW_POINTS, tf_pts); - - ret = true; - - kfree(rgb_regamma); -rgb_regamma_alloc_fail: - kfree(rgb_user); -rgb_user_alloc_fail: - return ret; -} - bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps, struct dc_transfer_func *input_tf, const struct dc_gamma *ramp, bool map_user_ramp) diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h index ee5c466613de7b..97e55278940e23 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h @@ -115,15 +115,4 @@ bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps, struct dc_transfer_func *output_tf, const struct dc_gamma *ramp, bool mapUserRamp); -bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf, - const struct regamma_lut *regamma, - struct calculate_buffer *cal_buffer, - const struct dc_gamma *ramp); - -bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf, - const struct regamma_lut *regamma, - struct calculate_buffer *cal_buffer, - const struct dc_gamma *ramp); - - #endif /* COLOR_MOD_COLOR_GAMMA_H_ */ From 8b89acc0b2baecfe331f5336e7ff1fcc5a44b062 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 9 Oct 2024 01:33:34 +0100 Subject: [PATCH 27/78] drm/amd/display: Remove unused cm3_helper_translate_curve_to_degamma_hw_format cm3_helper_translate_curve_to_degamma_hw_format() since it was added in 2020's commit 03f54d7d3448 ("drm/amd/display: Add DCN3 DPP") Remove it. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Alex Deucher --- .../amd/display/dc/dcn30/dcn30_cm_common.c | 151 ------------------ .../display/dc/dwb/dcn30/dcn30_cm_common.h | 4 - 2 files changed, 155 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c index 1e1038fb04e8dd..0690c346f2c521 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c @@ -280,157 +280,6 @@ bool cm3_helper_translate_curve_to_hw_format( return true; } -#define NUM_DEGAMMA_REGIONS 12 - - -bool cm3_helper_translate_curve_to_degamma_hw_format( - const struct dc_transfer_func *output_tf, - struct pwl_params *lut_params) -{ - struct curve_points3 *corner_points; - struct pwl_result_data *rgb_resulted; - struct pwl_result_data *rgb; - struct pwl_result_data *rgb_plus_1; - - int32_t region_start, region_end; - int32_t i; - uint32_t j, k, seg_distr[MAX_REGIONS_NUMBER], increment, start_index, hw_points; - - if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS) - return false; - - corner_points = lut_params->corner_points; - rgb_resulted = lut_params->rgb_resulted; - hw_points = 0; - - memset(lut_params, 0, sizeof(struct pwl_params)); - memset(seg_distr, 0, sizeof(seg_distr)); - - region_start = -NUM_DEGAMMA_REGIONS; - region_end = 0; - - - for (i = region_end - region_start; i < MAX_REGIONS_NUMBER ; i++) - seg_distr[i] = -1; - /* 12 segments - * segments are from 2^-12 to 0 - */ - for (i = 0; i < NUM_DEGAMMA_REGIONS ; i++) - seg_distr[i] = 4; - - for (k = 0; k < MAX_REGIONS_NUMBER; k++) { - if (seg_distr[k] != -1) - hw_points += (1 << seg_distr[k]); - } - - j = 0; - for (k = 0; k < (region_end - region_start); k++) { - increment = NUMBER_SW_SEGMENTS / (1 << seg_distr[k]); - start_index = (region_start + k + MAX_LOW_POINT) * - NUMBER_SW_SEGMENTS; - for (i = start_index; i < start_index + NUMBER_SW_SEGMENTS; - i += increment) { - if (j == hw_points - 1) - break; - if (i >= TRANSFER_FUNC_POINTS) - return false; - rgb_resulted[j].red = output_tf->tf_pts.red[i]; - rgb_resulted[j].green = output_tf->tf_pts.green[i]; - rgb_resulted[j].blue = output_tf->tf_pts.blue[i]; - j++; - } - } - - /* last point */ - start_index = (region_end + MAX_LOW_POINT) * NUMBER_SW_SEGMENTS; - rgb_resulted[hw_points - 1].red = output_tf->tf_pts.red[start_index]; - rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; - rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; - - corner_points[0].red.x = dc_fixpt_pow(dc_fixpt_from_int(2), - dc_fixpt_from_int(region_start)); - corner_points[0].green.x = corner_points[0].red.x; - corner_points[0].blue.x = corner_points[0].red.x; - corner_points[1].red.x = dc_fixpt_pow(dc_fixpt_from_int(2), - dc_fixpt_from_int(region_end)); - corner_points[1].green.x = corner_points[1].red.x; - corner_points[1].blue.x = corner_points[1].red.x; - - corner_points[0].red.y = rgb_resulted[0].red; - corner_points[0].green.y = rgb_resulted[0].green; - corner_points[0].blue.y = rgb_resulted[0].blue; - - /* see comment above, m_arrPoints[1].y should be the Y value for the - * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1) - */ - corner_points[1].red.y = rgb_resulted[hw_points - 1].red; - corner_points[1].green.y = rgb_resulted[hw_points - 1].green; - corner_points[1].blue.y = rgb_resulted[hw_points - 1].blue; - corner_points[1].red.slope = dc_fixpt_zero; - corner_points[1].green.slope = dc_fixpt_zero; - corner_points[1].blue.slope = dc_fixpt_zero; - - if (output_tf->tf == TRANSFER_FUNCTION_PQ) { - /* for PQ, we want to have a straight line from last HW X point, - * and the slope to be such that we hit 1.0 at 10000 nits. - */ - const struct fixed31_32 end_value = - dc_fixpt_from_int(125); - - corner_points[1].red.slope = dc_fixpt_div( - dc_fixpt_sub(dc_fixpt_one, corner_points[1].red.y), - dc_fixpt_sub(end_value, corner_points[1].red.x)); - corner_points[1].green.slope = dc_fixpt_div( - dc_fixpt_sub(dc_fixpt_one, corner_points[1].green.y), - dc_fixpt_sub(end_value, corner_points[1].green.x)); - corner_points[1].blue.slope = dc_fixpt_div( - dc_fixpt_sub(dc_fixpt_one, corner_points[1].blue.y), - dc_fixpt_sub(end_value, corner_points[1].blue.x)); - } - - lut_params->hw_points_num = hw_points; - - k = 0; - for (i = 1; i < MAX_REGIONS_NUMBER; i++) { - if (seg_distr[k] != -1) { - lut_params->arr_curve_points[k].segments_num = - seg_distr[k]; - lut_params->arr_curve_points[i].offset = - lut_params->arr_curve_points[k].offset + (1 << seg_distr[k]); - } - k++; - } - - if (seg_distr[k] != -1) - lut_params->arr_curve_points[k].segments_num = seg_distr[k]; - - rgb = rgb_resulted; - rgb_plus_1 = rgb_resulted + 1; - - i = 1; - while (i != hw_points + 1) { - if (dc_fixpt_lt(rgb_plus_1->red, rgb->red)) - rgb_plus_1->red = rgb->red; - if (dc_fixpt_lt(rgb_plus_1->green, rgb->green)) - rgb_plus_1->green = rgb->green; - if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue)) - rgb_plus_1->blue = rgb->blue; - - rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red); - rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green); - rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue); - - ++rgb_plus_1; - ++rgb; - ++i; - } - cm3_helper_convert_to_custom_float(rgb_resulted, - lut_params->corner_points, - hw_points, false); - - return true; -} - bool cm3_helper_convert_to_custom_float( struct pwl_result_data *rgb_resulted, struct curve_points3 *corner_points, diff --git a/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h index bd98b327a6c70e..b86347c9b0389d 100644 --- a/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h +++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_cm_common.h @@ -63,10 +63,6 @@ bool cm3_helper_translate_curve_to_hw_format( const struct dc_transfer_func *output_tf, struct pwl_params *lut_params, bool fixpoint); -bool cm3_helper_translate_curve_to_degamma_hw_format( - const struct dc_transfer_func *output_tf, - struct pwl_params *lut_params); - bool cm3_helper_convert_to_custom_float( struct pwl_result_data *rgb_resulted, struct curve_points3 *corner_points, From 5fd95dab6094ba0b851767fc460c2806eaafe8bd Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 10 Oct 2024 21:51:54 +0100 Subject: [PATCH 28/78] drm/amd/display: Remove last parts of timing_trace Commit c2c2ce1e9623 ("drm/amd/display: Optimize passive update planes.") removed the last caller of context_timing_trace. Remove it. With that gone, no one is now looking at the 'timing_trace' flag, remove it and all the places that set it. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_debug.c | 42 ------------------- drivers/gpu/drm/amd/display/dc/dc.h | 1 - .../dc/resource/dcn10/dcn10_resource.c | 2 - .../dc/resource/dcn20/dcn20_resource.c | 1 - .../dc/resource/dcn201/dcn201_resource.c | 1 - .../dc/resource/dcn21/dcn21_resource.c | 1 - .../dc/resource/dcn30/dcn30_resource.c | 1 - .../dc/resource/dcn301/dcn301_resource.c | 1 - .../dc/resource/dcn302/dcn302_resource.c | 1 - .../dc/resource/dcn303/dcn303_resource.c | 1 - .../dc/resource/dcn31/dcn31_resource.c | 1 - .../dc/resource/dcn314/dcn314_resource.c | 1 - .../dc/resource/dcn315/dcn315_resource.c | 1 - .../dc/resource/dcn316/dcn316_resource.c | 1 - .../dc/resource/dcn32/dcn32_resource.c | 1 - .../dc/resource/dcn321/dcn321_resource.c | 1 - .../dc/resource/dcn35/dcn35_resource.c | 1 - .../dc/resource/dcn351/dcn351_resource.c | 1 - .../dc/resource/dcn401/dcn401_resource.c | 1 - .../amd/display/include/logger_interface.h | 4 -- 20 files changed, 65 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c index 801cdbc8117d9b..0bb25c53724386 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c @@ -46,11 +46,6 @@ DC_LOG_IF_TRACE(__VA_ARGS__); \ } while (0) -#define TIMING_TRACE(...) do {\ - if (dc->debug.timing_trace) \ - DC_LOG_SYNC(__VA_ARGS__); \ -} while (0) - #define CLOCK_TRACE(...) do {\ if (dc->debug.clock_trace) \ DC_LOG_BANDWIDTH_CALCS(__VA_ARGS__); \ @@ -306,43 +301,6 @@ void post_surface_trace(struct dc *dc) } -void context_timing_trace( - struct dc *dc, - struct resource_context *res_ctx) -{ - int i; - int h_pos[MAX_PIPES] = {0}, v_pos[MAX_PIPES] = {0}; - struct crtc_position position; - unsigned int underlay_idx = dc->res_pool->underlay_pipe_index; - DC_LOGGER_INIT(dc->ctx->logger); - - - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; - /* get_position() returns CRTC vertical/horizontal counter - * hence not applicable for underlay pipe - */ - if (pipe_ctx->stream == NULL || pipe_ctx->pipe_idx == underlay_idx) - continue; - - pipe_ctx->stream_res.tg->funcs->get_position(pipe_ctx->stream_res.tg, &position); - h_pos[i] = position.horizontal_count; - v_pos[i] = position.vertical_count; - } - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; - - if (pipe_ctx->stream == NULL || pipe_ctx->pipe_idx == underlay_idx) - continue; - - TIMING_TRACE("OTG_%d H_tot:%d V_tot:%d H_pos:%d V_pos:%d\n", - pipe_ctx->stream_res.tg->inst, - pipe_ctx->stream->timing.h_total, - pipe_ctx->stream->timing.v_total, - h_pos[i], v_pos[i]); - } -} - void context_clock_trace( struct dc *dc, struct dc_state *context) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 72adbab589f5c5..76130bbf2fe4b5 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -868,7 +868,6 @@ struct dc_debug_options { bool sanity_checks; bool max_disp_clk; bool surface_trace; - bool timing_trace; bool clock_trace; bool validation_trace; bool bandwidth_calcs_trace; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c index 05d6d41ef9d31d..4f1bd71b9ad9fb 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn10/dcn10_resource.c @@ -533,7 +533,6 @@ static const struct dc_debug_options debug_defaults_drv = { .sanity_checks = true, .disable_dmcu = false, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, /* raven smu dones't allow 0 disp clk, @@ -563,7 +562,6 @@ static const struct dc_debug_options debug_defaults_drv = { static const struct dc_debug_options debug_defaults_diags = { .disable_dmcu = false, .force_abm_enable = false, - .timing_trace = true, .clock_trace = true, .disable_stutter = true, .disable_pplib_clock_request = true, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index 288189913e1ec8..189d0c85872e6f 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -706,7 +706,6 @@ static const struct resource_caps res_cap_nv14 = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = false, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c index 15180ad71513d9..d3d67d36652308 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c @@ -600,7 +600,6 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, .pipe_split_policy = MPC_SPLIT_DYNAMIC, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c index 14b28841657d21..021ba8ac5c8c9c 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c @@ -610,7 +610,6 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = false, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, .min_disp_clk_khz = 100000, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c index baa4e2647dadf4..cd31e4f16c14b4 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c @@ -711,7 +711,6 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, //No DMCU on DCN30 .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, .pipe_split_policy = MPC_SPLIT_DYNAMIC, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c index d8a7c2cf05de3e..a9816affd312db 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c @@ -682,7 +682,6 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_dpp_power_gate = false, .disable_hubp_power_gate = false, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c index 40c20b04635a01..02af8b8f4d277b 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c @@ -81,7 +81,6 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, .pipe_split_policy = MPC_SPLIT_DYNAMIC, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c index daf1b65fd08810..7002a8dd358a55 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c @@ -82,7 +82,6 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, .pipe_split_policy = MPC_SPLIT_AVOID, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index 36bb26182e117a..f71a5b8286b2df 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -858,7 +858,6 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, .pipe_split_policy = MPC_SPLIT_DYNAMIC, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index 58a5fbcf22bfbc..8aa10da68432ea 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -876,7 +876,6 @@ static const struct dc_debug_options debug_defaults_drv = { .replay_skip_crtc_disabled = true, .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_dpp_power_gate = false, .disable_hubp_power_gate = false, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c index 3acad708c31b8f..6c3295259a81ed 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c @@ -858,7 +858,6 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_z10 = true, /*hw not support it*/ .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, .pipe_split_policy = MPC_SPLIT_DYNAMIC, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c index ce56f5d162c028..6edaaadcb173b8 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c @@ -853,7 +853,6 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_z10 = true, /*hw not support it*/ .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, .pipe_split_policy = MPC_SPLIT_DYNAMIC, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index aaaa888d112dac..01d1a11d554553 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -689,7 +689,6 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, .pipe_split_policy = MPC_SPLIT_AVOID, // Due to CRB, no need to MPC split anymore diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 35acc13cb5a9cc..5cb74fd9cb7d29 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -686,7 +686,6 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, .pipe_split_policy = MPC_SPLIT_AVOID, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 795f2c71c70fba..6cc2960b6104e6 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -712,7 +712,6 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, .pipe_split_policy = MPC_SPLIT_AVOID, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 0b8dc2eff596e6..d87e2641cda1af 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -692,7 +692,6 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, .pipe_split_policy = MPC_SPLIT_AVOID, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index 306b4117e21937..db93bac247c0f4 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -685,7 +685,6 @@ static const struct dc_plane_cap plane_cap = { static const struct dc_debug_options debug_defaults_drv = { .disable_dmcu = true, .force_abm_enable = false, - .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, .pipe_split_policy = MPC_SPLIT_AVOID, diff --git a/drivers/gpu/drm/amd/display/include/logger_interface.h b/drivers/gpu/drm/amd/display/include/logger_interface.h index 02c23b04d34be3..058f882d5bddc9 100644 --- a/drivers/gpu/drm/amd/display/include/logger_interface.h +++ b/drivers/gpu/drm/amd/display/include/logger_interface.h @@ -52,10 +52,6 @@ void update_surface_trace( void post_surface_trace(struct dc *dc); -void context_timing_trace( - struct dc *dc, - struct resource_context *res_ctx); - void context_clock_trace( struct dc *dc, struct dc_state *context); From efe6a8774375ddcbdd46fb920be55cc2d0120836 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 14 Oct 2024 11:58:34 -0400 Subject: [PATCH 29/78] drm/amdgpu: fix fairness in enforce isolation handling Make sure KFD gets a turn when serializing access to the GC IP. Currently non-KFD jobs can starve KFD if they submit often enough. This patch prevents that by stalling non-KFD if its time period has elapsed. v2: fix units v3: check enablement properly Acked-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 53 ++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 + 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 607998d8b1d52e..7645e498faa411 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -118,7 +118,7 @@ #define MAX_GPU_INSTANCE 64 -#define GFX_SLICE_PERIOD msecs_to_jiffies(250) +#define GFX_SLICE_PERIOD_MS 250 struct amdgpu_gpu_instance { struct amdgpu_device *adev; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index e96984c53e72cd..b8cc4b146bdc64 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1752,7 +1752,7 @@ static void amdgpu_gfx_kfd_sch_ctrl(struct amdgpu_device *adev, u32 idx, if (adev->gfx.kfd_sch_req_count[idx] == 0 && adev->gfx.kfd_sch_inactive[idx]) { schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, - GFX_SLICE_PERIOD); + msecs_to_jiffies(adev->gfx.enforce_isolation_time[idx])); } } else { if (adev->gfx.kfd_sch_req_count[idx] == 0) { @@ -1807,8 +1807,9 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]); } if (fences) { + /* we've already had our timeslice, so let's wrap this up */ schedule_delayed_work(&adev->gfx.enforce_isolation[idx].work, - GFX_SLICE_PERIOD); + msecs_to_jiffies(1)); } else { /* Tell KFD to resume the runqueue */ if (adev->kfd.init_complete) { @@ -1821,6 +1822,51 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) mutex_unlock(&adev->enforce_isolation_mutex); } +static void +amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev, + u32 idx) +{ + unsigned long cjiffies; + bool wait = false; + + mutex_lock(&adev->enforce_isolation_mutex); + if (adev->enforce_isolation[idx]) { + /* set the initial values if nothing is set */ + if (!adev->gfx.enforce_isolation_jiffies[idx]) { + adev->gfx.enforce_isolation_jiffies[idx] = jiffies; + adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS; + } + /* Make sure KFD gets a chance to run */ + if (amdgpu_amdkfd_compute_active(adev, idx)) { + cjiffies = jiffies; + if (time_after(cjiffies, adev->gfx.enforce_isolation_jiffies[idx])) { + cjiffies -= adev->gfx.enforce_isolation_jiffies[idx]; + if ((jiffies_to_msecs(cjiffies) >= GFX_SLICE_PERIOD_MS)) { + /* if our time is up, let KGD work drain before scheduling more */ + wait = true; + /* reset the timer period */ + adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS; + } else { + /* set the timer period to what's left in our time slice */ + adev->gfx.enforce_isolation_time[idx] = + GFX_SLICE_PERIOD_MS - jiffies_to_msecs(cjiffies); + } + } else { + /* if jiffies wrap around we will just wait a little longer */ + adev->gfx.enforce_isolation_jiffies[idx] = jiffies; + } + } else { + /* if there is no KFD work, then set the full slice period */ + adev->gfx.enforce_isolation_jiffies[idx] = jiffies; + adev->gfx.enforce_isolation_time[idx] = GFX_SLICE_PERIOD_MS; + } + } + mutex_unlock(&adev->enforce_isolation_mutex); + + if (wait) + msleep(GFX_SLICE_PERIOD_MS); +} + void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -1837,6 +1883,9 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) if (idx >= MAX_XCP) return; + /* Don't submit more work until KFD has had some time */ + amdgpu_gfx_enforce_isolation_wait_for_kfd(adev, idx); + mutex_lock(&adev->enforce_isolation_mutex); if (adev->enforce_isolation[idx]) { if (adev->kfd.init_complete) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index f710178a21bc77..af9dbd760fee2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -472,6 +472,8 @@ struct amdgpu_gfx { struct mutex kfd_sch_mutex; u64 kfd_sch_req_count[MAX_XCP]; bool kfd_sch_inactive[MAX_XCP]; + unsigned long enforce_isolation_jiffies[MAX_XCP]; + unsigned long enforce_isolation_time[MAX_XCP]; }; struct amdgpu_gfx_ras_reg_entry { From 35984fd4a093ccb9e0bb82db4cac5c1bf2df7d93 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Oct 2024 14:13:58 -0400 Subject: [PATCH 30/78] drm/amdgpu: add ring reset messages Add messages to make it clear when a per ring reset happens. This is helpful for debugging and aligns with other reset methods. v2: add ring name in success/fail messages (Lijo) Reviewed-by: Lijo Lazar Reviewed-by: Kent Russell (v1) Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 102742f1faa243..cbae2fc7b94e0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -137,6 +137,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) /* attempt a per ring reset */ if (amdgpu_gpu_recovery && ring->funcs->reset) { + dev_err(adev->dev, "Starting %s ring reset\n", s_job->sched->name); /* stop the scheduler, but don't mess with the * bad job yet because if ring reset fails * we'll fall back to full GPU reset. @@ -150,8 +151,10 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) amdgpu_fence_driver_force_completion(ring); if (amdgpu_ring_sched_ready(ring)) drm_sched_start(&ring->sched); + dev_err(adev->dev, "Ring %s reset success\n", ring->sched.name); goto exit; } + dev_err(adev->dev, "Ring %s reset failure\n", ring->sched.name); } if (amdgpu_device_should_recover_gpu(ring->adev)) { From a1144da794adedb9447437c57d69add56494309d Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Wed, 30 Oct 2024 04:27:58 +0800 Subject: [PATCH 31/78] drm/amdgpu: Fix the memory allocation issue in amdgpu_discovery_get_nps_info() Fix two issues with memory allocation in amdgpu_discovery_get_nps_info() for mem_ranges: - Add a check for allocation failure to avoid dereferencing a null pointer. - As suggested by Christophe, use kvcalloc() for memory allocation, which checks for multiplication overflow. Additionally, assign the output parameters nps_type and range_cnt after the kvcalloc() call to prevent modifying the output parameters in case of an error return. Fixes: b194d21b9bcc ("drm/amdgpu: Use NPS ranges from discovery table") Suggested-by: Christophe JAILLET Reviewed-by: Lijo Lazar Signed-off-by: Li Huafei Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 73f4d56c5de4ac..1040204ac8b97c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1795,11 +1795,13 @@ int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev, switch (le16_to_cpu(nps_info->v1.header.version_major)) { case 1: + mem_ranges = kvcalloc(nps_info->v1.count, + sizeof(*mem_ranges), + GFP_KERNEL); + if (!mem_ranges) + return -ENOMEM; *nps_type = nps_info->v1.nps_type; *range_cnt = nps_info->v1.count; - mem_ranges = kvzalloc( - *range_cnt * sizeof(struct amdgpu_gmc_memrange), - GFP_KERNEL); for (i = 0; i < *range_cnt; i++) { mem_ranges[i].base_address = nps_info->v1.instance_info[i].base_address; From f2863650384b32f1a511e338f102b819044ca930 Mon Sep 17 00:00:00 2001 From: Yunxiang Li Date: Thu, 24 Oct 2024 10:23:38 +0100 Subject: [PATCH 32/78] drm/amdgpu: make drm-memory-* report resident memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The old behavior reports the resident memory usage for this key and the documentation say so as well. However this was accidentally changed to include buffers that was evicted. Fixes: 04bdba46542c ("drm/amdgpu: Use drm_print_memory_stats helper from fdinfo") Signed-off-by: Yunxiang Li Reviewed-by: Tvrtko Ursulin Acked-by: Christian König Signed-off-by: Tvrtko Ursulin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 7 ++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 - 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c index 00a4ab082459f2..8281dd45faaa0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "amdgpu.h" #include "amdgpu_vm.h" @@ -95,11 +96,11 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) /* Legacy amdgpu keys, alias to drm-resident-memory-: */ drm_printf(p, "drm-memory-vram:\t%llu KiB\n", - stats[TTM_PL_VRAM].total/1024UL); + stats[TTM_PL_VRAM].drm.resident/1024UL); drm_printf(p, "drm-memory-gtt: \t%llu KiB\n", - stats[TTM_PL_TT].total/1024UL); + stats[TTM_PL_TT].drm.resident/1024UL); drm_printf(p, "drm-memory-cpu: \t%llu KiB\n", - stats[TTM_PL_SYSTEM].total/1024UL); + stats[TTM_PL_SYSTEM].drm.resident/1024UL); /* Amdgpu specific memory accounting keys: */ drm_printf(p, "amd-memory-visible-vram:\t%llu KiB\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 1e6a044e3143b7..d41686a8e5ec02 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1224,7 +1224,6 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo, /* DRM stats common fields: */ - stats[type].total += size; if (drm_gem_object_is_shared_for_memory_stats(obj)) stats[type].drm.shared += size; else diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 7260349917ef06..a5653f474f85cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -142,7 +142,6 @@ struct amdgpu_bo_vm { struct amdgpu_mem_stats { struct drm_memory_stats drm; - uint64_t total; uint64_t visible; uint64_t evicted; uint64_t evicted_visible; From fdee0872a29fe86e8450ab00838b9c0533388733 Mon Sep 17 00:00:00 2001 From: Yunxiang Li Date: Thu, 24 Oct 2024 10:23:39 +0100 Subject: [PATCH 33/78] drm/amdgpu: stop tracking visible memory stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since on modern systems all of vram can be made visible anyways, to simplify the new implementation, drops tracking how much memory is visible for now. If this is really needed we can add it back on top of the new implementation, or just report all the BOs as visible. Signed-off-by: Yunxiang Li Reviewed-by: Christian König Signed-off-by: Tvrtko Ursulin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 6 ------ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 12 ++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 10 ---------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 11 ++++++++++- 4 files changed, 12 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c index 8281dd45faaa0f..7a9573958d87ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c @@ -103,16 +103,10 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) stats[TTM_PL_SYSTEM].drm.resident/1024UL); /* Amdgpu specific memory accounting keys: */ - drm_printf(p, "amd-memory-visible-vram:\t%llu KiB\n", - stats[TTM_PL_VRAM].visible/1024UL); drm_printf(p, "amd-evicted-vram:\t%llu KiB\n", stats[TTM_PL_VRAM].evicted/1024UL); - drm_printf(p, "amd-evicted-visible-vram:\t%llu KiB\n", - stats[TTM_PL_VRAM].evicted_visible/1024UL); drm_printf(p, "amd-requested-vram:\t%llu KiB\n", stats[TTM_PL_VRAM].requested/1024UL); - drm_printf(p, "amd-requested-visible-vram:\t%llu KiB\n", - stats[TTM_PL_VRAM].requested_visible/1024UL); drm_printf(p, "amd-requested-gtt:\t%llu KiB\n", stats[TTM_PL_TT].requested/1024UL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index d41686a8e5ec02..0d3fb6b4212e6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -40,6 +40,7 @@ #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" #include "amdgpu_vram_mgr.h" +#include "amdgpu_vm.h" /** * DOC: amdgpu_object @@ -1236,23 +1237,14 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo, stats[type].drm.active += size; else if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) stats[type].drm.purgeable += size; - - if (type == TTM_PL_VRAM && amdgpu_res_cpu_visible(adev, res)) - stats[type].visible += size; } /* amdgpu specific stats: */ if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) { stats[TTM_PL_VRAM].requested += size; - if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) - stats[TTM_PL_VRAM].requested_visible += size; - - if (type != TTM_PL_VRAM) { + if (type != TTM_PL_VRAM) stats[TTM_PL_VRAM].evicted += size; - if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) - stats[TTM_PL_VRAM].evicted_visible += size; - } } else if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT) { stats[TTM_PL_TT].requested += size; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index a5653f474f85cf..be6769852ece4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -139,16 +139,6 @@ struct amdgpu_bo_vm { struct amdgpu_vm_bo_base entries[]; }; -struct amdgpu_mem_stats { - struct drm_memory_stats drm; - - uint64_t visible; - uint64_t evicted; - uint64_t evicted_visible; - uint64_t requested; - uint64_t requested_visible; -}; - static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo) { return container_of(tbo, struct amdgpu_bo, tbo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index c5b41e3ed14f2c..5d119ac26c4fe4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -42,7 +42,6 @@ struct amdgpu_bo_va; struct amdgpu_job; struct amdgpu_bo_list_entry; struct amdgpu_bo_vm; -struct amdgpu_mem_stats; /* * GPUVM handling @@ -322,6 +321,16 @@ struct amdgpu_vm_fault_info { unsigned int vmhub; }; +struct amdgpu_mem_stats { + struct drm_memory_stats drm; + + /* buffers that requested this placement */ + uint64_t requested; + /* buffers that requested this placement + * but are currently evicted */ + uint64_t evicted; +}; + struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root_cached va; From cd3037f3fce5bf1556ad1a078cf458ebe52b12e8 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 24 Oct 2024 10:23:40 +0100 Subject: [PATCH 34/78] drm/amdgpu: Stop reporting special chip memory pools as CPU memory in fdinfo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So far these specialized on chip memory pools were reported as system memory (aka 'cpu') which is not correct and misleading. Lets remove that and consider later making them visible as their own thing. Signed-off-by: Tvrtko Ursulin Suggested-by: Christian König Cc: Yunxiang Li Cc: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 0d3fb6b4212e6b..b5f65ef1efcdee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1209,17 +1209,6 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo, type = res->mem_type; } - /* Squash some into 'cpu' to keep the legacy userspace view. */ - switch (type) { - case TTM_PL_VRAM: - case TTM_PL_TT: - case TTM_PL_SYSTEM: - break; - default: - type = TTM_PL_SYSTEM; - break; - } - if (drm_WARN_ON_ONCE(&adev->ddev, type >= sz)) return; From aa2ac51c8e1952ff95588d082e1a8b402c510ed0 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 24 Oct 2024 10:23:41 +0100 Subject: [PATCH 35/78] drm/amdgpu: Expose special on chip memory pools in fdinfo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the past these specialized on chip memory pools were reported as system memory (aka 'cpu') which was not correct and misleading. That has since been removed so lets make them visible as their own respective memory regions. Reviewed-by: Christian König Signed-off-by: Tvrtko Ursulin Cc: Christian König Cc: Yunxiang Li Cc: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c index 7a9573958d87ce..df2cf5c3392554 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c @@ -66,6 +66,10 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) [TTM_PL_VRAM] = "vram", [TTM_PL_TT] = "gtt", [TTM_PL_SYSTEM] = "cpu", + [AMDGPU_PL_GDS] = "gds", + [AMDGPU_PL_GWS] = "gws", + [AMDGPU_PL_OA] = "oa", + [AMDGPU_PL_DOORBELL] = "doorbell", }; unsigned int hw_ip, i; int ret; @@ -87,12 +91,16 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) drm_printf(p, "pasid:\t%u\n", fpriv->vm.pasid); - for (i = 0; i < TTM_PL_PRIV; i++) + for (i = 0; i < ARRAY_SIZE(pl_name); i++) { + if (!pl_name[i]) + continue; + drm_print_memory_stats(p, &stats[i].drm, DRM_GEM_OBJECT_RESIDENT | DRM_GEM_OBJECT_PURGEABLE, pl_name[i]); + } /* Legacy amdgpu keys, alias to drm-resident-memory-: */ drm_printf(p, "drm-memory-vram:\t%llu KiB\n", From 0174c0791c042a357e54f91c68f58142e69c3584 Mon Sep 17 00:00:00 2001 From: "jeffbai@aosc.io" Date: Thu, 24 Oct 2024 21:07:18 +0800 Subject: [PATCH 36/78] drm/amdgpu: fix comment about amdgpu.abmlevel defaults Since 040fdcde288a ("drm/amdgpu: respect the abmlevel module parameter value if it is set"), the default value for amdgpu.abmlevel was set to -1, or auto. However, the comment explaining the default value was not updated to reflect the change (-1, or auto; not -1, or disabled). Clarify that the default value (-1) means auto. Fixes: 040fdcde288a ("drm/amdgpu: respect the abmlevel module parameter value if it is set") Reported-by: Ruikai Liu Signed-off-by: Mingcong Bai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ff3ac30744dca9..054d1837e56620 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -887,7 +887,7 @@ module_param_named(visualconfirm, amdgpu_dc_visual_confirm, uint, 0444); * the ABM algorithm, with 1 being the least reduction and 4 being the most * reduction. * - * Defaults to -1, or disabled. Userspace can only override this level after + * Defaults to -1, or auto. Userspace can only override this level after * boot if it's set to auto. */ int amdgpu_dm_abm_level = -1; From b95264cf75bd8840b10733c50678d154c02b5431 Mon Sep 17 00:00:00 2001 From: R Sundar Date: Sun, 27 Oct 2024 19:35:37 +0530 Subject: [PATCH 37/78] drm/amdgpu: use string choice helpers Use string choice helpers for better readability. Reported-by: kernel test robot Reported-by: Julia Lawall Closes: https://lore.kernel.org/r/202410161814.I6p2Nnux-lkp@intel.com/ Signed-off-by: R Sundar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c index 35fee3e8cde2b1..8cd69836dd9939 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c @@ -200,7 +200,7 @@ static int amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr, dev_err_ratelimited(&i2c_adap->dev, "maddr:0x%04X size:0x%02X:quirk max_%s_len must be > %d", eeprom_addr, buf_size, - read ? "read" : "write", EEPROM_OFFSET_SIZE); + str_read_write(read), EEPROM_OFFSET_SIZE); return -EINVAL; } From d29bd94c4fc9f10e043a5a207c902b4261cb7fd7 Mon Sep 17 00:00:00 2001 From: Austin Zheng Date: Fri, 18 Oct 2024 14:55:21 -0400 Subject: [PATCH 38/78] drm/amd/display: Do Not Fallback To SW Cursor If HW Cursor Required [Why/How] Tearing can occur if there is a flip immediate plane and SW cursor. check_subvp_sw_cursor_fallback_req falls back to SW cursor if the stream has the potential to use subVP. Check for fallback not needed if HW cursor is required. e.g. Fullscreen gaming Reviewed-by: Dillon Varone Signed-off-by: Austin Zheng Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_stream.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 5d233c09d23989..8b0632104aef07 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -292,7 +292,9 @@ bool dc_stream_set_cursor_attributes( * 2. If not subvp high refresh, for single display cases, if resolution is >= 5K and refresh rate < 120hz * 3. If not subvp high refresh, for multi display cases, if resolution is >= 4K and refresh rate < 120hz */ - if (dc->debug.allow_sw_cursor_fallback && attributes->height * attributes->width * 4 > 16384) { + if (dc->debug.allow_sw_cursor_fallback && + attributes->height * attributes->width * 4 > 16384 && + !stream->hw_cursor_req) { if (check_subvp_sw_cursor_fallback_req(dc, stream)) return false; } From d1fd30e511a70911151dc9f71c705e1fab175cef Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Tue, 22 Oct 2024 18:42:56 -0400 Subject: [PATCH 39/78] drm/amd/display: avoid divided by zero [why] insert divided by zero protection Reviewed-by: Alvin Lee Signed-off-by: Charlene Liu Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/modules/freesync/freesync.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index fc4268729017e2..f980a84dceefce 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -129,6 +129,9 @@ unsigned int mod_freesync_calc_v_total_from_refresh( unsigned int v_total; unsigned int frame_duration_in_ns; + if (refresh_in_uhz == 0) + return stream->timing.v_total; + frame_duration_in_ns = ((unsigned int)(div64_u64((1000000000ULL * 1000000), refresh_in_uhz))); From d7b86a002cf7e1b55ec311c11264f70d079860b9 Mon Sep 17 00:00:00 2001 From: Ausef Yousof Date: Wed, 23 Oct 2024 13:24:11 -0400 Subject: [PATCH 40/78] Revert "drm/amd/display: Block UHBR Based On USB-C PD Cable ID" This reverts commit 4f01a68751194d05280d659a65758c09e4af04d6. [why & how] The offending commit caused a lighting issue for Samsung Odyssey G9 monitors when connecting via USB-C. The commit was intended to block certain UHBR rates. Reviewed-by: Charlene Liu Signed-off-by: Ausef Yousof Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dc/link/protocols/link_dp_capability.c | 22 +++++-------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index 93b81918216dd1..72ef0c3a7ebd1b 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -1417,8 +1417,7 @@ static bool get_usbc_cable_id(struct dc_link *link, union dp_cable_id *cable_id) if (!link->ctx->dmub_srv || link->ep_type != DISPLAY_ENDPOINT_PHY || - link->link_enc->features.flags.bits.DP_IS_USB_C == 0 || - link->link_enc->features.flags.bits.IS_DP2_CAPABLE == 0) + link->link_enc->features.flags.bits.DP_IS_USB_C == 0) return false; memset(&cmd, 0, sizeof(cmd)); @@ -1431,9 +1430,7 @@ static bool get_usbc_cable_id(struct dc_link *link, union dp_cable_id *cable_id) cable_id->raw = cmd.cable_id.data.output_raw; DC_LOG_DC("usbc_cable_id = %d.\n", cable_id->raw); } - - ASSERT(cmd.cable_id.header.ret_status); - return true; + return cmd.cable_id.header.ret_status == 1; } static void retrieve_cable_id(struct dc_link *link) @@ -2130,8 +2127,6 @@ struct dc_link_settings dp_get_max_link_cap(struct dc_link *link) /* get max link encoder capability */ if (link_enc) link_enc->funcs->get_max_link_cap(link_enc, &max_link_cap); - else - return max_link_cap; /* Lower link settings based on sink's link cap */ if (link->reported_link_cap.lane_count < max_link_cap.lane_count) @@ -2165,15 +2160,10 @@ struct dc_link_settings dp_get_max_link_cap(struct dc_link *link) */ cable_max_link_rate = get_cable_max_link_rate(link); - if (!link->dc->debug.ignore_cable_id) { - if (cable_max_link_rate != LINK_RATE_UNKNOWN) - // cable max link rate known - max_link_cap.link_rate = MIN(max_link_cap.link_rate, cable_max_link_rate); - else if (link_enc->funcs->is_in_alt_mode && link_enc->funcs->is_in_alt_mode(link_enc)) - // cable max link rate ambiguous, DP alt mode, limit to HBR3 - max_link_cap.link_rate = MIN(max_link_cap.link_rate, LINK_RATE_HIGH3); - //else {} - // cable max link rate ambiguous, DP, do nothing + if (!link->dc->debug.ignore_cable_id && + cable_max_link_rate != LINK_RATE_UNKNOWN) { + if (cable_max_link_rate < max_link_cap.link_rate) + max_link_cap.link_rate = cable_max_link_rate; if (!link->dpcd_caps.cable_id.bits.UHBR13_5_CAPABILITY && link->dpcd_caps.cable_id.bits.CABLE_TYPE >= 2) From 215b6dd7e026fdc32290c61e6f4298587f807e2c Mon Sep 17 00:00:00 2001 From: Dominik Kaszewski Date: Thu, 24 Oct 2024 08:01:23 +0200 Subject: [PATCH 41/78] drm/amd/display: fix rxstatus_msg_sz type narrowing [Why] Code reading rxstatus message size was incorrectly assigning it to uint8_t, despite the value being 10 bits long (lower byte plus lowest 2 bits from upper byte). This caused the highest 2 bits to be ignored, potentially missing invalid values. [How] Change all local variables holding rxstatus message size from uint8_t to uint16_t, as in mod_hdcp_message_hdcp2::rx_id_list_size. Replaced untyped HDCP_2_2_HMID_RXSTATUS_MSG_SZ_HI macro with function hdcp_2_2_hmid_rxstatus_msg_sz(const uint8_t[2]) to encapsulate entire calculation and return a typed result. Removed spaces mixed with tabs to fix indentation on modified lines. Reviewed-by: Wenjing Liu Signed-off-by: Dominik Kaszewski Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/modules/hdcp/hdcp2_execution.c | 31 +++++++------------ 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c index c996365e84b015..1d41dd58f6bce1 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp2_execution.c @@ -27,6 +27,11 @@ #include "hdcp.h" +static inline uint16_t get_hdmi_rxstatus_msg_size(const uint8_t rxstatus[2]) +{ + return HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(rxstatus[1]) << 8 | rxstatus[0]; +} + static inline enum mod_hdcp_status check_receiver_id_list_ready(struct mod_hdcp *hdcp) { uint8_t is_ready = 0; @@ -35,8 +40,7 @@ static inline enum mod_hdcp_status check_receiver_id_list_ready(struct mod_hdcp is_ready = HDCP_2_2_DP_RXSTATUS_READY(hdcp->auth.msg.hdcp2.rxstatus_dp) ? 1 : 0; else is_ready = (HDCP_2_2_HDMI_RXSTATUS_READY(hdcp->auth.msg.hdcp2.rxstatus[1]) && - (HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 | - hdcp->auth.msg.hdcp2.rxstatus[0])) ? 1 : 0; + get_hdmi_rxstatus_msg_size(hdcp->auth.msg.hdcp2.rxstatus) != 0) ? 1 : 0; return is_ready ? MOD_HDCP_STATUS_SUCCESS : MOD_HDCP_STATUS_HDCP2_RX_ID_LIST_NOT_READY; } @@ -84,15 +88,13 @@ static inline enum mod_hdcp_status check_link_integrity_failure_dp( static enum mod_hdcp_status check_ake_cert_available(struct mod_hdcp *hdcp) { enum mod_hdcp_status status; - uint16_t size; if (is_dp_hdcp(hdcp)) { status = MOD_HDCP_STATUS_SUCCESS; } else { status = mod_hdcp_read_rxstatus(hdcp); if (status == MOD_HDCP_STATUS_SUCCESS) { - size = HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 | - hdcp->auth.msg.hdcp2.rxstatus[0]; + const uint16_t size = get_hdmi_rxstatus_msg_size(hdcp->auth.msg.hdcp2.rxstatus); status = (size == sizeof(hdcp->auth.msg.hdcp2.ake_cert)) ? MOD_HDCP_STATUS_SUCCESS : MOD_HDCP_STATUS_HDCP2_AKE_CERT_PENDING; @@ -104,7 +106,6 @@ static enum mod_hdcp_status check_ake_cert_available(struct mod_hdcp *hdcp) static enum mod_hdcp_status check_h_prime_available(struct mod_hdcp *hdcp) { enum mod_hdcp_status status; - uint8_t size; status = mod_hdcp_read_rxstatus(hdcp); if (status != MOD_HDCP_STATUS_SUCCESS) @@ -115,8 +116,7 @@ static enum mod_hdcp_status check_h_prime_available(struct mod_hdcp *hdcp) MOD_HDCP_STATUS_SUCCESS : MOD_HDCP_STATUS_HDCP2_H_PRIME_PENDING; } else { - size = HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 | - hdcp->auth.msg.hdcp2.rxstatus[0]; + const uint16_t size = get_hdmi_rxstatus_msg_size(hdcp->auth.msg.hdcp2.rxstatus); status = (size == sizeof(hdcp->auth.msg.hdcp2.ake_h_prime)) ? MOD_HDCP_STATUS_SUCCESS : MOD_HDCP_STATUS_HDCP2_H_PRIME_PENDING; @@ -128,7 +128,6 @@ static enum mod_hdcp_status check_h_prime_available(struct mod_hdcp *hdcp) static enum mod_hdcp_status check_pairing_info_available(struct mod_hdcp *hdcp) { enum mod_hdcp_status status; - uint8_t size; status = mod_hdcp_read_rxstatus(hdcp); if (status != MOD_HDCP_STATUS_SUCCESS) @@ -139,8 +138,7 @@ static enum mod_hdcp_status check_pairing_info_available(struct mod_hdcp *hdcp) MOD_HDCP_STATUS_SUCCESS : MOD_HDCP_STATUS_HDCP2_PAIRING_INFO_PENDING; } else { - size = HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 | - hdcp->auth.msg.hdcp2.rxstatus[0]; + const uint16_t size = get_hdmi_rxstatus_msg_size(hdcp->auth.msg.hdcp2.rxstatus); status = (size == sizeof(hdcp->auth.msg.hdcp2.ake_pairing_info)) ? MOD_HDCP_STATUS_SUCCESS : MOD_HDCP_STATUS_HDCP2_PAIRING_INFO_PENDING; @@ -152,7 +150,6 @@ static enum mod_hdcp_status check_pairing_info_available(struct mod_hdcp *hdcp) static enum mod_hdcp_status poll_l_prime_available(struct mod_hdcp *hdcp) { enum mod_hdcp_status status = MOD_HDCP_STATUS_FAILURE; - uint8_t size; uint16_t max_wait = 20; // units of ms uint16_t num_polls = 5; uint16_t wait_time = max_wait / num_polls; @@ -167,8 +164,7 @@ static enum mod_hdcp_status poll_l_prime_available(struct mod_hdcp *hdcp) if (status != MOD_HDCP_STATUS_SUCCESS) break; - size = HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 | - hdcp->auth.msg.hdcp2.rxstatus[0]; + const uint16_t size = get_hdmi_rxstatus_msg_size(hdcp->auth.msg.hdcp2.rxstatus); status = (size == sizeof(hdcp->auth.msg.hdcp2.lc_l_prime)) ? MOD_HDCP_STATUS_SUCCESS : MOD_HDCP_STATUS_HDCP2_L_PRIME_PENDING; @@ -181,7 +177,6 @@ static enum mod_hdcp_status poll_l_prime_available(struct mod_hdcp *hdcp) static enum mod_hdcp_status check_stream_ready_available(struct mod_hdcp *hdcp) { enum mod_hdcp_status status; - uint8_t size; if (is_dp_hdcp(hdcp)) { status = MOD_HDCP_STATUS_INVALID_OPERATION; @@ -189,8 +184,7 @@ static enum mod_hdcp_status check_stream_ready_available(struct mod_hdcp *hdcp) status = mod_hdcp_read_rxstatus(hdcp); if (status != MOD_HDCP_STATUS_SUCCESS) goto out; - size = HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 | - hdcp->auth.msg.hdcp2.rxstatus[0]; + const uint16_t size = get_hdmi_rxstatus_msg_size(hdcp->auth.msg.hdcp2.rxstatus); status = (size == sizeof(hdcp->auth.msg.hdcp2.repeater_auth_stream_ready)) ? MOD_HDCP_STATUS_SUCCESS : MOD_HDCP_STATUS_HDCP2_STREAM_READY_PENDING; @@ -249,8 +243,7 @@ static uint8_t process_rxstatus(struct mod_hdcp *hdcp, sizeof(hdcp->auth.msg.hdcp2.rx_id_list); else hdcp->auth.msg.hdcp2.rx_id_list_size = - HDCP_2_2_HDMI_RXSTATUS_MSG_SZ_HI(hdcp->auth.msg.hdcp2.rxstatus[1]) << 8 | - hdcp->auth.msg.hdcp2.rxstatus[0]; + get_hdmi_rxstatus_msg_size(hdcp->auth.msg.hdcp2.rxstatus); } out: return (*status == MOD_HDCP_STATUS_SUCCESS); From 69603bfcffc887fdfb5f8e877849e400958fb72d Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 23 Oct 2024 12:53:37 -0400 Subject: [PATCH 42/78] drm/amd/display: Remove inaccessible registers from DMU diagnostics [Why] SEC_CNTL isn't readable by x86 and can block Z8 entry if read. [How] Remove the read. Reviewed-by: Ovidiu Bunea Reviewed-by: Charlene Liu Signed-off-by: Nicholas Kazlauskas Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c index 3be315f1a44348..e5e77bd3c31ea1 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn35.c @@ -464,7 +464,7 @@ uint32_t dmub_dcn35_get_current_time(struct dmub_srv *dmub) void dmub_dcn35_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data) { - uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset; + uint32_t is_dmub_enabled, is_soft_reset; uint32_t is_traceport_enabled, is_cw6_enabled; if (!dmub || !diag_data) @@ -514,9 +514,6 @@ void dmub_dcn35_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnosti REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &is_soft_reset); diag_data->is_dmcub_soft_reset = is_soft_reset; - REG_GET(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS, &is_sec_reset); - diag_data->is_dmcub_secure_reset = is_sec_reset; - REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled); diag_data->is_traceport_en = is_traceport_enabled; From 9626890e56f70eeb863c2960c105afd0df0c73be Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Sat, 19 Oct 2024 22:07:31 -0400 Subject: [PATCH 43/78] drm/amd/display: fix asserts in SPL during bootup [Why] During mode validation, there maybe modes that fail max_downscale_src_width check and scaling_quality taps are 0. This will cause an assert to trigger in spl_set_filters_data() because taps are 0. [How] Move taps calculation for non-adaptive scaling mode to separate function and call it if max_downscale_src_width fails. This will populate taps if scaling_quality taps are 0. Reviewed-by: Alvin Lee Signed-off-by: Samson Tam Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/spl/dc_spl.c | 86 ++++++++++++--------- 1 file changed, 48 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index 133906e73a65c0..9095da7b842bc5 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -868,6 +868,50 @@ static bool spl_get_isharp_en(struct spl_in *spl_in, return enable_isharp; } +/* Calculate number of tap with adaptive scaling off */ +static void spl_get_taps_non_adaptive_scaler( + struct spl_scratch *spl_scratch, const struct spl_taps *in_taps) +{ + if (in_taps->h_taps == 0) { + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz) > 1) + spl_scratch->scl_data.taps.h_taps = spl_min(2 * spl_fixpt_ceil( + spl_scratch->scl_data.ratios.horz), 8); + else + spl_scratch->scl_data.taps.h_taps = 4; + } else + spl_scratch->scl_data.taps.h_taps = in_taps->h_taps; + + if (in_taps->v_taps == 0) { + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) > 1) + spl_scratch->scl_data.taps.v_taps = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int( + spl_scratch->scl_data.ratios.vert, 2)), 8); + else + spl_scratch->scl_data.taps.v_taps = 4; + } else + spl_scratch->scl_data.taps.v_taps = in_taps->v_taps; + + if (in_taps->v_taps_c == 0) { + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) > 1) + spl_scratch->scl_data.taps.v_taps_c = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int( + spl_scratch->scl_data.ratios.vert_c, 2)), 8); + else + spl_scratch->scl_data.taps.v_taps_c = 4; + } else + spl_scratch->scl_data.taps.v_taps_c = in_taps->v_taps_c; + + if (in_taps->h_taps_c == 0) { + if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz_c) > 1) + spl_scratch->scl_data.taps.h_taps_c = spl_min(2 * spl_fixpt_ceil( + spl_scratch->scl_data.ratios.horz_c), 8); + else + spl_scratch->scl_data.taps.h_taps_c = 4; + } else if ((in_taps->h_taps_c % 2) != 0 && in_taps->h_taps_c != 1) + /* Only 1 and even h_taps_c are supported by hw */ + spl_scratch->scl_data.taps.h_taps_c = in_taps->h_taps_c - 1; + else + spl_scratch->scl_data.taps.h_taps_c = in_taps->h_taps_c; +} + /* Calculate optimal number of taps */ static bool spl_get_optimal_number_of_taps( int max_downscale_src_width, struct spl_in *spl_in, struct spl_scratch *spl_scratch, @@ -883,7 +927,7 @@ static bool spl_get_optimal_number_of_taps( if (spl_scratch->scl_data.viewport.width > spl_scratch->scl_data.h_active && max_downscale_src_width != 0 && spl_scratch->scl_data.viewport.width > max_downscale_src_width) { - memcpy(&spl_scratch->scl_data.taps, in_taps, sizeof(struct spl_taps)); + spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps); *enable_easf_v = false; *enable_easf_h = false; *enable_isharp = false; @@ -910,43 +954,9 @@ static bool spl_get_optimal_number_of_taps( * From programming guide: taps = min{ ceil(2*H_RATIO,1), 8} for downscaling * taps = 4 for upscaling */ - if (skip_easf) { - if (in_taps->h_taps == 0) { - if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz) > 1) - spl_scratch->scl_data.taps.h_taps = spl_min(2 * spl_fixpt_ceil( - spl_scratch->scl_data.ratios.horz), 8); - else - spl_scratch->scl_data.taps.h_taps = 4; - } else - spl_scratch->scl_data.taps.h_taps = in_taps->h_taps; - if (in_taps->v_taps == 0) { - if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) > 1) - spl_scratch->scl_data.taps.v_taps = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int( - spl_scratch->scl_data.ratios.vert, 2)), 8); - else - spl_scratch->scl_data.taps.v_taps = 4; - } else - spl_scratch->scl_data.taps.v_taps = in_taps->v_taps; - if (in_taps->v_taps_c == 0) { - if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) > 1) - spl_scratch->scl_data.taps.v_taps_c = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int( - spl_scratch->scl_data.ratios.vert_c, 2)), 8); - else - spl_scratch->scl_data.taps.v_taps_c = 4; - } else - spl_scratch->scl_data.taps.v_taps_c = in_taps->v_taps_c; - if (in_taps->h_taps_c == 0) { - if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.horz_c) > 1) - spl_scratch->scl_data.taps.h_taps_c = spl_min(2 * spl_fixpt_ceil( - spl_scratch->scl_data.ratios.horz_c), 8); - else - spl_scratch->scl_data.taps.h_taps_c = 4; - } else if ((in_taps->h_taps_c % 2) != 0 && in_taps->h_taps_c != 1) - /* Only 1 and even h_taps_c are supported by hw */ - spl_scratch->scl_data.taps.h_taps_c = in_taps->h_taps_c - 1; - else - spl_scratch->scl_data.taps.h_taps_c = in_taps->h_taps_c; - } else { + if (skip_easf) + spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps); + else { if (spl_is_yuv420(spl_in->basic_in.format)) { spl_scratch->scl_data.taps.h_taps = 6; spl_scratch->scl_data.taps.v_taps = 6; From 7875afafba84817b791be6d2282b836695146060 Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Wed, 9 Oct 2024 17:09:38 +0800 Subject: [PATCH 44/78] drm/amd/display: Fix brightness level not retained over reboot [Why] During boot up and resume the DC layer will reset the panel brightness to fix a flicker issue. It will cause the dm->actual_brightness is not the current panel brightness level. (the dm->brightness is the correct panel level) [How] Set the backlight level after do the set mode. Cc: Mario Limonciello Cc: Alex Deucher Fixes: d9e865826c20 ("drm/amd/display: Simplify brightness initialization") Reported-by: Mark Herbert Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3655 Reviewed-by: Sun peng Li Signed-off-by: Tom Chung Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 75d6b90104f8fe..a07ed83b186bc5 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9442,6 +9442,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, bool mode_set_reset_required = false; u32 i; struct dc_commit_streams_params params = {dc_state->streams, dc_state->stream_count}; + bool set_backlight_level = false; /* Disable writeback */ for_each_old_connector_in_state(state, connector, old_con_state, i) { @@ -9561,6 +9562,7 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, acrtc->hw_mode = new_crtc_state->mode; crtc->hwmode = new_crtc_state->mode; mode_set_reset_required = true; + set_backlight_level = true; } else if (modereset_required(new_crtc_state)) { drm_dbg_atomic(dev, "Atomic commit: RESET. crtc id %d:[%p]\n", @@ -9612,6 +9614,19 @@ static void amdgpu_dm_commit_streams(struct drm_atomic_state *state, acrtc->otg_inst = status->primary_otg_inst; } } + + /* During boot up and resume the DC layer will reset the panel brightness + * to fix a flicker issue. + * It will cause the dm->actual_brightness is not the current panel brightness + * level. (the dm->brightness is the correct panel level) + * So we set the backlight level with dm->brightness value after set mode + */ + if (set_backlight_level) { + for (i = 0; i < dm->num_of_edps; i++) { + if (dm->backlight_dev[i]) + amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]); + } + } } static void dm_set_writeback(struct amdgpu_display_manager *dm, From 820a84edd4c8224d2397fc9637dda41224755b25 Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Fri, 25 Oct 2024 11:28:33 -0400 Subject: [PATCH 45/78] drm/amd/display: SPL cleanup [Why & How] Move from pointer to callback to reference callback directly Missed renaming fixpt functions with spl prefix Reviewed-by: Navid Assadian Signed-off-by: Samson Tam Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dc_spl_translate.c | 14 ++++---- drivers/gpu/drm/amd/display/dc/spl/dc_spl.c | 2 +- .../gpu/drm/amd/display/dc/spl/dc_spl_types.h | 4 +-- .../drm/amd/display/dc/spl/spl_fixpt31_32.c | 34 +++++++++---------- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index 24aa9df892f3a3..c8d8e335fa37a6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -8,13 +8,13 @@ #include "dcn32/dcn32_dpp.h" #include "dcn401/dcn401_dpp.h" -static struct spl_funcs dcn2_spl_funcs = { +static struct spl_callbacks dcn2_spl_callbacks = { .spl_calc_lb_num_partitions = dscl2_spl_calc_lb_num_partitions, }; -static struct spl_funcs dcn32_spl_funcs = { +static struct spl_callbacks dcn32_spl_callbacks = { .spl_calc_lb_num_partitions = dscl32_spl_calc_lb_num_partitions, }; -static struct spl_funcs dcn401_spl_funcs = { +static struct spl_callbacks dcn401_spl_callbacks = { .spl_calc_lb_num_partitions = dscl401_spl_calc_lb_num_partitions, }; static void populate_splrect_from_rect(struct spl_rect *spl_rect, const struct rect *rect) @@ -77,16 +77,16 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl // This is used to determine the vtap support switch (plane_state->ctx->dce_version) { case DCN_VERSION_2_0: - spl_in->funcs = &dcn2_spl_funcs; + spl_in->callbacks = dcn2_spl_callbacks; break; case DCN_VERSION_3_2: - spl_in->funcs = &dcn32_spl_funcs; + spl_in->callbacks = dcn32_spl_callbacks; break; case DCN_VERSION_4_01: - spl_in->funcs = &dcn401_spl_funcs; + spl_in->callbacks = dcn401_spl_callbacks; break; default: - spl_in->funcs = &dcn2_spl_funcs; + spl_in->callbacks = dcn2_spl_callbacks; } // Make format field from spl_in point to plane_res scl_data format spl_in->basic_in.format = (enum spl_pixel_format)pipe_ctx->plane_res.scl_data.format; diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index 9095da7b842bc5..a29a9f131e046d 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -981,7 +981,7 @@ static bool spl_get_optimal_number_of_taps( else lb_config = LB_MEMORY_CONFIG_0; // Determine max vtap support by calculating how much line buffer can fit - spl_in->funcs->spl_calc_lb_num_partitions(spl_in->basic_out.alpha_en, &spl_scratch->scl_data, + spl_in->callbacks.spl_calc_lb_num_partitions(spl_in->basic_out.alpha_en, &spl_scratch->scl_data, lb_config, &num_part_y, &num_part_c); /* MAX_V_TAPS = MIN (NUM_LINES - MAX(CEILING(V_RATIO,1)-2, 0), 8) */ if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) > 2) diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index 8b00ccb1dfdad0..55d557df4aa5bf 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -497,7 +497,7 @@ enum scale_to_sharpness_policy { SCALE_TO_SHARPNESS_ADJ_YUV = 1, SCALE_TO_SHARPNESS_ADJ_ALL = 2 }; -struct spl_funcs { +struct spl_callbacks { void (*spl_calc_lb_num_partitions) (bool alpha_en, const struct spl_scaler_data *scl_data, @@ -518,7 +518,7 @@ struct spl_in { // Basic slice information int odm_slice_index; // ODM Slice Index using get_odm_split_index struct spl_taps scaling_quality; // Explicit Scaling Quality - struct spl_funcs *funcs; + struct spl_callbacks callbacks; // Inputs for isharp and EASF struct adaptive_sharpness adaptive_sharpness; // Adaptive Sharpness enum linear_light_scaling lls_pref; // Linear Light Scaling diff --git a/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c index 5fd79d9c67e200..131f1e3949d33f 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c +++ b/drivers/gpu/drm/amd/display/dc/spl/spl_fixpt31_32.c @@ -22,7 +22,7 @@ static inline unsigned long long abs_i64( * result = dividend / divisor * *remainder = dividend % divisor */ -static inline unsigned long long complete_integer_division_u64( +static inline unsigned long long spl_complete_integer_division_u64( unsigned long long dividend, unsigned long long divisor, unsigned long long *remainder) @@ -60,7 +60,7 @@ struct spl_fixed31_32 spl_fixpt_from_fraction(long long numerator, long long den /* determine integer part */ - unsigned long long res_value = complete_integer_division_u64( + unsigned long long res_value = spl_complete_integer_division_u64( arg1_value, arg2_value, &remainder); SPL_ASSERT(res_value <= (unsigned long long)LONG_MAX); @@ -286,7 +286,7 @@ struct spl_fixed31_32 spl_fixpt_cos(struct spl_fixed31_32 arg) * * Calculated as Taylor series. */ -static struct spl_fixed31_32 fixed31_32_exp_from_taylor_series(struct spl_fixed31_32 arg) +static struct spl_fixed31_32 spl_fixed31_32_exp_from_taylor_series(struct spl_fixed31_32 arg) { unsigned int n = 9; @@ -345,14 +345,14 @@ struct spl_fixed31_32 spl_fixpt_exp(struct spl_fixed31_32 arg) if (m > 0) return spl_fixpt_shl( - fixed31_32_exp_from_taylor_series(r), + spl_fixed31_32_exp_from_taylor_series(r), (unsigned char)m); else return spl_fixpt_div_int( - fixed31_32_exp_from_taylor_series(r), + spl_fixed31_32_exp_from_taylor_series(r), 1LL << -m); } else if (arg.value != 0) - return fixed31_32_exp_from_taylor_series(arg); + return spl_fixed31_32_exp_from_taylor_series(arg); else return spl_fixpt_one; } @@ -396,7 +396,7 @@ struct spl_fixed31_32 spl_fixpt_log(struct spl_fixed31_32 arg) * part in 32 bits. It is used in hw programming (scaler) */ -static inline unsigned int ux_dy( +static inline unsigned int spl_ux_dy( long long value, unsigned int integer_bits, unsigned int fractional_bits) @@ -415,13 +415,13 @@ static inline unsigned int ux_dy( return result | fractional_part; } -static inline unsigned int clamp_ux_dy( +static inline unsigned int spl_clamp_ux_dy( long long value, unsigned int integer_bits, unsigned int fractional_bits, unsigned int min_clamp) { - unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits); + unsigned int truncated_val = spl_ux_dy(value, integer_bits, fractional_bits); if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART))) return (1 << (integer_bits + fractional_bits)) - 1; @@ -433,40 +433,40 @@ static inline unsigned int clamp_ux_dy( unsigned int spl_fixpt_u4d19(struct spl_fixed31_32 arg) { - return ux_dy(arg.value, 4, 19); + return spl_ux_dy(arg.value, 4, 19); } unsigned int spl_fixpt_u3d19(struct spl_fixed31_32 arg) { - return ux_dy(arg.value, 3, 19); + return spl_ux_dy(arg.value, 3, 19); } unsigned int spl_fixpt_u2d19(struct spl_fixed31_32 arg) { - return ux_dy(arg.value, 2, 19); + return spl_ux_dy(arg.value, 2, 19); } unsigned int spl_fixpt_u0d19(struct spl_fixed31_32 arg) { - return ux_dy(arg.value, 0, 19); + return spl_ux_dy(arg.value, 0, 19); } unsigned int spl_fixpt_clamp_u0d14(struct spl_fixed31_32 arg) { - return clamp_ux_dy(arg.value, 0, 14, 1); + return spl_clamp_ux_dy(arg.value, 0, 14, 1); } unsigned int spl_fixpt_clamp_u0d10(struct spl_fixed31_32 arg) { - return clamp_ux_dy(arg.value, 0, 10, 1); + return spl_clamp_ux_dy(arg.value, 0, 10, 1); } int spl_fixpt_s4d19(struct spl_fixed31_32 arg) { if (arg.value < 0) - return -(int)ux_dy(spl_fixpt_abs(arg).value, 4, 19); + return -(int)spl_ux_dy(spl_fixpt_abs(arg).value, 4, 19); else - return ux_dy(arg.value, 4, 19); + return spl_ux_dy(arg.value, 4, 19); } struct spl_fixed31_32 spl_fixpt_from_ux_dy(unsigned int value, From b4c804628485af2b46f0d24a87190735cac37d61 Mon Sep 17 00:00:00 2001 From: Ausef Yousof Date: Thu, 24 Oct 2024 14:06:39 -0400 Subject: [PATCH 46/78] drm/amd/display: Remove hw w/a toggle if on DP2/HPO [why&how] Applying a hw w/a only relevant to DIG FIFO causing corruption using HPO, do not apply the w/a if on DP2/HPO Reviewed-by: Charlene Liu Signed-off-by: Ausef Yousof Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 1d4fe0de0f672d..07b49b4030f986 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -133,6 +133,8 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state * for (i = 0; i < dc->res_pool->pipe_count; ++i) { struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; + struct clk_mgr_internal *clk_mgr_internal = TO_CLK_MGR_INTERNAL(clk_mgr_base); + struct dccg *dccg = clk_mgr_internal->dccg; struct pipe_ctx *pipe = safe_to_lower ? &context->res_ctx.pipe_ctx[i] : &dc->current_state->res_ctx.pipe_ctx[i]; @@ -149,8 +151,13 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state * new_pipe->stream_res.stream_enc && new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled && new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled(new_pipe->stream_res.stream_enc); - if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || - !pipe->stream->link_enc) && !stream_changed_otg_dig_on) { + bool has_active_hpo = dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(old_pipe) && dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(new_pipe); + + if (!has_active_hpo && !dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(pipe) && + (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || + !pipe->stream->link_enc) && !stream_changed_otg_dig_on)) { + + /* This w/a should not trigger when we have a dig active */ if (disable) { if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc) From 583c21c2b2595d7bf9542a9406294d2fe16b6f01 Mon Sep 17 00:00:00 2001 From: Ausef Yousof Date: Thu, 24 Oct 2024 15:17:20 -0400 Subject: [PATCH 47/78] drm/amd/display: Remove otg w/a toggling on HPO interfaces [why&how] Adjust otg w/a disable condition to include HPO explicitly rather than assuming it is implicitly used through DP2. Reviewed-by: Charlene Liu Signed-off-by: Ausef Yousof Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index 07b49b4030f986..b77333817f1895 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -151,7 +151,15 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state * new_pipe->stream_res.stream_enc && new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled && new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled(new_pipe->stream_res.stream_enc); - bool has_active_hpo = dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(old_pipe) && dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(new_pipe); + + bool has_active_hpo = false; + + if (old_pipe->stream && new_pipe->stream && old_pipe->stream == new_pipe->stream) { + has_active_hpo = dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(old_pipe) && + dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(new_pipe); + + } + if (!has_active_hpo && !dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(pipe) && (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || From 2551b4a321a68134360b860113dd460133e856e5 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Fri, 18 Oct 2024 10:52:16 -0400 Subject: [PATCH 48/78] drm/amd/display: parse umc_info or vram_info based on ASIC An upstream bug report suggests that there are production dGPUs that are older than DCN401 but still have a umc_info in VBIOS tables with the same version as expected for a DCN401 product. Hence, reading this tables should be guarded with a version check. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3678 Reviewed-by: Dillon Varone Signed-off-by: Aurabindo Pillai Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 0d8498ab9b235e..be8fbb04ad98f8 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -3127,7 +3127,9 @@ static enum bp_result bios_parser_get_vram_info( struct atom_data_revision revision; // vram info moved to umc_info for DCN4x - if (info && DATA_TABLES(umc_info)) { + if (dcb->ctx->dce_version >= DCN_VERSION_4_01 && + dcb->ctx->dce_version < DCN_VERSION_MAX && + info && DATA_TABLES(umc_info)) { header = GET_IMAGE(struct atom_common_table_header, DATA_TABLES(umc_info)); From caccee7b296b1f6c37f09b5d4808606c66438e9d Mon Sep 17 00:00:00 2001 From: Ilya Bakoulin Date: Wed, 9 Oct 2024 15:26:48 -0400 Subject: [PATCH 49/78] drm/amd/display: Minimize wait for pending updates [Why/How] Move the wait for pending updates past prepare_bandwidth if the previous update was not a full update to reduce the average time it takes to complete a full update. Reviewed-by: Dillon Varone Reviewed-by: Alvin Lee Signed-off-by: Ilya Bakoulin Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 8a52fef46785c0..7872c6cabb14c3 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -3835,7 +3835,7 @@ static void commit_planes_for_stream(struct dc *dc, dc_exit_ips_for_hw_access(dc); dc_z10_restore(dc); - if (update_type == UPDATE_TYPE_FULL) + if (update_type == UPDATE_TYPE_FULL && dc->optimized_required) hwss_process_outstanding_hw_updates(dc, dc->current_state); for (i = 0; i < dc->res_pool->pipe_count; i++) { @@ -3862,6 +3862,9 @@ static void commit_planes_for_stream(struct dc *dc, context_clock_trace(dc, context); } + if (update_type == UPDATE_TYPE_FULL) + hwss_wait_for_outstanding_hw_updates(dc, dc->current_state); + top_pipe_to_program = resource_get_otg_master_for_stream( &context->res_ctx, stream); From bc068194f548ef1f230d96c4398046bf59165992 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Fri, 25 Oct 2024 12:27:26 +0800 Subject: [PATCH 50/78] drm/amd/display: Don't write DP_MSTM_CTRL after LT [Why] Observe after suspend/resme, we can't light up mst monitors under specific mst hub. The reason is that driver still writes DPCD DP_MSTM_CTRL after LT. It's forbidden even we write the same value for that dpcd register. [How] We already resume the mst branch device dpcd settings during resume_mst_branch_status(). Leverage drm_dp_mst_topology_queue_probe() to only probe the topology, not calling drm_dp_mst_topology_mgr_resume() which will set DP_MSTM_CTRL as well. Reviewed-by: Jerry Zuo Signed-off-by: Wayne Lin Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a07ed83b186bc5..9876f4836ee0e3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3170,8 +3170,7 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) struct dm_atomic_state *dm_state = to_dm_atomic_state(dm->atomic_obj.state); enum dc_connection_type new_connection_type = dc_connection_none; struct dc_state *dc_state; - int i, r, j, ret; - bool need_hotplug = false; + int i, r, j; struct dc_commit_streams_params commit_params = {}; if (dm->dc->caps.ips_support) { @@ -3360,23 +3359,16 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) aconnector->mst_root) continue; - ret = drm_dp_mst_topology_mgr_resume(&aconnector->mst_mgr, true); - - if (ret < 0) { - dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx, - aconnector->dc_link); - need_hotplug = true; - } + drm_dp_mst_topology_queue_probe(&aconnector->mst_mgr); } drm_connector_list_iter_end(&iter); - if (need_hotplug) - drm_kms_helper_hotplug_event(ddev); - amdgpu_dm_irq_resume_late(adev); amdgpu_dm_smu_write_watermarks_table(adev); + drm_kms_helper_hotplug_event(ddev); + return 0; } From 8b7f3529cd7bca239404d7279056e566639ac055 Mon Sep 17 00:00:00 2001 From: Boyuan Zhang Date: Wed, 2 Oct 2024 20:51:53 -0400 Subject: [PATCH 51/78] drm/amd/pm: add inst to dpm_set_vcn_enable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an instance parameter to the existing function dpm_set_vcn_enable() for future implementation. Re-write all pptable functions accordingly. v2: Remove duplicated dpm_set_vcn_enable() functions in v1. Instead, adding instance parameter to existing functions. Signed-off-by: Boyuan Zhang Suggested-by: Christian König Suggested-by: Alex Deucher Acked-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 2 +- drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 2 +- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 3 ++- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h | 3 ++- drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 4 +++- drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 4 +++- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 4 +++- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 4 +++- drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 4 +++- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 3 ++- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c | 4 +++- drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c | 4 +++- drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c | 3 ++- 13 files changed, 31 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 67d5a81234160b..ba97070acf9f71 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -252,7 +252,7 @@ static int smu_dpm_set_vcn_enable(struct smu_context *smu, if (atomic_read(&power_gate->vcn_gated) ^ enable) return 0; - ret = smu->ppt_funcs->dpm_set_vcn_enable(smu, enable); + ret = smu->ppt_funcs->dpm_set_vcn_enable(smu, enable, 0xff); if (!ret) atomic_set(&power_gate->vcn_gated, !enable); diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 8bb32b3f0d9c65..4ebcc1e53ea2f6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -739,7 +739,7 @@ struct pptable_funcs { * @dpm_set_vcn_enable: Enable/disable VCN engine dynamic power * management. */ - int (*dpm_set_vcn_enable)(struct smu_context *smu, bool enable); + int (*dpm_set_vcn_enable)(struct smu_context *smu, bool enable, int inst); /** * @dpm_set_jpeg_enable: Enable/disable JPEG engine dynamic power diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 044d6893b43e2e..ae3563d71fa0c7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -255,7 +255,8 @@ int smu_v13_0_wait_for_event(struct smu_context *smu, enum smu_event_type event, uint64_t event_arg); int smu_v13_0_set_vcn_enable(struct smu_context *smu, - bool enable); + bool enable, + int inst); int smu_v13_0_set_jpeg_enable(struct smu_context *smu, bool enable); diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h index 07c220102c1dc7..0546b02e198dde 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h @@ -210,7 +210,8 @@ int smu_v14_0_wait_for_event(struct smu_context *smu, enum smu_event_type event, uint64_t event_arg); int smu_v14_0_set_vcn_enable(struct smu_context *smu, - bool enable); + bool enable, + int inst); int smu_v14_0_set_jpeg_enable(struct smu_context *smu, bool enable); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 5ad09323a29df2..6c8e80f6b592d0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1571,7 +1571,9 @@ static bool arcturus_is_dpm_running(struct smu_context *smu) return !!(feature_enabled & SMC_DPM_FEATURE); } -static int arcturus_dpm_set_vcn_enable(struct smu_context *smu, bool enable) +static int arcturus_dpm_set_vcn_enable(struct smu_context *smu, + bool enable, + int inst) { int ret = 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 9fa305ba642267..faa8e7d9c3c628 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -1135,7 +1135,9 @@ static int navi10_set_default_dpm_table(struct smu_context *smu) return 0; } -static int navi10_dpm_set_vcn_enable(struct smu_context *smu, bool enable) +static int navi10_dpm_set_vcn_enable(struct smu_context *smu, + bool enable, + int inst) { int ret = 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 77e58eb463280c..a9cb28ce213375 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -1152,7 +1152,9 @@ static int sienna_cichlid_set_default_dpm_table(struct smu_context *smu) return 0; } -static int sienna_cichlid_dpm_set_vcn_enable(struct smu_context *smu, bool enable) +static int sienna_cichlid_dpm_set_vcn_enable(struct smu_context *smu, + bool enable, + int inst) { struct amdgpu_device *adev = smu->adev; int i, ret = 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 6c43724c01dd7a..cd3e9ba3eff442 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -461,7 +461,9 @@ static int vangogh_init_smc_tables(struct smu_context *smu) return smu_v11_0_init_smc_tables(smu); } -static int vangogh_dpm_set_vcn_enable(struct smu_context *smu, bool enable) +static int vangogh_dpm_set_vcn_enable(struct smu_context *smu, + bool enable, + int inst) { int ret = 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index 0b210b1f26285e..a34797f3576bf5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -645,7 +645,9 @@ static enum amd_pm_state_type renoir_get_current_power_state(struct smu_context return pm_type; } -static int renoir_dpm_set_vcn_enable(struct smu_context *smu, bool enable) +static int renoir_dpm_set_vcn_enable(struct smu_context *smu, + bool enable, + int inst) { int ret = 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 6cfd663639155d..2bfea740daceed 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2104,7 +2104,8 @@ int smu_v13_0_get_current_pcie_link_speed(struct smu_context *smu) } int smu_v13_0_set_vcn_enable(struct smu_context *smu, - bool enable) + bool enable, + int inst) { struct amdgpu_device *adev = smu->adev; int i, ret = 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c index a71b7c0803f189..f5db181ef489a2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c @@ -193,7 +193,9 @@ static int smu_v13_0_5_system_features_control(struct smu_context *smu, bool en) return ret; } -static int smu_v13_0_5_dpm_set_vcn_enable(struct smu_context *smu, bool enable) +static int smu_v13_0_5_dpm_set_vcn_enable(struct smu_context *smu, + bool enable, + int inst) { int ret = 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c index 71d58c8c8cc041..73b4506ef5a87f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c @@ -220,7 +220,9 @@ static int yellow_carp_system_features_control(struct smu_context *smu, bool en) return ret; } -static int yellow_carp_dpm_set_vcn_enable(struct smu_context *smu, bool enable) +static int yellow_carp_dpm_set_vcn_enable(struct smu_context *smu, + bool enable, + int inst) { int ret = 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index f7745eaf118e05..ecb0164d533eea 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -1507,7 +1507,8 @@ int smu_v14_0_set_single_dpm_table(struct smu_context *smu, } int smu_v14_0_set_vcn_enable(struct smu_context *smu, - bool enable) + bool enable, + int inst) { struct amdgpu_device *adev = smu->adev; int i, ret = 0; From c7b4ecc1fa29235e5a14ad178ab96ef15a0d16f6 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Fri, 1 Nov 2024 16:16:18 +0000 Subject: [PATCH 52/78] drm/amd/display: Add a missing DCN401 reg definition Add a mising reg field to the autogenerated header for future use Signed-off-by: Aurabindo Pillai Reviewed-by: Dillon Varone Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_sh_mask.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_sh_mask.h index f42a276499cd1c..5d9d5fea6e06b5 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_4_1_0_sh_mask.h @@ -6199,10 +6199,12 @@ #define DCHUBBUB_CTRL_STATUS__ROB_UNDERFLOW_STATUS__SHIFT 0x1 #define DCHUBBUB_CTRL_STATUS__ROB_OVERFLOW_STATUS__SHIFT 0x2 #define DCHUBBUB_CTRL_STATUS__ROB_OVERFLOW_CLEAR__SHIFT 0x3 +#define DCHUBBUB_CTRL_STATUS__DCHUBBUB_HW_DEBUG__SHIFT 0x4 #define DCHUBBUB_CTRL_STATUS__CSTATE_SWATH_CHK_GOOD_MODE__SHIFT 0x1f #define DCHUBBUB_CTRL_STATUS__ROB_UNDERFLOW_STATUS_MASK 0x00000002L #define DCHUBBUB_CTRL_STATUS__ROB_OVERFLOW_STATUS_MASK 0x00000004L #define DCHUBBUB_CTRL_STATUS__ROB_OVERFLOW_CLEAR_MASK 0x00000008L +#define DCHUBBUB_CTRL_STATUS__DCHUBBUB_HW_DEBUG_MASK 0x3FFFFFF0L #define DCHUBBUB_CTRL_STATUS__CSTATE_SWATH_CHK_GOOD_MODE_MASK 0x80000000L //DCHUBBUB_TIMEOUT_DETECTION_CTRL1 #define DCHUBBUB_TIMEOUT_DETECTION_CTRL1__DCHUBBUB_TIMEOUT_ERROR_STATUS__SHIFT 0x0 From ebacc134031a70a69d19ac267f3414bfeb0b6f07 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Sun, 27 Oct 2024 20:12:59 -0400 Subject: [PATCH 53/78] drm/amd/display: [FW Promotion] Release 0.0.241.0 - Add DPCS health check - Update USB4 PHY SSC - Fix FAMS2 SubVP Close to VBlank changes - Create VESA Aux-based backlight control path - Fix PSR1 CRC error during CTS test Acked-by: Rodrigo Siqueira Signed-off-by: Taimur Hassan Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 3aa6c60588b5f5..a9b90fa00b88ad 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -495,6 +495,7 @@ struct dmub_feature_caps { uint8_t gecc_enable; uint8_t replay_supported; uint8_t replay_reserved[3]; + uint8_t abm_aux_backlight_support; }; struct dmub_visual_confirm_color { @@ -754,7 +755,7 @@ union dmub_shared_state_ips_driver_signals { uint32_t allow_ips1 : 1; /**< 1 is IPS1 is allowed */ uint32_t allow_ips2 : 1; /**< 1 is IPS1 is allowed */ uint32_t allow_z10 : 1; /**< 1 if Z10 is allowed */ - uint32_t allow_idle : 1; /**< 1 if driver is allowing idle */ + uint32_t allow_idle: 1; /**< 1 if driver is allowing idle */ uint32_t reserved_bits : 27; /**< Reversed bits */ } bits; uint32_t all; @@ -4460,9 +4461,9 @@ struct dmub_cmd_abm_set_backlight_data { uint8_t backlight_control_type; /** - * Explicit padding to 4 byte boundary. + * AUX HW instance. */ - uint8_t pad[1]; + uint8_t aux_inst; /** * Minimum luminance in nits. From 38077562e0594a294eaf4d8e6bbd8c1c26c2540f Mon Sep 17 00:00:00 2001 From: Kaitlyn Tse Date: Thu, 3 Oct 2024 18:13:27 -0400 Subject: [PATCH 54/78] drm/amd/display: Implement new backlight_level_params structure [Why] Implement the new backlight_level_params structure as part of the VBAC framework, the information in this structure is needed to be passed down to the DMCUB to identify the backlight control type, to adjust the backlight of the panel and to perform any required conversions from PWM to nits or vice versa. [How] Modified existing functions to include the new backlight_level_params structure. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Kaitlyn Tse Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 ++- .../drm/amd/display/dc/core/dc_link_exports.c | 5 +- drivers/gpu/drm/amd/display/dc/dc.h | 4 +- drivers/gpu/drm/amd/display/dc/dc_types.h | 27 ++++++++++ .../amd/display/dc/hwss/dce110/dce110_hwseq.c | 6 +-- .../amd/display/dc/hwss/dcn21/dcn21_hwseq.c | 16 +++--- .../amd/display/dc/hwss/dcn21/dcn21_hwseq.h | 2 + .../amd/display/dc/hwss/dcn31/dcn31_hwseq.c | 49 +++++++++++++++++++ .../amd/display/dc/hwss/dcn31/dcn31_hwseq.h | 3 +- .../amd/display/dc/hwss/dcn31/dcn31_init.c | 2 +- .../amd/display/dc/hwss/dcn314/dcn314_init.c | 2 +- .../amd/display/dc/hwss/dcn32/dcn32_init.c | 2 +- .../amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 1 + .../amd/display/dc/hwss/dcn35/dcn35_init.c | 2 +- .../amd/display/dc/hwss/dcn351/dcn351_init.c | 2 +- .../amd/display/dc/hwss/dcn401/dcn401_init.c | 2 +- .../drm/amd/display/dc/hwss/hw_sequencer.h | 5 -- drivers/gpu/drm/amd/display/dc/inc/link.h | 3 +- .../link/protocols/link_edp_panel_control.c | 17 ++++--- .../link/protocols/link_edp_panel_control.h | 3 +- 20 files changed, 119 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 9876f4836ee0e3..7cfa0c91e7485c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4640,7 +4640,12 @@ static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, if (!rc) DRM_DEBUG("DM: Failed to update backlight via AUX on eDP[%d]\n", bl_idx); } else { - rc = dc_link_set_backlight_level(link, brightness, 0); + struct set_backlight_level_params backlight_level_params = { 0 }; + + backlight_level_params.backlight_pwm_u16_16 = brightness; + backlight_level_params.transition_time_in_ms = 0; + + rc = dc_link_set_backlight_level(link, &backlight_level_params); if (!rc) DRM_DEBUG("DM: Failed to update backlight on eDP[%d]\n", bl_idx); } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c index dfdfe22d9e8510..457d60eeb486c2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c @@ -430,11 +430,10 @@ bool dc_link_get_backlight_level_nits(struct dc_link *link, } bool dc_link_set_backlight_level(const struct dc_link *link, - uint32_t backlight_pwm_u16_16, - uint32_t frame_ramp) + struct set_backlight_level_params *backlight_level_params) { return link->dc->link_srv->edp_set_backlight_level(link, - backlight_pwm_u16_16, frame_ramp); + backlight_level_params); } bool dc_link_set_backlight_level_nits(struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 76130bbf2fe4b5..5cb58ca7789047 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -225,6 +225,7 @@ struct dc_dmub_caps { bool subvp_psr; bool gecc_enable; uint8_t fams_ver; + bool aux_backlight_support; }; struct dc_scl_caps { @@ -2210,8 +2211,7 @@ void dc_link_edp_panel_backlight_power_on(struct dc_link *link, * and 16 bit fractional, where 1.0 is max backlight value. */ bool dc_link_set_backlight_level(const struct dc_link *dc_link, - uint32_t backlight_pwm_u16_16, - uint32_t frame_ramp); + struct set_backlight_level_params *backlight_level_params); /* Set/get nits-based backlight level of an embedded panel (eDP, LVDS). */ bool dc_link_set_backlight_level_nits(struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index f0776484abb752..1fd030e3f4be9c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1303,4 +1303,31 @@ struct dc_commit_streams_params { enum dc_power_source_type power_source; }; +struct set_backlight_level_params { + /* backlight in pwm */ + uint32_t backlight_pwm_u16_16; + /* brightness ramping */ + uint32_t frame_ramp; + /* backlight control type + * 0: PWM backlight control + * 1: VESA AUX backlight control + * 2: AMD AUX backlight control + */ + enum backlight_control_type control_type; + /* backlight in millinits */ + uint32_t backlight_millinits; + /* transition time in ms */ + uint32_t transition_time_in_ms; + /* minimum luminance in nits */ + uint32_t min_luminance; + /* maximum luminance in nits */ + uint32_t max_luminance; + /* minimum backlight in pwm */ + uint32_t min_backlight_pwm; + /* maximum backlight in pwm */ + uint32_t max_backlight_pwm; + /* AUX HW instance */ + uint8_t aux_inst; +}; + #endif /* DC_TYPES_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 427fd6ea062a19..81f4c386c28751 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -3143,10 +3143,10 @@ static void dce110_set_cursor_attribute(struct pipe_ctx *pipe_ctx) } bool dce110_set_backlight_level(struct pipe_ctx *pipe_ctx, - struct set_backlight_level_params *params) + struct set_backlight_level_params *backlight_level_params) { - uint32_t backlight_pwm_u16_16 = params->backlight_pwm_u16_16; - uint32_t frame_ramp = params->frame_ramp; + uint32_t backlight_pwm_u16_16 = backlight_level_params->backlight_pwm_u16_16; + uint32_t frame_ramp = backlight_level_params->frame_ramp; struct dc_link *link = pipe_ctx->stream->link; struct dc *dc = link->ctx->dc; struct abm *abm = pipe_ctx->stream_res.abm; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c index 630e05f32c8065..61efb15572ff0d 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.c @@ -137,7 +137,7 @@ void dcn21_PLAT_58856_wa(struct dc_state *context, struct pipe_ctx *pipe_ctx) pipe_ctx->stream->dpms_off = true; } -static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, +bool dcn21_dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t option, uint32_t panel_inst, uint32_t pwrseq_inst) { union dmub_rb_cmd cmd; @@ -199,7 +199,7 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx) abm->funcs->set_pipe_ex(abm, otg_inst, SET_ABM_PIPE_IMMEDIATELY_DISABLE, panel_cntl->inst, panel_cntl->pwrseq_inst); } else { - dmub_abm_set_pipe(abm, + dcn21_dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_IMMEDIATELY_DISABLE, panel_cntl->inst, @@ -234,7 +234,7 @@ void dcn21_set_pipe(struct pipe_ctx *pipe_ctx) panel_cntl->inst, panel_cntl->pwrseq_inst); } else { - dmub_abm_set_pipe(abm, otg_inst, + dcn21_dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst, panel_cntl->pwrseq_inst); @@ -242,15 +242,15 @@ void dcn21_set_pipe(struct pipe_ctx *pipe_ctx) } bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx, - struct set_backlight_level_params *params) + struct set_backlight_level_params *backlight_level_params) { struct dc_context *dc = pipe_ctx->stream->ctx; struct abm *abm = pipe_ctx->stream_res.abm; struct timing_generator *tg = pipe_ctx->stream_res.tg; struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl; uint32_t otg_inst; - uint32_t backlight_pwm_u16_16 = params->backlight_pwm_u16_16; - uint32_t frame_ramp = params->frame_ramp; + uint32_t backlight_pwm_u16_16 = backlight_level_params->backlight_pwm_u16_16; + uint32_t frame_ramp = backlight_level_params->frame_ramp; if (!abm || !tg || !panel_cntl) return false; @@ -258,7 +258,7 @@ bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx, otg_inst = tg->inst; if (dc->dc->res_pool->dmcu) { - dce110_set_backlight_level(pipe_ctx, params); + dce110_set_backlight_level(pipe_ctx, backlight_level_params); return true; } @@ -269,7 +269,7 @@ bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx, panel_cntl->inst, panel_cntl->pwrseq_inst); } else { - dmub_abm_set_pipe(abm, + dcn21_dmub_abm_set_pipe(abm, otg_inst, SET_ABM_PIPE_NORMAL, panel_cntl->inst, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.h index a7eaaa4596be46..f72a27ac1bf1b3 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn21/dcn21_hwseq.h @@ -47,6 +47,8 @@ void dcn21_optimize_pwr_state( void dcn21_PLAT_58856_wa(struct dc_state *context, struct pipe_ctx *pipe_ctx); +bool dcn21_dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, + uint32_t option, uint32_t panel_inst, uint32_t pwrseq_inst); void dcn21_set_pipe(struct pipe_ctx *pipe_ctx); void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx); bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c index bfc78a42bc2a76..036cb7e9b5bb5c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c @@ -47,9 +47,11 @@ #include "dce/dmub_outbox.h" #include "link.h" #include "dcn10/dcn10_hwseq.h" +#include "dcn21/dcn21_hwseq.h" #include "inc/link_enc_cfg.h" #include "dcn30/dcn30_vpg.h" #include "dce/dce_i2c_hw.h" +#include "dce/dmub_abm_lcd.h" #define DC_LOGGER_INIT(logger) @@ -638,3 +640,50 @@ void dcn31_set_static_screen_control(struct pipe_ctx **pipe_ctx, pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(pipe_ctx[i]->stream_res.tg, triggers, params->num_frames); } + +static void dmub_abm_set_backlight(struct dc_context *dc, + struct set_backlight_level_params *backlight_level_params, uint32_t panel_inst) +{ + union dmub_rb_cmd cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.abm_set_backlight.header.type = DMUB_CMD__ABM; + cmd.abm_set_backlight.header.sub_type = DMUB_CMD__ABM_SET_BACKLIGHT; + cmd.abm_set_backlight.abm_set_backlight_data.frame_ramp = backlight_level_params->frame_ramp; + cmd.abm_set_backlight.abm_set_backlight_data.backlight_user_level = backlight_level_params->backlight_pwm_u16_16; + cmd.abm_set_backlight.abm_set_backlight_data.backlight_control_type = backlight_level_params->control_type; + cmd.abm_set_backlight.abm_set_backlight_data.min_luminance = backlight_level_params->min_luminance; + cmd.abm_set_backlight.abm_set_backlight_data.max_luminance = backlight_level_params->max_luminance; + cmd.abm_set_backlight.abm_set_backlight_data.min_backlight_pwm = backlight_level_params->min_backlight_pwm; + cmd.abm_set_backlight.abm_set_backlight_data.max_backlight_pwm = backlight_level_params->max_backlight_pwm; + cmd.abm_set_backlight.abm_set_backlight_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1; + cmd.abm_set_backlight.abm_set_backlight_data.panel_mask = (0x01 << panel_inst); + cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data); + + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); +} + +bool dcn31_set_backlight_level(struct pipe_ctx *pipe_ctx, + struct set_backlight_level_params *backlight_level_params) +{ + struct dc_context *dc = pipe_ctx->stream->ctx; + struct abm *abm = pipe_ctx->stream_res.abm; + struct timing_generator *tg = pipe_ctx->stream_res.tg; + struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl; + uint32_t otg_inst; + + if (!abm || !tg || !panel_cntl) + return false; + + otg_inst = tg->inst; + + dcn21_dmub_abm_set_pipe(abm, + otg_inst, + SET_ABM_PIPE_NORMAL, + panel_cntl->inst, + panel_cntl->pwrseq_inst); + + dmub_abm_set_backlight(dc, backlight_level_params, panel_cntl->inst); + + return true; +} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.h index b8bc939da1554f..0d09aa8cfb65d7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.h @@ -51,6 +51,8 @@ int dcn31_init_sys_ctx(struct dce_hwseq *hws, struct dc *dc, struct dc_phy_addr_ void dcn31_reset_hw_ctx_wrap( struct dc *dc, struct dc_state *context); +bool dcn31_set_backlight_level(struct pipe_ctx *pipe_ctx, + struct set_backlight_level_params *params); bool dcn31_is_abm_supported(struct dc *dc, struct dc_state *context, struct dc_stream_state *stream); void dcn31_init_pipes(struct dc *dc, struct dc_state *context); @@ -59,5 +61,4 @@ void dcn31_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable); void dcn31_set_static_screen_control(struct pipe_ctx **pipe_ctx, int num_pipes, const struct dc_static_screen_params *params); - #endif /* __DC_HWSS_DCN31_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c index 56f3c70d4b5548..5f8f45b4872050 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c @@ -98,7 +98,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .set_backlight_level = dcn21_set_backlight_level, + .set_backlight_level = dcn31_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, .enable_lvds_link_output = dce110_enable_lvds_link_output, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c index 68e6de6b5758d5..6bdfbf22ce8728 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c @@ -100,7 +100,7 @@ static const struct hw_sequencer_funcs dcn314_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .set_backlight_level = dcn21_set_backlight_level, + .set_backlight_level = dcn31_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, .enable_lvds_link_output = dce110_enable_lvds_link_output, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c index dbcd2dfb19c125..5ecee7e320da96 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c @@ -98,7 +98,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = { .calc_vupdate_position = dcn10_calc_vupdate_position, .apply_idle_power_optimizations = dcn32_apply_idle_power_optimizations, .does_plane_fit_in_mall = NULL, - .set_backlight_level = dcn21_set_backlight_level, + .set_backlight_level = dcn31_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .hardware_release = dcn30_hardware_release, .set_pipe = dcn21_set_pipe, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 3cb4e99074113e..e599cdc465bfd2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -309,6 +309,7 @@ void dcn35_init_hw(struct dc *dc) dc_dmub_srv_query_caps_cmd(dc->ctx->dmub_srv); dc->caps.dmub_caps.psr = dc->ctx->dmub_srv->dmub->feature_caps.psr; dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; + dc->caps.dmub_caps.aux_backlight_support = dc->ctx->dmub_srv->dmub->feature_caps.abm_aux_backlight_support; } if (dc->res_pool->pg_cntl) { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index 55dc5799e725a0..fd67779c27a948 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -101,7 +101,7 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .set_backlight_level = dcn21_set_backlight_level, + .set_backlight_level = dcn31_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, .enable_lvds_link_output = dce110_enable_lvds_link_output, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index a93864b63d48fd..3c275a1eff589b 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -100,7 +100,7 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .set_backlight_level = dcn21_set_backlight_level, + .set_backlight_level = dcn31_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, .enable_lvds_link_output = dce110_enable_lvds_link_output, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index c73305e57d39af..23e4f208152efd 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -77,7 +77,7 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .calc_vupdate_position = dcn10_calc_vupdate_position, .apply_idle_power_optimizations = dcn401_apply_idle_power_optimizations, .does_plane_fit_in_mall = NULL, - .set_backlight_level = dcn21_set_backlight_level, + .set_backlight_level = dcn31_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .hardware_release = dcn401_hardware_release, .set_pipe = dcn21_set_pipe, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index 1df17c54f3a9f0..66fdc5805d0a93 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -174,11 +174,6 @@ union block_sequence_params { struct fams2_global_control_lock_fast_params fams2_global_control_lock_fast_params; }; -struct set_backlight_level_params { - uint32_t backlight_pwm_u16_16; - uint32_t frame_ramp; -}; - enum block_sequence_func { DMUB_SUBVP_PIPE_CONTROL_LOCK_FAST = 0, OPTC_PIPE_CONTROL_LOCK, diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link.h index 72a8479e1f2d73..f04292086c08a0 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link.h @@ -248,8 +248,7 @@ struct link_service { uint32_t *backlight_millinits_avg, uint32_t *backlight_millinits_peak); bool (*edp_set_backlight_level)(const struct dc_link *link, - uint32_t backlight_pwm_u16_16, - uint32_t frame_ramp); + struct set_backlight_level_params *backlight_level_params); bool (*edp_set_backlight_level_nits)(struct dc_link *link, bool isHDR, uint32_t backlight_millinits, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 43a467f6ce7bd9..e0e3bb86535952 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -161,7 +161,9 @@ bool edp_set_backlight_level_nits(struct dc_link *link, link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT)) return false; - if (link->backlight_control_type == BACKLIGHT_CONTROL_VESA_AUX) { + // use internal backlight control if dmub capabilities are not present + if (link->backlight_control_type == BACKLIGHT_CONTROL_VESA_AUX && + !link->dc->caps.dmub_caps.aux_backlight_support) { uint8_t backlight_enable = 0; struct target_luminance_value *target_luminance = NULL; @@ -185,7 +187,7 @@ bool edp_set_backlight_level_nits(struct dc_link *link, (uint8_t *)(target_luminance), sizeof(struct target_luminance_value)) != DC_OK) return false; - } else { + } else if (link->backlight_control_type == BACKLIGHT_CONTROL_AMD_AUX) { struct dpcd_source_backlight_set dpcd_backlight_set; *(uint32_t *)&dpcd_backlight_set.backlight_level_millinits = backlight_millinits; *(uint16_t *)&dpcd_backlight_set.backlight_transition_time_ms = (uint16_t)transition_time_in_ms; @@ -517,17 +519,17 @@ static struct pipe_ctx *get_pipe_from_link(const struct dc_link *link) } bool edp_set_backlight_level(const struct dc_link *link, - uint32_t backlight_pwm_u16_16, - uint32_t frame_ramp) + struct set_backlight_level_params *backlight_level_params) { struct dc *dc = link->ctx->dc; + uint32_t backlight_pwm_u16_16 = backlight_level_params->backlight_pwm_u16_16; + uint32_t frame_ramp = backlight_level_params->frame_ramp; DC_LOGGER_INIT(link->ctx->logger); DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n", backlight_pwm_u16_16, backlight_pwm_u16_16); if (dc_is_embedded_signal(link->connector_signal)) { struct pipe_ctx *pipe_ctx = get_pipe_from_link(link); - struct set_backlight_level_params backlight_level_param = { 0 }; if (link->panel_cntl) link->panel_cntl->stored_backlight_registers.USER_LEVEL = backlight_pwm_u16_16; @@ -542,12 +544,11 @@ bool edp_set_backlight_level(const struct dc_link *link, return false; } - backlight_level_param.backlight_pwm_u16_16 = backlight_pwm_u16_16; - backlight_level_param.frame_ramp = frame_ramp; + backlight_level_params->frame_ramp = frame_ramp; dc->hwss.set_backlight_level( pipe_ctx, - &backlight_level_param); + backlight_level_params); } return true; } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h index 30dc8c24c008c4..bcfa6ac5d4e7b6 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h @@ -36,8 +36,7 @@ bool edp_get_backlight_level_nits(struct dc_link *link, uint32_t *backlight_millinits_avg, uint32_t *backlight_millinits_peak); bool edp_set_backlight_level(const struct dc_link *link, - uint32_t backlight_pwm_u16_16, - uint32_t frame_ramp); + struct set_backlight_level_params *backlight_level_params); bool edp_set_backlight_level_nits(struct dc_link *link, bool isHDR, uint32_t backlight_millinits, From abdd2768d7630bc8ec3403aea24f4197bada3c1f Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Thu, 17 Oct 2024 18:15:10 -0400 Subject: [PATCH 55/78] drm/amd/display: Prune Invalid Modes For HDMI Output [Why] 1. HDMI does not have 6 bpc support. Having 6 bpc pass validation does not comply with spec. 2. Validate 420 only for native HDMI, but not apply to pcon use case. 3. Current mode validation log is not readable. [how] 1. Cap 8 bpc for dp-hdmi converter. 2. Validate yuv420 for pcon use case as well, if rgb/yuv444 8bpc cannot fit into pcon bw limitation of the link from the converter to HDMI sink. 3. Add readable pixel_format and color_depth into debug log. Reviewed-by: Wayne Lin Signed-off-by: Fangzhi Zuo Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 19 ++++++--- .../gpu/drm/amd/display/dc/core/dc_debug.c | 40 +++++++++++++++++++ .../gpu/drm/amd/display/dc/core/dc_stream.c | 6 +-- .../gpu/drm/amd/display/dc/inc/core_status.h | 2 + 4 files changed, 59 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7cfa0c91e7485c..16653e0573b19a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7323,10 +7323,15 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector, const struct drm_connector_state *drm_state = dm_state ? &dm_state->base : NULL; int requested_bpc = drm_state ? drm_state->max_requested_bpc : 8; enum dc_status dc_result = DC_OK; + uint8_t bpc_limit = 6; if (!dm_state) return NULL; + if (aconnector->dc_link->connector_signal == SIGNAL_TYPE_HDMI_TYPE_A || + aconnector->dc_link->dpcd_caps.dongle_type == DISPLAY_DONGLE_DP_HDMI_CONVERTER) + bpc_limit = 8; + do { stream = create_stream_for_sink(connector, drm_mode, dm_state, old_stream, @@ -7347,11 +7352,12 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector, dc_result = dm_validate_stream_and_context(adev->dm.dc, stream); if (dc_result != DC_OK) { - DRM_DEBUG_KMS("Mode %dx%d (clk %d) failed DC validation with error %d (%s)\n", + DRM_DEBUG_KMS("Mode %dx%d (clk %d) pixel_encoding:%s color_depth:%s failed validation -- %s\n", drm_mode->hdisplay, drm_mode->vdisplay, drm_mode->clock, - dc_result, + dc_pixel_encoding_to_str(stream->timing.pixel_encoding), + dc_color_depth_to_str(stream->timing.display_color_depth), dc_status_to_str(dc_result)); dc_stream_release(stream); @@ -7359,10 +7365,13 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector, requested_bpc -= 2; /* lower bpc to retry validation */ } - } while (stream == NULL && requested_bpc >= 6); + } while (stream == NULL && requested_bpc >= bpc_limit); - if (dc_result == DC_FAIL_ENC_VALIDATE && !aconnector->force_yuv420_output) { - DRM_DEBUG_KMS("Retry forcing YCbCr420 encoding\n"); + if ((dc_result == DC_FAIL_ENC_VALIDATE || + dc_result == DC_EXCEED_DONGLE_CAP) && + !aconnector->force_yuv420_output) { + DRM_DEBUG_KMS("%s:%d Retry forcing yuv420 encoding\n", + __func__, __LINE__); aconnector->force_yuv420_output = true; stream = create_validate_stream_for_sink(aconnector, drm_mode, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c index 0bb25c53724386..af1ea579256003 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c @@ -392,3 +392,43 @@ char *dc_status_to_str(enum dc_status status) return "Unexpected status error"; } + +char *dc_pixel_encoding_to_str(enum dc_pixel_encoding pixel_encoding) +{ + switch (pixel_encoding) { + case PIXEL_ENCODING_RGB: + return "RGB"; + case PIXEL_ENCODING_YCBCR422: + return "YUV422"; + case PIXEL_ENCODING_YCBCR444: + return "YUV444"; + case PIXEL_ENCODING_YCBCR420: + return "YUV420"; + default: + return "Unknown"; + } +} + +char *dc_color_depth_to_str(enum dc_color_depth color_depth) +{ + switch (color_depth) { + case COLOR_DEPTH_666: + return "6-bpc"; + case COLOR_DEPTH_888: + return "8-bpc"; + case COLOR_DEPTH_101010: + return "10-bpc"; + case COLOR_DEPTH_121212: + return "12-bpc"; + case COLOR_DEPTH_141414: + return "14-bpc"; + case COLOR_DEPTH_161616: + return "16-bpc"; + case COLOR_DEPTH_999: + return "9-bpc"; + case COLOR_DEPTH_111111: + return "11-bpc"; + default: + return "Unknown"; + } +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 8b0632104aef07..55dc482d9b3660 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -812,12 +812,12 @@ void dc_stream_log(const struct dc *dc, const struct dc_stream_state *stream) stream->dst.height, stream->output_color_space); DC_LOG_DC( - "\tpix_clk_khz: %d, h_total: %d, v_total: %d, pixelencoder:%d, displaycolorDepth:%d\n", + "\tpix_clk_khz: %d, h_total: %d, v_total: %d, pixel_encoding:%s, color_depth:%s\n", stream->timing.pix_clk_100hz / 10, stream->timing.h_total, stream->timing.v_total, - stream->timing.pixel_encoding, - stream->timing.display_color_depth); + dc_pixel_encoding_to_str(stream->timing.pixel_encoding), + dc_color_depth_to_str(stream->timing.display_color_depth)); DC_LOG_DC( "\tlink: %d\n", stream->link->link_index); diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_status.h b/drivers/gpu/drm/amd/display/dc/inc/core_status.h index fa5edd03d00439..b5afd8c3103dba 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_status.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_status.h @@ -60,5 +60,7 @@ enum dc_status { }; char *dc_status_to_str(enum dc_status status); +char *dc_pixel_encoding_to_str(enum dc_pixel_encoding pixel_encoding); +char *dc_color_depth_to_str(enum dc_color_depth color_depth); #endif /* _CORE_STATUS_H_ */ From 69516fbdba6c809c70a2c8c633c3a34361e9d3f0 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Sun, 27 Oct 2024 22:37:01 -0400 Subject: [PATCH 56/78] drm/amd/display: 3.2.308 This version brings along following fixes: - Prune Invalid Modes for HDMI Output - SPL Cleanup - Fix brightness level not retained over reboot - Remove inaccessible registers from DMU diagnostics Acked-by: Rodrigo Siqueira Signed-off-by: Aric Cyr Signed-off-by: Zaeem Mohamed Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 5cb58ca7789047..ad9ce3d0bfcf54 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.307" +#define DC_VER "3.2.308" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 91c9e221fe2553edf2db71627d8453f083de87a1 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Thu, 31 Oct 2024 16:28:48 +0100 Subject: [PATCH 57/78] drm/amdgpu: prevent NULL pointer dereference if ATIF is not supported acpi_evaluate_object() may return AE_NOT_FOUND (failure), which would result in dereferencing buffer.pointer (obj) while being NULL. Although this case may be unrealistic for the current code, it is still better to protect against possible bugs. Bail out also when status is AE_NOT_FOUND. This fixes 1 FORWARD_NULL issue reported by Coverity Report: CID 1600951: Null pointer dereferences (FORWARD_NULL) Signed-off-by: Antonio Quartulli Fixes: c9b7c809b89f ("drm/amd: Guard against bad data for ATIF ACPI method") Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20241031152848.4716-1-antonio@mandelbit.com Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index cce85389427f35..b8d4e07d2043ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -172,8 +172,8 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif, &buffer); obj = (union acpi_object *)buffer.pointer; - /* Fail if calling the method fails and ATIF is supported */ - if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { + /* Fail if calling the method fails */ + if (ACPI_FAILURE(status)) { DRM_DEBUG_DRIVER("failed to evaluate ATIF got %s\n", acpi_format_exception(status)); kfree(obj); From 136ce12bd5907388cb4e9aa63ee5c9c8c441640b Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 1 Nov 2024 11:55:25 +0800 Subject: [PATCH 58/78] drm/amd/pm: always pick the pptable from IFWI always pick the pptable from IFWI on smu v14.0.2/3 Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 65 +------------------ 1 file changed, 1 insertion(+), 64 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index cefe10b95d8ede..884938d69fcae9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -367,54 +367,6 @@ static int smu_v14_0_2_store_powerplay_table(struct smu_context *smu) return 0; } -#ifndef atom_smc_dpm_info_table_14_0_0 -struct atom_smc_dpm_info_table_14_0_0 { - struct atom_common_table_header table_header; - BoardTable_t BoardTable; -}; -#endif - -static int smu_v14_0_2_append_powerplay_table(struct smu_context *smu) -{ - struct smu_table_context *table_context = &smu->smu_table; - PPTable_t *smc_pptable = table_context->driver_pptable; - struct atom_smc_dpm_info_table_14_0_0 *smc_dpm_table; - BoardTable_t *BoardTable = &smc_pptable->BoardTable; - int index, ret; - - index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, - smc_dpm_info); - - ret = amdgpu_atombios_get_data_table(smu->adev, index, NULL, NULL, NULL, - (uint8_t **)&smc_dpm_table); - if (ret) - return ret; - - memcpy(BoardTable, &smc_dpm_table->BoardTable, sizeof(BoardTable_t)); - - return 0; -} - -#if 0 -static int smu_v14_0_2_get_pptable_from_pmfw(struct smu_context *smu, - void **table, - uint32_t *size) -{ - struct smu_table_context *smu_table = &smu->smu_table; - void *combo_pptable = smu_table->combo_pptable; - int ret = 0; - - ret = smu_cmn_get_combo_pptable(smu); - if (ret) - return ret; - - *table = combo_pptable; - *size = sizeof(struct smu_14_0_powerplay_table); - - return 0; -} -#endif - static int smu_v14_0_2_get_pptable_from_pmfw(struct smu_context *smu, void **table, uint32_t *size) @@ -436,16 +388,12 @@ static int smu_v14_0_2_get_pptable_from_pmfw(struct smu_context *smu, static int smu_v14_0_2_setup_pptable(struct smu_context *smu) { struct smu_table_context *smu_table = &smu->smu_table; - struct amdgpu_device *adev = smu->adev; int ret = 0; if (amdgpu_sriov_vf(smu->adev)) return 0; - if (!adev->scpm_enabled) - ret = smu_v14_0_setup_pptable(smu); - else - ret = smu_v14_0_2_get_pptable_from_pmfw(smu, + ret = smu_v14_0_2_get_pptable_from_pmfw(smu, &smu_table->power_play_table, &smu_table->power_play_table_size); if (ret) @@ -455,16 +403,6 @@ static int smu_v14_0_2_setup_pptable(struct smu_context *smu) if (ret) return ret; - /* - * With SCPM enabled, the operation below will be handled - * by PSP. Driver involvment is unnecessary and useless. - */ - if (!adev->scpm_enabled) { - ret = smu_v14_0_2_append_powerplay_table(smu); - if (ret) - return ret; - } - ret = smu_v14_0_2_check_powerplay_table(smu); if (ret) return ret; @@ -2786,7 +2724,6 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .check_fw_status = smu_v14_0_check_fw_status, .setup_pptable = smu_v14_0_2_setup_pptable, .check_fw_version = smu_v14_0_check_fw_version, - .write_pptable = smu_cmn_write_pptable, .set_driver_table_location = smu_v14_0_set_driver_table_location, .system_features_control = smu_v14_0_system_features_control, .set_allowed_mask = smu_v14_0_set_allowed_mask, From 922f0e00017b09d9d47e3efac008c8b20ed546a0 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 4 Oct 2024 09:12:39 +0530 Subject: [PATCH 59/78] drm/amdkfd: Use dynamic allocation for CU occupancy array in 'kfd_get_cu_occupancy()' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `kfd_get_cu_occupancy` function previously declared a large `cu_occupancy` array as a local variable, which could lead to stack overflows due to excessive stack usage. This commit replaces the static array allocation with dynamic memory allocation using `kcalloc`, thereby reducing the stack size. This change avoids the risk of stack overflows in kernel space, in scenarios where `AMDGPU_MAX_QUEUES` is large. The allocated memory is freed using `kfree` before the function returns to prevent memory leaks. Fixes the below with gcc W=1: drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_process.c: In function ‘kfd_get_cu_occupancy’: drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_process.c:322:1: warning: the frame size of 1056 bytes is larger than 1024 bytes [-Wframe-larger-than=] 322 | } | ^ Fixes: 6ae9e1aba97e ("drm/amdkfd: Update logic for CU occupancy calculations") Cc: Harish Kasiviswanathan Cc: Felix Kuehling Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Mukul Joshi Reviewed-by: Mukul Joshi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index d4aa843aacfdd9..6bab6fc6a35d61 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -271,11 +271,9 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) struct kfd_process *proc = NULL; struct kfd_process_device *pdd = NULL; int i; - struct kfd_cu_occupancy cu_occupancy[AMDGPU_MAX_QUEUES]; + struct kfd_cu_occupancy *cu_occupancy; u32 queue_format; - memset(cu_occupancy, 0x0, sizeof(cu_occupancy)); - pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy); dev = pdd->dev; if (dev->kfd2kgd->get_cu_occupancy == NULL) @@ -293,6 +291,10 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) wave_cnt = 0; max_waves_per_cu = 0; + cu_occupancy = kcalloc(AMDGPU_MAX_QUEUES, sizeof(*cu_occupancy), GFP_KERNEL); + if (!cu_occupancy) + return -ENOMEM; + /* * For GFX 9.4.3, fetch the CU occupancy from the first XCC in the partition. * For AQL queues, because of cooperative dispatch we multiply the wave count @@ -318,6 +320,7 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) /* Translate wave count to number of compute units */ cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu; + kfree(cu_occupancy); return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt); } From afe260df55ac280cd56306248cb6d8a6b0db095c Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Thu, 24 Oct 2024 13:40:39 +0800 Subject: [PATCH 60/78] drm/amdgpu: skip amdgpu_device_cache_pci_state under sriov Under sriov, host driver will save and restore vf pci cfg space during reset. And during device init, under sriov, pci_restore_state happens after fullaccess released, and it can have race condition with mmio protection enable from host side leading to missing interrupts. So skip amdgpu_device_cache_pci_state for sriov. Signed-off-by: Victor Zhao Acked-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index a7b55d6ac5c639..d97417c4c8e830 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -6451,6 +6451,9 @@ bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) struct amdgpu_device *adev = drm_to_adev(dev); int r; + if (amdgpu_sriov_vf(adev)) + return false; + r = pci_save_state(pdev); if (!r) { kfree(adev->pci_state); From b78612939de33ffd247f3d39eaca7fb2648801ba Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Thu, 24 Oct 2024 16:51:05 +0800 Subject: [PATCH 61/78] drm/amdgpu: Fix dummy_read_page overlapping mappings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the dma_map_page_attrs() with DMA_ATTR_SKIP_CPU_SYNC attribute setting to handle the dummy page overlapping mappings. Signed-off-by: Prike Liang Suggested-by: Christian König Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 256b95232de541..b2033f8352f50e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -78,8 +78,9 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev) if (adev->dummy_page_addr) return 0; - adev->dummy_page_addr = dma_map_page(&adev->pdev->dev, dummy_page, 0, - PAGE_SIZE, DMA_BIDIRECTIONAL); + adev->dummy_page_addr = dma_map_page_attrs(&adev->pdev->dev, dummy_page, 0, + PAGE_SIZE, DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); if (dma_mapping_error(&adev->pdev->dev, adev->dummy_page_addr)) { dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n"); adev->dummy_page_addr = 0; @@ -99,8 +100,9 @@ void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev) { if (!adev->dummy_page_addr) return; - dma_unmap_page(&adev->pdev->dev, adev->dummy_page_addr, PAGE_SIZE, - DMA_BIDIRECTIONAL); + dma_unmap_page_attrs(&adev->pdev->dev, adev->dummy_page_addr, PAGE_SIZE, + DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); adev->dummy_page_addr = 0; } From c5c63d9cb5d3bbb2fc5973757616b17629795829 Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Tue, 29 Oct 2024 10:11:05 +0800 Subject: [PATCH 62/78] drm/amdgpu: add amdgpu_gfx_sched_mask and amdgpu_compute_sched_mask debugfs compute/gfx may have multiple rings on some hardware. In some cases, userspace wants to run jobs on a specific ring for validation purposes. This debugfs entry helps to disable or enable submitting jobs to a specific ring. This entry is populated only if there are at least two or more cores in the gfx/compute ip. Signed-off-by: Jesse Zhang Suggested-by: Alex Deucher Reviewed-by: Tim Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 141 ++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 + 3 files changed, 145 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 37d8657f077631..6e3f657cab9cd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2096,6 +2096,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_debugfs_umsch_fwlog_init(adev, &adev->umsch_mm); amdgpu_debugfs_jpeg_sched_mask_init(adev); + amdgpu_debugfs_gfx_sched_mask_init(adev); + amdgpu_debugfs_compute_sched_mask_init(adev); amdgpu_ras_debugfs_create_all(adev); amdgpu_rap_debugfs_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index b8cc4b146bdc64..63f5fa736f3264 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1917,3 +1917,144 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) } mutex_unlock(&adev->enforce_isolation_mutex); } + +/* + * debugfs for to enable/disable gfx job submission to specific core. + */ +#if defined(CONFIG_DEBUG_FS) +static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + + mask = (1 << adev->gfx.num_gfx_rings) - 1; + if ((val & mask) == 0) + return -EINVAL; + + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { + ring = &adev->gfx.gfx_ring[i]; + if (val & (1 << i)) + ring->sched.ready = true; + else + ring->sched.ready = false; + } + /* publish sched.ready flag update effective immediately across smp */ + smp_rmb(); + return 0; +} + +static int amdgpu_debugfs_gfx_sched_mask_get(void *data, u64 *val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { + ring = &adev->gfx.gfx_ring[i]; + if (ring->sched.ready) + mask |= 1 << i; + } + + *val = mask; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_gfx_sched_mask_fops, + amdgpu_debugfs_gfx_sched_mask_get, + amdgpu_debugfs_gfx_sched_mask_set, "%llx\n"); + +#endif + +void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + char name[32]; + + if (!(adev->gfx.num_gfx_rings > 1)) + return; + sprintf(name, "amdgpu_gfx_sched_mask"); + debugfs_create_file(name, 0600, root, adev, + &amdgpu_debugfs_gfx_sched_mask_fops); +#endif +} + +/* + * debugfs for to enable/disable compute job submission to specific core. + */ +#if defined(CONFIG_DEBUG_FS) +static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + + mask = (1 << adev->gfx.num_compute_rings) - 1; + if ((val & mask) == 0) + return -EINVAL; + + for (i = 0; i < adev->gfx.num_compute_rings; ++i) { + ring = &adev->gfx.compute_ring[i]; + if (val & (1 << i)) + ring->sched.ready = true; + else + ring->sched.ready = false; + } + + /* publish sched.ready flag update effective immediately across smp */ + smp_rmb(); + return 0; +} + +static int amdgpu_debugfs_compute_sched_mask_get(void *data, u64 *val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + for (i = 0; i < adev->gfx.num_compute_rings; ++i) { + ring = &adev->gfx.compute_ring[i]; + if (ring->sched.ready) + mask |= 1 << i; + } + + *val = mask; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_compute_sched_mask_fops, + amdgpu_debugfs_compute_sched_mask_get, + amdgpu_debugfs_compute_sched_mask_set, "%llx\n"); + +#endif + +void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + char name[32]; + + if (!(adev->gfx.num_compute_rings > 1)) + return; + sprintf(name, "amdgpu_compute_sched_mask"); + debugfs_create_file(name, 0600, root, adev, + &amdgpu_debugfs_compute_sched_mask_fops); +#endif +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index af9dbd760fee2e..7183831df0c396 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -584,6 +584,8 @@ void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev); void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work); void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring); void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring); +void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev); +void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev); static inline const char *amdgpu_gfx_compute_mode_desc(int mode) { From d2e3961ae37171811a3d442e601599b85711adcb Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Tue, 29 Oct 2024 10:14:35 +0800 Subject: [PATCH 63/78] drm/amdgpu: add amdgpu_sdma_sched_mask debugfs Userspace wants to run jobs on a specific sdma ring for verification purposes. This debugfs entry helps to disable or enable submitting jobs to a specific ring. This entry is populated only if there are at least two or more cores in the sdma ip. Signed-off-by: Jesse Zhang Suggested-by: Alex Deucher Reviewed-by: Tim Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 70 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 2 +- 3 files changed, 72 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 6e3f657cab9cd4..c446bfccea590f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2098,6 +2098,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_debugfs_jpeg_sched_mask_init(adev); amdgpu_debugfs_gfx_sched_mask_init(adev); amdgpu_debugfs_compute_sched_mask_init(adev); + amdgpu_debugfs_sdma_sched_mask_init(adev); amdgpu_ras_debugfs_create_all(adev); amdgpu_rap_debugfs_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 183a976ba29dd0..5868b4a32ea676 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -343,3 +343,73 @@ int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev) return 0; } + +/* + * debugfs for to enable/disable sdma job submission to specific core. + */ +#if defined(CONFIG_DEBUG_FS) +static int amdgpu_debugfs_sdma_sched_mask_set(void *data, u64 val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + + mask = (1 << adev->sdma.num_instances) - 1; + if ((val & mask) == 0) + return -EINVAL; + + for (i = 0; i < adev->sdma.num_instances; ++i) { + ring = &adev->sdma.instance[i].ring; + if (val & (1 << i)) + ring->sched.ready = true; + else + ring->sched.ready = false; + } + /* publish sched.ready flag update effective immediately across smp */ + smp_rmb(); + return 0; +} + +static int amdgpu_debugfs_sdma_sched_mask_get(void *data, u64 *val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + for (i = 0; i < adev->sdma.num_instances; ++i) { + ring = &adev->sdma.instance[i].ring; + if (ring->sched.ready) + mask |= 1 << i; + } + + *val = mask; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_sdma_sched_mask_fops, + amdgpu_debugfs_sdma_sched_mask_get, + amdgpu_debugfs_sdma_sched_mask_set, "%llx\n"); + +#endif + +void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + char name[32]; + + if (!(adev->sdma.num_instances > 1)) + return; + sprintf(name, "amdgpu_sdma_sched_mask"); + debugfs_create_file(name, 0600, root, adev, + &amdgpu_debugfs_sdma_sched_mask_fops); +#endif +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 087ce0f6fa0763..a37fcd9bb981a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -175,5 +175,5 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance, void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev, bool duplicate); int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev); - +void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev); #endif From 12e5df81bb1f006be2bc8341c732ebd966e573e4 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Thu, 17 Oct 2024 12:21:40 +0800 Subject: [PATCH 64/78] drm/amdgpu: Add nps_mode in RAS init_flag Add nps_mode in RAS init_flag. Signed-off-by: Candice Li Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 3 +++ drivers/gpu/drm/amd/amdgpu/ta_ras_if.h | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index ae24df65a3df7f..17cf10c0b72be7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1834,6 +1834,9 @@ int psp_ras_initialize(struct psp_context *psp) ras_cmd->ras_in_message.init_flags.xcc_mask = adev->gfx.xcc_mask; ras_cmd->ras_in_message.init_flags.channel_dis_num = hweight32(adev->gmc.m_half_use) * 2; + if (adev->gmc.gmc_funcs->query_mem_partition_mode) + ras_cmd->ras_in_message.init_flags.nps_mode = + adev->gmc.gmc_funcs->query_mem_partition_mode(adev); ret = psp_ta_load(psp, &psp->ras_context.context); diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 3ac56a9645ebdf..21b71a427b1fdf 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -113,6 +113,14 @@ enum ta_ras_address_type { TA_RAS_PA_TO_MCA, }; +enum ta_ras_nps_mode { + TA_RAS_UNKNOWN_MODE = 0, + TA_RAS_NPS1_MODE = 1, + TA_RAS_NPS2_MODE = 2, + TA_RAS_NPS4_MODE = 4, + TA_RAS_NPS8_MODE = 8, +}; + /* Input/output structures for RAS commands */ /**********************************************************/ @@ -139,6 +147,7 @@ struct ta_ras_init_flags { uint8_t dgpu_mode; uint16_t xcc_mask; uint8_t channel_dis_num; + uint8_t nps_mode; }; struct ta_ras_mca_addr { From 047767ddc93666704026c79c01554597375beb50 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 29 Oct 2024 10:47:26 +0530 Subject: [PATCH 65/78] drm/amdgpu: Group gfx sysfs functions Make amdgpu_gfx_sysfs_init/fini functions as common entry points for all gfx related sysfs nodes. Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 40 +++++++++++++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 -- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 5 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 4 +-- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 4 +-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 +-- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 5 ---- 7 files changed, 40 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 63f5fa736f3264..2f3f09dfb1fd20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1602,7 +1602,7 @@ static DEVICE_ATTR(current_compute_partition, 0644, static DEVICE_ATTR(available_compute_partition, 0444, amdgpu_gfx_get_available_compute_partition, NULL); -int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) +static int amdgpu_gfx_sysfs_xcp_init(struct amdgpu_device *adev) { struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; bool xcp_switch_supported; @@ -1629,7 +1629,7 @@ int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) return r; } -void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) +static void amdgpu_gfx_sysfs_xcp_fini(struct amdgpu_device *adev) { struct amdgpu_xcp_mgr *xcp_mgr = adev->xcp_mgr; bool xcp_switch_supported; @@ -1646,25 +1646,47 @@ void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) &dev_attr_available_compute_partition); } -int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev) +static int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev) { int r; r = device_create_file(adev->dev, &dev_attr_enforce_isolation); if (r) return r; + if (adev->gfx.enable_cleaner_shader) + r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader); - r = device_create_file(adev->dev, &dev_attr_run_cleaner_shader); - if (r) + return r; +} + +static void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev) +{ + device_remove_file(adev->dev, &dev_attr_enforce_isolation); + if (adev->gfx.enable_cleaner_shader) + device_remove_file(adev->dev, &dev_attr_run_cleaner_shader); +} + +int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_gfx_sysfs_xcp_init(adev); + if (r) { + dev_err(adev->dev, "failed to create xcp sysfs files"); return r; + } - return 0; + r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + if (r) + dev_err(adev->dev, "failed to create isolation sysfs files"); + + return r; } -void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev) +void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) { - device_remove_file(adev->dev, &dev_attr_enforce_isolation); - device_remove_file(adev->dev, &dev_attr_run_cleaner_shader); + amdgpu_gfx_sysfs_xcp_fini(adev); + amdgpu_gfx_sysfs_isolation_shader_fini(adev); } int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 7183831df0c396..fd73e527f44607 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -579,8 +579,6 @@ void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev); void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, unsigned int cleaner_shader_size, const void *cleaner_shader_ptr); -int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev); -void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev); void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work); void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring); void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 9da95b25e15844..d1a18ca584dd18 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4853,9 +4853,10 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) gfx_v10_0_alloc_ip_dump(adev); - r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + r = amdgpu_gfx_sysfs_init(adev); if (r) return r; + return 0; } @@ -4907,7 +4908,7 @@ static int gfx_v10_0_sw_fini(struct amdgpu_ip_block *ip_block) gfx_v10_0_rlc_backdoor_autoload_buffer_fini(adev); gfx_v10_0_free_microcode(adev); - amdgpu_gfx_sysfs_isolation_shader_fini(adev); + amdgpu_gfx_sysfs_fini(adev); kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 894fc04201c3ab..cfe17a36b8ee4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1708,7 +1708,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) gfx_v11_0_alloc_ip_dump(adev); - r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + r = amdgpu_gfx_sysfs_init(adev); if (r) return r; @@ -1773,7 +1773,7 @@ static int gfx_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) gfx_v11_0_free_microcode(adev); - amdgpu_gfx_sysfs_isolation_shader_fini(adev); + amdgpu_gfx_sysfs_fini(adev); kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 9fec28d8a5fcaf..1b99f90cd19340 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1466,7 +1466,7 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) gfx_v12_0_alloc_ip_dump(adev); - r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + r = amdgpu_gfx_sysfs_init(adev); if (r) return r; @@ -1529,7 +1529,7 @@ static int gfx_v12_0_sw_fini(struct amdgpu_ip_block *ip_block) gfx_v12_0_free_microcode(adev); - amdgpu_gfx_sysfs_isolation_shader_fini(adev); + amdgpu_gfx_sysfs_fini(adev); kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index e9248a855ba770..a880dce16ae2f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2402,7 +2402,7 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block) gfx_v9_0_alloc_ip_dump(adev); - r = amdgpu_gfx_sysfs_isolation_shader_init(adev); + r = amdgpu_gfx_sysfs_init(adev); if (r) return r; @@ -2443,7 +2443,7 @@ static int gfx_v9_0_sw_fini(struct amdgpu_ip_block *ip_block) } gfx_v9_0_free_microcode(adev); - amdgpu_gfx_sysfs_isolation_shader_fini(adev); + amdgpu_gfx_sysfs_fini(adev); kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 016290f005922d..983088805c3a26 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1171,10 +1171,6 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block) gfx_v9_4_3_alloc_ip_dump(adev); - r = amdgpu_gfx_sysfs_isolation_shader_init(adev); - if (r) - return r; - return 0; } @@ -1199,7 +1195,6 @@ static int gfx_v9_4_3_sw_fini(struct amdgpu_ip_block *ip_block) amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); gfx_v9_4_3_free_microcode(adev); amdgpu_gfx_sysfs_fini(adev); - amdgpu_gfx_sysfs_isolation_shader_fini(adev); kfree(adev->gfx.ip_dump_core); kfree(adev->gfx.ip_dump_compute_queues); From 81db4eab2847094137a266616954e5f1c6e33575 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 4 Nov 2024 10:03:15 +0530 Subject: [PATCH 66/78] drm/amdgpu: Skip IP coredump for RAS errors For RAS errors, source of error is known. Skip the core dump of IP states. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 1d9eda883bb88c..b772299e106711 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2605,6 +2605,7 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; reset_context.src = AMDGPU_RESET_SRC_RAS; + set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); /* Perform full reset in fatal error mode */ if (!amdgpu_ras_is_poison_mode_supported(ras->adev)) From e5ad71779df6f448d6edb910bc635680b9419ec0 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 30 Oct 2024 13:54:49 +0530 Subject: [PATCH 67/78] drm/amdgpu: Add compatible NPS mode info Populate the compatible NPS modes also for providing partition configuration details through sysfs. Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h | 1 + drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h index 7ac89d78a5bf79..b63f53242c5734 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h @@ -77,6 +77,7 @@ struct amdgpu_xcp_cfg { u8 num_res; struct amdgpu_xcp_mgr *xcp_mgr; struct kobject kobj; + u16 compatible_nps_modes; }; struct amdgpu_xcp_ip_funcs { diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 890976b7ce7790..fccccea0d2d0eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -455,6 +455,7 @@ static int aqua_vanjaram_get_xcp_res_info(struct amdgpu_xcp_mgr *xcp_mgr, int max_res[AMDGPU_XCP_RES_MAX] = {}; bool res_lt_xcp; int num_xcp, i; + u16 nps_modes; if (!(xcp_mgr->supp_xcp_modes & BIT(mode))) return -EINVAL; @@ -467,23 +468,33 @@ static int aqua_vanjaram_get_xcp_res_info(struct amdgpu_xcp_mgr *xcp_mgr, switch (mode) { case AMDGPU_SPX_PARTITION_MODE: num_xcp = 1; + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE); break; case AMDGPU_DPX_PARTITION_MODE: num_xcp = 2; + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE); break; case AMDGPU_TPX_PARTITION_MODE: num_xcp = 3; + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS4_PARTITION_MODE); break; case AMDGPU_QPX_PARTITION_MODE: num_xcp = 4; + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS4_PARTITION_MODE); break; case AMDGPU_CPX_PARTITION_MODE: num_xcp = NUM_XCC(adev->gfx.xcc_mask); + nps_modes = BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS4_PARTITION_MODE); break; default: return -EINVAL; } + xcp_cfg->compatible_nps_modes = + (adev->gmc.supported_nps_modes & nps_modes); xcp_cfg->num_res = ARRAY_SIZE(max_res); for (i = 0; i < xcp_cfg->num_res; i++) { From 8cc438be5d49b8326b2fcade0bdb7e6a97df9e0b Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Wed, 30 Oct 2024 13:22:44 +0800 Subject: [PATCH 68/78] drm/amd/pm: correct the workload setting Correct the workload setting in order not to mix the setting with the end user. Update the workload mask accordingly. v2: changes as below: 1. the end user can not erase the workload from driver except default workload. 2. always shows the real highest priority workoad to the end user. 3. the real workload mask is combined with driver workload mask and end user workload mask. v3: apply this to the other ASICs as well. v4: simplify the code v5: refine the code based on the review comments. Signed-off-by: Kenneth Feng Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 49 +++++++++++++------ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 4 +- .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 5 +- .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 5 +- .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 5 +- .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 4 +- .../gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 4 +- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 20 ++++++-- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 5 +- .../drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 9 ++-- drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 8 +++ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h | 2 + 12 files changed, 84 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index ba97070acf9f71..aa7c57b587650e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1261,26 +1261,33 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block) smu->watermarks_bitmap = 0; smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + smu->user_dpm_profile.user_workload_mask = 0; atomic_set(&smu->smu_power.power_gate.vcn_gated, 1); atomic_set(&smu->smu_power.power_gate.jpeg_gated, 1); atomic_set(&smu->smu_power.power_gate.vpe_gated, 1); atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1); - smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0; - smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1; - smu->workload_prority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2; - smu->workload_prority[PP_SMC_POWER_PROFILE_VIDEO] = 3; - smu->workload_prority[PP_SMC_POWER_PROFILE_VR] = 4; - smu->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; - smu->workload_prority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; + smu->workload_priority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0; + smu->workload_priority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1; + smu->workload_priority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2; + smu->workload_priority[PP_SMC_POWER_PROFILE_VIDEO] = 3; + smu->workload_priority[PP_SMC_POWER_PROFILE_VR] = 4; + smu->workload_priority[PP_SMC_POWER_PROFILE_COMPUTE] = 5; + smu->workload_priority[PP_SMC_POWER_PROFILE_CUSTOM] = 6; if (smu->is_apu || - !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) - smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; - else - smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D]; + !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) { + smu->driver_workload_mask = + 1 << smu->workload_priority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT]; + } else { + smu->driver_workload_mask = + 1 << smu->workload_priority[PP_SMC_POWER_PROFILE_FULLSCREEN3D]; + smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; + } + smu->workload_mask = smu->driver_workload_mask | + smu->user_dpm_profile.user_workload_mask; smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D; smu->workload_setting[2] = PP_SMC_POWER_PROFILE_POWERSAVING; @@ -2355,17 +2362,20 @@ static int smu_switch_power_profile(void *handle, return -EINVAL; if (!en) { - smu->workload_mask &= ~(1 << smu->workload_prority[type]); + smu->driver_workload_mask &= ~(1 << smu->workload_priority[type]); index = fls(smu->workload_mask); index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; workload[0] = smu->workload_setting[index]; } else { - smu->workload_mask |= (1 << smu->workload_prority[type]); + smu->driver_workload_mask |= (1 << smu->workload_priority[type]); index = fls(smu->workload_mask); index = index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; workload[0] = smu->workload_setting[index]; } + smu->workload_mask = smu->driver_workload_mask | + smu->user_dpm_profile.user_workload_mask; + if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL && smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) smu_bump_power_profile_mode(smu, workload, 0); @@ -3056,12 +3066,23 @@ static int smu_set_power_profile_mode(void *handle, uint32_t param_size) { struct smu_context *smu = handle; + int ret; if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled || !smu->ppt_funcs->set_power_profile_mode) return -EOPNOTSUPP; - return smu_bump_power_profile_mode(smu, param, param_size); + if (smu->user_dpm_profile.user_workload_mask & + (1 << smu->workload_priority[param[param_size]])) + return 0; + + smu->user_dpm_profile.user_workload_mask = + (1 << smu->workload_priority[param[param_size]]); + smu->workload_mask = smu->user_dpm_profile.user_workload_mask | + smu->driver_workload_mask; + ret = smu_bump_power_profile_mode(smu, param, param_size); + + return ret; } static int smu_get_fan_control_mode(void *handle, u32 *fan_mode) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 4ebcc1e53ea2f6..d665c47f19b77b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -240,6 +240,7 @@ struct smu_user_dpm_profile { /* user clock state information */ uint32_t clk_mask[SMU_CLK_COUNT]; uint32_t clk_dependency; + uint32_t user_workload_mask; }; #define SMU_TABLE_INIT(tables, table_id, s, a, d) \ @@ -557,7 +558,8 @@ struct smu_context { bool disable_uclk_switch; uint32_t workload_mask; - uint32_t workload_prority[WORKLOAD_POLICY_MAX]; + uint32_t driver_workload_mask; + uint32_t workload_priority[WORKLOAD_POLICY_MAX]; uint32_t workload_setting[WORKLOAD_POLICY_MAX]; uint32_t power_profile_mode; uint32_t default_power_profile_mode; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index 6c8e80f6b592d0..4b36c230e43a09 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -1455,7 +1455,6 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu, return -EINVAL; } - if ((profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) && (smu->smc_fw_version >= 0x360d00)) { if (size != 10) @@ -1523,14 +1522,14 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu, ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - 1 << workload_type, + smu->workload_mask, NULL); if (ret) { dev_err(smu->adev->dev, "Fail to set workload type %d\n", workload_type); return ret; } - smu->power_profile_mode = profile_mode; + smu_cmn_assign_power_profile(smu); return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index faa8e7d9c3c628..211635dabed851 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -2083,10 +2083,13 @@ static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, u smu->power_profile_mode); if (workload_type < 0) return -EINVAL; + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - 1 << workload_type, NULL); + smu->workload_mask, NULL); if (ret) dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); + else + smu_cmn_assign_power_profile(smu); return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index a9cb28ce213375..d0ed0d060a8a3a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -1788,10 +1788,13 @@ static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long * smu->power_profile_mode); if (workload_type < 0) return -EINVAL; + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - 1 << workload_type, NULL); + smu->workload_mask, NULL); if (ret) dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); + else + smu_cmn_assign_power_profile(smu); return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index cd3e9ba3eff442..f89c487dce723d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -1081,7 +1081,7 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, } ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify, - 1 << workload_type, + smu->workload_mask, NULL); if (ret) { dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", @@ -1089,7 +1089,7 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, return ret; } - smu->power_profile_mode = profile_mode; + smu_cmn_assign_power_profile(smu); return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index a34797f3576bf5..75a9ea87f419af 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -892,14 +892,14 @@ static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, u } ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify, - 1 << workload_type, + smu->workload_mask, NULL); if (ret) { dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", workload_type); return ret; } - smu->power_profile_mode = profile_mode; + smu_cmn_assign_power_profile(smu); return 0; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 8d25cc1f218f10..1d29e99d795577 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -2473,7 +2473,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, DpmActivityMonitorCoeffInt_t *activity_monitor = &(activity_monitor_external.DpmActivityMonitorCoeffInt); int workload_type, ret = 0; - u32 workload_mask, selected_workload_mask; + u32 workload_mask; smu->power_profile_mode = input[size]; @@ -2540,7 +2540,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, if (workload_type < 0) return -EINVAL; - selected_workload_mask = workload_mask = 1 << workload_type; + workload_mask = 1 << workload_type; /* Add optimizations for SMU13.0.0/10. Reuse the power saving profile */ if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) && @@ -2555,12 +2555,22 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, workload_mask |= 1 << workload_type; } + smu->workload_mask |= workload_mask; ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - workload_mask, + smu->workload_mask, NULL); - if (!ret) - smu->workload_mask = selected_workload_mask; + if (!ret) { + smu_cmn_assign_power_profile(smu); + if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING) { + workload_type = smu_cmn_to_asic_specific_index(smu, + CMN2ASIC_MAPPING_WORKLOAD, + PP_SMC_POWER_PROFILE_FULLSCREEN3D); + smu->power_profile_mode = smu->workload_mask & (1 << workload_type) + ? PP_SMC_POWER_PROFILE_FULLSCREEN3D + : PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + } + } return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 23f13388455fe4..12d622d4aceb00 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2487,13 +2487,14 @@ static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *inp smu->power_profile_mode); if (workload_type < 0) return -EINVAL; + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, - 1 << workload_type, NULL); + smu->workload_mask, NULL); if (ret) dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__); else - smu->workload_mask = (1 << workload_type); + smu_cmn_assign_power_profile(smu); return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 884938d69fcae9..59b369eff30fb8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -1795,12 +1795,11 @@ static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu, if (workload_type < 0) return -EINVAL; - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_SetWorkloadMask, - 1 << workload_type, - NULL); + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask, + smu->workload_mask, NULL); + if (!ret) - smu->workload_mask = 1 << workload_type; + smu_cmn_assign_power_profile(smu); return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 843f00c9e4075c..f1ab1a6bb46703 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -1141,6 +1141,14 @@ int smu_cmn_set_mp1_state(struct smu_context *smu, return ret; } +void smu_cmn_assign_power_profile(struct smu_context *smu) +{ + uint32_t index; + index = fls(smu->workload_mask); + index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; + smu->power_profile_mode = smu->workload_setting[index]; +} + bool smu_cmn_is_audio_func_enabled(struct amdgpu_device *adev) { struct pci_dev *p = NULL; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h index 1de685defe85b1..8a801e389659d1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h @@ -130,6 +130,8 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev); int smu_cmn_set_mp1_state(struct smu_context *smu, enum pp_mp1_state mp1_state); +void smu_cmn_assign_power_profile(struct smu_context *smu); + /* * Helper function to make sysfs_emit_at() happy. Align buf to * the current page boundary and record the offset. From cfffd980bf21b5a84fd364861d482d5a2ec21c49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Wolfgang=20M=C3=BCller?= Date: Tue, 29 Oct 2024 12:17:52 +0100 Subject: [PATCH 69/78] drm/amd/pm: add zero RPM OD setting support for SMU13 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Whilst we have support for setting fan curves there is no support for disabling the zero RPM feature. Since the relevant bits are already present in the OverDriveTable, hook them up to a sysctl setting so users can influence this behaviour. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3489 Reviewed-by: Kenneth Feng Signed-off-by: Wolfgang Müller Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu/thermal.rst | 6 ++ .../gpu/drm/amd/include/kgd_pp_interface.h | 2 + drivers/gpu/drm/amd/pm/amdgpu_pm.c | 62 +++++++++++++++++++ drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 2 + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 2 + drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 1 + .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 55 +++++++++++++++- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 55 +++++++++++++++- 8 files changed, 183 insertions(+), 2 deletions(-) diff --git a/Documentation/gpu/amdgpu/thermal.rst b/Documentation/gpu/amdgpu/thermal.rst index 6d942b5c58f054..ec6c1f1d5a4a62 100644 --- a/Documentation/gpu/amdgpu/thermal.rst +++ b/Documentation/gpu/amdgpu/thermal.rst @@ -100,6 +100,12 @@ fan_minimum_pwm .. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c :doc: fan_minimum_pwm +fan_zero_rpm_enable +---------------------- + +.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c + :doc: fan_zero_rpm_enable + GFXOFF ====== diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 2fa71f68205ebb..80e4b5a7df2344 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -119,6 +119,7 @@ enum pp_clock_type { OD_ACOUSTIC_TARGET, OD_FAN_TARGET_TEMPERATURE, OD_FAN_MINIMUM_PWM, + OD_FAN_ZERO_RPM_ENABLE, }; enum amd_pp_sensors { @@ -199,6 +200,7 @@ enum PP_OD_DPM_TABLE_COMMAND { PP_OD_EDIT_ACOUSTIC_TARGET, PP_OD_EDIT_FAN_TARGET_TEMPERATURE, PP_OD_EDIT_FAN_MINIMUM_PWM, + PP_OD_EDIT_FAN_ZERO_RPM_ENABLE, }; struct pp_states_info { diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index ea940773353cdf..cb96f1f8ca8d9a 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -4109,6 +4109,60 @@ static umode_t fan_minimum_pwm_visible(struct amdgpu_device *adev) return umode; } +/** + * DOC: fan_zero_rpm_enable + * + * The amdgpu driver provides a sysfs API for checking and adjusting the + * zero RPM feature. + * + * Reading back the file shows you the current setting and the permitted + * ranges if changable. + * + * Writing an integer to the file, change the setting accordingly. + * + * When you have finished the editing, write "c" (commit) to the file to commit + * your changes. + * + * If you want to reset to the default value, write "r" (reset) to the file to + * reset them. + */ +static ssize_t fan_zero_rpm_enable_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct od_kobj *container = container_of(kobj, struct od_kobj, kobj); + struct amdgpu_device *adev = (struct amdgpu_device *)container->priv; + + return (ssize_t)amdgpu_retrieve_od_settings(adev, OD_FAN_ZERO_RPM_ENABLE, buf); +} + +static ssize_t fan_zero_rpm_enable_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, + size_t count) +{ + struct od_kobj *container = container_of(kobj, struct od_kobj, kobj); + struct amdgpu_device *adev = (struct amdgpu_device *)container->priv; + + return (ssize_t)amdgpu_distribute_custom_od_settings(adev, + PP_OD_EDIT_FAN_ZERO_RPM_ENABLE, + buf, + count); +} + +static umode_t fan_zero_rpm_enable_visible(struct amdgpu_device *adev) +{ + umode_t umode = 0000; + + if (adev->pm.od_feature_mask & OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE) + umode |= S_IRUSR | S_IRGRP | S_IROTH; + + if (adev->pm.od_feature_mask & OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET) + umode |= S_IWUSR; + + return umode; +} + static struct od_feature_set amdgpu_od_set = { .containers = { [0] = { @@ -4154,6 +4208,14 @@ static struct od_feature_set amdgpu_od_set = { .store = fan_minimum_pwm_store, }, }, + [5] = { + .name = "fan_zero_rpm_enable", + .ops = { + .is_visible = fan_zero_rpm_enable_visible, + .show = fan_zero_rpm_enable_show, + .store = fan_zero_rpm_enable_store, + }, + }, }, }, }, diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index f5bf41f21c4123..b5daa12c0864a6 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -328,6 +328,8 @@ struct config_table_setting #define OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_SET BIT(7) #define OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE BIT(8) #define OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET BIT(9) +#define OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE BIT(10) +#define OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET BIT(11) struct amdgpu_pm { struct mutex mutex; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index aa7c57b587650e..3458674fe864e9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2895,6 +2895,8 @@ static enum smu_clk_type smu_convert_to_smuclk(enum pp_clock_type type) clk_type = SMU_OD_FAN_TARGET_TEMPERATURE; break; case OD_FAN_MINIMUM_PWM: clk_type = SMU_OD_FAN_MINIMUM_PWM; break; + case OD_FAN_ZERO_RPM_ENABLE: + clk_type = SMU_OD_FAN_ZERO_RPM_ENABLE; break; default: clk_type = SMU_CLK_COUNT; break; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index e71a721c12b985..e0abb449a3a1e9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -313,6 +313,7 @@ enum smu_clk_type { SMU_OD_ACOUSTIC_TARGET, SMU_OD_FAN_TARGET_TEMPERATURE, SMU_OD_FAN_MINIMUM_PWM, + SMU_OD_FAN_ZERO_RPM_ENABLE, SMU_CLK_COUNT, }; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 1d29e99d795577..a7c62bb09aa0d7 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -107,6 +107,7 @@ #define PP_OD_FEATURE_FAN_ACOUSTIC_TARGET 8 #define PP_OD_FEATURE_FAN_TARGET_TEMPERATURE 9 #define PP_OD_FEATURE_FAN_MINIMUM_PWM 10 +#define PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE 11 #define LINK_SPEED_MAX 3 @@ -1130,6 +1131,10 @@ static void smu_v13_0_0_get_od_setting_limits(struct smu_context *smu, od_min_setting = overdrive_lowerlimits->FanMinimumPwm; od_max_setting = overdrive_upperlimits->FanMinimumPwm; break; + case PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE: + od_min_setting = overdrive_lowerlimits->FanZeroRpmEnable; + od_max_setting = overdrive_upperlimits->FanZeroRpmEnable; + break; default: od_min_setting = od_max_setting = INT_MAX; break; @@ -1450,6 +1455,24 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu, min_value, max_value); break; + case SMU_OD_FAN_ZERO_RPM_ENABLE: + if (!smu_v13_0_0_is_od_feature_supported(smu, + PP_OD_FEATURE_ZERO_FAN_BIT)) + break; + + size += sysfs_emit_at(buf, size, "FAN_ZERO_RPM_ENABLE:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.FanZeroRpmEnable); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v13_0_0_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "ZERO_RPM_ENABLE: %u %u\n", + min_value, max_value); + break; + case SMU_OD_RANGE: if (!smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT) && !smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT) && @@ -1547,6 +1570,11 @@ static int smu_v13_0_0_od_restore_table_single(struct smu_context *smu, long inp od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); break; + case PP_OD_EDIT_FAN_ZERO_RPM_ENABLE: + od_table->OverDriveTable.FanZeroRpmEnable = + boot_overdrive_table->OverDriveTable.FanZeroRpmEnable; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); + break; default: dev_info(adev->dev, "Invalid table index: %ld\n", input); return -EINVAL; @@ -1840,6 +1868,27 @@ static int smu_v13_0_0_od_edit_dpm_table(struct smu_context *smu, od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); break; + case PP_OD_EDIT_FAN_ZERO_RPM_ENABLE: + if (!smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_ZERO_FAN_BIT)) { + dev_warn(adev->dev, "Zero RPM setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v13_0_0_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "zero RPM enable setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanZeroRpmEnable = input[0]; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); + break; + case PP_OD_RESTORE_DEFAULT_TABLE: if (size == 1) { ret = smu_v13_0_0_od_restore_table_single(smu, input[0]); @@ -2110,7 +2159,9 @@ static void smu_v13_0_0_set_supported_od_feature_mask(struct smu_context *smu) OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_RETRIEVE | OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_SET | OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE | - OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET; + OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET | + OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE | + OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET; } static int smu_v13_0_0_set_default_od_settings(struct smu_context *smu) @@ -2176,6 +2227,8 @@ static int smu_v13_0_0_set_default_od_settings(struct smu_context *smu) user_od_table_bak.OverDriveTable.FanTargetTemperature; user_od_table->OverDriveTable.FanMinimumPwm = user_od_table_bak.OverDriveTable.FanMinimumPwm; + user_od_table->OverDriveTable.FanZeroRpmEnable = + user_od_table_bak.OverDriveTable.FanZeroRpmEnable; } smu_v13_0_0_set_supported_od_feature_mask(smu); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 12d622d4aceb00..1ba2089d865036 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -83,6 +83,7 @@ #define PP_OD_FEATURE_FAN_ACOUSTIC_TARGET 8 #define PP_OD_FEATURE_FAN_TARGET_TEMPERATURE 9 #define PP_OD_FEATURE_FAN_MINIMUM_PWM 10 +#define PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE 11 #define LINK_SPEED_MAX 3 @@ -1119,6 +1120,10 @@ static void smu_v13_0_7_get_od_setting_limits(struct smu_context *smu, od_min_setting = overdrive_lowerlimits->FanMinimumPwm; od_max_setting = overdrive_upperlimits->FanMinimumPwm; break; + case PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE: + od_min_setting = overdrive_lowerlimits->FanZeroRpmEnable; + od_max_setting = overdrive_upperlimits->FanZeroRpmEnable; + break; default: od_min_setting = od_max_setting = INT_MAX; break; @@ -1439,6 +1444,24 @@ static int smu_v13_0_7_print_clk_levels(struct smu_context *smu, min_value, max_value); break; + case SMU_OD_FAN_ZERO_RPM_ENABLE: + if (!smu_v13_0_7_is_od_feature_supported(smu, + PP_OD_FEATURE_ZERO_FAN_BIT)) + break; + + size += sysfs_emit_at(buf, size, "FAN_ZERO_RPM_ENABLE:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.FanZeroRpmEnable); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v13_0_7_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "ZERO_RPM_ENABLE: %u %u\n", + min_value, max_value); + break; + case SMU_OD_RANGE: if (!smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT) && !smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT) && @@ -1535,6 +1558,11 @@ static int smu_v13_0_7_od_restore_table_single(struct smu_context *smu, long inp od_table->OverDriveTable.FanMode = FAN_MODE_AUTO; od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); break; + case PP_OD_EDIT_FAN_ZERO_RPM_ENABLE: + od_table->OverDriveTable.FanZeroRpmEnable = + boot_overdrive_table->OverDriveTable.FanZeroRpmEnable; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); + break; default: dev_info(adev->dev, "Invalid table index: %ld\n", input); return -EINVAL; @@ -1828,6 +1856,27 @@ static int smu_v13_0_7_od_edit_dpm_table(struct smu_context *smu, od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_FAN_CURVE_BIT); break; + case PP_OD_EDIT_FAN_ZERO_RPM_ENABLE: + if (!smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_ZERO_FAN_BIT)) { + dev_warn(adev->dev, "Zero RPM setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v13_0_7_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "zero RPM enable setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanZeroRpmEnable = input[0]; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); + break; + case PP_OD_RESTORE_DEFAULT_TABLE: if (size == 1) { ret = smu_v13_0_7_od_restore_table_single(smu, input[0]); @@ -2094,7 +2143,9 @@ static void smu_v13_0_7_set_supported_od_feature_mask(struct smu_context *smu) OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_RETRIEVE | OD_OPS_SUPPORT_FAN_TARGET_TEMPERATURE_SET | OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE | - OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET; + OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET | + OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE | + OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET; } static int smu_v13_0_7_set_default_od_settings(struct smu_context *smu) @@ -2160,6 +2211,8 @@ static int smu_v13_0_7_set_default_od_settings(struct smu_context *smu) user_od_table_bak.OverDriveTable.FanTargetTemperature; user_od_table->OverDriveTable.FanMinimumPwm = user_od_table_bak.OverDriveTable.FanMinimumPwm; + user_od_table->OverDriveTable.FanZeroRpmEnable = + user_od_table_bak.OverDriveTable.FanZeroRpmEnable; } smu_v13_0_7_set_supported_od_feature_mask(smu); From e89bd3615bc0883adc90209c1aac6d4bac7d221f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Oct 2024 22:54:38 -0400 Subject: [PATCH 70/78] drm/amdgpu/mes: fetch fw version from firmware header We need this prior to the firmware being loaded so fetch from the header. v2: fetch directly from the firmware v3: store both fw versions Reviewed-by: Srinivasan Shanmugam Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 6909af56fcad2c..b10383f83d73dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1594,6 +1594,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) char ucode_prefix[30]; char fw_name[50]; bool need_retry = false; + u32 *ucode_ptr; int r; amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, @@ -1631,6 +1632,10 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) adev->mes.data_start_addr[pipe] = le32_to_cpu(mes_hdr->mes_data_start_addr_lo) | ((uint64_t)(le32_to_cpu(mes_hdr->mes_data_start_addr_hi)) << 32); + ucode_ptr = (u32 *)(adev->mes.fw[pipe]->data + + sizeof(union amdgpu_firmware_header)); + adev->mes.fw_version[pipe] = + le32_to_cpu(ucode_ptr[24]) & AMDGPU_MES_VERSION_MASK; if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { int ucode, ucode_data; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 96788c0f42f1be..0684e482a204ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -75,6 +75,7 @@ struct amdgpu_mes { uint32_t sched_version; uint32_t kiq_version; + uint32_t fw_version[AMDGPU_MAX_MES_PIPES]; bool enable_legacy_queue_map; uint32_t total_max_queue; From 6bfe777e9267ee6d1c4712b52bb5d32e59508a3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Wolfgang=20M=C3=BCller?= Date: Tue, 29 Oct 2024 12:17:53 +0100 Subject: [PATCH 71/78] drm/amd/pm: add zero RPM stop temperature OD setting support for SMU13 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Together with the feature to enable or disable zero RPM in the last commit, it also makes sense to expose the OD setting determining under which temperature the fan should stop if zero RPM is enabled. Reviewed-by: Kenneth Feng Signed-off-by: Wolfgang Müller Signed-off-by: Alex Deucher --- Documentation/gpu/amdgpu/thermal.rst | 6 ++ .../gpu/drm/amd/include/kgd_pp_interface.h | 2 + drivers/gpu/drm/amd/pm/amdgpu_pm.c | 65 +++++++++++++++++++ drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 2 + drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 2 + drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 1 + .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 55 +++++++++++++++- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 55 +++++++++++++++- 8 files changed, 186 insertions(+), 2 deletions(-) diff --git a/Documentation/gpu/amdgpu/thermal.rst b/Documentation/gpu/amdgpu/thermal.rst index ec6c1f1d5a4a62..1768a106aab169 100644 --- a/Documentation/gpu/amdgpu/thermal.rst +++ b/Documentation/gpu/amdgpu/thermal.rst @@ -106,6 +106,12 @@ fan_zero_rpm_enable .. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c :doc: fan_zero_rpm_enable +fan_zero_rpm_stop_temperature +----------------------------- + +.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c + :doc: fan_zero_rpm_stop_temperature + GFXOFF ====== diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 80e4b5a7df2344..bb27c0d2a9ae89 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -120,6 +120,7 @@ enum pp_clock_type { OD_FAN_TARGET_TEMPERATURE, OD_FAN_MINIMUM_PWM, OD_FAN_ZERO_RPM_ENABLE, + OD_FAN_ZERO_RPM_STOP_TEMP, }; enum amd_pp_sensors { @@ -201,6 +202,7 @@ enum PP_OD_DPM_TABLE_COMMAND { PP_OD_EDIT_FAN_TARGET_TEMPERATURE, PP_OD_EDIT_FAN_MINIMUM_PWM, PP_OD_EDIT_FAN_ZERO_RPM_ENABLE, + PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP, }; struct pp_states_info { diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index cb96f1f8ca8d9a..136e8193867c30 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -4163,6 +4163,63 @@ static umode_t fan_zero_rpm_enable_visible(struct amdgpu_device *adev) return umode; } +/** + * DOC: fan_zero_rpm_stop_temperature + * + * The amdgpu driver provides a sysfs API for checking and adjusting the + * zero RPM stop temperature feature. + * + * Reading back the file shows you the current setting and the permitted + * ranges if changable. + * + * Writing an integer to the file, change the setting accordingly. + * + * When you have finished the editing, write "c" (commit) to the file to commit + * your changes. + * + * If you want to reset to the default value, write "r" (reset) to the file to + * reset them. + * + * This setting works only if the Zero RPM setting is enabled. It adjusts the + * temperature below which the fan can stop. + */ +static ssize_t fan_zero_rpm_stop_temp_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct od_kobj *container = container_of(kobj, struct od_kobj, kobj); + struct amdgpu_device *adev = (struct amdgpu_device *)container->priv; + + return (ssize_t)amdgpu_retrieve_od_settings(adev, OD_FAN_ZERO_RPM_STOP_TEMP, buf); +} + +static ssize_t fan_zero_rpm_stop_temp_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, + size_t count) +{ + struct od_kobj *container = container_of(kobj, struct od_kobj, kobj); + struct amdgpu_device *adev = (struct amdgpu_device *)container->priv; + + return (ssize_t)amdgpu_distribute_custom_od_settings(adev, + PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP, + buf, + count); +} + +static umode_t fan_zero_rpm_stop_temp_visible(struct amdgpu_device *adev) +{ + umode_t umode = 0000; + + if (adev->pm.od_feature_mask & OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_RETRIEVE) + umode |= S_IRUSR | S_IRGRP | S_IROTH; + + if (adev->pm.od_feature_mask & OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_SET) + umode |= S_IWUSR; + + return umode; +} + static struct od_feature_set amdgpu_od_set = { .containers = { [0] = { @@ -4216,6 +4273,14 @@ static struct od_feature_set amdgpu_od_set = { .store = fan_zero_rpm_enable_store, }, }, + [6] = { + .name = "fan_zero_rpm_stop_temperature", + .ops = { + .is_visible = fan_zero_rpm_stop_temp_visible, + .show = fan_zero_rpm_stop_temp_show, + .store = fan_zero_rpm_stop_temp_store, + }, + }, }, }, }, diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index b5daa12c0864a6..363af8990aa257 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -330,6 +330,8 @@ struct config_table_setting #define OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET BIT(9) #define OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE BIT(10) #define OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET BIT(11) +#define OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_RETRIEVE BIT(12) +#define OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_SET BIT(13) struct amdgpu_pm { struct mutex mutex; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 3458674fe864e9..64f9179595766a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2897,6 +2897,8 @@ static enum smu_clk_type smu_convert_to_smuclk(enum pp_clock_type type) clk_type = SMU_OD_FAN_MINIMUM_PWM; break; case OD_FAN_ZERO_RPM_ENABLE: clk_type = SMU_OD_FAN_ZERO_RPM_ENABLE; break; + case OD_FAN_ZERO_RPM_STOP_TEMP: + clk_type = SMU_OD_FAN_ZERO_RPM_STOP_TEMP; break; default: clk_type = SMU_CLK_COUNT; break; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index e0abb449a3a1e9..a299dc4a807149 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -314,6 +314,7 @@ enum smu_clk_type { SMU_OD_FAN_TARGET_TEMPERATURE, SMU_OD_FAN_MINIMUM_PWM, SMU_OD_FAN_ZERO_RPM_ENABLE, + SMU_OD_FAN_ZERO_RPM_STOP_TEMP, SMU_CLK_COUNT, }; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index a7c62bb09aa0d7..80c6b1e523aae0 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -108,6 +108,7 @@ #define PP_OD_FEATURE_FAN_TARGET_TEMPERATURE 9 #define PP_OD_FEATURE_FAN_MINIMUM_PWM 10 #define PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE 11 +#define PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP 12 #define LINK_SPEED_MAX 3 @@ -1135,6 +1136,10 @@ static void smu_v13_0_0_get_od_setting_limits(struct smu_context *smu, od_min_setting = overdrive_lowerlimits->FanZeroRpmEnable; od_max_setting = overdrive_upperlimits->FanZeroRpmEnable; break; + case PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP: + od_min_setting = overdrive_lowerlimits->FanZeroRpmStopTemp; + od_max_setting = overdrive_upperlimits->FanZeroRpmStopTemp; + break; default: od_min_setting = od_max_setting = INT_MAX; break; @@ -1473,6 +1478,24 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu, min_value, max_value); break; + case SMU_OD_FAN_ZERO_RPM_STOP_TEMP: + if (!smu_v13_0_0_is_od_feature_supported(smu, + PP_OD_FEATURE_ZERO_FAN_BIT)) + break; + + size += sysfs_emit_at(buf, size, "FAN_ZERO_RPM_STOP_TEMPERATURE:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.FanZeroRpmStopTemp); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v13_0_0_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "ZERO_RPM_STOP_TEMPERATURE: %u %u\n", + min_value, max_value); + break; + case SMU_OD_RANGE: if (!smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT) && !smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT) && @@ -1575,6 +1598,11 @@ static int smu_v13_0_0_od_restore_table_single(struct smu_context *smu, long inp boot_overdrive_table->OverDriveTable.FanZeroRpmEnable; od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); break; + case PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP: + od_table->OverDriveTable.FanZeroRpmStopTemp = + boot_overdrive_table->OverDriveTable.FanZeroRpmStopTemp; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); + break; default: dev_info(adev->dev, "Invalid table index: %ld\n", input); return -EINVAL; @@ -1889,6 +1917,27 @@ static int smu_v13_0_0_od_edit_dpm_table(struct smu_context *smu, od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); break; + case PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP: + if (!smu_v13_0_0_is_od_feature_supported(smu, PP_OD_FEATURE_ZERO_FAN_BIT)) { + dev_warn(adev->dev, "Zero RPM setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v13_0_0_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "zero RPM stop temperature setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanZeroRpmStopTemp = input[0]; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); + break; + case PP_OD_RESTORE_DEFAULT_TABLE: if (size == 1) { ret = smu_v13_0_0_od_restore_table_single(smu, input[0]); @@ -2161,7 +2210,9 @@ static void smu_v13_0_0_set_supported_od_feature_mask(struct smu_context *smu) OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE | OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET | OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE | - OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET; + OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET | + OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_RETRIEVE | + OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_SET; } static int smu_v13_0_0_set_default_od_settings(struct smu_context *smu) @@ -2229,6 +2280,8 @@ static int smu_v13_0_0_set_default_od_settings(struct smu_context *smu) user_od_table_bak.OverDriveTable.FanMinimumPwm; user_od_table->OverDriveTable.FanZeroRpmEnable = user_od_table_bak.OverDriveTable.FanZeroRpmEnable; + user_od_table->OverDriveTable.FanZeroRpmStopTemp = + user_od_table_bak.OverDriveTable.FanZeroRpmStopTemp; } smu_v13_0_0_set_supported_od_feature_mask(smu); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 1ba2089d865036..c5d3e25cc967e1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -84,6 +84,7 @@ #define PP_OD_FEATURE_FAN_TARGET_TEMPERATURE 9 #define PP_OD_FEATURE_FAN_MINIMUM_PWM 10 #define PP_OD_FEATURE_FAN_ZERO_RPM_ENABLE 11 +#define PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP 12 #define LINK_SPEED_MAX 3 @@ -1124,6 +1125,10 @@ static void smu_v13_0_7_get_od_setting_limits(struct smu_context *smu, od_min_setting = overdrive_lowerlimits->FanZeroRpmEnable; od_max_setting = overdrive_upperlimits->FanZeroRpmEnable; break; + case PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP: + od_min_setting = overdrive_lowerlimits->FanZeroRpmStopTemp; + od_max_setting = overdrive_upperlimits->FanZeroRpmStopTemp; + break; default: od_min_setting = od_max_setting = INT_MAX; break; @@ -1462,6 +1467,24 @@ static int smu_v13_0_7_print_clk_levels(struct smu_context *smu, min_value, max_value); break; + case SMU_OD_FAN_ZERO_RPM_STOP_TEMP: + if (!smu_v13_0_7_is_od_feature_supported(smu, + PP_OD_FEATURE_ZERO_FAN_BIT)) + break; + + size += sysfs_emit_at(buf, size, "FAN_ZERO_RPM_STOP_TEMPERATURE:\n"); + size += sysfs_emit_at(buf, size, "%d\n", + (int)od_table->OverDriveTable.FanZeroRpmStopTemp); + + size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); + smu_v13_0_7_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP, + &min_value, + &max_value); + size += sysfs_emit_at(buf, size, "ZERO_RPM_STOP_TEMPERATURE: %u %u\n", + min_value, max_value); + break; + case SMU_OD_RANGE: if (!smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT) && !smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_UCLK_BIT) && @@ -1563,6 +1586,11 @@ static int smu_v13_0_7_od_restore_table_single(struct smu_context *smu, long inp boot_overdrive_table->OverDriveTable.FanZeroRpmEnable; od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); break; + case PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP: + od_table->OverDriveTable.FanZeroRpmStopTemp = + boot_overdrive_table->OverDriveTable.FanZeroRpmStopTemp; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); + break; default: dev_info(adev->dev, "Invalid table index: %ld\n", input); return -EINVAL; @@ -1877,6 +1905,27 @@ static int smu_v13_0_7_od_edit_dpm_table(struct smu_context *smu, od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); break; + case PP_OD_EDIT_FAN_ZERO_RPM_STOP_TEMP: + if (!smu_v13_0_7_is_od_feature_supported(smu, PP_OD_FEATURE_ZERO_FAN_BIT)) { + dev_warn(adev->dev, "Zero RPM setting not supported!\n"); + return -ENOTSUPP; + } + + smu_v13_0_7_get_od_setting_limits(smu, + PP_OD_FEATURE_FAN_ZERO_RPM_STOP_TEMP, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "zero RPM stop temperature setting(%ld) must be within [%d, %d]!\n", + input[0], minimum, maximum); + return -EINVAL; + } + + od_table->OverDriveTable.FanZeroRpmStopTemp = input[0]; + od_table->OverDriveTable.FeatureCtrlMask |= BIT(PP_OD_FEATURE_ZERO_FAN_BIT); + break; + case PP_OD_RESTORE_DEFAULT_TABLE: if (size == 1) { ret = smu_v13_0_7_od_restore_table_single(smu, input[0]); @@ -2145,7 +2194,9 @@ static void smu_v13_0_7_set_supported_od_feature_mask(struct smu_context *smu) OD_OPS_SUPPORT_FAN_MINIMUM_PWM_RETRIEVE | OD_OPS_SUPPORT_FAN_MINIMUM_PWM_SET | OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_RETRIEVE | - OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET; + OD_OPS_SUPPORT_FAN_ZERO_RPM_ENABLE_SET | + OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_RETRIEVE | + OD_OPS_SUPPORT_FAN_ZERO_RPM_STOP_TEMP_SET; } static int smu_v13_0_7_set_default_od_settings(struct smu_context *smu) @@ -2213,6 +2264,8 @@ static int smu_v13_0_7_set_default_od_settings(struct smu_context *smu) user_od_table_bak.OverDriveTable.FanMinimumPwm; user_od_table->OverDriveTable.FanZeroRpmEnable = user_od_table_bak.OverDriveTable.FanZeroRpmEnable; + user_od_table->OverDriveTable.FanZeroRpmStopTemp = + user_od_table_bak.OverDriveTable.FanZeroRpmStopTemp; } smu_v13_0_7_set_supported_od_feature_mask(smu); From 949d817c78cc6416d6e22f3f72a6960cd7412755 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 30 Oct 2024 14:41:41 +0530 Subject: [PATCH 72/78] drm/amdgpu/gfx11: Add cleaner shader for GFX11.0.3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds the cleaner shader microcode for GFX11.0.3 GPUs. The cleaner shader is a piece of GPU code that is used to clear or initialize certain GPU resources, such as Local Data Share (LDS), Vector General Purpose Registers (VGPRs), and Scalar General Purpose Registers (SGPRs). Clearing these resources is important for ensuring data isolation between different workloads running on the GPU. Without the cleaner shader, residual data from a previous workload could potentially be accessed by a subsequent workload, leading to data leaks and incorrect computation results. The cleaner shader microcode is represented as an array of 32-bit words (`gfx_11_0_3_cleaner_shader_hex`). This array is the binary representation of the cleaner shader code, which is written in a low-level GPU instruction set. When the cleaner shader feature is enabled, the AMDGPU driver loads this array into a specific location in the GPU memory. The GPU then reads this memory location to fetch and execute the cleaner shader instructions. The cleaner shader is executed automatically by the GPU at the end of each workload, before the next workload starts. This ensures that all GPU resources are in a clean state before the start of each workload. This addition is part of the cleaner shader feature implementation. The cleaner shader feature helps resource utilization by cleaning up GPU resources after they are used. It also enhances security and reliability by preventing data leaks between workloads. Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Suggested-by: Alex Deucher Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 17 +++ .../amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm | 118 ++++++++++++++++++ .../drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h | 56 +++++++++ 3 files changed, 191 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm create mode 100644 drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index cfe17a36b8ee4e..62e4c446793d4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -46,6 +46,7 @@ #include "clearstate_gfx11.h" #include "v11_structs.h" #include "gfx_v11_0.h" +#include "gfx_v11_0_cleaner_shader.h" #include "gfx_v11_0_3.h" #include "nbio_v4_3.h" #include "mes_v11_0.h" @@ -1579,8 +1580,24 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) } switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(11, 0, 3): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 2280 && + adev->gfx.pfp_fw_version >= 2370 && + adev->gfx.mec_fw_version >= 2450 && + adev->mes.fw_version[0] >= 99) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; default: adev->gfx.enable_cleaner_shader = false; + break; } /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm new file mode 100644 index 00000000000000..9b90b66368c7ab --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3_cleaner_shader.asm @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader. +//To turn this shader program on for complitaion change this to main and lower shader main to main_1 + +// Navi3 : Clear SGPRs, VGPRs and LDS +// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot +// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD +// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS) +// It takes 2 workgroups to use all of LDS: one on each CU of the WGP +// Each wave clears SGPRs 0 - 107 +// Each wave clears VGPRs 0 - 63 +// The first wave of the workgroup clears its 64KB of LDS +// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup +// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared. + +shader main + asic(GFX11) + type(CS) + wave_size(32) +// Note: original source code from SQ team + +// Takes about 2500 clocks to run. +// (theorhetical fastest = 1024clks vgpr + 640lds = 1660 clks) +// + S_BARRIER + + // + // CLEAR VGPRs + // + s_mov_b32 m0, 0x00000058 // Loop 96/8=12 times (loop unrolled for performance) + +label_0005: + v_movreld_b32 v0, 0 + v_movreld_b32 v1, 0 + v_movreld_b32 v2, 0 + v_movreld_b32 v3, 0 + v_movreld_b32 v4, 0 + v_movreld_b32 v5, 0 + v_movreld_b32 v6, 0 + v_movreld_b32 v7, 0 + s_sub_u32 m0, m0, 8 + s_cbranch_scc0 label_0005 + // + // + + s_mov_b32 s2, 0x80000000 // Bit31 is first_wave + s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set + s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup + // CLEAR LDS + // + s_mov_b32 exec_lo, 0xffffffff + s_mov_b32 exec_hi, 0xffffffff + v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63) + v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63) + v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte) + s_mov_b32 s2, 0x00000003f // 64 loop iterations + s_mov_b32 m0, 0xffffffff + // Clear all of LDS space + // Each FirstWave of WorkGroup clears 64kbyte block + +label_001F: + ds_write2_b64 v1, v[2:3], v[2:3] offset1:32 + ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96 + v_add_co_u32 v1, vcc, 0x00000400, v1 + s_sub_u32 s2, s2, 1 + s_cbranch_scc0 label_001F + // + // CLEAR SGPRs + // +label_0023: + s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance) +label_sgpr_loop: + s_movreld_b32 s0, 0 + s_movreld_b32 s1, 0 + s_movreld_b32 s2, 0 + s_movreld_b32 s3, 0 + s_sub_u32 m0, m0, 4 + s_cbranch_scc0 label_sgpr_loop + + //clear vcc + s_mov_b64 vcc, 0 //clear vcc + s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR + s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR + s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1 + s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3 + s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5 + s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7 + s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9 + s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11 + s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13 + s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15 + + s_endpgm + +end + diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h new file mode 100644 index 00000000000000..3218cc04f543e4 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_cleaner_shader.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* Define the cleaner shader gfx_11_0_3 */ +static const u32 gfx_11_0_3_cleaner_shader_hex[] = { + 0xb0804006, 0xbe8200ff, + 0x00000058, 0xbefd0080, + 0x7e008480, 0x7e028480, + 0x7e048480, 0x7e068480, + 0x7e088480, 0x7e0a8480, + 0x7e0c8480, 0x7e0e8480, + 0xbefd0002, 0x80828802, + 0xbfa1fff5, 0xbe8200ff, + 0x80000000, 0x8b020002, + 0xbfa10012, 0xbefe00c1, + 0xbeff00c1, 0xd71f0001, + 0x0001007f, 0xd7200001, + 0x0002027e, 0x16020288, + 0xbe8200bf, 0xbefd00c1, + 0xd9382000, 0x00020201, + 0xd9386040, 0x00040401, + 0xd7006a01, 0x000202ff, + 0x00000400, 0x80828102, + 0xbfa1fff7, 0xbefd00ff, + 0x00000068, 0xbe804280, + 0xbe814280, 0xbe824280, + 0xbe834280, 0x80fd847d, + 0xbfa1fffa, 0xbeea0180, + 0xbeec0180, 0xbeee0180, + 0xbef00180, 0xbef20180, + 0xbef40180, 0xbef60180, + 0xbef80180, 0xbefa0180, + 0xbfb00000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, +}; From 990c4f580742de7bb78fa57420ffd182fc3ab4cd Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 4 Nov 2024 10:36:13 +0530 Subject: [PATCH 73/78] drm/amdgpu: Fix DPX valid mode check on GC 9.4.3 For DPX mode, the number of memory partitions supported should be less than or equal to 2. Fixes: 1589c82a1085 ("drm/amdgpu: Check memory ranges for valid xcp mode") Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index fccccea0d2d0eb..e157d6d857b6ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -548,7 +548,7 @@ static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr, case AMDGPU_SPX_PARTITION_MODE: return adev->gmc.num_mem_partitions == 1 && num_xcc > 0; case AMDGPU_DPX_PARTITION_MODE: - return adev->gmc.num_mem_partitions != 8 && (num_xcc % 4) == 0; + return adev->gmc.num_mem_partitions <= 2 && (num_xcc % 4) == 0; case AMDGPU_TPX_PARTITION_MODE: return (adev->gmc.num_mem_partitions == 1 || adev->gmc.num_mem_partitions == 3) && From e2e97435783979124ba92d6870415c57ecfef6a5 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Thu, 31 Oct 2024 10:59:17 +0800 Subject: [PATCH 74/78] drm/amdgpu: set the right AMDGPU sg segment limitation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver needs to set the correct max_segment_size; otherwise debug_dma_map_sg() will complain about the over-mapping of the AMDGPU sg length as following: WARNING: CPU: 6 PID: 1964 at kernel/dma/debug.c:1178 debug_dma_map_sg+0x2dc/0x370 [ 364.049444] Modules linked in: veth amdgpu(OE) amdxcp drm_exec gpu_sched drm_buddy drm_ttm_helper ttm(OE) drm_suballoc_helper drm_display_helper drm_kms_helper i2c_algo_bit rpcsec_gss_krb5 auth_rpcgss nfsv4 nfs lockd grace netfs xt_conntrack xt_MASQUERADE nf_conntrack_netlink xfrm_user xfrm_algo iptable_nat xt_addrtype iptable_filter br_netfilter nvme_fabrics overlay nfnetlink_cttimeout nfnetlink openvswitch nsh nf_conncount nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c bridge stp llc amd_atl intel_rapl_msr intel_rapl_common sunrpc sch_fq_codel snd_hda_codec_realtek snd_hda_codec_generic snd_hda_scodec_component snd_hda_codec_hdmi snd_hda_intel snd_intel_dspcfg edac_mce_amd binfmt_misc snd_hda_codec snd_pci_acp6x snd_hda_core snd_acp_config snd_hwdep snd_soc_acpi kvm_amd snd_pcm kvm snd_seq_midi snd_seq_midi_event crct10dif_pclmul ghash_clmulni_intel sha512_ssse3 snd_rawmidi sha256_ssse3 sha1_ssse3 aesni_intel snd_seq nls_iso8859_1 crypto_simd snd_seq_device cryptd snd_timer rapl input_leds snd [ 364.049532] ipmi_devintf wmi_bmof ccp serio_raw k10temp sp5100_tco soundcore ipmi_msghandler cm32181 industrialio mac_hid msr parport_pc ppdev lp parport drm efi_pstore ip_tables x_tables pci_stub crc32_pclmul nvme ahci libahci i2c_piix4 r8169 nvme_core i2c_designware_pci realtek i2c_ccgx_ucsi video wmi hid_generic cdc_ether usbnet usbhid hid r8152 mii [ 364.049576] CPU: 6 PID: 1964 Comm: rocminfo Tainted: G OE 6.10.0-custom #492 [ 364.049579] Hardware name: AMD Majolica-RN/Majolica-RN, BIOS RMJ1009A 06/13/2021 [ 364.049582] RIP: 0010:debug_dma_map_sg+0x2dc/0x370 [ 364.049585] Code: 89 4d b8 e8 36 b1 86 00 8b 4d b8 48 8b 55 b0 44 8b 45 a8 4c 8b 4d a0 48 89 c6 48 c7 c7 00 4b 74 bc 4c 89 4d b8 e8 b4 73 f3 ff <0f> 0b 4c 8b 4d b8 8b 15 c8 2c b8 01 85 d2 0f 85 ee fd ff ff 8b 05 [ 364.049588] RSP: 0018:ffff9ca600b57ac0 EFLAGS: 00010286 [ 364.049590] RAX: 0000000000000000 RBX: ffff88b7c132b0c8 RCX: 0000000000000027 [ 364.049592] RDX: ffff88bb0f521688 RSI: 0000000000000001 RDI: ffff88bb0f521680 [ 364.049594] RBP: ffff9ca600b57b20 R08: 000000000000006f R09: ffff9ca600b57930 [ 364.049596] R10: ffff9ca600b57928 R11: ffffffffbcb46328 R12: 0000000000000000 [ 364.049597] R13: 0000000000000001 R14: ffff88b7c19c0700 R15: ffff88b7c9059800 [ 364.049599] FS: 00007fb2d3516e80(0000) GS:ffff88bb0f500000(0000) knlGS:0000000000000000 [ 364.049601] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 364.049603] CR2: 000055610bd03598 CR3: 00000001049f6000 CR4: 0000000000350ef0 [ 364.049605] Call Trace: [ 364.049607] [ 364.049609] ? show_regs+0x6d/0x80 [ 364.049614] ? __warn+0x8c/0x140 [ 364.049618] ? debug_dma_map_sg+0x2dc/0x370 [ 364.049621] ? report_bug+0x193/0x1a0 [ 364.049627] ? handle_bug+0x46/0x80 [ 364.049631] ? exc_invalid_op+0x1d/0x80 [ 364.049635] ? asm_exc_invalid_op+0x1f/0x30 [ 364.049642] ? debug_dma_map_sg+0x2dc/0x370 [ 364.049647] __dma_map_sg_attrs+0x90/0xe0 [ 364.049651] dma_map_sgtable+0x25/0x40 [ 364.049654] amdgpu_bo_move+0x59a/0x850 [amdgpu] [ 364.049935] ? srso_return_thunk+0x5/0x5f [ 364.049939] ? amdgpu_ttm_tt_populate+0x5d/0xc0 [amdgpu] [ 364.050095] ttm_bo_handle_move_mem+0xc3/0x180 [ttm] [ 364.050103] ttm_bo_validate+0xc1/0x160 [ttm] [ 364.050108] ? amdgpu_ttm_tt_get_user_pages+0xe5/0x1b0 [amdgpu] [ 364.050263] amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0xa12/0xc90 [amdgpu] [ 364.050473] kfd_ioctl_alloc_memory_of_gpu+0x16b/0x3b0 [amdgpu] [ 364.050680] kfd_ioctl+0x3c2/0x530 [amdgpu] [ 364.050866] ? __pfx_kfd_ioctl_alloc_memory_of_gpu+0x10/0x10 [amdgpu] [ 364.051054] ? srso_return_thunk+0x5/0x5f [ 364.051057] ? tomoyo_file_ioctl+0x20/0x30 [ 364.051063] __x64_sys_ioctl+0x9c/0xd0 [ 364.051068] x64_sys_call+0x1219/0x20d0 [ 364.051073] do_syscall_64+0x51/0x120 [ 364.051077] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 364.051081] RIP: 0033:0x7fb2d2f1a94f Signed-off-by: Prike Liang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0637414fc70e02..9f922ec50ea2dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1851,6 +1851,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) mutex_init(&adev->mman.gtt_window_lock); + dma_set_max_seg_size(adev->dev, UINT_MAX); /* No others user of address space so set it to 0 */ r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev, adev_to_drm(adev)->anon_inode->i_mapping, From bc566781845bced474109289f6fc03f669efedd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 21 Aug 2024 14:00:34 +0200 Subject: [PATCH 75/78] drm/amdgpu: stop syncing PRT map operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Requested by both Bas and Friedrich. Mapping PTEs as PRT doesn't need to sync for anything. Signed-off-by: Christian König Reviewed-by: Friedrich Vock Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6b855810ee86e8..8d9bf7a0857fde 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1161,7 +1161,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, int r; amdgpu_sync_create(&sync); - if (clear || !bo) { + if (clear) { mem = NULL; /* Implicitly sync to command submissions in the same VM before @@ -1176,6 +1176,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, if (r) goto error_free; } + } else if (!bo) { + mem = NULL; + + /* PRT map operations don't need to sync to anything. */ } else { struct drm_gem_object *obj = &bo->tbo.base; From c0cfd2e652553d607b910be47d0cc5a7f3a78641 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 23 Oct 2024 16:37:52 -0400 Subject: [PATCH 76/78] drm/amdgpu: Adjust debugfs register access permissions Regular users shouldn't have read access. Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index c446bfccea590f..a635a0a62e97e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1648,7 +1648,7 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { ent = debugfs_create_file(debugfs_regs_names[i], - S_IFREG | 0444, root, + S_IFREG | 0400, root, adev, debugfs_regs[i]); if (!i && !IS_ERR_OR_NULL(ent)) i_size_write(ent->d_inode, adev->rmmio_size); From 7ba9395430f611cfc101b1c2687732baafa239d5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 23 Oct 2024 16:39:36 -0400 Subject: [PATCH 77/78] drm/amdgpu: Adjust debugfs eviction and IB access permissions Users should not be able to run these. Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index a635a0a62e97e8..0cb3ba448c6cf1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2105,11 +2105,11 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_securedisplay_debugfs_init(adev); amdgpu_fw_attestation_debugfs_init(adev); - debugfs_create_file("amdgpu_evict_vram", 0444, root, adev, + debugfs_create_file("amdgpu_evict_vram", 0400, root, adev, &amdgpu_evict_vram_fops); - debugfs_create_file("amdgpu_evict_gtt", 0444, root, adev, + debugfs_create_file("amdgpu_evict_gtt", 0400, root, adev, &amdgpu_evict_gtt_fops); - debugfs_create_file("amdgpu_test_ib", 0444, root, adev, + debugfs_create_file("amdgpu_test_ib", 0400, root, adev, &amdgpu_debugfs_test_ib_fops); debugfs_create_file("amdgpu_vm_info", 0444, root, adev, &amdgpu_debugfs_vm_info_fops); From f5d873f5825b40d886d03bd2aede91d4cf002434 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 23 Oct 2024 16:52:08 -0400 Subject: [PATCH 78/78] drm/amdgpu: add missing size check in amdgpu_debugfs_gprwave_read() Avoid a possible buffer overflow if size is larger than 4K. Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 0cb3ba448c6cf1..a68338cb7b4afb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -402,7 +402,7 @@ static ssize_t amdgpu_debugfs_gprwave_read(struct file *f, char __user *buf, siz int r; uint32_t *data, x; - if (size & 0x3 || *pos & 0x3) + if (size > 4096 || size & 0x3 || *pos & 0x3) return -EINVAL; r = pm_runtime_get_sync(adev_to_drm(adev)->dev);