Skip to content

Commit

Permalink
Merge tag 'amd-drm-fixes-6.9-2024-03-21' of https://gitlab.freedeskto…
Browse files Browse the repository at this point in the history
…p.org/agd5f/linux into drm-next

amd-drm-fixes-6.9-2024-03-21:

amdgpu:
- Freesync fixes
- UAF IOCTL fixes
- Fix mmhub client ID mapping
- IH 7.0 fix
- DML2 fixes
- VCN 4.0.6 fix
- GART bind fix
- GPU reset fix
- SR-IOV fix
- OD table handling fixes
- Fix TA handling on boards without display hardware
- DML1 fix
- ABM fix
- eDP panel fix
- DPPCLK fix
- HDCP fix
- Revert incorrect error case handling in ioremap
- VPE fix
- HDMI fixes
- SDMA 4.4.2 fix
- Other misc fixes

amdkfd:
- Fix duplicate BO handling in process restore

Signed-off-by: Dave Airlie <[email protected]>

From: Alex Deucher <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
  • Loading branch information
airlied committed Mar 22, 2024
2 parents 921074a + bc55c34 commit cafd86c
Show file tree
Hide file tree
Showing 64 changed files with 456 additions and 215 deletions.
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev)
{
int ret;

if (!adev->kfd.init_complete)
if (!adev->kfd.init_complete || adev->kfd.client.dev)
return 0;

ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
Expand Down
14 changes: 10 additions & 4 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -2869,14 +2869,16 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *

mutex_lock(&process_info->lock);

drm_exec_init(&exec, 0, 0);
drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&exec) {
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
drm_exec_retry_on_contention(&exec);
if (unlikely(ret))
if (unlikely(ret)) {
pr_err("Locking VM PD failed, ret: %d\n", ret);
goto ttm_reserve_fail;
}
}

/* Reserve all BOs and page tables/directory. Add all BOs from
Expand All @@ -2889,8 +2891,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
gobj = &mem->bo->tbo.base;
ret = drm_exec_prepare_obj(&exec, gobj, 1);
drm_exec_retry_on_contention(&exec);
if (unlikely(ret))
if (unlikely(ret)) {
pr_err("drm_exec_prepare_obj failed, ret: %d\n", ret);
goto ttm_reserve_fail;
}
}
}

Expand Down Expand Up @@ -2950,8 +2954,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
* validations above would invalidate DMABuf imports again.
*/
ret = process_validate_vms(process_info, &exec.ticket);
if (ret)
if (ret) {
pr_debug("Validating VMs failed, ret: %d\n", ret);
goto validate_map_fail;
}

/* Update mappings not managed by KFD */
list_for_each_entry(peer_vm, &process_info->vm_list_head,
Expand Down
16 changes: 6 additions & 10 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -4040,10 +4040,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
* early on during init and before calling to RREG32.
*/
adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
if (!adev->reset_domain) {
r = -ENOMEM;
goto unmap_memory;
}
if (!adev->reset_domain)
return -ENOMEM;

/* detect hw virtualization here */
amdgpu_detect_virtualization(adev);
Expand All @@ -4053,20 +4051,20 @@ int amdgpu_device_init(struct amdgpu_device *adev,
r = amdgpu_device_get_job_timeout_settings(adev);
if (r) {
dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
goto unmap_memory;
return r;
}

amdgpu_device_set_mcbp(adev);

/* early init functions */
r = amdgpu_device_ip_early_init(adev);
if (r)
goto unmap_memory;
return r;

/* Get rid of things like offb */
r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
if (r)
goto unmap_memory;
return r;

/* Enable TMZ based on IP_VERSION */
amdgpu_gmc_tmz_set(adev);
Expand All @@ -4076,7 +4074,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (adev->gmc.xgmi.supported) {
r = adev->gfxhub.funcs->get_xgmi_info(adev);
if (r)
goto unmap_memory;
return r;
}

/* enable PCIE atomic ops */
Expand Down Expand Up @@ -4345,8 +4343,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
failed:
amdgpu_vf_error_trans_all(adev);

unmap_memory:
iounmap(adev->rmmio);
return r;
}

Expand Down
5 changes: 4 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -2479,8 +2479,11 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work)
}
for (i = 0; i < mgpu_info.num_dgpu; i++) {
adev = mgpu_info.gpu_ins[i].adev;
if (!adev->kfd.init_complete)
if (!adev->kfd.init_complete) {
kgd2kfd_init_zone_device(adev);
amdgpu_amdkfd_device_init(adev);
amdgpu_amdkfd_drm_client_create(adev);
}
amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
}
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
Original file line number Diff line number Diff line change
Expand Up @@ -687,7 +687,7 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&kiq->ring_lock);
if (r)
DRM_ERROR("KCQ enable failed\n");
DRM_ERROR("KGQ enable failed\n");

return r;
}
Expand Down
20 changes: 16 additions & 4 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,25 @@ static const struct mmu_interval_notifier_ops amdgpu_hmm_hsa_ops = {
*/
int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr)
{
int r;

if (bo->kfd_bo)
return mmu_interval_notifier_insert(&bo->notifier, current->mm,
r = mmu_interval_notifier_insert(&bo->notifier, current->mm,
addr, amdgpu_bo_size(bo),
&amdgpu_hmm_hsa_ops);
return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,
amdgpu_bo_size(bo),
&amdgpu_hmm_gfx_ops);
else
r = mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,
amdgpu_bo_size(bo),
&amdgpu_hmm_gfx_ops);
if (r)
/*
* Make sure amdgpu_hmm_unregister() doesn't call
* mmu_interval_notifier_remove() when the notifier isn't properly
* initialized.
*/
bo->notifier.mm = NULL;

return r;
}

/**
Expand Down
18 changes: 18 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1830,6 +1830,10 @@ static int psp_hdcp_initialize(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev))
return 0;

/* bypass hdcp initialization if dmu is harvested */
if (!amdgpu_device_has_display_hardware(psp->adev))
return 0;

if (!psp->hdcp_context.context.bin_desc.size_bytes ||
!psp->hdcp_context.context.bin_desc.start_addr) {
dev_info(psp->adev->dev, "HDCP: optional hdcp ta ucode is not available\n");
Expand Down Expand Up @@ -1862,6 +1866,9 @@ int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
if (amdgpu_sriov_vf(psp->adev))
return 0;

if (!psp->hdcp_context.context.initialized)
return 0;

return psp_ta_invoke(psp, ta_cmd_id, &psp->hdcp_context.context);
}

Expand Down Expand Up @@ -1897,6 +1904,10 @@ static int psp_dtm_initialize(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev))
return 0;

/* bypass dtm initialization if dmu is harvested */
if (!amdgpu_device_has_display_hardware(psp->adev))
return 0;

if (!psp->dtm_context.context.bin_desc.size_bytes ||
!psp->dtm_context.context.bin_desc.start_addr) {
dev_info(psp->adev->dev, "DTM: optional dtm ta ucode is not available\n");
Expand Down Expand Up @@ -1929,6 +1940,9 @@ int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
if (amdgpu_sriov_vf(psp->adev))
return 0;

if (!psp->dtm_context.context.initialized)
return 0;

return psp_ta_invoke(psp, ta_cmd_id, &psp->dtm_context.context);
}

Expand Down Expand Up @@ -2063,6 +2077,10 @@ static int psp_securedisplay_initialize(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev))
return 0;

/* bypass securedisplay initialization if dmu is harvested */
if (!amdgpu_device_has_display_hardware(psp->adev))
return 0;

if (!psp->securedisplay_context.context.bin_desc.size_bytes ||
!psp->securedisplay_context.context.bin_desc.start_addr) {
dev_info(psp->adev->dev, "SECUREDISPLAY: securedisplay ta ucode is not available\n");
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
Original file line number Diff line number Diff line change
Expand Up @@ -864,6 +864,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
gtt->ttm.dma_address, flags);
}
gtt->bound = true;
}

/*
Expand Down
38 changes: 25 additions & 13 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
#define FIRMWARE_VCN4_0_4 "amdgpu/vcn_4_0_4.bin"
#define FIRMWARE_VCN4_0_5 "amdgpu/vcn_4_0_5.bin"
#define FIRMWARE_VCN4_0_6 "amdgpu/vcn_4_0_6.bin"
#define FIRMWARE_VCN4_0_6_1 "amdgpu/vcn_4_0_6_1.bin"
#define FIRMWARE_VCN5_0_0 "amdgpu/vcn_5_0_0.bin"

MODULE_FIRMWARE(FIRMWARE_RAVEN);
Expand All @@ -85,6 +86,7 @@ MODULE_FIRMWARE(FIRMWARE_VCN4_0_3);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_4);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_5);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_6);
MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1);
MODULE_FIRMWARE(FIRMWARE_VCN5_0_0);

static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
Expand All @@ -93,14 +95,22 @@ int amdgpu_vcn_early_init(struct amdgpu_device *adev)
{
char ucode_prefix[30];
char fw_name[40];
int r;
int r, i;

amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix));
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
r = amdgpu_ucode_request(adev, &adev->vcn.fw, fw_name);
if (r)
amdgpu_ucode_release(&adev->vcn.fw);
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix));
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix);
if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 6) &&
i == 1) {
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_%d.bin", ucode_prefix, i);
}

r = amdgpu_ucode_request(adev, &adev->vcn.fw[i], fw_name);
if (r) {
amdgpu_ucode_release(&adev->vcn.fw[i]);
return r;
}
}
return r;
}

Expand Down Expand Up @@ -141,7 +151,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
}
}

hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
hdr = (const struct common_firmware_header *)adev->vcn.fw[0]->data;
adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);

/* Bit 20-23, it is encode major and non-zero for new naming convention.
Expand Down Expand Up @@ -256,9 +266,10 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)

for (i = 0; i < adev->vcn.num_enc_rings; ++i)
amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);

amdgpu_ucode_release(&adev->vcn.fw[j]);
}

amdgpu_ucode_release(&adev->vcn.fw);
mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
mutex_destroy(&adev->vcn.vcn_pg_lock);

Expand Down Expand Up @@ -354,11 +365,12 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
const struct common_firmware_header *hdr;
unsigned int offset;

hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
hdr = (const struct common_firmware_header *)adev->vcn.fw[i]->data;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset,
memcpy_toio(adev->vcn.inst[i].cpu_addr,
adev->vcn.fw[i]->data + offset,
le32_to_cpu(hdr->ucode_size_bytes));
drm_dev_exit(idx);
}
Expand Down Expand Up @@ -1043,19 +1055,19 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
const struct common_firmware_header *hdr;

hdr = (const struct common_firmware_header *)adev->vcn.fw->data;

for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
if (adev->vcn.harvest_config & (1 << i))
continue;

hdr = (const struct common_firmware_header *)adev->vcn.fw[i]->data;
/* currently only support 2 FW instances */
if (i >= 2) {
dev_info(adev->dev, "More then 2 VCN FW instances!\n");
break;
}
idx = AMDGPU_UCODE_ID_VCN + i;
adev->firmware.ucode[idx].ucode_id = idx;
adev->firmware.ucode[idx].fw = adev->vcn.fw;
adev->firmware.ucode[idx].fw = adev->vcn.fw[i];
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);

Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ struct amdgpu_vcn_ras {
struct amdgpu_vcn {
unsigned fw_version;
struct delayed_work idle_work;
const struct firmware *fw; /* VCN firmware */
const struct firmware *fw[AMDGPU_MAX_VCN_INSTANCES]; /* VCN firmware */
unsigned num_enc_rings;
enum amd_powergating_state cur_state;
bool indirect_sram;
Expand Down
3 changes: 0 additions & 3 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
Original file line number Diff line number Diff line change
Expand Up @@ -575,9 +575,6 @@ static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring,
{
unsigned int ret;

if (ring->adev->vpe.collaborate_mode)
return ~0;

amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COND_EXE, 0));
amdgpu_ring_write(ring, lower_32_bits(addr));
amdgpu_ring_write(ring, upper_32_bits(addr));
Expand Down
8 changes: 6 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -3657,6 +3657,9 @@ static void gfx_v10_0_init_spm_golden_registers(struct amdgpu_device *adev)

static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
{
if (amdgpu_sriov_vf(adev))
return;

switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
soc15_program_register_sequence(adev,
Expand Down Expand Up @@ -4982,7 +4985,8 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev)
u32 tmp;
int i;

WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
if (!amdgpu_sriov_vf(adev))
WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);

gfx_v10_0_setup_rb(adev);
gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info);
Expand Down Expand Up @@ -7163,7 +7167,7 @@ static int gfx_v10_0_hw_init(void *handle)
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 0))
gfx_v10_3_program_pbb_mode(adev);

if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0) && !amdgpu_sriov_vf(adev))
gfx_v10_3_set_power_brake_sequence(adev);

return r;
Expand Down
3 changes: 3 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ static void gfxhub_v2_1_init_system_aperture_regs(struct amdgpu_device *adev)
{
uint64_t value;

if (amdgpu_sriov_vf(adev))
return;

/* Program the AGP BAR */
WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0);
WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
Expand Down
6 changes: 6 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,12 @@ static u32 ih_v7_0_get_wptr(struct amdgpu_device *adev,
tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);

/* Unset the CLEAR_OVERFLOW bit immediately so new overflows
* can be detected.
*/
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp);
out:
return (wptr & ih->ptr_mask);
}
Expand Down
Loading

0 comments on commit cafd86c

Please sign in to comment.