Skip to content

Commit

Permalink
Merge tag 'drm-next-2024-09-28' of https://gitlab.freedesktop.org/drm…
Browse files Browse the repository at this point in the history
…/kernel

Pull drm fixes from Dave Airlie:
 "Regular fixes for the week to end the merge window, i915 and xe have a
  few each, amdgpu makes up most of it with a bunch of SR-IOV related
  fixes amongst others.

  i915:
   - Fix BMG support to UHBR13.5
   - Two PSR fixes
   - Fix colorimetry detection for DP

  xe:
   - Fix macro for checking minimum GuC version
   - Fix CCS offset calculation for some BMG SKUs
   - Fix locking on memory usage reporting via fdinfo and BO destroy
   - Fix GPU page fault handler on a closed VM
   - Fix overflow in oa batch buffer

  amdgpu:
   - MES 12 fix
   - KFD fence sync fix
   - SR-IOV fixes
   - VCN 4.0.6 fix
   - SDMA 7.x fix
   - Bump driver version to note cleared VRAM support
   - SWSMU fix
   - CU occupancy logic fix
   - SDMA queue fix"

* tag 'drm-next-2024-09-28' of https://gitlab.freedesktop.org/drm/kernel: (79 commits)
  drm/amd/pm: update workload mask after the setting
  drm/amdgpu: bump driver version for cleared VRAM
  drm/amdgpu: fix vbios fetching for SR-IOV
  drm/amdgpu: fix PTE copy corruption for sdma 7
  drm/amdkfd: Add SDMA queue quantum support for GFX12
  drm/amdgpu/vcn: enable AV1 on both instances
  drm/amdkfd: Fix CU occupancy for GFX 9.4.3
  drm/amdkfd: Update logic for CU occupancy calculations
  drm/amdgpu: skip coredump after job timeout in SRIOV
  drm/amdgpu: sync to KFD fences before clearing PTEs
  drm/amdgpu/mes12: set enable_level_process_quantum_check
  drm/i915/dp: Fix colorimetry detection
  drm/amdgpu/mes12: reduce timeout
  drm/amdgpu/mes11: reduce timeout
  drm/amdgpu: use GEM references instead of TTMs v2
  drm/amd/display: Allow backlight to go below `AMDGPU_DM_DEFAULT_MIN_BACKLIGHT`
  drm/amd/display: Fix kdoc entry for 'tps' in 'dc_process_dmub_dpia_set_tps_notification'
  drm/amdgpu: update golden regs for gfx12
  drm/amdgpu: clean up vbios fetching code
  drm/amd/display: handle nulled pipe context in DCE110's set_drr()
  ...
  • Loading branch information
torvalds committed Sep 28, 2024
2 parents 894b3c3 + e7268dd commit 994aeac
Show file tree
Hide file tree
Showing 111 changed files with 1,097 additions and 862 deletions.
4 changes: 0 additions & 4 deletions drivers/gpu/drm/amd/amdgpu/amdgpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -1083,10 +1083,6 @@ struct amdgpu_device {

struct amdgpu_virt virt;

/* link all shadow bo */
struct list_head shadow_list;
struct mutex shadow_list_lock;

/* record hw reset is performed */
bool has_hw_reset;
u8 reset_magic[AMDGPU_RESET_MAGIC_NUM];
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,7 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h
return -EINVAL;
}

/* udpate aca bank to aca source error_cache first */
/* update aca bank to aca source error_cache first */
ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL);
if (ret)
return ret;
Expand Down
108 changes: 45 additions & 63 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
Original file line number Diff line number Diff line change
Expand Up @@ -950,28 +950,30 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
* @inst: xcc's instance number on a multi-XCC setup
*/
static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
int *wave_cnt, int *vmid, uint32_t inst)
struct kfd_cu_occupancy *queue_cnt, uint32_t inst)
{
int pipe_idx;
int queue_slot;
unsigned int reg_val;

unsigned int wave_cnt;
/*
* Program GRBM with appropriate MEID, PIPEID, QUEUEID and VMID
* parameters to read out waves in flight. Get VMID if there are
* non-zero waves in flight.
*/
*vmid = 0xFF;
*wave_cnt = 0;
pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe;
queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe;
soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst);
reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst, mmSPI_CSQ_WF_ACTIVE_COUNT_0) +
queue_slot);
*wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
if (*wave_cnt != 0)
*vmid = (RREG32_SOC15(GC, inst, mmCP_HQD_VMID) &
CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT;
soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, GET_INST(GC, inst));
reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, GET_INST(GC, inst),
mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot);
wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK;
if (wave_cnt != 0) {
queue_cnt->wave_cnt += wave_cnt;
queue_cnt->doorbell_off =
(RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL) &
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK) >>
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
}
}

/**
Expand All @@ -981,9 +983,8 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
* or more queues running and submitting waves to compute units.
*
* @adev: Handle of device from which to get number of waves in flight
* @pasid: Identifies the process for which this query call is invoked
* @pasid_wave_cnt: Output parameter updated with number of waves in flight that
* belong to process with given pasid
* @cu_occupancy: Array that gets filled with wave_cnt and doorbell offset
* for comparison later.
* @max_waves_per_cu: Output parameter updated with maximum number of waves
* possible per Compute Unit
* @inst: xcc's instance number on a multi-XCC setup
Expand Down Expand Up @@ -1011,34 +1012,28 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
* number of waves that are in flight for the queue at specified index. The
* index ranges from 0 to 7.
*
* If non-zero waves are in flight, read CP_HQD_VMID register to obtain VMID
* of the wave(s).
* If non-zero waves are in flight, store the corresponding doorbell offset
* of the queue, along with the wave count.
*
* Determine if VMID from above step maps to pasid provided as parameter. If
* it matches agrregate the wave count. That the VMID will not match pasid is
* a normal condition i.e. a device is expected to support multiple queues
* from multiple proceses.
* Determine if the queue belongs to the process by comparing the doorbell
* offset against the process's queues. If it matches, aggregate the wave
* count for the process.
*
* Reading registers referenced above involves programming GRBM appropriately
*/
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst)
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
struct kfd_cu_occupancy *cu_occupancy,
int *max_waves_per_cu, uint32_t inst)
{
int qidx;
int vmid;
int se_idx;
int sh_idx;
int se_cnt;
int sh_cnt;
int wave_cnt;
int queue_map;
int pasid_tmp;
int max_queue_cnt;
int vmid_wave_cnt = 0;
DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES);

lock_spi_csq_mutexes(adev);
soc15_grbm_select(adev, 1, 0, 0, 0, inst);
soc15_grbm_select(adev, 1, 0, 0, 0, GET_INST(GC, inst));

/*
* Iterate through the shader engines and arrays of the device
Expand All @@ -1048,51 +1043,38 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
AMDGPU_MAX_QUEUES);
max_queue_cnt = adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe;
sh_cnt = adev->gfx.config.max_sh_per_se;
se_cnt = adev->gfx.config.max_shader_engines;
for (se_idx = 0; se_idx < se_cnt; se_idx++) {
for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {
amdgpu_gfx_select_se_sh(adev, se_idx, 0, 0xffffffff, inst);
queue_map = RREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_CSQ_WF_ACTIVE_STATUS);

/*
* Assumption: queue map encodes following schema: four
* pipes per each micro-engine, with each pipe mapping
* eight queues. This schema is true for GFX9 devices
* and must be verified for newer device families
*/
for (qidx = 0; qidx < max_queue_cnt; qidx++) {
/* Skip qeueus that are not associated with
* compute functions
*/
if (!test_bit(qidx, cp_queue_bitmap))
continue;

amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff, inst);
queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS);
if (!(queue_map & (1 << qidx)))
continue;

/*
* Assumption: queue map encodes following schema: four
* pipes per each micro-engine, with each pipe mapping
* eight queues. This schema is true for GFX9 devices
* and must be verified for newer device families
*/
for (qidx = 0; qidx < max_queue_cnt; qidx++) {

/* Skip qeueus that are not associated with
* compute functions
*/
if (!test_bit(qidx, cp_queue_bitmap))
continue;

if (!(queue_map & (1 << qidx)))
continue;

/* Get number of waves in flight and aggregate them */
get_wave_count(adev, qidx, &wave_cnt, &vmid,
inst);
if (wave_cnt != 0) {
pasid_tmp =
RREG32(SOC15_REG_OFFSET(OSSSYS, inst,
mmIH_VMID_0_LUT) + vmid);
if (pasid_tmp == pasid)
vmid_wave_cnt += wave_cnt;
}
}
/* Get number of waves in flight and aggregate them */
get_wave_count(adev, qidx, &cu_occupancy[qidx],
inst);
}
}

amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst);
soc15_grbm_select(adev, 0, 0, 0, 0, inst);
soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst));
unlock_spi_csq_mutexes(adev);

/* Update the output parameters and return */
*pasid_wave_cnt = vmid_wave_cnt;
*max_waves_per_cu = adev->gfx.cu_info.simd_per_cu *
adev->gfx.cu_info.max_waves_per_simd;
}
Expand Down
5 changes: 3 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,9 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid);
void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
uint32_t vmid, uint64_t page_table_base);
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst);
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
struct kfd_cu_occupancy *cu_occupancy,
int *max_waves_per_cu, uint32_t inst);
void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
uint32_t inst);
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1499,7 +1499,7 @@ static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
}
}

ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);
ret = amdgpu_bo_pin(bo, domain);
if (ret)
pr_err("Error in Pinning BO to domain: %d\n", domain);

Expand Down
64 changes: 51 additions & 13 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,9 @@ static bool check_atom_bios(uint8_t *bios, size_t size)
* part of the system bios. On boot, the system bios puts a
* copy of the igp rom at the start of vram if a discrete card is
* present.
* For SR-IOV, the vbios image is also put in VRAM in the VF.
*/
static bool igp_read_bios_from_vram(struct amdgpu_device *adev)
static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev)
{
uint8_t __iomem *bios;
resource_size_t vram_base;
Expand Down Expand Up @@ -284,10 +285,6 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
acpi_status status;
bool found = false;

/* ATRM is for the discrete card only */
if (adev->flags & AMD_IS_APU)
return false;

/* ATRM is for on-platform devices only */
if (dev_is_removable(&adev->pdev->dev))
return false;
Expand Down Expand Up @@ -343,11 +340,8 @@ static inline bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)

static bool amdgpu_read_disabled_bios(struct amdgpu_device *adev)
{
if (adev->flags & AMD_IS_APU)
return igp_read_bios_from_vram(adev);
else
return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ?
false : amdgpu_asic_read_disabled_bios(adev);
return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ?
false : amdgpu_asic_read_disabled_bios(adev);
}

#ifdef CONFIG_ACPI
Expand Down Expand Up @@ -414,7 +408,36 @@ static inline bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev)
}
#endif

bool amdgpu_get_bios(struct amdgpu_device *adev)
static bool amdgpu_get_bios_apu(struct amdgpu_device *adev)
{
if (amdgpu_acpi_vfct_bios(adev)) {
dev_info(adev->dev, "Fetched VBIOS from VFCT\n");
goto success;
}

if (amdgpu_read_bios_from_vram(adev)) {
dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n");
goto success;
}

if (amdgpu_read_bios(adev)) {
dev_info(adev->dev, "Fetched VBIOS from ROM BAR\n");
goto success;
}

if (amdgpu_read_platform_bios(adev)) {
dev_info(adev->dev, "Fetched VBIOS from platform\n");
goto success;
}

dev_err(adev->dev, "Unable to locate a BIOS ROM\n");
return false;

success:
return true;
}

static bool amdgpu_get_bios_dgpu(struct amdgpu_device *adev)
{
if (amdgpu_atrm_get_bios(adev)) {
dev_info(adev->dev, "Fetched VBIOS from ATRM\n");
Expand All @@ -426,7 +449,8 @@ bool amdgpu_get_bios(struct amdgpu_device *adev)
goto success;
}

if (igp_read_bios_from_vram(adev)) {
/* this is required for SR-IOV */
if (amdgpu_read_bios_from_vram(adev)) {
dev_info(adev->dev, "Fetched VBIOS from VRAM BAR\n");
goto success;
}
Expand Down Expand Up @@ -455,10 +479,24 @@ bool amdgpu_get_bios(struct amdgpu_device *adev)
return false;

success:
adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10;
return true;
}

bool amdgpu_get_bios(struct amdgpu_device *adev)
{
bool found;

if (adev->flags & AMD_IS_APU)
found = amdgpu_get_bios_apu(adev);
else
found = amdgpu_get_bios_dgpu(adev);

if (found)
adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10;

return found;
}

/* helper function for soc15 and onwards to read bios from rom */
bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev,
u8 *bios, u32 length_bytes)
Expand Down
Loading

0 comments on commit 994aeac

Please sign in to comment.