Skip to content

Commit

Permalink
[AMDGPU] Move register related queries to subtarget class
Browse files Browse the repository at this point in the history
Differential Revision: https://reviews.llvm.org/D29318


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294440 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
kzhuravl committed Feb 8, 2017
1 parent 61d1158 commit c478d35
Show file tree
Hide file tree
Showing 7 changed files with 299 additions and 315 deletions.
52 changes: 27 additions & 25 deletions lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -473,26 +473,12 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ExtraSGPRs = 6;
}

// Record first reserved register and reserved register count fields, and
// update max register counts if "amdgpu-debugger-reserve-regs" attribute was
// requested.
ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0;
ProgInfo.ReservedVGPRCount = RI->getNumDebuggerReservedVGPRs(STM);

// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
// DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
// attribute was requested.
if (STM.debuggerEmitPrologue()) {
ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
ProgInfo.DebuggerPrivateSegmentBufferSGPR =
RI->getHWRegIndex(MFI->getScratchRSrcReg());
}
unsigned ExtraVGPRs = STM.getReservedNumVGPRs(MF);

// Check the addressable register limit before we add ExtraSGPRs.
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
!STM.hasSGPRInitBug()) {
unsigned MaxAddressableNumSGPRs = STM.getMaxNumSGPRs();
unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
if (MaxSGPR + 1 > MaxAddressableNumSGPRs) {
// This can happen due to a compiler bug or when using inline asm.
LLVMContext &Ctx = MF.getFunction()->getContext();
Expand All @@ -507,23 +493,23 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,

// Account for extra SGPRs and VGPRs reserved for debugger use.
MaxSGPR += ExtraSGPRs;
MaxVGPR += RI->getNumDebuggerReservedVGPRs(STM);
MaxVGPR += ExtraVGPRs;

// We found the maximum register index. They start at 0, so add one to get the
// number of registers.
ProgInfo.NumVGPR = MaxVGPR + 1;
ProgInfo.NumSGPR = MaxSGPR + 1;
ProgInfo.NumVGPR = MaxVGPR + 1;

// Adjust number of registers used to meet default/requested minimum/maximum
// number of waves per execution unit request.
ProgInfo.NumSGPRsForWavesPerEU = std::max(
ProgInfo.NumSGPR, RI->getMinNumSGPRs(STM, MFI->getMaxWavesPerEU()));
ProgInfo.NumSGPR, STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
ProgInfo.NumVGPRsForWavesPerEU = std::max(
ProgInfo.NumVGPR, RI->getMinNumVGPRs(MFI->getMaxWavesPerEU()));
ProgInfo.NumVGPR, STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));

if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||
STM.hasSGPRInitBug()) {
unsigned MaxNumSGPRs = STM.getMaxNumSGPRs();
unsigned MaxNumSGPRs = STM.getAddressableNumSGPRs();
if (ProgInfo.NumSGPR > MaxNumSGPRs) {
// This can happen due to a compiler bug or when using inline asm to use the
// registers which are usually reserved for vcc etc.
Expand Down Expand Up @@ -560,13 +546,29 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,

// SGPRBlocks is actual number of SGPR blocks minus 1.
ProgInfo.SGPRBlocks = alignTo(ProgInfo.NumSGPRsForWavesPerEU,
RI->getSGPRAllocGranule());
ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / RI->getSGPRAllocGranule() - 1;
STM.getSGPRAllocGranule());
ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / STM.getSGPRAllocGranule() - 1;

// VGPRBlocks is actual number of VGPR blocks minus 1.
ProgInfo.VGPRBlocks = alignTo(ProgInfo.NumVGPRsForWavesPerEU,
RI->getVGPRAllocGranule());
ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / RI->getVGPRAllocGranule() - 1;
STM.getVGPRAllocGranule());
ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / STM.getVGPRAllocGranule() - 1;

// Record first reserved register and reserved register count fields, and
// update max register counts if "amdgpu-debugger-reserve-regs" attribute was
// requested.
ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0;
ProgInfo.ReservedVGPRCount = STM.getReservedNumVGPRs(MF);

// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
// DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
// attribute was requested.
if (STM.debuggerEmitPrologue()) {
ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
ProgInfo.DebuggerPrivateSegmentBufferSGPR =
RI->getHWRegIndex(MFI->getScratchRSrcReg());
}

// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
// register.
Expand Down
178 changes: 173 additions & 5 deletions lib/Target/AMDGPU/AMDGPUSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//

#include "AMDGPUSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/Target/TargetFrameLowering.h"
Expand Down Expand Up @@ -322,12 +323,179 @@ unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
return 1;
}

unsigned SISubtarget::getMaxNumSGPRs() const {
/// Looks up the minimum SGPR count associated with \p WavesPerEU waves per
/// execution unit. Volcanic Islands and newer generations use a different
/// table than older ones. A \p WavesPerEU value that is not listed in the
/// selected table yields 97.
unsigned SISubtarget::getMinNumSGPRs(unsigned WavesPerEU) const {
  const bool IsVIOrNewer =
      getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;

  if (IsVIOrNewer) {
    switch (WavesPerEU) {
    case 0:
    case 9:
    case 10:
      return 0;
    case 8:
      return 81;
    default:
      return 97;
    }
  }

  // Generations older than Volcanic Islands.
  switch (WavesPerEU) {
  case 0:
  case 10:
    return 0;
  case 9:
    return 49;
  case 8:
    return 57;
  case 7:
    return 65;
  case 6:
    return 73;
  case 5:
    return 81;
  default:
    return 97;
  }
}

/// Looks up the maximum SGPR count associated with \p WavesPerEU waves per
/// execution unit. Volcanic Islands and newer generations use a different
/// table than older ones.
///
/// \p Addressable only matters on Volcanic Islands and newer when
/// \p WavesPerEU is not listed in the table: the result is then
/// getAddressableNumSGPRs() if \p Addressable is set and 112 otherwise.
unsigned SISubtarget::getMaxNumSGPRs(unsigned WavesPerEU,
                                     bool Addressable) const {
  const bool IsVIOrNewer =
      getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;

  if (IsVIOrNewer) {
    switch (WavesPerEU) {
    case 0:
    case 9:
    case 10:
      return 80;
    case 8:
      return 96;
    default:
      if (Addressable)
        return getAddressableNumSGPRs();
      return 112;
    }
  }

  // Generations older than Volcanic Islands ignore Addressable.
  switch (WavesPerEU) {
  case 0:
  case 10:
    return 48;
  case 9:
    return 56;
  case 8:
    return 64;
  case 7:
    return 72;
  case 6:
    return 80;
  case 5:
    return 96;
  default:
    return getAddressableNumSGPRs();
  }
}

unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
if (MFI.hasFlatScratchInit()) {
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
return 4; // FLAT_SCRATCH, VCC (in that order).
}

if (isXNACKEnabled())
return 4; // XNACK, VCC (in that order).
return 2; // VCC.
}

/// Computes the maximum number of SGPRs function \p MF may use.
///
/// The starting point is the table value for the function's waves-per-EU
/// setting. An "amdgpu-num-sgpr" attribute overrides it when the requested
/// value is larger than the reserved SGPR count, is not larger than the
/// table maximum, and is not smaller than the table minimum for the second
/// waves-per-EU bound (when that bound is non-zero). A request smaller than
/// the number of preloaded input SGPRs is raised to that number first.
///
/// On subtargets with the SGPR init bug the limit is replaced by
/// FIXED_SGPR_COUNT_FOR_INIT_BUG. In every case the reserved SGPRs are
/// subtracted and the result is capped at the addressable maximum.
unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
  const Function &F = *MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Compute maximum number of SGPRs function can use using default/requested
  // minimum number of waves per execution unit.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
  unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);

  // Check if maximum number of SGPRs was explicitly requested using
  // "amdgpu-num-sgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-sgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-sgpr", MaxNumSGPRs);

    // Make sure requested value does not violate subtarget's specifications.
    if (Requested && (Requested <= getReservedNumSGPRs(MF)))
      Requested = 0;

    // If more SGPRs are required to support the input user/system SGPRs,
    // increase to accommodate them.
    //
    // FIXME: This really ends up using the requested number of SGPRs + number
    // of reserved special registers in total. Theoretically you could re-use
    // the last input registers for these special registers, but this would
    // require a lot of complexity to deal with the weird aliasing.
    unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs();
    if (Requested && Requested < InputNumSGPRs)
      Requested = InputNumSGPRs;

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
      Requested = 0;

    // A request that survived every check above replaces the table value.
    if (Requested)
      MaxNumSGPRs = Requested;
  }

  // With the SGPR init bug the limit is a fixed count. It is assigned rather
  // than returned so that the reserved registers are still subtracted below.
  if (hasSGPRInitBug())
    MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;

  return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
                  MaxAddressableNumSGPRs);
}

/// Looks up the minimum VGPR count associated with \p WavesPerEU waves per
/// execution unit. Values of 0 and 10 yield 0; a value of 1 or anything
/// above 10 yields 129.
unsigned SISubtarget::getMinNumVGPRs(unsigned WavesPerEU) const {
  // Indexed by WavesPerEU; entry 1 holds the fallback value.
  static const unsigned MinVGPRsByWaves[] = {
      /* 0*/ 0,  /* 1*/ 129, /* 2*/ 85, /* 3*/ 65, /* 4*/ 49, /* 5*/ 41,
      /* 6*/ 37, /* 7*/ 33,  /* 8*/ 29, /* 9*/ 25, /*10*/ 0};
  const unsigned LastListedWaves = 10;

  if (WavesPerEU > LastListedWaves)
    return 129;
  return MinVGPRsByWaves[WavesPerEU];
}

if (getGeneration() >= VOLCANIC_ISLANDS)
return 102;
/// Looks up the maximum VGPR count associated with \p WavesPerEU waves per
/// execution unit. A value that is not listed below yields
/// getTotalNumVGPRs().
unsigned SISubtarget::getMaxNumVGPRs(unsigned WavesPerEU) const {
  switch (WavesPerEU) {
  case 0:
  case 10:
    return 24;
  case 9:
    return 28;
  case 8:
    return 32;
  case 7:
    return 36;
  case 6:
    return 40;
  case 5:
    return 48;
  case 4:
    return 64;
  case 3:
    return 84;
  case 2:
    return 128;
  default:
    break;
  }

  // Not listed above: the whole VGPR file is available.
  return getTotalNumVGPRs();
}

/// Computes the maximum number of VGPRs function \p MF may use.
///
/// The starting point is the table value for the function's waves-per-EU
/// setting. An "amdgpu-num-vgpr" attribute overrides it when the requested
/// value is larger than the reserved VGPR count, is not larger than the
/// table maximum, and is not smaller than the table minimum for the second
/// waves-per-EU bound (when that bound is non-zero). The reserved VGPRs are
/// subtracted from the result.
unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
  const Function &F = *MF.getFunction();
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Compute maximum number of VGPRs function can use using default/requested
  // minimum number of waves per execution unit.
  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
  unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);

  // Check if maximum number of VGPRs was explicitly requested using
  // "amdgpu-num-vgpr" attribute.
  if (F.hasFnAttribute("amdgpu-num-vgpr")) {
    unsigned Requested = AMDGPU::getIntegerAttribute(
      F, "amdgpu-num-vgpr", MaxNumVGPRs);

    // Make sure requested value does not violate subtarget's specifications.
    if (Requested && Requested <= getReservedNumVGPRs(MF))
      Requested = 0;

    // Make sure requested value is compatible with values implied by
    // default/requested minimum/maximum number of waves per execution unit.
    if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
      Requested = 0;
    if (WavesPerEU.second &&
        Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
      Requested = 0;

    // A request that survived every check above replaces the table value.
    if (Requested)
      MaxNumVGPRs = Requested;
  }

  // Reserved VGPRs are not available to the function itself.
  return MaxNumVGPRs - getReservedNumVGPRs(MF);
}
80 changes: 79 additions & 1 deletion lib/Target/AMDGPU/AMDGPUSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -642,7 +642,85 @@ class SISubtarget final : public AMDGPUSubtarget {
return true;
}

unsigned getMaxNumSGPRs() const;
/// \returns Granule, in registers, in which SGPRs are allocated on the
/// subtarget.
unsigned getSGPRAllocGranule() const { return 8; }

/// \returns Total number of SGPRs supported by the subtarget: 800 on
/// Volcanic Islands and newer generations, 512 otherwise.
unsigned getTotalNumSGPRs() const {
  return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS ? 800 : 512;
}

/// \returns Addressable number of SGPRs supported by the subtarget. This is
/// FIXED_SGPR_COUNT_FOR_INIT_BUG when the subtarget has the SGPR init bug,
/// otherwise 102 on Volcanic Islands and newer generations and 104 on older
/// ones.
unsigned getAddressableNumSGPRs() const {
  // The init-bug check takes priority over the generation check.
  if (hasSGPRInitBug())
    return SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
  return getGeneration() >= VOLCANIC_ISLANDS ? 102 : 104;
}

/// Table lookup keyed on \p WavesPerEU; the table used depends on whether the
/// subtarget generation is Volcanic Islands or newer.
///
/// \param WavesPerEU Number of waves per execution unit to look up. Values
/// that are not listed in the table select the table's default entry.
/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumSGPRs(unsigned WavesPerEU) const;

/// Table lookup keyed on \p WavesPerEU; the table used depends on whether the
/// subtarget generation is Volcanic Islands or newer.
///
/// \param WavesPerEU Number of waves per execution unit to look up.
/// \param Addressable Only consulted on Volcanic Islands and newer when
/// \p WavesPerEU is not listed in the table: if true the result is
/// getAddressableNumSGPRs(), otherwise it is 112.
/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const;

/// \returns Reserved number of SGPRs for given function \p MF. The count
/// covers VCC and, depending on the function's flat scratch init setting, the
/// subtarget generation and whether XNACK is enabled, FLAT_SCRATCH and XNACK
/// as well.
unsigned getReservedNumSGPRs(const MachineFunction &MF) const;

/// \returns Maximum number of SGPRs that meets number of waves per execution
/// unit requirement for function \p MF, or number of SGPRs explicitly
/// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
///
/// \returns Value that meets number of waves per execution unit requirement
/// if explicitly requested value cannot be converted to integer, violates
/// subtarget's specifications, or does not meet number of waves per execution
/// unit requirement.
///
/// The reserved SGPRs (see getReservedNumSGPRs) are subtracted from the limit
/// and the result is capped at the addressable maximum before it is returned.
unsigned getMaxNumSGPRs(const MachineFunction &MF) const;

/// \returns Granule, in registers, in which VGPRs are allocated on the
/// subtarget.
unsigned getVGPRAllocGranule() const { return 4; }

/// \returns Size of the VGPR file supported by the subtarget (256 registers).
unsigned getTotalNumVGPRs() const { return 256; }

/// \returns Addressable number of VGPRs supported by the subtarget, which is
/// the same value as getTotalNumVGPRs().
unsigned getAddressableNumVGPRs() const { return getTotalNumVGPRs(); }

/// Table lookup keyed on \p WavesPerEU.
///
/// \param WavesPerEU Number of waves per execution unit to look up. Values
/// that are not listed in the table select the table's default entry.
/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumVGPRs(unsigned WavesPerEU) const;

/// Table lookup keyed on \p WavesPerEU.
///
/// \param WavesPerEU Number of waves per execution unit to look up. Values
/// that are not listed in the table yield getTotalNumVGPRs().
/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const;

/// \returns Number of VGPRs reserved in function \p MF: 4 when
/// debuggerReserveRegs() is set, 0 otherwise. \p MF itself is not consulted.
unsigned getReservedNumVGPRs(const MachineFunction &MF) const {
  if (debuggerReserveRegs())
    return 4;
  return 0;
}

/// \returns Maximum number of VGPRs that meets number of waves per execution
/// unit requirement for function \p MF, or number of VGPRs explicitly
/// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
///
/// \returns Value that meets number of waves per execution unit requirement
/// if explicitly requested value cannot be converted to integer, violates
/// subtarget's specifications, or does not meet number of waves per execution
/// unit requirement.
///
/// The reserved VGPRs (see getReservedNumVGPRs) are subtracted from the limit
/// before it is returned.
unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
};

} // end namespace llvm
Expand Down
4 changes: 2 additions & 2 deletions lib/Target/AMDGPU/GCNSchedStrategy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
unsigned VGPRExcessLimit =
Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF);
unsigned SGPRCriticalLimit = SRI->getMaxNumSGPRs(ST, MaxWaves, true);
unsigned VGPRCriticalLimit = SRI->getMaxNumVGPRs(MaxWaves);
unsigned SGPRCriticalLimit = ST.getMaxNumSGPRs(MaxWaves, true);
unsigned VGPRCriticalLimit = ST.getMaxNumVGPRs(MaxWaves);

ReadyQueue &Q = Zone.Available;
for (SUnit *SU : Q) {
Expand Down
Loading

0 comments on commit c478d35

Please sign in to comment.