Skip to content

Commit

Permalink
R600/SI: Only create one instruction when spilling/restoring register v3
Browse files Browse the repository at this point in the history
The register spiller assumes that only one new instruction is created
when spilling and restoring registers, so we need to emit pseudo
instructions for vector register spills and lower them after
register allocation.

v2:
  - Fix calculation of lane index
  - Extend VGPR liveness to end of program.

v3:
  - Use SIMM16 field of S_NOP to specify multiple NOPs.

https://bugs.freedesktop.org/show_bug.cgi?id=75005

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@207843 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
tstellarAMD committed May 2, 2014
1 parent ecc1896 commit 9b22626
Show file tree
Hide file tree
Showing 7 changed files with 231 additions and 36 deletions.
163 changes: 138 additions & 25 deletions lib/Target/R600/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,27 +187,45 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
DebugLoc DL = MBB.findDebugLoc(MI);
unsigned KillFlag = isKill ? RegState::Kill : 0;
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
unsigned Lane = MFI->SpillTracker.getNextLane(MRI);
BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32),
MFI->SpillTracker.LaneVGPR)
unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent());

BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), MFI->SpillTracker.LaneVGPR)
.addReg(SrcReg, KillFlag)
.addImm(Lane);
MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, Lane);
} else if (RI.isSGPRClass(RC)) {
// We are only allowed to create one new instruction when spilling
// registers, so we need to use pseudo instruction for vector
// registers.
//
// Reserve a spot in the spill tracker for each sub-register of
// the vector register.
unsigned NumSubRegs = RC->getSize() / 4;
unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent(),
NumSubRegs);
MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR,
Lane);
} else {
for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) {
unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(MBB, MI, MBB.findDebugLoc(MI), get(AMDGPU::COPY), SubReg)
.addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
storeRegToStackSlot(MBB, MI, SubReg, isKill, FrameIndex + i,
&AMDGPU::SReg_32RegClass, TRI);
FirstLane);

unsigned Opcode;
switch (RC->getSize() * 8) {
case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break;
case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
default: llvm_unreachable("Cannot spill register class");
}

BuildMI(MBB, MI, DL, get(Opcode), MFI->SpillTracker.LaneVGPR)
.addReg(SrcReg)
.addImm(FrameIndex);
} else {
llvm_unreachable("VGPR spilling not supported");
}
}

Expand All @@ -216,30 +234,125 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
DebugLoc DL = MBB.findDebugLoc(MI);
if (TRI->getCommonSubClass(RC, &AMDGPU::SReg_32RegClass)) {
SIMachineFunctionInfo::SpilledReg Spill =
SIMachineFunctionInfo::SpilledReg Spill =
MFI->SpillTracker.getSpilledReg(FrameIndex);
assert(Spill.VGPR);
BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), DestReg)
.addReg(Spill.VGPR)
.addImm(Spill.Lane);
insertNOPs(MI, 3);
} else if (RI.isSGPRClass(RC)){
unsigned Opcode;
switch(RC->getSize() * 8) {
case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break;
case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
default: llvm_unreachable("Cannot spill register class");
}

SIMachineFunctionInfo::SpilledReg Spill =
MFI->SpillTracker.getSpilledReg(FrameIndex);

BuildMI(MBB, MI, DL, get(Opcode), DestReg)
.addReg(Spill.VGPR)
.addImm(FrameIndex);
insertNOPs(MI, 3);
} else {
for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) {
unsigned Flags = RegState::Define;
if (i == 0) {
Flags |= RegState::Undef;
}
unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
loadRegFromStackSlot(MBB, MI, SubReg, FrameIndex + i,
&AMDGPU::SReg_32RegClass, TRI);
BuildMI(MBB, MI, DL, get(AMDGPU::COPY))
.addReg(DestReg, Flags, RI.getSubRegFromChannel(i))
.addReg(SubReg);
llvm_unreachable("VGPR spilling not supported");
}
}

/// Returns how many 32-bit sub-registers an SGPR spill pseudo covers.
///
/// \param Op One of the SI_SPILL_S{64,128,256,512}_{SAVE,RESTORE} opcodes.
/// \returns The register width in bits divided by 32.
static unsigned getNumSubRegsForSpillOp(unsigned Op) {
  unsigned Bits = 0;

  switch (Op) {
  case AMDGPU::SI_SPILL_S64_SAVE:
  case AMDGPU::SI_SPILL_S64_RESTORE:
    Bits = 64;
    break;
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
    Bits = 128;
    break;
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
    Bits = 256;
    break;
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
    Bits = 512;
    break;
  default:
    llvm_unreachable("Invalid spill opcode");
  }

  // Each sub-register is one 32-bit SGPR.
  return Bits / 32;
}

/// Inserts \p Count NOPs immediately before \p MI.
///
/// An S_NOP whose immediate is N is treated here as covering N + 1 NOPs, with
/// at most 8 per instruction, so the request is emitted in chunks of up to 8.
///
/// \param MI    Insertion point; it is dereferenced to find the parent block
///              and debug location.
///              NOTE(review): this presumably requires MI not to be the block's
///              end iterator -- confirm against callers.
/// \param Count Number of NOPs wanted; nothing is emitted when Count <= 0.
void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
                             int Count) const {
  for (int Remaining = Count; Remaining > 0; Remaining -= 8) {
    // Clamp this chunk to 8 NOPs and encode it as (chunk size - 1).
    const int Chunk = Remaining >= 8 ? 8 : Remaining;
    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP))
        .addImm(Chunk - 1);
  }
}

/// Lowers the SGPR spill/restore pseudo instructions into real lane accesses.
///
/// An SI_SPILL_S*_SAVE pseudo becomes one V_WRITELANE_B32 per 32-bit
/// sub-register of the source SGPR tuple; an SI_SPILL_S*_RESTORE pseudo
/// becomes one V_READLANE_B32 per sub-register of the destination tuple.
/// Sub-register i uses lane (first reserved lane + i) of the lane VGPR, where
/// the first lane is looked up in the spill tracker by frame index.  The
/// pseudo is erased afterwards.  Any other opcode is forwarded to
/// AMDGPUInstrInfo::expandPostRAPseudo().
///
/// \param MI The instruction to expand.
/// \returns true when a spill pseudo was expanded here; otherwise whatever the
///          base-class implementation returns.
bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  MachineBasicBlock &MBB = *MI->getParent();
  SIMachineFunctionInfo *MFI =
      MBB.getParent()->getInfo<SIMachineFunctionInfo>();
  DebugLoc DL = MBB.findDebugLoc(MI);

  switch (MI->getOpcode()) {
  default:
    return AMDGPUInstrInfo::expandPostRAPseudo(MI);

  // SGPR register spill
  case AMDGPU::SI_SPILL_S512_SAVE:
  case AMDGPU::SI_SPILL_S256_SAVE:
  case AMDGPU::SI_SPILL_S128_SAVE:
  case AMDGPU::SI_SPILL_S64_SAVE: {
    // Operands: (0) lane VGPR def, (1) SGPR tuple to spill, (2) frame index.
    // All of these are loop-invariant, so read them once up front.
    const unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
    const unsigned LaneVGPR = MI->getOperand(0).getReg();
    const unsigned SrcReg = MI->getOperand(1).getReg();
    const unsigned FrameIndex = MI->getOperand(2).getImm();
    const SIMachineFunctionInfo::SpilledReg Spill =
        MFI->SpillTracker.getSpilledReg(FrameIndex);

    for (unsigned i = 0; i != NumSubRegs; ++i) {
      unsigned SubReg =
          RI.getPhysRegSubReg(SrcReg, &AMDGPU::SGPR_32RegClass, i);

      BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), LaneVGPR)
          .addReg(SubReg)
          .addImm(Spill.Lane + i);
    }
    MI->eraseFromParent();
    break;
  }

  // SGPR register restore
  case AMDGPU::SI_SPILL_S512_RESTORE:
  case AMDGPU::SI_SPILL_S256_RESTORE:
  case AMDGPU::SI_SPILL_S128_RESTORE:
  case AMDGPU::SI_SPILL_S64_RESTORE: {
    // Operands: (0) SGPR tuple def, (1) lane VGPR, (2) frame index.
    const unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
    const unsigned DstReg = MI->getOperand(0).getReg();
    const unsigned LaneVGPR = MI->getOperand(1).getReg();
    const unsigned FrameIndex = MI->getOperand(2).getImm();
    const SIMachineFunctionInfo::SpilledReg Spill =
        MFI->SpillTracker.getSpilledReg(FrameIndex);

    for (unsigned i = 0; i != NumSubRegs; ++i) {
      unsigned SubReg =
          RI.getPhysRegSubReg(DstReg, &AMDGPU::SGPR_32RegClass, i);

      BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), SubReg)
          .addReg(LaneVGPR)
          .addImm(Spill.Lane + i);
    }
    MI->eraseFromParent();
    break;
  }
  }
  return true;
}

MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
Expand Down
4 changes: 4 additions & 0 deletions lib/Target/R600/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ class SIInstrInfo : public AMDGPUInstrInfo {
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;

/// Expands the SI_SPILL_* pseudo instructions; other opcodes are forwarded to
/// the AMDGPUInstrInfo implementation.
bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;

unsigned commuteOpcode(unsigned Opcode) const;

MachineInstr *commuteInstruction(MachineInstr *MI,
Expand Down Expand Up @@ -165,6 +167,8 @@ class SIInstrInfo : public AMDGPUInstrInfo {

void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I,
unsigned SavReg, unsigned IndexReg) const;

void insertNOPs(MachineBasicBlock::iterator MI, int Count) const;
};

namespace AMDGPU {
Expand Down
23 changes: 22 additions & 1 deletion lib/Target/R600/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;

let Predicates = [isSI] in {

//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>;
// S_NOP takes a single 16-bit immediate operand, $SIMM16, which is printed as
// part of the assembly string.  SIInstrInfo::insertNOPs() passes
// (number of NOPs - 1) in this operand.
def S_NOP : SOPP <0x00000000, (ins i16imm:$SIMM16), "S_NOP $SIMM16", []>;

let isTerminator = 1 in {

Expand Down Expand Up @@ -1574,6 +1574,27 @@ def V_SUB_F64 : InstSI <

} // end usesCustomInserter

// Pseudo instructions used to spill and restore a whole SGPR tuple with a
// single instruction.  Both have an empty assembly string and no selection
// pattern; SIInstrInfo::expandPostRAPseudo() replaces them with
// V_WRITELANE_B32 / V_READLANE_B32 instructions.
//
// sgpr_class is the SGPR register class being spilled or restored.
multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {

// Spill: writes $src into the lane VGPR $dst.  $frame_idx is the frame index
// used to look up the reserved lanes in the spill tracker.
def _SAVE : InstSI <
(outs VReg_32:$dst),
(ins sgpr_class:$src, i32imm:$frame_idx),
"", []
>;

// Restore: reads $dst back from the lane VGPR $src, using the same
// $frame_idx that was recorded when the register was spilled.
def _RESTORE : InstSI <
(outs sgpr_class:$dst),
(ins VReg_32:$src, i32imm:$frame_idx),
"", []
>;

}

// One SAVE/RESTORE pair per SGPR tuple width (64, 128, 256 and 512 bits).
defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;
defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;

} // end IsCodeGenOnly, isPseudo

def : Pat<
Expand Down
57 changes: 48 additions & 9 deletions lib/Target/R600/SIMachineFunctionInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@


#include "SIMachineFunctionInfo.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

#define MAX_LANES 64

Expand All @@ -26,21 +29,57 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
PSInputAddr(0),
SpillTracker() { }

static unsigned createLaneVGPR(MachineRegisterInfo &MRI) {
return MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
/// Creates a new virtual VGPR whose lanes are used to hold spilled SGPRs, and
/// adds it as an implicit use of the function's S_ENDPGM instruction.
///
/// \param MRI Register info used to create the virtual register.
/// \param MF  Function that is searched for a block ending in S_ENDPGM.
/// \returns The new VReg_32 virtual register.  If no block ends in S_ENDPGM an
///          error is emitted on the LLVMContext and the register is returned
///          without the extra use.
static unsigned createLaneVGPR(MachineRegisterInfo &MRI, MachineFunction *MF) {
  unsigned VGPR = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);

  // We need to add this register as live out for the function, in order to
  // have the live range calculated correctly.
  //
  // When register spilling begins, we have already calculated the live
  // intervals for all the registers.  Since we are spilling SGPRs to
  // VGPRs, we need to update the Lane VGPR's live interval every time we
  // spill or restore a register.
  //
  // Unfortunately, there is no good way to update the live interval as
  // the TargetInstrInfo callbacks for spilling and restoring don't give
  // us access to the live interval information.
  //
  // We are lucky, though, because the InlineSpiller calls
  // LiveRangeEdit::calculateRegClassAndHint() which iterates through
  // all the new registers that have been created when restoring a register
  // and calls LiveIntervals::getInterval(), which creates and computes
  // the live interval for the newly created register.  However, once this
  // live interval is created, it doesn't change and since we usually reuse
  // the Lane VGPR multiple times, this means any uses after the first aren't
  // added to the live interval.
  //
  // To work around this, we add Lane VGPRs to the function's live out list,
  // so that we can guarantee its live range will cover all of its uses.

  for (MachineBasicBlock &MBB : *MF) {
    // back() is only valid on a non-empty block, so skip empty ones.
    if (MBB.empty())
      continue;

    MachineInstr &Last = MBB.back();
    if (Last.getOpcode() == AMDGPU::S_ENDPGM) {
      // isDef = false, isImp = true: an implicit use of the lane VGPR.
      Last.addOperand(*MF, MachineOperand::CreateReg(VGPR, false, true));
      return VGPR;
    }
  }

  MF->getFunction()->getContext().emitError(
      "Could not find S_ENDPGM instruction.");
  return VGPR;
}

unsigned SIMachineFunctionInfo::RegSpillTracker::getNextLane(MachineRegisterInfo &MRI) {
unsigned SIMachineFunctionInfo::RegSpillTracker::reserveLanes(
MachineRegisterInfo &MRI, MachineFunction *MF, unsigned NumRegs) {
unsigned StartLane = CurrentLane;
CurrentLane += NumRegs;
if (!LaneVGPR) {
LaneVGPR = createLaneVGPR(MRI);
LaneVGPR = createLaneVGPR(MRI, MF);
} else {
CurrentLane++;
if (CurrentLane == MAX_LANES) {
CurrentLane = 0;
LaneVGPR = createLaneVGPR(MRI);
if (CurrentLane >= MAX_LANES) {
StartLane = CurrentLane = 0;
LaneVGPR = createLaneVGPR(MRI, MF);
}
}
return CurrentLane;
return StartLane;
}

void SIMachineFunctionInfo::RegSpillTracker::addSpilledReg(unsigned FrameIndex,
Expand Down
7 changes: 6 additions & 1 deletion lib/Target/R600/SIMachineFunctionInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,12 @@ class SIMachineFunctionInfo : public AMDGPUMachineFunction {
public:
unsigned LaneVGPR;
RegSpillTracker() : CurrentLane(0), SpilledRegisters(), LaneVGPR(0) { }
unsigned getNextLane(MachineRegisterInfo &MRI);
/// Reserves consecutive lanes of the lane VGPR for spilled registers.
/// \p MRI and \p MF are used when a new lane VGPR has to be created.
/// \p NumRegs The number of consecutive registers that need to be spilled.
/// This function will ensure that all registers are stored in
/// the same VGPR.
/// \returns The lane to be used for storing the first register.
unsigned reserveLanes(MachineRegisterInfo &MRI, MachineFunction *MF,
unsigned NumRegs = 1);
void addSpilledReg(unsigned FrameIndex, unsigned Reg, int Lane = -1);
const SpilledReg& getSpilledReg(unsigned FrameIndex);
bool programSpillsRegisters() { return !SpilledRegisters.empty(); }
Expand Down
7 changes: 7 additions & 0 deletions lib/Target/R600/SIRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,10 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
return &AMDGPU::VGPR_32RegClass;
}
}

/// Returns the register of \p SubRC located \p Channel entries after the
/// hardware register index of \p Reg.
///
/// \param Reg     Physical register whose hardware index is the base.
/// \param SubRC   Register class the result is taken from.
/// \param Channel Offset added to the base hardware index.
unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
                                          const TargetRegisterClass *SubRC,
                                          unsigned Channel) const {
  const unsigned SubRegHWIndex = getHWRegIndex(Reg) + Channel;
  return SubRC->getRegister(SubRegHWIndex);
}
6 changes: 6 additions & 0 deletions lib/Target/R600/SIRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
/// be returned.
const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC,
unsigned SubIdx) const;

/// \p Reg The physical register to take a sub-register of.
/// \p SubRC The register class the returned sub-register belongs to.
/// \p Channel This is the register channel (e.g. a value from 0-15 for a
/// 512-bit register), not the SubReg index.
/// \returns The sub-register of Reg that is in Channel.
unsigned getPhysRegSubReg(unsigned Reg, const TargetRegisterClass *SubRC,
unsigned Channel) const;
};

} // End namespace llvm
Expand Down

0 comments on commit 9b22626

Please sign in to comment.