Skip to content

Commit

Permalink
AMDGPU/SI: Don't move copies of immediates to the VALU
Browse files Browse the repository at this point in the history
Summary:
If we write an immediate to a VGPR and then copy the VGPR to an
SGPR, we can replace the copy with an S_MOV_B32 sgpr, imm, rather than
moving the copy to the VALU.

Reviewers: arsenm

Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, llvm-commits, tony-tye

Differential Revision: https://reviews.llvm.org/D27272

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288849 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
tstellarAMD committed Dec 6, 2016
1 parent 2c23a5b commit 4fae32e
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 1 deletion.
44 changes: 43 additions & 1 deletion lib/Target/AMDGPU/SIFixSGPRCopies.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,38 @@ static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
return false;
}

static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
const MachineInstr *MoveImm,
const SIInstrInfo *TII,
unsigned &SMovOp,
int64_t &Imm) {

if (!MoveImm->isMoveImmediate())
return false;

const MachineOperand *ImmOp =
TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0);
if (!ImmOp->isImm())
return false;

// FIXME: Handle copies with sub-regs.
if (Copy->getOperand(0).getSubReg())
return false;

switch (MoveImm->getOpcode()) {
default:
return false;
case AMDGPU::V_MOV_B32_e32:
SMovOp = AMDGPU::S_MOV_B32;
break;
case AMDGPU::V_MOV_B64_PSEUDO:
SMovOp = AMDGPU::S_MOV_B64;
break;
}
Imm = ImmOp->getImm();
return true;
}

bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
Expand Down Expand Up @@ -323,7 +355,17 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
const TargetRegisterClass *SrcRC, *DstRC;
std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI);
MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(1).getReg());
unsigned SMovOp;
int64_t Imm;
// If we are just copying an immediate, we can replace the copy with
// s_mov_b32.
if (isSafeToFoldImmIntoCopy(&MI, DefMI, TII, SMovOp, Imm)) {
MI.getOperand(1).ChangeToImmediate(Imm);
MI.addImplicitDefUseOperands(MF);
MI.setDesc(TII->get(SMovOp));
break;
}
TII->moveToVALU(MI);
}

Expand Down
25 changes: 25 additions & 0 deletions test/CodeGen/AMDGPU/salu-to-valu.ll
Original file line number Diff line number Diff line change
Expand Up @@ -478,5 +478,30 @@ bb4:
br label %bb1
}

; GCN-LABEL: {{^}}phi_imm_in_sgprs
; GCN: s_movk_i32 [[A:s[0-9]+]], 0x400
; GCN: s_movk_i32 [[B:s[0-9]+]], 0x400
; GCN: [[LOOP_LABEL:[0-9a-zA-Z_]+]]:
; GCN: s_xor_b32 [[B]], [[B]], [[A]]
; GCN: s_cbranch_scc{{[01]}} [[LOOP_LABEL]]
; Loop with two integer phis whose incoming values from the entry block are
; immediates (0 and 1024).  The xor flips %offset between 1024 and 0 each
; iteration, so both phi inputs stay in SGPRs; the CHECK lines above expect
; the immediates to be materialized with s_movk_i32 rather than the whole
; chain being moved to the VALU.
define void @phi_imm_in_sgprs(i32 addrspace(3)* %out, i32 %cond) {
entry:
br label %loop

loop:
; Induction variable: starts at 0, incremented by 1 per iteration.
%i = phi i32 [0, %entry], [%i.add, %loop]
; Alternating offset: 1024 on the first trip, then xor-toggled below.
%offset = phi i32 [1024, %entry], [%offset.xor, %loop]
%offset.xor = xor i32 %offset, 1024
%offset.i = add i32 %offset.xor, %i
%ptr = getelementptr i32, i32 addrspace(3)* %out, i32 %offset.i
store i32 0, i32 addrspace(3)* %ptr
%i.add = add i32 %i, 1
; Keep looping while %i.add < %cond (unsigned compare).
%cmp = icmp ult i32 %i.add, %cond
br i1 %cmp, label %loop, label %exit

exit:
ret void
}

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

0 comments on commit 4fae32e

Please sign in to comment.