From d91581240303c8815a3dc9c8ab86a7c6b3236d5c Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 15 May 2018 17:57:09 +0000 Subject: [PATCH] AMDGPU/GlobalISel: Implement select() for G_FCONSTANT Summary: Also clean up G_CONSTANT selection. Reviewers: arsenm, nhaehnle Subscribers: kzhuravl, wdng, yaxunl, rovka, kristof.beyls, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D46170 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@332379 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AMDGPU/AMDGPUInstructionSelector.cpp | 62 ++++++++++++++----- .../GlobalISel/inst-select-constant.mir | 61 ++++++++++++++++++ .../GlobalISel/inst-select-load-smrd.mir | 12 ++-- 3 files changed, 114 insertions(+), 21 deletions(-) create mode 100644 test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index bac467928e45..3d255a8c9faf 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -204,36 +204,67 @@ bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); + MachineOperand &ImmOp = I.getOperand(1); + + // The AMDGPU backend only supports Imm operands and not CImm or FPImm. + if (ImmOp.isFPImm()) { + const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt(); + ImmOp.ChangeToImmediate(Imm.getZExtValue()); + } else if (ImmOp.isCImm()) { + ImmOp.ChangeToImmediate(ImmOp.getCImm()->getZExtValue()); + } + unsigned DstReg = I.getOperand(0).getReg(); - unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI); + unsigned Size; + bool IsSgpr; + const RegisterBank *RB = MRI.getRegBankOrNull(I.getOperand(0).getReg()); + if (RB) { + IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID; + Size = MRI.getType(DstReg).getSizeInBits(); + } else { + const TargetRegisterClass *RC = TRI.getRegClassForReg(MRI, DstReg); + IsSgpr = TRI.isSGPRClass(RC); + Size = RC->MC->getPhysRegSize() * 8; + } + + if (Size != 32 && Size != 64) + return false; + unsigned Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; if (Size == 32) { - I.setDesc(TII.get(AMDGPU::S_MOV_B32)); + I.setDesc(TII.get(Opcode)); + I.addImplicitDefUseOperands(*MF); return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } - assert(Size == 64); - DebugLoc DL = I.getDebugLoc(); - unsigned LoReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - unsigned HiReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - const APInt &Imm = I.getOperand(1).getCImm()->getValue(); + const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass : + &AMDGPU::VGPR_32RegClass; + unsigned LoReg = MRI.createVirtualRegister(RC); + unsigned HiReg = MRI.createVirtualRegister(RC); + const APInt &Imm = APInt(Size, I.getOperand(1).getImm()); - BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), LoReg) + BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg) .addImm(Imm.trunc(32).getZExtValue()); - BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg) + BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg) .addImm(Imm.ashr(32).getZExtValue()); - BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) - .addReg(LoReg) - .addImm(AMDGPU::sub0) - .addReg(HiReg) - .addImm(AMDGPU::sub1); + const MachineInstr *RS = + BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) + .addReg(LoReg) + .addImm(AMDGPU::sub0) + .addReg(HiReg) + .addImm(AMDGPU::sub1); + // We can't call constrainSelectedInstRegOperands here, because it doesn't // work for target independent opcodes I.eraseFromParent(); - return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI); + const TargetRegisterClass *DstRC = + TRI.getConstrainedRegClassForOperand(RS->getOperand(0), MRI); + if (!DstRC) + return true; + return RBI.constrainGenericRegister(DstReg, *DstRC, MRI); } static bool isConstant(const MachineInstr &MI) { @@ -484,6 +515,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I, case TargetOpcode::G_BITCAST: return selectCOPY(I); case TargetOpcode::G_CONSTANT: + case TargetOpcode::G_FCONSTANT: return selectG_CONSTANT(I); case TargetOpcode::G_GEP: return selectG_GEP(I); diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir new file mode 100644 index 000000000000..f848edaf6675 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir @@ -0,0 +1,61 @@ +# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN + +--- | + define amdgpu_kernel void @constant(i32 addrspace(1)* %global0, i64 addrspace(1)* %global1) {ret void} +... +--- + +name: constant +legalized: true +regBankSelected: true + + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GCN-LABEL: name: constant + %0:vgpr(s64) = COPY $vgpr0_vgpr1 + %1:vgpr(s64) = COPY $vgpr2_vgpr3 + + ; GCN: %{{[0-9]+}}:sreg_32 = S_MOV_B32 1 + %2:sreg_32(s32) = G_CONSTANT i32 1 + + ; GCN: [[LO0:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0 + ; GCN: [[HI0:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1 + ; GCN: %{{[0-9]+}}:sreg_64_xexec = REG_SEQUENCE [[LO0]], %subreg.sub0, [[HI0]], %subreg.sub1 + %3:sgpr(s64) = G_CONSTANT i64 4294967296 + + ; GCN: %{{[0-9]+}}:sreg_32 = S_MOV_B32 1065353216 + %4:sgpr(s32) = G_FCONSTANT float 1.0 + + ; GCN: [[LO1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0 + ; GCN: [[HI1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1072693248 + ; GCN: %{{[0-9]+}}:sreg_64_xexec = REG_SEQUENCE [[LO1]], %subreg.sub0, [[HI1]], %subreg.sub1 + %5:sgpr(s64) = G_FCONSTANT double 1.0 + + ; GCN: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_e32 1 + %6:vgpr(s32) = G_CONSTANT i32 1 + + ; GCN: [[LO2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0 + ; GCN: [[HI2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1 + ; GCN: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE [[LO2]], %subreg.sub0, [[HI2]], %subreg.sub1 + %7:vgpr(s64) = G_CONSTANT i64 4294967296 + + ; GCN: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_e32 1065353216 + %8:vgpr(s32) = G_FCONSTANT float 1.0 + + ; GCN: [[LO3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0 + ; GCN: [[HI3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1072693248 + ; GCN: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE [[LO3]], %subreg.sub0, [[HI3]], %subreg.sub1 + %9:vgpr(s64) = G_FCONSTANT double 1.0 + + G_STORE %2, %0 :: (volatile store 4 into %ir.global0) + G_STORE %4, %0 :: (volatile store 4 into %ir.global0) + G_STORE %6, %0 :: (volatile store 4 into %ir.global0) + G_STORE %8, %0 :: (volatile store 4 into %ir.global0) + G_STORE %3, %1 :: (volatile store 8 into %ir.global1) + G_STORE %5, %1 :: (volatile store 8 into %ir.global1) + G_STORE %7, %1 :: (volatile store 8 into %ir.global1) + G_STORE %9, %1 :: (volatile store 8 into %ir.global1) +... +--- diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir index 49238aed17da..1320d30ef14d 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir @@ -42,8 +42,8 @@ regBankSelected: true # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0 # Max immediate for CI -# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292 -# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 3 +# SIVI: [[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4294967292 +# SIVI: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 3 # SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1 # SIVI-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0 # SIVI-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0 @@ -56,8 +56,8 @@ regBankSelected: true # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0 # Immediate overflow for CI -# GCN: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0 -# GCN: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 4 +# GCN: [[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0 +# GCN: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4 # GCN: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1 # GCN-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0 # GCN-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0 @@ -74,8 +74,8 @@ regBankSelected: true # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0 # Overflow 32-bit byte offset -# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0 -# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 1 +# SIVI: [[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0 +# SIVI: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1 # SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1 # SIVI-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0 # SIVI-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0