AMDGPU hazard recognizer: wait states between s_setreg and s_getreg

What the hunks below do:

* lib/Target/AMDGPU/GCNHazardRecognizer.cpp
  - isSGetReg() matches S_GETREG_B32; isSSetReg() matches S_SETREG_B32 and
    S_SETREG_IMM32_B32.
  - getHWReg() reads the instruction's simm16 operand and masks it with
    AMDGPU::Hwreg::ID_MASK_ to obtain the hardware register id. The operand
    pointer is dereferenced without a null check, so the helper is only
    called on opcodes accepted by the two predicates above.
    NOTE(review): confirm every one of those opcodes carries a simm16
    operand.
  - getWaitStatesSinceSetReg() walks EmittedInstrs, counting every entry
    (null entries included), and returns the zero-based position of the
    first s_setreg accepted by the IsHazard predicate, or
    std::numeric_limits<int>::max() when no entry qualifies.
    NOTE(review): this presumably relies on EmittedInstrs being ordered
    newest-first; that ordering is not visible in this patch.
  - checkGetRegHazards() returns 2 minus that distance, using a predicate
    that accepts only an s_setreg whose hardware register id equals the one
    read by the s_getreg. A positive result is the number of wait states
    still missing; zero or a negative result means no hazard.
  - getHazardType() reports NoopHazard for an s_getreg when
    checkGetRegHazards() is positive, and PreEmitNoops() returns
    max(0, checkGetRegHazards()) for an s_getreg.

* lib/Target/AMDGPU/GCNHazardRecognizer.h
  - Declares getWaitStatesSinceSetReg() and checkGetRegHazards().

* test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
  - Moves the embedded IR document to the top of the file, renames test0 to
    div_fmas and adds a "name:" CHECK-LABEL for it.
  - Adds function s_getreg with four blocks:
      bb.0  S_SETREG_B32 then S_GETREG_B32, same id        -> two S_NOP 0
      bb.1  S_SETREG_IMM32_B32 then S_GETREG_B32, same id  -> two S_NOP 0
      bb.2  one S_MOV_B32 between the pair, same id        -> one S_NOP 0
      bb.3  ids differ (0 written, 1 read)                 -> no S_NOP

NOTE(review): leading whitespace and blank context lines in this patch were
reconstructed; if a hunk is rejected, retry with whitespace differences
ignored and compare against the tree.

diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 78f91c06adfb..9e9f1904b260 100644
--- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -42,6 +42,21 @@ static bool isDivFMas(unsigned Opcode) {
   return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
 }
 
+static bool isSGetReg(unsigned Opcode) {
+  return Opcode == AMDGPU::S_GETREG_B32;
+}
+
+static bool isSSetReg(unsigned Opcode) {
+  return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
+}
+
+static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
+
+  const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
+                                                     AMDGPU::OpName::simm16);
+  return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
+}
+
 ScheduleHazardRecognizer::HazardType
 GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
   MachineInstr *MI = SU->getInstr();
@@ -58,6 +73,9 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
   if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
     return NoopHazard;
 
+  if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
+    return NoopHazard;
+
   return NoHazard;
 }
 
@@ -78,6 +96,9 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
   if (isDivFMas(MI->getOpcode()))
     return std::max(0, checkDivFMasHazards(MI));
 
+  if (isSGetReg(MI->getOpcode()))
+    return std::max(0, checkGetRegHazards(MI));
+
   return 0;
 }
 
@@ -137,6 +158,19 @@ int GCNHazardRecognizer::getWaitStatesSinceDef(
   return std::numeric_limits<int>::max();
 }
 
+int GCNHazardRecognizer::getWaitStatesSinceSetReg(
+    function_ref<bool(MachineInstr *)> IsHazard) {
+
+  int WaitStates = -1;
+  for (MachineInstr *MI : EmittedInstrs) {
+    ++WaitStates;
+    if (!MI || !isSSetReg(MI->getOpcode()) || !IsHazard(MI))
+      continue;
+    return WaitStates;
+  }
+  return std::numeric_limits<int>::max();
+}
+
 //===----------------------------------------------------------------------===//
 // No-op Hazard Detection
 //===----------------------------------------------------------------------===//
@@ -284,3 +318,16 @@ int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
 
   return DivFMasWaitStates - WaitStatesNeeded;
 }
+
+int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);
+
+  const int GetRegWaitStates = 2;
+  auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
+    return GetRegHWReg == getHWReg(TII, *MI);
+  };
+  int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
+
+  return GetRegWaitStates - WaitStatesNeeded;
+}
diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.h b/lib/Target/AMDGPU/GCNHazardRecognizer.h
index 1d87f3a05831..dbcdde817252 100644
--- a/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -38,12 +38,14 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
   int getWaitStatesSinceDef(unsigned Reg,
                             function_ref<bool(MachineInstr *)> IsHazardDef =
                               [](MachineInstr *) { return true; });
+  int getWaitStatesSinceSetReg(function_ref<bool(MachineInstr *)> IsHazard);
 
   int checkSMEMSoftClauseHazards(MachineInstr *SMEM);
   int checkSMRDHazards(MachineInstr *SMRD);
   int checkVMEMHazards(MachineInstr* VMEM);
   int checkDPPHazards(MachineInstr *DPP);
   int checkDivFMasHazards(MachineInstr *DivFMas);
+  int checkGetRegHazards(MachineInstr *GetRegInstr);
 public:
   GCNHazardRecognizer(const MachineFunction &MF);
   // We can only issue one instruction per cycle.
diff --git a/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir b/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
index 9003454c3583..1f283ab9483f 100644
--- a/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
+++ b/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir
@@ -1,5 +1,12 @@
 # RUN: llc -march=amdgcn -run-pass post-RA-hazard-rec %s -o - | FileCheck %s
 
+--- |
+  define void @div_fmas() { ret void }
+  define void @s_getreg() { ret void }
+...
+---
+# CHECK-LABEL: name: div_fmas
+
 # CHECK-LABEL: bb.0:
 # CHECK: S_MOV_B64
 # CHECK-NOT: S_NOP
@@ -28,11 +35,7 @@
 # CHECK: S_NOP
 # CHECK: S_NOP
 # CHECK: V_DIV_FMAS_F32
---- |
-  define void @test0() { ret void }
-...
----
-name: test0
+name: div_fmas
 
 body: |
   bb.0:
@@ -57,4 +60,58 @@ body: |
     %vgpr4, %vcc = V_DIV_SCALE_F32 0, %vgpr1, 0, %vgpr1, 0, %vgpr3, 0, 0, implicit %exec
     %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec
     S_ENDPGM
+
+...
+
+...
+---
+# CHECK-LABEL: name: s_getreg
+
+# CHECK-LABEL: bb.0:
+# CHECK: S_SETREG
+# CHECK: S_NOP 0
+# CHECK: S_NOP 0
+# CHECK: S_GETREG
+
+# CHECK-LABEL: bb.1:
+# CHECK: S_SETREG_IMM32
+# CHECK: S_NOP 0
+# CHECK: S_NOP 0
+# CHECK: S_GETREG
+
+# CHECK-LABEL: bb.2:
+# CHECK: S_SETREG
+# CHECK: S_NOP 0
+# CHECK: S_GETREG
+
+# CHECK-LABEL: bb.3:
+# CHECK: S_SETREG
+# CHECK-NEXT: S_GETREG
+
+name: s_getreg
+
+body: |
+  bb.0:
+    successors: %bb.1
+    S_SETREG_B32 %sgpr0, 1
+    %sgpr1 = S_GETREG_B32 1
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2
+    S_SETREG_IMM32_B32 0, 1
+    %sgpr1 = S_GETREG_B32 1
+    S_BRANCH %bb.2
+
+  bb.2:
+    successors: %bb.3
+    S_SETREG_B32 %sgpr0, 1
+    %sgpr1 = S_MOV_B32 0
+    %sgpr2 = S_GETREG_B32 1
+    S_BRANCH %bb.3
+
+  bb.3:
+    S_SETREG_B32 %sgpr0, 0
+    %sgpr1 = S_GETREG_B32 1
+    S_ENDPGM
 ...