Skip to content

Commit

Permalink
AMDGPU: Support commuting with immediate in src0
Browse files Browse the repository at this point in the history
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@280970 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
arsenm committed Sep 8, 2016
1 parent 86159cb commit d764af3
Show file tree
Hide file tree
Showing 32 changed files with 161 additions and 177 deletions.
168 changes: 71 additions & 97 deletions lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -497,9 +497,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
}

int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
const unsigned Opcode = MI.getOpcode();

int SIInstrInfo::commuteOpcode(unsigned Opcode) const {
int NewOpc;

// Try to map original to commuted opcode
Expand Down Expand Up @@ -908,91 +906,89 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}

/// Commutes the operands in the given instruction.
/// The commutable operands are specified by their indices OpIdx0 and OpIdx1.
///
/// Do not call this method for a non-commutable instruction or for
/// non-commutable pair of operand indices OpIdx0 and OpIdx1.
/// Even though the instruction is commutable, the method may still
/// fail to commute the operands, null pointer is returned in such cases.
MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
unsigned OpIdx0,
unsigned OpIdx1) const {
int CommutedOpcode = commuteOpcode(MI);
if (CommutedOpcode == -1)
return nullptr;
bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
MachineOperand &Src0,
unsigned Src0OpName,
MachineOperand &Src1,
unsigned Src1OpName) const {
MachineOperand *Src0Mods = getNamedOperand(MI, Src0OpName);
if (!Src0Mods)
return false;

int Src0Idx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
MachineOperand &Src0 = MI.getOperand(Src0Idx);
if (!Src0.isReg())
MachineOperand *Src1Mods = getNamedOperand(MI, Src1OpName);
assert(Src1Mods &&
"All commutable instructions have both src0 and src1 modifiers");

int Src0ModsVal = Src0Mods->getImm();
int Src1ModsVal = Src1Mods->getImm();

Src1Mods->setImm(Src0ModsVal);
Src0Mods->setImm(Src1ModsVal);
return true;
}

static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
MachineOperand &RegOp,
MachineOperand &ImmOp) {
// TODO: Handle other immediate like types.
if (!ImmOp.isImm())
return nullptr;

int Src1Idx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
int64_t ImmVal = ImmOp.getImm();
ImmOp.ChangeToRegister(RegOp.getReg(), false, false,
RegOp.isKill(), RegOp.isDead(), RegOp.isUndef(),
RegOp.isDebug());
ImmOp.setSubReg(RegOp.getSubReg());
RegOp.ChangeToImmediate(ImmVal);
return &MI;
}

if ((OpIdx0 != static_cast<unsigned>(Src0Idx) ||
OpIdx1 != static_cast<unsigned>(Src1Idx)) &&
(OpIdx0 != static_cast<unsigned>(Src1Idx) ||
OpIdx1 != static_cast<unsigned>(Src0Idx)))
MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
unsigned Src0Idx,
unsigned Src1Idx) const {
assert(!NewMI && "this should never be used");

unsigned Opc = MI.getOpcode();
int CommutedOpcode = commuteOpcode(Opc);
if (CommutedOpcode == -1)
return nullptr;

MachineOperand &Src1 = MI.getOperand(Src1Idx);
assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
static_cast<int>(Src0Idx) &&
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
static_cast<int>(Src1Idx) &&
"inconsistency with findCommutedOpIndices");

if (isVOP2(MI) || isVOPC(MI)) {
const MCInstrDesc &InstrDesc = MI.getDesc();
// For VOP2 and VOPC instructions, any operand type is valid to use for
// src0. Make sure we can use the src0 as src1.
//
// We could be stricter here and only allow commuting if there is a reason
// to do so. i.e. if both operands are VGPRs there is no real benefit,
// although MachineCSE attempts to find matches by commuting.
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
if (!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0))
return nullptr;
}
MachineOperand &Src0 = MI.getOperand(Src0Idx);
MachineOperand &Src1 = MI.getOperand(Src1Idx);

MachineInstr *CommutedMI = &MI;
if (!Src1.isReg()) {
// Allow commuting instructions with Imm operands.
if (NewMI || !Src1.isImm() || (!isVOP2(MI) && !isVOP3(MI))) {
return nullptr;
}
// Be sure to copy the source modifiers to the right place.
if (MachineOperand *Src0Mods =
getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)) {
MachineOperand *Src1Mods =
getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);

int Src0ModsVal = Src0Mods->getImm();
if (!Src1Mods && Src0ModsVal != 0)
return nullptr;

// XXX - This assert might be a lie. It might be useful to have a neg
// modifier with 0.0.
int Src1ModsVal = Src1Mods->getImm();
assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates");

Src1Mods->setImm(Src0ModsVal);
Src0Mods->setImm(Src1ModsVal);
MachineInstr *CommutedMI = nullptr;
if (Src0.isReg() && Src1.isReg()) {
if (isOperandLegal(MI, Src1Idx, &Src0)) {
// Be sure to copy the source modifiers to the right place.
CommutedMI
= TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
}

unsigned Reg = Src0.getReg();
unsigned SubReg = Src0.getSubReg();
if (Src1.isImm())
Src0.ChangeToImmediate(Src1.getImm());
else
llvm_unreachable("Should only have immediates");

Src1.ChangeToRegister(Reg, false);
Src1.setSubReg(SubReg);
} else if (Src0.isReg() && !Src1.isReg()) {
// src0 should always be able to support any operand type, so no need to
// check operand legality.
CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
} else if (!Src0.isReg() && Src1.isReg()) {
if (isOperandLegal(MI, Src1Idx, &Src0))
CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
} else {
CommutedMI =
TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx0, OpIdx1);
// FIXME: Found two non registers to commute. This does happen.
return nullptr;
}

if (CommutedMI)

if (CommutedMI) {
swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers,
Src1, AMDGPU::OpName::src1_modifiers);

CommutedMI->setDesc(get(CommutedOpcode));
}

return CommutedMI;
}
Expand All @@ -1002,40 +998,18 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
// TargetInstrInfo::commuteInstruction uses it.
bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
unsigned &SrcOpIdx1) const {
const MCInstrDesc &MCID = MI.getDesc();
if (!MCID.isCommutable())
if (!MI.isCommutable())
return false;

unsigned Opc = MI.getOpcode();
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
if (Src0Idx == -1)
return false;

// FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on
// immediate. Also, immediate src0 operand is not handled in
// SIInstrInfo::commuteInstruction();
if (!MI.getOperand(Src0Idx).isReg())
return false;

int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
if (Src1Idx == -1)
return false;

MachineOperand &Src1 = MI.getOperand(Src1Idx);
if (Src1.isImm()) {
// SIInstrInfo::commuteInstruction() does support commuting the immediate
// operand src1 in 2 and 3 operand instructions.
if (!isVOP2(MI.getOpcode()) && !isVOP3(MI.getOpcode()))
return false;
} else if (Src1.isReg()) {
// If any source modifiers are set, the generic instruction commuting won't
// understand how to copy the source modifiers.
if (hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers))
return false;
} else
return false;

return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
}

Expand Down
11 changes: 10 additions & 1 deletion lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ class SIInstrInfo final : public AMDGPUInstrInfo {
unsigned findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;

protected:
bool swapSourceModifiers(MachineInstr &MI,
MachineOperand &Src0, unsigned Src0OpName,
MachineOperand &Src1, unsigned Src1OpName) const;

MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
unsigned OpIdx0,
unsigned OpIdx1) const override;
Expand Down Expand Up @@ -144,7 +148,12 @@ class SIInstrInfo final : public AMDGPUInstrInfo {
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;

LLVM_READONLY
int commuteOpcode(const MachineInstr &MI) const;
int commuteOpcode(unsigned Opc) const;

/// Convenience overload of commuteOpcode that takes the instruction itself.
///
/// Forwards the opcode of \p MI to the opcode-based overload and returns its
/// result unchanged.
LLVM_READONLY
inline int commuteOpcode(const MachineInstr &MI) const {
return commuteOpcode(MI.getOpcode());
}

bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
Expand Down
8 changes: 4 additions & 4 deletions test/CodeGen/AMDGPU/addrspacecast.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]

; HSA-DAG: v_cmp_ne_i32_e64 vcc, -1, [[PTR]]
; HSA-DAG: v_cmp_ne_i32_e64 vcc, [[PTR]], -1
; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]]
; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
Expand All @@ -34,7 +34,7 @@ define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]

; HSA-DAG: v_cmp_ne_i32_e64 vcc, -1, [[PTR]]
; HSA-DAG: v_cmp_ne_i32_e64 vcc, [[PTR]], -1
; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]]
; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
Expand Down Expand Up @@ -79,7 +79,7 @@ define void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #0 {
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
; HSA-DAG: v_cmp_ne_i64_e64 vcc, 0, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}
; HSA-DAG: v_cmp_ne_i64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
Expand All @@ -96,7 +96,7 @@ define void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #0 {
; HSA: enable_sgpr_queue_ptr = 0

; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
; HSA-DAG: v_cmp_ne_i64_e64 vcc, 0, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}
; HSA-DAG: v_cmp_ne_i64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
Expand Down
2 changes: 1 addition & 1 deletion test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ declare i1 @llvm.amdgcn.class.f32(float, i32)
; Produces error after adding an implicit def to v_cndmask_b32

; GCN-LABEL: {{^}}vcc_shrink_vcc_def:
; GCN: v_cmp_eq_i32_e64 vcc, 0, s{{[0-9]+}}
; GCN: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0{{$}}
; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
; GCN: v_cndmask_b32_e64 v1, 0, 1, s{{\[[0-9]+:[0-9]+\]}}
define void @vcc_shrink_vcc_def(float %arg, i32 %arg1, float %arg2, i32 %arg3) {
Expand Down
6 changes: 3 additions & 3 deletions test/CodeGen/AMDGPU/commute_modifiers.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ declare float @llvm.fma.f32(float, float, float) nounwind readnone

; FUNC-LABEL: @commute_add_imm_fabs_f32
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, |[[X]]|
; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, 2.0
; SI: buffer_store_dword [[REG]]
define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
Expand All @@ -20,7 +20,7 @@ define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(

; FUNC-LABEL: @commute_mul_imm_fneg_fabs_f32
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], -4.0, |[[X]]|
; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, -4.0
; SI: buffer_store_dword [[REG]]
define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
Expand Down Expand Up @@ -51,7 +51,7 @@ define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(
; FUNC-LABEL: @commute_add_lit_fabs_f32
; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x44800000
; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, [[K]]
; SI: v_add_f32_e64 [[REG:v[0-9]+]], [[K]], |[[X]]|
; SI: buffer_store_dword [[REG]]
define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #1
Expand Down
6 changes: 3 additions & 3 deletions test/CodeGen/AMDGPU/ctlz.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

Expand All @@ -19,7 +19,7 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; FUNC-LABEL: {{^}}s_ctlz_i32:
; SI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; SI-DAG: s_flbit_i32_b32 [[CTLZ:s[0-9]+]], [[VAL]]
; SI-DAG: v_cmp_eq_i32_e64 [[CMPZ:s\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
; SI-DAG: v_cmp_eq_i32_e64 [[CMPZ:s\[[0-9]+:[0-9]+\]]], [[VAL]], 0{{$}}
; SI-DAG: v_mov_b32_e32 [[VCTLZ:v[0-9]+]], [[CTLZ]]
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[VCTLZ]], 32, [[CMPZ]]
; SI: buffer_store_dword [[RESULT]]
Expand Down Expand Up @@ -112,7 +112,7 @@ define void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %

; FUNC-LABEL: {{^}}s_ctlz_i64:
; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; SI-DAG: v_cmp_eq_i32_e64 vcc, 0, s[[HI]]
; SI-DAG: v_cmp_eq_i32_e64 vcc, s[[HI]], 0{{$}}
; SI-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]]
; SI-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32
; SI-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]]
Expand Down
4 changes: 2 additions & 2 deletions test/CodeGen/AMDGPU/ctlz_zero_undef.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

Expand Down Expand Up @@ -92,7 +92,7 @@ define void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)

; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i64:
; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
; SI-DAG: v_cmp_eq_i32_e64 vcc, 0, s[[HI]]
; SI-DAG: v_cmp_eq_i32_e64 vcc, s[[HI]], 0{{$}}
; SI-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]]
; SI-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32
; SI-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]]
Expand Down
6 changes: 3 additions & 3 deletions test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

declare float @llvm.fabs.f32(float) #1
Expand All @@ -18,7 +18,7 @@ define void @cvt_flr_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
}

; FUNC-LABEL: {{^}}cvt_flr_i32_f32_1:
; SI: v_add_f32_e64 [[TMP:v[0-9]+]], 1.0, s{{[0-9]+}}
; SI: v_add_f32_e64 [[TMP:v[0-9]+]], s{{[0-9]+}}, 1.0
; SI-SAFE-NOT: v_cvt_flr_i32_f32
; SI-NONAN: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, [[TMP]]
; SI: s_endpgm
Expand Down
4 changes: 2 additions & 2 deletions test/CodeGen/AMDGPU/fabs.f64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ define void @fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
; SI-LABEL: {{^}}fabs_fold_f64:
; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: and
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|
; SI: s_endpgm
define void @fabs_fold_f64(double addrspace(1)* %out, double %in0, double %in1) {
%fabs = call double @llvm.fabs.f64(double %in0)
Expand All @@ -67,7 +67,7 @@ define void @fabs_fold_f64(double addrspace(1)* %out, double %in0, double %in1)
; SI-LABEL: {{^}}fabs_fn_fold_f64:
; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NOT: and
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|
; SI: s_endpgm
define void @fabs_fn_fold_f64(double addrspace(1)* %out, double %in0, double %in1) {
%fabs = call double @fabs(double %in0)
Expand Down
Loading

0 comments on commit d764af3

Please sign in to comment.