Skip to content

Commit

Permalink
AMDGPU: Skip fneg/select combine if it can fold into other
Browse files Browse the repository at this point in the history
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291792 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
arsenm committed Jan 12, 2017
1 parent 9db1ec3 commit cd00258
Show file tree
Hide file tree
Showing 3 changed files with 199 additions and 29 deletions.
69 changes: 40 additions & 29 deletions lib/Target/AMDGPU/AMDGPUISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,24 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// Target Information
//===----------------------------------------------------------------------===//

/// Report whether \p Opc is one of the opcodes this file treats as able to
/// fold an fneg of its result into the operation itself.
///
/// The accepted set is the generic FADD, FSUB, FMUL, FMA, FMAD and FSIN nodes
/// plus the AMDGPU RCP, RCP_LEGACY, SIN_HW and FMUL_LEGACY nodes. Every other
/// opcode yields false.
static bool fnegFoldsIntoOp(unsigned Opc) {
  // Target-independent floating-point nodes.
  const bool IsGenericNode = Opc == ISD::FADD || Opc == ISD::FSUB ||
                             Opc == ISD::FMUL || Opc == ISD::FMA ||
                             Opc == ISD::FMAD || Opc == ISD::FSIN;

  // AMDGPU-specific nodes.
  const bool IsTargetNode =
      Opc == AMDGPUISD::RCP || Opc == AMDGPUISD::RCP_LEGACY ||
      Opc == AMDGPUISD::SIN_HW || Opc == AMDGPUISD::FMUL_LEGACY;

  return IsGenericNode || IsTargetNode;
}

/// Return the value type used for vector indices.
///
/// The DataLayout argument is unnamed and unused: the result is always
/// MVT::i32.
MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const {
  return MVT(MVT::i32);
}
Expand Down Expand Up @@ -2738,20 +2756,31 @@ static SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
SDValue NewLHS = LHS.getOperand(0);
SDValue NewRHS = RHS;

// TODO: Skip for operations where other combines can absorb the fneg.
// Careful: if the neg can be folded up, don't try to pull it back down.
bool ShouldFoldNeg = true;

if (LHS.getOpcode() == ISD::FNEG)
NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
else if (CRHS->isNegative())
return SDValue();
if (NewLHS.hasOneUse()) {
unsigned Opc = NewLHS.getOpcode();
if (LHS.getOpcode() == ISD::FNEG && fnegFoldsIntoOp(Opc))
ShouldFoldNeg = false;
if (LHS.getOpcode() == ISD::FABS && Opc == ISD::FMUL)
ShouldFoldNeg = false;
}

if (Inv)
std::swap(NewLHS, NewRHS);
if (ShouldFoldNeg) {
if (LHS.getOpcode() == ISD::FNEG)
NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
else if (CRHS->isNegative())
return SDValue();

SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT,
Cond, NewLHS, NewRHS);
DCI.AddToWorklist(NewSelect.getNode());
return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
if (Inv)
std::swap(NewLHS, NewRHS);

SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT,
Cond, NewLHS, NewRHS);
DCI.AddToWorklist(NewSelect.getNode());
return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
}
}

return SDValue();
Expand Down Expand Up @@ -2806,24 +2835,6 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
}

/// Report whether \p Opc is one of the opcodes this file treats as able to
/// fold an fneg of its result into the operation itself.
///
/// The accepted set is the generic FADD, FSUB, FMUL, FMA, FMAD and FSIN nodes
/// plus the AMDGPU RCP, RCP_LEGACY, SIN_HW and FMUL_LEGACY nodes. Every other
/// opcode yields false.
static bool fnegFoldsIntoOp(unsigned Opc) {
  // Target-independent floating-point nodes.
  const bool IsGenericNode = Opc == ISD::FADD || Opc == ISD::FSUB ||
                             Opc == ISD::FMUL || Opc == ISD::FMA ||
                             Opc == ISD::FMAD || Opc == ISD::FSIN;

  // AMDGPU-specific nodes.
  const bool IsTargetNode =
      Opc == AMDGPUISD::RCP || Opc == AMDGPUISD::RCP_LEGACY ||
      Opc == AMDGPUISD::SIN_HW || Opc == AMDGPUISD::FMUL_LEGACY;

  return IsGenericNode || IsTargetNode;
}

SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
Expand Down
46 changes: 46 additions & 0 deletions test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s

; --------------------------------------------------------------------------------
; Don't fold if fneg can fold into the source
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_legacy_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_rcp_legacy_f32_e32 [[RCP:v[0-9]+]], [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc
; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
; Selects between the negated llvm.amdgcn.rcp.legacy result of %x and the
; constant 2.0, depending on whether %c equals 0.
; Uses attribute group #0 (nounwind); group #2 has no definition in this file.
define void @select_fneg_posk_src_rcp_legacy_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
; %y is not used below; the load is volatile.
%y = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
%rcp = call float @llvm.amdgcn.rcp.legacy(float %x)
; fsub from -0.0 is the fneg being tested.
%fneg = fsub float -0.0, %rcp
%select = select i1 %cmp, float %fneg, float 2.0
store volatile float %select, float addrspace(1)* undef
ret void
}

; GCN-LABEL: {{^}}select_fneg_posk_src_mul_legacy_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_mul_legacy_f32_e32 [[MUL:v[0-9]+]], 4.0, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[MUL]], vcc
; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
; Selects between the negated llvm.amdgcn.fmul.legacy(%x, 4.0) result and the
; constant 2.0, depending on whether %c equals 0.
; Uses attribute group #0 (nounwind); group #2 has no definition in this file.
define void @select_fneg_posk_src_mul_legacy_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
%mul = call float @llvm.amdgcn.fmul.legacy(float %x, float 4.0)
; fsub from -0.0 is the fneg being tested.
%fneg = fsub float -0.0, %mul
%select = select i1 %cmp, float %fneg, float 2.0
store volatile float %select, float addrspace(1)* undef
ret void
}

declare float @llvm.amdgcn.rcp.legacy(float) #1
declare float @llvm.amdgcn.fmul.legacy(float, float) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
113 changes: 113 additions & 0 deletions test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
Original file line number Diff line number Diff line change
Expand Up @@ -721,7 +721,120 @@ define void @mul_select_negk_negfabs_f32(i32 %c) #0 {
ret void
}

; --------------------------------------------------------------------------------
; Don't fold if fneg can fold into the source
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}select_fneg_posk_src_add_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], -4.0, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
; Selects between the negation of (%x + 4.0) and the constant 2.0, depending
; on whether %c equals 0.
define void @select_fneg_posk_src_add_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
; %y is not used below; the load is volatile and the checks above match it.
%y = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
%add = fadd float %x, 4.0
; fsub from -0.0 is the fneg being tested.
%fneg = fsub float -0.0, %add
%select = select i1 %cmp, float %fneg, float 2.0
store volatile float %select, float addrspace(1)* undef
ret void
}

; GCN-LABEL: {{^}}select_fneg_posk_src_sub_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], 4.0, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
; Selects between the negation of (%x - 4.0) and the constant 2.0, depending
; on whether %c equals 0.
define void @select_fneg_posk_src_sub_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
; Named %add although the operation is an fsub.
%add = fsub float %x, 4.0
; fsub from -0.0 is the fneg being tested.
%fneg = fsub float -0.0, %add
%select = select i1 %cmp, float %fneg, float 2.0
store volatile float %select, float addrspace(1)* undef
ret void
}

; GCN-LABEL: {{^}}select_fneg_posk_src_mul_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MUL]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
; Selects between the negation of (%x * 4.0) and the constant 2.0, depending
; on whether %c equals 0.
define void @select_fneg_posk_src_mul_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
%mul = fmul float %x, 4.0
; fsub from -0.0 is the fneg being tested.
%fneg = fsub float -0.0, %mul
%select = select i1 %cmp, float %fneg, float 2.0
store volatile float %select, float addrspace(1)* undef
ret void
}

; GCN-LABEL: {{^}}select_fneg_posk_src_fma_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[X]], -4.0, -[[Z]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[FMA]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
; Selects between the negation of llvm.fma.f32(%x, 4.0, %z) and the constant
; 2.0, depending on whether %c equals 0.
define void @select_fneg_posk_src_fma_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%z = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
%fma = call float @llvm.fma.f32(float %x, float 4.0, float %z)
; fsub from -0.0 is the fneg being tested.
%fneg = fsub float -0.0, %fma
%select = select i1 %cmp, float %fneg, float 2.0
store volatile float %select, float addrspace(1)* undef
ret void
}

; GCN-LABEL: {{^}}select_fneg_posk_src_fmad_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[X]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
; Selects between the negation of llvm.fmuladd.f32(%x, 4.0, %z) and the
; constant 2.0, depending on whether %c equals 0.
define void @select_fneg_posk_src_fmad_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
%z = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
%fmad = call float @llvm.fmuladd.f32(float %x, float 4.0, float %z)
; fsub from -0.0 is the fneg being tested.
%fneg = fsub float -0.0, %fmad
%select = select i1 %cmp, float %fneg, float 2.0
store volatile float %select, float addrspace(1)* undef
ret void
}

; FIXME: This one should fold to rcp
; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -2.0, [[RCP]], vcc
; GCN: v_xor_b32_e32 [[NEG_SELECT:v[0-9]+]], 0x80000000, [[SELECT]]
; GCN-NEXT: buffer_store_dword [[NEG_SELECT]]
; Selects between the negation of llvm.amdgcn.rcp.f32(%x) and the constant
; 2.0, depending on whether %c equals 0.
define void @select_fneg_posk_src_rcp_f32(i32 %c) #0 {
%x = load volatile float, float addrspace(1)* undef
; %y is not used below; the load is volatile.
%y = load volatile float, float addrspace(1)* undef
%cmp = icmp eq i32 %c, 0
%rcp = call float @llvm.amdgcn.rcp.f32(float %x)
; fsub from -0.0 is the fneg being tested.
%fneg = fsub float -0.0, %rcp
%select = select i1 %cmp, float %fneg, float 2.0
store volatile float %select, float addrspace(1)* undef
ret void
}

declare float @llvm.fabs.f32(float) #1
declare float @llvm.fma.f32(float, float, float) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
declare float @llvm.amdgcn.rcp.f32(float) #1
declare float @llvm.amdgcn.rcp.legacy(float) #1
declare float @llvm.amdgcn.fmul.legacy(float, float) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

0 comments on commit cd00258

Please sign in to comment.