From 161b88e517089db1134ce3e9fe9e4a25bc1f204d Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Thu, 29 Aug 2019 10:50:09 +0000 Subject: [PATCH] [X86][CodeGen][NFC] Delay `combineIncDecVector()` from DAGCombine to X86DAGToDAGISel Summary: We were previously doing it in DAGCombine. But we also want to do `sub %x, C` -> `add %x, (sub 0, C)` for vectors in DAGCombine. So if we had `sub %x, -1`, we'll transform it to `add %x, 1`, which `combineIncDecVector()` will immediately transform back into `sub %x, -1`, and here we go again... I've marked this as NFC since not a single test changes, but since that 'changes' DAGCombine, probably this isn't fully NFC. Reviewers: RKSimon, craig.topper, spatel Reviewed By: craig.topper Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62327 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@370327 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 48 ++++++++++++++++++++ lib/Target/X86/X86ISelLowering.cpp | 49 +++------------------ lib/Target/X86/X86ISelLowering.h | 3 ++ test/CodeGen/X86/stack-folding-int-avx1.ll | 2 +- test/CodeGen/X86/stack-folding-int-sse42.ll | 2 +- 5 files changed, 60 insertions(+), 44 deletions(-) diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index e0a462471a71..eaedf14a9d8d 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -507,6 +507,7 @@ namespace { bool shrinkAndImmediate(SDNode *N); bool isMaskZeroExtended(SDNode *N) const; bool tryShiftAmountMod(SDNode *N); + bool combineIncDecVector(SDNode *Node); bool tryShrinkShlLogicImm(SDNode *N); bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask); @@ -3775,6 +3776,49 @@ bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) { return true; } +/// Convert vector increment or decrement to sub/add with an all-ones constant: +/// add X, <1, 1...> --> sub X, <-1, -1...> +/// sub X, <1, 1...> --> add X, <-1, -1...> +/// The all-ones vector constant can be materialized using a pcmpeq instruction +/// that is commonly recognized as an idiom (has no register dependency), so +/// that's better/smaller than loading a splat 1 constant. +bool X86DAGToDAGISel::combineIncDecVector(SDNode *Node) { + assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB) && + "Unexpected opcode for increment/decrement transform"); + + EVT VT = Node->getValueType(0); + assert(VT.isVector() && "Should only be called for vectors."); + + SDValue X = Node->getOperand(0); + SDValue OneVec = Node->getOperand(1); + + APInt SplatVal; + if (!X86::isConstantSplat(OneVec, SplatVal) || !SplatVal.isOneValue()) + return false; + + SDLoc DL(Node); + SDValue AllOnesVec; + + APInt Ones = APInt::getAllOnesValue(32); + assert(VT.getSizeInBits() % 32 == 0 && + "Expected bit count to be a multiple of 32"); + unsigned NumElts = VT.getSizeInBits() / 32; + assert(NumElts > 0 && "Expected to get non-empty vector."); + AllOnesVec = + CurDAG->getConstant(Ones, DL, MVT::getVectorVT(MVT::i32, NumElts)); + insertDAGNode(*CurDAG, X, AllOnesVec); + + AllOnesVec = CurDAG->getBitcast(VT, AllOnesVec); + insertDAGNode(*CurDAG, X, AllOnesVec); + + unsigned NewOpcode = Node->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD; + SDValue NewNode = CurDAG->getNode(NewOpcode, DL, VT, X, AllOnesVec); + + ReplaceNode(Node, NewNode.getNode()); + SelectCode(NewNode.getNode()); + return true; +} + /// If the high bits of an 'and' operand are known zero, try setting the /// high bits of an 'and' constant operand to produce a smaller encoding by /// creating a small, sign-extended negative immediate rather than a large @@ -4347,6 +4391,10 @@ void X86DAGToDAGISel::Select(SDNode *Node) { LLVM_FALLTHROUGH; case ISD::ADD: case ISD::SUB: { + if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && NVT.isVector() && + combineIncDecVector(Node)) + return; + // Try to avoid folding immediates with multiple uses for optsize. // This code tries to select to register form directly to avoid going // through the isel table which might fold the immediate. We can't change diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6453d3ad0bc5..3f37cb5ad8c8 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6215,7 +6215,9 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, return false; } -static bool isConstantSplat(SDValue Op, APInt &SplatVal) { +namespace llvm { +namespace X86 { +bool isConstantSplat(SDValue Op, APInt &SplatVal) { APInt UndefElts; SmallVector EltBits; if (getTargetConstantBitsFromNode(Op, Op.getScalarValueSizeInBits(), @@ -6238,6 +6240,8 @@ static bool isConstantSplat(SDValue Op, APInt &SplatVal) { return false; } +} // namespace X86 +} // namespace llvm static bool getTargetShuffleMaskIndices(SDValue MaskNode, unsigned MaskEltSizeInBits, @@ -18115,7 +18119,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, std::swap(Op0, Op1); APInt APIntShiftAmt; - if (isConstantSplat(Amt, APIntShiftAmt)) { + if (X86::isConstantSplat(Amt, APIntShiftAmt)) { uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits()); return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, Op0, Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8)); @@ -25337,7 +25341,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, // Optimize shl/srl/sra with constant shift amount. APInt APIntShiftAmt; - if (!isConstantSplat(Amt, APIntShiftAmt)) + if (!X86::isConstantSplat(Amt, APIntShiftAmt)) return SDValue(); // If the shift amount is out of range, return undef. @@ -43750,39 +43754,6 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::ADD, DL, VT, Sad, OtherOp, Flags); } -/// Convert vector increment or decrement to sub/add with an all-ones constant: -/// add X, <1, 1...> --> sub X, <-1, -1...> -/// sub X, <1, 1...> --> add X, <-1, -1...> -/// The all-ones vector constant can be materialized using a pcmpeq instruction -/// that is commonly recognized as an idiom (has no register dependency), so -/// that's better/smaller than loading a splat 1 constant. -static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI) { - assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && - "Unexpected opcode for increment/decrement transform"); - - // Delay this until legalize ops to avoid interfering with early DAG combines - // that may expect canonical adds. - // FIXME: We may want to consider moving this to custom lowering or all the - // way to isel, but lets start here. - if (DCI.isBeforeLegalizeOps()) - return SDValue(); - - // Pseudo-legality check: getOnesVector() expects one of these types, so bail - // out and wait for legalization if we have an unsupported vector length. - EVT VT = N->getValueType(0); - if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector()) - return SDValue(); - - APInt SplatVal; - if (!isConstantSplat(N->getOperand(1), SplatVal) || !SplatVal.isOneValue()) - return SDValue(); - - SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N)); - unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD; - return DAG.getNode(NewOpcode, SDLoc(N), VT, N->getOperand(0), AllOnesVec); -} - static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1, const SDLoc &DL, EVT VT, const X86Subtarget &Subtarget) { @@ -44045,9 +44016,6 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, HADDBuilder); } - if (SDValue V = combineIncDecVector(N, DAG, DCI)) - return V; - return combineAddOrSubToADCOrSBB(N, DAG); } @@ -44176,9 +44144,6 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, HSUBBuilder); } - if (SDValue V = combineIncDecVector(N, DAG, DCI)) - return V; - // Try to create PSUBUS if SUB's argument is max/min if (SDValue V = combineSubToSubus(N, DAG, Subtarget)) return V; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 53bdc84cee13..05e4f16fc49d 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -683,6 +683,9 @@ namespace llvm { bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO); + /// If Op is a constant whose elements are all the same constant or + /// undefined, return true and return the constant value in \p SplatVal. + bool isConstantSplat(SDValue Op, APInt &SplatVal); } // end namespace X86 //===--------------------------------------------------------------------===// diff --git a/test/CodeGen/X86/stack-folding-int-avx1.ll b/test/CodeGen/X86/stack-folding-int-avx1.ll index b92a70b85227..46fd1ab5dfb6 100644 --- a/test/CodeGen/X86/stack-folding-int-avx1.ll +++ b/test/CodeGen/X86/stack-folding-int-avx1.ll @@ -540,8 +540,8 @@ define <16 x i8> @stack_fold_pandn(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-NEXT: #APP ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vpandn {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vpsubb %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() diff --git a/test/CodeGen/X86/stack-folding-int-sse42.ll b/test/CodeGen/X86/stack-folding-int-sse42.ll index 873be133b3c2..3efaeddfbd64 100644 --- a/test/CodeGen/X86/stack-folding-int-sse42.ll +++ b/test/CodeGen/X86/stack-folding-int-sse42.ll @@ -724,8 +724,8 @@ define <16 x i8> @stack_fold_pandn(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-NEXT: #APP ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 ; CHECK-NEXT: pandn {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 ; CHECK-NEXT: psubb %xmm1, %xmm0 ; CHECK-NEXT: retq %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()