Skip to content

Commit

Permalink
[X86][CodeGen][NFC] Delay combineIncDecVector() from DAGCombine to …
Browse files Browse the repository at this point in the history
…X86DAGToDAGISel

Summary:
We were previously doing it in DAGCombine.
But we also want to do `sub %x, C` -> `add %x, (sub 0, C)` for vectors in DAGCombine.
So if we had `sub %x, -1`, we'll transform it to `add %x, 1`,
which `combineIncDecVector()` will immediately transform back into `sub %x, -1`,
and here we go again...

I've marked this as NFC since not a single test changes,
but since that 'changes' DAGCombine, probably this isn't fully NFC.

Reviewers: RKSimon, craig.topper, spatel

Reviewed By: craig.topper

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62327

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@370327 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
LebedevRI committed Aug 29, 2019
1 parent ae86cd4 commit 161b88e
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 44 deletions.
48 changes: 48 additions & 0 deletions lib/Target/X86/X86ISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,7 @@ namespace {
bool shrinkAndImmediate(SDNode *N);
bool isMaskZeroExtended(SDNode *N) const;
bool tryShiftAmountMod(SDNode *N);
bool combineIncDecVector(SDNode *Node);
bool tryShrinkShlLogicImm(SDNode *N);
bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);

Expand Down Expand Up @@ -3775,6 +3776,49 @@ bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
return true;
}

/// Convert vector increment or decrement to sub/add with an all-ones constant:
/// add X, <1, 1...> --> sub X, <-1, -1...>
/// sub X, <1, 1...> --> add X, <-1, -1...>
/// The all-ones vector constant can be materialized using a pcmpeq instruction
/// that is commonly recognized as an idiom (has no register dependency), so
/// that's better/smaller than loading a splat 1 constant.
bool X86DAGToDAGISel::combineIncDecVector(SDNode *Node) {
assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB) &&
"Unexpected opcode for increment/decrement transform");

EVT VT = Node->getValueType(0);
assert(VT.isVector() && "Should only be called for vectors.");

SDValue X = Node->getOperand(0);
SDValue OneVec = Node->getOperand(1);

APInt SplatVal;
if (!X86::isConstantSplat(OneVec, SplatVal) || !SplatVal.isOneValue())
return false;

SDLoc DL(Node);
SDValue AllOnesVec;

APInt Ones = APInt::getAllOnesValue(32);
assert(VT.getSizeInBits() % 32 == 0 &&
"Expected bit count to be a multiple of 32");
unsigned NumElts = VT.getSizeInBits() / 32;
assert(NumElts > 0 && "Expected to get non-empty vector.");
AllOnesVec =
CurDAG->getConstant(Ones, DL, MVT::getVectorVT(MVT::i32, NumElts));
insertDAGNode(*CurDAG, X, AllOnesVec);

AllOnesVec = CurDAG->getBitcast(VT, AllOnesVec);
insertDAGNode(*CurDAG, X, AllOnesVec);

unsigned NewOpcode = Node->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
SDValue NewNode = CurDAG->getNode(NewOpcode, DL, VT, X, AllOnesVec);

ReplaceNode(Node, NewNode.getNode());
SelectCode(NewNode.getNode());
return true;
}

/// If the high bits of an 'and' operand are known zero, try setting the
/// high bits of an 'and' constant operand to produce a smaller encoding by
/// creating a small, sign-extended negative immediate rather than a large
Expand Down Expand Up @@ -4347,6 +4391,10 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
LLVM_FALLTHROUGH;
case ISD::ADD:
case ISD::SUB: {
if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && NVT.isVector() &&
combineIncDecVector(Node))
return;

// Try to avoid folding immediates with multiple uses for optsize.
// This code tries to select to register form directly to avoid going
// through the isel table which might fold the immediate. We can't change
Expand Down
49 changes: 7 additions & 42 deletions lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6215,7 +6215,9 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
return false;
}

static bool isConstantSplat(SDValue Op, APInt &SplatVal) {
namespace llvm {
namespace X86 {
bool isConstantSplat(SDValue Op, APInt &SplatVal) {
APInt UndefElts;
SmallVector<APInt, 16> EltBits;
if (getTargetConstantBitsFromNode(Op, Op.getScalarValueSizeInBits(),
Expand All @@ -6238,6 +6240,8 @@ static bool isConstantSplat(SDValue Op, APInt &SplatVal) {

return false;
}
} // namespace X86
} // namespace llvm

static bool getTargetShuffleMaskIndices(SDValue MaskNode,
unsigned MaskEltSizeInBits,
Expand Down Expand Up @@ -18115,7 +18119,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
std::swap(Op0, Op1);

APInt APIntShiftAmt;
if (isConstantSplat(Amt, APIntShiftAmt)) {
if (X86::isConstantSplat(Amt, APIntShiftAmt)) {
uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits());
return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
Op0, Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8));
Expand Down Expand Up @@ -25337,7 +25341,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,

// Optimize shl/srl/sra with constant shift amount.
APInt APIntShiftAmt;
if (!isConstantSplat(Amt, APIntShiftAmt))
if (!X86::isConstantSplat(Amt, APIntShiftAmt))
return SDValue();

// If the shift amount is out of range, return undef.
Expand Down Expand Up @@ -43750,39 +43754,6 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, DL, VT, Sad, OtherOp, Flags);
}

/// Convert vector increment or decrement to sub/add with an all-ones constant:
/// add X, <1, 1...> --> sub X, <-1, -1...>
/// sub X, <1, 1...> --> add X, <-1, -1...>
/// The all-ones vector constant can be materialized using a pcmpeq instruction
/// that is commonly recognized as an idiom (has no register dependency), so
/// that's better/smaller than loading a splat 1 constant.
static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
"Unexpected opcode for increment/decrement transform");

// Delay this until legalize ops to avoid interfering with early DAG combines
// that may expect canonical adds.
// FIXME: We may want to consider moving this to custom lowering or all the
// way to isel, but lets start here.
if (DCI.isBeforeLegalizeOps())
return SDValue();

// Pseudo-legality check: getOnesVector() expects one of these types, so bail
// out and wait for legalization if we have an unsupported vector length.
EVT VT = N->getValueType(0);
if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector())
return SDValue();

APInt SplatVal;
if (!isConstantSplat(N->getOperand(1), SplatVal) || !SplatVal.isOneValue())
return SDValue();

SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N));
unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
return DAG.getNode(NewOpcode, SDLoc(N), VT, N->getOperand(0), AllOnesVec);
}

static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1,
const SDLoc &DL, EVT VT,
const X86Subtarget &Subtarget) {
Expand Down Expand Up @@ -44045,9 +44016,6 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
HADDBuilder);
}

if (SDValue V = combineIncDecVector(N, DAG, DCI))
return V;

return combineAddOrSubToADCOrSBB(N, DAG);
}

Expand Down Expand Up @@ -44176,9 +44144,6 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
HSUBBuilder);
}

if (SDValue V = combineIncDecVector(N, DAG, DCI))
return V;

// Try to create PSUBUS if SUB's argument is max/min
if (SDValue V = combineSubToSubus(N, DAG, Subtarget))
return V;
Expand Down
3 changes: 3 additions & 0 deletions lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,9 @@ namespace llvm {
bool isCalleePop(CallingConv::ID CallingConv,
bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

/// If Op is a constant whose elements are all the same constant or
/// undefined, return true and return the constant value in \p SplatVal.
bool isConstantSplat(SDValue Op, APInt &SplatVal);
} // end namespace X86

//===--------------------------------------------------------------------===//
Expand Down
2 changes: 1 addition & 1 deletion test/CodeGen/X86/stack-folding-int-avx1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -540,8 +540,8 @@ define <16 x i8> @stack_fold_pandn(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpandn {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
Expand Down
2 changes: 1 addition & 1 deletion test/CodeGen/X86/stack-folding-int-sse42.ll
Original file line number Diff line number Diff line change
Expand Up @@ -724,8 +724,8 @@ define <16 x i8> @stack_fold_pandn(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pandn {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: psubb %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
Expand Down

0 comments on commit 161b88e

Please sign in to comment.