Skip to content

Commit

Permalink
[PowerPC, DAGCombiner] Fold a << (b % (sizeof(a) * 8)) back to a sing…
Browse files Browse the repository at this point in the history
…le instruction

Summary:
This is the corresponding LLVM change to D28037, made to ensure that there is
no performance regression.

Reviewers: bogner, kbarton, hfinkel, iteratee, echristo

Subscribers: nemanjai, llvm-commits

Differential Revision: https://reviews.llvm.org/D28329

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@301990 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
timshen91 committed May 3, 2017
1 parent c7dd63d commit 85fd68b
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 39 deletions.
8 changes: 8 additions & 0 deletions include/llvm/Target/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -2061,6 +2061,14 @@ class TargetLoweringBase {
return false;
}

/// Query whether the target's instruction for the shift opcode \p Inst
/// reduces the shift amount modulo the operand width, i.e. whether "a << b"
/// is really executed as "a << (b % (sizeof(a) * 8))".
/// \p Inst must be ISD::SHL, ISD::SRA or ISD::SRL. This default
/// implementation always answers false.
virtual bool supportsModuloShift(ISD::NodeType Inst, EVT ReturnType) const {
  const bool IsShiftOpcode =
      Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL;
  assert(IsShiftOpcode && "Expect a shift instruction");
  (void)IsShiftOpcode; // Only read by the assert above.
  return false;
}

//===--------------------------------------------------------------------===//
// Runtime Library hooks
//
Expand Down
33 changes: 33 additions & 0 deletions lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5294,6 +5294,17 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
}

// If the target's SHL already reduces the shift amount modulo the scalar bit
// width, an explicit mask of the amount is redundant:
//   fold (shl x, (and y, numbits(x) - 1)) -> (shl x, y)
// Legality is tested first because a target's supportsModuloShift override
// may require the operation to be legal for VT.
// NOTE(review): the folded node carries an unmasked amount; confirm that no
// later generic combine treats an out-of-range amount on ISD::SHL as
// undefined and changes the result.
if (TLI.isOperationLegal(ISD::SHL, VT) &&
    TLI.supportsModuloShift(ISD::SHL, VT) && N1->getOpcode() == ISD::AND) {
  // Compare through APInt so that a mask constant wider than 64 bits cannot
  // trip the width assertion inside getZExtValue().
  if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
    if (Mask->getAPIntValue() == OpSizeInBits - 1)
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1->getOperand(0));
}

ConstantSDNode *N1C = isConstOrConstSplat(N1);

// fold (shl c1, c2) -> c1<<c2
Expand Down Expand Up @@ -5492,6 +5503,17 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();

// If the target's SRA already reduces the shift amount modulo the scalar bit
// width, an explicit mask of the amount is redundant:
//   fold (sra x, (and y, numbits(x) - 1)) -> (sra x, y)
// Legality is tested first because a target's supportsModuloShift override
// may require the operation to be legal for VT.
// NOTE(review): the folded node carries an unmasked amount; confirm that no
// later generic combine treats an out-of-range amount on ISD::SRA as
// undefined and changes the result.
if (TLI.isOperationLegal(ISD::SRA, VT) &&
    TLI.supportsModuloShift(ISD::SRA, VT) && N1->getOpcode() == ISD::AND) {
  // Compare through APInt so that a mask constant wider than 64 bits cannot
  // trip the width assertion inside getZExtValue().
  if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
    if (Mask->getAPIntValue() == OpSizeInBits - 1)
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, N1->getOperand(0));
}

// Arithmetic shifting an all-sign-bit value is a no-op.
if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
return N0;
Expand Down Expand Up @@ -5650,6 +5672,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();

// If the target's SRL already reduces the shift amount modulo the scalar bit
// width, an explicit mask of the amount is redundant:
//   fold (srl x, (and y, numbits(x) - 1)) -> (srl x, y)
// Legality is tested first because a target's supportsModuloShift override
// may require the operation to be legal for VT.
// NOTE(review): the folded node carries an unmasked amount; confirm that no
// later generic combine treats an out-of-range amount on ISD::SRL as
// undefined and changes the result.
if (TLI.isOperationLegal(ISD::SRL, VT) &&
    TLI.supportsModuloShift(ISD::SRL, VT) && N1->getOpcode() == ISD::AND) {
  // Compare through APInt so that a mask constant wider than 64 bits cannot
  // trip the width assertion inside getZExtValue().
  if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
    if (Mask->getAPIntValue() == OpSizeInBits - 1)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1->getOperand(0));
}

// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
Expand Down
8 changes: 8 additions & 0 deletions lib/Target/PowerPC/PPCISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1017,6 +1017,14 @@ namespace llvm {
SDValue
combineElementTruncationToVectorTruncation(SDNode *N,
DAGCombinerInfo &DCI) const;

/// PowerPC implementation of the modulo-shift query.
///
/// \param Inst       Shift opcode; must be ISD::SHL, ISD::SRA or ISD::SRL.
/// \param ReturnType Result type of the shift. The caller must already have
///                   established that \p Inst is legal for this type.
/// \returns true for every vector type and false for every scalar type.
bool supportsModuloShift(ISD::NodeType Inst,
                         EVT ReturnType) const override {
  assert((Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL) &&
         "Expect a shift instruction");
  assert(isOperationLegal(Inst, ReturnType) &&
         "Expect a shift that is legal for the given type");
  return ReturnType.isVector();
}
};

namespace PPC {
Expand Down
39 changes: 0 additions & 39 deletions test/CodeGen/PowerPC/shift_mask.ll
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@ define i64 @test003(i64 %a, i64 %b) {
define <16 x i8> @test010(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test010:
; CHECK: # BB#0:
; CHECK-NEXT: vspltisb 4, 7
; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vslb 2, 2, 3
; CHECK-NEXT: blr
%rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
Expand All @@ -61,8 +59,6 @@ define <16 x i8> @test010(<16 x i8> %a, <16 x i8> %b) {
define <8 x i16> @test011(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test011:
; CHECK: # BB#0:
; CHECK-NEXT: vspltish 4, 15
; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vslh 2, 2, 3
; CHECK-NEXT: blr
%rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
Expand All @@ -73,10 +69,6 @@ define <8 x i16> @test011(<8 x i16> %a, <8 x i16> %b) {
define <4 x i32> @test012(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test012:
; CHECK: # BB#0:
; CHECK-NEXT: vspltisw 4, -16
; CHECK-NEXT: vspltisw 5, 15
; CHECK-NEXT: vsubuwm 4, 5, 4
; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vslw 2, 2, 3
; CHECK-NEXT: blr
%rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
Expand All @@ -87,11 +79,6 @@ define <4 x i32> @test012(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @test013(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test013:
; CHECK: # BB#0:
; CHECK-NEXT: addis 3, 2, .LCPI7_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI7_0@toc@l
; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: xxswapd 36, 0
; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsld 2, 2, 3
; CHECK-NEXT: blr
%rem = and <2 x i64> %b, <i64 63, i64 63>
Expand Down Expand Up @@ -148,8 +135,6 @@ define i64 @test103(i64 %a, i64 %b) {
define <16 x i8> @test110(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test110:
; CHECK: # BB#0:
; CHECK-NEXT: vspltisb 4, 7
; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrb 2, 2, 3
; CHECK-NEXT: blr
%rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
Expand All @@ -160,8 +145,6 @@ define <16 x i8> @test110(<16 x i8> %a, <16 x i8> %b) {
define <8 x i16> @test111(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test111:
; CHECK: # BB#0:
; CHECK-NEXT: vspltish 4, 15
; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrh 2, 2, 3
; CHECK-NEXT: blr
%rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
Expand All @@ -172,10 +155,6 @@ define <8 x i16> @test111(<8 x i16> %a, <8 x i16> %b) {
define <4 x i32> @test112(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test112:
; CHECK: # BB#0:
; CHECK-NEXT: vspltisw 4, -16
; CHECK-NEXT: vspltisw 5, 15
; CHECK-NEXT: vsubuwm 4, 5, 4
; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrw 2, 2, 3
; CHECK-NEXT: blr
%rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
Expand All @@ -186,11 +165,6 @@ define <4 x i32> @test112(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @test113(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test113:
; CHECK: # BB#0:
; CHECK-NEXT: addis 3, 2, .LCPI15_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI15_0@toc@l
; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: xxswapd 36, 0
; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrd 2, 2, 3
; CHECK-NEXT: blr
%rem = and <2 x i64> %b, <i64 63, i64 63>
Expand Down Expand Up @@ -247,8 +221,6 @@ define i64 @test203(i64 %a, i64 %b) {
define <16 x i8> @test210(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test210:
; CHECK: # BB#0:
; CHECK-NEXT: vspltisb 4, 7
; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrab 2, 2, 3
; CHECK-NEXT: blr
%rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
Expand All @@ -259,8 +231,6 @@ define <16 x i8> @test210(<16 x i8> %a, <16 x i8> %b) {
define <8 x i16> @test211(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test211:
; CHECK: # BB#0:
; CHECK-NEXT: vspltish 4, 15
; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrah 2, 2, 3
; CHECK-NEXT: blr
%rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
Expand All @@ -271,10 +241,6 @@ define <8 x i16> @test211(<8 x i16> %a, <8 x i16> %b) {
define <4 x i32> @test212(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test212:
; CHECK: # BB#0:
; CHECK-NEXT: vspltisw 4, -16
; CHECK-NEXT: vspltisw 5, 15
; CHECK-NEXT: vsubuwm 4, 5, 4
; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsraw 2, 2, 3
; CHECK-NEXT: blr
%rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
Expand All @@ -285,11 +251,6 @@ define <4 x i32> @test212(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @test213(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test213:
; CHECK: # BB#0:
; CHECK-NEXT: addis 3, 2, .LCPI23_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI23_0@toc@l
; CHECK-NEXT: lxvd2x 0, 0, 3
; CHECK-NEXT: xxswapd 36, 0
; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrad 2, 2, 3
; CHECK-NEXT: blr
%rem = and <2 x i64> %b, <i64 63, i64 63>
Expand Down

0 comments on commit 85fd68b

Please sign in to comment.