Skip to content
This repository has been archived by the owner on Jan 1, 2023. It is now read-only.

Commit

Permalink
PR16726: extend rol/ror matching
Browse files Browse the repository at this point in the history
C-like languages promote types like unsigned short to unsigned int before
performing an arithmetic operation. Currently the rotate matcher in the
DAGCombiner does not consider this situation.

This commit extends the DAGCombiner so that the pattern

(or (shl ([az]ext x), (*ext y)), (srl ([az]ext x), (*ext (sub 32, y))))

is folded into

([az]ext (rotl x, y))

The matching is restricted to aext and zext because in these cases the upper
bits are either undefined or known. A test case is included.

This fixes PR16726.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191049 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
redstar committed Sep 19, 2013
1 parent deb8e29 commit 9ac27b1
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 2 deletions.
39 changes: 37 additions & 2 deletions lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3341,6 +3341,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
unsigned OpSizeInBits = VT.getSizeInBits();
SDValue LHSShiftArg = LHSShift.getOperand(0);
SDValue LHSShiftAmt = LHSShift.getOperand(1);
SDValue RHSShiftArg = RHSShift.getOperand(0);
SDValue RHSShiftAmt = RHSShift.getOperand(1);

// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
Expand Down Expand Up @@ -3420,10 +3421,27 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
// (rotr x, (sub 32, y))
if (ConstantSDNode *SUBC =
dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0)))
if (SUBC->getAPIntValue() == OpSizeInBits)
if (SUBC->getAPIntValue() == OpSizeInBits) {
return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
LHSShiftArg,
HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
} else if (LHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
LHSShiftArg.getOpcode() == ISD::ANY_EXTEND) {
// fold (or (shl (*ext x), (*ext y)),
// (srl (*ext x), (*ext (sub 32, y)))) ->
// (*ext (rotl x, y))
// fold (or (shl (*ext x), (*ext y)),
// (srl (*ext x), (*ext (sub 32, y)))) ->
// (*ext (rotr x, (sub 32, y)))
SDValue LArgExtOp0 = LHSShiftArg.getOperand(0);
EVT LArgVT = LArgExtOp0.getValueType();
if (LArgVT.getSizeInBits() == SUBC->getAPIntValue()) {
SDValue V = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, LArgVT,
LArgExtOp0,
HasROTL ? LHSShiftAmt : RHSShiftAmt);
return DAG.getNode(LHSShiftArg.getOpcode(), DL, VT, V).getNode();
}
}
} else if (LExtOp0.getOpcode() == ISD::SUB &&
RExtOp0 == LExtOp0.getOperand(1)) {
// fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
Expand All @@ -3432,10 +3450,27 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
// (rotl x, (sub 32, y))
if (ConstantSDNode *SUBC =
dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0)))
if (SUBC->getAPIntValue() == OpSizeInBits)
if (SUBC->getAPIntValue() == OpSizeInBits) {
return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
LHSShiftArg,
HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
} else if (RHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
RHSShiftArg.getOpcode() == ISD::ANY_EXTEND) {
// fold (or (shl (*ext x), (*ext (sub 32, y))),
// (srl (*ext x), (*ext y))) ->
// (*ext (rotl x, y))
// fold (or (shl (*ext x), (*ext (sub 32, y))),
// (srl (*ext x), (*ext y))) ->
// (*ext (rotr x, (sub 32, y)))
SDValue RArgExtOp0 = RHSShiftArg.getOperand(0);
EVT RArgVT = RArgExtOp0.getValueType();
if (RArgVT.getSizeInBits() == SUBC->getAPIntValue()) {
SDValue V = DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, RArgVT,
RArgExtOp0,
HasROTR ? RHSShiftAmt : LHSShiftAmt);
return DAG.getNode(RHSShiftArg.getOpcode(), DL, VT, V).getNode();
}
}
}
}

Expand Down
76 changes: 76 additions & 0 deletions test/CodeGen/X86/rotate3.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
; Check that (or (shl x, y), (srl x, (sub 32, y))) is folded into (rotl x, y)
; and (or (shl x, (sub 32, y)), (srl x, y)) into (rotr x, y) even if the
; argument is zero extended. Fix for PR16726.

; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s

; 8-bit rotate-left written on a promoted value: %x_arg is zero-extended to
; i32, then (x << n) | (x >> (8 - n)) is computed in i32 and truncated back
; to i8. The FileCheck line following this function expects a single rolb.
define zeroext i8 @rolbyte(i32 %nBits_arg, i8 %x_arg) nounwind readnone {
entry:
; Promote the byte operand to 32 bits.
%tmp1 = zext i8 %x_arg to i32
; Left-shift half of the rotate.
%tmp3 = shl i32 %tmp1, %nBits_arg
; Complementary amount uses the narrow width (8), not the promoted width.
%tmp8 = sub i32 8, %nBits_arg
; Right-shift half of the rotate.
%tmp10 = lshr i32 %tmp1, %tmp8
%tmp11 = or i32 %tmp3, %tmp10
%tmp12 = trunc i32 %tmp11 to i8
ret i8 %tmp12
}
; CHECK: rolb %cl, %{{[a-z0-9]+}}


; 8-bit rotate-right written on a promoted value: %x_arg is zero-extended to
; i32, then (x >> n) | (x << (8 - n)) is computed in i32 and truncated back
; to i8. The FileCheck line following this function expects a single rorb.
define zeroext i8 @rorbyte(i32 %nBits_arg, i8 %x_arg) nounwind readnone {
entry:
; Promote the byte operand to 32 bits.
%tmp1 = zext i8 %x_arg to i32
; Right-shift half of the rotate.
%tmp3 = lshr i32 %tmp1, %nBits_arg
; Complementary amount uses the narrow width (8), not the promoted width.
%tmp8 = sub i32 8, %nBits_arg
; Left-shift half of the rotate.
%tmp10 = shl i32 %tmp1, %tmp8
%tmp11 = or i32 %tmp3, %tmp10
%tmp12 = trunc i32 %tmp11 to i8
ret i8 %tmp12
}
; CHECK: rorb %cl, %{{[a-z0-9]+}}

; 16-bit rotate-left written on a promoted value: %x_arg is zero-extended to
; i32, then (x << n) | (x >> (16 - n)) is computed in i32 and truncated back
; to i16. The FileCheck line following this function expects a single rolw.
define zeroext i16 @rolword(i32 %nBits_arg, i16 %x_arg) nounwind readnone {
entry:
; Promote the 16-bit operand to 32 bits.
%tmp1 = zext i16 %x_arg to i32
; Left-shift half of the rotate.
%tmp3 = shl i32 %tmp1, %nBits_arg
; Complementary amount uses the narrow width (16), not the promoted width.
%tmp8 = sub i32 16, %nBits_arg
; Right-shift half of the rotate.
%tmp10 = lshr i32 %tmp1, %tmp8
%tmp11 = or i32 %tmp3, %tmp10
%tmp12 = trunc i32 %tmp11 to i16
ret i16 %tmp12
}
; CHECK: rolw %cl, %{{[a-z0-9]+}}

; 16-bit rotate-right written on a promoted value: %x_arg is zero-extended to
; i32, then (x >> n) | (x << (16 - n)) is computed in i32 and truncated back
; to i16. The FileCheck line following this function expects a single rorw.
define zeroext i16 @rorword(i32 %nBits_arg, i16 %x_arg) nounwind readnone {
entry:
; Promote the 16-bit operand to 32 bits.
%tmp1 = zext i16 %x_arg to i32
; Right-shift half of the rotate.
%tmp3 = lshr i32 %tmp1, %nBits_arg
; Complementary amount uses the narrow width (16), not the promoted width.
%tmp8 = sub i32 16, %nBits_arg
; Left-shift half of the rotate.
%tmp10 = shl i32 %tmp1, %tmp8
%tmp11 = or i32 %tmp3, %tmp10
%tmp12 = trunc i32 %tmp11 to i16
ret i16 %tmp12
}
; CHECK: rorw %cl, %{{[a-z0-9]+}}

; 32-bit rotate-left written on a promoted value: %x_arg is zero-extended to
; i64 and (x << n) | (x >> (32 - n)) is computed in i64. Unlike the byte/word
; tests there is no trunc; the i64 result is returned directly. The FileCheck
; line following this function expects a single roll.
define i64 @roldword(i64 %nBits_arg, i32 %x_arg) nounwind readnone {
entry:
; Promote the 32-bit operand to 64 bits.
%tmp1 = zext i32 %x_arg to i64
; Left-shift half of the rotate.
%tmp3 = shl i64 %tmp1, %nBits_arg
; Complementary amount uses the narrow width (32), not the promoted width.
%tmp8 = sub i64 32, %nBits_arg
; Right-shift half of the rotate.
%tmp10 = lshr i64 %tmp1, %tmp8
%tmp11 = or i64 %tmp3, %tmp10
ret i64 %tmp11
}
; CHECK: roll %cl, %{{[a-z0-9]+}}

; 32-bit rotate-right written on a promoted value: %x_arg is zero-extended to
; i64 and (x >> n) | (x << (32 - n)) is computed in i64. There is no trunc;
; the i64 result is returned directly. The FileCheck line following this
; function expects a single rorl.
; NOTE(review): the return carries zeroext while the otherwise parallel
; @roldword does not - presumably unintentional; confirm before changing.
define zeroext i64 @rordword(i64 %nBits_arg, i32 %x_arg) nounwind readnone {
entry:
; Promote the 32-bit operand to 64 bits.
%tmp1 = zext i32 %x_arg to i64
; Right-shift half of the rotate.
%tmp3 = lshr i64 %tmp1, %nBits_arg
; Complementary amount uses the narrow width (32), not the promoted width.
%tmp8 = sub i64 32, %nBits_arg
; Left-shift half of the rotate.
%tmp10 = shl i64 %tmp1, %tmp8
%tmp11 = or i64 %tmp3, %tmp10
ret i64 %tmp11
}
; CHECK: rorl %cl, %{{[a-z0-9]+}}

0 comments on commit 9ac27b1

Please sign in to comment.