Skip to content

Commit

Permalink
Revert r369626 "[ARM] Fix lsrl with a 128/256 bit shift amount or a shift of 32"
Browse files Browse the repository at this point in the history

It broke the bots, see e.g. http://lab.llvm.org:8011/builders/clang-cuda-build/builds/36275/

> This patch fixes shifts by a 128/256 bit shift amount. It also fixes
> codegen for shifts of 32 by delegating to LLVM's default optimisation
> instead of emitting a long shift.
>
> Tests that used to generate long shifts of 32 are updated to check for the
> more optimised codegen.
>
> Differential revision: https://reviews.llvm.org/D66519
>
> llvm-svn: 369626

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@369636 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
zmodem committed Aug 22, 2019
1 parent b836e95 commit 046a919
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 105 deletions.
13 changes: 6 additions & 7 deletions lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5938,15 +5938,14 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
unsigned ShPartsOpc = ARMISD::LSLL;
ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);

// If the shift amount is greater than 32 or has a greater bitwidth than 64
// then do the default optimisation
if (ShAmt->getValueType(0).getSizeInBits() > 64 ||
(Con && Con->getZExtValue() >= 32))
// If the shift amount is greater than 32 then do the default optimisation
if (Con && Con->getZExtValue() > 32)
return SDValue();

// Extract the lower 32 bits of the shift amount if it's not an i32
if (ShAmt->getValueType(0) != MVT::i32)
ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32);
// Extract the lower 32 bits of the shift amount if it's an i64
if (ShAmt->getValueType(0) == MVT::i64)
ShAmt = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ShAmt,
DAG.getConstant(0, dl, MVT::i32));

if (ShOpc == ISD::SRL) {
if (!Con)
Expand Down
65 changes: 2 additions & 63 deletions test/CodeGen/ARM/shift_parts.ll
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,7 @@ entry:
define i64 @shift_left_imm_big2(i64 %x) {
; CHECK-MVE-LABEL: shift_left_imm_big2:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: mov r1, r0
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: lsll r0, r1, #32
; CHECK-MVE-NEXT: bx lr
;
; CHECK-NON-MVE-LABEL: shift_left_imm_big2:
Expand Down Expand Up @@ -129,8 +128,7 @@ entry:
define i64 @shift_right_imm_big2(i64 %x) {
; CHECK-MVE-LABEL: shift_right_imm_big2:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: mov r0, r1
; CHECK-MVE-NEXT: movs r1, #0
; CHECK-MVE-NEXT: lsrl r0, r1, #32
; CHECK-MVE-NEXT: bx lr
;
; CHECK-NON-MVE-LABEL: shift_right_imm_big2:
Expand Down Expand Up @@ -221,62 +219,3 @@ entry:
store i40 %bf.clear, i40* %0, align 1
ret void
}

%struct.a = type { i96 }

; Loads an i128 through %x, clears its low 96 bits with an `and`, and stores
; the result back. The expected output for both run lines is three zeroed
; 32-bit words at offsets 0, 4 and 8 (a strd + str pair on MVE).
; NOTE(review): going by the function name, this presumably covers lowering of
; a long shift whose shift amount is wider than 64 bits -- confirm against the
; original review (D66519).
define void @lsll_128bit_shift(%struct.a* nocapture %x) local_unnamed_addr #0 {
; CHECK-MVE-LABEL: lsll_128bit_shift:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: movs r1, #0
; CHECK-MVE-NEXT: strd r1, r1, [r0]
; CHECK-MVE-NEXT: str r1, [r0, #8]
; CHECK-MVE-NEXT: bx lr
;
; CHECK-NON-MVE-LABEL: lsll_128bit_shift:
; CHECK-NON-MVE: @ %bb.0: @ %entry
; CHECK-NON-MVE-NEXT: movs r1, #0
; CHECK-NON-MVE-NEXT: str r1, [r0]
; CHECK-NON-MVE-NEXT: str r1, [r0, #4]
; CHECK-NON-MVE-NEXT: str r1, [r0, #8]
; CHECK-NON-MVE-NEXT: bx lr
entry:
%0 = bitcast %struct.a* %x to i128*
%bf.load = load i128, i128* %0, align 8
; The mask below is -(2^96): bits 96..127 are kept, bits 0..95 are zeroed.
%bf.clear4 = and i128 %bf.load, -79228162514264337593543950336
store i128 %bf.clear4, i128* %0, align 8
ret void
}

%struct.b = type { i184 }

; Loads an i192 through %x, clears its low 184 bits with an `and`, and stores
; the result back. The expected output zeroes the words at offsets 0..16, then
; rebuilds the word at offset 20 from the byte at offset 23 shifted left by 24.
; NOTE(review): going by the function name, this presumably covers lowering of
; a long shift whose shift amount is legalised to a 256-bit type -- confirm
; against the original review (D66519).
define void @lsll_256bit_shift(%struct.b* nocapture %x) local_unnamed_addr #0 {
; CHECK-MVE-LABEL: lsll_256bit_shift:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: movs r1, #0
; CHECK-MVE-NEXT: str r1, [r0, #16]
; CHECK-MVE-NEXT: strd r1, r1, [r0, #8]
; CHECK-MVE-NEXT: strd r1, r1, [r0]
; CHECK-MVE-NEXT: ldrb r1, [r0, #23]
; CHECK-MVE-NEXT: lsls r1, r1, #24
; CHECK-MVE-NEXT: str r1, [r0, #20]
; CHECK-MVE-NEXT: bx lr
;
; CHECK-NON-MVE-LABEL: lsll_256bit_shift:
; CHECK-NON-MVE: @ %bb.0: @ %entry
; CHECK-NON-MVE-NEXT: movs r1, #0
; CHECK-NON-MVE-NEXT: str r1, [r0, #16]
; CHECK-NON-MVE-NEXT: str r1, [r0, #8]
; CHECK-NON-MVE-NEXT: str r1, [r0, #12]
; CHECK-NON-MVE-NEXT: str r1, [r0]
; CHECK-NON-MVE-NEXT: str r1, [r0, #4]
; CHECK-NON-MVE-NEXT: ldrb r1, [r0, #23]
; CHECK-NON-MVE-NEXT: lsls r1, r1, #24
; CHECK-NON-MVE-NEXT: str r1, [r0, #20]
; CHECK-NON-MVE-NEXT: bx lr
entry:
%0 = bitcast %struct.b* %x to i192*
%bf.load = load i192, i192* %0, align 8
; The mask below is -(2^184): only the top 8 bits (bits 184..191) are kept.
%bf.clear4 = and i192 %bf.load, -24519928653854221733733552434404946937899825954937634816
store i192 %bf.clear4, i192* %0, align 8
ret void
}
58 changes: 31 additions & 27 deletions test/CodeGen/Thumb2/mve-abs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -40,39 +40,43 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @abs_v2i64(<2 x i64> %s1) {
; CHECK-LABEL: abs_v2i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: vmov r12, s2
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: vmov r4, s0
; CHECK-NEXT: rsbs.w r3, r12, #0
; CHECK-NEXT: sbc.w lr, r2, r0
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: vmov r3, s3
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: rsbs.w lr, r12, #0
; CHECK-NEXT: sbc.w r5, r0, r3
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: mov r2, lr
; CHECK-NEXT: lsrl r2, r5, #32
; CHECK-NEXT: mov.w r5, #0
; CHECK-NEXT: it mi
; CHECK-NEXT: movmi r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: movmi r5, #1
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: moveq lr, r0
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: rsbs r5, r4, #0
; CHECK-NEXT: sbc.w r6, r2, r0
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: moveq r2, r3
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: rsbs r4, r1, #0
; CHECK-NEXT: mov r6, r4
; CHECK-NEXT: sbc.w r7, r0, r3
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: lsrl r6, r7, #32
; CHECK-NEXT: it mi
; CHECK-NEXT: movmi r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: itt eq
; CHECK-NEXT: moveq r6, r0
; CHECK-NEXT: moveq r5, r4
; CHECK-NEXT: vmov.32 q0[0], r5
; CHECK-NEXT: movmi r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: ite eq
; CHECK-NEXT: moveq r6, r3
; CHECK-NEXT: movne r1, r4
; CHECK-NEXT: vmov.32 q0[0], r1
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: vmov.32 q0[1], r6
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: moveq r3, r12
; CHECK-NEXT: vmov.32 q0[2], r3
; CHECK-NEXT: vmov.32 q0[3], lr
; CHECK-NEXT: pop {r4, r5, r6, pc}
; CHECK-NEXT: moveq lr, r12
; CHECK-NEXT: vmov.32 q0[2], lr
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
entry:
%0 = icmp slt <2 x i64> %s1, zeroinitializer
%1 = sub nsw <2 x i64> zeroinitializer, %s1
Expand Down
12 changes: 8 additions & 4 deletions test/CodeGen/Thumb2/mve-div-expand.ll
Original file line number Diff line number Diff line change
Expand Up @@ -755,12 +755,14 @@ define arm_aapcs_vfpcc <2 x i64> @udiv_i64(<2 x i64> %in1, <2 x i64> %in2) {
; CHECK-NEXT: vmov r2, s18
; CHECK-NEXT: vmov r3, s19
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.32 q4[1], r1
; CHECK-NEXT: lsrl r0, r1, #32
; CHECK-NEXT: vmov.32 q4[1], r0
; CHECK-NEXT: mov r0, r12
; CHECK-NEXT: mov r1, lr
; CHECK-NEXT: bl __aeabi_uldivmod
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: vmov.32 q4[3], r1
; CHECK-NEXT: lsrl r0, r1, #32
; CHECK-NEXT: vmov.32 q4[3], r0
; CHECK-NEXT: vmov q0, q4
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r7, pc}
Expand Down Expand Up @@ -788,12 +790,14 @@ define arm_aapcs_vfpcc <2 x i64> @sdiv_i64(<2 x i64> %in1, <2 x i64> %in2) {
; CHECK-NEXT: vmov r2, s18
; CHECK-NEXT: vmov r3, s19
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.32 q4[1], r1
; CHECK-NEXT: lsrl r0, r1, #32
; CHECK-NEXT: vmov.32 q4[1], r0
; CHECK-NEXT: mov r0, r12
; CHECK-NEXT: mov r1, lr
; CHECK-NEXT: bl __aeabi_ldivmod
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: vmov.32 q4[3], r1
; CHECK-NEXT: lsrl r0, r1, #32
; CHECK-NEXT: vmov.32 q4[3], r0
; CHECK-NEXT: vmov q0, q4
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r7, pc}
Expand Down
12 changes: 8 additions & 4 deletions test/CodeGen/Thumb2/mve-vcvt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -382,12 +382,14 @@ define arm_aapcs_vfpcc <2 x i64> @foo_int64_float(<2 x double> %src) {
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: vmov r2, r3, d9
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.32 q4[1], r1
; CHECK-NEXT: lsrl r0, r1, #32
; CHECK-NEXT: vmov.32 q4[1], r0
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: vmov.32 q4[3], r1
; CHECK-NEXT: lsrl r0, r1, #32
; CHECK-NEXT: vmov.32 q4[3], r0
; CHECK-NEXT: vmov q0, q4
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r7, pc}
Expand All @@ -408,12 +410,14 @@ define arm_aapcs_vfpcc <2 x i64> @foo_uint64_float(<2 x double> %src) {
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: vmov r2, r3, d9
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.32 q4[1], r1
; CHECK-NEXT: lsrl r0, r1, #32
; CHECK-NEXT: vmov.32 q4[1], r0
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: vmov.32 q4[3], r1
; CHECK-NEXT: lsrl r0, r1, #32
; CHECK-NEXT: vmov.32 q4[3], r0
; CHECK-NEXT: vmov q0, q4
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r7, pc}
Expand Down

0 comments on commit 046a919

Please sign in to comment.