Skip to content

Commit

Permalink
[AARCH64][NEON] Add support for ISD::ABS lowering
Browse files Browse the repository at this point in the history
Update int_aarch64_neon_abs intrinsic to use the ISD::ABS opcode directly

Differential Revision: https://reviews.llvm.org/D32940

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302415 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
RKSimon committed May 8, 2017
1 parent 13b9798 commit b7a5134
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 40 deletions.
6 changes: 6 additions & 0 deletions lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,9 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);

if (!VT.isFloatingPoint())
setOperationAction(ISD::ABS, VT, Legal);

// [SU][MIN|MAX] are available for all NEON types apart from i64.
if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
Expand Down Expand Up @@ -2482,6 +2485,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
}
case Intrinsic::aarch64_neon_abs:
return DAG.getNode(ISD::ABS, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_neon_smax:
return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
Expand Down
56 changes: 16 additions & 40 deletions lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -2734,60 +2734,36 @@ defm FMOV : FPMoveImmediate<"fmov">;
// Instantiates the UABDL records (assembly mnemonic "uabdl") from the
// SIMDLongThreeVectorBHSabdl multiclass, handing it int_aarch64_neon_uabd
// as the operator argument.
defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
int_aarch64_neon_uabd>;
// Match UABDL in log2-shuffle patterns.
// abs(zext(a) - zext(b)) with v8i8 inputs widened to v8i16 selects
// UABDLv8i8_v8i16 on the two V64 source operands.
def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
(zext (v8i8 V64:$opB))))),
(UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
// Open-coded form of the same computation: xor(s, add(diff, s)) where
// diff = zext(a) - zext(b) and s = AArch64vashr($src, 15).
// NOTE(review): $src only has to be the same value in both shifts; nothing
// in this pattern ties it to the sub result, and the output ignores it --
// confirm that matching an unrelated $src cannot happen or is harmless.
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
(v8i16 (add (sub (zext (v8i8 V64:$opA)),
(zext (v8i8 V64:$opB))),
(AArch64vashr v8i16:$src, (i32 15))))),
(UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
// High-half variant: both inputs are extract_high_v16i8 of V128 operands,
// and the whole V128 operands are passed to UABDLv16i8_v8i16.
def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 V128:$opA)),
(zext (extract_high_v16i8 V128:$opB))))),
(UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
// Open-coded xor/add/shift form of the high-half variant.
// NOTE(review): $src is likewise not tied to the sub result here -- confirm.
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
(v8i16 (add (sub (zext (extract_high_v16i8 V128:$opA)),
(zext (extract_high_v16i8 V128:$opB))),
(AArch64vashr v8i16:$src, (i32 15))))),
(UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
def : Pat<(xor (v4i32 (AArch64vashr v4i32:$src, (i32 31))),
(v4i32 (add (sub (zext (v4i16 V64:$opA)),
(zext (v4i16 V64:$opB))),
(AArch64vashr v4i32:$src, (i32 31))))),
// abs(zext(a) - zext(b)) with v4i16 inputs widened to v4i32 selects
// UABDLv4i16_v4i32 on the two V64 source operands.
def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)),
(zext (v4i16 V64:$opB))))),
(UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
def : Pat<(xor (v4i32 (AArch64vashr v4i32:$src, (i32 31))),
(v4i32 (add (sub (zext (extract_high_v8i16 V128:$opA)),
(zext (extract_high_v8i16 V128:$opB))),
(AArch64vashr v4i32:$src, (i32 31))))),
// High-half variant: abs(zext(hi(a)) - zext(hi(b))) where hi is
// extract_high_v8i16 of a V128 operand; the whole V128 operands are passed
// to UABDLv8i16_v4i32.
def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 V128:$opA)),
(zext (extract_high_v8i16 V128:$opB))))),
(UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;
def : Pat<(xor (v2i64 (AArch64vashr v2i64:$src, (i32 63))),
(v2i64 (add (sub (zext (v2i32 V64:$opA)),
(zext (v2i32 V64:$opB))),
(AArch64vashr v2i64:$src, (i32 63))))),
// abs(zext(a) - zext(b)) with v2i32 inputs widened to v2i64 selects
// UABDLv2i32_v2i64 on the two V64 source operands.
def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)),
(zext (v2i32 V64:$opB))))),
(UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
def : Pat<(xor (v2i64 (AArch64vashr v2i64:$src, (i32 63))),
(v2i64 (add (sub (zext (extract_high_v4i32 V128:$opA)),
(zext (extract_high_v4i32 V128:$opB))),
(AArch64vashr v2i64:$src, (i32 63))))),
// High-half variant: abs(zext(hi(a)) - zext(hi(b))) where hi is
// extract_high_v4i32 of a V128 operand; the whole V128 operands are passed
// to UABDLv4i32_v2i64.
def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 V128:$opA)),
(zext (extract_high_v4i32 V128:$opB))))),
(UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;

// Instantiates the vector ABS records (assembly mnemonic "abs") from the
// SIMDTwoVectorBHSD multiclass with int_aarch64_neon_abs as the operator.
defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_aarch64_neon_abs>;
// The patterns below select ABS for the open-coded absolute-value form
//   xor(s, add(x, s))   with   s = AArch64vashr(x, element_bits - 1),
// one pattern per vector type. The shift amounts 7/15/31/63 correspond to
// the i8/i16/i32/i64 element widths of the annotated result types.
// V64 operand forms (v8i8, v4i16, v2i32):
def : Pat<(xor (v8i8 (AArch64vashr V64:$src, (i32 7))),
(v8i8 (add V64:$src, (AArch64vashr V64:$src, (i32 7))))),
(ABSv8i8 V64:$src)>;
def : Pat<(xor (v4i16 (AArch64vashr V64:$src, (i32 15))),
(v4i16 (add V64:$src, (AArch64vashr V64:$src, (i32 15))))),
(ABSv4i16 V64:$src)>;
def : Pat<(xor (v2i32 (AArch64vashr V64:$src, (i32 31))),
(v2i32 (add V64:$src, (AArch64vashr V64:$src, (i32 31))))),
(ABSv2i32 V64:$src)>;
// V128 operand forms (v16i8, v8i16, v4i32, v2i64):
def : Pat<(xor (v16i8 (AArch64vashr V128:$src, (i32 7))),
(v16i8 (add V128:$src, (AArch64vashr V128:$src, (i32 7))))),
(ABSv16i8 V128:$src)>;
def : Pat<(xor (v8i16 (AArch64vashr V128:$src, (i32 15))),
(v8i16 (add V128:$src, (AArch64vashr V128:$src, (i32 15))))),
(ABSv8i16 V128:$src)>;
def : Pat<(xor (v4i32 (AArch64vashr V128:$src, (i32 31))),
(v4i32 (add V128:$src, (AArch64vashr V128:$src, (i32 31))))),
(ABSv4i32 V128:$src)>;
def : Pat<(xor (v2i64 (AArch64vashr V128:$src, (i32 63))),
(v2i64 (add V128:$src, (AArch64vashr V128:$src, (i32 63))))),
(ABSv2i64 V128:$src)>;

// Vector ABS (mnemonic "abs") instantiated with the abs operator as the
// multiclass's last argument.
defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>;
// CLS is instantiated with the int_aarch64_neon_cls intrinsic.
defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
// CLZ is instantiated with the ctlz operator.
defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
// CMEQ is instantiated with the AArch64cmeqz operator.
defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
Expand Down Expand Up @@ -3359,7 +3335,7 @@ def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
// Advanced SIMD two scalar instructions.
//===----------------------------------------------------------------------===//

// Scalar ABS (mnemonic "abs") instantiated with the int_aarch64_neon_abs
// intrinsic as the multiclass's last argument.
defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", int_aarch64_neon_abs>;
// Scalar ABS (mnemonic "abs") instantiated with the abs operator instead.
defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", abs>;
// Scalar compares, each instantiated with its AArch64cm*z operator:
// cmeq -> AArch64cmeqz, cmge -> AArch64cmgez, cmgt -> AArch64cmgtz.
defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
Expand Down

0 comments on commit b7a5134

Please sign in to comment.