Skip to content

Commit

Permalink
[ARM] Lower UDIV+UREM to UDIV+MLS (and the same for SREM)
Browse files Browse the repository at this point in the history
Summary:
This saves a library call to __aeabi_uidivmod. However, the
processor must feature hardware division in order to benefit from
the transformation.

Reviewers: scott-0, jmolloy, compnerd, rengolin

Subscribers: t.p.northover, compnerd, aemerson, rengolin, samparker, llvm-commits

Differential Revision: https://reviews.llvm.org/D24133

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@280808 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
pbarrio committed Sep 7, 2016
1 parent 2fdf2bf commit ba3ea4d
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 1 deletion.
19 changes: 18 additions & 1 deletion lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12098,6 +12098,24 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
bool isSigned = (Opcode == ISD::SDIVREM);
EVT VT = Op->getValueType(0);
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
SDLoc dl(Op);

// If the target has hardware divide, use divide + multiply + subtract:
// div = a / b
// rem = a - b * div
// return {div, rem}
// This should be lowered into UDIV/SDIV + MLS later on.
if (Subtarget->hasDivide()) {
unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
const SDValue Dividend = Op->getOperand(0);
const SDValue Divisor = Op->getOperand(1);
SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);

SDValue Values[2] = {Div, Rem};
return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
}

RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
VT.getSimpleVT().SimpleTy);
Expand All @@ -12111,7 +12129,6 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {

Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr);

SDLoc dl(Op);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(InChain)
.setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
Expand Down
43 changes: 43 additions & 0 deletions test/CodeGen/ARM/urem-opt-size.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
; expanded to a sequence of umull, lsrs, muls and sub instructions, but
; just a call to __aeabi_uidivmod.
;
; When the processor features hardware division, UDIV + UREM can be turned
; into UDIV + MLS. This prevents the library function __aeabi_uidivmod from
; being pulled into the binary. The test uses ARMv7-M.
;
; RUN: llc -mtriple=armv7a-eabi -mattr=-neon -verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv7m-eabi -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=V7M

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv7m-arm-none-eabi"
Expand All @@ -28,18 +33,56 @@ entry:
ret i32 %div
}

; Test for unsigned remainder
; @foo3 computes (GetValue() urem 1000000) and returns 1 when the remainder
; is zero, 0 otherwise.
; Expectations per RUN line:
;  - armv7a-eabi run (default prefix): a call to __aeabi_uidivmod, and no
;    umull-based expansion of the division by a constant.
;  - thumbv7m-eabi run (V7M prefix): a udiv followed by an mls whose operands
;    are the quotient, the divisor and the dividend of that udiv, and no
;    reference to __aeabi_uidivmod afterwards.
define i32 @foo3() local_unnamed_addr #0 {
entry:
; CHECK-LABEL: foo3:
; CHECK: __aeabi_uidivmod
; CHECK-NOT: umull
; V7M-LABEL: foo3:
; V7M: udiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]]
; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]]
; V7M-NOT: __aeabi_uidivmod
%call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
%rem = urem i32 %call, 1000000
%cmp = icmp eq i32 %rem, 0
%conv = zext i1 %cmp to i32
ret i32 %conv
}

; Test for signed remainder
; @foo4 returns (GetValue() srem 1000000) directly.
; Expectations per RUN line:
;  - armv7a-eabi run (default prefix): a call to __aeabi_idivmod.
;  - thumbv7m-eabi run (V7M prefix): an sdiv followed by an mls whose operands
;    are the quotient, the divisor and the dividend of that sdiv, and no
;    reference to __aeabi_idivmod afterwards.
define i32 @foo4() local_unnamed_addr #0 {
entry:
; CHECK-LABEL: foo4:
; CHECK:__aeabi_idivmod
; V7M-LABEL: foo4:
; V7M: sdiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]]
; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]]
; V7M-NOT: __aeabi_idivmod
%call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
%rem = srem i32 %call, 1000000
ret i32 %rem
}

; Check that doing an sdiv+srem has the same effect as only the srem,
; as the division needs to be computed anyway in order to calculate
; the remainder (i.e. make sure we don't end up with two divisions).
; @foo5 returns the sum of the quotient and the remainder of
; GetValue() divided by 1000000, so both results are live.
; Expectations per RUN line:
;  - armv7a-eabi run (default prefix): a call to __aeabi_idivmod.
;  - thumbv7m-eabi run (V7M prefix): one sdiv, no further sdiv before the mls
;    that consumes its quotient, and no reference to __aeabi_idivmod afterwards.
define i32 @foo5() local_unnamed_addr #0 {
entry:
; CHECK-LABEL: foo5:
; CHECK:__aeabi_idivmod
; V7M-LABEL: foo5:
; V7M: sdiv [[R2:r[0-9]+]], [[R0:r[0-9]+]], [[R1:r[0-9]+]]
; V7M-NOT: sdiv
; V7M: mls {{r[0-9]+}}, [[R2]], [[R1]], [[R0]]
; V7M-NOT: __aeabi_idivmod
%call = tail call i32 bitcast (i32 (...)* @GetValue to i32 ()*)()
%div = sdiv i32 %call, 1000000
%rem = srem i32 %call, 1000000
%add = add i32 %div, %rem
ret i32 %add
}

; External function declared with a variadic signature; the tests call it
; through a bitcast to i32 () to obtain a value unknown at compile time.
declare i32 @GetValue(...) local_unnamed_addr

; Attribute group referenced as #0 by the test functions (e.g. @foo3, @foo4,
; @foo5): size-optimized (minsize, optsize) and nounwind.
attributes #0 = { minsize nounwind optsize }

0 comments on commit ba3ea4d

Please sign in to comment.