Skip to content

Commit

Permalink
ARM: use AAPCS-style prologues for embedded MachO.
Browse files Browse the repository at this point in the history
Darwin prologues save their GPRs in two stages: a narrow push of r0-r7 & lr,
followed by a wide push of the remaining registers if there are any. AAPCS uses
a single push.w instruction.

It turns out that, on average, enough registers get pushed that code is smaller
in the AAPCS prologue, which is a nice property for M-class programmers. They
also have other options available for back-traces, so can hopefully deal with
the fact that FP & LR aren't adjacent in memory.

rdar://problem/15909583

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209895 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
TNorthover committed May 30, 2014
1 parent 98f8bc9 commit 645c5b9
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 28 deletions.
9 changes: 6 additions & 3 deletions lib/Target/ARM/ARMBaseRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,12 @@ using namespace llvm;

ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
: ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) {
if (STI.isTargetMachO())
FramePtr = ARM::R7;
else if (STI.isTargetWindows())
if (STI.isTargetMachO()) {
if (STI.isTargetDarwin() || STI.isThumb1Only())
FramePtr = ARM::R7;
else
FramePtr = ARM::R11;
} else if (STI.isTargetWindows())
FramePtr = ARM::R11;
else // ARM EABI
FramePtr = STI.isThumb() ? ARM::R7 : ARM::R11;
Expand Down
14 changes: 7 additions & 7 deletions lib/Target/ARM/ARMFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R10:
case ARM::R11:
case ARM::R12:
if (STI.isTargetMachO()) {
if (STI.isTargetDarwin()) {
GPRCS2Size += 4;
break;
}
Expand Down Expand Up @@ -380,7 +380,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R10:
case ARM::R11:
case ARM::R12:
if (STI.isTargetMachO())
if (STI.isTargetDarwin())
break;
// fallthrough
case ARM::R0:
Expand Down Expand Up @@ -445,7 +445,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R10:
case ARM::R11:
case ARM::R12:
if (STI.isTargetMachO()) {
if (STI.isTargetDarwin()) {
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
unsigned Offset = MFI->getObjectOffset(FI);
unsigned CFIIndex = MMI.addFrameInst(
Expand Down Expand Up @@ -810,7 +810,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
unsigned LastReg = 0;
for (; i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
if (!(Func)(Reg, STI.isTargetMachO())) continue;
if (!(Func)(Reg, STI.isTargetDarwin())) continue;

// D-registers in the aligned area DPRCS2 are NOT spilled here.
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
Expand Down Expand Up @@ -888,7 +888,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
bool DeleteRet = false;
for (; i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
if (!(Func)(Reg, STI.isTargetMachO())) continue;
if (!(Func)(Reg, STI.isTargetDarwin())) continue;

// The aligned reloads from area DPRCS2 are not inserted here.
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
Expand Down Expand Up @@ -1438,7 +1438,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (Spilled) {
NumGPRSpills++;

if (!STI.isTargetMachO()) {
if (!STI.isTargetDarwin()) {
if (Reg == ARM::LR)
LRSpilled = true;
CS1Spilled = true;
Expand All @@ -1460,7 +1460,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
break;
}
} else {
if (!STI.isTargetMachO()) {
if (!STI.isTargetDarwin()) {
UnspilledCS1GPRs.push_back(Reg);
continue;
}
Expand Down
16 changes: 8 additions & 8 deletions test/CodeGen/ARM/fold-stack-adjust.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ declare void @bar(i8*)

define void @check_simple() minsize {
; CHECK-LABEL: check_simple:
; CHECK: push {r3, r4, r5, r6, r7, lr}
; CHECK: push.w {r7, r8, r9, r10, r11, lr}
; CHECK-NOT: sub sp, sp,
; ...
; CHECK-NOT: add sp, sp,
; CHECK: pop {r0, r1, r2, r3, r7, pc}
; CHECK: pop.w {r0, r1, r2, r3, r11, pc}

; CHECK-T1-LABEL: check_simple:
; CHECK-T1: push {r3, r4, r5, r6, r7, lr}
Expand Down Expand Up @@ -44,11 +44,11 @@ define void @check_simple() minsize {

define void @check_simple_too_big() minsize {
; CHECK-LABEL: check_simple_too_big:
; CHECK: push {r7, lr}
; CHECK: push.w {r11, lr}
; CHECK: sub sp,
; ...
; CHECK: add sp,
; CHECK: pop {r7, pc}
; CHECK: pop.w {r11, pc}
%var = alloca i8, i32 64
call void @bar(i8* %var)
ret void
Expand Down Expand Up @@ -93,11 +93,11 @@ define void @check_vfp_fold() minsize {
; folded in except that doing so would clobber the value being returned.
define i64 @check_no_return_clobber() minsize {
; CHECK-LABEL: check_no_return_clobber:
; CHECK: push {r1, r2, r3, r4, r5, r6, r7, lr}
; CHECK: push.w {r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NOT: sub sp,
; ...
; CHECK: add sp, #24
; CHECK: pop {r7, pc}
; CHECK: pop.w {r11, pc}

; Just to keep iOS FileCheck within previous function:
; CHECK-IOS-LABEL: check_no_return_clobber:
Expand Down Expand Up @@ -176,9 +176,9 @@ define void @test_varsize(...) minsize {

; CHECK-LABEL: test_varsize:
; CHECK: sub sp, #16
; CHECK: push {r5, r6, r7, lr}
; CHECK: push.w {r9, r10, r11, lr}
; ...
; CHECK: pop.w {r2, r3, r7, lr}
; CHECK: pop.w {r2, r3, r11, lr}
; CHECK: add sp, #16
; CHECK: bx lr

Expand Down
8 changes: 4 additions & 4 deletions test/CodeGen/ARM/interrupt-attr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,15 @@ define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" {
; Normal AAPCS function (r0-r3 pushed onto stack by hardware, lr set to
; appropriate sentinel so no special return needed).
; CHECK-M-LABEL: irq_fn:
; CHECK-M: push {r4, r6, r7, lr}
; CHECK-M: add r7, sp, #8
; CHECK-M: push.w {r4, r10, r11, lr}
; CHECK-M: add.w r11, sp, #8
; CHECK-M: mov r4, sp
; CHECK-M: bic r4, r4, #7
; CHECK-M: mov sp, r4
; CHECK-M: blx _bar
; CHECK-M: sub.w r4, r7, #8
; CHECK-M: sub.w r4, r11, #8
; CHECK-M: mov sp, r4
; CHECK-M: pop {r4, r6, r7, pc}
; CHECK-M: pop.w {r4, r10, r11, pc}

call arm_aapcscc void @bar()
ret void
Expand Down
10 changes: 4 additions & 6 deletions test/CodeGen/ARM/none-macho.ll
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ define i32 @test_frame_ptr() {
; CHECK-LABEL: test_frame_ptr:
call void @test_trap()

; Frame pointer is r7 as for Darwin
; CHECK: mov r7, sp
; Frame pointer is r11.
; CHECK: mov r11, sp
ret i32 42
}

Expand All @@ -63,11 +63,9 @@ define void @test_two_areas(%big_arr* %addr) {
; On iOS, the choice of r7 as FP means FP and LR have to be stored consecutively
; on the stack for the frame record to be valid, which requires 2 register-save
; areas. Embedded MachO now uses r11 as FP with an AAPCS-style prologue, so all
; callee-saved GPRs are expected in a single register-save area.
; CHECK-NON-FAST: push {r4, r5, r6, r7, lr}
; CHECK-NON-FAST: push.w {r8, r9, r10, r11}
; CHECK-NON-FAST: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; ...
; CHECK-NON-FAST: pop.w {r8, r9, r10, r11}
; CHECK-NON-FAST: pop {r4, r5, r6, r7, pc}
; CHECK-NON-FAST: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
ret void
}

Expand Down

0 comments on commit 645c5b9

Please sign in to comment.