forked from llvm-mirror/llvm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[X86] Emit fewer instructions to allocate >16GB stack frames
Summary: Use this code pattern when RAX is live, instead of emitting up to 2 billion adjustments:

    pushq %rax
    movabsq +-$Offset+-8, %rax
    addq %rsp, %rax
    xchg %rax, (%rsp)
    movq (%rsp), %rsp

Try to clean this code up a bit while I'm here. In particular, hoist the logic that handles the entire adjustment with `movabsq $imm, %rax` out of the loop.

This negates the offset in the prologue and uses ADD because X86 only has a two-operand subtract, which always subtracts from the destination register, and the destination can no longer be RSP.

Fixes PR31962

Reviewers: majnemer, sdardis

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D30052

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298116 91177308-0d34-0410-b5e6-96231b3b80d8
- Loading branch information
Showing
2 changed files
with
128 additions
and
37 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=CHECK | ||
|
||
; Test how we handle pathologically large stack frames when RAX is live through | ||
; the prologue and epilogue. | ||
|
||
declare void @bar(i8*) | ||
declare void @llvm.va_start(i8*) | ||
|
||
; For stack frames between 2GB and 16GB, do multiple adjustments. | ||
|
||
; stack_frame_8gb: variadic function whose frame holds an 8 GiB array
; (0x200000000 bytes) plus a 24-byte buffer that is passed to llvm.va_start.
; The FileCheck directives below expect the prologue to lower %rsp with four
; subq instructions of 0x7FFFFFFF each followed by a fifth subq whose immediate
; is left unpinned, and the epilogue to mirror that with five addq instructions
; after the call to bar.
; NOTE(review): presumably the varargs signature and the i32 return value are
; what keep RAX live across the prologue and epilogue -- confirm against the ABI.
define i32 @stack_frame_8gb(i32 %x, ...) nounwind { | ||
; CHECK-LABEL: stack_frame_8gb: | ||
; CHECK: subq ${{.*}}, %rsp # imm = 0x7FFFFFFF | ||
; CHECK: subq ${{.*}}, %rsp # imm = 0x7FFFFFFF | ||
; CHECK: subq ${{.*}}, %rsp # imm = 0x7FFFFFFF | ||
; CHECK: subq ${{.*}}, %rsp # imm = 0x7FFFFFFF | ||
; CHECK: subq ${{.*}}, %rsp | ||
; CHECK: callq bar | ||
; CHECK: addq ${{.*}}, %rsp # imm = 0x7FFFFFFF | ||
; CHECK: addq ${{.*}}, %rsp # imm = 0x7FFFFFFF | ||
; CHECK: addq ${{.*}}, %rsp # imm = 0x7FFFFFFF | ||
; CHECK: addq ${{.*}}, %rsp # imm = 0x7FFFFFFF | ||
; CHECK: addq ${{.*}}, %rsp | ||
; CHECK: retq | ||
; 8 GiB local array; its base address is what gets passed to bar below.
%1 = alloca [u0x200000000 x i8] | ||
; 24-byte buffer handed to llvm.va_start.
%va = alloca i8, i32 24 | ||
call void @llvm.va_start(i8* %va) | ||
; Pointer to element 0 of the large array.
%2 = getelementptr inbounds [u0x200000000 x i8], [u0x200000000 x i8]* %1, i32 0, i32 0 | ||
call void @bar(i8* %2) | ||
; Return the fixed argument unchanged.
ret i32 %x | ||
} | ||
|
||
; For stack frames larger than 16GB, spill RAX instead of doing a linear number | ||
; of adjustments. | ||
|
||
; This function should have a frame size of 0x4000000D0. The 0xD0 is 208 bytes | ||
; from 24 bytes of va_list, 176 bytes of spilled varargs regparms, and 8 bytes | ||
; of alignment. We subtract 8 less and add 8 more in the prologue and epilogue | ||
; respectively to account for the PUSH. | ||
|
||
; stack_frame_16gb: variadic function whose frame holds a 16 GiB array
; (0x400000000 bytes) plus a 24-byte buffer that is passed to llvm.va_start.
; The FileCheck directives below expect a single five-instruction sequence on
; each side of the call to bar instead of a chain of subq/addq adjustments:
;   pushq   %rax          - save RAX on the stack (moves %rsp down by 8)
;   movabsq $imm, %rax    - load the 64-bit adjustment
;   addq    %rsp, %rax    - RAX = the adjusted stack pointer
;   xchgq   %rax, (%rsp)  - restore RAX and leave the new stack pointer in memory
;   movq    (%rsp), %rsp  - load the new stack pointer from that slot
; Prologue immediate 0xFFFFFFFBFFFFFF38 is -(0x4000000D0 - 8); epilogue
; immediate 0x4000000D8 is 0x4000000D0 + 8. The 8-byte difference from the
; 0x4000000D0 frame size accounts for the pushq.
define i32 @stack_frame_16gb(i32 %x, ...) nounwind { | ||
; CHECK-LABEL: stack_frame_16gb: | ||
; CHECK: pushq %rax | ||
; CHECK-NEXT: movabsq ${{.*}}, %rax # imm = 0xFFFFFFFBFFFFFF38 | ||
; CHECK-NEXT: addq %rsp, %rax | ||
; CHECK-NEXT: xchgq %rax, (%rsp) | ||
; CHECK-NEXT: movq (%rsp), %rsp | ||
; CHECK: callq bar | ||
; CHECK: pushq %rax | ||
; CHECK-NEXT: movabsq ${{.*}}, %rax # imm = 0x4000000D8 | ||
; CHECK-NEXT: addq %rsp, %rax | ||
; CHECK-NEXT: xchgq %rax, (%rsp) | ||
; CHECK-NEXT: movq (%rsp), %rsp | ||
; CHECK: retq | ||
; 16 GiB local array; its base address is what gets passed to bar below.
%1 = alloca [u0x400000000 x i8] | ||
; 24-byte buffer handed to llvm.va_start.
%va = alloca i8, i32 24 | ||
call void @llvm.va_start(i8* %va) | ||
; Pointer to element 0 of the large array.
%2 = getelementptr inbounds [u0x400000000 x i8], [u0x400000000 x i8]* %1, i32 0, i32 0 | ||
call void @bar(i8* %2) | ||
; Return the fixed argument unchanged.
ret i32 %x | ||
} | ||
|