Skip to content

Commit

Permalink
Move the segmented stack switch to a function attribute
Browse files Browse the repository at this point in the history
This removes the -segmented-stacks command line flag in favor of a
per-function "split-stack" attribute.

Patch by Luqman Aden and Alex Crichton!

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205997 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
rnk committed Apr 10, 2014
1 parent ee66766 commit bc1fd91
Show file tree
Hide file tree
Showing 17 changed files with 137 additions and 95 deletions.
5 changes: 2 additions & 3 deletions docs/SegmentedStacks.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@ monolithic chunk (of some worst case size) at thread initialization. This is
done by allocating stack blocks (henceforth called *stacklets*) and linking them
into a doubly linked list. The function prologue is responsible for checking if
the current stacklet has enough space for the function to execute; and if not,
call into the libgcc runtime to allocate more stack space. When using ``llc``,
segmented stacks can be enabled by adding ``-segmented-stacks`` to the command
line.
call into the libgcc runtime to allocate more stack space. Segmented stacks are
enabled with the ``"split-stack"`` attribute on LLVM functions.

The runtime functionality is `already there in libgcc
<http://gcc.gnu.org/wiki/SplitStacks>`_.
Expand Down
6 changes: 0 additions & 6 deletions include/llvm/CodeGen/CommandFlags.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,11 +189,6 @@ EnablePIE("enable-pie",
cl::desc("Assume the creation of a position independent executable."),
cl::init(false));

cl::opt<bool>
SegmentedStacks("segmented-stacks",
cl::desc("Use segmented stacks if possible."),
cl::init(false));

cl::opt<bool>
UseInitArray("use-init-array",
cl::desc("Use .init_array instead of .ctors."),
Expand Down Expand Up @@ -229,7 +224,6 @@ static inline TargetOptions InitTargetOptionsFromCodeGenFlags() {
Options.StackAlignmentOverride = OverrideStackAlignment;
Options.TrapFuncName = TrapFuncName;
Options.PositionIndependentExecutable = EnablePIE;
Options.EnableSegmentedStacks = SegmentedStacks;
Options.UseInitArray = UseInitArray;
return Options;
}
Expand Down
3 changes: 3 additions & 0 deletions include/llvm/CodeGen/MachineFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,9 @@ class MachineFunction {
return MBBNumbering[N];
}

/// shouldSplitStack - Should we be emitting segmented stack stuff for the
/// function? Returns true when the function returned by getFunction() has
/// the "split-stack" function attribute.
bool shouldSplitStack();

/// getNumBlockIDs - Return the number of MBB ID's allocated.
///
unsigned getNumBlockIDs() const { return (unsigned)MBBNumbering.size(); }
Expand Down
5 changes: 1 addition & 4 deletions include/llvm/Target/TargetOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ namespace llvm {
JITEmitDebugInfoToDisk(false), GuaranteedTailCallOpt(false),
DisableTailCalls(false), StackAlignmentOverride(0),
EnableFastISel(false), PositionIndependentExecutable(false),
EnableSegmentedStacks(false), UseInitArray(false),
UseInitArray(false),
DisableIntegratedAS(false), CompressDebugSections(false),
TrapFuncName(""), FloatABIType(FloatABI::Default),
AllowFPOpFusion(FPOpFusion::Standard) {}
Expand Down Expand Up @@ -152,8 +152,6 @@ namespace llvm {
/// if the relocation model is anything other than PIC.
unsigned PositionIndependentExecutable : 1;

unsigned EnableSegmentedStacks : 1;

/// UseInitArray - Use .init_array instead of .ctors for static
/// constructors.
unsigned UseInitArray : 1;
Expand Down Expand Up @@ -217,7 +215,6 @@ inline bool operator==(const TargetOptions &LHS,
ARE_EQUAL(StackAlignmentOverride) &&
ARE_EQUAL(EnableFastISel) &&
ARE_EQUAL(PositionIndependentExecutable) &&
ARE_EQUAL(EnableSegmentedStacks) &&
ARE_EQUAL(UseInitArray) &&
ARE_EQUAL(TrapFuncName) &&
ARE_EQUAL(FloatABIType) &&
Expand Down
5 changes: 5 additions & 0 deletions lib/CodeGen/MachineFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ getOrCreateJumpTableInfo(unsigned EntryKind) {
return JumpTableInfo;
}

/// shouldSplitStack - Report whether segmented-stack code should be emitted
/// for this function. The answer is taken from the function returned by
/// getFunction(): it is true exactly when that function has the
/// "split-stack" function attribute.
bool MachineFunction::shouldSplitStack() {
  const bool HasSplitStackAttr = getFunction()->hasFnAttribute("split-stack");
  return HasSplitStackAttr;
}

/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
/// recomputes them. This guarantees that the MBB numbers are sequential,
/// dense, and match the ordering of the blocks within the function. If a
Expand Down
2 changes: 1 addition & 1 deletion lib/CodeGen/PrologEpilogInserter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -680,7 +680,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
// we've been asked for it. This, when linked with a runtime with support
// for segmented stacks (libgcc is one), will result in allocating stack
// space in small chunks instead of one large contiguous block.
if (Fn.getTarget().Options.EnableSegmentedStacks)
if (Fn.shouldSplitStack())
TFI.adjustForSegmentedStacks(Fn);

// Emit additional code that is required to explicitly handle the stack in
Expand Down
1 change: 0 additions & 1 deletion lib/LTO/LTOCodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@ void LTOCodeGenerator::setTargetOptions(TargetOptions options) {
Options.StackAlignmentOverride = options.StackAlignmentOverride;
Options.TrapFuncName = options.TrapFuncName;
Options.PositionIndependentExecutable = options.PositionIndependentExecutable;
Options.EnableSegmentedStacks = options.EnableSegmentedStacks;
Options.UseInitArray = options.UseInitArray;
}

Expand Down
2 changes: 1 addition & 1 deletion lib/Target/X86/X86FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
!MFI->adjustsStack() && // No calls.
!IsWin64 && // Win64 has no Red Zone
!usesTheStack(MF) && // Don't push and pop.
!MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack
!MF.shouldSplitStack()) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
Expand Down
63 changes: 46 additions & 17 deletions lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -635,15 +635,8 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

if (Subtarget->isOSWindows() && !Subtarget->isTargetMacho())
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
else if (TM.Options.EnableSegmentedStacks)
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);

if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
// f32 and f64 use SSE.
Expand Down Expand Up @@ -11102,13 +11095,50 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
assert((Subtarget->isOSWindows() ||
getTargetMachine().Options.EnableSegmentedStacks) &&
"This should be used only on Windows targets or when segmented stacks "
"are being used");
assert(!Subtarget->isTargetMacho() && "Not implemented");
MachineFunction &MF = DAG.getMachineFunction();
bool SplitStack = MF.shouldSplitStack();
bool Lower = (Subtarget->isOSWindows() && !Subtarget->isTargetMacho()) ||
SplitStack;
SDLoc dl(Op);

if (!Lower) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDNode* Node = Op.getNode();

unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
" not tell us which reg is the stack pointer!");
EVT VT = Node->getValueType(0);
SDValue Tmp1 = SDValue(Node, 0);
SDValue Tmp2 = SDValue(Node, 1);
SDValue Tmp3 = Node->getOperand(2);
SDValue Chain = Tmp1.getOperand(0);

// Chain the dynamic stack allocation so that it doesn't modify the stack
// pointer when other instructions are using the stack.
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true),
SDLoc(Node));

SDValue Size = Tmp2.getOperand(1);
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering();
unsigned StackAlign = TFI.getStackAlignment();
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
if (Align > StackAlign)
Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
DAG.getConstant(-(uint64_t)Align, VT));
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain

Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
DAG.getIntPtrConstant(0, true), SDValue(),
SDLoc(Node));

SDValue Ops[2] = { Tmp1, Tmp2 };
return DAG.getMergeValues(Ops, 2, dl);
}

// Get the inputs.
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
Expand All @@ -11118,8 +11148,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
bool Is64Bit = Subtarget->is64Bit();
EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;

if (getTargetMachine().Options.EnableSegmentedStacks) {
MachineFunction &MF = DAG.getMachineFunction();
if (SplitStack) {
MachineRegisterInfo &MRI = MF.getRegInfo();

if (Is64Bit) {
Expand Down Expand Up @@ -15796,7 +15825,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
MachineFunction *MF = BB->getParent();
const BasicBlock *LLVM_BB = BB->getBasicBlock();

assert(getTargetMachine().Options.EnableSegmentedStacks);
assert(MF->shouldSplitStack());

unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
unsigned TlsOffset = Is64Bit ? 0x70 : 0x30;
Expand Down
8 changes: 5 additions & 3 deletions test/CodeGen/ARM/debug-segmented-stacks.ll
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs -filetype=asm | FileCheck %s -check-prefix=ARM-linux
; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -filetype=obj
; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -verify-machineinstrs -filetype=asm | FileCheck %s -check-prefix=ARM-linux
; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -filetype=obj

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}

define void @test_basic() {
define void @test_basic() #0 {
%mem = alloca i32, i32 10
call void @dummy_use (i32* %mem, i32 10)
ret void
Expand Down Expand Up @@ -78,3 +78,5 @@ define void @test_basic() {

; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)

attributes #0 = { "split-stack" }
12 changes: 7 additions & 5 deletions test/CodeGen/ARM/segmented-stacks-dynamic.ll
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android
; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux
; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -filetype=obj
; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -filetype=obj
; RUN: llc < %s -mtriple=arm-linux-androideabi -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android
; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux
; RUN: llc < %s -mtriple=arm-linux-androideabi -filetype=obj
; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -filetype=obj

; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)

define i32 @test_basic(i32 %l) {
define i32 @test_basic(i32 %l) #0 {
%mem = alloca i32, i32 %l
call void @dummy_use (i32* %mem, i32 %l)
%terminate = icmp eq i32 %l, 0
Expand Down Expand Up @@ -60,3 +60,5 @@ false:
; ARM-android: pop {r4, r5}

}

attributes #0 = { "split-stack" }
20 changes: 11 additions & 9 deletions test/CodeGen/ARM/segmented-stacks.ll
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android
; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux
; RUN: llc < %s -mtriple=arm-linux-androideabi -verify-machineinstrs | FileCheck %s -check-prefix=ARM-android
; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=ARM-linux

; We used to crash with filetype=obj
; RUN: llc < %s -mtriple=arm-linux-androideabi -segmented-stacks -filetype=obj
; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -segmented-stacks -filetype=obj
; RUN: llc < %s -mtriple=arm-linux-androideabi -filetype=obj
; RUN: llc < %s -mtriple=arm-linux-unknown-gnueabi -filetype=obj


; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)

define void @test_basic() {
define void @test_basic() #0 {
%mem = alloca i32, i32 10
call void @dummy_use (i32* %mem, i32 10)
ret void
Expand Down Expand Up @@ -54,7 +54,7 @@ define void @test_basic() {

}

define i32 @test_nested(i32 * nest %closure, i32 %other) {
define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
%addend = load i32 * %closure
%result = add i32 %other, %addend
ret i32 %result
Expand Down Expand Up @@ -99,7 +99,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {

}

define void @test_large() {
define void @test_large() #0 {
%mem = alloca i32, i32 10000
call void @dummy_use (i32* %mem, i32 0)
ret void
Expand Down Expand Up @@ -144,7 +144,7 @@ define void @test_large() {

}

define fastcc void @test_fastcc() {
define fastcc void @test_fastcc() #0 {
%mem = alloca i32, i32 10
call void @dummy_use (i32* %mem, i32 10)
ret void
Expand Down Expand Up @@ -189,7 +189,7 @@ define fastcc void @test_fastcc() {

}

define fastcc void @test_fastcc_large() {
define fastcc void @test_fastcc_large() #0 {
%mem = alloca i32, i32 10000
call void @dummy_use (i32* %mem, i32 0)
ret void
Expand Down Expand Up @@ -233,3 +233,5 @@ define fastcc void @test_fastcc_large() {
; ARM-android: pop {r4, r5}

}

attributes #0 = { "split-stack" }
12 changes: 7 additions & 5 deletions test/CodeGen/Thumb/segmented-stacks-dynamic.ll
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-linux
; RUN: llc < %s -mtriple=thumb-linux-androideabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-android
; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -segmented-stacks -filetype=obj
; RUN: llc < %s -mtriple=thumb-linux-androideabi -segmented-stacks -filetype=obj
; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-linux
; RUN: llc < %s -mtriple=thumb-linux-androideabi -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-android
; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -filetype=obj
; RUN: llc < %s -mtriple=thumb-linux-androideabi -filetype=obj

; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)

define i32 @test_basic(i32 %l) {
define i32 @test_basic(i32 %l) #0 {
%mem = alloca i32, i32 %l
call void @dummy_use (i32* %mem, i32 %l)
%terminate = icmp eq i32 %l, 0
Expand Down Expand Up @@ -61,3 +61,5 @@ false:
; Thumb-android: pop {r4, r5}

}

attributes #0 = { "split-stack" }
20 changes: 11 additions & 9 deletions test/CodeGen/Thumb/segmented-stacks.ll
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
; RUN: llc < %s -mtriple=thumb-linux-androideabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-android
; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-linux
; RUN: llc < %s -mtriple=thumb-linux-androideabi -segmented-stacks -filetype=obj
; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -segmented-stacks -filetype=obj
; RUN: llc < %s -mtriple=thumb-linux-androideabi -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-android
; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-linux
; RUN: llc < %s -mtriple=thumb-linux-androideabi -filetype=obj
; RUN: llc < %s -mtriple=thumb-linux-unknown-gnueabi -filetype=obj


; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)

define void @test_basic() {
define void @test_basic() #0 {
%mem = alloca i32, i32 10
call void @dummy_use (i32* %mem, i32 10)
ret void
Expand Down Expand Up @@ -54,7 +54,7 @@ define void @test_basic() {

}

define i32 @test_nested(i32 * nest %closure, i32 %other) {
define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
%addend = load i32 * %closure
%result = add i32 %other, %addend
ret i32 %result
Expand Down Expand Up @@ -101,7 +101,7 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {

}

define void @test_large() {
define void @test_large() #0 {
%mem = alloca i32, i32 10000
call void @dummy_use (i32* %mem, i32 0)
ret void
Expand Down Expand Up @@ -150,7 +150,7 @@ define void @test_large() {

}

define fastcc void @test_fastcc() {
define fastcc void @test_fastcc() #0 {
%mem = alloca i32, i32 10
call void @dummy_use (i32* %mem, i32 10)
ret void
Expand Down Expand Up @@ -197,7 +197,7 @@ define fastcc void @test_fastcc() {

}

define fastcc void @test_fastcc_large() {
define fastcc void @test_fastcc_large() #0 {
%mem = alloca i32, i32 10000
call void @dummy_use (i32* %mem, i32 0)
ret void
Expand Down Expand Up @@ -245,3 +245,5 @@ define fastcc void @test_fastcc_large() {
; Thumb-linux: pop {r4, r5}

}

attributes #0 = { "split-stack" }
Loading

0 comments on commit bc1fd91

Please sign in to comment.