Skip to content

Commit

Permalink
Be careful about scheduling nodes above previous calls. It increase u…
Browse files Browse the repository at this point in the history
…sages of

more callee-saved registers and introduce copies. Only allows it if scheduling
a node above calls would end up lessen register pressure.

Call operands also has added ABI restrictions for register allocation, so be
extra careful with hoisting them above calls.

rdar://9329627


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@130245 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
Evan Cheng committed Apr 26, 2011
1 parent 90fab0f commit 554daa6
Show file tree
Hide file tree
Showing 8 changed files with 138 additions and 27 deletions.
7 changes: 4 additions & 3 deletions include/llvm/CodeGen/ScheduleDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ namespace llvm {
unsigned short Latency; // Node latency.
bool isVRegCycle : 1; // May use and def the same vreg.
bool isCall : 1; // Is a function call.
bool isCallOp : 1; // Is a function call operand.
bool isTwoAddress : 1; // Is a two-address instruction.
bool isCommutable : 1; // Is a commutable instruction.
bool hasPhysRegDefs : 1; // Has physreg defs that are being used.
Expand Down Expand Up @@ -280,7 +281,7 @@ namespace llvm {
: Node(node), Instr(0), OrigNode(0), NodeNum(nodenum),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
isVRegCycle(false), isCall(false), isTwoAddress(false),
isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
isPending(false), isAvailable(false), isScheduled(false),
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
Expand All @@ -294,7 +295,7 @@ namespace llvm {
: Node(0), Instr(instr), OrigNode(0), NodeNum(nodenum),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
isVRegCycle(false), isCall(false), isTwoAddress(false),
isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
isPending(false), isAvailable(false), isScheduled(false),
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
Expand All @@ -307,7 +308,7 @@ namespace llvm {
: Node(0), Instr(0), OrigNode(0), NodeNum(~0u),
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
isVRegCycle(false), isCall(false), isTwoAddress(false),
isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
isPending(false), isAvailable(false), isScheduled(false),
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
Expand Down
43 changes: 42 additions & 1 deletion lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1732,7 +1732,17 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
// If SU does not have a register def, schedule it close to its uses
// because it does not lengthen any live ranges.
return 0;
#if 1
return SethiUllmanNumbers[SU->NodeNum];
#else
unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
if (SU->isCallOp) {
// FIXME: This assumes all of the defs are used as call operands.
int NP = (int)Priority - SU->getNode()->getNumValues();
return (NP > 0) ? NP : 0;
}
return Priority;
#endif
}

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -2238,11 +2248,35 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
// Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
unsigned LPriority = SPQ->getNodePriority(left);
unsigned RPriority = SPQ->getNodePriority(right);

// Be really careful about hoisting call operands above previous calls.
// Only allows it if it would reduce register pressure.
if (left->isCall && right->isCallOp) {
unsigned RNumVals = right->getNode()->getNumValues();
RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
}
if (right->isCall && left->isCallOp) {
unsigned LNumVals = left->getNode()->getNumValues();
LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
}

if (LPriority != RPriority) {
DEBUG(++FactorCount[FactStatic]);
return LPriority > RPriority;
}

// One or both of the nodes are calls and their sethi-ullman numbers are the
// same, then keep source order.
if (left->isCall || right->isCall) {
unsigned LOrder = SPQ->getNodeOrdering(left);
unsigned ROrder = SPQ->getNodeOrdering(right);

// Prefer an ordering where the lower the non-zero order number, the higher
// the preference.
if ((LOrder || ROrder) && LOrder != ROrder)
return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
}

// Try schedule def + use closer when Sethi-Ullman numbers are the same.
// e.g.
// t1 = op t2, c1
Expand Down Expand Up @@ -2275,7 +2309,14 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
return LScratch > RScratch;
}

if (!DisableSchedCycles) {
// Comparing latency against a call makes little sense unless the node
// is register pressure-neutral.
if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
return (left->NodeQueueId > right->NodeQueueId);

// Do not compare latencies when one or both of the nodes are calls.
if (!DisableSchedCycles &&
!(left->isCall || right->isCall)) {
int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
if (result != 0)
return result > 0;
Expand Down
19 changes: 19 additions & 0 deletions lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
SU->Latency = Old->Latency;
SU->isVRegCycle = Old->isVRegCycle;
SU->isCall = Old->isCall;
SU->isCallOp = Old->isCallOp;
SU->isTwoAddress = Old->isTwoAddress;
SU->isCommutable = Old->isCommutable;
SU->hasPhysRegDefs = Old->hasPhysRegDefs;
Expand Down Expand Up @@ -285,6 +286,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
Worklist.push_back(DAG->getRoot().getNode());
Visited.insert(DAG->getRoot().getNode());

SmallVector<SUnit*, 8> CallSUnits;
while (!Worklist.empty()) {
SDNode *NI = Worklist.pop_back_val();

Expand Down Expand Up @@ -337,6 +339,9 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
if (!HasGlueUse) break;
}

if (NodeSUnit->isCall)
CallSUnits.push_back(NodeSUnit);

// Schedule zero-latency TokenFactor below any nodes that may increase the
// schedule height. Otherwise, ancestors of the TokenFactor may appear to
// have false stalls.
Expand All @@ -356,6 +361,20 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// Assign the Latency field of NodeSUnit using target-provided information.
ComputeLatency(NodeSUnit);
}

// Find all call operands.
while (!CallSUnits.empty()) {
SUnit *SU = CallSUnits.pop_back_val();
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (SUNode->getOpcode() != ISD::CopyToReg)
continue;
SDNode *SrcN = SUNode->getOperand(2).getNode();
if (isPassiveNode(SrcN)) continue; // Not scheduled.
SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
SrcSU->isCallOp = true;
}
}
}

void ScheduleDAGSDNodes::AddSchedEdges() {
Expand Down
70 changes: 70 additions & 0 deletions test/CodeGen/ARM/2011-04-26-SchedTweak.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -mcpu=cortex-a8 | FileCheck %s

; Do not move the umull above previous call which would require use of
; more callee-saved registers and introduce copies.
; rdar://9329627

%struct.FF = type { i32 (i32*)*, i32 (i32*, i32*, i32, i32, i32, i32)*, i32 (i32, i32, i8*)*, void ()*, i32 (i32, i8*, i32*)*, i32 ()* }
%struct.BD = type { %struct.BD*, i32, i32, i32, i32, i64, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i64, i32)*, [16 x i8], i64, i64 }

@FuncPtr = external hidden unnamed_addr global %struct.FF*
@.str1 = external hidden unnamed_addr constant [6 x i8], align 4
@G = external unnamed_addr global i32
@.str2 = external hidden unnamed_addr constant [58 x i8], align 4
@.str3 = external hidden unnamed_addr constant [58 x i8], align 4

define i32 @test() nounwind optsize ssp {
entry:
; CHECK: test:
; CHECK: push
; CHECK-NOT: push
%block_size = alloca i32, align 4
%block_count = alloca i32, align 4
%index_cache = alloca i32, align 4
store i32 0, i32* %index_cache, align 4
%tmp = load i32* @G, align 4
%tmp1 = call i32 @bar(i32 0, i32 0, i32 %tmp) nounwind
switch i32 %tmp1, label %bb8 [
i32 0, label %bb
i32 536870913, label %bb4
i32 536870914, label %bb6
]

bb:
%tmp2 = load i32* @G, align 4
%tmp4 = icmp eq i32 %tmp2, 0
br i1 %tmp4, label %bb1, label %bb8

bb1:
; CHECK: %bb1
; CHECK-NOT: umull
; CHECK: blx _Get
; CHECK: umull
; CHECK: blx _foo
%tmp5 = load i32* %block_size, align 4
%tmp6 = load i32* %block_count, align 4
%tmp7 = call %struct.FF* @Get() nounwind
store %struct.FF* %tmp7, %struct.FF** @FuncPtr, align 4
%tmp10 = zext i32 %tmp6 to i64
%tmp11 = zext i32 %tmp5 to i64
%tmp12 = mul nsw i64 %tmp10, %tmp11
%tmp13 = call i32 @foo(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), i64 %tmp12, i32 %tmp5) nounwind
br label %bb8

bb4:
ret i32 0

bb6:
ret i32 1

bb8:
ret i32 -1
}

declare i32 @printf(i8*, ...)

declare %struct.FF* @Get()

declare i32 @foo(i8*, i64, i32)

declare i32 @bar(i32, i32, i32)
2 changes: 1 addition & 1 deletion test/CodeGen/ARM/shifter_operand.ll
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ entry:
; A8: str r2, [r0, r1, lsl #2]

; A9: test4:
; A9: add r0, r0, r4, lsl #2
; A9: add r0, r0, r{{[0-9]+}}, lsl #2
; A9: ldr r1, [r0]
; A9: str r1, [r0]
%0 = tail call i8* (...)* @malloc(i32 undef) nounwind
Expand Down
20 changes: 0 additions & 20 deletions test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll

This file was deleted.

2 changes: 1 addition & 1 deletion test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<
; CHECK: _ZNKSs7compareERKSs:
; CHECK: it eq
; CHECK-NEXT: subeq r0, r{{[0-9]+}}, r{{[0-9]+}}
; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r7, r8, pc}
; CHECK-NEXT: ldmia.w sp!,
entry:
%0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3]
%1 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3]
Expand Down
2 changes: 1 addition & 1 deletion test/CodeGen/X86/pic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ entry:
; LINUX-NEXT: .L3$pb:
; LINUX: popl
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L3$pb), %[[REG3:e..]]
; LINUX: movl pfoo@GOT(%[[REG3]]),
; LINUX: calll afoo@PLT
; LINUX: movl pfoo@GOT(%[[REG3]]),
; LINUX: calll *
}

Expand Down

0 comments on commit 554daa6

Please sign in to comment.