forked from llvm-mirror/llvm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
The current Intel Atom microarchitecture has a feature whereby, when a function returns early, it is slightly faster to execute a sequence of NOP instructions to wait until the return address is ready, as opposed to simply stalling on the ret instruction until the return address is ready. When compiling for X86 Atom only, this patch will run a pass, called "X86PadShortFunction", which will add NOP instructions where fewer than four cycles elapse between function entry and return. It includes tests. This patch has been updated to address Nadav's review comments: - Optimize only at >= O1 and don't do optimization if -Os is set - Stores MachineBasicBlock* instead of BBNum - Uses DenseMap instead of std::map - Fixes placement of braces. Patch by Andy Zhang. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171879 91177308-0d34-0410-b5e6-96231b3b80d8
- Loading branch information
Showing
11 changed files
with
277 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
//===-------- X86PadShortFunction.cpp - pad short functions -----------===// | ||
// | ||
// The LLVM Compiler Infrastructure | ||
// | ||
// This file is distributed under the University of Illinois Open Source | ||
// License. See LICENSE.TXT for details. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// This file defines the pass which will pad short functions to prevent | ||
// a stall if a function returns before the return address is ready. This | ||
// is needed for some Intel Atom processors. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include <algorithm> | ||
|
||
#define DEBUG_TYPE "x86-pad-short-functions" | ||
#include "X86.h" | ||
#include "X86InstrInfo.h" | ||
#include "llvm/ADT/Statistic.h" | ||
#include "llvm/CodeGen/MachineFunctionPass.h" | ||
#include "llvm/CodeGen/MachineInstrBuilder.h" | ||
#include "llvm/CodeGen/MachineRegisterInfo.h" | ||
#include "llvm/CodeGen/Passes.h" | ||
#include "llvm/IR/Function.h" | ||
#include "llvm/Support/Debug.h" | ||
#include "llvm/Support/raw_ostream.h" | ||
#include "llvm/Target/TargetInstrInfo.h" | ||
|
||
using namespace llvm; | ||
|
||
STATISTIC(NumBBsPadded, "Number of basic blocks padded"); | ||
|
||
namespace { | ||
struct PadShortFunc : public MachineFunctionPass { | ||
static char ID; | ||
PadShortFunc() : MachineFunctionPass(ID) | ||
, Threshold(4), TM(0), TII(0) {} | ||
|
||
virtual bool runOnMachineFunction(MachineFunction &MF); | ||
|
||
virtual const char *getPassName() const { | ||
return "X86 Atom pad short functions"; | ||
} | ||
|
||
private: | ||
void findReturns(MachineBasicBlock *MBB, | ||
unsigned int Cycles = 0); | ||
|
||
bool cyclesUntilReturn(MachineBasicBlock *MBB, | ||
unsigned int &Cycles, | ||
MachineBasicBlock::iterator *Location = 0); | ||
|
||
void addPadding(MachineBasicBlock *MBB, | ||
MachineBasicBlock::iterator &MBBI, | ||
unsigned int NOOPsToAdd); | ||
|
||
const unsigned int Threshold; | ||
DenseMap<MachineBasicBlock*, unsigned int> ReturnBBs; | ||
|
||
const TargetMachine *TM; | ||
const TargetInstrInfo *TII; | ||
}; | ||
|
||
char PadShortFunc::ID = 0; | ||
} | ||
|
||
/// createX86PadShortFunctions - Returns a newly allocated instance of the
/// PadShortFunc pass.
FunctionPass *llvm::createX86PadShortFunctions() {
  FunctionPass *Pass = new PadShortFunc();
  return Pass;
}
|
||
/// runOnMachineFunction - Locate every return that can be reached from
/// the function entry in fewer than Threshold cycles and insert NOOP
/// padding in front of it.
///
/// Functions carrying the OptimizeForSize attribute are left untouched.
/// Returns true if any padding was inserted into MF.
bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
  bool OptForSize = MF.getFunction()->getAttributes().
    hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
  if (OptForSize)
    return false;

  TM = &MF.getTarget();
  TII = TM->getInstrInfo();

  // Search through basic blocks and mark the ones that have early returns.
  // The map is a member, so drop whatever a previous function left behind.
  ReturnBBs.clear();
  findReturns(MF.begin());

  bool MadeChange = false;

  // Pad the identified basic blocks with NOOPs.
  for (DenseMap<MachineBasicBlock*, unsigned int>::iterator
         I = ReturnBBs.begin(), E = ReturnBBs.end(); I != E; ++I) {
    MachineBasicBlock *MBB = I->first;
    unsigned int Cycles = I->second;

    if (Cycles >= Threshold)
      continue;

    // cyclesUntilReturn accumulates into its Cycles argument, so the
    // counter has to start at zero rather than being left uninitialized.
    // Only the location of the return is actually needed here.
    unsigned int BBCycles = 0;
    MachineBasicBlock::iterator ReturnLoc;
    if (!cyclesUntilReturn(MBB, BBCycles, &ReturnLoc))
      continue;

    addPadding(MBB, ReturnLoc, Threshold - Cycles);
    NumBBsPadded++;
    MadeChange = true;
  }

  return MadeChange;
}
|
||
/// findReturns - Starting at MBB, follow control flow and add all
/// basic blocks that contain a return to ReturnBBs.
///
/// Cycles is the number of cycles already spent on the path leading to
/// MBB. The search along a path stops as soon as Threshold cycles have
/// been accumulated, because returns that late need no padding. When a
/// block is reached along several short paths, the largest cycle count
/// below Threshold is the one kept in ReturnBBs.
void PadShortFunc::findReturns(MachineBasicBlock *MBB, unsigned int Cycles) {
  // If this BB has a return, note how many cycles it takes to get there.
  // Cycles is a by-value parameter, so the update stays local to this path.
  bool hasReturn = cyclesUntilReturn(MBB, Cycles);
  if (Cycles >= Threshold)
    return;

  if (hasReturn) {
    unsigned int &Recorded = ReturnBBs[MBB];
    Recorded = std::max(Recorded, Cycles);
    return;
  }

  // Follow branches in BB and look for returns.
  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
         E = MBB->succ_end(); I != E; ++I) {
    // Do not follow an edge from the block back to itself: a block whose
    // instructions add no latency would otherwise recurse without bound,
    // and extra trips around the loop say nothing about the shortest way
    // to reach a return.
    // NOTE(review): a cycle spanning several zero-latency blocks is still
    // not detected here - confirm whether that can occur in practice.
    if (*I == MBB)
      continue;
    findReturns(*I, Cycles);
  }
}
|
||
/// cyclesUntilReturn - Scan MBB from its first instruction. Every
/// instruction seen before a return adds its latency to Cycles. When a
/// return is found, its position is stored through Location (if non-null)
/// and true is returned; if the end of the block is reached first, the
/// result is false and Location is left untouched.
bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB,
                                     unsigned int &Cycles,
                                     MachineBasicBlock::iterator *Location) {
  for (MachineBasicBlock::iterator It = MBB->begin();
       It != MBB->end(); ++It) {
    MachineInstr *Instr = It;

    // Calls to other functions do not count as returns here because the
    // called function will be padded, if necessary.
    const bool IsPlainReturn = Instr->isReturn() && !Instr->isCall();
    if (!IsPlainReturn) {
      Cycles += TII->getInstrLatency(TM->getInstrItineraryData(), Instr);
      continue;
    }

    if (Location)
      *Location = It;
    return true;
  }

  return false;
}
|
||
/// addPadding - Insert NOOP instructions into MBB just prior to the
/// return at MBBI, reusing the return's debug location.
///
/// Two NOOPs are emitted for each unit of NOOPsToAdd.
/// NOTE(review): callers pass a cycle count, so the pairing presumably
/// reflects two NOOPs being issued per cycle on Atom - confirm.
void PadShortFunc::addPadding(MachineBasicBlock *MBB,
                              MachineBasicBlock::iterator &MBBI,
                              unsigned int NOOPsToAdd) {
  const DebugLoc DL = MBBI->getDebugLoc();

  for (unsigned int Remaining = NOOPsToAdd; Remaining != 0; --Remaining) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::NOOP));
    BuildMI(*MBB, MBBI, DL, TII->get(X86::NOOP));
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
; RUN: llc < %s -O1 -mcpu=atom -mtriple=i686-linux | FileCheck %s | ||
|
||
declare void @external_function(...) | ||
|
||
; A function that only returns its argument: the expected output is a
; single movl followed by six nop instructions ahead of the ret.
define i32 @test_return_val(i32 %a) nounwind { | ||
; CHECK: test_return_val | ||
; CHECK: movl | ||
; CHECK: nop | ||
; CHECK: nop | ||
; CHECK: nop | ||
; CHECK: nop | ||
; CHECK: nop | ||
; CHECK: nop | ||
; CHECK: ret | ||
ret i32 %a | ||
} | ||
|
||
; Same body as the plain return test but marked optsize: the ret must
; directly follow the movl, i.e. no padding is expected.
define i32 @test_optsize(i32 %a) nounwind optsize { | ||
; CHECK: test_optsize | ||
; CHECK: movl | ||
; CHECK-NEXT: ret | ||
ret i32 %a | ||
} | ||
|
||
; One add before the return: the expected output has an addl followed
; by four nop instructions ahead of the ret.
define i32 @test_add(i32 %a, i32 %b) nounwind { | ||
; CHECK: test_add | ||
; CHECK: addl | ||
; CHECK: nop | ||
; CHECK: nop | ||
; CHECK: nop | ||
; CHECK: nop | ||
; CHECK: ret | ||
%result = add i32 %a, %b | ||
ret i32 %result | ||
} | ||
|
||
; Conditional branch to two blocks that each return: after the je, both
; returns are expected to be preceded by two nop instructions.
define i32 @test_multiple_ret(i32 %a, i32 %b, i1 %c) nounwind { | ||
; CHECK: @test_multiple_ret | ||
; CHECK: je | ||
|
||
; CHECK: nop | ||
; CHECK: nop | ||
; CHECK: ret | ||
|
||
; CHECK: nop | ||
; CHECK: nop | ||
; CHECK: ret | ||
|
||
br i1 %c, label %bb1, label %bb2 | ||
|
||
bb1: | ||
ret i32 %a | ||
|
||
bb2: | ||
ret i32 %b | ||
} | ||
|
||
; One path tail-calls an external function, the other returns directly.
; The tail call is expected to appear as a jmp to external_function,
; while the ret in if.end is expected to be preceded by four nops.
define void @test_call_others(i32 %x) nounwind | ||
{ | ||
; CHECK: test_call_others | ||
; CHECK: je | ||
%tobool = icmp eq i32 %x, 0 | ||
br i1 %tobool, label %if.end, label %true.case | ||
|
||
; CHECK: jmp external_function | ||
true.case: | ||
tail call void bitcast (void (...)* @external_function to void ()*)() nounwind | ||
br label %if.end | ||
|
||
; CHECK: nop | ||
; CHECK: nop | ||
; CHECK: nop | ||
; CHECK: nop | ||
; CHECK: ret | ||
if.end: | ||
ret void | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters