Skip to content

Commit

Permalink
For the current Atom processor, the fastest way to handle a call
indirect through a memory address is to load the memory address into
a register and then call indirect through the register.

This patch implements this improvement by modifying SelectionDAG to
force a function address which is a memory reference to be loaded
into a virtual register.

Patch by Sriram Murali.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178171 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
pgurd committed Mar 27, 2013
1 parent e915047 commit 1edadea
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 1 deletion.
7 changes: 6 additions & 1 deletion lib/Target/X86/X86.td
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
// Subtarget feature "pad-short-functions": when enabled, sets the
// PadShortFunctions subtarget attribute to "true".
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
"PadShortFunctions", "true",
"Pad short functions">;
// Subtarget feature "call-reg-indirect": when enabled, sets the
// CallRegIndirect subtarget attribute to "true", requesting that calls
// through a memory reference be converted to register-based indirect calls.
def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
"CallRegIndirect", "true",
"Call register indirect">;

//===----------------------------------------------------------------------===//
// X86 processors supported.
Expand Down Expand Up @@ -181,7 +184,9 @@ def : ProcessorModel<"penryn", SandyBridgeModel,
def : ProcessorModel<"atom", AtomModel,
[ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
FeatureSlowDivide, FeaturePadShortFunctions]>;
FeatureSlowDivide,
FeatureCallRegIndirect,
FeaturePadShortFunctions]>;

// "Arrandale" along with corei3 and corei5
def : ProcessorModel<"corei7", SandyBridgeModel,
Expand Down
13 changes: 13 additions & 0 deletions lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2629,6 +2629,19 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InFlag = Chain.getValue(1);
}

// Use indirect reference through register, when CALL uses a memory reference.
// This path is taken only when the subtarget reports callRegIndirect() and
// the callee operand is an ISD::LOAD node.
if (Subtarget->callRegIndirect() &&
Callee.getOpcode() == ISD::LOAD) {
// Select the register class for an address-sized integer: i64 on 64-bit
// subtargets, i32 otherwise.
const TargetRegisterClass *AddrRegClass =
getRegClassFor(Subtarget->is64Bit() ? MVT::i64:MVT::i32);
MachineRegisterInfo &MRI = MF.getRegInfo();
// Fresh virtual register of that class, meant to hold the call target.
unsigned VReg = MRI.createVirtualRegister(AddrRegClass);
// NOTE(review): Callee (the load node's value) is passed in the first
// argument position of getCopyFromReg, which presumably is the chain
// operand -- confirm against the SelectionDAG API that this is intended.
SDValue tempValue = DAG.getCopyFromReg(Callee,
dl, VReg, Callee.getValueType());
// The value read from VReg is then copied back into the same VReg, chained
// on the current Chain and InFlag.
// NOTE(review): Callee itself is not reassigned in this block, so the
// Ops.push_back(Callee) that follows still receives the original load node
// -- confirm the register copy actually reaches the call.
Chain = DAG.getCopyToReg(Chain, dl, VReg, tempValue, InFlag);
// Carry result 1 of the copy node forward in InFlag, mirroring the
// statement that precedes this block.
InFlag = Chain.getValue(1);
}

Ops.push_back(Chain);
Ops.push_back(Callee);

Expand Down
1 change: 1 addition & 0 deletions lib/Target/X86/X86Subtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,7 @@ void X86Subtarget::initializeEnvironment() {
HasSlowDivide = false;
PostRAScheduler = false;
PadShortFunctions = false;
CallRegIndirect = false;
stackAlignment = 4;
// FIXME: this is a known good value for Yonah. How about others?
MaxInlineSizeThreshold = 128;
Expand Down
5 changes: 5 additions & 0 deletions lib/Target/X86/X86Subtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,10 @@ class X86Subtarget : public X86GenSubtargetInfo {
/// a stall when returning too early.
bool PadShortFunctions;

/// CallRegIndirect - True if calls through a memory reference should be
/// converted to register-based indirect calls.
bool CallRegIndirect;

/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
Expand Down Expand Up @@ -269,6 +273,7 @@ class X86Subtarget : public X86GenSubtargetInfo {
bool useLeaForSP() const { return UseLeaForSP; }
bool hasSlowDivide() const { return HasSlowDivide; }
bool padShortFunctions() const { return PadShortFunctions; }
/// callRegIndirect - Return the CallRegIndirect subtarget flag.
bool callRegIndirect() const { return CallRegIndirect; }

bool isAtom() const { return X86ProcFamily == IntelAtom; }

Expand Down
45 changes: 45 additions & 0 deletions test/CodeGen/X86/atom-call-reg-indirect.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck -check-prefix=ATOM32 %s
; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck -check-prefix=ATOM-NOT32 %s
; RUN: llc < %s -mcpu=atom -mtriple=x86_64-linux | FileCheck -check-prefix=ATOM64 %s
; RUN: llc < %s -mcpu=core2 -mtriple=x86_64-linux | FileCheck -check-prefix=ATOM-NOT64 %s


; fn_ptr.ll
%class.A = type { i32 (...)** }

; test1 - indirect call through a function pointer loaded from the vtable
; of the object returned by @_Z3facv. With -mcpu=atom the pointer is expected
; to be loaded into a register and called through that register; with
; -mcpu=core2 the call is expected to use the memory operand directly.
define i32 @test1() #0 {
; Anchor every active check prefix on the function label. The previous
; anchor used a prefix that no FileCheck invocation in this file enables,
; so it was never checked.
;ATOM32: test1:
;ATOM-NOT32: test1:
;ATOM64: test1:
;ATOM-NOT64: test1:
entry:
%call = tail call %class.A* @_Z3facv()
%0 = bitcast %class.A* %call to void (%class.A*)***
%vtable = load void (%class.A*)*** %0, align 8
%1 = load void (%class.A*)** %vtable, align 8
;ATOM32: movl (%ecx), %ecx
;ATOM32: calll *%ecx
;ATOM-NOT32: calll *(%ecx)
;ATOM64: movq (%rcx), %rcx
;ATOM64: callq *%rcx
;ATOM-NOT64: callq *(%rcx)
tail call void %1(%class.A* %call)
ret i32 0
}

declare %class.A* @_Z3facv() #1

; virt_fn.ll
@p = external global void (i32)**

; test2 - indirect call through a function pointer loaded via the global @p.
; With -mcpu=atom the pointer is expected to be loaded into a register and
; called through that register; with -mcpu=core2 the call is expected to use
; the memory operand directly.
define i32 @test2() #0 {
; Anchor every active check prefix on the function label so the checks below
; cannot match output belonging to an earlier function. The previous anchor
; used a prefix that no FileCheck invocation in this file enables.
;ATOM32: test2:
;ATOM-NOT32: test2:
;ATOM64: test2:
;ATOM-NOT64: test2:
entry:
%0 = load void (i32)*** @p, align 8
%1 = load void (i32)** %0, align 8
;ATOM32: movl (%eax), %eax
;ATOM32: calll *%eax
; The 32-bit core2 expectation must use the same prefix as its FileCheck
; invocation; it previously used an unused prefix and was silently skipped.
;ATOM-NOT32: calll *(%eax)
;ATOM64: movq (%rax), %rax
;ATOM64: callq *%rax
;ATOM-NOT64: callq *(%rax)
tail call void %1(i32 2)
ret i32 0
}

0 comments on commit 1edadea

Please sign in to comment.