Skip to content

Commit

Permalink
Enhance the fix of PR17631
Browse files Browse the repository at this point in the history
- The fix to PR17631 fixes part of the cases where 'vzeroupper' should
  not be issued before 'call' insn. There're other cases where helper
  calls will be inserted not limited to epilog. These helper calls do
  not follow the standard calling convention and won't clobber any YMM
  registers. (So far, all call conventions will clobber any or part of
  YMM registers.)
  This patch enhances the previous fix to cover more cases where 'vzeroupper'
  should not be inserted, by checking whether the function call clobbers any
  YMM register and skipping the call if it does not.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196261 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
hliao2 committed Dec 3, 2013
1 parent 07fc484 commit 239ffb3
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 12 deletions.
35 changes: 27 additions & 8 deletions lib/Target/X86/X86VZeroUpper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,25 @@ static bool hasYmmReg(MachineInstr *MI) {
return false;
}

/// clobbersAnyYmmReg() - Check if any YMM register will be clobbered by this
/// instruction.
///
/// Only register-mask operands of \p MI are inspected; all other operand
/// kinds are ignored. Returns true as soon as one register mask reports a
/// clobber of any register in YMM0..YMM31 or ZMM0..ZMM31 (both ranges
/// inclusive). Returns false when \p MI has no register-mask operand or when
/// none of its masks clobbers a register in those ranges.
static bool clobbersAnyYmmReg(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isRegMask())
      continue;
    // The upper bounds are inclusive so that YMM31 and ZMM31 are tested too.
    // The loops assume each register range is numbered contiguously in the
    // X86 register enum.
    for (unsigned reg = X86::YMM0; reg <= X86::YMM31; ++reg) {
      if (MO.clobbersPhysReg(reg))
        return true;
    }
    // A clobbered ZMM register is treated the same as a clobbered YMM one.
    for (unsigned reg = X86::ZMM0; reg <= X86::ZMM31; ++reg) {
      if (MO.clobbersPhysReg(reg))
        return true;
    }
  }
  return false;
}

/// runOnMachineFunction - Loop over all of the basic blocks, inserting
/// vzero upper instructions before function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
Expand Down Expand Up @@ -234,14 +253,6 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
DebugLoc dl = I->getDebugLoc();
MachineInstr *MI = I;

// Don't need to check instructions added in prolog.
// In prolog, special function calls may be added for specific targets
// (e.g. on Windows, a prolog helper '_chkstk' is called when the local
// variables exceed 4K bytes on stack.) These helpers won't use/def YMM/XMM
// registers.
if (MI->getFlag(MachineInstr::FrameSetup))
continue;

bool isControlFlow = MI->isCall() || MI->isReturn();

// Shortcut: don't need to check regular instructions in dirty state.
Expand All @@ -260,6 +271,14 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
if (!isControlFlow)
continue;

// If the call won't clobber any YMM register, skip it as well. It usually
// happens on helper function calls (such as '_chkstk', '_ftol2') where
// standard calling convention is not used (RegMask is not used to mark
// register clobbered and register usage (def/imp-def/use) is well-defined
// and explicitly specified).
if (MI->isCall() && !clobbersAnyYmmReg(MI))
continue;

BBHasCall = true;

// The VZEROUPPER instruction resets the upper 128 bits of all Intel AVX
Expand Down
20 changes: 16 additions & 4 deletions test/CodeGen/X86/pr17631.ll
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
; RUN: llc < %s -mcpu=core-avx-i -mtriple=i386-pc-win32 | FileCheck %s

%struct_type = type { [64 x <8 x float>], <8 x float> }

; Function Attrs: nounwind readnone
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)

; Function Attrs: nounwind
define i32 @equal(<8 x i32> %A) {
allocas:
%first_alloc = alloca [64 x <8 x i32>]
%second_alloc = alloca %struct_type

%A1 = bitcast <8 x i32> %A to <8 x float>
%A2 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %A1)
ret i32 %A2
Expand All @@ -20,3 +20,15 @@ allocas:
; CHECK-NOT: vzeroupper
; CHECK: _chkstk
; CHECK: ret

; Converts the double %x to an unsigned 64-bit integer and stores it through
; %p, then returns %y + %y as an <8 x float> vector. The FileCheck lines that
; follow this function expect a '_ftol2' call in the output with no
; 'vzeroupper' emitted ahead of it; presumably the fptoui is what produces
; that helper call on this target (confirm against the llc output).
define <8 x float> @foo(<8 x float> %y, i64* %p, double %x) {
%i = fptoui double %x to i64
store i64 %i, i64* %p
%ret = fadd <8 x float> %y, %y
ret <8 x float> %ret
}

; CHECK: foo
; CHECK-NOT: vzeroupper
; CHECK: _ftol2
; CHECK: ret

0 comments on commit 239ffb3

Please sign in to comment.