Avoid false dependencies of undef machine operands
This patch helps avoid false dependencies on undef registers by updating a machine instruction's undef operand to use either a register that the instruction truly depends on, or a register with clearance higher than Pref.

Pseudo example:

loop:
xmm0 = ...
xmm1 = vcvtsi2sdl eax, xmm0<undef>
... = inst xmm0
jmp loop

In this example, selecting xmm0 as the undef register creates a false dependency between loop iterations.
This false dependency cannot be broken by inserting an xor before vcvtsi2sdl, because xmm0 is live at the point of the vcvtsi2sdl instruction.
Selecting a different register instead of xmm0, preferably one that is not used in the loop, eliminates the problem.
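
A minimal sketch of the intended effect (xmm7 is only an assumed choice here; the pass picks whichever register it finds with a true dependency or sufficient clearance):

loop:
xmm0 = ...
xmm1 = vcvtsi2sdl eax, xmm7<undef>
... = inst xmm0
jmp loop

Since xmm7 is neither defined nor used inside the loop, the undef read no longer carries a dependency from one iteration to the next, and no extra xor has to be inserted.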

Differential Revision: https://reviews.llvm.org/D22466



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@278321 91177308-0d34-0410-b5e6-96231b3b80d8
Marina Yatsina committed Aug 11, 2016
1 parent 9c53423 commit ac9ca3b
Showing 8 changed files with 320 additions and 243 deletions.
53 changes: 53 additions & 0 deletions lib/CodeGen/ExecutionDepsFix.cpp
@@ -203,6 +203,8 @@ class ExeDepsFix : public MachineFunctionPass {
void processDefs(MachineInstr*, bool Kill);
void visitSoftInstr(MachineInstr*, unsigned mask);
void visitHardInstr(MachineInstr*, unsigned domain);
void pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
unsigned Pref);
bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref);
void processUndefReads(MachineBasicBlock*);
};
@@ -473,6 +475,56 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) {
processDefs(MI, !DomP.first);
}

/// \brief Helps avoid false dependencies on undef registers by updating the
/// machine instructions' undef operand to use a register that the instruction
/// is truly dependent on, or use a register with clearance higher than Pref.
void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
unsigned Pref) {
MachineOperand &MO = MI->getOperand(OpIdx);
assert(MO.isUndef() && "Expected undef machine operand");

unsigned OriginalReg = MO.getReg();

// Update only undef operands that are mapped to one register.
if (AliasMap[OriginalReg].size() != 1)
return;

// Get the undef operand's register class
const TargetRegisterClass *OpRC =
TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF);

// If the instruction has a true dependency, we can hide the false dependency
// behind it.
for (MachineOperand &CurrMO : MI->operands()) {
if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() ||
!OpRC->contains(CurrMO.getReg()))
continue;
// We found a true dependency - replace the undef register with the true
// dependency.
MO.setReg(CurrMO.getReg());
return;
}

// Go over all registers in the register class and find the register with
// max clearance or clearance higher than Pref.
unsigned MaxClearance = 0;
unsigned MaxClearanceReg = OriginalReg;
for (unsigned rx = 0; rx < OpRC->getNumRegs(); ++rx) {
unsigned Clearance = CurInstr - LiveRegs[rx].Def;
if (Clearance <= MaxClearance)
continue;
MaxClearance = Clearance;
MaxClearanceReg = OpRC->getRegister(rx);

if (MaxClearance > Pref)
break;
}

// Update the operand if we found a register with better clearance.
if (MaxClearanceReg != OriginalReg)
MO.setReg(MaxClearanceReg);
}

/// \brief Return true if it makes sense to break dependence on a partial def
/// or undef use.
bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
@@ -510,6 +562,7 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
unsigned OpNum;
unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
if (Pref) {
pickBestRegisterForUndef(MI, OpNum, Pref);
if (shouldBreakDependence(MI, OpNum, Pref))
UndefReads.push_back(std::make_pair(MI, OpNum));
}
2 changes: 1 addition & 1 deletion lib/Target/X86/X86InstrInfo.cpp
@@ -68,7 +68,7 @@ static cl::opt<unsigned>
UndefRegClearance("undef-reg-clearance",
cl::desc("How many idle instructions we would like before "
"certain undef register reads"),
cl::init(64), cl::Hidden);
cl::init(128), cl::Hidden);

enum {
// Select which memory operand is being unfolded.
61 changes: 27 additions & 34 deletions test/CodeGen/X86/avx512-cvt.ll
@@ -16,28 +16,27 @@ define <8 x double> @sltof864(<8 x i64> %a) {
; KNL: ## BB#0:
; KNL-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; KNL-NEXT: vpextrq $1, %xmm1, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
; KNL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
; KNL-NEXT: vmovq %xmm1, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
; KNL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
; KNL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; KNL-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; KNL-NEXT: vpextrq $1, %xmm2, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm3
; KNL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm3
; KNL-NEXT: vmovq %xmm2, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
; KNL-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2
; KNL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; KNL-NEXT: vextracti32x4 $1, %zmm0, %xmm2
; KNL-NEXT: vpextrq $1, %xmm2, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm3
; KNL-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm3
; KNL-NEXT: vmovq %xmm2, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
; KNL-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2
; KNL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm3
; KNL-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm3
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm0
; KNL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; KNL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
@@ -56,15 +55,14 @@ define <4 x double> @sltof464(<4 x i64> %a) {
; KNL: ## BB#0:
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT: vpextrq $1, %xmm1, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
; KNL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
; KNL-NEXT: vmovq %xmm1, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
; KNL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1
; KNL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
; KNL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0
; KNL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: retq
@@ -81,12 +79,11 @@ define <2 x float> @sltof2f32(<2 x i64> %a) {
; KNL-LABEL: sltof2f32:
; KNL: ## BB#0:
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
; KNL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
; KNL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; KNL-NEXT: retq
@@ -105,17 +102,16 @@ define <4 x float> @sltof4f32_mem(<4 x i64>* %a) {
; KNL: ## BB#0:
; KNL-NEXT: vmovdqu (%rdi), %ymm0
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
; KNL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
; KNL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
; KNL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; KNL-NEXT: retq
;
@@ -186,17 +182,16 @@ define <4 x float> @sltof432(<4 x i64> %a) {
; KNL-LABEL: sltof432:
; KNL: ## BB#0:
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
; KNL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
; KNL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2
; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
; KNL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2
; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; KNL-NEXT: retq
;
@@ -884,12 +879,11 @@ define <2 x float> @sitofp_2i1_float(<2 x float> %a) {
; KNL-NEXT: movl $-1, %eax
; KNL-NEXT: movl $0, %edx
; KNL-NEXT: cmovnel %eax, %edx
; KNL-NEXT: vcvtsi2ssl %edx, %xmm0, %xmm1
; KNL-NEXT: vcvtsi2ssl %edx, %xmm2, %xmm1
; KNL-NEXT: vmovq %xmm0, %rdx
; KNL-NEXT: testb $1, %dl
; KNL-NEXT: cmovnel %eax, %ecx
; KNL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2ssl %ecx, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2ssl %ecx, %xmm2, %xmm0
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; KNL-NEXT: retq
;
@@ -1091,11 +1085,10 @@ define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm1
; KNL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0
; KNL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; KNL-NEXT: retq
;
72 changes: 70 additions & 2 deletions test/CodeGen/X86/break-false-dep.ll
@@ -126,6 +126,7 @@ loop:
%i = phi i64 [ 1, %entry ], [ %inc, %loop ]
%s1 = phi i64 [ %vx, %entry ], [ %s2, %loop ]
%fi = sitofp i64 %i to double
tail call void asm sideeffect "", "~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"()
%vy = load double, double* %y
%fipy = fadd double %fi, %vy
%iipy = fptosi double %fipy to i64
@@ -174,6 +175,7 @@ for.body3:
store double %mul11, double* %arrayidx13, align 8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
tail call void asm sideeffect "", "~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"()
br i1 %exitcond, label %for.inc14, label %for.body3

for.inc14: ; preds = %for.body3
@@ -193,7 +195,7 @@ for.end16: ; preds = %for.inc14
;SSE-NEXT: movsd [[XMM0]],
;AVX-LABEL:@loopdep3
;AVX: vxorps [[XMM0:%xmm[0-9]+]], [[XMM0]]
;AVX-NEXT: vcvtsi2sdl {{.*}}, [[XMM0]], [[XMM0]]
;AVX-NEXT: vcvtsi2sdl {{.*}}, [[XMM0]], {{%xmm[0-9]+}}
;AVX-NEXT: vmulsd {{.*}}, [[XMM0]], [[XMM0]]
;AVX-NEXT: vmulsd {{.*}}, [[XMM0]], [[XMM0]]
;AVX-NEXT: vmulsd {{.*}}, [[XMM0]], [[XMM0]]
@@ -202,10 +204,76 @@ for.end16: ; preds = %for.inc14

define double @inlineasmdep(i64 %arg) {
top:
tail call void asm sideeffect "", "~{xmm0},~{dirflag},~{fpsr},~{flags}"()
tail call void asm sideeffect "", "~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{dirflag},~{fpsr},~{flags}"()
tail call void asm sideeffect "", "~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"()
tail call void asm sideeffect "", "~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{dirflag},~{fpsr},~{flags}"()
tail call void asm sideeffect "", "~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"()
%tmp1 = sitofp i64 %arg to double
ret double %tmp1
;AVX-LABEL:@inlineasmdep
;AVX: vxorps [[XMM0:%xmm[0-9]+]], [[XMM0]], [[XMM0]]
;AVX-NEXT: vcvtsi2sdq {{.*}}, [[XMM0]], {{%xmm[0-9]+}}
}

; Make sure we are making a smart choice regarding undef registers and
; hiding the false dependency behind a true dependency
define double @truedeps(float %arg) {
top:
tail call void asm sideeffect "", "~{xmm6},~{dirflag},~{fpsr},~{flags}"()
tail call void asm sideeffect "", "~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{dirflag},~{fpsr},~{flags}"()
tail call void asm sideeffect "", "~{xmm4},~{xmm5},~{xmm7},~{dirflag},~{fpsr},~{flags}"()
tail call void asm sideeffect "", "~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{dirflag},~{fpsr},~{flags}"()
tail call void asm sideeffect "", "~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"()
%tmp1 = fpext float %arg to double
ret double %tmp1
;AVX-LABEL:@truedeps
;AVX-NOT: vxorps
;AVX: vcvtss2sd [[XMM0:%xmm[0-9]+]], [[XMM0]], {{%xmm[0-9]+}}
}

; Make sure we are making a smart choice regarding undef registers and
; choosing the register with the highest clearence
define double @clearence(i64 %arg) {
top:
tail call void asm sideeffect "", "~{xmm6},~{dirflag},~{fpsr},~{flags}"()
tail call void asm sideeffect "", "~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{dirflag},~{fpsr},~{flags}"()
tail call void asm sideeffect "", "~{xmm4},~{xmm5},~{xmm7},~{dirflag},~{fpsr},~{flags}"()
tail call void asm sideeffect "", "~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{dirflag},~{fpsr},~{flags}"()
tail call void asm sideeffect "", "~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"()
%tmp1 = sitofp i64 %arg to double
ret double %tmp1
;AVX-LABEL:@clearence
;AVX: vxorps [[XMM6:%xmm6]], [[XMM6]], [[XMM6]]
;AVX-NEXT: vcvtsi2sdq {{.*}}, [[XMM6]], {{%xmm[0-9]+}}
}

; Make sure we are making a smart choice regarding undef registers in order to
; avoid a cyclic dependence on a write to the same register in a previous
; iteration, especially when we cannot zero out the undef register because it
; is alive.
define i64 @loopclearence(i64* nocapture %x, double* nocapture %y) nounwind {
entry:
%vx = load i64, i64* %x
br label %loop
loop:
%i = phi i64 [ 1, %entry ], [ %inc, %loop ]
%s1 = phi i64 [ %vx, %entry ], [ %s2, %loop ]
%fi = sitofp i64 %i to double
tail call void asm sideeffect "", "~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{dirflag},~{fpsr},~{flags}"()
tail call void asm sideeffect "", "~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{dirflag},~{fpsr},~{flags}"()
tail call void asm sideeffect "", "~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"()
%vy = load double, double* %y
%fipy = fadd double %fi, %vy
%iipy = fptosi double %fipy to i64
%s2 = add i64 %s1, %iipy
%inc = add nsw i64 %i, 1
%exitcond = icmp eq i64 %inc, 156250000
br i1 %exitcond, label %ret, label %loop
ret:
ret i64 %s2
;AVX-LABEL:@loopclearence
;Registers 4-7 are not used and therefore one of them should be chosen
;AVX-NOT: {{%xmm[4-7]}}
;AVX: vcvtsi2sdq {{.*}}, [[XMM4_7:%xmm[4-7]]], {{%xmm[0-9]+}}
;AVX-NOT: [[XMM4_7]]
}
3 changes: 2 additions & 1 deletion test/CodeGen/X86/copy-propagation.ll
@@ -26,7 +26,7 @@ target triple = "x86_64-pc-win32-elf"
; Copy the result in a temporary.
; Note: Technically the regalloc could have been smarter and this move not required,
; which would have hidden the bug.
; CHECK-NEXT: vmovapd %xmm0, [[TMP:%xmm[0-9]+]]
; CHECK: vmovapd %xmm0, [[TMP:%xmm[0-9]+]]
; Crush xmm0.
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK: movl $339772768, %e[[INDIRECT_CALL2:[a-z]+]]
@@ -37,6 +37,7 @@ target triple = "x86_64-pc-win32-elf"
define double @foo(i64 %arg) {
top:
%tmp = call double inttoptr (i64 339752784 to double (double, double)*)(double 1.000000e+00, double 0.000000e+00)
tail call void asm sideeffect "", "x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"(double %tmp)
%tmp1 = sitofp i64 %arg to double
call void inttoptr (i64 339772768 to void (double, double)*)(double %tmp, double %tmp1)
%tmp3 = fadd double %tmp1, %tmp
2 changes: 1 addition & 1 deletion test/CodeGen/X86/half.ll
@@ -299,7 +299,7 @@ define half @test_f80trunc_nodagcombine() #0 {
; CHECK-F16C-NEXT: movswl (%rsi), %eax
; CHECK-F16C-NEXT: vmovd %eax, %xmm0
; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; CHECK-F16C-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm1
; CHECK-F16C-NEXT: vcvtsi2ssl %edi, %xmm1, %xmm1
; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; CHECK-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; CHECK-F16C-NEXT: vaddss %xmm1, %xmm0, %xmm0
18 changes: 8 additions & 10 deletions test/CodeGen/X86/sse-fsignum.ll
@@ -39,16 +39,15 @@ define void @signum64a(<2 x double>*) {
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpextrq $1, %xmm2, %rax
; AVX1-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm3
; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm3
; AVX1-NEXT: vmovq %xmm2, %rax
; AVX1-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
; AVX1-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX1-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
; AVX1-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm1
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
; AVX1-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm0
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vsubpd %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vmovapd %xmm0, (%rdi)
@@ -60,16 +59,15 @@ define void @signum64a(<2 x double>*) {
; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpextrq $1, %xmm2, %rax
; AVX2-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm3
; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm3
; AVX2-NEXT: vmovq %xmm2, %rax
; AVX2-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
; AVX2-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2
; AVX2-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
; AVX2-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
; AVX2-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm1
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
; AVX2-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm0
; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vsubpd %xmm0, %xmm2, %xmm0
; AVX2-NEXT: vmovapd %xmm0, (%rdi)