Skip to content

Commit

Permalink
[X86] SET0 to use XMM registers where possible PR26018 PR32862
Browse files (browse the repository at this point in the history)
Differential Revision: https://reviews.llvm.org/D35839


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@309298 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
dtemirbulatov committed Jul 27, 2017
1 parent 7b5f04f commit 15b834a
Show file tree
Hide file tree
Showing 85 changed files with 1,525 additions and 625 deletions.
19 changes: 14 additions & 5 deletions lib/Target/X86/X86InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7705,9 +7705,14 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::FsFLD0SS:
case X86::FsFLD0SD:
return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
- case X86::AVX_SET0:
+ case X86::AVX_SET0: {
assert(HasAVX && "AVX not supported");
- return Expand2AddrUndef(MIB, get(X86::VXORPSYrr));
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ unsigned SrcReg = MIB->getOperand(0).getReg();
+ unsigned XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
+ MIB->getOperand(0).setReg(XReg);
+ return Expand2AddrUndef(MIB, get(X86::VXORPSrr));
+ }
case X86::AVX512_128_SET0:
case X86::AVX512_FsFLD0SS:
case X86::AVX512_FsFLD0SD: {
Expand All @@ -7726,9 +7731,13 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
bool HasVLX = Subtarget.hasVLX();
unsigned SrcReg = MIB->getOperand(0).getReg();
const TargetRegisterInfo *TRI = &getRegisterInfo();
- if (HasVLX || TRI->getEncodingValue(SrcReg) < 16)
- return Expand2AddrUndef(MIB,
- get(HasVLX ? X86::VPXORDZ256rr : X86::VXORPSYrr));
+ if (HasVLX)
+ return Expand2AddrUndef(MIB, get(X86::VPXORDZ256rr));
+ if (TRI->getEncodingValue(SrcReg) < 16) {
+ unsigned XReg = TRI->getSubReg(SrcReg, X86::sub_xmm);
+ MIB->getOperand(0).setReg(XReg);
+ return Expand2AddrUndef(MIB, get(X86::VXORPSrr));
+ }
// Extended register without VLX. Use a larger XOR.
SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
MIB->getOperand(0).setReg(SrcReg);
Expand Down
2 changes: 1 addition & 1 deletion test/CodeGen/X86/2012-01-12-extract-sv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ define void @endless_loop() {
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; CHECK-NEXT: vxorps %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
; CHECK-NEXT: vmovaps %ymm0, (%eax)
Expand Down
3 changes: 2 additions & 1 deletion test/CodeGen/X86/2012-04-26-sdglue.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ define void @func() nounwind ssp {
; CHECK-LABEL: func:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovups 0, %xmm0
; CHECK-NEXT: vxorps %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} ymm2 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
; CHECK-NEXT: vbroadcastss 32, %xmm3
Expand All @@ -26,6 +26,7 @@ define void @func() nounwind ssp {
; CHECK-NEXT: vmovaps %ymm0, (%rax)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
; CHECK-NEXT: ## -- End function
%tmp = load <4 x float>, <4 x float>* null, align 1
%tmp14 = getelementptr <4 x float>, <4 x float>* null, i32 2
%tmp15 = load <4 x float>, <4 x float>* %tmp14, align 1
Expand Down
2 changes: 1 addition & 1 deletion test/CodeGen/X86/2012-1-10-buildvector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
define void @bad_cast() {
; CHECK-LABEL: bad_cast:
; CHECK: # BB#0:
; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vmovaps %xmm0, (%eax)
; CHECK-NEXT: movl $0, (%eax)
; CHECK-NEXT: vzeroupper
Expand Down
56 changes: 28 additions & 28 deletions test/CodeGen/X86/all-ones-vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ define <32 x i8> @allones_v32i8() nounwind {
;
; X32-AVX1-LABEL: allones_v32i8:
; X32-AVX1: # BB#0:
; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
Expand All @@ -174,7 +174,7 @@ define <32 x i8> @allones_v32i8() nounwind {
;
; X64-AVX1-LABEL: allones_v32i8:
; X64-AVX1: # BB#0:
; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
Expand All @@ -194,7 +194,7 @@ define <16 x i16> @allones_v16i16() nounwind {
;
; X32-AVX1-LABEL: allones_v16i16:
; X32-AVX1: # BB#0:
; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
Expand All @@ -211,7 +211,7 @@ define <16 x i16> @allones_v16i16() nounwind {
;
; X64-AVX1-LABEL: allones_v16i16:
; X64-AVX1: # BB#0:
; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
Expand All @@ -231,7 +231,7 @@ define <8 x i32> @allones_v8i32() nounwind {
;
; X32-AVX1-LABEL: allones_v8i32:
; X32-AVX1: # BB#0:
; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
Expand All @@ -248,7 +248,7 @@ define <8 x i32> @allones_v8i32() nounwind {
;
; X64-AVX1-LABEL: allones_v8i32:
; X64-AVX1: # BB#0:
; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
Expand All @@ -268,7 +268,7 @@ define <4 x i64> @allones_v4i64() nounwind {
;
; X32-AVX1-LABEL: allones_v4i64:
; X32-AVX1: # BB#0:
; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
Expand All @@ -285,7 +285,7 @@ define <4 x i64> @allones_v4i64() nounwind {
;
; X64-AVX1-LABEL: allones_v4i64:
; X64-AVX1: # BB#0:
; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
Expand All @@ -305,7 +305,7 @@ define <4 x double> @allones_v4f64() nounwind {
;
; X32-AVX1-LABEL: allones_v4f64:
; X32-AVX1: # BB#0:
; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
Expand All @@ -322,7 +322,7 @@ define <4 x double> @allones_v4f64() nounwind {
;
; X64-AVX1-LABEL: allones_v4f64:
; X64-AVX1: # BB#0:
; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
Expand All @@ -342,7 +342,7 @@ define <4 x double> @allones_v4f64_optsize() nounwind optsize {
;
; X32-AVX1-LABEL: allones_v4f64_optsize:
; X32-AVX1: # BB#0:
; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
Expand All @@ -359,7 +359,7 @@ define <4 x double> @allones_v4f64_optsize() nounwind optsize {
;
; X64-AVX1-LABEL: allones_v4f64_optsize:
; X64-AVX1: # BB#0:
; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
Expand All @@ -379,7 +379,7 @@ define <8 x float> @allones_v8f32() nounwind {
;
; X32-AVX1-LABEL: allones_v8f32:
; X32-AVX1: # BB#0:
; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
Expand All @@ -396,7 +396,7 @@ define <8 x float> @allones_v8f32() nounwind {
;
; X64-AVX1-LABEL: allones_v8f32:
; X64-AVX1: # BB#0:
; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
Expand All @@ -416,7 +416,7 @@ define <8 x float> @allones_v8f32_optsize() nounwind optsize {
;
; X32-AVX1-LABEL: allones_v8f32_optsize:
; X32-AVX1: # BB#0:
; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
Expand All @@ -433,7 +433,7 @@ define <8 x float> @allones_v8f32_optsize() nounwind optsize {
;
; X64-AVX1-LABEL: allones_v8f32_optsize:
; X64-AVX1: # BB#0:
; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: retq
;
Expand All @@ -455,7 +455,7 @@ define <64 x i8> @allones_v64i8() nounwind {
;
; X32-AVX1-LABEL: allones_v64i8:
; X32-AVX1: # BB#0:
; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX1-NEXT: retl
Expand Down Expand Up @@ -487,7 +487,7 @@ define <64 x i8> @allones_v64i8() nounwind {
;
; X64-AVX1-LABEL: allones_v64i8:
; X64-AVX1: # BB#0:
; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX1-NEXT: retq
Expand Down Expand Up @@ -522,7 +522,7 @@ define <32 x i16> @allones_v32i16() nounwind {
;
; X32-AVX1-LABEL: allones_v32i16:
; X32-AVX1: # BB#0:
; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX1-NEXT: retl
Expand Down Expand Up @@ -554,7 +554,7 @@ define <32 x i16> @allones_v32i16() nounwind {
;
; X64-AVX1-LABEL: allones_v32i16:
; X64-AVX1: # BB#0:
; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX1-NEXT: retq
Expand Down Expand Up @@ -589,7 +589,7 @@ define <16 x i32> @allones_v16i32() nounwind {
;
; X32-AVX1-LABEL: allones_v16i32:
; X32-AVX1: # BB#0:
; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX1-NEXT: retl
Expand All @@ -615,7 +615,7 @@ define <16 x i32> @allones_v16i32() nounwind {
;
; X64-AVX1-LABEL: allones_v16i32:
; X64-AVX1: # BB#0:
; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX1-NEXT: retq
Expand Down Expand Up @@ -644,7 +644,7 @@ define <8 x i64> @allones_v8i64() nounwind {
;
; X32-AVX1-LABEL: allones_v8i64:
; X32-AVX1: # BB#0:
; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX1-NEXT: retl
Expand All @@ -670,7 +670,7 @@ define <8 x i64> @allones_v8i64() nounwind {
;
; X64-AVX1-LABEL: allones_v8i64:
; X64-AVX1: # BB#0:
; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX1-NEXT: retq
Expand Down Expand Up @@ -699,7 +699,7 @@ define <8 x double> @allones_v8f64() nounwind {
;
; X32-AVX1-LABEL: allones_v8f64:
; X32-AVX1: # BB#0:
; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX1-NEXT: retl
Expand All @@ -725,7 +725,7 @@ define <8 x double> @allones_v8f64() nounwind {
;
; X64-AVX1-LABEL: allones_v8f64:
; X64-AVX1: # BB#0:
; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX1-NEXT: retq
Expand Down Expand Up @@ -754,7 +754,7 @@ define <16 x float> @allones_v16f32() nounwind {
;
; X32-AVX1-LABEL: allones_v16f32:
; X32-AVX1: # BB#0:
; X32-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X32-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX1-NEXT: retl
Expand All @@ -780,7 +780,7 @@ define <16 x float> @allones_v16f32() nounwind {
;
; X64-AVX1-LABEL: allones_v16f32:
; X64-AVX1: # BB#0:
; X64-AVX1-NEXT: vxorps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; X64-AVX1-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX1-NEXT: retq
Expand Down
6 changes: 3 additions & 3 deletions test/CodeGen/X86/avx-basic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ define void @zero256() nounwind ssp {
; CHECK-LABEL: zero256:
; CHECK: ## BB#0:
; CHECK-NEXT: movq _x@{{.*}}(%rip), %rax
; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vmovaps %ymm0, (%rax)
; CHECK-NEXT: movq _y@{{.*}}(%rip), %rax
; CHECK-NEXT: vmovaps %ymm0, (%rax)
Expand All @@ -36,7 +36,7 @@ define void @zero256() nounwind ssp {
define void @ones([0 x float]* nocapture %RET, [0 x float]* nocapture %aFOO) nounwind {
; CHECK-LABEL: ones:
; CHECK: ## BB#0: ## %allocas
; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; CHECK-NEXT: vmovaps %ymm0, (%rdi)
; CHECK-NEXT: vzeroupper
Expand All @@ -54,7 +54,7 @@ float>* %ptr2vec615, align 32
define void @ones2([0 x i32]* nocapture %RET, [0 x i32]* nocapture %aFOO) nounwind {
; CHECK-LABEL: ones2:
; CHECK: ## BB#0: ## %allocas
; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; CHECK-NEXT: vmovaps %ymm0, (%rdi)
; CHECK-NEXT: vzeroupper
Expand Down
8 changes: 4 additions & 4 deletions test/CodeGen/X86/avx-cast.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ define <8 x float> @castA(<4 x float> %m) nounwind uwtable readnone ssp {
; AVX-LABEL: castA:
; AVX: ## BB#0:
; AVX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX-NEXT: retq
%shuffle.i = shufflevector <4 x float> %m, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
Expand All @@ -21,7 +21,7 @@ define <4 x double> @castB(<2 x double> %m) nounwind uwtable readnone ssp {
; AVX-LABEL: castB:
; AVX: ## BB#0:
; AVX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX-NEXT: retq
%shuffle.i = shufflevector <2 x double> %m, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
Expand All @@ -34,14 +34,14 @@ define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp {
; AVX1-LABEL: castC:
; AVX1: ## BB#0:
; AVX1-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: castC:
; AVX2: ## BB#0:
; AVX2-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: retq
%shuffle.i = shufflevector <2 x i64> %m, <2 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
Expand Down
Loading

0 comments on commit 15b834a

Please sign in to comment.