X86 Tests: Update more isel tests with FastVariableShuffle feature

Summary:
Added the FastVariableShuffle feature to cases that resemble processors
on which this feature is enabled.
For AVX2 there are processors both with and without this feature enabled.
For AVX512, only KNL enables this feature, so cases that use only
+avx512f were left without FastVariableShuffle enabled.
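
For reference, the RUN-line pattern applied throughout these tests splits each
run into a slow and a fast variant (check-prefix names vary per file); a
representative sketch taken from the AVX2 tests below:

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=X64,X64-FAST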

Reviewers: RKSimon, craig.topper

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D41851

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@322090 91177308-0d34-0410-b5e6-96231b3b80d8
Zvi Rackover committed Jan 9, 2018
1 parent 8d3a0bc commit a0c74d2
Showing 29 changed files with 2,675 additions and 1,598 deletions.
50 changes: 34 additions & 16 deletions test/CodeGen/X86/avx2-conversions.ll
@@ -1,23 +1,41 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X32,X32-SLOW
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=X32,X32-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=X64,X64-FAST

define <4 x i32> @trunc4(<4 x i64> %A) nounwind {
; X32-LABEL: trunc4:
; X32: # %bb.0:
; X32-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; X32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3]
; X32-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; X32-NEXT: vzeroupper
; X32-NEXT: retl
; X32-SLOW-LABEL: trunc4:
; X32-SLOW: # %bb.0:
; X32-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; X32-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3]
; X32-SLOW-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; X32-SLOW-NEXT: vzeroupper
; X32-SLOW-NEXT: retl
;
; X64-LABEL: trunc4:
; X64: # %bb.0:
; X64-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; X64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3]
; X64-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
; X32-FAST-LABEL: trunc4:
; X32-FAST: # %bb.0:
; X32-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [0,2,4,6,4,6,6,7]
; X32-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; X32-FAST-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; X32-FAST-NEXT: vzeroupper
; X32-FAST-NEXT: retl
;
; X64-SLOW-LABEL: trunc4:
; X64-SLOW: # %bb.0:
; X64-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; X64-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3]
; X64-SLOW-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; X64-SLOW-NEXT: vzeroupper
; X64-SLOW-NEXT: retq
;
; X64-FAST-LABEL: trunc4:
; X64-FAST: # %bb.0:
; X64-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [0,2,4,6,4,6,6,7]
; X64-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; X64-FAST-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; X64-FAST-NEXT: vzeroupper
; X64-FAST-NEXT: retq
%B = trunc <4 x i64> %A to <4 x i32>
ret <4 x i32>%B
}
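
As a brief illustration of the difference (an editorial aside, assuming the standard little-endian element layout, not part of the generated checks): with +fast-variable-shuffle the vpermilps/vpermpd pair is replaced by a single variable cross-lane permute.

; input ymm0 as <4 x i64> = <A, B, C, D>, viewed as 8 x i32: [A.lo, A.hi, B.lo, B.hi, C.lo, C.hi, D.lo, D.hi]
; vpermps with indices [0,2,4,6,4,6,6,7] places [A.lo, B.lo, C.lo, D.lo] in the low 128 bits,
; which is exactly trunc <4 x i64> to <4 x i32>; the remaining indices are don't-cares
; because only xmm0 is used by the caller.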
64 changes: 43 additions & 21 deletions test/CodeGen/X86/avx2-vector-shifts.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X32,X32-SLOW
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=X32,X32-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=X64,X64-FAST

; AVX2 Logical Shift Left

@@ -372,25 +374,45 @@ entry:
}

define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
; X32-LABEL: srl_trunc_and_v4i64:
; X32: # %bb.0:
; X32-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; X32-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; X32-NEXT: vpbroadcastd {{.*#+}} xmm2 = [8,8,8,8]
; X32-NEXT: vpand %xmm2, %xmm1, %xmm1
; X32-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: srl_trunc_and_v4i64:
; X64: # %bb.0:
; X64-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; X64-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; X64-NEXT: vpbroadcastd {{.*#+}} xmm2 = [8,8,8,8]
; X64-NEXT: vpand %xmm2, %xmm1, %xmm1
; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
; X32-SLOW-LABEL: srl_trunc_and_v4i64:
; X32-SLOW: # %bb.0:
; X32-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; X32-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; X32-SLOW-NEXT: vpbroadcastd {{.*#+}} xmm2 = [8,8,8,8]
; X32-SLOW-NEXT: vpand %xmm2, %xmm1, %xmm1
; X32-SLOW-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; X32-SLOW-NEXT: vzeroupper
; X32-SLOW-NEXT: retl
;
; X32-FAST-LABEL: srl_trunc_and_v4i64:
; X32-FAST: # %bb.0:
; X32-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; X32-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
; X32-FAST-NEXT: vpbroadcastd {{.*#+}} xmm2 = [8,8,8,8]
; X32-FAST-NEXT: vpand %xmm2, %xmm1, %xmm1
; X32-FAST-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; X32-FAST-NEXT: vzeroupper
; X32-FAST-NEXT: retl
;
; X64-SLOW-LABEL: srl_trunc_and_v4i64:
; X64-SLOW: # %bb.0:
; X64-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; X64-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; X64-SLOW-NEXT: vpbroadcastd {{.*#+}} xmm2 = [8,8,8,8]
; X64-SLOW-NEXT: vpand %xmm2, %xmm1, %xmm1
; X64-SLOW-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; X64-SLOW-NEXT: vzeroupper
; X64-SLOW-NEXT: retq
;
; X64-FAST-LABEL: srl_trunc_and_v4i64:
; X64-FAST: # %bb.0:
; X64-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7]
; X64-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
; X64-FAST-NEXT: vpbroadcastd {{.*#+}} xmm2 = [8,8,8,8]
; X64-FAST-NEXT: vpand %xmm2, %xmm1, %xmm1
; X64-FAST-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
; X64-FAST-NEXT: vzeroupper
; X64-FAST-NEXT: retq
%and = and <4 x i64> %y, <i64 8, i64 8, i64 8, i64 8>
%trunc = trunc <4 x i64> %and to <4 x i32>
%sra = lshr <4 x i32> %x, %trunc
36 changes: 18 additions & 18 deletions test/CodeGen/X86/avx512-extract-subvector-load-store.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -O2 | FileCheck %s --check-prefix=AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl -O2 | FileCheck %s --check-prefix=AVX512NOTDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq,+fast-variable-shuffle -O2 | FileCheck %s --check-prefix=AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+fast-variable-shuffle -O2 | FileCheck %s --check-prefix=AVX512NOTDQ

define void @load_v8i1_broadcast_4_v2i1(<8 x i1>* %a0,<2 x double> %a1,<2 x double> %a2,<2 x double>* %a3) {
; AVX512-LABEL: load_v8i1_broadcast_4_v2i1:
@@ -331,8 +331,8 @@ define void @load_v32i1_broadcast_31_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x fl
; AVX512-NEXT: kmovd (%rdi), %k0
; AVX512-NEXT: kshiftrd $24, %k0, %k0
; AVX512-NEXT: vpmovm2d %k0, %ymm2
; AVX512-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
; AVX512-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
; AVX512-NEXT: vpermd %ymm2, %ymm3, %ymm2
; AVX512-NEXT: vpmovd2m %ymm2, %k1
; AVX512-NEXT: vmovaps %ymm0, %ymm1 {%k1}
; AVX512-NEXT: vmovaps %ymm1, (%rsi)
@@ -345,8 +345,8 @@ define void @load_v32i1_broadcast_31_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x fl
; AVX512NOTDQ-NEXT: kshiftrd $24, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
; AVX512NOTDQ-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
; AVX512NOTDQ-NEXT: vpermd %ymm2, %ymm3, %ymm2
; AVX512NOTDQ-NEXT: vptestmd %ymm2, %ymm2, %k1
; AVX512NOTDQ-NEXT: vmovaps %ymm0, %ymm1 {%k1}
; AVX512NOTDQ-NEXT: vmovaps %ymm1, (%rsi)
@@ -541,8 +541,8 @@ define void @load_v64i1_broadcast_63_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x fl
; AVX512-NEXT: kmovq (%rdi), %k0
; AVX512-NEXT: kshiftrq $56, %k0, %k0
; AVX512-NEXT: vpmovm2d %k0, %ymm2
; AVX512-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
; AVX512-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
; AVX512-NEXT: vpermd %ymm2, %ymm3, %ymm2
; AVX512-NEXT: vpmovd2m %ymm2, %k1
; AVX512-NEXT: vmovaps %ymm0, %ymm1 {%k1}
; AVX512-NEXT: vmovaps %ymm1, (%rsi)
@@ -555,8 +555,8 @@ define void @load_v64i1_broadcast_63_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x fl
; AVX512NOTDQ-NEXT: kshiftrq $56, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm2, %ymm2 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[3,3,2,3,7,7,6,7]
; AVX512NOTDQ-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,2,2,2]
; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
; AVX512NOTDQ-NEXT: vpermd %ymm2, %ymm3, %ymm2
; AVX512NOTDQ-NEXT: vptestmd %ymm2, %ymm2, %k1
; AVX512NOTDQ-NEXT: vmovaps %ymm0, %ymm1 {%k1}
; AVX512NOTDQ-NEXT: vmovaps %ymm1, (%rsi)
@@ -1134,8 +1134,8 @@ define void @load_v32i1_broadcast_31_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) {
; AVX512-NEXT: kmovd (%rdi), %k0
; AVX512-NEXT: kshiftrd $24, %k0, %k0
; AVX512-NEXT: vpmovm2d %k0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
; AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
; AVX512-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512-NEXT: vpmovd2m %ymm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: vzeroupper
@@ -1147,8 +1147,8 @@ define void @load_v32i1_broadcast_31_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) {
; AVX512NOTDQ-NEXT: kshiftrd $24, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
; AVX512NOTDQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
; AVX512NOTDQ-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
@@ -1369,8 +1369,8 @@ define void @load_v64i1_broadcast_63_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
; AVX512-NEXT: kmovq (%rdi), %k0
; AVX512-NEXT: kshiftrq $56, %k0, %k0
; AVX512-NEXT: vpmovm2d %k0, %ymm0
; AVX512-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
; AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX512-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
; AVX512-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512-NEXT: vpmovd2m %ymm0, %k0
; AVX512-NEXT: kmovb %k0, (%rsi)
; AVX512-NEXT: vzeroupper
@@ -1382,8 +1382,8 @@ define void @load_v64i1_broadcast_63_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
; AVX512NOTDQ-NEXT: kshiftrq $56, %k0, %k1
; AVX512NOTDQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512NOTDQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512NOTDQ-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,3,2,3,7,7,6,7]
; AVX512NOTDQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX512NOTDQ-NEXT: vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
; AVX512NOTDQ-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX512NOTDQ-NEXT: vptestmd %ymm0, %ymm0, %k0
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
38 changes: 19 additions & 19 deletions test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512dq %s -o - | FileCheck %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512dq,+fast-variable-shuffle %s -o - | FileCheck %s

; FIXME: fixing PR34394 should fix the i32x2 memory cases resulting in a simple vbroadcasti32x2 instruction.

@@ -459,8 +459,8 @@ define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask3(<2 x i32>* %vp, <4 x i3
define <8 x i32> @test_2xi32_to_8xi32_mem(<2 x i32>* %vp) {
; CHECK-LABEL: test_2xi32_to_8xi32_mem:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,4,5,6,7],zero,zero,zero,zero
; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0
; CHECK-NEXT: retq
%vec = load <2 x i32>, <2 x i32>* %vp
@@ -470,8 +470,8 @@ define <8 x i32> @test_2xi32_to_8xi32_mem(<2 x i32>* %vp) {
define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask0(<2 x i32>* %vp, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_8xi32_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6,7,4,5,6,7],zero,zero,zero,zero
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpeqd %ymm3, %ymm1, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = xmm2[0,1,0,1,0,1,0,1]
@@ -486,8 +486,8 @@ define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask0(<2 x i32>* %vp, <8 x i32>
define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask0(<2 x i32>* %vp, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mem_mask0:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,4,5,6,7],zero,zero,zero,zero
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpcmpeqd %ymm2, %ymm0, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1]
@@ -501,8 +501,8 @@ define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask0(<2 x i32>* %vp, <8 x i3
define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask1(<2 x i32>* %vp, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_8xi32_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6,7,4,5,6,7],zero,zero,zero,zero
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpeqd %ymm3, %ymm1, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = xmm2[0,1,0,1,0,1,0,1]
@@ -517,8 +517,8 @@ define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask1(<2 x i32>* %vp, <8 x i32>
define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask1(<2 x i32>* %vp, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mem_mask1:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,4,5,6,7],zero,zero,zero,zero
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpcmpeqd %ymm2, %ymm0, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1]
@@ -532,8 +532,8 @@ define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask1(<2 x i32>* %vp, <8 x i3
define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask2(<2 x i32>* %vp, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_8xi32_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6,7,4,5,6,7],zero,zero,zero,zero
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpeqd %ymm3, %ymm1, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = xmm2[0,1,0,1,0,1,0,1]
@@ -548,8 +548,8 @@ define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask2(<2 x i32>* %vp, <8 x i32>
define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask2(<2 x i32>* %vp, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mem_mask2:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,4,5,6,7],zero,zero,zero,zero
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpcmpeqd %ymm2, %ymm0, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1]
@@ -563,8 +563,8 @@ define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask2(<2 x i32>* %vp, <8 x i3
define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask3(<2 x i32>* %vp, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_8xi32_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm2 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; CHECK-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6,7,4,5,6,7],zero,zero,zero,zero
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpcmpeqd %ymm3, %ymm1, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = xmm2[0,1,0,1,0,1,0,1]
@@ -579,8 +579,8 @@ define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask3(<2 x i32>* %vp, <8 x i32>
define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask3(<2 x i32>* %vp, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mem_mask3:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,7,4,5,6,7],zero,zero,zero,zero
; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpcmpeqd %ymm2, %ymm0, %k1
; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1]