[X86][AVX2] Fix VPBROADCASTQ folding on 32-bit targets.
As i64 isn't a value type on 32-bit targets, we need to fold the VZEXT_LOAD into VPBROADCASTQ.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295733 91177308-0d34-0410-b5e6-96231b3b80d8
RKSimon committed Feb 21, 2017
1 parent eb973f7 commit 3f5e9f4
Showing 3 changed files with 18 additions and 4 deletions.
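For context (not part of the commit): a minimal reproducer for the 128-bit AVX2 case might look like the IR below. The function name is made up, but the shape mirrors the combine_broadcast_pshufb_insertion_v2i64 test updated further down. Compiled with something like llc -mtriple=i686-unknown-unknown -mattr=+avx2, the i64 argument lives on the stack; because i64 is not a legal type on 32-bit targets, the 64-bit load reaches the DAG as an X86vzload feeding X86VBroadcast, which is exactly the shape the new patterns match.

; Illustrative only: splat an i64 argument into a <2 x i64> vector.
define <2 x i64> @broadcast_i64_v2i64(i64 %a0) {
  %ins = insertelement <2 x i64> undef, i64 %a0, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}

Before this change the X32 code was a vmovq load followed by a register-to-register vpbroadcastq; with the patterns below, the load should fold into a single vpbroadcastq from the stack slot, as the updated checks show.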
11 changes: 11 additions & 0 deletions lib/Target/X86/X86InstrAVX512.td
@@ -1030,7 +1030,18 @@ multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
AVX5128IBase, EVEX;
}

+let Predicates = [HasAVX512] in {
+// 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
+def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
+(VPBROADCASTQZm addr:$src)>;
+}
+
let Predicates = [HasVLX, HasBWI] in {
+// 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
+def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+(VPBROADCASTQZ128m addr:$src)>;
+def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
+(VPBROADCASTQZ256m addr:$src)>;
// loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
// This means we'll encounter truncated i32 loads; match that here.
def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
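The VPBROADCASTQZm pattern above covers the 512-bit analogue. A hedged sketch of IR that should now select a single memory-operand broadcast into a zmm register on a 32-bit target with -mattr=+avx512f (the function name and expected output are assumptions, not taken from the commit's tests):

; Illustrative only: splat an i64 argument into an <8 x i64> vector.
define <8 x i64> @broadcast_i64_v8i64(i64 %a0) {
  %ins = insertelement <8 x i64> undef, i64 %a0, i32 0
  %splat = shufflevector <8 x i64> %ins, <8 x i64> undef, <8 x i32> zeroinitializer
  ret <8 x i64> %splat
}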
5 changes: 5 additions & 0 deletions lib/Target/X86/X86InstrSSE.td
@@ -8265,6 +8265,11 @@ defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
v2i64, v4i64, NoVLX>;

let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
+// 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
+def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+(VPBROADCASTQrm addr:$src)>;
+def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
+(VPBROADCASTQYrm addr:$src)>;
// loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
// This means we'll encounter truncated i32 loads; match that here.
def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
6 changes: 2 additions & 4 deletions test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -803,8 +803,7 @@ define <32 x i8> @combine_unpack_unpack_pshufb(<32 x i8> %a0) {
define <16 x i8> @combine_broadcast_pshufb_insertion_v2i64(i64 %a0) {
; X32-LABEL: combine_broadcast_pshufb_insertion_v2i64:
; X32: # BB#0:
-; X32-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: vpbroadcastq %xmm0, %xmm0
+; X32-NEXT: vpbroadcastq {{[0-9]+}}(%esp), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: combine_broadcast_pshufb_insertion_v2i64:
@@ -821,8 +820,7 @@ define <16 x i8> @combine_broadcast_pshufb_insertion_v2i64(i64 %a0) {
define <8 x i32> @combine_broadcast_permd_insertion_v4i64(i64 %a0) {
; X32-LABEL: combine_broadcast_permd_insertion_v4i64:
; X32: # BB#0:
-; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: vbroadcastsd %xmm0, %ymm0
+; X32-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: combine_broadcast_permd_insertion_v4i64:
