Skip to content

Commit

Permalink
[X86][TD][vpmovm2] New TD pattern for the vpmovm2 instruction
Browse files Browse the repository at this point in the history
Up until now, the vpmovm2 instruction described its destination operand size
by the source operand size. This patch adds a new pattern for the vpmovm2
instruction. The node describes a new expansion of the destination (from
{128|256} to 512).

Differential Revision: https://reviews.llvm.org/D30654


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298586 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
michaelz-eng committed Mar 23, 2017
1 parent 31f7be5 commit 414ca07
Show file tree
Hide file tree
Showing 6 changed files with 1,744 additions and 76 deletions.
16 changes: 5 additions & 11 deletions lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17747,17 +17747,10 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,

// SKX processor
if ((InVTElt == MVT::i1) &&
(((Subtarget.hasBWI() && Subtarget.hasVLX() &&
VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() <= 16)) ||
(((Subtarget.hasBWI() && VTElt.getSizeInBits() <= 16)) ||

((Subtarget.hasBWI() && VT.is512BitVector() &&
VTElt.getSizeInBits() <= 16)) ||
((Subtarget.hasDQI() && VTElt.getSizeInBits() >= 32))))

((Subtarget.hasDQI() && Subtarget.hasVLX() &&
VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() >= 32)) ||

((Subtarget.hasDQI() && VT.is512BitVector() &&
VTElt.getSizeInBits() >= 32))))
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);

unsigned NumElts = VT.getVectorNumElements();
Expand Down Expand Up @@ -18002,7 +17995,8 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op,
MVT VT = Op.getValueType().getSimpleVT();
unsigned NumElts = VT.getVectorNumElements();

if ((Subtarget.hasVLX() && Subtarget.hasBWI() && Subtarget.hasDQI()) ||
if ((Subtarget.hasBWI() && NumElts >= 32) ||
(Subtarget.hasDQI() && NumElts < 16) ||
NumElts == 16) {
// Load and extend - everything is legal
if (NumElts < 8) {
Expand Down Expand Up @@ -18031,7 +18025,7 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op,

if (NumElts <= 8) {
// A subset, assume that we have only AVX-512F
unsigned NumBitsToLoad = NumElts < 8 ? 8 : NumElts;
unsigned NumBitsToLoad = 8;
MVT TypeToLoad = MVT::getIntegerVT(NumBitsToLoad);
SDValue Load = DAG.getLoad(TypeToLoad, dl, Ld->getChain(),
Ld->getBasePtr(),
Expand Down
30 changes: 19 additions & 11 deletions lib/Target/X86/X86InstrAVX512.td
Original file line number Diff line number Diff line change
Expand Up @@ -7953,6 +7953,17 @@ def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
[(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))]>, EVEX;
}

// Use 512bit version to implement 128/256 bit in case NoVLX.
//
// Emits one anonymous selection pattern for a mask-to-vector sign extension
// (X86vsext) whose result type is X86Info.VT.
//   X86Info - vector info of the result type being lowered; the visible
//             instantiations pass the 128- and 256-bit infos.
//   _       - vector info of the wider type whose instruction is used; the
//             visible instantiations pass the 512-bit info.
// The instruction is looked up by name as NAME#"Zrr", so this multiclass
// relies on the enclosing defm prefix resolving to an existing "...Zrr"
// record (presumably the 512-bit register form -- confirm against the Z defm).
multiclass avx512_convert_mask_to_vector_lowering<X86VectorVTInfo X86Info,
X86VectorVTInfo _> {

// Copy the source mask into the wide type's mask register class, apply the
// wide instruction, then take the X86Info.SubRegIdx subregister of the wide
// result to obtain the narrow vector.
def : Pat<(X86Info.VT (X86vsext (X86Info.KVT X86Info.KRC:$src))),
(X86Info.VT (EXTRACT_SUBREG
(_.VT (!cast<Instruction>(NAME#"Zrr")
(_.KVT (COPY_TO_REGCLASS X86Info.KRC:$src,_.KRC)))),
X86Info.SubRegIdx))>;
}

multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
Expand All @@ -7962,20 +7973,17 @@ let Predicates = [prd] in
defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
}
}
let Predicates = [prd, NoVLX] in {
defm Z256_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info256,VTInfo.info512>;
defm Z128_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info128,VTInfo.info512>;
}

multiclass avx512_convert_mask_to_vector<string OpcodeStr> {
defm NAME##B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, OpcodeStr,
HasBWI>;
defm NAME##W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, OpcodeStr,
HasBWI>, VEX_W;
defm NAME##D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, OpcodeStr,
HasDQI>;
defm NAME##Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, OpcodeStr,
HasDQI>, VEX_W;
}

defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">;
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;

multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
Expand Down
12 changes: 4 additions & 8 deletions test/CodeGen/X86/avx512-cvt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1107,11 +1107,9 @@ define <16 x double> @sitofp_16i1_double(<16 x double> %a) {
; AVX512DQ-NEXT: vxorpd %zmm2, %zmm2, %zmm2
; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm2, %k0
; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm2, %k1
; AVX512DQ-NEXT: vpmovm2q %k1, %zmm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0
; AVX512DQ-NEXT: vcvtdq2pd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovm2q %k0, %zmm1
; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
; AVX512DQ-NEXT: vcvtdq2pd %ymm1, %zmm1
; AVX512DQ-NEXT: retq
%cmpres = fcmp ogt <16 x double> %a, zeroinitializer
Expand Down Expand Up @@ -1150,8 +1148,7 @@ define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: vxorpd %zmm1, %zmm1, %zmm1
; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0
; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vcvtdq2pd %ymm0, %zmm0
; AVX512DQ-NEXT: retq
%cmpres = fcmp ogt <8 x double> %a, zeroinitializer
Expand Down Expand Up @@ -1192,8 +1189,7 @@ define <8 x float> @sitofp_8i1_float(<8 x float> %a) {
; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512DQ-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm1, %k0
; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-NEXT: retq
%cmpres = fcmp ogt <8 x float> %a, zeroinitializer
Expand Down
Loading

0 comments on commit 414ca07

Please sign in to comment.