Skip to content

Commit

Permalink
AVX-512: All forms of VCOMPRESS and VEXPAND instructions,
Browse files Browse the repository at this point in the history
encoding tests.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240272 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
Elena Demikhovsky committed Jun 22, 2015
1 parent 948713a commit 4f1ddd3
Show file tree
Hide file tree
Showing 5 changed files with 535 additions and 79 deletions.
40 changes: 9 additions & 31 deletions lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15215,18 +15215,10 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDValue PassThru = Op.getOperand(2);
if (isAllOnes(Mask)) // return data as is
return Op.getOperand(1);
EVT VT = Op.getValueType();
EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
VT.getVectorNumElements());
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
Mask.getValueType().getSizeInBits());
SDLoc dl(Op);
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
DAG.getBitcast(BitcastVT, Mask),
DAG.getIntPtrConstant(0, dl));

return DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToCompress,
PassThru);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
DataToCompress),
Mask, PassThru, Subtarget, DAG);
}
case BLEND: {
SDValue Mask = Op.getOperand(3);
Expand Down Expand Up @@ -15769,46 +15761,32 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
MachinePointerInfo(), false, false,
VT.getScalarSizeInBits()/8);

EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
VT.getVectorNumElements());
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
Mask.getValueType().getSizeInBits());
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
DAG.getBitcast(BitcastVT, Mask),
DAG.getIntPtrConstant(0, dl));

SDValue Compressed = DAG.getNode(IntrData->Opc0, dl, VT, VMask,
DataToCompress, DAG.getUNDEF(VT));
SDValue Compressed =
getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress),
Mask, DAG.getUNDEF(VT), Subtarget, DAG);
return DAG.getStore(Chain, dl, Compressed, Addr,
MachinePointerInfo(), false, false,
VT.getScalarSizeInBits()/8);
}
case EXPAND_FROM_MEM: {
SDLoc dl(Op);
SDValue Mask = Op.getOperand(4);
SDValue PathThru = Op.getOperand(3);
SDValue PassThru = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);
EVT VT = Op.getValueType();

if (isAllOnes(Mask)) // return just a load
return DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false,
false, VT.getScalarSizeInBits()/8);
EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
VT.getVectorNumElements());
EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
Mask.getValueType().getSizeInBits());
SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
DAG.getBitcast(BitcastVT, Mask),
DAG.getIntPtrConstant(0, dl));

SDValue DataToExpand = DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(),
false, false, false,
VT.getScalarSizeInBits()/8);

SDValue Results[] = {
DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToExpand, PathThru),
Chain};
getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToExpand),
Mask, PassThru, Subtarget, DAG), Chain};
return DAG.getMergeValues(Results, dl);
}
}
Expand Down
61 changes: 19 additions & 42 deletions lib/Target/X86/X86InstrAVX512.td
Original file line number Diff line number Diff line change
Expand Up @@ -5868,26 +5868,24 @@ defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND
//

multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
def rrkz : AVX5128I<opc, MRMDestReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
[(set _.RC:$dst, (_.VT (X86compress _.KRCWM:$mask, _.RC:$src,
_.ImmAllZerosV)))]>, EVEX_KZ;

let Constraints = "$src0 = $dst" in
def rrk : AVX5128I<opc, MRMDestReg, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
[(set _.RC:$dst, (_.VT (X86compress _.KRCWM:$mask, _.RC:$src,
_.RC:$src0)))]>, EVEX_K;
defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
(_.VT (X86compress _.RC:$src1))>, AVX5128IBase;

let mayStore = 1 in {
def mr : AVX5128I<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.RC:$src),
OpcodeStr # "\t{$src, $dst |$dst, $src}",
[]>, EVEX_CD8<_.EltSize, CD8VT1>;

def mrk : AVX5128I<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
[(store (_.VT (X86compress _.KRCWM:$mask, _.RC:$src, undef)),
[(store (_.VT (vselect _.KRCWM:$mask,
(_.VT (X86compress _.RC:$src)), _.ImmAllZerosV)),
addr:$dst)]>,
EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
}
Expand Down Expand Up @@ -5915,37 +5913,16 @@ defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info
// expand
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
[(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask, (_.VT _.RC:$src),
_.ImmAllZerosV)))]>, EVEX_KZ;

let Constraints = "$src0 = $dst" in
def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
[(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
(_.VT _.RC:$src), _.RC:$src0)))]>, EVEX_K;

let mayLoad = 1, Constraints = "$src0 = $dst" in
def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src),
OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
[(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
(_.VT (bitconvert
(_.LdFrag addr:$src))),
_.RC:$src0)))]>,
EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
(_.VT (X86expand _.RC:$src1))>, AVX5128IBase;

let mayLoad = 1 in
def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src),
OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
[(set _.RC:$dst, (_.VT (X86expand _.KRCWM:$mask,
(_.VT (bitconvert (_.LdFrag addr:$src))),
_.ImmAllZerosV)))]>,
EVEX_KZ, EVEX_CD8<_.EltSize, CD8VT1>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
(_.VT (X86expand (_.VT (bitconvert
(_.LdFrag addr:$src1)))))>,
AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>;
}

multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
Expand Down
10 changes: 4 additions & 6 deletions lib/Target/X86/X86InstrFragmentsSIMD.td
Original file line number Diff line number Diff line change
Expand Up @@ -347,12 +347,10 @@ def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>;
def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>;

def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3,
[SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3,
[SDTCisSameAs<0, 3>,
SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>;
def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 1,
[SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1,
[SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;

def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>,
SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>;
Expand Down
167 changes: 167 additions & 0 deletions test/MC/X86/avx512-encodings.s
Original file line number Diff line number Diff line change
Expand Up @@ -9514,3 +9514,170 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0xe2,0xd5,0x58,0x77,0xa2,0xf8,0xfb,0xff,0xff]
vpermi2pd -1032(%rdx){1to8}, %zmm5, %zmm20

// VCOMPRESSPD encoding tests, 512-bit (zmm) forms.
// Memory-destination forms: unmasked, write-masked with {%k4}, base+index*scale
// addressing, and displacement boundaries. Per the expected bytes below, 1016
// and -1024 are emitted as a single displacement byte (0x7f / 0x80), while
// 1024 and -1032 are emitted as a 4-byte displacement.
// Register-destination forms: unmasked, write-masked with {%k6}, and
// write-masked with {%k6} plus the {z} modifier.
// CHECK: vcompresspd %zmm9, (%rcx)
// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x8a,0x09]
vcompresspd %zmm9, (%rcx)

// CHECK: vcompresspd %zmm9, (%rcx) {%k4}
// CHECK: encoding: [0x62,0x72,0xfd,0x4c,0x8a,0x09]
vcompresspd %zmm9, (%rcx) {%k4}

// CHECK: vcompresspd %zmm9, 291(%rax,%r14,8)
// CHECK: encoding: [0x62,0x32,0xfd,0x48,0x8a,0x8c,0xf0,0x23,0x01,0x00,0x00]
vcompresspd %zmm9, 291(%rax,%r14,8)

// CHECK: vcompresspd %zmm9, 1016(%rdx)
// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x8a,0x4a,0x7f]
vcompresspd %zmm9, 1016(%rdx)

// CHECK: vcompresspd %zmm9, 1024(%rdx)
// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x8a,0x8a,0x00,0x04,0x00,0x00]
vcompresspd %zmm9, 1024(%rdx)

// CHECK: vcompresspd %zmm9, -1024(%rdx)
// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x8a,0x4a,0x80]
vcompresspd %zmm9, -1024(%rdx)

// CHECK: vcompresspd %zmm9, -1032(%rdx)
// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x8a,0x8a,0xf8,0xfb,0xff,0xff]
vcompresspd %zmm9, -1032(%rdx)

// CHECK: vcompresspd %zmm4, %zmm8
// CHECK: encoding: [0x62,0xd2,0xfd,0x48,0x8a,0xe0]
vcompresspd %zmm4, %zmm8

// CHECK: vcompresspd %zmm4, %zmm8 {%k6}
// CHECK: encoding: [0x62,0xd2,0xfd,0x4e,0x8a,0xe0]
vcompresspd %zmm4, %zmm8 {%k6}

// CHECK: vcompresspd %zmm4, %zmm8 {%k6} {z}
// CHECK: encoding: [0x62,0xd2,0xfd,0xce,0x8a,0xe0]
vcompresspd %zmm4, %zmm8 {%k6} {z}

// VCOMPRESSPS encoding tests, 512-bit (zmm) forms.
// Memory-destination forms: unmasked, write-masked with {%k7}, base+index*scale
// addressing, and displacement boundaries. Per the expected bytes below, 508
// and -512 are emitted as a single displacement byte (0x7f / 0x80), while
// 512 and -516 are emitted as a 4-byte displacement.
// Register-destination forms: unmasked, write-masked with {%k2}, and
// write-masked with {%k2} plus the {z} modifier.
// CHECK: vcompressps %zmm10, (%rcx)
// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x8a,0x11]
vcompressps %zmm10, (%rcx)

// CHECK: vcompressps %zmm10, (%rcx) {%k7}
// CHECK: encoding: [0x62,0x72,0x7d,0x4f,0x8a,0x11]
vcompressps %zmm10, (%rcx) {%k7}

// CHECK: vcompressps %zmm10, 291(%rax,%r14,8)
// CHECK: encoding: [0x62,0x32,0x7d,0x48,0x8a,0x94,0xf0,0x23,0x01,0x00,0x00]
vcompressps %zmm10, 291(%rax,%r14,8)

// CHECK: vcompressps %zmm10, 508(%rdx)
// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x8a,0x52,0x7f]
vcompressps %zmm10, 508(%rdx)

// CHECK: vcompressps %zmm10, 512(%rdx)
// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x8a,0x92,0x00,0x02,0x00,0x00]
vcompressps %zmm10, 512(%rdx)

// CHECK: vcompressps %zmm10, -512(%rdx)
// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x8a,0x52,0x80]
vcompressps %zmm10, -512(%rdx)

// CHECK: vcompressps %zmm10, -516(%rdx)
// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x8a,0x92,0xfc,0xfd,0xff,0xff]
vcompressps %zmm10, -516(%rdx)

// CHECK: vcompressps %zmm14, %zmm4
// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x8a,0xf4]
vcompressps %zmm14, %zmm4

// CHECK: vcompressps %zmm14, %zmm4 {%k2}
// CHECK: encoding: [0x62,0x72,0x7d,0x4a,0x8a,0xf4]
vcompressps %zmm14, %zmm4 {%k2}

// CHECK: vcompressps %zmm14, %zmm4 {%k2} {z}
// CHECK: encoding: [0x62,0x72,0x7d,0xca,0x8a,0xf4]
vcompressps %zmm14, %zmm4 {%k2} {z}

// VEXPANDPD encoding tests, 512-bit (zmm) forms.
// Memory-source forms: unmasked, write-masked with {%k4}, write-masked with
// {%k4} plus {z}, base+index*scale addressing, and displacement boundaries.
// Per the expected bytes below, 1016 and -1024 are emitted as a single
// displacement byte (0x7f / 0x80), while 1024 and -1032 are emitted as a
// 4-byte displacement.
// Register-source forms: unmasked, write-masked with {%k5}, and write-masked
// with {%k5} plus the {z} modifier.
// CHECK: vexpandpd (%rcx), %zmm24
// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x88,0x01]
vexpandpd (%rcx), %zmm24

// CHECK: vexpandpd (%rcx), %zmm24 {%k4}
// CHECK: encoding: [0x62,0x62,0xfd,0x4c,0x88,0x01]
vexpandpd (%rcx), %zmm24 {%k4}

// CHECK: vexpandpd (%rcx), %zmm24 {%k4} {z}
// CHECK: encoding: [0x62,0x62,0xfd,0xcc,0x88,0x01]
vexpandpd (%rcx), %zmm24 {%k4} {z}

// CHECK: vexpandpd 291(%rax,%r14,8), %zmm24
// CHECK: encoding: [0x62,0x22,0xfd,0x48,0x88,0x84,0xf0,0x23,0x01,0x00,0x00]
vexpandpd 291(%rax,%r14,8), %zmm24

// CHECK: vexpandpd 1016(%rdx), %zmm24
// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x88,0x42,0x7f]
vexpandpd 1016(%rdx), %zmm24

// CHECK: vexpandpd 1024(%rdx), %zmm24
// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x88,0x82,0x00,0x04,0x00,0x00]
vexpandpd 1024(%rdx), %zmm24

// CHECK: vexpandpd -1024(%rdx), %zmm24
// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x88,0x42,0x80]
vexpandpd -1024(%rdx), %zmm24

// CHECK: vexpandpd -1032(%rdx), %zmm24
// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x88,0x82,0xf8,0xfb,0xff,0xff]
vexpandpd -1032(%rdx), %zmm24

// CHECK: vexpandpd %zmm15, %zmm23
// CHECK: encoding: [0x62,0xc2,0xfd,0x48,0x88,0xff]
vexpandpd %zmm15, %zmm23

// CHECK: vexpandpd %zmm15, %zmm23 {%k5}
// CHECK: encoding: [0x62,0xc2,0xfd,0x4d,0x88,0xff]
vexpandpd %zmm15, %zmm23 {%k5}

// CHECK: vexpandpd %zmm15, %zmm23 {%k5} {z}
// CHECK: encoding: [0x62,0xc2,0xfd,0xcd,0x88,0xff]
vexpandpd %zmm15, %zmm23 {%k5} {z}

// VEXPANDPS encoding tests, 512-bit (zmm) forms.
// Memory-source forms: unmasked, write-masked with {%k6}, write-masked with
// {%k6} plus {z}, base+index*scale addressing, and displacement boundaries.
// Per the expected bytes below, 508 and -512 are emitted as a single
// displacement byte (0x7f / 0x80), while 512 and -516 are emitted as a
// 4-byte displacement.
// Register-source forms: unmasked, write-masked with {%k2}, and write-masked
// with {%k2} plus the {z} modifier.
// CHECK: vexpandps (%rcx), %zmm4
// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x88,0x21]
vexpandps (%rcx), %zmm4

// CHECK: vexpandps (%rcx), %zmm4 {%k6}
// CHECK: encoding: [0x62,0xf2,0x7d,0x4e,0x88,0x21]
vexpandps (%rcx), %zmm4 {%k6}

// CHECK: vexpandps (%rcx), %zmm4 {%k6} {z}
// CHECK: encoding: [0x62,0xf2,0x7d,0xce,0x88,0x21]
vexpandps (%rcx), %zmm4 {%k6} {z}

// CHECK: vexpandps 291(%rax,%r14,8), %zmm4
// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x88,0xa4,0xf0,0x23,0x01,0x00,0x00]
vexpandps 291(%rax,%r14,8), %zmm4

// CHECK: vexpandps 508(%rdx), %zmm4
// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x88,0x62,0x7f]
vexpandps 508(%rdx), %zmm4

// CHECK: vexpandps 512(%rdx), %zmm4
// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x88,0xa2,0x00,0x02,0x00,0x00]
vexpandps 512(%rdx), %zmm4

// CHECK: vexpandps -512(%rdx), %zmm4
// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x88,0x62,0x80]
vexpandps -512(%rdx), %zmm4

// CHECK: vexpandps -516(%rdx), %zmm4
// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x88,0xa2,0xfc,0xfd,0xff,0xff]
vexpandps -516(%rdx), %zmm4

// CHECK: vexpandps %zmm9, %zmm14
// CHECK: encoding: [0x62,0x52,0x7d,0x48,0x88,0xf1]
vexpandps %zmm9, %zmm14

// CHECK: vexpandps %zmm9, %zmm14 {%k2}
// CHECK: encoding: [0x62,0x52,0x7d,0x4a,0x88,0xf1]
vexpandps %zmm9, %zmm14 {%k2}

// CHECK: vexpandps %zmm9, %zmm14 {%k2} {z}
// CHECK: encoding: [0x62,0x52,0x7d,0xca,0x88,0xf1]
vexpandps %zmm9, %zmm14 {%k2} {z}
Loading

0 comments on commit 4f1ddd3

Please sign in to comment.