From 4f1ddd396b6da217f8a85547990d1d887435bacd Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Mon, 22 Jun 2015 11:16:30 +0000 Subject: [PATCH] AVX-512: All forms of VCOMPRESS VEXPAND instructions, encoding tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240272 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 40 +-- lib/Target/X86/X86InstrAVX512.td | 61 ++--- lib/Target/X86/X86InstrFragmentsSIMD.td | 10 +- test/MC/X86/avx512-encodings.s | 167 ++++++++++++ test/MC/X86/avx512vl-encoding.s | 336 ++++++++++++++++++++++++ 5 files changed, 535 insertions(+), 79 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 67e733384ab6..00c04c1ba4c6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -15215,18 +15215,10 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue PassThru = Op.getOperand(2); if (isAllOnes(Mask)) // return data as is return Op.getOperand(1); - EVT VT = Op.getValueType(); - EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - VT.getVectorNumElements()); - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); - SDLoc dl(Op); - SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getBitcast(BitcastVT, Mask), - DAG.getIntPtrConstant(0, dl)); - return DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToCompress, - PassThru); + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, + DataToCompress), + Mask, PassThru, Subtarget, DAG); } case BLEND: { SDValue Mask = Op.getOperand(3); @@ -15769,16 +15761,9 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, MachinePointerInfo(), false, false, VT.getScalarSizeInBits()/8); - EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - VT.getVectorNumElements()); - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - 
Mask.getValueType().getSizeInBits()); - SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getBitcast(BitcastVT, Mask), - DAG.getIntPtrConstant(0, dl)); - - SDValue Compressed = DAG.getNode(IntrData->Opc0, dl, VT, VMask, - DataToCompress, DAG.getUNDEF(VT)); + SDValue Compressed = + getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress), + Mask, DAG.getUNDEF(VT), Subtarget, DAG); return DAG.getStore(Chain, dl, Compressed, Addr, MachinePointerInfo(), false, false, VT.getScalarSizeInBits()/8); @@ -15786,7 +15771,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, case EXPAND_FROM_MEM: { SDLoc dl(Op); SDValue Mask = Op.getOperand(4); - SDValue PathThru = Op.getOperand(3); + SDValue PassThru = Op.getOperand(3); SDValue Addr = Op.getOperand(2); SDValue Chain = Op.getOperand(0); EVT VT = Op.getValueType(); @@ -15794,21 +15779,14 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, if (isAllOnes(Mask)) // return just a load return DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false, false, VT.getScalarSizeInBits()/8); - EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - VT.getVectorNumElements()); - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); - SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getBitcast(BitcastVT, Mask), - DAG.getIntPtrConstant(0, dl)); SDValue DataToExpand = DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false, false, VT.getScalarSizeInBits()/8); SDValue Results[] = { - DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToExpand, PathThru), - Chain}; + getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToExpand), + Mask, PassThru, Subtarget, DAG), Chain}; return DAG.getMergeValues(Results, dl); } } diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index de6a83506b28..c60b11cbe316 100644 --- 
a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -5868,26 +5868,24 @@ defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m", //===----------------------------------------------------------------------===// // AVX-512 - COMPRESS and EXPAND // + multiclass compress_by_vec_width opc, X86VectorVTInfo _, string OpcodeStr> { - def rrkz : AVX5128I, EVEX_KZ; - - let Constraints = "$src0 = $dst" in - def rrk : AVX5128I, EVEX_K; + defm rr : AVX512_maskable, AVX5128IBase; let mayStore = 1 in { + def mr : AVX5128I, EVEX_CD8<_.EltSize, CD8VT1>; + def mrk : AVX5128I, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; } @@ -5915,37 +5913,16 @@ defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info // expand multiclass expand_by_vec_width opc, X86VectorVTInfo _, string OpcodeStr> { - def rrkz : AVX5128I, EVEX_KZ; - - let Constraints = "$src0 = $dst" in - def rrk : AVX5128I, EVEX_K; - - let mayLoad = 1, Constraints = "$src0 = $dst" in - def rmk : AVX5128I, - EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; + defm rr : AVX512_maskable, AVX5128IBase; let mayLoad = 1 in - def rmkz : AVX5128I, - EVEX_KZ, EVEX_CD8<_.EltSize, CD8VT1>; + defm rm : AVX512_maskable, + AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>; } multiclass expand_by_elt_width opc, string OpcodeStr, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 16ae77dd81a3..de3b3b6516a4 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -347,12 +347,10 @@ def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>; def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>; -def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3, - [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, - SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>; -def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3, - [SDTCisSameAs<0, 3>, - 
SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>; +def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 1, + [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>; +def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1, + [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>; def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>, SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>; diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s index e52dfac1976c..05a7b1b82b6d 100644 --- a/test/MC/X86/avx512-encodings.s +++ b/test/MC/X86/avx512-encodings.s @@ -9514,3 +9514,170 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2 // CHECK: encoding: [0x62,0xe2,0xd5,0x58,0x77,0xa2,0xf8,0xfb,0xff,0xff] vpermi2pd -1032(%rdx){1to8}, %zmm5, %zmm20 +// CHECK: vcompresspd %zmm9, (%rcx) +// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x8a,0x09] + vcompresspd %zmm9, (%rcx) + +// CHECK: vcompresspd %zmm9, (%rcx) {%k4} +// CHECK: encoding: [0x62,0x72,0xfd,0x4c,0x8a,0x09] + vcompresspd %zmm9, (%rcx) {%k4} + +// CHECK: vcompresspd %zmm9, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x32,0xfd,0x48,0x8a,0x8c,0xf0,0x23,0x01,0x00,0x00] + vcompresspd %zmm9, 291(%rax,%r14,8) + +// CHECK: vcompresspd %zmm9, 1016(%rdx) +// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x8a,0x4a,0x7f] + vcompresspd %zmm9, 1016(%rdx) + +// CHECK: vcompresspd %zmm9, 1024(%rdx) +// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x8a,0x8a,0x00,0x04,0x00,0x00] + vcompresspd %zmm9, 1024(%rdx) + +// CHECK: vcompresspd %zmm9, -1024(%rdx) +// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x8a,0x4a,0x80] + vcompresspd %zmm9, -1024(%rdx) + +// CHECK: vcompresspd %zmm9, -1032(%rdx) +// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x8a,0x8a,0xf8,0xfb,0xff,0xff] + vcompresspd %zmm9, -1032(%rdx) + +// CHECK: vcompresspd %zmm4, %zmm8 +// CHECK: encoding: [0x62,0xd2,0xfd,0x48,0x8a,0xe0] + vcompresspd %zmm4, %zmm8 + +// CHECK: vcompresspd %zmm4, %zmm8 {%k6} +// CHECK: encoding: [0x62,0xd2,0xfd,0x4e,0x8a,0xe0] + vcompresspd %zmm4, %zmm8 {%k6} + +// CHECK: 
vcompresspd %zmm4, %zmm8 {%k6} {z} +// CHECK: encoding: [0x62,0xd2,0xfd,0xce,0x8a,0xe0] + vcompresspd %zmm4, %zmm8 {%k6} {z} + +// CHECK: vcompressps %zmm10, (%rcx) +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x8a,0x11] + vcompressps %zmm10, (%rcx) + +// CHECK: vcompressps %zmm10, (%rcx) {%k7} +// CHECK: encoding: [0x62,0x72,0x7d,0x4f,0x8a,0x11] + vcompressps %zmm10, (%rcx) {%k7} + +// CHECK: vcompressps %zmm10, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x32,0x7d,0x48,0x8a,0x94,0xf0,0x23,0x01,0x00,0x00] + vcompressps %zmm10, 291(%rax,%r14,8) + +// CHECK: vcompressps %zmm10, 508(%rdx) +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x8a,0x52,0x7f] + vcompressps %zmm10, 508(%rdx) + +// CHECK: vcompressps %zmm10, 512(%rdx) +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x8a,0x92,0x00,0x02,0x00,0x00] + vcompressps %zmm10, 512(%rdx) + +// CHECK: vcompressps %zmm10, -512(%rdx) +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x8a,0x52,0x80] + vcompressps %zmm10, -512(%rdx) + +// CHECK: vcompressps %zmm10, -516(%rdx) +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x8a,0x92,0xfc,0xfd,0xff,0xff] + vcompressps %zmm10, -516(%rdx) + +// CHECK: vcompressps %zmm14, %zmm4 +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x8a,0xf4] + vcompressps %zmm14, %zmm4 + +// CHECK: vcompressps %zmm14, %zmm4 {%k2} +// CHECK: encoding: [0x62,0x72,0x7d,0x4a,0x8a,0xf4] + vcompressps %zmm14, %zmm4 {%k2} + +// CHECK: vcompressps %zmm14, %zmm4 {%k2} {z} +// CHECK: encoding: [0x62,0x72,0x7d,0xca,0x8a,0xf4] + vcompressps %zmm14, %zmm4 {%k2} {z} + +// CHECK: vexpandpd (%rcx), %zmm24 +// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x88,0x01] + vexpandpd (%rcx), %zmm24 + +// CHECK: vexpandpd (%rcx), %zmm24 {%k4} +// CHECK: encoding: [0x62,0x62,0xfd,0x4c,0x88,0x01] + vexpandpd (%rcx), %zmm24 {%k4} + +// CHECK: vexpandpd (%rcx), %zmm24 {%k4} {z} +// CHECK: encoding: [0x62,0x62,0xfd,0xcc,0x88,0x01] + vexpandpd (%rcx), %zmm24 {%k4} {z} + +// CHECK: vexpandpd 291(%rax,%r14,8), %zmm24 +// CHECK: encoding: 
[0x62,0x22,0xfd,0x48,0x88,0x84,0xf0,0x23,0x01,0x00,0x00] + vexpandpd 291(%rax,%r14,8), %zmm24 + +// CHECK: vexpandpd 1016(%rdx), %zmm24 +// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x88,0x42,0x7f] + vexpandpd 1016(%rdx), %zmm24 + +// CHECK: vexpandpd 1024(%rdx), %zmm24 +// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x88,0x82,0x00,0x04,0x00,0x00] + vexpandpd 1024(%rdx), %zmm24 + +// CHECK: vexpandpd -1024(%rdx), %zmm24 +// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x88,0x42,0x80] + vexpandpd -1024(%rdx), %zmm24 + +// CHECK: vexpandpd -1032(%rdx), %zmm24 +// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x88,0x82,0xf8,0xfb,0xff,0xff] + vexpandpd -1032(%rdx), %zmm24 + +// CHECK: vexpandpd %zmm15, %zmm23 +// CHECK: encoding: [0x62,0xc2,0xfd,0x48,0x88,0xff] + vexpandpd %zmm15, %zmm23 + +// CHECK: vexpandpd %zmm15, %zmm23 {%k5} +// CHECK: encoding: [0x62,0xc2,0xfd,0x4d,0x88,0xff] + vexpandpd %zmm15, %zmm23 {%k5} + +// CHECK: vexpandpd %zmm15, %zmm23 {%k5} {z} +// CHECK: encoding: [0x62,0xc2,0xfd,0xcd,0x88,0xff] + vexpandpd %zmm15, %zmm23 {%k5} {z} + +// CHECK: vexpandps (%rcx), %zmm4 +// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x88,0x21] + vexpandps (%rcx), %zmm4 + +// CHECK: vexpandps (%rcx), %zmm4 {%k6} +// CHECK: encoding: [0x62,0xf2,0x7d,0x4e,0x88,0x21] + vexpandps (%rcx), %zmm4 {%k6} + +// CHECK: vexpandps (%rcx), %zmm4 {%k6} {z} +// CHECK: encoding: [0x62,0xf2,0x7d,0xce,0x88,0x21] + vexpandps (%rcx), %zmm4 {%k6} {z} + +// CHECK: vexpandps 291(%rax,%r14,8), %zmm4 +// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x88,0xa4,0xf0,0x23,0x01,0x00,0x00] + vexpandps 291(%rax,%r14,8), %zmm4 + +// CHECK: vexpandps 508(%rdx), %zmm4 +// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x88,0x62,0x7f] + vexpandps 508(%rdx), %zmm4 + +// CHECK: vexpandps 512(%rdx), %zmm4 +// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x88,0xa2,0x00,0x02,0x00,0x00] + vexpandps 512(%rdx), %zmm4 + +// CHECK: vexpandps -512(%rdx), %zmm4 +// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x88,0x62,0x80] + vexpandps -512(%rdx), %zmm4 + +// CHECK: vexpandps 
-516(%rdx), %zmm4 +// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x88,0xa2,0xfc,0xfd,0xff,0xff] + vexpandps -516(%rdx), %zmm4 + +// CHECK: vexpandps %zmm9, %zmm14 +// CHECK: encoding: [0x62,0x52,0x7d,0x48,0x88,0xf1] + vexpandps %zmm9, %zmm14 + +// CHECK: vexpandps %zmm9, %zmm14 {%k2} +// CHECK: encoding: [0x62,0x52,0x7d,0x4a,0x88,0xf1] + vexpandps %zmm9, %zmm14 {%k2} + +// CHECK: vexpandps %zmm9, %zmm14 {%k2} {z} +// CHECK: encoding: [0x62,0x52,0x7d,0xca,0x88,0xf1] + vexpandps %zmm9, %zmm14 {%k2} {z} diff --git a/test/MC/X86/avx512vl-encoding.s b/test/MC/X86/avx512vl-encoding.s index dd1ac24b04ea..deae35f12a31 100644 --- a/test/MC/X86/avx512vl-encoding.s +++ b/test/MC/X86/avx512vl-encoding.s @@ -891,3 +891,339 @@ // CHECK: vpmovm2q %k2, %ymm30 // CHECK: encoding: [0x62,0x62,0xfe,0x28,0x38,0xf2] vpmovm2q %k2, %ymm30 + +// CHECK: vcompresspd %xmm23, (%rcx) +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x8a,0x39] + vcompresspd %xmm23, (%rcx) + +// CHECK: vcompresspd %xmm23, (%rcx) {%k6} +// CHECK: encoding: [0x62,0xe2,0xfd,0x0e,0x8a,0x39] + vcompresspd %xmm23, (%rcx) {%k6} + +// CHECK: vcompresspd %xmm23, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x8a,0xbc,0xf0,0x23,0x01,0x00,0x00] + vcompresspd %xmm23, 291(%rax,%r14,8) + +// CHECK: vcompresspd %xmm23, 1016(%rdx) +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x8a,0x7a,0x7f] + vcompresspd %xmm23, 1016(%rdx) + +// CHECK: vcompresspd %xmm23, 1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x8a,0xba,0x00,0x04,0x00,0x00] + vcompresspd %xmm23, 1024(%rdx) + +// CHECK: vcompresspd %xmm23, -1024(%rdx) +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x8a,0x7a,0x80] + vcompresspd %xmm23, -1024(%rdx) + +// CHECK: vcompresspd %xmm23, -1032(%rdx) +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x8a,0xba,0xf8,0xfb,0xff,0xff] + vcompresspd %xmm23, -1032(%rdx) + +// CHECK: vcompresspd %ymm29, (%rcx) +// CHECK: encoding: [0x62,0x62,0xfd,0x28,0x8a,0x29] + vcompresspd %ymm29, (%rcx) + +// CHECK: vcompresspd %ymm29, (%rcx) {%k2} +// CHECK: 
encoding: [0x62,0x62,0xfd,0x2a,0x8a,0x29] + vcompresspd %ymm29, (%rcx) {%k2} + +// CHECK: vcompresspd %ymm29, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0xfd,0x28,0x8a,0xac,0xf0,0x23,0x01,0x00,0x00] + vcompresspd %ymm29, 291(%rax,%r14,8) + +// CHECK: vcompresspd %ymm29, 1016(%rdx) +// CHECK: encoding: [0x62,0x62,0xfd,0x28,0x8a,0x6a,0x7f] + vcompresspd %ymm29, 1016(%rdx) + +// CHECK: vcompresspd %ymm29, 1024(%rdx) +// CHECK: encoding: [0x62,0x62,0xfd,0x28,0x8a,0xaa,0x00,0x04,0x00,0x00] + vcompresspd %ymm29, 1024(%rdx) + +// CHECK: vcompresspd %ymm29, -1024(%rdx) +// CHECK: encoding: [0x62,0x62,0xfd,0x28,0x8a,0x6a,0x80] + vcompresspd %ymm29, -1024(%rdx) + +// CHECK: vcompresspd %ymm29, -1032(%rdx) +// CHECK: encoding: [0x62,0x62,0xfd,0x28,0x8a,0xaa,0xf8,0xfb,0xff,0xff] + vcompresspd %ymm29, -1032(%rdx) + +// CHECK: vcompresspd %xmm27, %xmm20 +// CHECK: encoding: [0x62,0x22,0xfd,0x08,0x8a,0xdc] + vcompresspd %xmm27, %xmm20 + +// CHECK: vcompresspd %xmm27, %xmm20 {%k2} +// CHECK: encoding: [0x62,0x22,0xfd,0x0a,0x8a,0xdc] + vcompresspd %xmm27, %xmm20 {%k2} + +// CHECK: vcompresspd %xmm27, %xmm20 {%k2} {z} +// CHECK: encoding: [0x62,0x22,0xfd,0x8a,0x8a,0xdc] + vcompresspd %xmm27, %xmm20 {%k2} {z} + +// CHECK: vcompresspd %ymm20, %ymm24 +// CHECK: encoding: [0x62,0x82,0xfd,0x28,0x8a,0xe0] + vcompresspd %ymm20, %ymm24 + +// CHECK: vcompresspd %ymm20, %ymm24 {%k3} +// CHECK: encoding: [0x62,0x82,0xfd,0x2b,0x8a,0xe0] + vcompresspd %ymm20, %ymm24 {%k3} + +// CHECK: vcompresspd %ymm20, %ymm24 {%k3} {z} +// CHECK: encoding: [0x62,0x82,0xfd,0xab,0x8a,0xe0] + vcompresspd %ymm20, %ymm24 {%k3} {z} + +// CHECK: vcompressps %xmm21, (%rcx) +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x8a,0x29] + vcompressps %xmm21, (%rcx) + +// CHECK: vcompressps %xmm21, (%rcx) {%k7} +// CHECK: encoding: [0x62,0xe2,0x7d,0x0f,0x8a,0x29] + vcompressps %xmm21, (%rcx) {%k7} + +// CHECK: vcompressps %xmm21, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x8a,0xac,0xf0,0x23,0x01,0x00,0x00] + 
vcompressps %xmm21, 291(%rax,%r14,8) + +// CHECK: vcompressps %xmm21, 508(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x8a,0x6a,0x7f] + vcompressps %xmm21, 508(%rdx) + +// CHECK: vcompressps %xmm21, 512(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x8a,0xaa,0x00,0x02,0x00,0x00] + vcompressps %xmm21, 512(%rdx) + +// CHECK: vcompressps %xmm21, -512(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x8a,0x6a,0x80] + vcompressps %xmm21, -512(%rdx) + +// CHECK: vcompressps %xmm21, -516(%rdx) +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x8a,0xaa,0xfc,0xfd,0xff,0xff] + vcompressps %xmm21, -516(%rdx) + +// CHECK: vcompressps %ymm24, (%rcx) +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x8a,0x01] + vcompressps %ymm24, (%rcx) + +// CHECK: vcompressps %ymm24, (%rcx) {%k7} +// CHECK: encoding: [0x62,0x62,0x7d,0x2f,0x8a,0x01] + vcompressps %ymm24, (%rcx) {%k7} + +// CHECK: vcompressps %ymm24, 291(%rax,%r14,8) +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x8a,0x84,0xf0,0x23,0x01,0x00,0x00] + vcompressps %ymm24, 291(%rax,%r14,8) + +// CHECK: vcompressps %ymm24, 508(%rdx) +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x8a,0x42,0x7f] + vcompressps %ymm24, 508(%rdx) + +// CHECK: vcompressps %ymm24, 512(%rdx) +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x8a,0x82,0x00,0x02,0x00,0x00] + vcompressps %ymm24, 512(%rdx) + +// CHECK: vcompressps %ymm24, -512(%rdx) +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x8a,0x42,0x80] + vcompressps %ymm24, -512(%rdx) + +// CHECK: vcompressps %ymm24, -516(%rdx) +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x8a,0x82,0xfc,0xfd,0xff,0xff] + vcompressps %ymm24, -516(%rdx) + +// CHECK: vcompressps %xmm29, %xmm28 +// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x8a,0xec] + vcompressps %xmm29, %xmm28 + +// CHECK: vcompressps %xmm29, %xmm28 {%k3} +// CHECK: encoding: [0x62,0x02,0x7d,0x0b,0x8a,0xec] + vcompressps %xmm29, %xmm28 {%k3} + +// CHECK: vcompressps %xmm29, %xmm28 {%k3} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0x8b,0x8a,0xec] + vcompressps %xmm29, %xmm28 {%k3} {z} + +// 
CHECK: vcompressps %ymm25, %ymm23 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x8a,0xcf] + vcompressps %ymm25, %ymm23 + +// CHECK: vcompressps %ymm25, %ymm23 {%k6} +// CHECK: encoding: [0x62,0x22,0x7d,0x2e,0x8a,0xcf] + vcompressps %ymm25, %ymm23 {%k6} + +// CHECK: vcompressps %ymm25, %ymm23 {%k6} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xae,0x8a,0xcf] + vcompressps %ymm25, %ymm23 {%k6} {z} + +// CHECK: vexpandpd (%rcx), %xmm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x88,0x39] + vexpandpd (%rcx), %xmm23 + +// CHECK: vexpandpd (%rcx), %xmm23 {%k3} +// CHECK: encoding: [0x62,0xe2,0xfd,0x0b,0x88,0x39] + vexpandpd (%rcx), %xmm23 {%k3} + +// CHECK: vexpandpd (%rcx), %xmm23 {%k3} {z} +// CHECK: encoding: [0x62,0xe2,0xfd,0x8b,0x88,0x39] + vexpandpd (%rcx), %xmm23 {%k3} {z} + +// CHECK: vexpandpd 291(%rax,%r14,8), %xmm23 +// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x88,0xbc,0xf0,0x23,0x01,0x00,0x00] + vexpandpd 291(%rax,%r14,8), %xmm23 + +// CHECK: vexpandpd 1016(%rdx), %xmm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x88,0x7a,0x7f] + vexpandpd 1016(%rdx), %xmm23 + +// CHECK: vexpandpd 1024(%rdx), %xmm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x88,0xba,0x00,0x04,0x00,0x00] + vexpandpd 1024(%rdx), %xmm23 + +// CHECK: vexpandpd -1024(%rdx), %xmm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x88,0x7a,0x80] + vexpandpd -1024(%rdx), %xmm23 + +// CHECK: vexpandpd -1032(%rdx), %xmm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x88,0xba,0xf8,0xfb,0xff,0xff] + vexpandpd -1032(%rdx), %xmm23 + +// CHECK: vexpandpd (%rcx), %ymm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x88,0x31] + vexpandpd (%rcx), %ymm22 + +// CHECK: vexpandpd (%rcx), %ymm22 {%k5} +// CHECK: encoding: [0x62,0xe2,0xfd,0x2d,0x88,0x31] + vexpandpd (%rcx), %ymm22 {%k5} + +// CHECK: vexpandpd (%rcx), %ymm22 {%k5} {z} +// CHECK: encoding: [0x62,0xe2,0xfd,0xad,0x88,0x31] + vexpandpd (%rcx), %ymm22 {%k5} {z} + +// CHECK: vexpandpd 291(%rax,%r14,8), %ymm22 +// CHECK: encoding: 
[0x62,0xa2,0xfd,0x28,0x88,0xb4,0xf0,0x23,0x01,0x00,0x00] + vexpandpd 291(%rax,%r14,8), %ymm22 + +// CHECK: vexpandpd 1016(%rdx), %ymm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x88,0x72,0x7f] + vexpandpd 1016(%rdx), %ymm22 + +// CHECK: vexpandpd 1024(%rdx), %ymm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x88,0xb2,0x00,0x04,0x00,0x00] + vexpandpd 1024(%rdx), %ymm22 + +// CHECK: vexpandpd -1024(%rdx), %ymm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x88,0x72,0x80] + vexpandpd -1024(%rdx), %ymm22 + +// CHECK: vexpandpd -1032(%rdx), %ymm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x88,0xb2,0xf8,0xfb,0xff,0xff] + vexpandpd -1032(%rdx), %ymm22 + +// CHECK: vexpandpd %xmm25, %xmm29 +// CHECK: encoding: [0x62,0x02,0xfd,0x08,0x88,0xe9] + vexpandpd %xmm25, %xmm29 + +// CHECK: vexpandpd %xmm25, %xmm29 {%k7} +// CHECK: encoding: [0x62,0x02,0xfd,0x0f,0x88,0xe9] + vexpandpd %xmm25, %xmm29 {%k7} + +// CHECK: vexpandpd %xmm25, %xmm29 {%k7} {z} +// CHECK: encoding: [0x62,0x02,0xfd,0x8f,0x88,0xe9] + vexpandpd %xmm25, %xmm29 {%k7} {z} + +// CHECK: vexpandpd %ymm27, %ymm21 +// CHECK: encoding: [0x62,0x82,0xfd,0x28,0x88,0xeb] + vexpandpd %ymm27, %ymm21 + +// CHECK: vexpandpd %ymm27, %ymm21 {%k2} +// CHECK: encoding: [0x62,0x82,0xfd,0x2a,0x88,0xeb] + vexpandpd %ymm27, %ymm21 {%k2} + +// CHECK: vexpandpd %ymm27, %ymm21 {%k2} {z} +// CHECK: encoding: [0x62,0x82,0xfd,0xaa,0x88,0xeb] + vexpandpd %ymm27, %ymm21 {%k2} {z} + +// CHECK: vexpandps (%rcx), %xmm18 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x88,0x11] + vexpandps (%rcx), %xmm18 + +// CHECK: vexpandps (%rcx), %xmm18 {%k1} +// CHECK: encoding: [0x62,0xe2,0x7d,0x09,0x88,0x11] + vexpandps (%rcx), %xmm18 {%k1} + +// CHECK: vexpandps (%rcx), %xmm18 {%k1} {z} +// CHECK: encoding: [0x62,0xe2,0x7d,0x89,0x88,0x11] + vexpandps (%rcx), %xmm18 {%k1} {z} + +// CHECK: vexpandps 291(%rax,%r14,8), %xmm18 +// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x88,0x94,0xf0,0x23,0x01,0x00,0x00] + vexpandps 291(%rax,%r14,8), %xmm18 + +// CHECK: vexpandps 
508(%rdx), %xmm18 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x88,0x52,0x7f] + vexpandps 508(%rdx), %xmm18 + +// CHECK: vexpandps 512(%rdx), %xmm18 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x88,0x92,0x00,0x02,0x00,0x00] + vexpandps 512(%rdx), %xmm18 + +// CHECK: vexpandps -512(%rdx), %xmm18 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x88,0x52,0x80] + vexpandps -512(%rdx), %xmm18 + +// CHECK: vexpandps -516(%rdx), %xmm18 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x88,0x92,0xfc,0xfd,0xff,0xff] + vexpandps -516(%rdx), %xmm18 + +// CHECK: vexpandps (%rcx), %ymm23 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x88,0x39] + vexpandps (%rcx), %ymm23 + +// CHECK: vexpandps (%rcx), %ymm23 {%k7} +// CHECK: encoding: [0x62,0xe2,0x7d,0x2f,0x88,0x39] + vexpandps (%rcx), %ymm23 {%k7} + +// CHECK: vexpandps (%rcx), %ymm23 {%k7} {z} +// CHECK: encoding: [0x62,0xe2,0x7d,0xaf,0x88,0x39] + vexpandps (%rcx), %ymm23 {%k7} {z} + +// CHECK: vexpandps 291(%rax,%r14,8), %ymm23 +// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x88,0xbc,0xf0,0x23,0x01,0x00,0x00] + vexpandps 291(%rax,%r14,8), %ymm23 + +// CHECK: vexpandps 508(%rdx), %ymm23 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x88,0x7a,0x7f] + vexpandps 508(%rdx), %ymm23 + +// CHECK: vexpandps 512(%rdx), %ymm23 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x88,0xba,0x00,0x02,0x00,0x00] + vexpandps 512(%rdx), %ymm23 + +// CHECK: vexpandps -512(%rdx), %ymm23 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x88,0x7a,0x80] + vexpandps -512(%rdx), %ymm23 + +// CHECK: vexpandps -516(%rdx), %ymm23 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x88,0xba,0xfc,0xfd,0xff,0xff] + vexpandps -516(%rdx), %ymm23 + +// CHECK: vexpandps %xmm19, %xmm29 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x88,0xeb] + vexpandps %xmm19, %xmm29 + +// CHECK: vexpandps %xmm19, %xmm29 {%k5} +// CHECK: encoding: [0x62,0x22,0x7d,0x0d,0x88,0xeb] + vexpandps %xmm19, %xmm29 {%k5} + +// CHECK: vexpandps %xmm19, %xmm29 {%k5} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0x8d,0x88,0xeb] + vexpandps %xmm19, 
%xmm29 {%k5} {z} + +// CHECK: vexpandps %ymm29, %ymm29 +// CHECK: encoding: [0x62,0x02,0x7d,0x28,0x88,0xed] + vexpandps %ymm29, %ymm29 + +// CHECK: vexpandps %ymm29, %ymm29 {%k5} +// CHECK: encoding: [0x62,0x02,0x7d,0x2d,0x88,0xed] + vexpandps %ymm29, %ymm29 {%k5} + +// CHECK: vexpandps %ymm29, %ymm29 {%k5} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0xad,0x88,0xed] + vexpandps %ymm29, %ymm29 {%k5} {z}