Skip to content

Commit

Permalink
Instruction support for Cargo
Browse files Browse the repository at this point in the history
Adds packuswb, psubusb, psubusw, paddusb, paddusw, psubsb, psubsw, paddsb, and paddsw
  • Loading branch information
jason-conway committed Aug 29, 2022
1 parent 1b790aa commit fa4d7a7
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 24 deletions.
19 changes: 18 additions & 1 deletion emu/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,8 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(compares_gtw, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x66: TRACEI("pcmpgtd xmm:modrm, xmm");
READMODRM; V_OP(compares_gtd, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x67: TRACEI("packuswb xmm:modrm, xmm");
READMODRM; V_OP(packsu_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x68: TRACEI("punpckhbw xmm:modrm, xmm");
READMODRM; V_OP(unpackh_bw, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x69: TRACEI("punpckhwd xmm:modrm, xmm");
Expand Down Expand Up @@ -394,11 +396,18 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {

case 0xd7: TRACEI("pmovmskb xmm:modrm, reg");
READMODRM_NOMEM; V_OP(movmask_b, xmm_modrm_val, modrm_reg,128); break;

case 0xd8: TRACEI("psubusb xmm:modrm, xmm");
READMODRM; V_OP(subus_b, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xd9: TRACEI("psubusw xmm:modrm, xmm");
READMODRM; V_OP(subus_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xda: TRACEI("pminub xmm:modrm, xmm");
READMODRM; V_OP(min_ub, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xdb: TRACEI("pand xmm:modrm, xmm");
READMODRM; V_OP(and, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xdc: TRACEI("paddusb xmm:modrm, xmm");
READMODRM; V_OP(addus_b, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xdd: TRACEI("paddusw xmm:modrm, xmm");
READMODRM; V_OP(addus_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xde: TRACEI("pmaxub xmm:modrm, xmm");
READMODRM; V_OP(max_ub, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xdf: TRACEI("pandn xmm:modrm, xmm");
Expand All @@ -413,8 +422,16 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(mulu, xmm_modrm_val, xmm_modrm_reg, 128); break;
case 0xe6: TRACEI("cvttpd2dq xmm:modrm, xmm");
READMODRM; V_OP(cvttpd2dq, xmm_modrm_val, xmm_modrm_reg,64); break;
case 0xe8: TRACEI("psubsb xmm:modrm, xmm");
READMODRM; V_OP(subss_b, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xe9: TRACEI("psubsw xmm:modrm, xmm");
READMODRM; V_OP(subss_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xeb: TRACEI("por xmm:modrm, xmm");
READMODRM; V_OP(or, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xec: TRACEI("paddsb xmm:modrm, xmm");
READMODRM; V_OP(addss_b, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xed: TRACEI("paddsw xmm:modrm, xmm");
READMODRM; V_OP(addss_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xef: TRACEI("pxor xmm:modrm, xmm");
READMODRM; V_OP(xor, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xf3: TRACEI("psllq xmm:modrm, xmm");
Expand Down
96 changes: 82 additions & 14 deletions emu/vec.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ static inline void zero_xmm(union xmm_reg *xmm) {
xmm->qw[1] = 0;
}

static inline int32_t satw(int32_t dw) {
static inline int32_t satsw(int32_t dw) {
if (dw > 0xff80)
dw &= 0xff;
else if (dw > 0x7fff)
Expand All @@ -18,7 +18,7 @@ static inline int32_t satw(int32_t dw) {
dw = 0x7f;
return dw;
}
static inline uint32_t satd(uint32_t dw) {
static inline uint32_t satud(uint32_t dw) {
if (dw > 0xffff8000)
dw &= 0xffff;
else if (dw > 0x7fffffff)
Expand All @@ -27,6 +27,23 @@ static inline uint32_t satd(uint32_t dw) {
dw = 0x7fff;
return dw;
}
// Clamp a value to the unsigned byte range for packuswb-style packing.
// Inputs of 0x8000 and above yield 0 (for a zero-extended 16-bit word that
// means the sign bit is set); any other input above 0xff yields 0xff;
// everything else is returned unchanged.
static inline uint32_t satub(uint32_t dw) {
    if (dw >= 0x8000)
        return 0;
    return dw > 0xff ? 0xff : dw;
}
// Saturate a 32-bit two's-complement value (carried in a uint32_t) to the
// signed byte range and return it as a byte bit pattern in 0x00..0xff.
// The comparisons stay unsigned so no signed conversion is needed.
static inline uint32_t satsb(uint32_t dw) {
    if (dw <= 0x7f)
        return dw;        // 0..127: already representable
    if (dw <= 0x7fffffff)
        return 0x7f;      // larger positive value: clamp to +127
    if (dw <= 0xffffff80)
        return 0x80;      // -128 or below: clamp to -128
    return dw & 0xff;     // -127..-1: keep the low byte
}

#define VEC_ZERO_COPY(zero, copy) \
void vec_zero##zero##_copy##copy(NO_CPU, const void *src, void *dst) { \
Expand Down Expand Up @@ -192,6 +209,27 @@ void vec_add_q128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
// Add the qw field of src into the qw field of dst (plain `+`, no saturation).
void vec_add_q64(NO_CPU, union mm_reg *src, union mm_reg *dst) {
    dst->qw = dst->qw + src->qw;
}
// paddusb: add the 16 byte lanes of src into dst, clamping each sum to 0xff.
void vec_addus_b128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    for (unsigned lane = 0; lane < 16; lane++) {
        // Sum in a wider type so the carry out of the byte stays visible.
        const int32_t sum = dst->u8[lane] + src->u8[lane];
        if (sum > 0xff)
            dst->u8[lane] = 0xff;
        else
            dst->u8[lane] = sum;
    }
}
// paddusw: add the 8 word lanes of src into dst, clamping each sum to 0xffff.
void vec_addus_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    for (unsigned lane = 0; lane < 8; lane++) {
        // Sum in a wider type so the carry out of the word stays visible.
        const int32_t sum = dst->u16[lane] + src->u16[lane];
        if (sum > 0xffff)
            dst->u16[lane] = 0xffff;
        else
            dst->u16[lane] = sum;
    }
}
// paddsb: add the 16 byte lanes of src into dst as signed values; each sum
// is passed through satsb() before being stored.
void vec_addss_b128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    for (unsigned lane = 0; lane < 16; lane++) {
        const int8_t lhs = (int8_t)dst->u8[lane];
        const int8_t rhs = (int8_t)src->u8[lane];
        dst->u8[lane] = satsb(lhs + rhs);
    }
}
// paddsw: add the 8 word lanes of src into dst as signed values; each sum
// is passed through satud() before being stored.
void vec_addss_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    for (unsigned lane = 0; lane < 8; lane++) {
        const int16_t lhs = (int16_t)dst->u16[lane];
        const int16_t rhs = (int16_t)src->u16[lane];
        dst->u16[lane] = satud(lhs + rhs);
    }
}

void vec_sub_b128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
for (unsigned i = 0; i < array_size(src->u8); i++)
dst->u8[i] -= src->u8[i];
Expand All @@ -208,6 +246,26 @@ void vec_sub_q128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
dst->qw[0] -= src->qw[0];
dst->qw[1] -= src->qw[1];
}
// psubusb: subtract the 16 byte lanes of src from dst, clamping negative
// differences to 0.
void vec_subus_b128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    for (unsigned lane = 0; lane < 16; lane++) {
        // Subtract in a wider signed type so a borrow shows up as a negative value.
        const int32_t diff = dst->u8[lane] - src->u8[lane];
        if (diff < 0)
            dst->u8[lane] = 0;
        else
            dst->u8[lane] = diff;
    }
}
// psubusw: subtract the 8 word lanes of src from dst, clamping negative
// differences to 0.
void vec_subus_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    for (unsigned lane = 0; lane < 8; lane++) {
        // Subtract in a wider signed type so a borrow shows up as a negative value.
        const int32_t diff = dst->u16[lane] - src->u16[lane];
        if (diff < 0)
            dst->u16[lane] = 0;
        else
            dst->u16[lane] = diff;
    }
}
// psubsb: subtract the 16 byte lanes of src from dst as signed values; each
// difference is passed through satsb() before being stored.
void vec_subss_b128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    for (unsigned lane = 0; lane < 16; lane++) {
        const int8_t lhs = (int8_t)dst->u8[lane];
        const int8_t rhs = (int8_t)src->u8[lane];
        dst->u8[lane] = satsb(lhs - rhs);
    }
}
// psubsw: subtract the 8 word lanes of src from dst as signed values; each
// difference is passed through satud() before being stored.
void vec_subss_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    for (unsigned lane = 0; lane < 8; lane++) {
        const int16_t lhs = (int16_t)dst->u16[lane];
        const int16_t rhs = (int16_t)src->u16[lane];
        dst->u16[lane] = satud(lhs - rhs);
    }
}

void vec_madd_d128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
dst->u32[0] = (int32_t)((int16_t)dst->u16[0] * (int16_t)src->u16[0]) +
Expand Down Expand Up @@ -457,20 +515,30 @@ void vec_movlh_ps128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
}

void vec_packss_w128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
dst->u32[0] = (satw(dst->u16[0]) << 0x00) | (satw(dst->u16[1]) << 0x08) |
(satw(dst->u16[2]) << 0x10) | (satw(dst->u16[3]) << 0x18);
dst->u32[1] = (satw(dst->u16[4]) << 0x00) | (satw(dst->u16[5]) << 0x08) |
(satw(dst->u16[6]) << 0x10) | (satw(dst->u16[7]) << 0x18);
dst->u32[2] = (satw(src->u16[0]) << 0x00) | (satw(src->u16[1]) << 0x08) |
(satw(src->u16[2]) << 0x10) | (satw(src->u16[3]) << 0x18);
dst->u32[3] = (satw(src->u16[4]) << 0x00) | (satw(src->u16[5]) << 0x08) |
(satw(src->u16[6]) << 0x10) | (satw(src->u16[7]) << 0x18);
dst->u32[0] = (satsw(dst->u16[0]) << 0x00) | (satsw(dst->u16[1]) << 0x08) |
(satsw(dst->u16[2]) << 0x10) | (satsw(dst->u16[3]) << 0x18);
dst->u32[1] = (satsw(dst->u16[4]) << 0x00) | (satsw(dst->u16[5]) << 0x08) |
(satsw(dst->u16[6]) << 0x10) | (satsw(dst->u16[7]) << 0x18);
dst->u32[2] = (satsw(src->u16[0]) << 0x00) | (satsw(src->u16[1]) << 0x08) |
(satsw(src->u16[2]) << 0x10) | (satsw(src->u16[3]) << 0x18);
dst->u32[3] = (satsw(src->u16[4]) << 0x00) | (satsw(src->u16[5]) << 0x08) |
(satsw(src->u16[6]) << 0x10) | (satsw(src->u16[7]) << 0x18);
}
void vec_packss_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
dst->u32[0] = satd(dst->u32[0]) | (satd(dst->u32[1]) << 16);
dst->u32[1] = satd(dst->u32[2]) | (satd(dst->u32[3]) << 16);
dst->u32[2] = satd(src->u32[0]) | (satd(src->u32[1]) << 16);
dst->u32[3] = satd(src->u32[2]) | (satd(src->u32[3]) << 16);
dst->u32[0] = satud(dst->u32[0]) | (satud(dst->u32[1]) << 16);
dst->u32[1] = satud(dst->u32[2]) | (satud(dst->u32[3]) << 16);
dst->u32[2] = satud(src->u32[0]) | (satud(src->u32[1]) << 16);
dst->u32[3] = satud(src->u32[2]) | (satud(src->u32[3]) << 16);
}
// packuswb: pack the eight words of dst followed by the eight words of src
// into sixteen bytes, each clamped by satub(), and store the result in dst.
//
// dst words 0..7 fill dst->u32[0..1]; src words 0..7 fill dst->u32[2..3].
// Every packed dword is computed before any store to dst, so the result is
// also correct when src and dst refer to the same register (storing
// dst->u32[0..1] first would otherwise clobber the words src is read from).
void vec_packsu_w128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
    const uint32_t dst_lo = (satub(dst->u16[0]) << 0x00) | (satub(dst->u16[1]) << 0x08) |
                            (satub(dst->u16[2]) << 0x10) | (satub(dst->u16[3]) << 0x18);
    const uint32_t dst_hi = (satub(dst->u16[4]) << 0x00) | (satub(dst->u16[5]) << 0x08) |
                            (satub(dst->u16[6]) << 0x10) | (satub(dst->u16[7]) << 0x18);
    const uint32_t src_lo = (satub(src->u16[0]) << 0x00) | (satub(src->u16[1]) << 0x08) |
                            (satub(src->u16[2]) << 0x10) | (satub(src->u16[3]) << 0x18);
    const uint32_t src_hi = (satub(src->u16[4]) << 0x00) | (satub(src->u16[5]) << 0x08) |
                            (satub(src->u16[6]) << 0x10) | (satub(src->u16[7]) << 0x18);

    dst->u32[0] = dst_lo;
    dst->u32[1] = dst_hi;
    dst->u32[2] = src_lo;
    dst->u32[3] = src_hi;
}

void vec_shuffle_lw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding) {
Expand Down
9 changes: 9 additions & 0 deletions emu/vec.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,18 @@ void vec_add_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_add_d128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_add_q128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_add_q64(NO_CPU, union mm_reg *src, union mm_reg *dst);
// Saturating adds into dst: "addus" clamps unsigned lane sums at the lane
// maximum, "addss" saturates signed lane sums. _b128 works on the 16 byte
// lanes, _w128 on the 8 word lanes.
void vec_addus_b128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_addus_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_addss_b128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_addss_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_sub_b128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_sub_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_sub_d128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_sub_q128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
// Saturating subtracts (dst -= src): "subus" clamps unsigned lane differences
// at 0, "subss" saturates signed lane differences. _b128 works on the 16 byte
// lanes, _w128 on the 8 word lanes.
void vec_subus_b128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_subus_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_subss_b128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_subss_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_mulu_dq128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_mulu_dq64(NO_CPU, union mm_reg *src, union mm_reg *dst);
void vec_mulu64(NO_CPU, const union mm_reg *src, union mm_reg *dst);
Expand Down Expand Up @@ -98,6 +106,7 @@ void vec_cvttps2dq32(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);

// TODO organize
void vec_packss_w128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_packsu_w128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_packss_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);

void vec_unpackl_bw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
Expand Down
18 changes: 18 additions & 0 deletions tests/e2e/qemu/expected.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4366,6 +4366,8 @@ pcmpgtw : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab
pcmpgtw : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=ffff0000ffff00000000ffff00000000
pcmpgtd : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=0000000000000000ffffffff00000000
pcmpgtd : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=ffffffffffffffff0000000000000000
packuswb : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=ffff0000ffffff0000ff00ffffff00ff
packuswb : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=000000ffffffffffff0000ff7cffffff
punpckhbw: a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=41dcf2511e5cfbffa994e34ae15846ec
punpckhbw: a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=c223331be9e9e8e8c4cdc9e743439a8d
punpckhwd: a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=41f2dc511efb5cffa9e3944ae14658ec
Expand All @@ -4388,12 +4390,20 @@ pmullw : a=456723c698694873 b=1f297ccd58bad7ab r=967f8d8ed44af9d1
pmullw : a=007c62c2085427f8 b=0f76255a085427f8 r=7d28c2345b908040
pmullw : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=d5921005579ebc88967f8d8ed44af9d1
pmullw : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=74612240865f89d27d28c2345b908040
psubusb : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=9b003e04000000a6263e000040000000
psubusb : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=00000000091e000000063d6800000000
psubusw : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=9a5f3e0400000000263e00003faf0000
psubusw : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=00000000091e000000003d6800000000
pminub : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=41511efb944a58461f2923c658694873
pminub : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=231be9e8c4c9438d0076255a085427f8
pand : a=456723c698694873 b=1f297ccd58bad7ab r=052120c418284023
pand : a=007c62c2085427f8 b=0f76255a085427f8 r=00742042085427f8
pand : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=40501cfb80424044052120c418284023
pand : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=0213e9e8c4c1438800742042085427f8
paddusb : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=ffff7affffffffff64909ffff0ffffff
paddusb : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=e54effffffff86ff0ff287ff10a84eff
paddusw : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=ffff7bfaffffffff6490a093f123ffff
paddusw : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=e54effffffff87270ff2881c10a84ff0
pmaxub : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=dcf25cffa9e3e1ec45677ccd98bad7ab
pmaxub : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=c233e9e8cde7439a0f7c62c2085427f8
pandn : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=01a2020029a1a1021a085c0940929788
Expand All @@ -4404,8 +4414,16 @@ pmulhw : a=456723c698694873 b=1f297ccd58bad7ab r=08721170dc18f495
pmulhw : a=007c62c2085427f8 b=0f76255a085427f8 r=00070e680045063d
pmulhw : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=f6ce0b41243bf55308721170dc18f495
pmulhw : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=f78601e80b9611d600070e680045063d
psubsb : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=9b5f3e04eb6777a6263ea7f9807f717f
psubsb : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=61e80000091e00f3f1063d8000000000
psubsw : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=9a5f3e04ea6777a6263ea6f9800070c8
psubsw : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=60e80000091efff3f1063d6800000000
por : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=ddf35effbdebf9ee5f6f7fcfd8fbdffb
por : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=e33be9e8cdef439f0f7e67da085427f8
paddsb : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=1d437afa802d3932647f7f93f0231f1e
paddsb : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=e54ed2d091b07f800f7f7f1c107f4ef0
paddsw : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=1e437bfa80003a3264907ffff123201e
paddsw : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=e54ed3d092b07fff0ff27fff10a84ff0
pxor : a=456723c698694873 b=1f297ccd58bad7ab r=5a4e5f0bc0d39fd8
pxor : a=007c62c2085427f8 b=0f76255a085427f8 r=0f0a479800000000
pxor : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=9da342043da9b9aa5a4e5f0bc0d39fd8
Expand Down
18 changes: 9 additions & 9 deletions tests/e2e/qemu/qemu-test.c
Original file line number Diff line number Diff line change
Expand Up @@ -2441,7 +2441,7 @@ void test_sse(void)
SSE_OP2(pcmpgtb);
SSE_OP2(pcmpgtw);
SSE_OP2(pcmpgtd);
// MMX_OP2(packuswb);
SSE_OP2(packuswb);
SSE_OP2(punpckhbw);
SSE_OP2(punpckhwd);
SSE_OP2(punpckhdq);
Expand All @@ -2452,24 +2452,24 @@ void test_sse(void)

MMX_OP2(paddq);
MMX_OP2(pmullw);
// MMX_OP2(psubusb);
// MMX_OP2(psubusw);
SSE_OP2(psubusb);
SSE_OP2(psubusw);
SSE_OP2(pminub);
MMX_OP2(pand);
// MMX_OP2(paddusb);
// MMX_OP2(paddusw);
SSE_OP2(paddusb);
SSE_OP2(paddusw);
SSE_OP2(pmaxub);
SSE_OP2(pandn);

SSE_OP2(pmulhuw);
MMX_OP2(pmulhw);

// MMX_OP2(psubsb);
// MMX_OP2(psubsw);
SSE_OP2(psubsb);
SSE_OP2(psubsw);
// MMX_OP2(pminsw);
SSE_OP2(por);
// MMX_OP2(paddsb);
// MMX_OP2(paddsw);
SSE_OP2(paddsb);
SSE_OP2(paddsw);
// MMX_OP2(pmaxsw);
MMX_OP2(pxor);
MMX_OP2(pmuludq);
Expand Down

0 comments on commit fa4d7a7

Please sign in to comment.