[x64] Add AVX512 optimizations for OPCODE_VECTOR_COMPARE_UGT(Integer)
AVX512 has native unsigned integer comparison instructions, removing
the need to XOR the most significant bit with a constant in memory
before using the signed comparison instructions. These instructions
only write to a k-mask register, though, and need an additional
`vpmovm2*` to turn the mask register into a vector mask.
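
For illustration only (not part of this commit), a minimal intrinsics-level
sketch of the 32-bit case, assuming AVX512F/VL/DQ support; the helper names
are hypothetical, and the second function mirrors the pre-AVX512 sign-bit
trick that the fallback path in the diff still uses:

```cpp
#include <immintrin.h>
#include <cstdint>

// AVX512 path: vpcmpud produces a k-mask, vpmovm2d expands it into a
// per-lane all-ones/all-zeros vector mask. Requires AVX512F+VL+DQ.
__m128i ugt_u32_avx512(__m128i a, __m128i b) {
  __mmask8 k = _mm_cmpgt_epu32_mask(a, b);  // unsigned a > b, per lane
  return _mm_movm_epi32(k);                 // k-mask -> vector mask
}

// Legacy path: flip the sign bit of both operands so the signed
// compare (vpcmpgtd) yields the unsigned ordering.
__m128i ugt_u32_sse(__m128i a, __m128i b) {
  const __m128i sign = _mm_set1_epi32(INT32_MIN);  // 0x80000000 per lane
  return _mm_cmpgt_epi32(_mm_xor_si128(a, sign), _mm_xor_si128(b, sign));
}
```

The AVX512 version needs no in-memory constant, which is what this commit
exploits; the emitter only takes that path when the required AVX512
feature bits are present.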

As of Icelake:
`vpcmpu*` is all L3/T1
`vpmovm2d` is L1/T0.33
`vpmovm2{b,w}` is L3/T0.33

As of Zen4:
`vpcmpu*` is all L3/T0.50
`vpmovm2*` is all L1/T0.25
Wunkolo authored and gibbed committed May 29, 2023
1 parent 121bf93 commit 6ee2e37
Showing 1 changed file with 37 additions and 0 deletions.
src/xenia/cpu/backend/x64/x64_seq_vector.cc
@@ -409,6 +409,43 @@ struct VECTOR_COMPARE_UGT_V128
    : Sequence<VECTOR_COMPARE_UGT_V128,
               I<OPCODE_VECTOR_COMPARE_UGT, V128Op, V128Op, V128Op>> {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    if (e.IsFeatureEnabled(kX64EmitAVX512Ortho | kX64EmitAVX512BW |
                           kX64EmitAVX512DQ) &&
        (i.instr->flags != FLOAT32_TYPE)) {
      Xmm src1 = e.xmm0;
      if (i.src1.is_constant) {
        e.LoadConstantXmm(src1, i.src1.constant());
      } else {
        src1 = i.src1;
      }

      Xmm src2 = e.xmm1;
      if (i.src2.is_constant) {
        e.LoadConstantXmm(src2, i.src2.constant());
      } else {
        src2 = i.src2;
      }

      switch (i.instr->flags) {
        case INT8_TYPE:
          e.vpcmpub(e.k1, src1, src2, 0x6);
          e.vpmovm2b(i.dest, e.k1);
          break;
        case INT16_TYPE:
          e.vpcmpuw(e.k1, src1, src2, 0x6);
          e.vpmovm2w(i.dest, e.k1);
          break;
        case INT32_TYPE:
          e.vpcmpud(e.k1, src1, src2, 0x6);
          e.vpmovm2d(i.dest, e.k1);
          break;
        default:
          assert_always();
          break;
      }
      return;
    }

    Xbyak::Address sign_addr = e.ptr[e.rax];  // dummy
    switch (i.instr->flags) {
      case INT8_TYPE:
