Skip to content

Commit

Permalink
[wasm-simd][x64] Specialize i8x16.popcnt for Atom with slow PSHUFB
Browse files Browse the repository at this point in the history
i8x16.popcnt uses the PSHUFB instruction, which is slow on old Atom
processors. Add an extra i8x16.popcnt implementation for those processors,
using a HACKMEM-inspired divide-and-conquer algorithm.

[email protected], [email protected]

Change-Id: I4e130428fea8c3cf3be1bd6da7308fc752b2132a
Bug: v8:11002
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2656858
Reviewed-by: Zhi An Ng <[email protected]>
Reviewed-by: Deepti Gandluri <[email protected]>
Commit-Queue: Marat Dukhan <[email protected]>
Cr-Commit-Position: refs/heads/master@{#72421}
  • Loading branch information
Maratyszcza authored and Commit Bot committed Jan 29, 2021
1 parent 8e7347d commit 71fc222
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 0 deletions.
20 changes: 20 additions & 0 deletions src/codegen/external-reference.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,18 @@ constexpr struct alignas(16) {
} wasm_i8x16_splat_0x0f = {uint64_t{0x0F0F0F0F'0F0F0F0F},
uint64_t{0x0F0F0F0F'0F0F0F0F}};

// 128-bit constant with every byte equal to 0x33 (binary 00110011).
// Stored as two 64-bit halves and aligned to 16 bytes; its address is handed
// out through ExternalReference::address_of_wasm_i8x16_splat_0x33().
constexpr struct alignas(16) {
  uint64_t a;
  uint64_t b;
} wasm_i8x16_splat_0x33 = {
    uint64_t{0x3333'3333'3333'3333},  // bytes 0-7
    uint64_t{0x3333'3333'3333'3333},  // bytes 8-15
};

// 128-bit constant with every byte equal to 0x55 (binary 01010101).
// Stored as two 64-bit halves and aligned to 16 bytes; its address is handed
// out through ExternalReference::address_of_wasm_i8x16_splat_0x55().
constexpr struct alignas(16) {
  uint64_t a;
  uint64_t b;
} wasm_i8x16_splat_0x55 = {
    uint64_t{0x5555'5555'5555'5555},  // bytes 0-7
    uint64_t{0x5555'5555'5555'5555},  // bytes 8-15
};

constexpr struct alignas(16) {
uint64_t a;
uint64_t b;
Expand Down Expand Up @@ -582,6 +594,14 @@ ExternalReference ExternalReference::address_of_wasm_i8x16_splat_0x0f() {
return ExternalReference(reinterpret_cast<Address>(&wasm_i8x16_splat_0x0f));
}

// Returns an ExternalReference wrapping the address of the 16-byte
// wasm_i8x16_splat_0x33 constant.
ExternalReference ExternalReference::address_of_wasm_i8x16_splat_0x33() {
  const Address splat_address =
      reinterpret_cast<Address>(&wasm_i8x16_splat_0x33);
  return ExternalReference(splat_address);
}

// Returns an ExternalReference wrapping the address of the 16-byte
// wasm_i8x16_splat_0x55 constant.
ExternalReference ExternalReference::address_of_wasm_i8x16_splat_0x55() {
  const Address splat_address =
      reinterpret_cast<Address>(&wasm_i8x16_splat_0x55);
  return ExternalReference(splat_address);
}

// Returns an ExternalReference wrapping the address of the
// wasm_i16x8_splat_0x0001 constant.
ExternalReference ExternalReference::address_of_wasm_i16x8_splat_0x0001() {
  const Address splat_address =
      reinterpret_cast<Address>(&wasm_i16x8_splat_0x0001);
  return ExternalReference(splat_address);
}
Expand Down
2 changes: 2 additions & 0 deletions src/codegen/external-reference.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ class StatsCounter;
V(address_of_wasm_i8x16_popcnt_mask, "wasm_i8x16_popcnt_mask") \
V(address_of_wasm_i8x16_splat_0x01, "wasm_i8x16_splat_0x01") \
V(address_of_wasm_i8x16_splat_0x0f, "wasm_i8x16_splat_0x0f") \
V(address_of_wasm_i8x16_splat_0x33, "wasm_i8x16_splat_0x33") \
V(address_of_wasm_i8x16_splat_0x55, "wasm_i8x16_splat_0x55") \
V(address_of_wasm_i16x8_splat_0x0001, "wasm_i16x8_splat_0x0001") \
V(bytecode_size_table_address, "Bytecodes::bytecode_size_table_address") \
V(check_object_type, "check_object_type") \
Expand Down
27 changes: 27 additions & 0 deletions src/compiler/backend/x64/code-generator-x64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3941,6 +3941,33 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vpshufb(dst, tmp, dst);
__ vpshufb(kScratchDoubleReg, tmp, kScratchDoubleReg);
__ vpaddb(dst, dst, kScratchDoubleReg);
} else if (CpuFeatures::IsSupported(ATOM)) {
  // Pre-Goldmont low-power Intel microarchitectures have very slow
  // PSHUFB instruction, thus use PSHUFB-free divide-and-conquer
  // algorithm on these processors. ATOM CPU feature captures exactly
  // the right set of processors.
  //
  // Step 1: x - ((x >> 1) & 0x55) leaves, in every 2-bit field, the number
  // of bits that were set in that field. The shift must be a truncating
  // logical shift: PAVGB against zero computes (x + 1) >> 1, which rounds
  // up and miscounts odd bytes (e.g. 0x01 would end up with a count of 0).
  // PSRLW shifts 16-bit lanes, so one bit of the upper byte leaks into
  // bit 7 of the lower byte; the 0x55 mask clears it.
  // NOTE(review): like the original sequence, this assumes tmp is a
  // register distinct from src.
  __ movaps(tmp, src);
  __ psrlw(tmp, 1);
  if (dst != src) {
    __ movaps(dst, src);
  }
  __ andps(tmp,
           __ ExternalReferenceAsOperand(
               ExternalReference::address_of_wasm_i8x16_splat_0x55()));
  __ psubb(dst, tmp);
  // Step 2: add neighbouring 2-bit counts into 4-bit counts:
  // (x & 0x33) + ((x >> 2) & 0x33). The 0x33 mask also discards the bits
  // that the 16-bit shift moved across the byte boundary.
  Operand splat_0x33 = __ ExternalReferenceAsOperand(
      ExternalReference::address_of_wasm_i8x16_splat_0x33());
  __ movaps(tmp, dst);
  __ andps(dst, splat_0x33);
  __ psrlw(tmp, 2);
  __ andps(tmp, splat_0x33);
  __ paddb(dst, tmp);
  // Step 3: add the two 4-bit counts of each byte: (x + (x >> 4)) & 0x0f.
  // Each nibble holds at most 4, so the low-nibble sum cannot carry; the
  // final mask drops the upper nibble, including bits shifted in from the
  // neighbouring byte.
  __ movaps(tmp, dst);
  __ psrlw(dst, 4);
  __ paddb(dst, tmp);
  __ andps(dst,
           __ ExternalReferenceAsOperand(
               ExternalReference::address_of_wasm_i8x16_splat_0x0f()));
} else {
__ movaps(tmp,
__ ExternalReferenceAsOperand(
Expand Down

0 comments on commit 71fc222

Please sign in to comment.