Skip to content

Commit

Permalink
[mips][wasm-simd] Prototype extended pairwise addition
Browse files Browse the repository at this point in the history
Also implement extended pairwise addition and i64x2.abs
in Liftoff.

Port: aee8522
Port: 8136e39
Port: 31aab83

Bug: v8:11086 v8:11416
Change-Id: I77539c745b67c6b9d4205b4e5e96504e5ad3da13
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2717150
Auto-Submit: Liu yu <[email protected]>
Reviewed-by: Zhao Jiazhong <[email protected]>
Commit-Queue: Zhao Jiazhong <[email protected]>
Cr-Commit-Position: refs/heads/master@{#72992}
  • Loading branch information
LiuYu396 authored and Commit Bot committed Feb 24, 2021
1 parent 966362a commit 42b5b40
Show file tree
Hide file tree
Showing 12 changed files with 174 additions and 54 deletions.
20 changes: 20 additions & 0 deletions src/codegen/mips64/macro-assembler-mips64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2780,6 +2780,26 @@ void TurboAssembler::LoadSplat(MSASize sz, MSARegister dst, MemOperand src) {
}
}

// Emits the MSA horizontal-add instruction that implements the wasm
// "extended pairwise addition" for the lane type given by |type|:
//   MSAS8  -> hadd_s_h    MSAU8  -> hadd_u_h
//   MSAS16 -> hadd_s_w    MSAU16 -> hadd_u_w
// |src| is deliberately passed as both source operands of the instruction.
// NOTE(review): this relies on HADD combining the odd lanes of one source with
// the even lanes of the other into double-width results -- confirm against the
// MSA manual.
// Any other |type| hits UNREACHABLE().
void TurboAssembler::ExtAddPairwise(MSADataType type, MSARegister dst,
                                    MSARegister src) {
  switch (type) {
    case MSAS8:
      hadd_s_h(dst, src, src);
      return;
    case MSAU8:
      hadd_u_h(dst, src, src);
      return;
    case MSAS16:
      hadd_s_w(dst, src, src);
      return;
    case MSAU16:
      hadd_u_w(dst, src, src);
      return;
    default:
      break;
  }
  // Only the four 8-bit/16-bit signed/unsigned source types are supported.
  UNREACHABLE();
}

void TurboAssembler::MSARoundW(MSARegister dst, MSARegister src,
FPURoundingMode mode) {
BlockTrampolinePoolScope block_trampoline_pool(this);
Expand Down
1 change: 1 addition & 0 deletions src/codegen/mips64/macro-assembler-mips64.h
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void ExtMulHigh(MSADataType type, MSARegister dst, MSARegister src1,
MSARegister src2);
void LoadSplat(MSASize sz, MSARegister dst, MemOperand src);
void ExtAddPairwise(MSADataType type, MSARegister dst, MSARegister src);
void MSARoundW(MSARegister dst, MSARegister src, FPURoundingMode mode);
void MSARoundD(MSARegister dst, MSARegister src, FPURoundingMode mode);

Expand Down
24 changes: 24 additions & 0 deletions src/compiler/backend/mips/code-generator-mips.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2044,6 +2044,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kMipsI32x4ExtAddPairwiseI16x8S: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ hadd_s_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(0));
break;
}
case kMipsI32x4ExtAddPairwiseI16x8U: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ hadd_u_w(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(0));
break;
}
case kMipsF64x2Abs: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ bclri_d(i.OutputSimd128Register(), i.InputSimd128Register(0), 63);
Expand Down Expand Up @@ -2912,6 +2924,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kMipsI16x8ExtAddPairwiseI8x16S: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ hadd_s_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(0));
break;
}
case kMipsI16x8ExtAddPairwiseI8x16U: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ hadd_u_h(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(0));
break;
}
case kMipsI8x16Splat: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ fill_b(i.OutputSimd128Register(), i.InputRegister(0));
Expand Down
4 changes: 4 additions & 0 deletions src/compiler/backend/mips/instruction-codes-mips.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,8 @@ namespace compiler {
V(MipsI32x4ExtMulHighI16x8U) \
V(MipsI32x4TruncSatF64x2SZero) \
V(MipsI32x4TruncSatF64x2UZero) \
V(MipsI32x4ExtAddPairwiseI16x8S) \
V(MipsI32x4ExtAddPairwiseI16x8U) \
V(MipsI16x8Splat) \
V(MipsI16x8ExtractLaneU) \
V(MipsI16x8ExtractLaneS) \
Expand Down Expand Up @@ -275,6 +277,8 @@ namespace compiler {
V(MipsI16x8ExtMulHighI8x16S) \
V(MipsI16x8ExtMulLowI8x16U) \
V(MipsI16x8ExtMulHighI8x16U) \
V(MipsI16x8ExtAddPairwiseI8x16S) \
V(MipsI16x8ExtAddPairwiseI8x16U) \
V(MipsI8x16Splat) \
V(MipsI8x16ExtractLaneU) \
V(MipsI8x16ExtractLaneS) \
Expand Down
4 changes: 4 additions & 0 deletions src/compiler/backend/mips/instruction-scheduler-mips.cc
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMipsI16x8ExtMulHighI8x16S:
case kMipsI16x8ExtMulLowI8x16U:
case kMipsI16x8ExtMulHighI8x16U:
case kMipsI16x8ExtAddPairwiseI8x16S:
case kMipsI16x8ExtAddPairwiseI8x16U:
case kMipsI32x4ExtAddPairwiseI16x8S:
case kMipsI32x4ExtAddPairwiseI16x8U:
case kMipsI32x4Add:
case kMipsI32x4AddHoriz:
case kMipsI32x4Eq:
Expand Down
112 changes: 58 additions & 54 deletions src/compiler/backend/mips/instruction-selector-mips.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2115,60 +2115,64 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I16x8) \
V(I8x16)

#define SIMD_UNOP_LIST(V) \
V(F64x2Abs, kMipsF64x2Abs) \
V(F64x2Neg, kMipsF64x2Neg) \
V(F64x2Sqrt, kMipsF64x2Sqrt) \
V(F64x2Ceil, kMipsF64x2Ceil) \
V(F64x2Floor, kMipsF64x2Floor) \
V(F64x2Trunc, kMipsF64x2Trunc) \
V(F64x2NearestInt, kMipsF64x2NearestInt) \
V(F64x2ConvertLowI32x4S, kMipsF64x2ConvertLowI32x4S) \
V(F64x2ConvertLowI32x4U, kMipsF64x2ConvertLowI32x4U) \
V(F64x2PromoteLowF32x4, kMipsF64x2PromoteLowF32x4) \
V(I64x2Neg, kMipsI64x2Neg) \
V(I64x2BitMask, kMipsI64x2BitMask) \
V(I64x2Abs, kMipsI64x2Abs) \
V(I64x2SConvertI32x4Low, kMipsI64x2SConvertI32x4Low) \
V(I64x2SConvertI32x4High, kMipsI64x2SConvertI32x4High) \
V(I64x2UConvertI32x4Low, kMipsI64x2UConvertI32x4Low) \
V(I64x2UConvertI32x4High, kMipsI64x2UConvertI32x4High) \
V(F32x4SConvertI32x4, kMipsF32x4SConvertI32x4) \
V(F32x4UConvertI32x4, kMipsF32x4UConvertI32x4) \
V(F32x4Abs, kMipsF32x4Abs) \
V(F32x4Neg, kMipsF32x4Neg) \
V(F32x4Sqrt, kMipsF32x4Sqrt) \
V(F32x4RecipApprox, kMipsF32x4RecipApprox) \
V(F32x4RecipSqrtApprox, kMipsF32x4RecipSqrtApprox) \
V(F32x4Ceil, kMipsF32x4Ceil) \
V(F32x4Floor, kMipsF32x4Floor) \
V(F32x4Trunc, kMipsF32x4Trunc) \
V(F32x4NearestInt, kMipsF32x4NearestInt) \
V(F32x4DemoteF64x2Zero, kMipsF32x4DemoteF64x2Zero) \
V(I32x4SConvertF32x4, kMipsI32x4SConvertF32x4) \
V(I32x4UConvertF32x4, kMipsI32x4UConvertF32x4) \
V(I32x4Neg, kMipsI32x4Neg) \
V(I32x4BitMask, kMipsI32x4BitMask) \
V(I32x4SConvertI16x8Low, kMipsI32x4SConvertI16x8Low) \
V(I32x4SConvertI16x8High, kMipsI32x4SConvertI16x8High) \
V(I32x4UConvertI16x8Low, kMipsI32x4UConvertI16x8Low) \
V(I32x4UConvertI16x8High, kMipsI32x4UConvertI16x8High) \
V(I32x4TruncSatF64x2SZero, kMipsI32x4TruncSatF64x2SZero) \
V(I32x4TruncSatF64x2UZero, kMipsI32x4TruncSatF64x2UZero) \
V(I16x8Neg, kMipsI16x8Neg) \
V(I16x8BitMask, kMipsI16x8BitMask) \
V(I16x8SConvertI8x16Low, kMipsI16x8SConvertI8x16Low) \
V(I16x8SConvertI8x16High, kMipsI16x8SConvertI8x16High) \
V(I16x8UConvertI8x16Low, kMipsI16x8UConvertI8x16Low) \
V(I16x8UConvertI8x16High, kMipsI16x8UConvertI8x16High) \
V(I8x16Neg, kMipsI8x16Neg) \
V(I8x16Popcnt, kMipsI8x16Popcnt) \
V(I8x16BitMask, kMipsI8x16BitMask) \
V(S128Not, kMipsS128Not) \
V(V64x2AllTrue, kMipsV64x2AllTrue) \
V(V32x4AllTrue, kMipsV32x4AllTrue) \
V(V16x8AllTrue, kMipsV16x8AllTrue) \
V(V8x16AllTrue, kMipsV8x16AllTrue) \
// X-macro table of unary SIMD operations. Each row is
// V(<operation name>, <kMips* opcode>); the rows are grouped by lane shape
// (F64x2, I64x2, F32x4, I32x4, I16x8, I8x16, then S128/AllTrue/AnyTrue).
// NOTE(review): presumably expanded elsewhere in this file to generate one
// InstructionSelector visitor per row -- confirm at the expansion site.
#define SIMD_UNOP_LIST(V) \
V(F64x2Abs, kMipsF64x2Abs) \
V(F64x2Neg, kMipsF64x2Neg) \
V(F64x2Sqrt, kMipsF64x2Sqrt) \
V(F64x2Ceil, kMipsF64x2Ceil) \
V(F64x2Floor, kMipsF64x2Floor) \
V(F64x2Trunc, kMipsF64x2Trunc) \
V(F64x2NearestInt, kMipsF64x2NearestInt) \
V(F64x2ConvertLowI32x4S, kMipsF64x2ConvertLowI32x4S) \
V(F64x2ConvertLowI32x4U, kMipsF64x2ConvertLowI32x4U) \
V(F64x2PromoteLowF32x4, kMipsF64x2PromoteLowF32x4) \
V(I64x2Neg, kMipsI64x2Neg) \
V(I64x2BitMask, kMipsI64x2BitMask) \
V(I64x2Abs, kMipsI64x2Abs) \
V(I64x2SConvertI32x4Low, kMipsI64x2SConvertI32x4Low) \
V(I64x2SConvertI32x4High, kMipsI64x2SConvertI32x4High) \
V(I64x2UConvertI32x4Low, kMipsI64x2UConvertI32x4Low) \
V(I64x2UConvertI32x4High, kMipsI64x2UConvertI32x4High) \
V(F32x4SConvertI32x4, kMipsF32x4SConvertI32x4) \
V(F32x4UConvertI32x4, kMipsF32x4UConvertI32x4) \
V(F32x4Abs, kMipsF32x4Abs) \
V(F32x4Neg, kMipsF32x4Neg) \
V(F32x4Sqrt, kMipsF32x4Sqrt) \
V(F32x4RecipApprox, kMipsF32x4RecipApprox) \
V(F32x4RecipSqrtApprox, kMipsF32x4RecipSqrtApprox) \
V(F32x4Ceil, kMipsF32x4Ceil) \
V(F32x4Floor, kMipsF32x4Floor) \
V(F32x4Trunc, kMipsF32x4Trunc) \
V(F32x4NearestInt, kMipsF32x4NearestInt) \
V(F32x4DemoteF64x2Zero, kMipsF32x4DemoteF64x2Zero) \
V(I32x4SConvertF32x4, kMipsI32x4SConvertF32x4) \
V(I32x4UConvertF32x4, kMipsI32x4UConvertF32x4) \
V(I32x4Neg, kMipsI32x4Neg) \
V(I32x4BitMask, kMipsI32x4BitMask) \
V(I32x4SConvertI16x8Low, kMipsI32x4SConvertI16x8Low) \
V(I32x4SConvertI16x8High, kMipsI32x4SConvertI16x8High) \
V(I32x4UConvertI16x8Low, kMipsI32x4UConvertI16x8Low) \
V(I32x4UConvertI16x8High, kMipsI32x4UConvertI16x8High) \
V(I32x4ExtAddPairwiseI16x8S, kMipsI32x4ExtAddPairwiseI16x8S) \
V(I32x4ExtAddPairwiseI16x8U, kMipsI32x4ExtAddPairwiseI16x8U) \
V(I32x4TruncSatF64x2SZero, kMipsI32x4TruncSatF64x2SZero) \
V(I32x4TruncSatF64x2UZero, kMipsI32x4TruncSatF64x2UZero) \
V(I16x8Neg, kMipsI16x8Neg) \
V(I16x8BitMask, kMipsI16x8BitMask) \
V(I16x8SConvertI8x16Low, kMipsI16x8SConvertI8x16Low) \
V(I16x8SConvertI8x16High, kMipsI16x8SConvertI8x16High) \
V(I16x8UConvertI8x16Low, kMipsI16x8UConvertI8x16Low) \
V(I16x8UConvertI8x16High, kMipsI16x8UConvertI8x16High) \
V(I16x8ExtAddPairwiseI8x16S, kMipsI16x8ExtAddPairwiseI8x16S) \
V(I16x8ExtAddPairwiseI8x16U, kMipsI16x8ExtAddPairwiseI8x16U) \
V(I8x16Neg, kMipsI8x16Neg) \
V(I8x16Popcnt, kMipsI8x16Popcnt) \
V(I8x16BitMask, kMipsI8x16BitMask) \
V(S128Not, kMipsS128Not) \
V(V64x2AllTrue, kMipsV64x2AllTrue) \
V(V32x4AllTrue, kMipsV32x4AllTrue) \
V(V16x8AllTrue, kMipsV16x8AllTrue) \
V(V8x16AllTrue, kMipsV8x16AllTrue) \
V(V128AnyTrue, kMipsV128AnyTrue)

#define SIMD_SHIFT_OP_LIST(V) \
Expand Down
6 changes: 6 additions & 0 deletions src/compiler/backend/mips64/code-generator-mips64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2480,6 +2480,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1));
break;
}
case kMips64ExtAddPairwise: {
auto dt = static_cast<MSADataType>(MiscField::decode(instr->opcode()));
__ ExtAddPairwise(dt, i.OutputSimd128Register(),
i.InputSimd128Register(0));
break;
}
case kMips64F32x4Splat: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ FmoveLow(kScratchReg, i.InputSingleRegister(0));
Expand Down
1 change: 1 addition & 0 deletions src/compiler/backend/mips64/instruction-codes-mips64.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ namespace compiler {
V(Mips64I64x2UConvertI32x4High) \
V(Mips64ExtMulLow) \
V(Mips64ExtMulHigh) \
V(Mips64ExtAddPairwise) \
V(Mips64F32x4Abs) \
V(Mips64F32x4Neg) \
V(Mips64F32x4Sqrt) \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64I64x2UConvertI32x4High:
case kMips64ExtMulLow:
case kMips64ExtMulHigh:
case kMips64ExtAddPairwise:
case kMips64F32x4Abs:
case kMips64F32x4Add:
case kMips64F32x4AddHoriz:
Expand Down
12 changes: 12 additions & 0 deletions src/compiler/backend/mips64/instruction-selector-mips64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3278,6 +3278,18 @@ VISIT_EXT_MUL(I16x8, I8x16S, MSAS8)
VISIT_EXT_MUL(I16x8, I8x16U, MSAU8)
#undef VISIT_EXT_MUL

// Defines InstructionSelector::Visit<OP_NAME> for one extended-pairwise-add
// operation. All four variants share the single kMips64ExtAddPairwise opcode;
// the MSA source lane type (MSA_TYPE) is packed into the opcode's MiscField.
// The node's only input is used as a register and the result is defined as a
// register.
#define VISIT_EXTADD_PAIRWISE(OP_NAME, MSA_TYPE) \
  void InstructionSelector::Visit##OP_NAME(Node* node) { \
    Mips64OperandGenerator operands(this); \
    const auto opcode = kMips64ExtAddPairwise | MiscField::encode(MSA_TYPE); \
    Emit(opcode, operands.DefineAsRegister(node), \
         operands.UseRegister(node->InputAt(0))); \
  }
VISIT_EXTADD_PAIRWISE(I16x8ExtAddPairwiseI8x16S, MSAS8)
VISIT_EXTADD_PAIRWISE(I16x8ExtAddPairwiseI8x16U, MSAU8)
VISIT_EXTADD_PAIRWISE(I32x4ExtAddPairwiseI16x8S, MSAS16)
VISIT_EXTADD_PAIRWISE(I32x4ExtAddPairwiseI16x8U, MSAU16)
#undef VISIT_EXTADD_PAIRWISE

// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
Expand Down
25 changes: 25 additions & 0 deletions src/wasm/baseline/mips/liftoff-assembler-mips.h
Original file line number Diff line number Diff line change
Expand Up @@ -1947,6 +1947,11 @@ void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
bailout(kSimd, "emit_i64x2_ne");
}

// i64x2.abs is not implemented by this Liftoff port: no code is emitted and a
// kSimd bailout labelled with this function's name is reported instead.
// |dst| and |src| are unused.
void LiftoffAssembler::emit_i64x2_abs(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "emit_i64x2_abs");
}

void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "emit_f64x2_eq");
Expand Down Expand Up @@ -2234,6 +2239,16 @@ void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
bailout(kSimd, "emit_i16x8_max_u");
}

// i16x8.extadd_pairwise_i8x16_s is not implemented by this Liftoff port: no
// code is emitted and a kSimd bailout labelled with this function's name is
// reported instead. |dst| and |src| are unused.
void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_s(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "emit_i16x8_extadd_pairwise_i8x16_s");
}

// i16x8.extadd_pairwise_i8x16_u is not implemented by this Liftoff port: no
// code is emitted and a kSimd bailout labelled with this function's name is
// reported instead. |dst| and |src| are unused.
void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "emit_i16x8_extadd_pairwise_i8x16_u");
}

void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "emit_i32x4_neg");
Expand Down Expand Up @@ -2326,6 +2341,16 @@ void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
bailout(kSimd, "emit_i32x4_dot_i16x8_s");
}

// i32x4.extadd_pairwise_i16x8_s is not implemented by this Liftoff port: no
// code is emitted and a kSimd bailout labelled with this function's name is
// reported instead. |dst| and |src| are unused.
void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_s(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "emit_i32x4_extadd_pairwise_i16x8_s");
}

// i32x4.extadd_pairwise_i16x8_u is not implemented by this Liftoff port: no
// code is emitted and a kSimd bailout labelled with this function's name is
// reported instead. |dst| and |src| are unused.
void LiftoffAssembler::emit_i32x4_extadd_pairwise_i16x8_u(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "emit_i32x4_extadd_pairwise_i16x8_u");
}

void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "emit_i64x2_neg");
Expand Down
18 changes: 18 additions & 0 deletions src/wasm/baseline/mips64/liftoff-assembler-mips64.h
Original file line number Diff line number Diff line change
Expand Up @@ -1754,6 +1754,18 @@ SIMD_BINOP(i64x2, i32x4_u, MSAU32)

#undef SIMD_BINOP

// Defines LiftoffAssembler::emit_<result>_extadd_pairwise_<input> for one
// extended-pairwise-add variant. The operation is unary: each emitter forwards
// its single source and its destination (as MSA W registers) to
// TurboAssembler::ExtAddPairwise together with the MSA source lane type.
#define SIMD_EXTADD_PAIRWISE(result_shape, input_shape, msa_type) \
  void LiftoffAssembler::emit_##result_shape##_extadd_pairwise_##input_shape( \
      LiftoffRegister dst, LiftoffRegister src) { \
    const auto output = dst.fp().toW(); \
    const auto input = src.fp().toW(); \
    TurboAssembler::ExtAddPairwise(msa_type, output, input); \
  }

SIMD_EXTADD_PAIRWISE(i16x8, i8x16_s, MSAS8)
SIMD_EXTADD_PAIRWISE(i16x8, i8x16_u, MSAU8)
SIMD_EXTADD_PAIRWISE(i32x4, i16x8_s, MSAS16)
SIMD_EXTADD_PAIRWISE(i32x4, i16x8_u, MSAU16)
#undef SIMD_EXTADD_PAIRWISE

void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Expand Down Expand Up @@ -1884,6 +1896,12 @@ void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
nor_v(dst.fp().toW(), dst.fp().toW(), dst.fp().toW());
}

// Emits i64x2.abs: writes the per-lane absolute value of |src| into |dst|.
// Clobbers kSimd128RegZero, which is (re)zeroed here before use.
void LiftoffAssembler::emit_i64x2_abs(LiftoffRegister dst,
                                      LiftoffRegister src) {
  const auto zero = kSimd128RegZero;
  const auto output = dst.fp().toW();
  const auto input = src.fp().toW();
  // x ^ x == 0, so this clears the scratch register regardless of its
  // previous contents.
  xor_v(zero, zero, zero);
  // NOTE(review): add_a_d presumably computes |input| + |zero| per 64-bit
  // lane (MSA ADD_A.D), i.e. |input| -- confirm against the MSA manual.
  add_a_d(output, input, zero);
}

void LiftoffAssembler::emit_f64x2_eq(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
fceq_d(dst.fp().toW(), lhs.fp().toW(), rhs.fp().toW());
Expand Down

0 comments on commit 42b5b40

Please sign in to comment.