[AArch64] Extend load insert into zero patterns to SVE.
This extends the patterns from D144086 for loading into the zeroth lane of a zero vector so that they also cover SVE; the new patterns work in the same way as the existing ones. Only full-length vectors are handled here, not the narrower floating-point vector types.
davemgreen committed Mar 6, 2023
1 parent 7975e3b commit a10ac65
Showing 2 changed files with 381 additions and 11 deletions.
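
For context, D144086 added this load-into-zero-vector folding for fixed-length (NEON) vectors. A minimal sketch of the IR it applies to, with an illustrative function name (this is an assumed example, not taken from the commit's test file):

; Inserting a loaded i32 into lane 0 of an all-zero <4 x i32>.
; Because a scalar "ldr s0, [x0]" implicitly zeroes the rest of v0,
; the whole function can select to just the load plus ret, with no
; separate "movi v0.2d, #0" to materialize the zero vector.
define <4 x i32> @load_insert_zero_neon(ptr %p) {
  %l = load i32, ptr %p, align 4
  %v = insertelement <4 x i32> zeroinitializer, i32 %l, i64 0
  ret <4 x i32> %v
}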
llvm/lib/Target/AArch64/AArch64InstrInfo.td: 29 changes (19 additions, 10 deletions)
@@ -3326,8 +3326,8 @@ def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
 
 // A LDR will implicitly zero the rest of the vector, so vector_insert(zeros,
 // load, 0) can use a single load.
-multiclass LoadInsertZeroPatterns<SDPatternOperator LoadOp, ValueType VT, ValueType HVT, ValueType ScalarVT,
-                                  Instruction LoadInst, Instruction UnscaledLoadInst,
+multiclass LoadInsertZeroPatterns<SDPatternOperator LoadOp, ValueType VT, ValueType HVT, ValueType SVT,
+                                  ValueType ScalarVT, Instruction LoadInst, Instruction UnscaledLoadInst,
                                   ComplexPattern Addr, ComplexPattern UnscaledAddr, Operand AddrImm,
                                   SubRegIndex SubReg> {
   // Scaled
@@ -3347,23 +3347,32 @@ multiclass LoadInsertZeroPatterns<SDPatternOperator LoadOp, ValueType VT, ValueT
   def : Pat <(vector_insert (HVT immAllZerosV),
                  (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
              (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
+
+  // SVE patterns
+  def : Pat <(vector_insert (SVT immAllZerosV),
+                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
+             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
+  // Unscaled
+  def : Pat <(vector_insert (SVT immAllZerosV),
+                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
+             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
 }
 
-defm : LoadInsertZeroPatterns<extloadi8,  v16i8,  v8i8,   i32,  LDRBui, LDRBui,
+defm : LoadInsertZeroPatterns<extloadi8,  v16i8,  v8i8,   nxv16i8,  i32,  LDRBui, LDRBui,
                               am_indexed8, am_unscaled8, uimm12s1, bsub>;
-defm : LoadInsertZeroPatterns<extloadi16, v8i16,  v4i16,  i32,  LDRHui, LDURHi,
+defm : LoadInsertZeroPatterns<extloadi16, v8i16,  v4i16,  nxv8i16,  i32,  LDRHui, LDURHi,
                               am_indexed16, am_unscaled16, uimm12s2, hsub>;
-defm : LoadInsertZeroPatterns<load,       v4i32,  v2i32,  i32,  LDRSui, LDURSi,
+defm : LoadInsertZeroPatterns<load,       v4i32,  v2i32,  nxv4i32,  i32,  LDRSui, LDURSi,
                               am_indexed32, am_unscaled32, uimm12s4, ssub>;
-defm : LoadInsertZeroPatterns<load,       v2i64,  v1i64,  i64,  LDRDui, LDURDi,
+defm : LoadInsertZeroPatterns<load,       v2i64,  v1i64,  nxv2i64,  i64,  LDRDui, LDURDi,
                               am_indexed64, am_unscaled64, uimm12s8, dsub>;
-defm : LoadInsertZeroPatterns<load,       v8f16,  v4f16,  f16,  LDRHui, LDURHi,
+defm : LoadInsertZeroPatterns<load,       v8f16,  v4f16,  nxv8f16,  f16,  LDRHui, LDURHi,
                               am_indexed16, am_unscaled16, uimm12s2, hsub>;
-defm : LoadInsertZeroPatterns<load,       v8bf16, v4bf16, bf16, LDRHui, LDURHi,
+defm : LoadInsertZeroPatterns<load,       v8bf16, v4bf16, nxv8bf16, bf16, LDRHui, LDURHi,
                               am_indexed16, am_unscaled16, uimm12s2, hsub>;
-defm : LoadInsertZeroPatterns<load,       v4f32,  v2f32,  f32,  LDRSui, LDURSi,
+defm : LoadInsertZeroPatterns<load,       v4f32,  v2f32,  nxv4f32,  f32,  LDRSui, LDURSi,
                               am_indexed32, am_unscaled32, uimm12s4, ssub>;
-defm : LoadInsertZeroPatterns<load,       v2f64,  v1f64,  f64,  LDRDui, LDURDi,
+defm : LoadInsertZeroPatterns<load,       v2f64,  v1f64,  nxv2f64,  f64,  LDRDui, LDURDi,
                               am_indexed64, am_unscaled64, uimm12s8, dsub>;
 
 // Pre-fetch.
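
The same reasoning carries over to SVE because a scalar load into an FP/SIMD register also zeroes the upper bits of the corresponding Z register, so lane 0 of a zeroed scalable vector can be filled with one plain LDR. A sketch of the scalable case (function name and offset are illustrative, not from the commit's tests):

; With these patterns, the insert below should select to a single
; scaled load, "ldr s0, [x0, #4]"; the unscaled pattern handles
; offsets the scaled form cannot encode, e.g. negative ones, via ldur.
define <vscale x 4 x i32> @load_insert_zero_sve(ptr %p) {
  %a = getelementptr inbounds i32, ptr %p, i64 1
  %l = load i32, ptr %a, align 4
  %v = insertelement <vscale x 4 x i32> zeroinitializer, i32 %l, i64 0
  ret <vscale x 4 x i32> %v
}

Only the full-width SVE container types (nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv8bf16, nxv4f32, nxv2f64) are instantiated above, matching the commit message's note that the narrower floating-point vector types are not handled here.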
