Skip to content

Commit

Permalink
Improve ARM64 vector creation
Browse files Browse the repository at this point in the history
This patch improves the performance of vector creation in cases where
several of the lanes in the vector are a constant floating point value. It
also includes new patterns to fold together some of the instructions when the
value is 0.0f. Test cases included.

rdar://16349427

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206496 91177308-0d34-0410-b5e6-96231b3b80d8
lgerbarg committed Apr 17, 2014
1 parent 4af58f1 commit 5540570
Showing 3 changed files with 38 additions and 2 deletions.
4 changes: 2 additions & 2 deletions lib/Target/ARM64/ARM64ISelLowering.cpp
Original file line number Diff line number Diff line change
@@ -4891,7 +4891,7 @@ SDValue ARM64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
isConstant = false;

if (isa<ConstantSDNode>(V)) {
if (isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V)) {
++NumConstantLanes;
if (!ConstantValue.getNode())
ConstantValue = V;
@@ -4955,7 +4955,7 @@ SDValue ARM64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
if (!isa<ConstantSDNode>(V)) {
if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V)) {
// Note that type legalization likely mucked about with the VT of the
// source operand, so we may have to convert it here before inserting.
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
3 changes: 3 additions & 0 deletions lib/Target/ARM64/ARM64InstrInfo.td
Original file line number Diff line number Diff line change
@@ -3472,6 +3472,9 @@ def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;

// Select a dup of floating-point zero (v2f64 / v4f32) as a single MOVIv2d_ns
// with an immediate of 0, i.e. an all-zero 128-bit vector.
def : Pat<(v2f64 (ARM64dup (f64 fpimm0))), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4f32 (ARM64dup (f32 fpimm0))), (MOVIv2d_ns (i32 0))>;

// EDIT per word & halfword: 2s, 4h, 4s, & 8h
defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;
def : Pat<(v2i32 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
33 changes: 33 additions & 0 deletions test/CodeGen/ARM64/vector-insertion.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
; RUN: llc -march=arm64 -mcpu=generic < %s | FileCheck %s

; test0f: builds a <4 x float> whose lanes 1-3 are the constant 0.0 and whose
; lane 0 is the runtime value %a, then stores the vector through %x.
define void @test0f(float* nocapture %x, float %a) #0 {
entry:
%0 = insertelement <4 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %a, i32 0
%1 = bitcast float* %x to <4 x float>*
store <4 x float> %0, <4 x float>* %1, align 16
ret void

; Expected code: one movi zeroes a vector register, lane 0 is then inserted
; into that same register, and that register is what gets stored to [x0].
; CHECK-LABEL: test0f
; CHECK: movi.2d v[[TEMP:[0-9]+]], #0000000000000000
; CHECK: ins.s v[[TEMP]][0], v{{[0-9]+}}[0]
; CHECK: str q[[TEMP]], [x0]
; CHECK: ret


}


; test1f: same shape as a constant-splat-plus-one-variable-lane vector, but the
; constant lanes 1-3 hold 1.0 instead of zero; lane 0 is the runtime value %a.
define void @test1f(float* nocapture %x, float %a) #0 {
entry:
%0 = insertelement <4 x float> <float undef, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, float %a, i32 0
%1 = bitcast float* %x to <4 x float>*
store <4 x float> %0, <4 x float>* %1, align 16
ret void

; Expected code: 1.0 is materialized in a scalar register with fmov, splatted
; to all four lanes with dup, lane 0 is overwritten from v0, and the result is
; stored to [x0].
; CHECK-LABEL: test1f
; CHECK: fmov s[[TEMP:[0-9]+]], #1.000000e+00
; CHECK: dup.4s v[[TEMP2:[0-9]+]], v[[TEMP]][0]
; CHECK: ins.s v[[TEMP2]][0], v0[0]
; CHECK: str q[[TEMP2]], [x0]
; CHECK: ret
}

0 comments on commit 5540570

Please sign in to comment.