Skip to content

Commit

Permalink
[AArch64] Add ARMv8.2-A FP16 vector instructions
Browse files Browse the repository at this point in the history
ARMv8.2-A adds 16-bit floating point versions of all existing SIMD
floating-point instructions. This is an optional extension, so all of
these instructions require the FeatureFullFP16 subtarget feature.

Note that VFP without SIMD is not a valid combination for any version of
ARMv8-A, but I have ensured that these instructions all depend on both
FeatureNEON and FeatureFullFP16 for consistency.

The ".2h" vector type specifier is now legal (for the scalar pairwise
reduction instructions), so some unrelated tests have been modified as
different error messages are emitted. This is not a problem as the
invalid operands are still caught.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255010 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
ostannard committed Dec 8, 2015
1 parent 2450ea1 commit 59ad77e
Show file tree
Hide file tree
Showing 31 changed files with 1,917 additions and 337 deletions.
576 changes: 402 additions & 174 deletions lib/Target/AArch64/AArch64InstrFormats.td

Large diffs are not rendered by default.

152 changes: 96 additions & 56 deletions lib/Target/AArch64/AArch64InstrInfo.td

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion lib/Target/AArch64/AArch64RegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ def FPR128 : RegisterClass<"AArch64",
// The lower 16 vector registers. Some instructions can only take registers
// in this range.
def FPR128_lo : RegisterClass<"AArch64",
[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16],
128, (trunc FPR128, 16)>;

// Pairs, triples, and quads of 64-bit vector registers.
Expand Down
2 changes: 2 additions & 0 deletions lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1921,6 +1921,8 @@ static bool isValidVectorKind(StringRef Name) {
.Case(".h", true)
.Case(".s", true)
.Case(".d", true)
// Needed for fp16 scalar pairwise reductions
.Case(".2h", true)
.Default(false);
}

Expand Down
153 changes: 152 additions & 1 deletion test/MC/AArch64/arm64-advsimd.s
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llvm-mc -triple arm64-apple-darwin -mattr=crypto -output-asm-variant=1 -show-encoding < %s | FileCheck %s
; RUN: llvm-mc -triple arm64-apple-darwin -mattr=crypto,fullfp16 -output-asm-variant=1 -show-encoding < %s | FileCheck %s

foo:

Expand Down Expand Up @@ -440,6 +440,106 @@ foo:
; CHECK: urshl.8b v0, v0, v0 ; encoding: [0x00,0x54,0x20,0x2e]
; CHECK: ushl.8b v0, v0, v0 ; encoding: [0x00,0x44,0x20,0x2e]

fabd.4h v0, v0, v0
facge.4h v0, v0, v0
facgt.4h v0, v0, v0
faddp.4h v0, v0, v0
fadd.4h v0, v0, v0
fcmeq.4h v0, v0, v0
fcmge.4h v0, v0, v0
fcmgt.4h v0, v0, v0
fdiv.4h v0, v0, v0
fmaxnmp.4h v0, v0, v0
fmaxnm.4h v0, v0, v0
fmaxp.4h v0, v0, v0
fmax.4h v0, v0, v0
fminnmp.4h v0, v0, v0
fminnm.4h v0, v0, v0
fminp.4h v0, v0, v0
fmin.4h v0, v0, v0
fmla.4h v0, v0, v0
fmls.4h v0, v0, v0
fmulx.4h v0, v0, v0
fmul.4h v0, v0, v0
frecps.4h v0, v0, v0
frsqrts.4h v0, v0, v0
fsub.4h v0, v0, v0

; CHECK: fabd.4h v0, v0, v0 ; encoding: [0x00,0x14,0xc0,0x2e]
; CHECK: facge.4h v0, v0, v0 ; encoding: [0x00,0x2c,0x40,0x2e]
; CHECK: facgt.4h v0, v0, v0 ; encoding: [0x00,0x2c,0xc0,0x2e]
; CHECK: faddp.4h v0, v0, v0 ; encoding: [0x00,0x14,0x40,0x2e]
; CHECK: fadd.4h v0, v0, v0 ; encoding: [0x00,0x14,0x40,0x0e]
; CHECK: fcmeq.4h v0, v0, v0 ; encoding: [0x00,0x24,0x40,0x0e]
; CHECK: fcmge.4h v0, v0, v0 ; encoding: [0x00,0x24,0x40,0x2e]
; CHECK: fcmgt.4h v0, v0, v0 ; encoding: [0x00,0x24,0xc0,0x2e]
; CHECK: fdiv.4h v0, v0, v0 ; encoding: [0x00,0x3c,0x40,0x2e]
; CHECK: fmaxnmp.4h v0, v0, v0 ; encoding: [0x00,0x04,0x40,0x2e]
; CHECK: fmaxnm.4h v0, v0, v0 ; encoding: [0x00,0x04,0x40,0x0e]
; CHECK: fmaxp.4h v0, v0, v0 ; encoding: [0x00,0x34,0x40,0x2e]
; CHECK: fmax.4h v0, v0, v0 ; encoding: [0x00,0x34,0x40,0x0e]
; CHECK: fminnmp.4h v0, v0, v0 ; encoding: [0x00,0x04,0xc0,0x2e]
; CHECK: fminnm.4h v0, v0, v0 ; encoding: [0x00,0x04,0xc0,0x0e]
; CHECK: fminp.4h v0, v0, v0 ; encoding: [0x00,0x34,0xc0,0x2e]
; CHECK: fmin.4h v0, v0, v0 ; encoding: [0x00,0x34,0xc0,0x0e]
; CHECK: fmla.4h v0, v0, v0 ; encoding: [0x00,0x0c,0x40,0x0e]
; CHECK: fmls.4h v0, v0, v0 ; encoding: [0x00,0x0c,0xc0,0x0e]
; CHECK: fmulx.4h v0, v0, v0 ; encoding: [0x00,0x1c,0x40,0x0e]
; CHECK: fmul.4h v0, v0, v0 ; encoding: [0x00,0x1c,0x40,0x2e]
; CHECK: frecps.4h v0, v0, v0 ; encoding: [0x00,0x3c,0x40,0x0e]
; CHECK: frsqrts.4h v0, v0, v0 ; encoding: [0x00,0x3c,0xc0,0x0e]
; CHECK: fsub.4h v0, v0, v0 ; encoding: [0x00,0x14,0xc0,0x0e]

fabd.8h v0, v0, v0
facge.8h v0, v0, v0
facgt.8h v0, v0, v0
faddp.8h v0, v0, v0
fadd.8h v0, v0, v0
fcmeq.8h v0, v0, v0
fcmge.8h v0, v0, v0
fcmgt.8h v0, v0, v0
fdiv.8h v0, v0, v0
fmaxnmp.8h v0, v0, v0
fmaxnm.8h v0, v0, v0
fmaxp.8h v0, v0, v0
fmax.8h v0, v0, v0
fminnmp.8h v0, v0, v0
fminnm.8h v0, v0, v0
fminp.8h v0, v0, v0
fmin.8h v0, v0, v0
fmla.8h v0, v0, v0
fmls.8h v0, v0, v0
fmulx.8h v0, v0, v0
fmul.8h v0, v0, v0
frecps.8h v0, v0, v0
frsqrts.8h v0, v0, v0
fsub.8h v0, v0, v0

; CHECK: fabd.8h v0, v0, v0 ; encoding: [0x00,0x14,0xc0,0x6e]
; CHECK: facge.8h v0, v0, v0 ; encoding: [0x00,0x2c,0x40,0x6e]
; CHECK: facgt.8h v0, v0, v0 ; encoding: [0x00,0x2c,0xc0,0x6e]
; CHECK: faddp.8h v0, v0, v0 ; encoding: [0x00,0x14,0x40,0x6e]
; CHECK: fadd.8h v0, v0, v0 ; encoding: [0x00,0x14,0x40,0x4e]
; CHECK: fcmeq.8h v0, v0, v0 ; encoding: [0x00,0x24,0x40,0x4e]
; CHECK: fcmge.8h v0, v0, v0 ; encoding: [0x00,0x24,0x40,0x6e]
; CHECK: fcmgt.8h v0, v0, v0 ; encoding: [0x00,0x24,0xc0,0x6e]
; CHECK: fdiv.8h v0, v0, v0 ; encoding: [0x00,0x3c,0x40,0x6e]
; CHECK: fmaxnmp.8h v0, v0, v0 ; encoding: [0x00,0x04,0x40,0x6e]
; CHECK: fmaxnm.8h v0, v0, v0 ; encoding: [0x00,0x04,0x40,0x4e]
; CHECK: fmaxp.8h v0, v0, v0 ; encoding: [0x00,0x34,0x40,0x6e]
; CHECK: fmax.8h v0, v0, v0 ; encoding: [0x00,0x34,0x40,0x4e]
; CHECK: fminnmp.8h v0, v0, v0 ; encoding: [0x00,0x04,0xc0,0x6e]
; CHECK: fminnm.8h v0, v0, v0 ; encoding: [0x00,0x04,0xc0,0x4e]
; CHECK: fminp.8h v0, v0, v0 ; encoding: [0x00,0x34,0xc0,0x6e]
; CHECK: fmin.8h v0, v0, v0 ; encoding: [0x00,0x34,0xc0,0x4e]
; CHECK: fmla.8h v0, v0, v0 ; encoding: [0x00,0x0c,0x40,0x4e]
; CHECK: fmls.8h v0, v0, v0 ; encoding: [0x00,0x0c,0xc0,0x4e]
; CHECK: fmulx.8h v0, v0, v0 ; encoding: [0x00,0x1c,0x40,0x4e]
; CHECK: fmul.8h v0, v0, v0 ; encoding: [0x00,0x1c,0x40,0x6e]
; CHECK: frecps.8h v0, v0, v0 ; encoding: [0x00,0x3c,0x40,0x4e]
; CHECK: frsqrts.8h v0, v0, v0 ; encoding: [0x00,0x3c,0xc0,0x4e]
; CHECK: fsub.8h v0, v0, v0 ; encoding: [0x00,0x14,0xc0,0x4e]

bif.8b v0, v0, v0
bit.8b v0, v0, v0
bsl.8b v0, v0, v0
Expand Down Expand Up @@ -568,6 +668,57 @@ foo:
; CHECK: shll2.4s v1, v2, #16 ; encoding: [0x41,0x38,0x61,0x6e]
; CHECK: shll2.2d v1, v2, #32 ; encoding: [0x41,0x38,0xa1,0x6e]

fabs.4h v0, v0
fneg.4h v0, v0
frecpe.4h v0, v0
frinta.4h v0, v0
frintx.4h v0, v0
frinti.4h v0, v0
frintm.4h v0, v0
frintn.4h v0, v0
frintp.4h v0, v0
frintz.4h v0, v0
frsqrte.4h v0, v0
fsqrt.4h v0, v0

; CHECK: fabs.4h v0, v0 ; encoding: [0x00,0xf8,0xf8,0x0e]
; CHECK: fneg.4h v0, v0 ; encoding: [0x00,0xf8,0xf8,0x2e]
; CHECK: frecpe.4h v0, v0 ; encoding: [0x00,0xd8,0xf9,0x0e]
; CHECK: frinta.4h v0, v0 ; encoding: [0x00,0x88,0x79,0x2e]
; CHECK: frintx.4h v0, v0 ; encoding: [0x00,0x98,0x79,0x2e]
; CHECK: frinti.4h v0, v0 ; encoding: [0x00,0x98,0xf9,0x2e]
; CHECK: frintm.4h v0, v0 ; encoding: [0x00,0x98,0x79,0x0e]
; CHECK: frintn.4h v0, v0 ; encoding: [0x00,0x88,0x79,0x0e]
; CHECK: frintp.4h v0, v0 ; encoding: [0x00,0x88,0xf9,0x0e]
; CHECK: frintz.4h v0, v0 ; encoding: [0x00,0x98,0xf9,0x0e]
; CHECK: frsqrte.4h v0, v0 ; encoding: [0x00,0xd8,0xf9,0x2e]
; CHECK: fsqrt.4h v0, v0 ; encoding: [0x00,0xf8,0xf9,0x2e]

fabs.8h v0, v0
fneg.8h v0, v0
frecpe.8h v0, v0
frinta.8h v0, v0
frintx.8h v0, v0
frinti.8h v0, v0
frintm.8h v0, v0
frintn.8h v0, v0
frintp.8h v0, v0
frintz.8h v0, v0
frsqrte.8h v0, v0
fsqrt.8h v0, v0

; CHECK: fabs.8h v0, v0 ; encoding: [0x00,0xf8,0xf8,0x4e]
; CHECK: fneg.8h v0, v0 ; encoding: [0x00,0xf8,0xf8,0x6e]
; CHECK: frecpe.8h v0, v0 ; encoding: [0x00,0xd8,0xf9,0x4e]
; CHECK: frinta.8h v0, v0 ; encoding: [0x00,0x88,0x79,0x6e]
; CHECK: frintx.8h v0, v0 ; encoding: [0x00,0x98,0x79,0x6e]
; CHECK: frinti.8h v0, v0 ; encoding: [0x00,0x98,0xf9,0x6e]
; CHECK: frintm.8h v0, v0 ; encoding: [0x00,0x98,0x79,0x4e]
; CHECK: frintn.8h v0, v0 ; encoding: [0x00,0x88,0x79,0x4e]
; CHECK: frintp.8h v0, v0 ; encoding: [0x00,0x88,0xf9,0x4e]
; CHECK: frintz.8h v0, v0 ; encoding: [0x00,0x98,0xf9,0x4e]
; CHECK: frsqrte.8h v0, v0 ; encoding: [0x00,0xd8,0xf9,0x6e]
; CHECK: fsqrt.8h v0, v0 ; encoding: [0x00,0xf8,0xf9,0x6e]

cmeq.8b v0, v0, #0
cmeq.16b v0, v0, #0
Expand Down
18 changes: 0 additions & 18 deletions test/MC/AArch64/armv8.1a-rdma.s
Original file line number Diff line number Diff line change
Expand Up @@ -26,27 +26,9 @@
sqrdmlsh v0.8s, v1.8s, v2.8s
sqrdmlah v0.2s, v1.4h, v2.8h
sqrdmlsh v0.4s, v1.8h, v2.2s
// CHECK-ERROR: error: invalid vector kind qualifier
// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid vector kind qualifier
// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid vector kind qualifier
// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid vector kind qualifier
// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid vector kind qualifier
// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid vector kind qualifier
// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
// CHECK-ERROR: ^
Expand Down
42 changes: 42 additions & 0 deletions test/MC/AArch64/fullfp16-diagnostics.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fullfp16 < %s 2> %t
// RUN: FileCheck < %t %s

// Negative tests for the half-precision (FP16) by-element forms of
// fmla, fmls, fmul and fmulx.
//
// FP16 support is enabled explicitly in the RUN line above so that every
// rejection below is caused by the operand itself and not by the FP16
// feature being unavailable. Each instruction uses v16 or v17 as the
// indexed ".h" register.
// NOTE(review): this expects the indexed ".h" operand to be limited to
// v0-v15; confirm against the by-element instruction definitions, which are
// not part of this file.

fmla v0.4h, v1.4h, v16.h[3]
fmla v2.8h, v3.8h, v17.h[6]

// CHECK: error: invalid operand for instruction
// CHECK-NEXT: fmla v0.4h, v1.4h, v16.h[3]
// CHECK-NEXT: ^
// CHECK: error: invalid operand for instruction
// CHECK-NEXT: fmla v2.8h, v3.8h, v17.h[6]
// CHECK-NEXT: ^

fmls v0.4h, v1.4h, v16.h[3]
fmls v2.8h, v3.8h, v17.h[6]

// CHECK: error: invalid operand for instruction
// CHECK-NEXT: fmls v0.4h, v1.4h, v16.h[3]
// CHECK-NEXT: ^
// CHECK: error: invalid operand for instruction
// CHECK-NEXT: fmls v2.8h, v3.8h, v17.h[6]
// CHECK-NEXT: ^

fmul v0.4h, v1.4h, v16.h[3]
fmul v2.8h, v3.8h, v17.h[6]

// CHECK: error: invalid operand for instruction
// CHECK-NEXT: fmul v0.4h, v1.4h, v16.h[3]
// CHECK-NEXT: ^
// CHECK: error: invalid operand for instruction
// CHECK-NEXT: fmul v2.8h, v3.8h, v17.h[6]
// CHECK-NEXT: ^

fmulx v0.4h, v1.4h, v16.h[3]
fmulx v2.8h, v3.8h, v17.h[6]

// CHECK: error: invalid operand for instruction
// CHECK-NEXT: fmulx v0.4h, v1.4h, v16.h[3]
// CHECK-NEXT: ^
// CHECK: error: invalid operand for instruction
// CHECK-NEXT: fmulx v2.8h, v3.8h, v17.h[6]
// CHECK-NEXT: ^
Loading

0 comments on commit 59ad77e

Please sign in to comment.