Skip to content

Commit

Permalink
[ARM-BE] Generate correct NEON intrinsics for big endian systems.
Browse files Browse the repository at this point in the history
The NEON intrinsics in arm_neon.h are designed to work on vectors
"as-if" loaded by (V)LDR. We load vectors "as-if" (V)LD1, so the
intrinsics are currently incorrect.

This patch adds big-endian versions of the intrinsics that does the
"obvious but dumb" thing of reversing all vector inputs and all
vector outputs. This will produce extra REVs, but we trust the
optimizer to remove them.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@211893 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
James Molloy committed Jun 27, 2014
1 parent e1a4af7 commit f19ae32
Show file tree
Hide file tree
Showing 3 changed files with 192 additions and 64 deletions.
11 changes: 9 additions & 2 deletions include/clang/Basic/arm_neon.td
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ class Inst <string n, string p, string t, Operation o> {

Operation Operation = o;
bit CartesianProductOfTypes = 0;
bit BigEndianSafe = 0;
bit isShift = 0;
bit isScalarShift = 0;
bit isScalarNarrowShift = 0;
Expand Down Expand Up @@ -654,7 +655,9 @@ def VSET_LANE : IInst<"vset_lane", "dsdi",

////////////////////////////////////////////////////////////////////////////////
// E.3.18 Initialize a vector from bit pattern
def VCREATE : NoTestOpInst<"vcreate", "dl", "csihfUcUsUiUlPcPsl", OP_CAST>;
def VCREATE : NoTestOpInst<"vcreate", "dl", "csihfUcUsUiUlPcPsl", OP_CAST> {
let BigEndianSafe = 1;
}

////////////////////////////////////////////////////////////////////////////////
// E.3.19 Set all lanes to same value
Expand Down Expand Up @@ -791,6 +794,7 @@ def VREINTERPRET
"csilUcUsUiUlhfPcPsQcQsQiQlQUcQUsQUiQUlQhQfQPcQPs", OP_REINT> {
let CartesianProductOfTypes = 1;
let ArchGuard = "!defined(__aarch64__)";
let BigEndianSafe = 1;
}

////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -1092,7 +1096,9 @@ def COMBINE : NoTestOpInst<"vcombine", "kdd", "dPl", OP_CONC>;

////////////////////////////////////////////////////////////////////////////////
//Initialize a vector from bit pattern
def CREATE : NoTestOpInst<"vcreate", "dl", "dPl", OP_CAST>;
def CREATE : NoTestOpInst<"vcreate", "dl", "dPl", OP_CAST> {
let BigEndianSafe = 1;
}

////////////////////////////////////////////////////////////////////////////////

Expand Down Expand Up @@ -1256,6 +1262,7 @@ def VVREINTERPRET
: NoTestOpInst<"vreinterpret", "dd",
"csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", OP_REINT> {
let CartesianProductOfTypes = 1;
let BigEndianSafe = 1;
let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__)";
}

Expand Down
11 changes: 11 additions & 0 deletions test/CodeGen/arm64-lanes.c
Original file line number Diff line number Diff line change
@@ -1,63 +1,74 @@
// RUN: %clang_cc1 -O3 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -O3 -triple arm64_be-linux-gnu -target-feature +neon -ffreestanding -emit-llvm -o - %s | FileCheck %s --check-prefix CHECK-BE

#include <arm_neon.h>

// CHECK-LABEL: @test_vdupb_lane_s8
int8_t test_vdupb_lane_s8(int8x8_t src) {
return vdupb_lane_s8(src, 2);
// CHECK: extractelement <8 x i8> %src, i32 2
// CHECK-BE: extractelement <8 x i8> %src, i32 5
}

// CHECK-LABEL: @test_vdupb_lane_u8
uint8_t test_vdupb_lane_u8(uint8x8_t src) {
return vdupb_lane_u8(src, 2);
// CHECK: extractelement <8 x i8> %src, i32 2
// CHECK-BE: extractelement <8 x i8> %src, i32 5
}

// CHECK-LABEL: @test_vduph_lane_s16
int16_t test_vduph_lane_s16(int16x4_t src) {
return vduph_lane_s16(src, 2);
// CHECK: extractelement <4 x i16> %src, i32 2
// CHECK-BE: extractelement <4 x i16> %src, i32 1
}

// CHECK-LABEL: @test_vduph_lane_u16
uint16_t test_vduph_lane_u16(uint16x4_t src) {
return vduph_lane_u16(src, 2);
// CHECK: extractelement <4 x i16> %src, i32 2
// CHECK-BE: extractelement <4 x i16> %src, i32 1
}

// CHECK-LABEL: @test_vdups_lane_s32
int32_t test_vdups_lane_s32(int32x2_t src) {
return vdups_lane_s32(src, 0);
// CHECK: extractelement <2 x i32> %src, i32 0
// CHECK-BE: extractelement <2 x i32> %src, i32 1
}

// CHECK-LABEL: @test_vdups_lane_u32
uint32_t test_vdups_lane_u32(uint32x2_t src) {
return vdups_lane_u32(src, 0);
// CHECK: extractelement <2 x i32> %src, i32 0
// CHECK-BE: extractelement <2 x i32> %src, i32 1
}

// CHECK-LABEL: @test_vdups_lane_f32
float32_t test_vdups_lane_f32(float32x2_t src) {
return vdups_lane_f32(src, 0);
// CHECK: extractelement <2 x float> %src, i32 0
// CHECK-BE: extractelement <2 x float> %src, i32 1
}

// CHECK-LABEL: @test_vdupd_lane_s64
int64_t test_vdupd_lane_s64(int64x1_t src) {
return vdupd_lane_s64(src, 0);
// CHECK: extractelement <1 x i64> %src, i32 0
// CHECK-BE: extractelement <1 x i64> %src, i32 0
}

// CHECK-LABEL: @test_vdupd_lane_u64
uint64_t test_vdupd_lane_u64(uint64x1_t src) {
return vdupd_lane_u64(src, 0);
// CHECK: extractelement <1 x i64> %src, i32 0
// CHECK-BE: extractelement <1 x i64> %src, i32 0
}

// CHECK-LABEL: @test_vdupd_lane_f64
float64_t test_vdupd_lane_f64(float64x1_t src) {
return vdupd_lane_f64(src, 0);
// CHECK: extractelement <1 x double> %src, i32 0
// CHECK-BE: extractelement <1 x double> %src, i32 0
}
Loading

0 comments on commit f19ae32

Please sign in to comment.