forked from llvm-mirror/clang
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[ARM,AArch64] Add intrinsics for dot product instructions
The ACLE spec which describes these intrinsics hasn't been published yet, but this is based on the final draft which will be published soon, and these have already been implemented by GCC. Differential revision: https://reviews.llvm.org/D46109 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@331039 91177308-0d34-0410-b5e6-96231b3b80d8
- Loading branch information
Showing
6 changed files
with
238 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +dotprod \ | ||
// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -instcombine | FileCheck %s | ||
|
||
// REQUIRES: aarch64-registered-target | ||
|
||
// Test AArch64 Armv8.2-A dot product intrinsics | ||
|
||
#include <arm_neon.h> | ||
|
||
// Verifies that vdot_u32(a, b, c) lowers to a call to the | ||
// llvm.aarch64.neon.udot.v2i32.v8i8 intrinsic on the unmodified arguments. | ||
uint32x2_t test_vdot_u32(uint32x2_t a, uint8x8_t b, uint8x8_t c) { | ||
// CHECK-LABEL: define <2 x i32> @test_vdot_u32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) | ||
// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) | ||
// CHECK: ret <2 x i32> [[RESULT]] | ||
return vdot_u32(a, b, c); | ||
} | ||
|
||
// Verifies that vdotq_u32(a, b, c) lowers to a call to the | ||
// llvm.aarch64.neon.udot.v4i32.v16i8 intrinsic on the unmodified arguments. | ||
uint32x4_t test_vdotq_u32(uint32x4_t a, uint8x16_t b, uint8x16_t c) { | ||
// CHECK-LABEL: define <4 x i32> @test_vdotq_u32(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) | ||
// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) | ||
// CHECK: ret <4 x i32> [[RESULT]] | ||
return vdotq_u32(a, b, c); | ||
} | ||
|
||
// Verifies that vdot_s32(a, b, c) lowers to a call to the | ||
// llvm.aarch64.neon.sdot.v2i32.v8i8 intrinsic on the unmodified arguments. | ||
int32x2_t test_vdot_s32(int32x2_t a, int8x8_t b, int8x8_t c) { | ||
// CHECK-LABEL: define <2 x i32> @test_vdot_s32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) | ||
// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) | ||
// CHECK: ret <2 x i32> [[RESULT]] | ||
return vdot_s32(a, b, c); | ||
} | ||
|
||
// Verifies that vdotq_s32(a, b, c) lowers to a call to the | ||
// llvm.aarch64.neon.sdot.v4i32.v16i8 intrinsic on the unmodified arguments. | ||
int32x4_t test_vdotq_s32(int32x4_t a, int8x16_t b, int8x16_t c) { | ||
// CHECK-LABEL: define <4 x i32> @test_vdotq_s32(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) | ||
// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) | ||
// CHECK: ret <4 x i32> [[RESULT]] | ||
return vdotq_s32(a, b, c); | ||
} | ||
|
||
// Verifies vdot_lane_u32 with lane index 1: the expected IR reinterprets c as | ||
// <2 x i32>, replicates element 1 across a <2 x i32> vector, casts it back to | ||
// <8 x i8>, and passes it as the last operand of the AArch64 udot intrinsic. | ||
uint32x2_t test_vdot_lane_u32(uint32x2_t a, uint8x8_t b, uint8x8_t c) { | ||
// CHECK-LABEL: define <2 x i32> @test_vdot_lane_u32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) | ||
// CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32> | ||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> undef, <2 x i32> <i32 1, i32 1> | ||
// CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> | ||
// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]]) | ||
// CHECK: ret <2 x i32> [[RESULT]] | ||
return vdot_lane_u32(a, b, c, 1); | ||
} | ||
|
||
// Verifies vdotq_lane_u32 with lane index 1: the expected IR reinterprets the | ||
// 64-bit c as <2 x i32>, replicates element 1 across a <4 x i32> vector, casts | ||
// it to <16 x i8>, and passes it as the last operand of the AArch64 udot intrinsic. | ||
uint32x4_t test_vdotq_lane_u32(uint32x4_t a, uint8x16_t b, uint8x8_t c) { | ||
// CHECK-LABEL: define <4 x i32> @test_vdotq_lane_u32(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c) | ||
// CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32> | ||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> | ||
// CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> | ||
// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]]) | ||
// CHECK: ret <4 x i32> [[RESULT]] | ||
return vdotq_lane_u32(a, b, c, 1); | ||
} | ||
|
||
// Verifies vdot_laneq_u32 with lane index 1: the expected IR reinterprets the | ||
// 128-bit c as <4 x i32>, replicates element 1 across a <2 x i32> vector, casts | ||
// it to <8 x i8>, and passes it as the last operand of the AArch64 udot intrinsic. | ||
uint32x2_t test_vdot_laneq_u32(uint32x2_t a, uint8x8_t b, uint8x16_t c) { | ||
// CHECK-LABEL: define <2 x i32> @test_vdot_laneq_u32(<2 x i32> %a, <8 x i8> %b, <16 x i8> %c) | ||
// CHECK: [[CAST1:%.*]] = bitcast <16 x i8> %c to <4 x i32> | ||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> undef, <2 x i32> <i32 1, i32 1> | ||
// CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> | ||
// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]]) | ||
// CHECK: ret <2 x i32> [[RESULT]] | ||
return vdot_laneq_u32(a, b, c, 1); | ||
} | ||
|
||
// Verifies vdotq_laneq_u32 with lane index 1: the expected IR reinterprets c as | ||
// <4 x i32>, replicates element 1 across a <4 x i32> vector, casts it back to | ||
// <16 x i8>, and passes it as the last operand of the AArch64 udot intrinsic. | ||
uint32x4_t test_vdotq_laneq_u32(uint32x4_t a, uint8x16_t b, uint8x16_t c) { | ||
// CHECK-LABEL: define <4 x i32> @test_vdotq_laneq_u32(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) | ||
// CHECK: [[CAST1:%.*]] = bitcast <16 x i8> %c to <4 x i32> | ||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> | ||
// CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> | ||
// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]]) | ||
// CHECK: ret <4 x i32> [[RESULT]] | ||
return vdotq_laneq_u32(a, b, c, 1); | ||
} | ||
|
||
// Verifies vdot_lane_s32 with lane index 1: the expected IR reinterprets c as | ||
// <2 x i32>, replicates element 1 across a <2 x i32> vector, casts it back to | ||
// <8 x i8>, and passes it as the last operand of the AArch64 sdot intrinsic. | ||
int32x2_t test_vdot_lane_s32(int32x2_t a, int8x8_t b, int8x8_t c) { | ||
// CHECK-LABEL: define <2 x i32> @test_vdot_lane_s32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) | ||
// CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32> | ||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> undef, <2 x i32> <i32 1, i32 1> | ||
// CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> | ||
// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]]) | ||
// CHECK: ret <2 x i32> [[RESULT]] | ||
return vdot_lane_s32(a, b, c, 1); | ||
} | ||
|
||
// Verifies vdotq_lane_s32 with lane index 1: the expected IR reinterprets the | ||
// 64-bit c as <2 x i32>, replicates element 1 across a <4 x i32> vector, casts | ||
// it to <16 x i8>, and passes it as the last operand of the AArch64 sdot intrinsic. | ||
int32x4_t test_vdotq_lane_s32(int32x4_t a, int8x16_t b, int8x8_t c) { | ||
// CHECK-LABEL: define <4 x i32> @test_vdotq_lane_s32(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c) | ||
// CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32> | ||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> | ||
// CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> | ||
// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]]) | ||
// CHECK: ret <4 x i32> [[RESULT]] | ||
return vdotq_lane_s32(a, b, c, 1); | ||
} | ||
|
||
// Verifies vdot_laneq_s32 with lane index 1: the expected IR reinterprets the | ||
// 128-bit c as <4 x i32>, replicates element 1 across a <2 x i32> vector, casts | ||
// it to <8 x i8>, and passes it as the last operand of the AArch64 sdot intrinsic. | ||
int32x2_t test_vdot_laneq_s32(int32x2_t a, int8x8_t b, int8x16_t c) { | ||
// CHECK-LABEL: define <2 x i32> @test_vdot_laneq_s32(<2 x i32> %a, <8 x i8> %b, <16 x i8> %c) | ||
// CHECK: [[CAST1:%.*]] = bitcast <16 x i8> %c to <4 x i32> | ||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> undef, <2 x i32> <i32 1, i32 1> | ||
// CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> | ||
// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]]) | ||
// CHECK: ret <2 x i32> [[RESULT]] | ||
return vdot_laneq_s32(a, b, c, 1); | ||
} | ||
|
||
// Verifies vdotq_laneq_s32 with lane index 1: the expected IR reinterprets c as | ||
// <4 x i32>, replicates element 1 across a <4 x i32> vector, casts it back to | ||
// <16 x i8>, and passes it as the last operand of the AArch64 sdot intrinsic. | ||
int32x4_t test_vdotq_laneq_s32(int32x4_t a, int8x16_t b, int8x16_t c) { | ||
// CHECK-LABEL: define <4 x i32> @test_vdotq_laneq_s32(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) | ||
// CHECK: [[CAST1:%.*]] = bitcast <16 x i8> %c to <4 x i32> | ||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> | ||
// CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> | ||
// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]]) | ||
// CHECK: ret <4 x i32> [[RESULT]] | ||
return vdotq_laneq_s32(a, b, c, 1); | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
// RUN: %clang_cc1 -triple armv8-linux-gnueabihf -target-cpu cortex-a75 -target-feature +dotprod \ | ||
// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -instcombine | FileCheck %s | ||
|
||
// REQUIRES: arm-registered-target | ||
|
||
// Test ARM (AArch32) Armv8.2-A dot product intrinsics | ||
|
||
#include <arm_neon.h> | ||
|
||
// Verifies that vdot_u32(a, b, c) lowers to a call to the | ||
// llvm.arm.neon.udot.v2i32.v8i8 intrinsic on the unmodified arguments. | ||
uint32x2_t test_vdot_u32(uint32x2_t a, uint8x8_t b, uint8x8_t c) { | ||
// CHECK-LABEL: define <2 x i32> @test_vdot_u32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) | ||
// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.arm.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) | ||
// CHECK: ret <2 x i32> [[RESULT]] | ||
return vdot_u32(a, b, c); | ||
} | ||
|
||
// Verifies that vdotq_u32(a, b, c) lowers to a call to the | ||
// llvm.arm.neon.udot.v4i32.v16i8 intrinsic on the unmodified arguments. | ||
uint32x4_t test_vdotq_u32(uint32x4_t a, uint8x16_t b, uint8x16_t c) { | ||
// CHECK-LABEL: define <4 x i32> @test_vdotq_u32(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) | ||
// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.arm.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) | ||
// CHECK: ret <4 x i32> [[RESULT]] | ||
return vdotq_u32(a, b, c); | ||
} | ||
|
||
// Verifies that vdot_s32(a, b, c) lowers to a call to the | ||
// llvm.arm.neon.sdot.v2i32.v8i8 intrinsic on the unmodified arguments. | ||
int32x2_t test_vdot_s32(int32x2_t a, int8x8_t b, int8x8_t c) { | ||
// CHECK-LABEL: define <2 x i32> @test_vdot_s32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) | ||
// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.arm.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) | ||
// CHECK: ret <2 x i32> [[RESULT]] | ||
return vdot_s32(a, b, c); | ||
} | ||
|
||
// Verifies that vdotq_s32(a, b, c) lowers to a call to the | ||
// llvm.arm.neon.sdot.v4i32.v16i8 intrinsic on the unmodified arguments. | ||
int32x4_t test_vdotq_s32(int32x4_t a, int8x16_t b, int8x16_t c) { | ||
// CHECK-LABEL: define <4 x i32> @test_vdotq_s32(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) | ||
// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.arm.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> %c) | ||
// CHECK: ret <4 x i32> [[RESULT]] | ||
return vdotq_s32(a, b, c); | ||
} | ||
|
||
// Verifies vdot_lane_u32 with lane index 1: the expected IR reinterprets c as | ||
// <2 x i32>, replicates element 1 across a <2 x i32> vector, casts it back to | ||
// <8 x i8>, and passes it as the last operand of the ARM udot intrinsic. | ||
uint32x2_t test_vdot_lane_u32(uint32x2_t a, uint8x8_t b, uint8x8_t c) { | ||
// CHECK-LABEL: define <2 x i32> @test_vdot_lane_u32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) | ||
// CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32> | ||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> undef, <2 x i32> <i32 1, i32 1> | ||
// CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> | ||
// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.arm.neon.udot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]]) | ||
// CHECK: ret <2 x i32> [[RESULT]] | ||
return vdot_lane_u32(a, b, c, 1); | ||
} | ||
|
||
// Verifies vdotq_lane_u32 with lane index 1: the expected IR reinterprets the | ||
// 64-bit c as <2 x i32>, replicates element 1 across a <4 x i32> vector, casts | ||
// it to <16 x i8>, and passes it as the last operand of the ARM udot intrinsic. | ||
uint32x4_t test_vdotq_lane_u32(uint32x4_t a, uint8x16_t b, uint8x8_t c) { | ||
// CHECK-LABEL: define <4 x i32> @test_vdotq_lane_u32(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c) | ||
// CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32> | ||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> | ||
// CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> | ||
// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.arm.neon.udot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]]) | ||
// CHECK: ret <4 x i32> [[RESULT]] | ||
return vdotq_lane_u32(a, b, c, 1); | ||
} | ||
|
||
// Verifies vdot_lane_s32 with lane index 1: the expected IR reinterprets c as | ||
// <2 x i32>, replicates element 1 across a <2 x i32> vector, casts it back to | ||
// <8 x i8>, and passes it as the last operand of the ARM sdot intrinsic. | ||
int32x2_t test_vdot_lane_s32(int32x2_t a, int8x8_t b, int8x8_t c) { | ||
// CHECK-LABEL: define <2 x i32> @test_vdot_lane_s32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) | ||
// CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32> | ||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> undef, <2 x i32> <i32 1, i32 1> | ||
// CHECK: [[CAST2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> | ||
// CHECK: [[RESULT:%.*]] = call <2 x i32> @llvm.arm.neon.sdot.v2i32.v8i8(<2 x i32> %a, <8 x i8> %b, <8 x i8> [[CAST2]]) | ||
// CHECK: ret <2 x i32> [[RESULT]] | ||
return vdot_lane_s32(a, b, c, 1); | ||
} | ||
|
||
// Verifies vdotq_lane_s32 with lane index 1: the expected IR reinterprets the | ||
// 64-bit c as <2 x i32>, replicates element 1 across a <4 x i32> vector, casts | ||
// it to <16 x i8>, and passes it as the last operand of the ARM sdot intrinsic. | ||
int32x4_t test_vdotq_lane_s32(int32x4_t a, int8x16_t b, int8x8_t c) { | ||
// CHECK-LABEL: define <4 x i32> @test_vdotq_lane_s32(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c) | ||
// CHECK: [[CAST1:%.*]] = bitcast <8 x i8> %c to <2 x i32> | ||
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[CAST1]], <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> | ||
// CHECK: [[CAST2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> | ||
// CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.arm.neon.sdot.v4i32.v16i8(<4 x i32> %a, <16 x i8> %b, <16 x i8> [[CAST2]]) | ||
// CHECK: ret <4 x i32> [[RESULT]] | ||
return vdotq_lane_s32(a, b, c, 1); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters