forked from llvm-mirror/llvm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[PPC64] Add vector pack/unpack support from ISA 2.07
This patch adds support for the following new instructions in the Power ISA 2.07: vpksdss, vpksdus, vpkudus, vpkudum, vupkhsw, and vupklsw. These instructions are available through the vec_packs, vec_packsu, vec_unpackh, and vec_unpackl built-in interfaces. These are lane-sensitive instructions, so the built-ins have different implementations for big- and little-endian, and the instructions must be marked as killing the vector swap optimization for now. The first three instructions perform saturating pack operations. The fourth performs a modulo pack operation, which means it can be represented with a vector shuffle, and conversely the appropriate vector shuffles may cause this instruction to be generated. The other instructions are only generated via built-in support for now. Appropriate tests have been added. There is a companion patch to clang for the rest of this support. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237499 91177308-0d34-0410-b5e6-96231b3b80d8
- Loading branch information
1 parent
9e6378d
commit 24f0469
Showing
9 changed files
with
238 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -mattr=+power8-vector < %s | FileCheck %s | ||
|
||
; Unary pack pattern: both halves of the result come from the one vector loaded from %A. | ||
; Words 1 and 3 of its <4 x i32> view are written to result words 0 and 1, then again to | ||
; result words 2 and 3, and the rebuilt vector is stored back to %A. | ||
; NOTE(review): the FileCheck directives after this function expect vpkudum with no vperm | ||
; before it; presumably words 1 and 3 are the low-order halves of the two doublewords on | ||
; the big-endian triple used by this file - confirm against the ISA. | ||
define void @VPKUDUM_unary(<2 x i64>* %A) { | ||
entry: | ||
%tmp = load <2 x i64>, <2 x i64>* %A | ||
%tmp2 = bitcast <2 x i64> %tmp to <4 x i32> | ||
; Pick the two source words that form the packed result. | ||
%tmp3 = extractelement <4 x i32> %tmp2, i32 1 | ||
%tmp4 = extractelement <4 x i32> %tmp2, i32 3 | ||
; Build the result {w1, w3, w1, w3} one lane at a time, starting from undef. | ||
%tmp5 = insertelement <4 x i32> undef, i32 %tmp3, i32 0 | ||
%tmp6 = insertelement <4 x i32> %tmp5, i32 %tmp4, i32 1 | ||
%tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 2 | ||
%tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 3 | ||
%tmp9 = bitcast <4 x i32> %tmp8 to <2 x i64> | ||
store <2 x i64> %tmp9, <2 x i64>* %A | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: @VPKUDUM_unary | ||
; CHECK-NOT: vperm | ||
; CHECK: vpkudum | ||
|
||
; Binary pack pattern: words 1 and 3 of the vector loaded from %A fill result words 0 and 1, | ||
; and words 1 and 3 of the vector loaded from %B fill result words 2 and 3. The rebuilt | ||
; vector is stored to %A. | ||
; NOTE(review): the FileCheck directives after this function expect vpkudum with no vperm | ||
; before it; presumably words 1 and 3 are the low-order halves of each doubleword on the | ||
; big-endian triple used by this file - confirm against the ISA. | ||
define void @VPKUDUM(<2 x i64>* %A, <2 x i64>* %B) { | ||
entry: | ||
%tmp = load <2 x i64>, <2 x i64>* %A | ||
%tmp2 = bitcast <2 x i64> %tmp to <4 x i32> | ||
%tmp3 = load <2 x i64>, <2 x i64>* %B | ||
%tmp4 = bitcast <2 x i64> %tmp3 to <4 x i32> | ||
; Source words: two from the %A vector, then two from the %B vector. | ||
%tmp5 = extractelement <4 x i32> %tmp2, i32 1 | ||
%tmp6 = extractelement <4 x i32> %tmp2, i32 3 | ||
%tmp7 = extractelement <4 x i32> %tmp4, i32 1 | ||
%tmp8 = extractelement <4 x i32> %tmp4, i32 3 | ||
; Build the result {A.w1, A.w3, B.w1, B.w3} one lane at a time, starting from undef. | ||
%tmp9 = insertelement <4 x i32> undef, i32 %tmp5, i32 0 | ||
%tmp10 = insertelement <4 x i32> %tmp9, i32 %tmp6, i32 1 | ||
%tmp11 = insertelement <4 x i32> %tmp10, i32 %tmp7, i32 2 | ||
%tmp12 = insertelement <4 x i32> %tmp11, i32 %tmp8, i32 3 | ||
%tmp13 = bitcast <4 x i32> %tmp12 to <2 x i64> | ||
store <2 x i64> %tmp13, <2 x i64>* %A | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: @VPKUDUM | ||
; CHECK-NOT: vperm | ||
; CHECK: vpkudum |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -mattr=+power8-vector < %s | FileCheck %s | ||
|
||
; Unary pack pattern: both halves of the result come from the one vector loaded from %A. | ||
; Words 0 and 2 of its <4 x i32> view are written to result words 0 and 1, then again to | ||
; result words 2 and 3, and the rebuilt vector is stored back to %A. | ||
; NOTE(review): the FileCheck directives after this function expect vpkudum with no vperm | ||
; before it; presumably words 0 and 2 are the low-order halves of the two doublewords on | ||
; the little-endian triple used by this file - confirm against the ISA. | ||
define void @VPKUDUM_unary(<2 x i64>* %A) { | ||
entry: | ||
%tmp = load <2 x i64>, <2 x i64>* %A | ||
%tmp2 = bitcast <2 x i64> %tmp to <4 x i32> | ||
; Pick the two source words that form the packed result. | ||
%tmp3 = extractelement <4 x i32> %tmp2, i32 0 | ||
%tmp4 = extractelement <4 x i32> %tmp2, i32 2 | ||
; Build the result {w0, w2, w0, w2} one lane at a time, starting from undef. | ||
%tmp5 = insertelement <4 x i32> undef, i32 %tmp3, i32 0 | ||
%tmp6 = insertelement <4 x i32> %tmp5, i32 %tmp4, i32 1 | ||
%tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 2 | ||
%tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 3 | ||
%tmp9 = bitcast <4 x i32> %tmp8 to <2 x i64> | ||
store <2 x i64> %tmp9, <2 x i64>* %A | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: @VPKUDUM_unary | ||
; CHECK-NOT: vperm | ||
; CHECK: vpkudum | ||
|
||
; Binary pack pattern: words 0 and 2 of the vector loaded from %A fill result words 0 and 1, | ||
; and words 0 and 2 of the vector loaded from %B fill result words 2 and 3. The rebuilt | ||
; vector is stored to %A. | ||
; NOTE(review): the FileCheck directives after this function expect vpkudum with no vperm | ||
; before it; presumably words 0 and 2 are the low-order halves of each doubleword on the | ||
; little-endian triple used by this file - confirm against the ISA. | ||
define void @VPKUDUM(<2 x i64>* %A, <2 x i64>* %B) { | ||
entry: | ||
%tmp = load <2 x i64>, <2 x i64>* %A | ||
%tmp2 = bitcast <2 x i64> %tmp to <4 x i32> | ||
%tmp3 = load <2 x i64>, <2 x i64>* %B | ||
%tmp4 = bitcast <2 x i64> %tmp3 to <4 x i32> | ||
; Source words: two from the %A vector, then two from the %B vector. | ||
%tmp5 = extractelement <4 x i32> %tmp2, i32 0 | ||
%tmp6 = extractelement <4 x i32> %tmp2, i32 2 | ||
%tmp7 = extractelement <4 x i32> %tmp4, i32 0 | ||
%tmp8 = extractelement <4 x i32> %tmp4, i32 2 | ||
; Build the result {A.w0, A.w2, B.w0, B.w2} one lane at a time, starting from undef. | ||
%tmp9 = insertelement <4 x i32> undef, i32 %tmp5, i32 0 | ||
%tmp10 = insertelement <4 x i32> %tmp9, i32 %tmp6, i32 1 | ||
%tmp11 = insertelement <4 x i32> %tmp10, i32 %tmp7, i32 2 | ||
%tmp12 = insertelement <4 x i32> %tmp11, i32 %tmp8, i32 3 | ||
%tmp13 = bitcast <4 x i32> %tmp12 to <2 x i64> | ||
store <2 x i64> %tmp13, <2 x i64>* %A | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: @VPKUDUM | ||
; CHECK-NOT: vperm | ||
; CHECK: vpkudum |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# RUN: llvm-mc --disassemble %s -triple powerpc64-unknown-unknown -mcpu=pwr8 | FileCheck %s | ||
|
||
# Disassembler test for the six vector pack/unpack instructions listed below. | ||
# Each case gives the expected disassembly text, followed by the four raw instruction | ||
# bytes that are fed to the disassembler for the big-endian powerpc64 triple. | ||
# The three-operand pack forms use registers 2, 3, 4; the two-operand unpack forms use 2, 3. | ||
|
||
# CHECK: vpksdss 2, 3, 4 | ||
0x10 0x43 0x25 0xce | ||
|
||
# CHECK: vpksdus 2, 3, 4 | ||
0x10 0x43 0x25 0x4e | ||
|
||
# CHECK: vpkudus 2, 3, 4 | ||
0x10 0x43 0x24 0xce | ||
|
||
# CHECK: vpkudum 2, 3, 4 | ||
0x10 0x43 0x24 0x4e | ||
|
||
# CHECK: vupkhsw 2, 3 | ||
0x10 0x40 0x1e 0x4e | ||
|
||
# CHECK: vupklsw 2, 3 | ||
0x10 0x40 0x1e 0xce |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s | ||
# RUN: llvm-mc -triple powerpc64le-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-LE %s | ||
|
||
# Assembler encoding test for the six vector pack/unpack instructions listed below. | ||
# Each instruction is assembled once per triple above (big-endian and little-endian). | ||
# The expected little-endian bytes are the big-endian bytes in reverse order. | ||
|
||
# CHECK-BE: vpksdss 2, 3, 4 # encoding: [0x10,0x43,0x25,0xce] | ||
# CHECK-LE: vpksdss 2, 3, 4 # encoding: [0xce,0x25,0x43,0x10] | ||
vpksdss 2, 3, 4 | ||
|
||
# CHECK-BE: vpksdus 2, 3, 4 # encoding: [0x10,0x43,0x25,0x4e] | ||
# CHECK-LE: vpksdus 2, 3, 4 # encoding: [0x4e,0x25,0x43,0x10] | ||
vpksdus 2, 3, 4 | ||
|
||
# CHECK-BE: vpkudus 2, 3, 4 # encoding: [0x10,0x43,0x24,0xce] | ||
# CHECK-LE: vpkudus 2, 3, 4 # encoding: [0xce,0x24,0x43,0x10] | ||
vpkudus 2, 3, 4 | ||
|
||
# CHECK-BE: vpkudum 2, 3, 4 # encoding: [0x10,0x43,0x24,0x4e] | ||
# CHECK-LE: vpkudum 2, 3, 4 # encoding: [0x4e,0x24,0x43,0x10] | ||
vpkudum 2, 3, 4 | ||
|
||
# CHECK-BE: vupkhsw 2, 3 # encoding: [0x10,0x40,0x1e,0x4e] | ||
# CHECK-LE: vupkhsw 2, 3 # encoding: [0x4e,0x1e,0x40,0x10] | ||
vupkhsw 2, 3 | ||
|
||
# CHECK-BE: vupklsw 2, 3 # encoding: [0x10,0x40,0x1e,0xce] | ||
# CHECK-LE: vupklsw 2, 3 # encoding: [0xce,0x1e,0x40,0x10] | ||
vupklsw 2, 3 |