[PPC64LE] More vector swap optimization TLC
This makes one substantive change and a few stylistic changes to the VSX swap optimization pass.

The substantive change is to permit LXSDX and LXSSPX instructions to participate in swap optimization computations. The previous change to insert a swap following a SUBREG_TO_REG widening operation makes this almost trivial.

I experimented with also permitting STXSDX and STXSSPX instructions. This can be done using similar techniques: we could insert a swap prior to a narrowing COPY operation, and then permit these stores to participate. I prototyped this, but discovered that the pattern of a narrowing COPY followed by an STXSDX does not occur in any of our test-suite code. So instead, I added commentary indicating that this could be done.

Other TLC:
- I changed SH_COPYSCALAR to SH_COPYWIDEN to more clearly indicate the direction of the copy.
- I factored the insertion of swap instructions into a separate function.

Finally, I added a new test case to check that the scalar-to-vector loads are working properly with swap optimization.

llvm-svn: 242838
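To make the "factored the insertion of swap instructions into a separate function" item concrete, here is a rough sketch of what such a helper could look like. The name insertSwap and its exact signature are assumptions for illustration, not the code from this commit; only the use of XXPERMDI with a selector immediate of 2 (the encoding of xxswapd) reflects how the PowerPC backend expresses a doubleword swap.

    // A minimal sketch, assuming it lives inside the PowerPC backend sources
    // (e.g. alongside PPCVSXSwapRemoval.cpp) with that file's usual includes,
    // so PPC::XXPERMDI, BuildMI, and the MachineInstr types are available.
    // The helper name and signature are illustrative, not the commit's code.
    // An xxswapd is encoded as XXPERMDI XT, XA, XA, 2, so inserting a swap is
    // just a matter of building that instruction at the requested point.
    static MachineInstr *insertSwap(MachineInstr *MI,
                                    MachineBasicBlock::iterator InsertPoint,
                                    unsigned DstReg, unsigned SrcReg) {
      MachineBasicBlock *MBB = MI->getParent();
      const TargetInstrInfo *TII =
          MBB->getParent()->getSubtarget().getInstrInfo();
      // XXPERMDI DstReg, SrcReg, SrcReg, 2  ==  xxswapd DstReg, SrcReg.
      return BuildMI(*MBB, InsertPoint, MI->getDebugLoc(),
                     TII->get(PPC::XXPERMDI), DstReg)
          .addReg(SrcReg)
          .addReg(SrcReg)
          .addImm(2);
    }

Centralizing the swap insertion this way would let the SUBREG_TO_REG widening case and any future narrowing-COPY store case described above share a single code path.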
1 parent c1fbb35, commit 2be8054
Showing 2 changed files with 91 additions and 21 deletions.
@@ -0,0 +1,44 @@
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s

; These tests verify that VSX swap optimization works when loading a scalar
; into a vector register.
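
; In the checks below, the xxswapd that follows the lxsdx is the swap the
; pass inserts after the SUBREG_TO_REG widening of the scalar load, which is
; what allows the scalar-to-vector load to participate in swap optimization.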

@x = global <2 x double> <double 9.970000e+01, double -1.032220e+02>, align 16
@z = global <2 x double> <double 2.332000e+01, double 3.111111e+01>, align 16
@y = global double 1.780000e+00, align 8

define void @bar0() {
entry:
  %0 = load <2 x double>, <2 x double>* @x, align 16
  %1 = load double, double* @y, align 8
  %vecins = insertelement <2 x double> %0, double %1, i32 0
  store <2 x double> %vecins, <2 x double>* @z, align 16
  ret void
}

; CHECK-LABEL: @bar0
; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
; CHECK-DAG: lxsdx [[REG2:[0-9]+]]
; CHECK: xxswapd [[REG3:[0-9]+]], [[REG2]]
; CHECK: xxspltd [[REG4:[0-9]+]], [[REG3]], 1
; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1
; CHECK: stxvd2x [[REG5]]

define void @bar1() {
entry:
  %0 = load <2 x double>, <2 x double>* @x, align 16
  %1 = load double, double* @y, align 8
  %vecins = insertelement <2 x double> %0, double %1, i32 1
  store <2 x double> %vecins, <2 x double>* @z, align 16
  ret void
}

; CHECK-LABEL: @bar1
; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
; CHECK-DAG: lxsdx [[REG2:[0-9]+]]
; CHECK: xxswapd [[REG3:[0-9]+]], [[REG2]]
; CHECK: xxspltd [[REG4:[0-9]+]], [[REG3]], 1
; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]]
; CHECK: stxvd2x [[REG5]]