forked from llvm-mirror/llvm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[x86][FastISel] Teach how to select nontemporal stores.
This patch teaches x86 fast-isel how to select nontemporal stores.

On x86, we can use MOVNTI for nontemporal stores of doublewords/quadwords. Instructions (V)MOVNTPS/PD/DQ can be used for SSE2/AVX aligned nontemporal vector stores.

Before this patch, fast-isel always selected 'movd/movq' instead of 'movnti' for doubleword/quadword nontemporal stores. In the case of nontemporal stores of aligned vectors, fast-isel always selected movaps/movapd/movdqa instead of movntps/movntpd/movntdq.

With this patch, if we use SSE2/AVX intrinsics for nontemporal stores, we now always get the expected (V)MOVNT instructions. The lack of fast-isel support for nontemporal stores was spotted when analyzing the -O0 codegen for nontemporal stores.

Differential Revision: http://reviews.llvm.org/D13698

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@250285 91177308-0d34-0410-b5e6-96231b3b80d8
- Loading branch information
Andrea Di Biagio
authored and
Andrea Di Biagio
committed
Oct 14, 2015
1 parent
8f0bf92
commit 6cb6b24
Showing
2 changed files
with
103 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE2 | ||
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel -O0 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX | ||
|
||
; Nontemporal store of a 32-bit integer: %X is written to %ptr (align 4) with | ||
; !nontemporal metadata attached. Both llc configurations share the ALL prefix | ||
; and expect exactly one movntil of %esi to (%rdi), immediately followed by retq. | ||
define void @test_nti32(i32* nocapture %ptr, i32 %X) { | ||
; ALL-LABEL: test_nti32: | ||
; ALL: # BB#0: # %entry | ||
; ALL-NEXT: movntil %esi, (%rdi) | ||
; ALL-NEXT: retq | ||
entry: | ||
; The !nontemporal tag on this store is what the checks above rely on. | ||
store i32 %X, i32* %ptr, align 4, !nontemporal !1 | ||
ret void | ||
} | ||
|
||
; Nontemporal store of a 64-bit integer: %X is written to %ptr (align 8) with | ||
; !nontemporal metadata attached. Both llc configurations share the ALL prefix | ||
; and expect exactly one movntiq of %rsi to (%rdi), immediately followed by retq. | ||
define void @test_nti64(i64* nocapture %ptr, i64 %X) { | ||
; ALL-LABEL: test_nti64: | ||
; ALL: # BB#0: # %entry | ||
; ALL-NEXT: movntiq %rsi, (%rdi) | ||
; ALL-NEXT: retq | ||
entry: | ||
; The !nontemporal tag on this store is what the checks above rely on. | ||
store i64 %X, i64* %ptr, align 8, !nontemporal !1 | ||
ret void | ||
} | ||
|
||
; Nontemporal store of a <4 x float> vector: %X is written to %ptr with 16-byte | ||
; alignment and !nontemporal metadata attached. The expected instruction differs | ||
; per configuration: movntps under the SSE2 prefix, vmovntps under the AVX | ||
; prefix; in both cases from %xmm0 to (%rdi) and immediately followed by retq. | ||
define void @test_nt4xfloat(<4 x float>* nocapture %ptr, <4 x float> %X) { | ||
; SSE2-LABEL: test_nt4xfloat: | ||
; SSE2: # BB#0: # %entry | ||
; SSE2-NEXT: movntps %xmm0, (%rdi) | ||
; SSE2-NEXT: retq | ||
; | ||
; AVX-LABEL: test_nt4xfloat: | ||
; AVX: # BB#0: # %entry | ||
; AVX-NEXT: vmovntps %xmm0, (%rdi) | ||
; AVX-NEXT: retq | ||
entry: | ||
; The store is declared align 16 and tagged !nontemporal. | ||
store <4 x float> %X, <4 x float>* %ptr, align 16, !nontemporal !1 | ||
ret void | ||
} | ||
|
||
; Nontemporal store of a <2 x double> vector: %X is written to %ptr with 16-byte | ||
; alignment and !nontemporal metadata attached. The expected instruction differs | ||
; per configuration: movntpd under the SSE2 prefix, vmovntpd under the AVX | ||
; prefix; in both cases from %xmm0 to (%rdi) and immediately followed by retq. | ||
define void @test_nt2xdouble(<2 x double>* nocapture %ptr, <2 x double> %X) { | ||
; SSE2-LABEL: test_nt2xdouble: | ||
; SSE2: # BB#0: # %entry | ||
; SSE2-NEXT: movntpd %xmm0, (%rdi) | ||
; SSE2-NEXT: retq | ||
; | ||
; AVX-LABEL: test_nt2xdouble: | ||
; AVX: # BB#0: # %entry | ||
; AVX-NEXT: vmovntpd %xmm0, (%rdi) | ||
; AVX-NEXT: retq | ||
entry: | ||
; The store is declared align 16 and tagged !nontemporal. | ||
store <2 x double> %X, <2 x double>* %ptr, align 16, !nontemporal !1 | ||
ret void | ||
} | ||
|
||
; Nontemporal store of a <2 x i64> vector: %X is written to %ptr with 16-byte | ||
; alignment and !nontemporal metadata attached. The expected instruction differs | ||
; per configuration: movntdq under the SSE2 prefix, vmovntdq under the AVX | ||
; prefix; in both cases from %xmm0 to (%rdi) and immediately followed by retq. | ||
define void @test_nt2xi64(<2 x i64>* nocapture %ptr, <2 x i64> %X) { | ||
; SSE2-LABEL: test_nt2xi64: | ||
; SSE2: # BB#0: # %entry | ||
; SSE2-NEXT: movntdq %xmm0, (%rdi) | ||
; SSE2-NEXT: retq | ||
; | ||
; AVX-LABEL: test_nt2xi64: | ||
; AVX: # BB#0: # %entry | ||
; AVX-NEXT: vmovntdq %xmm0, (%rdi) | ||
; AVX-NEXT: retq | ||
entry: | ||
; The store is declared align 16 and tagged !nontemporal. | ||
store <2 x i64> %X, <2 x i64>* %ptr, align 16, !nontemporal !1 | ||
ret void | ||
} | ||
|
||
; Metadata node referenced by every store in this file through !nontemporal !1. | ||
!1 = !{i32 1} |