forked from llvm-mirror/llvm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
X86: Enable SSE memory intrinsics even when stack alignment is less than 16 bytes.
The stack realignment code was fixed to work when there is stack realignment and a dynamic alloca is present so this shouldn't cause correctness issues anymore. Note that this also enables generation of AVX instructions for memset under the assumptions:
- Unaligned loads/stores are always fast on CPUs supporting AVX
- AVX is not slower than SSE
We may need some tweaked heuristics if one of those assumptions turns out not to be true. Effectively reverts r58317. Part of PR2962. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167967 91177308-0d34-0410-b5e6-96231b3b80d8
- Loading branch information
Showing
5 changed files
with
81 additions
and
29 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
; Make sure that we realign the stack. Mingw32 uses 4 byte stack alignment, we | ||
; need 16 bytes for SSE and 32 bytes for AVX. | ||
|
||
; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=pentium2 | FileCheck %s -check-prefix=NOSSE | ||
; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=pentium3 | FileCheck %s -check-prefix=SSE1 | ||
; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=yonah | FileCheck %s -check-prefix=SSE2 | ||
; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX1 | ||
; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=core-avx2 | FileCheck %s -check-prefix=AVX2 | ||
|
||
; test1: zero a 32-byte stack buffer that only requests 4-byte alignment, then | ||
; create a variable-sized alloca and pass it to @dummy. The check lines inside | ||
; the body pin, per -mcpu from the run lines at the top of the file, whether | ||
; and how the stack pointer is realigned and which store instruction is used. | ||
define void @test1(i32 %t) nounwind { | ||
; Fixed-size 32-byte buffer with an explicit alignment of only 4 bytes. | ||
%tmp1210 = alloca i8, i32 32, align 4 | ||
; Write the byte value 0 over all 32 bytes of the buffer. The `i32 4` operand | ||
; is presumably the alignment argument of this intrinsic form and `i1 false` | ||
; the volatile flag -- confirm against the LangRef for this LLVM revision. | ||
call void @llvm.memset.p0i8.i64(i8* %tmp1210, i8 0, i64 32, i32 4, i1 false) | ||
; Variable-sized alloca: its size %t is the function argument, so it is not a | ||
; compile-time constant. | ||
%x = alloca i8, i32 %t | ||
; Pass the dynamically sized buffer to an external function. | ||
call void @dummy(i8* %x) | ||
ret void | ||
|
||
; pentium2 run (prefix NOSSE): nothing containing `and` may appear between the | ||
; function label and the first `movl $0`, i.e. no stack-pointer masking is | ||
; expected and the zeroing uses immediate integer stores. | ||
; NOSSE: test1: | ||
; NOSSE-NOT: and | ||
; NOSSE: movl $0 | ||
|
||
; pentium3 run (prefix SSE1): expect the stack pointer to be masked to a | ||
; 16-byte boundary, then `movl %esp, %esi` (presumably the copy into a base | ||
; pointer needed because of the variable-sized alloca -- confirm), then an | ||
; aligned `movaps` store. | ||
; SSE1: test1: | ||
; SSE1: andl $-16 | ||
; SSE1: movl %esp, %esi | ||
; SSE1: movaps | ||
|
||
; yonah run (prefix SSE2): same expectations as the pentium3 run. | ||
; SSE2: test1: | ||
; SSE2: andl $-16 | ||
; SSE2: movl %esp, %esi | ||
; SSE2: movaps | ||
|
||
; corei7-avx run (prefix AVX1): the 32-byte memset is expected to use an | ||
; aligned store from a ymm register, so the mask is -32 (32-byte boundary). | ||
; AVX1: test1: | ||
; AVX1: andl $-32 | ||
; AVX1: movl %esp, %esi | ||
; AVX1: vmovaps %ymm | ||
|
||
; core-avx2 run (prefix AVX2): same expectations as the corei7-avx run. | ||
; AVX2: test1: | ||
; AVX2: andl $-32 | ||
; AVX2: movl %esp, %esi | ||
; AVX2: vmovaps %ymm | ||
|
||
} | ||
|
||
; test2: same shape as a 32-byte memset test, but the zeroed buffer is only | ||
; 16 bytes. The check lines inside the body expect 16-byte realignment for | ||
; every SSE/AVX run, including the AVX ones, which store from an xmm register. | ||
define void @test2(i32 %t) nounwind { | ||
; Fixed-size 16-byte buffer with an explicit alignment of only 4 bytes. | ||
%tmp1210 = alloca i8, i32 16, align 4 | ||
; Write the byte value 0 over all 16 bytes of the buffer. The `i32 4` operand | ||
; is presumably the alignment argument of this intrinsic form and `i1 false` | ||
; the volatile flag -- confirm against the LangRef for this LLVM revision. | ||
call void @llvm.memset.p0i8.i64(i8* %tmp1210, i8 0, i64 16, i32 4, i1 false) | ||
; Variable-sized alloca: its size %t is the function argument, so it is not a | ||
; compile-time constant. | ||
%x = alloca i8, i32 %t | ||
; Pass the dynamically sized buffer to an external function. | ||
call void @dummy(i8* %x) | ||
ret void | ||
|
||
; pentium2 run (prefix NOSSE): nothing containing `and` may appear between the | ||
; function label and the first `movl $0`, i.e. no stack-pointer masking is | ||
; expected and the zeroing uses immediate integer stores. | ||
; NOSSE: test2: | ||
; NOSSE-NOT: and | ||
; NOSSE: movl $0 | ||
|
||
; pentium3 run (prefix SSE1): expect the stack pointer to be masked to a | ||
; 16-byte boundary, then `movl %esp, %esi` (presumably the copy into a base | ||
; pointer needed because of the variable-sized alloca -- confirm), then an | ||
; aligned `movaps` store. | ||
; SSE1: test2: | ||
; SSE1: andl $-16 | ||
; SSE1: movl %esp, %esi | ||
; SSE1: movaps | ||
|
||
; yonah run (prefix SSE2): same expectations as the pentium3 run. | ||
; SSE2: test2: | ||
; SSE2: andl $-16 | ||
; SSE2: movl %esp, %esi | ||
; SSE2: movaps | ||
|
||
; corei7-avx run (prefix AVX1): a 16-byte memset is expected to use one aligned | ||
; store from an xmm register, so the mask stays at -16 rather than -32. | ||
; AVX1: test2: | ||
; AVX1: andl $-16 | ||
; AVX1: movl %esp, %esi | ||
; AVX1: vmovaps %xmm | ||
|
||
; core-avx2 run (prefix AVX2): same expectations as the corei7-avx run. | ||
; AVX2: test2: | ||
; AVX2: andl $-16 | ||
; AVX2: movl %esp, %esi | ||
; AVX2: vmovaps %xmm | ||
} | ||
|
||
; External function with no body in this file; each test function passes it | ||
; the pointer produced by its variable-sized alloca. | ||
declare void @dummy(i8*) | ||
|
||
; Declaration of the memset intrinsic that each test function calls to zero | ||
; its fixed-size stack buffer. | ||
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters