forked from llvm-mirror/llvm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[RegisterCoalescing] Remove partially redundant copy.
The patch is to solve the performance problem described in PR27827. Register coalescing sometimes cannot remove a copy because of interference. But if we can find a reverse copy in one of the predecessor blocks of the copy, the copy is partially redundant and we may remove the copy partially by moving it to the predecessor block without the reverse copy. Differential Revision: https://reviews.llvm.org/D28585 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292292 91177308-0d34-0410-b5e6-96231b3b80d8
- Loading branch information
Showing
3 changed files
with
344 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
; RUN: llc -regalloc=greedy -mtriple=x86_64-unknown-linux-gnu < %s -o - | FileCheck %s | ||
; | ||
; The test checks that no redundant mov like the following is generated in the %while.body loop. | ||
; .LBB0_2: | ||
; movsbl %cl, %ecx | ||
; movl %edx, %eax ==> This movl can be hoisted out of the loop. | ||
; shll $5, %eax | ||
; ... | ||
; movl %eax, %edx | ||
; jne .LBB0_2 | ||
; | ||
; CHECK-LABEL: foo: | ||
; CHECK: [[L0:.LBB0_[0-9]+]]: # %while.body | ||
; CHECK: movl %[[REGA:.*]], %[[REGB:.*]] | ||
; CHECK-NOT: movl %[[REGB]], %[[REGA]] | ||
; CHECK: jne [[L0]] | ||
; | ||
; Data layout of the test module. | ||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
|
||
; @b: pointer-to-i8 global, null-initialized; @foo loads a byte through it. | ||
@b = common local_unnamed_addr global i8* null, align 8 | ||
; @a: i32 global, zero-initialized; @foo loads it once and stores to it in its loop. | ||
@a = common local_unnamed_addr global i32 0, align 4 | ||
|
||
; @foo: loads the pointer in @b and the i32 in @a, then loops while the byte | ||
; read through that pointer is non-zero, updating the value as value * 33 + | ||
; sext(byte) and storing it to @a. Returns the final value (the initial load | ||
; of @a if the loop is never entered). | ||
define i32 @foo() local_unnamed_addr { | ||
entry: | ||
%t0 = load i8*, i8** @b, align 8 | ||
%t1 = load i8, i8* %t0, align 1 | ||
%cmp4 = icmp eq i8 %t1, 0 | ||
%t2 = load i32, i32* @a, align 4 | ||
; Skip the loop entirely when the first byte is zero. | ||
br i1 %cmp4, label %while.end, label %while.body.preheader | ||
|
||
while.body.preheader: ; preds = %entry | ||
br label %while.body | ||
|
||
while.body: ; preds = %while.body.preheader, %while.body | ||
; %t3 is the running value and %t4 the current byte; both are seeded from the | ||
; entry-block loads and then fed by the previous iteration. | ||
%t3 = phi i32 [ %add3, %while.body ], [ %t2, %while.body.preheader ] | ||
%t4 = phi i8 [ %t5, %while.body ], [ %t1, %while.body.preheader ] | ||
%conv = sext i8 %t4 to i32 | ||
; Despite its name, %add is a multiply by 33. | ||
%add = mul i32 %t3, 33 | ||
%add3 = add nsw i32 %add, %conv | ||
store i32 %add3, i32* @a, align 4 | ||
; The byte is reloaded through the same pointer %t0, which is never advanced. | ||
%t5 = load i8, i8* %t0, align 1 | ||
%cmp = icmp eq i8 %t5, 0 | ||
br i1 %cmp, label %while.end.loopexit, label %while.body | ||
|
||
while.end.loopexit: ; preds = %while.body | ||
br label %while.end | ||
|
||
while.end: ; preds = %while.end.loopexit, %entry | ||
; Result is %t2 when the loop was skipped, otherwise the last %add3. | ||
%.lcssa = phi i32 [ %t2, %entry ], [ %add3, %while.end.loopexit ] | ||
ret i32 %.lcssa | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass simple-register-coalescing -o - %s | FileCheck %s | ||
# Check that no partially redundant copy is left in the loop after register coalescing. | ||
--- | | ||
; ModuleID = '<stdin>' | ||
; Embedded LLVM IR module; the machine function further down refers to its | ||
; globals (@a, @b) and IR values (%ir.t0). | ||
source_filename = "<stdin>" | ||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
; @b: pointer-to-i8 global, null-initialized; @foo loads a byte through it. | ||
@b = common local_unnamed_addr global i8* null, align 8 | ||
; @a: i32 global, zero-initialized; @foo loads it once and stores to it in its loop. | ||
@a = common local_unnamed_addr global i32 0, align 4 | ||
|
||
; @foo: loads the pointer in @b and the i32 in @a, then loops while the byte | ||
; read through that pointer is non-zero, updating the value as value * 33 + | ||
; sext(byte) and storing it to @a. Returns the final value (the initial load | ||
; of @a if the loop is never entered). | ||
define i32 @foo() local_unnamed_addr { | ||
entry: | ||
%t0 = load i8*, i8** @b, align 8 | ||
%t1 = load i8, i8* %t0, align 1 | ||
%cmp4 = icmp eq i8 %t1, 0 | ||
%t2 = load i32, i32* @a, align 4 | ||
; Skip the loop entirely when the first byte is zero. | ||
br i1 %cmp4, label %while.end, label %while.body.preheader | ||
|
||
while.body.preheader: ; preds = %entry | ||
br label %while.body | ||
|
||
while.body: ; preds = %while.body, %while.body.preheader | ||
; %t3 is the running value and %t4 the current byte; both are seeded from the | ||
; entry-block loads and then fed by the previous iteration. | ||
%t3 = phi i32 [ %add3, %while.body ], [ %t2, %while.body.preheader ] | ||
%t4 = phi i8 [ %t5, %while.body ], [ %t1, %while.body.preheader ] | ||
%conv = sext i8 %t4 to i32 | ||
; Despite its name, %add is a multiply by 33. | ||
%add = mul i32 %t3, 33 | ||
%add3 = add nsw i32 %add, %conv | ||
store i32 %add3, i32* @a, align 4 | ||
; The byte is reloaded through the same pointer %t0, which is never advanced. | ||
%t5 = load i8, i8* %t0, align 1 | ||
%cmp = icmp eq i8 %t5, 0 | ||
; Unlike a form with a dedicated loop-exit block, the loop branches straight to while.end. | ||
br i1 %cmp, label %while.end, label %while.body | ||
|
||
while.end: ; preds = %while.body, %entry | ||
; Result is %t2 when the loop was skipped, otherwise the last %add3. | ||
%.lcssa = phi i32 [ %t2, %entry ], [ %add3, %while.body ] | ||
ret i32 %.lcssa | ||
} | ||
|
||
... | ||
--- | ||
# Check that A = B and B = A copies do not both exist in the loop at the same time. | ||
# CHECK: name: foo | ||
# CHECK: [[L1:bb.3.while.body]]: | ||
# CHECK: %[[REGA:.*]] = COPY %[[REGB:.*]] | ||
# CHECK-NOT: %[[REGB]] = COPY %[[REGA]] | ||
# CHECK: JNE_1 %[[L1]] | ||
|
||
# Machine function for @foo; the RUN line feeds it to the simple-register-coalescing pass. | ||
name: foo | ||
alignment: 4 | ||
exposesReturnsTwice: false | ||
legalized: false | ||
regBankSelected: false | ||
selected: false | ||
tracksRegLiveness: true | ||
# Virtual registers %0-%13 and their register classes. | ||
registers: | ||
- { id: 0, class: gr64 } | ||
- { id: 1, class: gr8 } | ||
- { id: 2, class: gr32 } | ||
- { id: 3, class: gr32 } | ||
- { id: 4, class: gr8 } | ||
- { id: 5, class: gr32 } | ||
- { id: 6, class: gr8 } | ||
- { id: 7, class: gr32 } | ||
- { id: 8, class: gr32 } | ||
- { id: 9, class: gr32 } | ||
- { id: 10, class: gr32 } | ||
- { id: 11, class: gr32 } | ||
- { id: 12, class: gr8 } | ||
- { id: 13, class: gr32 } | ||
# Frame description: zero stack size and no calls. | ||
frameInfo: | ||
isFrameAddressTaken: false | ||
isReturnAddressTaken: false | ||
hasStackMap: false | ||
hasPatchPoint: false | ||
stackSize: 0 | ||
offsetAdjustment: 0 | ||
maxAlignment: 0 | ||
adjustsStack: false | ||
hasCalls: false | ||
maxCallFrameSize: 0 | ||
hasOpaqueSPAdjustment: false | ||
hasVAStart: false | ||
hasMustTailInVarArgFunc: false | ||
body: | | ||
bb.0.entry: | ||
successors: %bb.4(0x30000000), %bb.1.while.body.preheader(0x50000000) | ||
; %0 = pointer loaded from @b, %12 = byte loaded through it, %11 = value loaded from @a. | ||
%0 = MOV64rm %rip, 1, _, @b, _ :: (dereferenceable load 8 from @b) | ||
%12 = MOV8rm %0, 1, _, 0, _ :: (load 1 from %ir.t0) | ||
TEST8rr %12, %12, implicit-def %eflags | ||
%11 = MOV32rm %rip, 1, _, @a, _ :: (dereferenceable load 4 from @a) | ||
; Non-zero byte: go to the loop preheader; otherwise fall through to bb.4. | ||
JNE_1 %bb.1.while.body.preheader, implicit killed %eflags | ||
bb.4: | ||
successors: %bb.3.while.end(0x80000000) | ||
; Loop-bypass path: %10 receives the value loaded from @a before jumping to while.end. | ||
%10 = COPY %11 | ||
JMP_1 %bb.3.while.end | ||
bb.1.while.body.preheader: | ||
successors: %bb.2.while.body(0x80000000) | ||
bb.2.while.body: | ||
successors: %bb.3.while.end(0x04000000), %bb.2.while.body(0x7c000000) | ||
%8 = MOVSX32rr8 %12 | ||
; This copy and the %11 = COPY %10 at the bottom of the block form the | ||
; A = B / B = A pair that the CHECK lines above say must not both stay in the loop. | ||
%10 = COPY %11 | ||
; %10 = (%11 << 5) + %11 + %8, i.e. %11 * 33 plus the sign-extended byte in %8. | ||
%10 = SHL32ri %10, 5, implicit-def dead %eflags | ||
%10 = ADD32rr %10, %11, implicit-def dead %eflags | ||
%10 = ADD32rr %10, %8, implicit-def dead %eflags | ||
MOV32mr %rip, 1, _, @a, _, %10 :: (store 4 into @a) | ||
; Reload the byte through the same pointer %0 and test it for the loop condition. | ||
%12 = MOV8rm %0, 1, _, 0, _ :: (load 1 from %ir.t0) | ||
TEST8rr %12, %12, implicit-def %eflags | ||
%11 = COPY %10 | ||
; Loop back while the byte is non-zero; otherwise leave to while.end. | ||
JNE_1 %bb.2.while.body, implicit killed %eflags | ||
JMP_1 %bb.3.while.end | ||
bb.3.while.end: | ||
; Return %10 in %eax. | ||
%eax = COPY %10 | ||
RET 0, killed %eax | ||
... |