Skip to content

Commit 7b717b6

Browse files
committed
InstCombine/AMDGPU: Fix constant folding of llvm.amdgcn.{icmp,fcmp}
Summary:
The return value of these intrinsics should always have 0 bits for inactive threads. This means that when all arguments are constant and the comparison evaluates to true, the intrinsic should return the current exec mask.

Fixes some GL_ARB_shader_ballot tests.

Reviewers: arsenm

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye

Differential Revision: https://reviews.llvm.org/D32344

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@301195 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 8978f29 commit 7b717b6

File tree

2 files changed

+24
-4
lines changed

2 files changed

+24
-4
lines changed

lib/Transforms/InstCombine/InstCombineCalls.cpp

+20-2
Original file line numberDiff line numberDiff line change
@@ -3432,8 +3432,26 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
34323432
if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
34333433
if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
34343434
Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
3435-
return replaceInstUsesWith(*II,
3436-
ConstantExpr::getSExt(CCmp, II->getType()));
3435+
if (CCmp->isNullValue()) {
3436+
return replaceInstUsesWith(
3437+
*II, ConstantExpr::getSExt(CCmp, II->getType()));
3438+
}
3439+
3440+
// The result of V_ICMP/V_FCMP assembly instructions (which this
3441+
// intrinsic exposes) is one bit per thread, masked with the EXEC
3442+
// register (which contains the bitmask of live threads). So a
3443+
// comparison that always returns true is the same as a read of the
3444+
// EXEC register.
3445+
Value *NewF = Intrinsic::getDeclaration(
3446+
II->getModule(), Intrinsic::read_register, II->getType());
3447+
Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")};
3448+
MDNode *MD = MDNode::get(II->getContext(), MDArgs);
3449+
Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)};
3450+
CallInst *NewCall = Builder->CreateCall(NewF, Args);
3451+
NewCall->addAttribute(AttributeList::FunctionIndex,
3452+
Attribute::Convergent);
3453+
NewCall->takeName(II);
3454+
return replaceInstUsesWith(*II, NewCall);
34373455
}
34383456

34393457
// Canonicalize constants to RHS.

test/Transforms/InstCombine/amdgcn-intrinsics.ll

+4-2
Original file line numberDiff line numberDiff line change
@@ -1259,7 +1259,7 @@ define i64 @icmp_constant_inputs_false() {
12591259
}
12601260

12611261
; CHECK-LABEL: @icmp_constant_inputs_true(
1262-
; CHECK: ret i64 -1
1262+
; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #4
12631263
define i64 @icmp_constant_inputs_true() {
12641264
%result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 34)
12651265
ret i64 %result
@@ -1524,7 +1524,7 @@ define i64 @fcmp_constant_inputs_false() {
15241524
}
15251525

15261526
; CHECK-LABEL: @fcmp_constant_inputs_true(
1527-
; CHECK: ret i64 -1
1527+
; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #4
15281528
define i64 @fcmp_constant_inputs_true() {
15291529
%result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 4)
15301530
ret i64 %result
@@ -1536,3 +1536,5 @@ define i64 @fcmp_constant_to_rhs_olt(float %x) {
15361536
%result = call i64 @llvm.amdgcn.fcmp.f32(float 4.0, float %x, i32 4)
15371537
ret i64 %result
15381538
}
1539+
1540+
; CHECK: attributes #4 = { convergent }

0 commit comments

Comments
 (0)