Skip to content
This repository has been archived by the owner on Jan 1, 2023. It is now read-only.

Commit

Permalink
Fix vectorization of canonicalize
Browse files Browse the repository at this point in the history
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@342390 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
arsenm committed Sep 17, 2018
1 parent ecfc726 commit cb96cd1
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 0 deletions.
3 changes: 3 additions & 0 deletions include/llvm/CodeGen/BasicTTIImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1036,6 +1036,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::fabs:
ISDs.push_back(ISD::FABS);
break;
case Intrinsic::canonicalize:
ISDs.push_back(ISD::FCANONICALIZE);
break;
case Intrinsic::minnum:
ISDs.push_back(ISD::FMINNUM);
if (FMF.noNaNs())
Expand Down
1 change: 1 addition & 0 deletions lib/Analysis/VectorUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::ctlz:
case Intrinsic::cttz:
case Intrinsic::powi:
case Intrinsic::canonicalize:
return true;
default:
return false;
Expand Down
17 changes: 17 additions & 0 deletions test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,25 @@ define amdgpu_kernel void @test1_fabs_scalar_fma_v2f16(half addrspace(3)* %a, ha
ret void
}

; Test: a pair of scalar llvm.canonicalize.f16 calls on adjacent elements.
; The function loads two consecutive half values starting at %a, canonicalizes
; each one separately, and stores the results to two consecutive half slots
; starting at %c. The check lines below expect that, for the GFX9 run, the
; output contains a <2 x half> load, a call to llvm.canonicalize.v2f16, and a
; <2 x half> store.
; GCN-LABEL: @canonicalize_v2f16
; GFX9: load <2 x half>
; GFX9: call <2 x half> @llvm.canonicalize.v2f16(
; GFX9: store <2 x half>
define amdgpu_kernel void @canonicalize_v2f16(half addrspace(3)* %a, half addrspace(3)* %c) {
; Element 0: load from %a and canonicalize.
%i0 = load half, half addrspace(3)* %a, align 2
%canonicalize0 = call half @llvm.canonicalize.f16(half %i0)
; Element 1: load from %a + 1 and canonicalize.
%arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
%i3 = load half, half addrspace(3)* %arrayidx3, align 2
%canonicalize1 = call half @llvm.canonicalize.f16(half %i3)
; Store both results to %c and %c + 1 respectively.
store half %canonicalize0, half addrspace(3)* %c, align 2
%arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
store half %canonicalize1, half addrspace(3)* %arrayidx5, align 2
ret void
}

; Scalar f16 intrinsic declarations; each carries attribute group #1.
; llvm.canonicalize.f16 is called by @canonicalize_v2f16; the fabs and fma
; declarations are presumably used by tests elsewhere in this file.
declare half @llvm.fabs.f16(half) #1
declare half @llvm.fma.f16(half, half, half) #1
declare half @llvm.canonicalize.f16(half) #1

; Attribute groups referenced by number from definitions and declarations.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }

0 comments on commit cb96cd1

Please sign in to comment.