Skip to content

Commit

Permalink
[SLPVectorizer] Vectorizing the libm sqrt to llvm's sqrt intrinsic requires nnan
Browse files Browse the repository at this point in the history

To quote the langref "Unlike sqrt in libm, however, llvm.sqrt has
undefined behavior for negative numbers other than -0.0 (which allows
for better optimization, because there is no need to worry about errno
being set). llvm.sqrt(-0.0) is defined to return -0.0 like IEEE sqrt."

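This means that it's unsafe to replace sqrt with llvm.sqrt unless the
call is annotated with nnan: libm sqrt has a defined result (NaN) for
negative inputs, whereas llvm.sqrt does not, and nnan lets the optimizer
assume the call neither takes nor produces a NaN.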

Thanks to Hal Finkel for pointing this out!

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265521 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
majnemer committed Apr 6, 2016
1 parent adb72b9 commit 8b680c2
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 4 deletions.
4 changes: 3 additions & 1 deletion lib/Analysis/VectorUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,9 @@ Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI,
case LibFunc::sqrt:
case LibFunc::sqrtf:
case LibFunc::sqrtl:
return checkUnaryFloatSignature(*CI, Intrinsic::sqrt);
if (CI->hasNoNaNs())
return checkUnaryFloatSignature(*CI, Intrinsic::sqrt);
return Intrinsic::not_intrinsic;
}

return Intrinsic::not_intrinsic;
Expand Down
26 changes: 25 additions & 1 deletion test/Transforms/LoopVectorize/X86/veclib-calls.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,31 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Test input: a scalar loop that stores sqrtf(y[i]) into x[i] for i in [0, n).
; The expectations below require the output for this function to contain a
; line matching "vsqrtf" followed by "<4 x float>", and then "ret void".
; NOTE(review): the RUN line is outside this view; presumably the loop
; vectorizer runs with a vector library enabled -- confirm.
;CHECK-LABEL: @sqrt_f32(
;CHECK: vsqrtf{{.*}}<4 x float>
;CHECK: ret void
declare float @sqrtf(float) nounwind readnone
define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
; Guard: skip the loop entirely unless n > 0 (signed comparison).
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body, label %for.end

for.body: ; preds = %entry, %for.body
; 64-bit induction variable: 0 on entry, incremented by 1 per iteration.
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
; Load y[i].
%arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
; Call to the declared @sqrtf; note this call carries no fast-math flags.
%call = tail call float @sqrtf(float %0) nounwind readnone
; Store the result to x[i].
%arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
store float %call, float* %arrayidx2, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
; Truncate the incremented index to i32 so it can be compared with %n;
; the loop exits when they are equal.
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body, %entry
ret void
}

;CHECK-LABEL: @exp_f32(
;CHECK: vexpf{{.*}}<4 x float>
;CHECK: ret void
Expand Down Expand Up @@ -135,7 +160,6 @@ for.end: ; preds = %for.body, %entry
;CHECK-LABEL: @sqrt_f32_nobuiltin(
;CHECK-NOT: vsqrtf{{.*}}<4 x float>
;CHECK: ret void
declare float @sqrtf(float) nounwind readnone
define void @sqrt_f32_nobuiltin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
%cmp6 = icmp sgt i32 %n, 0
Expand Down
4 changes: 2 additions & 2 deletions test/Transforms/SLPVectorizer/X86/call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,13 @@ entry:
%i0 = load double, double* %a, align 8
%i1 = load double, double* %b, align 8
%mul = fmul double %i0, %i1
%call = tail call double @sqrt(double %mul) nounwind readnone
%call = tail call nnan double @sqrt(double %mul) nounwind readnone
%arrayidx3 = getelementptr inbounds double, double* %a, i64 1
%i3 = load double, double* %arrayidx3, align 8
%arrayidx4 = getelementptr inbounds double, double* %b, i64 1
%i4 = load double, double* %arrayidx4, align 8
%mul5 = fmul double %i3, %i4
%call5 = tail call double @sqrt(double %mul5) nounwind readnone
%call5 = tail call nnan double @sqrt(double %mul5) nounwind readnone
store double %call, double* %c, align 8
%arrayidx5 = getelementptr inbounds double, double* %c, i64 1
store double %call5, double* %arrayidx5, align 8
Expand Down

0 comments on commit 8b680c2

Please sign in to comment.