From 699f4c431de74c28e66ace05d9e83223993c7a13 Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Mon, 26 Jun 2017 16:00:24 +0000 Subject: [PATCH] [X86][AVX-512] Don't raise inexact in ceil, floor, round, trunc. The non-AVX-512 behavior was changed in r248266 to match N1778 (C bindings for IEEE-754 (2008)), which defined the four functions to not raise the inexact exception ("rint" is still defined as raising it). Update the AVX-512 lowering of these functions to match that: it should not be different. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306299 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 24 ++++++++++++------------ test/CodeGen/X86/avx512-round.ll | 16 ++++++++-------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index d46262573f71..01a70323224c 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -7265,13 +7265,13 @@ avx512_rndscale_scalar opc, string OpcodeStr, X86VectorVTInfo _> { let Predicates = [HasAVX512] in { def : Pat<(ffloor _.FRC:$src), (COPY_TO_REGCLASS (_.VT (!cast(NAME##r) (_.VT (IMPLICIT_DEF)), - (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x1))), _.FRC)>; + (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x9))), _.FRC)>; def : Pat<(fceil _.FRC:$src), (COPY_TO_REGCLASS (_.VT (!cast(NAME##r) (_.VT (IMPLICIT_DEF)), - (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x2))), _.FRC)>; + (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xa))), _.FRC)>; def : Pat<(ftrunc _.FRC:$src), (COPY_TO_REGCLASS (_.VT (!cast(NAME##r) (_.VT (IMPLICIT_DEF)), - (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x3))), _.FRC)>; + (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xb))), _.FRC)>; def : Pat<(frint _.FRC:$src), (COPY_TO_REGCLASS (_.VT (!cast(NAME##r) (_.VT (IMPLICIT_DEF)), (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x4))), _.FRC)>; @@ -7281,13 +7281,13 @@ avx512_rndscale_scalar opc, string OpcodeStr, X86VectorVTInfo _> { def : Pat<(ffloor (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS (_.VT (!cast(NAME##m) (_.VT (IMPLICIT_DEF)), - addr:$src, (i32 0x1))), _.FRC)>; + addr:$src, (i32 0x9))), _.FRC)>; def : Pat<(fceil (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS (_.VT (!cast(NAME##m) (_.VT (IMPLICIT_DEF)), - addr:$src, (i32 0x2))), _.FRC)>; + addr:$src, (i32 0xa))), _.FRC)>; def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS (_.VT (!cast(NAME##m) (_.VT (IMPLICIT_DEF)), - addr:$src, (i32 0x3))), _.FRC)>; + addr:$src, (i32 0xb))), _.FRC)>; def : Pat<(frint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS (_.VT (!cast(NAME##m) (_.VT (IMPLICIT_DEF)), addr:$src, (i32 0x4))), _.FRC)>; @@ -8471,26 +8471,26 @@ multiclass avx512_shuff_packed_128; + (VRNDSCALEPSZrri VR512:$src, (i32 0x9))>; def : Pat<(v16f32 (fnearbyint VR512:$src)), (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>; def : Pat<(v16f32 (fceil VR512:$src)), - (VRNDSCALEPSZrri VR512:$src, (i32 0x2))>; + (VRNDSCALEPSZrri VR512:$src, (i32 0xA))>; def : Pat<(v16f32 (frint VR512:$src)), (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>; def : Pat<(v16f32 (ftrunc VR512:$src)), - (VRNDSCALEPSZrri VR512:$src, (i32 0x3))>; + (VRNDSCALEPSZrri VR512:$src, (i32 0xB))>; def : Pat<(v8f64 (ffloor VR512:$src)), - (VRNDSCALEPDZrri VR512:$src, (i32 0x1))>; + (VRNDSCALEPDZrri VR512:$src, (i32 0x9))>; def : Pat<(v8f64 (fnearbyint VR512:$src)), (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>; def : Pat<(v8f64 (fceil VR512:$src)), - (VRNDSCALEPDZrri VR512:$src, (i32 0x2))>; + (VRNDSCALEPDZrri VR512:$src, (i32 0xA))>; def : Pat<(v8f64 (frint VR512:$src)), (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>; def : Pat<(v8f64 (ftrunc VR512:$src)), - (VRNDSCALEPDZrri VR512:$src, (i32 0x3))>; + (VRNDSCALEPDZrri VR512:$src, (i32 0xB))>; } defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>, diff --git a/test/CodeGen/X86/avx512-round.ll b/test/CodeGen/X86/avx512-round.ll index c4f417e75ab0..b23af2b09a78 100644 --- a/test/CodeGen/X86/avx512-round.ll +++ b/test/CodeGen/X86/avx512-round.ll @@ -2,7 +2,7 @@ define <16 x float> @floor_v16f32(<16 x float> %a) { ; CHECK-LABEL: floor_v16f32 -; CHECK: vrndscaleps $1, {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x01] +; CHECK: vrndscaleps $9, {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x09] %res = call <16 x float> @llvm.floor.v16f32(<16 x float> %a) ret <16 x float> %res } @@ -10,7 +10,7 @@ declare <16 x float> @llvm.floor.v16f32(<16 x float> %p) define <8 x double> @floor_v8f64(<8 x double> %a) { ; CHECK-LABEL: floor_v8f64 -; CHECK: vrndscalepd $1, {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x01] +; CHECK: vrndscalepd $9, {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x09] %res = call <8 x double> @llvm.floor.v8f64(<8 x double> %a) ret <8 x double> %res } @@ -18,7 +18,7 @@ declare <8 x double> @llvm.floor.v8f64(<8 x double> %p) define <16 x float> @ceil_v16f32(<16 x float> %a) { ; CHECK-LABEL: ceil_v16f32 -; CHECK: vrndscaleps $2, {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x02] +; CHECK: vrndscaleps $10, {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0a] %res = call <16 x float> @llvm.ceil.v16f32(<16 x float> %a) ret <16 x float> %res } @@ -26,7 +26,7 @@ declare <16 x float> @llvm.ceil.v16f32(<16 x float> %p) define <8 x double> @ceil_v8f64(<8 x double> %a) { ; CHECK-LABEL: ceil_v8f64 -; CHECK: vrndscalepd $2, {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x02] +; CHECK: vrndscalepd $10, {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0a] %res = call <8 x double> @llvm.ceil.v8f64(<8 x double> %a) ret <8 x double> %res } @@ -34,7 +34,7 @@ declare <8 x double> @llvm.ceil.v8f64(<8 x double> %p) define <16 x float> @trunc_v16f32(<16 x float> %a) { ; CHECK-LABEL: trunc_v16f32 -; CHECK: vrndscaleps $3, {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x03] +; CHECK: vrndscaleps $11, {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b] %res = call <16 x float> @llvm.trunc.v16f32(<16 x float> %a) ret <16 x float> %res } @@ -42,7 +42,7 @@ declare <16 x float> @llvm.trunc.v16f32(<16 x float> %p) define <8 x double> @trunc_v8f64(<8 x double> %a) { ; CHECK-LABEL: trunc_v8f64 -; CHECK: vrndscalepd $3, {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x03] +; CHECK: vrndscalepd $11, {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b] %res = call <8 x double> @llvm.trunc.v8f64(<8 x double> %a) ret <8 x double> %res } @@ -90,7 +90,7 @@ declare double @llvm.nearbyint.f64(double %p) define float @floor_f32(float %a) { ; CHECK-LABEL: floor_f32 -; CHECK: vrndscaless $1, {{.*}}encoding: [0x62,0xf3,0x7d,0x08,0x0a,0xc0,0x01] +; CHECK: vrndscaless $9, {{.*}}encoding: [0x62,0xf3,0x7d,0x08,0x0a,0xc0,0x09] %res = call float @llvm.floor.f32(float %a) ret float %res } @@ -98,7 +98,7 @@ declare float @llvm.floor.f32(float %p) define float @floor_f32m(float* %aptr) { ; CHECK-LABEL: floor_f32m -; CHECK: vrndscaless $1, (%rdi), {{.*}}encoding: [0x62,0xf3,0x7d,0x08,0x0a,0x07,0x01] +; CHECK: vrndscaless $9, (%rdi), {{.*}}encoding: [0x62,0xf3,0x7d,0x08,0x0a,0x07,0x09] %a = load float, float* %aptr, align 4 %res = call float @llvm.floor.f32(float %a) ret float %res