Merge pull request numpy#14048 from r-devulap/transcendental-accuracy…

…-tests BUG, TEST: Adding validation test suite to validate float32 exp
esben · Jul 24, 2019 · 96951ed · 96951ed
2 parents 2d0f2dd + e086003
commit 96951ed
Show file tree

Hide file tree

Showing 7 changed files with 1,787 additions and 8 deletions.
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
@@ -1223,6 +1223,46 @@ avx2_get_mantissa(__m256 x)
                     _mm256_and_si256(
                         _mm256_castps_si256(x), mantissa_bits), exp_126_bits));
 }
+
+NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX2 __m256
+avx2_scalef_ps(__m256 poly, __m256 quadrant)
+{
+    /*
+     * Handle denormals (which occur when quadrant <= -125):
+     * 1) This function computes poly*(2^quad) by adding the exponent of
+     poly to quad
+     * 2) When quad <= -125, the output is a denormal and the above logic
+     breaks down
+     * 3) To handle such cases, we split quadrant: -125 + (quadrant + 125)
+     * 4) poly*(2^-125) is computed the usual way
+     * 5) 2^(quad-125) can be computed by: 2 << abs(quad-125)
+     * 6) The final div operation generates the denormal
+     */
+     __m256 minquadrant = _mm256_set1_ps(-125.0f);
+     __m256 denormal_mask = _mm256_cmp_ps(quadrant, minquadrant, _CMP_LE_OQ);
+     if (_mm256_movemask_ps(denormal_mask) != 0x0000) {
+        __m256 quad_diff = _mm256_sub_ps(quadrant, minquadrant);
+        quad_diff = _mm256_sub_ps(_mm256_setzero_ps(), quad_diff);
+        quad_diff = _mm256_blendv_ps(_mm256_setzero_ps(), quad_diff, denormal_mask);
+        __m256i two_power_diff = _mm256_sllv_epi32(
+                                   _mm256_set1_epi32(1), _mm256_cvtps_epi32(quad_diff));
+        quadrant = _mm256_max_ps(quadrant, minquadrant); //keep quadrant >= -126
+        __m256i exponent = _mm256_slli_epi32(_mm256_cvtps_epi32(quadrant), 23);
+        poly = _mm256_castsi256_ps(
+                   _mm256_add_epi32(
+                       _mm256_castps_si256(poly), exponent));
+        __m256 denorm_poly = _mm256_div_ps(poly, _mm256_cvtepi32_ps(two_power_diff));
+        return _mm256_blendv_ps(poly, denorm_poly, denormal_mask);
+     }
+     else {
+        __m256i exponent = _mm256_slli_epi32(_mm256_cvtps_epi32(quadrant), 23);
+        poly = _mm256_castsi256_ps(
+                   _mm256_add_epi32(
+                       _mm256_castps_si256(poly), exponent));
+        return poly;
+     }
+}
+
 #endif
 
 #if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS
@@ -1276,6 +1316,12 @@ avx512_get_mantissa(__m512 x)
 {
     return _mm512_getmant_ps(x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
 }
+
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __m512
+avx512_scalef_ps(__m512 poly, __m512 quadrant)
+{
+    return _mm512_scalef_ps(poly, quadrant);
+}
 #endif
 
 /**begin repeat
@@ -1345,7 +1391,7 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
     const npy_intp stride = steps/sizeof(npy_float);
     const npy_int num_lanes = @BYTES@/sizeof(npy_float);
     npy_float xmax = 88.72283935546875f;
-    npy_float xmin = -87.3365478515625f;
+    npy_float xmin = -103.97208404541015625f;
     npy_int indexarr[16];
     for (npy_int ii = 0; ii < 16; ii++) {
         indexarr[ii] = ii*stride;
@@ -1369,7 +1415,6 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
     @vtype@ zeros_f = _mm@vsize@_set1_ps(0.0f);
     @vtype@ poly, num_poly, denom_poly, quadrant;
     @vtype@i vindex = _mm@vsize@_loadu_si@vsize@((@vtype@i*)&indexarr[0]);
-    @vtype@i exponent;
 
     @mask@ xmax_mask, xmin_mask, nan_mask, inf_mask;
     @mask@ overflow_mask = @isa@_get_partial_load_mask(0, num_lanes);
@@ -1426,10 +1471,7 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
          * exponent of quadrant to the exponent of poly. quadrant is an int,
          * so extracting exponent is simply extracting 8 bits.
          */
-        exponent = _mm@vsize@_slli_epi32(_mm@vsize@_cvtps_epi32(quadrant), 23);
-        poly = _mm@vsize@_castsi@vsize@_ps(
-                    _mm@vsize@_add_epi32(
-                        _mm@vsize@_castps_si@vsize@(poly), exponent));
+        poly = @isa@_scalef_ps(poly, quadrant);
 
         /*
          * elem > xmax; return inf
@@ -1494,6 +1536,7 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
     @vtype@ log_q5 = _mm@vsize@_set1_ps(NPY_COEFF_Q5_LOGf);
     @vtype@ loge2 = _mm@vsize@_set1_ps(NPY_LOGE2f);
     @vtype@ nan = _mm@vsize@_set1_ps(NPY_NANF);
+    @vtype@ neg_nan = _mm@vsize@_set1_ps(-NPY_NANF);
     @vtype@ neg_inf = _mm@vsize@_set1_ps(-NPY_INFINITYF);
     @vtype@ inf = _mm@vsize@_set1_ps(NPY_INFINITYF);
     @vtype@ zeros_f = _mm@vsize@_set1_ps(0.0f);
@@ -1560,11 +1603,12 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
         poly = @fmadd@(exponent, loge2, poly);
 
         /*
-         * x < 0.0f; return NAN
+         * x < 0.0f; return -NAN
          * x = +/- NAN; return NAN
          * x = 0.0f; return -INF
          */
-        poly = @isa@_set_masked_lanes(poly, nan, @or_masks@(negx_mask, nan_mask));
+        poly = @isa@_set_masked_lanes(poly, nan, nan_mask);
+        poly = @isa@_set_masked_lanes(poly, neg_nan, negx_mask);
         poly = @isa@_set_masked_lanes(poly, neg_inf, zero_mask);
         poly = @isa@_set_masked_lanes(poly, inf, inf_mask);
 

diff --git a/numpy/core/tests/data/umath-validation-set-README b/numpy/core/tests/data/umath-validation-set-README
@@ -0,0 +1,15 @@
+Steps to validate transcendental functions:
+1) Add a file 'umath-validation-set-<ufuncname>', where ufuncname is name of
+   the function in NumPy you want to validate
+2) The file should contain 4 columns: dtype,input,expected output,ulperror
+    a. dtype: one of np.float16, np.float32, np.float64
+    b. input: floating point input to ufunc in hex. Example: 0x414570a4
+       represents 12.340000152587890625
+    c. expected output: floating point output for the corresponding input in hex.
+       This should be computed using a high(er) precision library and then rounded to
+       same format as the input.
+    d. ulperror: expected maximum ulp error of the function. This
+       should be same across all rows of the same dtype. Otherwise, the function is
+       tested for the maximum ulp error among all entries of that dtype.
+3) Add file umath-validation-set-<ufuncname> to the test file test_umath_accuracy.py
+   which will then validate your ufunc.