Skip to content

Commit

Permalink
Merge pull request numpy#13520 from r-devulap/issue13512
Browse files Browse the repository at this point in the history
BUG: exp, log AVX loops do not use steps
  • Loading branch information
mattip authored May 16, 2019
2 parents b82869e + 4b4d2ab commit 7be5f11
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 14 deletions.
26 changes: 14 additions & 12 deletions numpy/core/src/umath/loops.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -1621,21 +1621,23 @@ FLOAT_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSE
/*
 * Single-precision exp/log inner loop built for one instruction set.
 *
 * The blocked helper is tried first. When it returns 0 (it did not process
 * the array), each element is handled on its own inside UNARY_LOOP, which
 * supplies the per-element input/output pointers ip1 and op1.
 */
NPY_NO_EXPORT NPY_GCC_OPT_3 void
FLOAT_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
    if (!run_unary_@isa@_@func@_FLOAT(args, dimensions, steps)) {
        UNARY_LOOP {
            /*
             * We use the AVX function to compute exp/log for scalar elements
             * as well. This is needed to ensure the output of strided and
             * non-strided cases match. But this worsens the performance of
             * strided arrays. There is a plan to fix this in a subsequent
             * patch by using gather instructions for strided arrays in the
             * AVX function.
             */
#if defined @CHK@ && defined NPY_HAVE_SSE2_INTRINSICS
            /* One element per call: output pointer first, then input. */
            @ISA@_@func@_FLOAT((npy_float *)op1, (npy_float *)ip1, 1);
#else
            /*
             * This is the path taken if the ISA was runtime detected, but not
             * compiled for (clang6.0 fails to compile the AVX512F version).
             * Note that if AVX512F is detected at runtime in such a build,
             * the scalar version runs instead of AVX2.
             */
            const npy_float in1 = *(npy_float *)ip1;
            *(npy_float *)op1 = @scalarf@(in1);
#endif
        }
    }
}

/**end repeat1**/
Expand Down
20 changes: 18 additions & 2 deletions numpy/core/src/umath/simd.inc.src
Original file line number Diff line number Diff line change
Expand Up @@ -122,20 +122,36 @@ abs_ptrdiff(char *a, char *b)

/**begin repeat
* #ISA = AVX2, AVX512F#
* #isa = avx2, avx512f#
* #REGISTER_SIZE = 32, 64#
*/

/* prototypes */
#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS

/**begin repeat1
* #func = exp, log#
*/

#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS
static NPY_INLINE void
@ISA@_@func@_FLOAT(npy_float *, npy_float *, const npy_intp n);
#endif

/**end repeat1**/
static NPY_INLINE int
run_unary_@isa@_@func@_FLOAT(char **args, npy_intp *dimensions, npy_intp *steps)
{
#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS
if (IS_BLOCKABLE_UNARY(sizeof(npy_float), @REGISTER_SIZE@)) {
@ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]);
return 1;
}
else
return 0;
#endif
return 0;
}

/**end repeat1**/

/**end repeat**/

Expand Down
25 changes: 25 additions & 0 deletions numpy/core/tests/test_ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1964,3 +1964,28 @@ def test_ufunc_types(ufunc):
assert r.dtype == np.dtype(t)
else:
assert res.dtype == np.dtype(out)

@pytest.mark.parametrize('ufunc', [getattr(np, x) for x in dir(np)
                                   if isinstance(getattr(np, x), np.ufunc)])
def test_ufunc_noncontiguous(ufunc):
    '''
    Check that contiguous and non-contiguous calls to ufuncs
    have the same results for the input values 1 through 6.
    '''
    for typ in ufunc.types:
        # types is a list of strings like ii->i
        if set('O?mM') & set(typ):
            # bool, object, datetime are too irregular for this simple test
            continue
        inp, _ = typ.split('->')
        # Contiguous inputs, and strided views (every third element of a
        # larger buffer) holding exactly the same six values.
        args_c = [np.empty(6, t) for t in inp]
        args_n = [np.empty(18, t)[::3] for t in inp]
        for a in args_c:
            a.flat = range(1, 7)
        for a in args_n:
            a.flat = range(1, 7)
        # Record (and thereby silence) any warnings the ufunc emits for
        # these inputs; only the computed values are compared.
        with warnings.catch_warnings(record=True):
            warnings.filterwarnings("always")
            res_c = ufunc(*args_c)
            res_n = ufunc(*args_n)
        assert_equal(res_c, res_n)

0 comments on commit 7be5f11

Please sign in to comment.