From 812e359b50919fb99942c674098ce8b8c506ab5d Mon Sep 17 00:00:00 2001 From: Allan Haldane Date: Thu, 27 Dec 2018 21:45:26 -0500 Subject: [PATCH] BUG: fix uint alignment asserts in lowlevel loops --- doc/source/reference/alignment.rst | 24 ++++++----- .../core/src/multiarray/array_assign_array.c | 12 +++++- .../core/src/multiarray/array_assign_scalar.c | 17 +++++--- numpy/core/src/multiarray/common.h | 2 + numpy/core/src/multiarray/ctors.c | 3 +- numpy/core/src/multiarray/dtype_transfer.c | 15 ++++--- .../multiarray/lowlevel_strided_loops.c.src | 40 +++++++++---------- numpy/core/src/multiarray/mapping.c | 6 ++- numpy/core/src/multiarray/nditer_constr.c | 34 ++++++++++++---- numpy/core/tests/test_multiarray.py | 16 ++++++-- 10 files changed, 114 insertions(+), 55 deletions(-) diff --git a/doc/source/reference/alignment.rst b/doc/source/reference/alignment.rst index c749972b420d..b182a1e7b6b3 100644 --- a/doc/source/reference/alignment.rst +++ b/doc/source/reference/alignment.rst @@ -34,6 +34,14 @@ datatype is implemented as ``struct { float real, imag; }``. This has "true" alignment of 4 and "uint" alignment of 8 (equal to the true alignment of ``uint64``). +Some cases where uint and true alignment are different (default gcc linux): + arch type true-aln uint-aln + ---- ---- -------- -------- + x86_64 complex64 4 8 + x86_64 float128 16 8 + x86 float96 4 - + + Variables in Numpy which control and describe alignment ------------------------------------------------------- @@ -82,17 +90,15 @@ Here is how the variables above are used: appropriate N. Otherwise numpy copies by doing ``memcpy(dst, src, N)``. 5. Nditer code: Since this often calls the strided copy code, it must check for "uint alignment". - 6. Cast code: if the array is "uint aligned" this will essentially do - ``*dst = CASTFUNC(*src)``. If not, it does + 6. Cast code: This checks for "true" alignment, as it does + ``*dst = CASTFUNC(*src)`` if aligned. Otherwise, it does ``memmove(srcval, src); dstval = CASTFUNC(srcval); memmove(dst, dstval)`` where dstval/srcval are aligned. -Note that in principle, only "true alignment" is required for casting code. -However, because the casting code and copy code are deeply intertwined they -both use "uint" alignment. This should be safe assuming uint alignment is -always larger than true alignment, though it can cause unnecessary buffering if -an array is "true aligned" but not "uint aligned". If there is ever a big -rewrite of this code it would be good to allow them to use different -alignments. +Note that the strided-copy and strided-cast code are deeply intertwined and so +any arrays being processed by them must be both uint and true aligned, even +though te copy-code only needs uint alignment and the cast code only true +alignment. If there is ever a big rewrite of this code it would be good to +allow them to use different alignments. diff --git a/numpy/core/src/multiarray/array_assign_array.c b/numpy/core/src/multiarray/array_assign_array.c index f692e03076f3..69a527dfab55 100644 --- a/numpy/core/src/multiarray/array_assign_array.c +++ b/numpy/core/src/multiarray/array_assign_array.c @@ -48,11 +48,15 @@ raw_array_assign_array(int ndim, npy_intp *shape, NPY_BEGIN_THREADS_DEF; - /* Check alignment */ + /* Check both uint and true alignment */ aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides, npy_uint_alignment(dst_dtype->elsize)) && + raw_array_is_aligned(ndim, shape, dst_data, dst_strides, + dst_dtype->alignment) && raw_array_is_aligned(ndim, shape, src_data, src_strides, npy_uint_alignment(src_dtype->elsize)); + raw_array_is_aligned(ndim, shape, src_data, src_strides, + src_dtype->alignment); /* Use raw iteration with no heap allocation */ if (PyArray_PrepareTwoRawArrayIter( @@ -133,11 +137,15 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp *shape, NPY_BEGIN_THREADS_DEF; - /* Check alignment */ + /* Check both uint and true alignment */ aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides, npy_uint_alignment(dst_dtype->elsize)) && + raw_array_is_aligned(ndim, shape, dst_data, dst_strides, + dst_dtype->alignment) && raw_array_is_aligned(ndim, shape, src_data, src_strides, npy_uint_alignment(src_dtype->elsize)); + raw_array_is_aligned(ndim, shape, src_data, src_strides, + src_dtype->alignment); /* Use raw iteration with no heap allocation */ if (PyArray_PrepareThreeRawArrayIter( diff --git a/numpy/core/src/multiarray/array_assign_scalar.c b/numpy/core/src/multiarray/array_assign_scalar.c index 841a41850793..ecb5be47b50a 100644 --- a/numpy/core/src/multiarray/array_assign_scalar.c +++ b/numpy/core/src/multiarray/array_assign_scalar.c @@ -45,10 +45,13 @@ raw_array_assign_scalar(int ndim, npy_intp *shape, NPY_BEGIN_THREADS_DEF; - /* Check alignment */ + /* Check both uint and true alignment */ aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides, npy_uint_alignment(dst_dtype->elsize)) && - npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize)); + raw_array_is_aligned(ndim, shape, dst_data, dst_strides, + dst_dtype->alignment) && + npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize) && + npy_is_aligned(src_data, src_dtype->alignment)); /* Use raw iteration with no heap allocation */ if (PyArray_PrepareOneRawArrayIter( @@ -116,10 +119,13 @@ raw_array_wheremasked_assign_scalar(int ndim, npy_intp *shape, NPY_BEGIN_THREADS_DEF; - /* Check alignment */ + /* Check both uint and true alignment */ aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides, npy_uint_alignment(dst_dtype->elsize)) && - npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize)); + raw_array_is_aligned(ndim, shape, dst_data, dst_strides, + dst_dtype->alignment) && + npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize) && + npy_is_aligned(src_data, src_dtype->alignment)); /* Use raw iteration with no heap allocation */ if (PyArray_PrepareTwoRawArrayIter( @@ -220,7 +226,8 @@ PyArray_AssignRawScalar(PyArrayObject *dst, * we also skip this if 'dst' has an object dtype. */ if ((!PyArray_EquivTypes(PyArray_DESCR(dst), src_dtype) || - !npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize))) && + !(npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize)) && + npy_is_aligned(src_data, src_dtype->alignment))) && PyArray_SIZE(dst) > 1 && !PyDataType_REFCHK(PyArray_DESCR(dst))) { char *tmp_src_data; diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h index 2b8d3d3a42ea..0e162903d4ea 100644 --- a/numpy/core/src/multiarray/common.h +++ b/numpy/core/src/multiarray/common.h @@ -182,6 +182,7 @@ check_and_adjust_axis(int *axis, int ndim) /* used for some alignment checks */ #define _ALIGN(type) offsetof(struct {char c; type v;}, v) +#define _UINT_ALIGN(type) npy_uint_alignment(sizeof(type)) /* * Disable harmless compiler warning "4116: unnamed type definition in * parentheses" which is caused by the _ALIGN macro. @@ -201,6 +202,7 @@ npy_is_aligned(const void * p, const npy_uintp alignment) * Assumes cast from pointer to uintp gives a sensible representation we * can use bitwise & on (not required by C standard, but used by glibc). * This test is faster than a direct modulo. + * Note alignment value of 0 is allowed and returns False. */ return ((npy_uintp)(p) & ((alignment) - 1)) == 0; } diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index f77e414daa6d..a17621946014 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -2827,7 +2827,8 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order) * contiguous strides, etc. */ if (PyArray_GetDTypeTransferFunction( - IsUintAligned(src) && IsUintAligned(dst), + IsUintAligned(src) && IsAligned(src) && + IsUintAligned(dst) && IsAligned(dst), src_stride, dst_stride, PyArray_DESCR(src), PyArray_DESCR(dst), 0, diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c index 2b29d4f8c9dc..63b1ead25cb3 100644 --- a/numpy/core/src/multiarray/dtype_transfer.c +++ b/numpy/core/src/multiarray/dtype_transfer.c @@ -26,6 +26,7 @@ #include "_datetime.h" #include "datetime_strings.h" #include "descriptor.h" +#include "array_assign.h" #include "shape.h" #include "lowlevel_strided_loops.h" @@ -3765,11 +3766,15 @@ PyArray_CastRawArrays(npy_intp count, return NPY_SUCCEED; } - /* Check data alignment */ - aligned = (((npy_intp)src | src_stride) & - (src_dtype->alignment - 1)) == 0 && - (((npy_intp)dst | dst_stride) & - (dst_dtype->alignment - 1)) == 0; + /* Check data alignment, both uint and true */ + aligned = raw_array_is_aligned(1, &count, dst, &dst_stride, + npy_uint_alignment(dst_dtype->elsize)) && + raw_array_is_aligned(1, &count, dst, &dst_stride, + dst_dtype->alignment) && + raw_array_is_aligned(1, &count, src, &src_stride, + npy_uint_alignment(src_dtype->elsize)) && + raw_array_is_aligned(1, &count, src, &src_stride, + src_dtype->alignment); /* Get the function to do the casting */ if (PyArray_GetDTypeTransferFunction(aligned, diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src index 896e466c8930..16bacf1abc48 100644 --- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src +++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src @@ -82,7 +82,7 @@ /**begin repeat * #elsize = 1, 2, 4, 8, 16# * #elsize_half = 0, 1, 2, 4, 8# - * #type = npy_uint8, npy_uint16, npy_uint32, npy_uint64, npy_uint128# + * #type = npy_uint8, npy_uint16, npy_uint32, npy_uint64, npy_uint64# */ /**begin repeat1 * #oper = strided_to_strided, strided_to_contig, @@ -119,10 +119,10 @@ static void npy_intp N, npy_intp NPY_UNUSED(src_itemsize), NpyAuxData *NPY_UNUSED(data)) { -#if @is_aligned@ && @elsize@ != 16 +#if @is_aligned@ /* sanity check */ - assert(N == 0 || npy_is_aligned(dst, _ALIGN(@type@))); - assert(N == 0 || npy_is_aligned(src, _ALIGN(@type@))); + assert(N == 0 || npy_is_aligned(dst, _UINT_ALIGN(@type@))); + assert(N == 0 || npy_is_aligned(src, _UINT_ALIGN(@type@))); #endif /*printf("fn @prefix@_@oper@_size@elsize@\n");*/ while (N > 0) { @@ -201,8 +201,8 @@ static NPY_GCC_OPT_3 void } #if @is_aligned@ && @elsize@ != 16 /* sanity check */ - assert(N == 0 || npy_is_aligned(dst, _ALIGN(@type@))); - assert(N == 0 || npy_is_aligned(src, _ALIGN(@type@))); + assert(N == 0 || npy_is_aligned(dst, _UINT_ALIGN(@type@))); + assert(N == 0 || npy_is_aligned(src, _UINT_ALIGN(@type@))); #endif #if @elsize@ == 1 && @dst_contig@ memset(dst, *src, N); @@ -808,12 +808,8 @@ static NPY_GCC_OPT_3 void #if @aligned@ /* sanity check */ -# if !@is_complex1@ assert(N == 0 || npy_is_aligned(src, _ALIGN(_TYPE1))); -# endif -# if !@is_complex2@ assert(N == 0 || npy_is_aligned(dst, _ALIGN(_TYPE2))); -# endif #endif /*printf("@prefix@_cast_@name1@_to_@name2@\n");*/ @@ -1425,7 +1421,7 @@ mapiter_trivial_@name@(PyArrayObject *self, PyArrayObject *ind, while (itersize--) { char * self_ptr; npy_intp indval = *((npy_intp*)ind_ptr); - assert(npy_is_aligned(ind_ptr, _ALIGN(npy_intp))); + assert(npy_is_aligned(ind_ptr, _UINT_ALIGN(npy_intp))); #if @isget@ if (check_and_adjust_index(&indval, fancy_dim, 0, _save) < 0 ) { return -1; @@ -1439,8 +1435,8 @@ mapiter_trivial_@name@(PyArrayObject *self, PyArrayObject *ind, #if @isget@ #if @elsize@ - assert(npy_is_aligned(result_ptr, _ALIGN(@copytype@))); - assert(npy_is_aligned(self_ptr, _ALIGN(@copytype@))); + assert(npy_is_aligned(result_ptr, _UINT_ALIGN(@copytype@))); + assert(npy_is_aligned(self_ptr, _UINT_ALIGN(@copytype@))); *(@copytype@ *)result_ptr = *(@copytype@ *)self_ptr; #else copyswap(result_ptr, self_ptr, 0, self); @@ -1448,8 +1444,8 @@ mapiter_trivial_@name@(PyArrayObject *self, PyArrayObject *ind, #else /* !@isget@ */ #if @elsize@ - assert(npy_is_aligned(result_ptr, _ALIGN(@copytype@))); - assert(npy_is_aligned(self_ptr, _ALIGN(@copytype@))); + assert(npy_is_aligned(result_ptr, _UINT_ALIGN(@copytype@))); + assert(npy_is_aligned(self_ptr, _UINT_ALIGN(@copytype@))); *(@copytype@ *)self_ptr = *(@copytype@ *)result_ptr; #else copyswap(self_ptr, result_ptr, 0, self); @@ -1571,7 +1567,7 @@ mapiter_@name@(PyArrayMapIterObject *mit) for (i=0; i < @numiter@; i++) { npy_intp indval = *((npy_intp*)outer_ptrs[i]); assert(npy_is_aligned(outer_ptrs[i], - _ALIGN(npy_intp))); + _UINT_ALIGN(npy_intp))); #if @isget@ && @one_iter@ if (check_and_adjust_index(&indval, fancy_dims[i], @@ -1591,16 +1587,20 @@ mapiter_@name@(PyArrayMapIterObject *mit) #if @isget@ #if @elsize@ - assert(npy_is_aligned(outer_ptrs[i], _ALIGN(@copytype@))); - assert(npy_is_aligned(self_ptr, _ALIGN(@copytype@))); + assert(npy_is_aligned(outer_ptrs[i], + _UINT_ALIGN(@copytype@))); + assert(npy_is_aligned(self_ptr, + _UINT_ALIGN(@copytype@))); *(@copytype@ *)(outer_ptrs[i]) = *(@copytype@ *)self_ptr; #else copyswap(outer_ptrs[i], self_ptr, 0, array); #endif #else /* !@isget@ */ #if @elsize@ - assert(npy_is_aligned(outer_ptrs[i], _ALIGN(@copytype@))); - assert(npy_is_aligned(self_ptr, _ALIGN(@copytype@))); + assert(npy_is_aligned(outer_ptrs[i], + _UINT_ALIGN(@copytype@))); + assert(npy_is_aligned(self_ptr, + _UINT_ALIGN(@copytype@))); *(@copytype@ *)self_ptr = *(@copytype@ *)(outer_ptrs[i]); #else copyswap(self_ptr, outer_ptrs[i], 0, array); diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index 1b05faeebf37..17edd2bbf474 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -1064,7 +1064,8 @@ array_boolean_subscript(PyArrayObject *self, /* Get a dtype transfer function */ NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); - if (PyArray_GetDTypeTransferFunction(IsUintAligned(self), + if (PyArray_GetDTypeTransferFunction( + IsUintAligned(self) && IsAligned(self), fixed_strides[0], itemsize, dtype, dtype, 0, @@ -1253,7 +1254,8 @@ array_assign_boolean_subscript(PyArrayObject *self, /* Get a dtype transfer function */ NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); if (PyArray_GetDTypeTransferFunction( - IsUintAligned(self) && IsUintAligned(v), + IsUintAligned(self) && IsAligned(self) && + IsUintAligned(v) && IsAligned(v), v_stride, fixed_strides[0], PyArray_DESCR(v), PyArray_DESCR(self), 0, diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c index dbb24f26bc00..c0341f8828bf 100644 --- a/numpy/core/src/multiarray/nditer_constr.c +++ b/numpy/core/src/multiarray/nditer_constr.c @@ -1132,7 +1132,7 @@ npyiter_prepare_one_operand(PyArrayObject **op, /* Check if the operand is aligned */ if (op_flags & NPY_ITER_ALIGNED) { /* Check alignment */ - if (!IsUintAligned(*op)) { + if (!(IsUintAligned(*op) && IsAligned(*op))) { NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST " "because of NPY_ITER_ALIGNED\n"); *op_itflags |= NPY_OP_ITFLAG_CAST; @@ -2851,8 +2851,14 @@ npyiter_allocate_arrays(NpyIter *iter, npyiter_replace_axisdata(iter, iop, op[iop], ondim, PyArray_DATA(op[iop]), op_axes ? op_axes[iop] : NULL); - /* New arrays are aligned and need no cast */ - op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED; + /* + * New arrays are guaranteed true-aligned, but copy/cast code + * needs uint-alignment in addition. + */ + if (IsUintAligned(out)) { + op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED; + } + /* New arrays need no cast */ op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST; } /* @@ -2888,11 +2894,17 @@ npyiter_allocate_arrays(NpyIter *iter, PyArray_DATA(op[iop]), NULL); /* - * New arrays are aligned need no cast, and in the case + * New arrays are guaranteed true-aligned, but copy/cast code + * needs uint-alignment in addition. + */ + if (IsUintAligned(temp)) { + op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED; + } + /* + * New arrays need no cast, and in the case * of scalars, always have stride 0 so never need buffering */ - op_itflags[iop] |= (NPY_OP_ITFLAG_ALIGNED | - NPY_OP_ITFLAG_BUFNEVER); + op_itflags[iop] |= NPY_OP_ITFLAG_BUFNEVER; op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST; if (itflags & NPY_ITFLAG_BUFFER) { NBF_STRIDES(bufferdata)[iop] = 0; @@ -2953,8 +2965,14 @@ npyiter_allocate_arrays(NpyIter *iter, npyiter_replace_axisdata(iter, iop, op[iop], ondim, PyArray_DATA(op[iop]), op_axes ? op_axes[iop] : NULL); - /* The temporary copy is aligned and needs no cast */ - op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED; + /* + * New arrays are guaranteed true-aligned, but copy/cast code + * additionally needs uint-alignment in addition. + */ + if (IsUintAligned(temp)) { + op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED; + } + /* The temporary copy needs no cast */ op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST; } else { diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 91f4526bd439..70cf74c4a19d 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -54,7 +54,12 @@ def _aligned_zeros(shape, dtype=float, order="C", align=None): - """Allocate a new ndarray with aligned memory.""" + """ + Allocate a new ndarray with aligned memory. + + The ndarray is guaranteed *not* aligned to twice the requested alignment. + Eg, if align=4, guarantees it is not aligned to 8. If align=None uses + dtype.alignment.""" dtype = np.dtype(dtype) if dtype == np.dtype(object): # Can't do this, fall back to standard allocation (which @@ -67,10 +72,15 @@ def _aligned_zeros(shape, dtype=float, order="C", align=None): if not hasattr(shape, '__len__'): shape = (shape,) size = functools.reduce(operator.mul, shape) * dtype.itemsize - buf = np.empty(size + align + 1, np.uint8) - offset = buf.__array_interface__['data'][0] % align + buf = np.empty(size + 2*align + 1, np.uint8) + + ptr = buf.__array_interface__['data'][0] + offset = ptr % align if offset != 0: offset = align - offset + if (ptr % (2*align)) == 0: + offset += align + # Note: slices producing 0-size arrays do not necessarily change # data pointer --- so we use and allocate size+1 buf = buf[offset:offset+size+1][:-1]