Skip to content

Commit

Permalink
Merge pull request numpy#12626 from ahaldane/further_uint_align_fix
Browse files Browse the repository at this point in the history
BUG: fix uint alignment asserts in lowlevel loops
  • Loading branch information
charris authored Jan 3, 2019
2 parents 0ea890a + 812e359 commit fd89a41
Show file tree
Hide file tree
Showing 10 changed files with 114 additions and 55 deletions.
24 changes: 15 additions & 9 deletions doc/source/reference/alignment.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ datatype is implemented as ``struct { float real, imag; }``. This has "true"
alignment of 4 and "uint" alignment of 8 (equal to the true alignment of
``uint64``).

Some cases where uint and true alignment are different (default gcc linux):
arch type true-aln uint-aln
---- ---- -------- --------
x86_64 complex64 4 8
x86_64 float128 16 8
x86 float96 4 -


Variables in Numpy which control and describe alignment
-------------------------------------------------------

Expand Down Expand Up @@ -82,17 +90,15 @@ Here is how the variables above are used:
appropriate N. Otherwise numpy copies by doing ``memcpy(dst, src, N)``.
5. Nditer code: Since this often calls the strided copy code, it must
check for "uint alignment".
6. Cast code: if the array is "uint aligned" this will essentially do
``*dst = CASTFUNC(*src)``. If not, it does
6. Cast code: This checks for "true" alignment, as it does
``*dst = CASTFUNC(*src)`` if aligned. Otherwise, it does
``memmove(srcval, src); dstval = CASTFUNC(srcval); memmove(dst, dstval)``
where dstval/srcval are aligned.

Note that in principle, only "true alignment" is required for casting code.
However, because the casting code and copy code are deeply intertwined they
both use "uint" alignment. This should be safe assuming uint alignment is
always larger than true alignment, though it can cause unnecessary buffering if
an array is "true aligned" but not "uint aligned". If there is ever a big
rewrite of this code it would be good to allow them to use different
alignments.
Note that the strided-copy and strided-cast code are deeply intertwined and so
any arrays being processed by them must be both uint and true aligned, even
though te copy-code only needs uint alignment and the cast code only true
alignment. If there is ever a big rewrite of this code it would be good to
allow them to use different alignments.


12 changes: 10 additions & 2 deletions numpy/core/src/multiarray/array_assign_array.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,15 @@ raw_array_assign_array(int ndim, npy_intp *shape,

NPY_BEGIN_THREADS_DEF;

/* Check alignment */
/* Check both uint and true alignment */
aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
npy_uint_alignment(dst_dtype->elsize)) &&
raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
dst_dtype->alignment) &&
raw_array_is_aligned(ndim, shape, src_data, src_strides,
npy_uint_alignment(src_dtype->elsize));
raw_array_is_aligned(ndim, shape, src_data, src_strides,
src_dtype->alignment);

/* Use raw iteration with no heap allocation */
if (PyArray_PrepareTwoRawArrayIter(
Expand Down Expand Up @@ -133,11 +137,15 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp *shape,

NPY_BEGIN_THREADS_DEF;

/* Check alignment */
/* Check both uint and true alignment */
aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
npy_uint_alignment(dst_dtype->elsize)) &&
raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
dst_dtype->alignment) &&
raw_array_is_aligned(ndim, shape, src_data, src_strides,
npy_uint_alignment(src_dtype->elsize));
raw_array_is_aligned(ndim, shape, src_data, src_strides,
src_dtype->alignment);

/* Use raw iteration with no heap allocation */
if (PyArray_PrepareThreeRawArrayIter(
Expand Down
17 changes: 12 additions & 5 deletions numpy/core/src/multiarray/array_assign_scalar.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,13 @@ raw_array_assign_scalar(int ndim, npy_intp *shape,

NPY_BEGIN_THREADS_DEF;

/* Check alignment */
/* Check both uint and true alignment */
aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
npy_uint_alignment(dst_dtype->elsize)) &&
npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize));
raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
dst_dtype->alignment) &&
npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize) &&
npy_is_aligned(src_data, src_dtype->alignment));

/* Use raw iteration with no heap allocation */
if (PyArray_PrepareOneRawArrayIter(
Expand Down Expand Up @@ -116,10 +119,13 @@ raw_array_wheremasked_assign_scalar(int ndim, npy_intp *shape,

NPY_BEGIN_THREADS_DEF;

/* Check alignment */
/* Check both uint and true alignment */
aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
npy_uint_alignment(dst_dtype->elsize)) &&
npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize));
raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
dst_dtype->alignment) &&
npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize) &&
npy_is_aligned(src_data, src_dtype->alignment));

/* Use raw iteration with no heap allocation */
if (PyArray_PrepareTwoRawArrayIter(
Expand Down Expand Up @@ -220,7 +226,8 @@ PyArray_AssignRawScalar(PyArrayObject *dst,
* we also skip this if 'dst' has an object dtype.
*/
if ((!PyArray_EquivTypes(PyArray_DESCR(dst), src_dtype) ||
!npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize))) &&
!(npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize)) &&
npy_is_aligned(src_data, src_dtype->alignment))) &&
PyArray_SIZE(dst) > 1 &&
!PyDataType_REFCHK(PyArray_DESCR(dst))) {
char *tmp_src_data;
Expand Down
2 changes: 2 additions & 0 deletions numpy/core/src/multiarray/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ check_and_adjust_axis(int *axis, int ndim)

/* used for some alignment checks */
#define _ALIGN(type) offsetof(struct {char c; type v;}, v)
#define _UINT_ALIGN(type) npy_uint_alignment(sizeof(type))
/*
* Disable harmless compiler warning "4116: unnamed type definition in
* parentheses" which is caused by the _ALIGN macro.
Expand All @@ -201,6 +202,7 @@ npy_is_aligned(const void * p, const npy_uintp alignment)
* Assumes cast from pointer to uintp gives a sensible representation we
* can use bitwise & on (not required by C standard, but used by glibc).
* This test is faster than a direct modulo.
* Note alignment value of 0 is allowed and returns False.
*/
return ((npy_uintp)(p) & ((alignment) - 1)) == 0;
}
Expand Down
3 changes: 2 additions & 1 deletion numpy/core/src/multiarray/ctors.c
Original file line number Diff line number Diff line change
Expand Up @@ -2828,7 +2828,8 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order)
* contiguous strides, etc.
*/
if (PyArray_GetDTypeTransferFunction(
IsUintAligned(src) && IsUintAligned(dst),
IsUintAligned(src) && IsAligned(src) &&
IsUintAligned(dst) && IsAligned(dst),
src_stride, dst_stride,
PyArray_DESCR(src), PyArray_DESCR(dst),
0,
Expand Down
15 changes: 10 additions & 5 deletions numpy/core/src/multiarray/dtype_transfer.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "_datetime.h"
#include "datetime_strings.h"
#include "descriptor.h"
#include "array_assign.h"

#include "shape.h"
#include "lowlevel_strided_loops.h"
Expand Down Expand Up @@ -3765,11 +3766,15 @@ PyArray_CastRawArrays(npy_intp count,
return NPY_SUCCEED;
}

/* Check data alignment */
aligned = (((npy_intp)src | src_stride) &
(src_dtype->alignment - 1)) == 0 &&
(((npy_intp)dst | dst_stride) &
(dst_dtype->alignment - 1)) == 0;
/* Check data alignment, both uint and true */
aligned = raw_array_is_aligned(1, &count, dst, &dst_stride,
npy_uint_alignment(dst_dtype->elsize)) &&
raw_array_is_aligned(1, &count, dst, &dst_stride,
dst_dtype->alignment) &&
raw_array_is_aligned(1, &count, src, &src_stride,
npy_uint_alignment(src_dtype->elsize)) &&
raw_array_is_aligned(1, &count, src, &src_stride,
src_dtype->alignment);

/* Get the function to do the casting */
if (PyArray_GetDTypeTransferFunction(aligned,
Expand Down
40 changes: 20 additions & 20 deletions numpy/core/src/multiarray/lowlevel_strided_loops.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
/**begin repeat
* #elsize = 1, 2, 4, 8, 16#
* #elsize_half = 0, 1, 2, 4, 8#
* #type = npy_uint8, npy_uint16, npy_uint32, npy_uint64, npy_uint128#
* #type = npy_uint8, npy_uint16, npy_uint32, npy_uint64, npy_uint64#
*/
/**begin repeat1
* #oper = strided_to_strided, strided_to_contig,
Expand Down Expand Up @@ -119,10 +119,10 @@ static void
npy_intp N, npy_intp NPY_UNUSED(src_itemsize),
NpyAuxData *NPY_UNUSED(data))
{
#if @is_aligned@ && @elsize@ != 16
#if @is_aligned@
/* sanity check */
assert(N == 0 || npy_is_aligned(dst, _ALIGN(@type@)));
assert(N == 0 || npy_is_aligned(src, _ALIGN(@type@)));
assert(N == 0 || npy_is_aligned(dst, _UINT_ALIGN(@type@)));
assert(N == 0 || npy_is_aligned(src, _UINT_ALIGN(@type@)));
#endif
/*printf("fn @prefix@_@oper@_size@elsize@\n");*/
while (N > 0) {
Expand Down Expand Up @@ -201,8 +201,8 @@ static NPY_GCC_OPT_3 void
}
#if @is_aligned@ && @elsize@ != 16
/* sanity check */
assert(N == 0 || npy_is_aligned(dst, _ALIGN(@type@)));
assert(N == 0 || npy_is_aligned(src, _ALIGN(@type@)));
assert(N == 0 || npy_is_aligned(dst, _UINT_ALIGN(@type@)));
assert(N == 0 || npy_is_aligned(src, _UINT_ALIGN(@type@)));
#endif
#if @elsize@ == 1 && @dst_contig@
memset(dst, *src, N);
Expand Down Expand Up @@ -808,12 +808,8 @@ static NPY_GCC_OPT_3 void

#if @aligned@
/* sanity check */
# if !@is_complex1@
assert(N == 0 || npy_is_aligned(src, _ALIGN(_TYPE1)));
# endif
# if !@is_complex2@
assert(N == 0 || npy_is_aligned(dst, _ALIGN(_TYPE2)));
# endif
#endif

/*printf("@prefix@_cast_@name1@_to_@name2@\n");*/
Expand Down Expand Up @@ -1425,7 +1421,7 @@ mapiter_trivial_@name@(PyArrayObject *self, PyArrayObject *ind,
while (itersize--) {
char * self_ptr;
npy_intp indval = *((npy_intp*)ind_ptr);
assert(npy_is_aligned(ind_ptr, _ALIGN(npy_intp)));
assert(npy_is_aligned(ind_ptr, _UINT_ALIGN(npy_intp)));
#if @isget@
if (check_and_adjust_index(&indval, fancy_dim, 0, _save) < 0 ) {
return -1;
Expand All @@ -1439,17 +1435,17 @@ mapiter_trivial_@name@(PyArrayObject *self, PyArrayObject *ind,

#if @isget@
#if @elsize@
assert(npy_is_aligned(result_ptr, _ALIGN(@copytype@)));
assert(npy_is_aligned(self_ptr, _ALIGN(@copytype@)));
assert(npy_is_aligned(result_ptr, _UINT_ALIGN(@copytype@)));
assert(npy_is_aligned(self_ptr, _UINT_ALIGN(@copytype@)));
*(@copytype@ *)result_ptr = *(@copytype@ *)self_ptr;
#else
copyswap(result_ptr, self_ptr, 0, self);
#endif

#else /* !@isget@ */
#if @elsize@
assert(npy_is_aligned(result_ptr, _ALIGN(@copytype@)));
assert(npy_is_aligned(self_ptr, _ALIGN(@copytype@)));
assert(npy_is_aligned(result_ptr, _UINT_ALIGN(@copytype@)));
assert(npy_is_aligned(self_ptr, _UINT_ALIGN(@copytype@)));
*(@copytype@ *)self_ptr = *(@copytype@ *)result_ptr;
#else
copyswap(self_ptr, result_ptr, 0, self);
Expand Down Expand Up @@ -1571,7 +1567,7 @@ mapiter_@name@(PyArrayMapIterObject *mit)
for (i=0; i < @numiter@; i++) {
npy_intp indval = *((npy_intp*)outer_ptrs[i]);
assert(npy_is_aligned(outer_ptrs[i],
_ALIGN(npy_intp)));
_UINT_ALIGN(npy_intp)));

#if @isget@ && @one_iter@
if (check_and_adjust_index(&indval, fancy_dims[i],
Expand All @@ -1591,16 +1587,20 @@ mapiter_@name@(PyArrayMapIterObject *mit)

#if @isget@
#if @elsize@
assert(npy_is_aligned(outer_ptrs[i], _ALIGN(@copytype@)));
assert(npy_is_aligned(self_ptr, _ALIGN(@copytype@)));
assert(npy_is_aligned(outer_ptrs[i],
_UINT_ALIGN(@copytype@)));
assert(npy_is_aligned(self_ptr,
_UINT_ALIGN(@copytype@)));
*(@copytype@ *)(outer_ptrs[i]) = *(@copytype@ *)self_ptr;
#else
copyswap(outer_ptrs[i], self_ptr, 0, array);
#endif
#else /* !@isget@ */
#if @elsize@
assert(npy_is_aligned(outer_ptrs[i], _ALIGN(@copytype@)));
assert(npy_is_aligned(self_ptr, _ALIGN(@copytype@)));
assert(npy_is_aligned(outer_ptrs[i],
_UINT_ALIGN(@copytype@)));
assert(npy_is_aligned(self_ptr,
_UINT_ALIGN(@copytype@)));
*(@copytype@ *)self_ptr = *(@copytype@ *)(outer_ptrs[i]);
#else
copyswap(self_ptr, outer_ptrs[i], 0, array);
Expand Down
6 changes: 4 additions & 2 deletions numpy/core/src/multiarray/mapping.c
Original file line number Diff line number Diff line change
Expand Up @@ -1064,7 +1064,8 @@ array_boolean_subscript(PyArrayObject *self,

/* Get a dtype transfer function */
NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
if (PyArray_GetDTypeTransferFunction(IsUintAligned(self),
if (PyArray_GetDTypeTransferFunction(
IsUintAligned(self) && IsAligned(self),
fixed_strides[0], itemsize,
dtype, dtype,
0,
Expand Down Expand Up @@ -1253,7 +1254,8 @@ array_assign_boolean_subscript(PyArrayObject *self,
/* Get a dtype transfer function */
NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
if (PyArray_GetDTypeTransferFunction(
IsUintAligned(self) && IsUintAligned(v),
IsUintAligned(self) && IsAligned(self) &&
IsUintAligned(v) && IsAligned(v),
v_stride, fixed_strides[0],
PyArray_DESCR(v), PyArray_DESCR(self),
0,
Expand Down
34 changes: 26 additions & 8 deletions numpy/core/src/multiarray/nditer_constr.c
Original file line number Diff line number Diff line change
Expand Up @@ -1132,7 +1132,7 @@ npyiter_prepare_one_operand(PyArrayObject **op,
/* Check if the operand is aligned */
if (op_flags & NPY_ITER_ALIGNED) {
/* Check alignment */
if (!IsUintAligned(*op)) {
if (!(IsUintAligned(*op) && IsAligned(*op))) {
NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
"because of NPY_ITER_ALIGNED\n");
*op_itflags |= NPY_OP_ITFLAG_CAST;
Expand Down Expand Up @@ -2851,8 +2851,14 @@ npyiter_allocate_arrays(NpyIter *iter,
npyiter_replace_axisdata(iter, iop, op[iop], ondim,
PyArray_DATA(op[iop]), op_axes ? op_axes[iop] : NULL);

/* New arrays are aligned and need no cast */
op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED;
/*
* New arrays are guaranteed true-aligned, but copy/cast code
* needs uint-alignment in addition.
*/
if (IsUintAligned(out)) {
op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED;
}
/* New arrays need no cast */
op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST;
}
/*
Expand Down Expand Up @@ -2888,11 +2894,17 @@ npyiter_allocate_arrays(NpyIter *iter,
PyArray_DATA(op[iop]), NULL);

/*
* New arrays are aligned need no cast, and in the case
* New arrays are guaranteed true-aligned, but copy/cast code
* needs uint-alignment in addition.
*/
if (IsUintAligned(temp)) {
op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED;
}
/*
* New arrays need no cast, and in the case
* of scalars, always have stride 0 so never need buffering
*/
op_itflags[iop] |= (NPY_OP_ITFLAG_ALIGNED |
NPY_OP_ITFLAG_BUFNEVER);
op_itflags[iop] |= NPY_OP_ITFLAG_BUFNEVER;
op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST;
if (itflags & NPY_ITFLAG_BUFFER) {
NBF_STRIDES(bufferdata)[iop] = 0;
Expand Down Expand Up @@ -2953,8 +2965,14 @@ npyiter_allocate_arrays(NpyIter *iter,
npyiter_replace_axisdata(iter, iop, op[iop], ondim,
PyArray_DATA(op[iop]), op_axes ? op_axes[iop] : NULL);

/* The temporary copy is aligned and needs no cast */
op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED;
/*
* New arrays are guaranteed true-aligned, but copy/cast code
* additionally needs uint-alignment in addition.
*/
if (IsUintAligned(temp)) {
op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED;
}
/* The temporary copy needs no cast */
op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST;
}
else {
Expand Down
16 changes: 13 additions & 3 deletions numpy/core/tests/test_multiarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,12 @@


def _aligned_zeros(shape, dtype=float, order="C", align=None):
"""Allocate a new ndarray with aligned memory."""
"""
Allocate a new ndarray with aligned memory.
The ndarray is guaranteed *not* aligned to twice the requested alignment.
Eg, if align=4, guarantees it is not aligned to 8. If align=None uses
dtype.alignment."""
dtype = np.dtype(dtype)
if dtype == np.dtype(object):
# Can't do this, fall back to standard allocation (which
Expand All @@ -67,10 +72,15 @@ def _aligned_zeros(shape, dtype=float, order="C", align=None):
if not hasattr(shape, '__len__'):
shape = (shape,)
size = functools.reduce(operator.mul, shape) * dtype.itemsize
buf = np.empty(size + align + 1, np.uint8)
offset = buf.__array_interface__['data'][0] % align
buf = np.empty(size + 2*align + 1, np.uint8)

ptr = buf.__array_interface__['data'][0]
offset = ptr % align
if offset != 0:
offset = align - offset
if (ptr % (2*align)) == 0:
offset += align

# Note: slices producing 0-size arrays do not necessarily change
# data pointer --- so we use and allocate size+1
buf = buf[offset:offset+size+1][:-1]
Expand Down

0 comments on commit fd89a41

Please sign in to comment.