Skip to content

Commit

Permalink
Merge pull request numpy#6053 from ahaldane/multifield_structassign
Browse files Browse the repository at this point in the history
MAINT: struct assignment "by field position", multi-field indices return views
  • Loading branch information
charris authored Sep 9, 2017
2 parents f217318 + 9f27418 commit 68a58e0
Show file tree
Hide file tree
Showing 17 changed files with 481 additions and 602 deletions.
37 changes: 37 additions & 0 deletions doc/release/1.14.0-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -231,3 +231,40 @@ Integer scalars are now unaffected by ``np.set_string_function``
Previously the str/repr of integer scalars could be controlled by
``np.set_string_function``, unlike most other numpy scalars. This is no longer
the case.

Multiple-field indexing/assignment of structured arrays
-------------------------------------------------------
The indexing and assignment of structured arrays with multiple fields has
changed in a number of ways:

First, indexing a structured array with multiple fields (eg,
``arr[['f1', 'f3']]``) returns a view into the original array instead of a
copy. The returned view will have extra padding bytes corresponding to
intervening fields in the original array, unlike the copy in 1.13, which will
affect code such as ``arr[['f1', 'f3']].view(newdtype)``.

Second, assignment between structured arrays will now occur "by position"
instead of "by field name". The Nth field of the destination will be set to the
Nth field of the source regardless of field name, unlike in numpy versions 1.6
to 1.13 in which fields in the destination array were set to the
identically-named field in the source array or to 0 if the source did not have
a field.

Correspondingly, the order of fields in a structured dtypes now matters when
computing dtype equality. For example with the dtypes
`x = dtype({'names': ['A', 'B'], 'formats': ['i4', 'f4'], 'offsets': [0, 4]})`
`y = dtype({'names': ['B', 'A'], 'formats': ['f4', 'i4'], 'offsets': [4, 0]})`
now `x == y` will return `False`, unlike before. This makes dictionary-based
dtype specifications like `dtype({'a': ('i4', 0), 'b': ('f4', 4)})` dangerous
in python < 3.6 since dict key-order is not preserved in those versions.

Assignment from a structured array to a boolean array now raises a ValueError,
unlike in 1.13 where it always set the destination elements to `True`.

Assignment from structured array with more than one field to a non-structured
array now raises a ValueError. In 1.13 this copied just the first field of the
source to the destination.

Using field "titles" in multiple-field indexing is now disallowed, as is
repeating a field name in a multiple-field index.

3 changes: 2 additions & 1 deletion numpy/core/src/multiarray/array_assign_array.c
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,8 @@ PyArray_AssignArray(PyArrayObject *dst, PyArrayObject *src,
if (((PyArray_NDIM(dst) == 1 && PyArray_NDIM(src) >= 1 &&
PyArray_STRIDES(dst)[0] *
PyArray_STRIDES(src)[PyArray_NDIM(src) - 1] < 0) ||
PyArray_NDIM(dst) > 1) && arrays_overlap(src, dst)) {
PyArray_NDIM(dst) > 1 || PyArray_HASFIELDS(dst)) &&
arrays_overlap(src, dst)) {
PyArrayObject *tmp;

/*
Expand Down
210 changes: 156 additions & 54 deletions numpy/core/src/multiarray/arraytypes.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -774,66 +774,173 @@ VOID_getitem(void *input, void *vap)

NPY_NO_EXPORT int PyArray_CopyObject(PyArrayObject *, PyObject *);

/* Given a structured PyArrayObject arr, index i and structured datatype descr,
* modify the dtype of arr to contain a single field corresponding to the ith
* field of descr, recompute the alignment flag, and return the offset of the
* field (in offset_p). This is useful in preparation for calling copyswap on
* individual fields of a numpy structure, in VOID_setitem. Compare to inner
* loops in VOID_getitem and VOID_nonzero.
*
* WARNING: Clobbers arr's dtype and alignment flag.
*/
NPY_NO_EXPORT int
_setup_field(int i, PyArray_Descr *descr, PyArrayObject *arr,
npy_intp *offset_p)
{
PyObject *key;
PyObject *tup;
PyArray_Descr *new;
npy_intp offset;

key = PyTuple_GET_ITEM(descr->names, i);
tup = PyDict_GetItem(descr->fields, key);
if (_unpack_field(tup, &new, &offset) < 0) {
return -1;
}

((PyArrayObject_fields *)(arr))->descr = new;
if ((new->alignment > 1) && ((offset % new->alignment) != 0)) {
PyArray_CLEARFLAGS(arr, NPY_ARRAY_ALIGNED);
}
else {
PyArray_ENABLEFLAGS(arr, NPY_ARRAY_ALIGNED);
}

*offset_p = offset;
return 0;
}

/* Helper function for VOID_setitem, which uses the copyswap or casting code to
* copy structured datatypes between numpy arrays or scalars.
*/
static int
_copy_and_return_void_setitem(PyArray_Descr *dstdescr, char *dstdata,
PyArray_Descr *srcdescr, char *srcdata){
PyArrayObject_fields dummy_struct;
PyArrayObject *dummy = (PyArrayObject *)&dummy_struct;
npy_int names_size = PyTuple_GET_SIZE(dstdescr->names);
npy_intp offset;
npy_int i;
int ret;

/* Fast path if dtypes are equal */
if (PyArray_EquivTypes(srcdescr, dstdescr)) {
for (i = 0; i < names_size; i++) {
/* neither line can ever fail, in principle */
if (_setup_field(i, dstdescr, dummy, &offset)) {
return -1;
}
PyArray_DESCR(dummy)->f->copyswap(dstdata + offset,
srcdata + offset, 0, dummy);
}
return 0;
}

/* Slow path */
ret = PyArray_CastRawArrays(1, srcdata, dstdata, 0, 0,
srcdescr, dstdescr, 0);
if (ret != NPY_SUCCEED) {
return -1;
}
return 0;
}

static int
VOID_setitem(PyObject *op, void *input, void *vap)
{
char *ip = input;
PyArrayObject *ap = vap;
PyArray_Descr *descr;
int flags;
int itemsize=PyArray_DESCR(ap)->elsize;
int res;

descr = PyArray_DESCR(ap);
if (descr->names && PyTuple_Check(op)) {
PyObject *key;
PyObject *names;
int i, n;
PyObject *tup;
int savedflags;

res = 0;
/* get the names from the fields dictionary*/
names = descr->names;
n = PyTuple_GET_SIZE(names);
if (PyTuple_GET_SIZE(op) != n) {
PyErr_SetString(PyExc_ValueError,
"size of tuple must match number of fields.");
return -1;
}
savedflags = PyArray_FLAGS(ap);
for (i = 0; i < n; i++) {
PyArray_Descr *new;
npy_intp offset;
key = PyTuple_GET_ITEM(names, i);
tup = PyDict_GetItem(descr->fields, key);
if (_unpack_field(tup, &new, &offset) < 0) {
((PyArrayObject_fields *)ap)->descr = descr;
flags = PyArray_FLAGS(ap);
if (PyDataType_HASFIELDS(descr)) {
PyObject *errmsg;
npy_int i;
npy_intp offset;
int failed = 0;

/* If op is 0d-ndarray or numpy scalar, directly get dtype & data ptr */
if (PyArray_Check(op)) {
PyArrayObject *oparr = (PyArrayObject *)op;
if (PyArray_SIZE(oparr) != 1) {
PyErr_SetString(PyExc_ValueError,
"setting an array element with a sequence.");
return -1;
}
/*
* TODO: temporarily modifying the array like this
* is bad coding style, should be changed.
*/
((PyArrayObject_fields *)ap)->descr = new;
/* remember to update alignment flags */
if ((new->alignment > 1)
&& ((((npy_intp)(ip+offset)) % new->alignment) != 0)) {
PyArray_CLEARFLAGS(ap, NPY_ARRAY_ALIGNED);
return _copy_and_return_void_setitem(descr, ip,
PyArray_DESCR(oparr), PyArray_DATA(oparr));
}
else if (PyArray_IsScalar(op, Void)) {
PyArray_Descr *srcdescr = ((PyVoidScalarObject *)op)->descr;
char *srcdata = ((PyVoidScalarObject *)op)->obval;
return _copy_and_return_void_setitem(descr, ip, srcdescr, srcdata);
}
else if (PyTuple_Check(op)) {
/* if it's a tuple, copy field-by-field to ap, */
npy_intp names_size = PyTuple_GET_SIZE(descr->names);

if (names_size != PyTuple_Size(op)) {
errmsg = PyUString_FromFormat(
"could not assign tuple of length %zd to structure "
"with %" NPY_INTP_FMT " fields.",
PyTuple_Size(op), names_size);
PyErr_SetObject(PyExc_ValueError, errmsg);
Py_DECREF(errmsg);
return -1;
}
else {
PyArray_ENABLEFLAGS(ap, NPY_ARRAY_ALIGNED);

for (i = 0; i < names_size; i++) {
PyObject *item;

/* temporarily make ap have only this field */
if (_setup_field(i, descr, ap, &offset) == -1) {
failed = 1;
break;
}
item = PyTuple_GetItem(op, i);
if (item == NULL) {
failed = 1;
break;
}
/* use setitem to set this field */
if (PyArray_DESCR(ap)->f->setitem(item, ip + offset, ap) < 0) {
failed = 1;
break;
}
}
res = new->f->setitem(PyTuple_GET_ITEM(op, i), ip+offset, ap);
((PyArrayObject_fields *)ap)->flags = savedflags;
if (res < 0) {
break;
}
else {
/* Otherwise must be non-void scalar. Try to assign to each field */
npy_intp names_size = PyTuple_GET_SIZE(descr->names);

for (i = 0; i < names_size; i++) {
/* temporarily make ap have only this field */
if (_setup_field(i, descr, ap, &offset) == -1) {
failed = 1;
break;
}
/* use setitem to set this field */
if (PyArray_DESCR(ap)->f->setitem(op, ip + offset, ap) < 0) {
failed = 1;
break;
}
}
}
((PyArrayObject_fields *)ap)->descr = descr;
return res;
}

if (descr->subarray) {
/* reset clobbered attributes */
((PyArrayObject_fields *)(ap))->descr = descr;
((PyArrayObject_fields *)(ap))->flags = flags;

if (failed) {
return -1;
}
return 0;
}
else if (PyDataType_HASSUBARRAY(descr)) {
/* copy into an array of the same basic type */
PyArray_Dims shape = {NULL, -1};
PyArrayObject *ret;
Expand Down Expand Up @@ -862,29 +969,24 @@ VOID_setitem(PyObject *op, void *input, void *vap)
return res;
}

/* Default is to use buffer interface to set item */
/*
* Fall through case - non-structured void datatype. This is a very
* undiscerning case: It interprets any object as a buffer
* and reads as many bytes as possible, padding with 0.
*/
{
const void *buffer;
Py_ssize_t buflen;
if (PyDataType_FLAGCHK(descr, NPY_ITEM_HASOBJECT)
|| PyDataType_FLAGCHK(descr, NPY_ITEM_IS_POINTER)) {
PyErr_SetString(PyExc_ValueError,
"Setting void-array with object members using buffer.");
return -1;
}
res = PyObject_AsReadBuffer(op, &buffer, &buflen);
if (res == -1) {
goto fail;
return -1;
}
memcpy(ip, buffer, PyArray_MIN(buflen, itemsize));
if (itemsize > buflen) {
memset(ip + buflen, 0, itemsize - buflen);
}
}
return 0;

fail:
return -1;
}

static PyObject *
Expand Down
2 changes: 1 addition & 1 deletion numpy/core/src/multiarray/descriptor.c
Original file line number Diff line number Diff line change
Expand Up @@ -3119,7 +3119,7 @@ static PyMethodDef arraydescr_methods[] = {
*
* Returns 1 if it has a simple layout, 0 otherwise.
*/
static int
NPY_NO_EXPORT int
is_dtype_struct_simple_unaligned_layout(PyArray_Descr *dtype)
{
PyObject *names, *fields, *key, *tup, *title;
Expand Down
4 changes: 4 additions & 0 deletions numpy/core/src/multiarray/descriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ array_set_typeDict(PyObject *NPY_UNUSED(ignored), PyObject *args);
NPY_NO_EXPORT PyArray_Descr *
_arraydescr_fromobj(PyObject *obj);


NPY_NO_EXPORT int
is_dtype_struct_simple_unaligned_layout(PyArray_Descr *dtype);

/*
* Creates a string repr of the dtype, excluding the 'dtype()' part
* surrounding the object. This object may be a string, a list, or
Expand Down
Loading

0 comments on commit 68a58e0

Please sign in to comment.