Skip to content

Commit

Permalink
Merge pull request numpy#10411 from ahaldane/revert_multifield_view
Browse files Browse the repository at this point in the history
BUG: Revert multifield-indexing adds padding bytes for NumPy 1.15.
  • Loading branch information
charris authored Jun 11, 2018
2 parents 8525d3e + e08eced commit 1b92080
Show file tree
Hide file tree
Showing 10 changed files with 297 additions and 34 deletions.
10 changes: 10 additions & 0 deletions doc/release/1.15.0-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,16 @@ Since ``np.ma.masked`` is a readonly scalar, copying should be a no-op. These
functions now behave consistently with ``np.copy()``.


Multifield Indexing of Structured Arrays will still return a copy
-----------------------------------------------------------------
The change that multi-field indexing of structured arrays returns a view
instead of a copy is pushed back to 1.16. A new method
``numpy.lib.recfunctions.repack_fields`` has been introduced to help mitigate
the effects of this change, which can be used to write code compatible with
both numpy 1.15 and 1.16. For more information on how to update code to account
for this future change see "basics/structured arrays/accessing multiple fields"
in the user guide.

C API changes
=============

Expand Down
2 changes: 1 addition & 1 deletion numpy/core/src/multiarray/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
* __len__ is not defined.
*/
if (maxdims == 0 || !PySequence_Check(obj) || PySequence_Size(obj) < 0) {
// clear any PySequence_Size error, which corrupts further calls to it
/* clear any PySequence_Size error which corrupts further calls */
PyErr_Clear();

if (*out_dtype == NULL || (*out_dtype)->type_num != NPY_OBJECT) {
Expand Down
8 changes: 5 additions & 3 deletions numpy/core/src/multiarray/convert.c
Original file line number Diff line number Diff line change
Expand Up @@ -613,11 +613,14 @@ PyArray_View(PyArrayObject *self, PyArray_Descr *type, PyTypeObject *pytype)
subtype = Py_TYPE(self);
}

if (type != NULL && (PyArray_FLAGS(self) & NPY_ARRAY_WARN_ON_WRITE)) {
dtype = PyArray_DESCR(self);

if (type != NULL && !PyArray_EquivTypes(dtype, type) &&
(PyArray_FLAGS(self) & NPY_ARRAY_WARN_ON_WRITE)) {
const char *msg =
"Numpy has detected that you may be viewing or writing to an array "
"returned by selecting multiple fields in a structured array. \n\n"
"This code may break in numpy 1.13 because this will return a view "
"This code may break in numpy 1.16 because this will return a view "
"instead of a copy -- see release notes for details.";
/* 2016-09-19, 1.12 */
if (DEPRECATE_FUTUREWARNING(msg) < 0) {
Expand All @@ -629,7 +632,6 @@ PyArray_View(PyArrayObject *self, PyArray_Descr *type, PyTypeObject *pytype)

flags = PyArray_FLAGS(self);

dtype = PyArray_DESCR(self);
Py_INCREF(dtype);
ret = (PyArrayObject *)PyArray_NewFromDescr_int(
subtype, dtype,
Expand Down
60 changes: 53 additions & 7 deletions numpy/core/src/multiarray/mapping.c
Original file line number Diff line number Diff line change
Expand Up @@ -1386,15 +1386,55 @@ array_subscript_asarray(PyArrayObject *self, PyObject *op)
return PyArray_EnsureAnyArray(array_subscript(self, op));
}

/*
* Helper function for _get_field_view which turns a multifield
* view into a "packed" copy, as done in numpy 1.15 and before.
* In numpy 1.16 this function should be removed.
*/
NPY_NO_EXPORT int
_multifield_view_to_copy(PyArrayObject **view) {
static PyObject *copyfunc = NULL;
PyObject *viewcopy;

/* return a repacked copy of the view */
npy_cache_import("numpy.lib.recfunctions", "repack_fields", &copyfunc);
if (copyfunc == NULL) {
goto view_fail;
}

PyArray_CLEARFLAGS(*view, NPY_ARRAY_WARN_ON_WRITE);
viewcopy = PyObject_CallFunction(copyfunc, "O", *view);
if (viewcopy == NULL) {
goto view_fail;
}
Py_DECREF(*view);
*view = (PyArrayObject*)viewcopy;

/* warn when writing to the copy */
PyArray_ENABLEFLAGS(*view, NPY_ARRAY_WARN_ON_WRITE);
return 0;

view_fail:
Py_DECREF(*view);
*view = NULL;
return 0;
}

/*
* Attempts to subscript an array using a field name or list of field names.
*
* If an error occurred, return 0 and set view to NULL. If the subscript is not
* a string or list of strings, return -1 and set view to NULL. Otherwise
* return 0 and set view to point to a new view into arr for the given fields.
*
* In numpy 1.15 and before, in the case of a list of field names the returned
* view will actually be a copy by default, with fields packed together.
* The `force_view` argument causes a view to be returned. This argument can be
* removed in 1.16 when we plan to return a view always.
*/
NPY_NO_EXPORT int
_get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
_get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view,
int force_view)
{
*view = NULL;

Expand Down Expand Up @@ -1489,12 +1529,12 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
Py_DECREF(names);
return 0;
}
// disallow use of titles as index
/* disallow use of titles as index */
if (PyTuple_Size(tup) == 3) {
PyObject *title = PyTuple_GET_ITEM(tup, 2);
int titlecmp = PyObject_RichCompareBool(title, name, Py_EQ);
if (titlecmp == 1) {
// if title == name, we were given a title, not a field name
/* if title == name, we got a title, not a field name */
PyErr_SetString(PyExc_KeyError,
"cannot use field titles in multi-field index");
}
Expand All @@ -1507,7 +1547,7 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
}
Py_DECREF(title);
}
// disallow duplicate field indices
/* disallow duplicate field indices */
if (PyDict_Contains(fields, name)) {
PyObject *errmsg = PyUString_FromString(
"duplicate field of name ");
Expand Down Expand Up @@ -1552,10 +1592,16 @@ _get_field_view(PyArrayObject *arr, PyObject *ind, PyArrayObject **view)
PyArray_FLAGS(arr),
(PyObject *)arr, (PyObject *)arr,
0, 1);

if (*view == NULL) {
return 0;
}
return 0;

/* the code below can be replaced by "return 0" in 1.16 */
if (force_view) {
return 0;
}
return _multifield_view_to_copy(view);
}
return -1;
}
Expand Down Expand Up @@ -1583,7 +1629,7 @@ array_subscript(PyArrayObject *self, PyObject *op)
/* return fields if op is a string index */
if (PyDataType_HASFIELDS(PyArray_DESCR(self))) {
PyArrayObject *view;
int ret = _get_field_view(self, op, &view);
int ret = _get_field_view(self, op, &view, 0);
if (ret == 0){
if (view == NULL) {
return NULL;
Expand Down Expand Up @@ -1865,7 +1911,7 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
/* field access */
if (PyDataType_HASFIELDS(PyArray_DESCR(self))){
PyArrayObject *view;
int ret = _get_field_view(self, ind, &view);
int ret = _get_field_view(self, ind, &view, 1);
if (ret == 0){
if (view == NULL) {
return -1;
Expand Down
14 changes: 14 additions & 0 deletions numpy/core/src/private/npy_import.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,18 @@ npy_cache_import(const char *module, const char *attr, PyObject **cache)
}
}

NPY_INLINE static PyObject *
npy_import(const char *module, const char *attr)
{
PyObject *mod = PyImport_ImportModule(module);
PyObject *ret = NULL;

if (mod != NULL) {
ret = PyObject_GetAttrString(mod, attr);
}
Py_XDECREF(mod);

return ret;
}

#endif
67 changes: 64 additions & 3 deletions numpy/core/tests/test_multiarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -4796,9 +4796,25 @@ def test_field_names(self):
fn2 = func('f2')
b[fn2] = 3

assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3))
assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2))
assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,)))
# In 1.16 code below can be replaced by:
# assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3))
# assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2))
# assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,)))
with suppress_warnings() as sup:
sup.filter(FutureWarning,
".* selecting multiple fields .*")

assert_equal(b[['f1', 'f2']][0].tolist(), (2, 3))
assert_equal(b[['f2', 'f1']][0].tolist(), (3, 2))
assert_equal(b[['f1', 'f3']][0].tolist(), (2, (1,)))
# view of subfield view/copy
assert_equal(b[['f1', 'f2']][0].view(('i4', 2)).tolist(),
(2, 3))
assert_equal(b[['f2', 'f1']][0].view(('i4', 2)).tolist(),
(3, 2))
view_dtype = [('f1', 'i4'), ('f3', [('', 'i4')])]
assert_equal(b[['f1', 'f3']][0].view(view_dtype).tolist(),
(2, (1,)))

# non-ascii unicode field indexing is well behaved
if not is_py3:
Expand All @@ -4808,6 +4824,51 @@ def test_field_names(self):
assert_raises(ValueError, a.__setitem__, u'\u03e0', 1)
assert_raises(ValueError, a.__getitem__, u'\u03e0')

# can be removed in 1.16
def test_field_names_deprecation(self):

def collect_warnings(f, *args, **kwargs):
with warnings.catch_warnings(record=True) as log:
warnings.simplefilter("always")
f(*args, **kwargs)
return [w.category for w in log]

a = np.zeros((1,), dtype=[('f1', 'i4'),
('f2', 'i4'),
('f3', [('sf1', 'i4')])])
a['f1'][0] = 1
a['f2'][0] = 2
a['f3'][0] = (3,)
b = np.zeros((1,), dtype=[('f1', 'i4'),
('f2', 'i4'),
('f3', [('sf1', 'i4')])])
b['f1'][0] = 1
b['f2'][0] = 2
b['f3'][0] = (3,)

# All the different functions raise a warning, but not an error
assert_equal(collect_warnings(a[['f1', 'f2']].__setitem__, 0, (10, 20)),
[FutureWarning])
# For <=1.12 a is not modified, but it will be in 1.13
assert_equal(a, b)

# Views also warn
subset = a[['f1', 'f2']]
subset_view = subset.view()
assert_equal(collect_warnings(subset_view['f1'].__setitem__, 0, 10),
[FutureWarning])
# But the write goes through:
assert_equal(subset['f1'][0], 10)
# Only one warning per multiple field indexing, though (even if there
# are multiple views involved):
assert_equal(collect_warnings(subset['f1'].__setitem__, 0, 10), [])

# make sure views of a multi-field index warn too
c = np.zeros(3, dtype='i8,i8,i8')
assert_equal(collect_warnings(c[['f0', 'f2']].view, 'i8,i8'),
[FutureWarning])


def test_record_hash(self):
a = np.array([(1, 2), (1, 2)], dtype='i1,i2')
a.flags.writeable = False
Expand Down
2 changes: 2 additions & 0 deletions numpy/core/tests/test_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import warnings
import textwrap
from os import path
import pytest

import numpy as np
from numpy.testing import (
Expand Down Expand Up @@ -360,6 +361,7 @@ def test_nonwriteable_setfield(self):
with assert_raises(ValueError):
r.setfield([2,3], *r.dtype.fields['f'])

@pytest.mark.xfail(reason="See gh-10411, becomes real error in 1.16")
def test_out_of_order_fields(self):
# names in the same order, padding added to descr
x = self.data[['col1', 'col2']]
Expand Down
74 changes: 56 additions & 18 deletions numpy/doc/structured_arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,9 @@
Offsets may be chosen such that the fields overlap, though this will mean
that assigning to one field may clobber any overlapping field's data. As
an exception, fields of :class:`numpy.object` type .. (see
:ref:`object arrays <arrays.object>`) cannot overlap with other fields,
because of the risk of clobbering the internal object pointer and then
dereferencing it.
an exception, fields of :class:`numpy.object` type cannot overlap with
other fields, because of the risk of clobbering the internal object
pointer and then dereferencing it.
The optional 'aligned' value can be set to ``True`` to make the automatic
offset computation use aligned offsets (see :ref:`offsets-and-alignment`),
Expand Down Expand Up @@ -235,6 +234,11 @@
alignment conditions, the array will have the ``ALIGNED`` :ref:`flag
<numpy.ndarray.flags>` set.
A convenience function :func:`numpy.lib.recfunctions.repack_fields` converts an
aligned dtype or array to a packed one and vice versa. It takes either a dtype
or structured ndarray as an argument, and returns a copy with fields re-packed,
with or without padding bytes.
.. _titles:
Field Titles
Expand Down Expand Up @@ -396,27 +400,61 @@
Accessing Multiple Fields
```````````````````````````
One can index a structured array with a multi-field index, where the index is a
list of field names::
One can index and assign to a structured array with a multi-field index, where
the index is a list of field names.
.. warning::
The behavior of multi-field indexes will change from Numpy 1.15 to Numpy
1.16.
>>> a = np.zeros(3, dtype=[('a', 'i8'), ('b', 'i4'), ('c', 'f8')])
In Numpy 1.16, the result of indexing with a multi-field index will be a view
into the original array, as follows::
>>> a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'i4'), ('c', 'f4')])
>>> a[['a', 'c']]
array([(0, 0.0), (0, 0.0), (0, 0.0)],
dtype={'names':['a','c'], 'formats':['<i8','<f8'], 'offsets':[0,11], 'itemsize':19})
array([(0, 0.), (0, 0.), (0, 0.)],
dtype={'names':['a','c'], 'formats':['<i4','<f4'], 'offsets':[0,8], 'itemsize':12})
Assignment to the view modifies the original array. The view's fields will be
in the order they were indexed. Note that unlike for single-field indexing, the
view's dtype has the same itemsize as the original array, and has fields at the
same offsets as in the original array, and unindexed fields are merely missing.
In Numpy 1.15, indexing an array with a multi-field index returns a copy of
the result above for 1.16, but with fields packed together in memory as if
passed through :func:`numpy.lib.recfunctions.repack_fields`. This is the
behavior since Numpy 1.7.
.. warning::
The new behavior in Numpy 1.16 leads to extra "padding" bytes at the
location of unindexed fields. You will need to update any code which depends
on the data having a "packed" layout. For instance code such as::
>>> a[['a','c']].view('i8') # will fail in Numpy 1.16
ValueError: When changing to a smaller dtype, its size must be a divisor of the size of original dtype
will need to be changed. This code has raised a ``FutureWarning`` since
Numpy 1.12.
The following is a recommended fix, which will behave identically in Numpy
1.15 and Numpy 1.16::
>>> from numpy.lib.recfunctions import repack_fields
>>> repack_fields(a[['a','c']]).view('i8') # supported 1.15 and 1.16
array([0, 0, 0])
Assigning to an array with a multi-field index will behave the same in Numpy
1.15 and Numpy 1.16. In both versions the assignment will modify the original
array::
>>> a[['a', 'c']] = (2, 3)
>>> a
array([(2, 0, 3.0), (2, 0, 3.0), (2, 0, 3.0)],
dtype=[('a', '<i8'), ('b', '<i4'), ('c', '<f8')])
The resulting array is a view into the original array, such that assignment to
the view modifies the original array. The view's fields will be in the order
they were indexed. Note that unlike for single-field indexing, the view's dtype
has the same itemsize as the original array, and has fields at the same offsets
as in the original array, and unindexed fields are merely missing.
Since the view is a structured array itself, it obeys the assignment rules
described above. For example, this means that one can swap the values of two
fields using appropriate multi-field indexes::
This obeys the structured array assignment rules described above. For example,
this means that one can swap the values of two fields using appropriate
multi-field indexes::
>>> a[['a', 'c']] = a[['c', 'a']]
Expand Down
Loading

0 comments on commit 1b92080

Please sign in to comment.