Merge pull request numpy#22385 from seberg/deprecate-out-of-bound-pyint-conversion

DEP: Deprecate conversion of out-of-bound Python integers
mattip authored Oct 11, 2022
2 parents 7036b16 + 5f11024 commit 226f9c5
Showing 20 changed files with 280 additions and 60 deletions.
17 changes: 17 additions & 0 deletions doc/release/upcoming_changes/22393.deprecation.rst
@@ -0,0 +1,17 @@
Conversion of out-of-bound Python integers
------------------------------------------
Attempting a conversion from a Python integer to a NumPy
value will now always check whether the result can be
represented by the target NumPy type. This means the following
examples will fail in the future and give a ``DeprecationWarning`` now::

np.uint8(-1)
np.array([3000], dtype=np.int8)

Many of these did succeed before. Such code was mainly
useful for unsigned integers with negative values such as
``np.uint8(-1)`` giving ``np.iinfo(np.uint8).max``.

Note that conversion between NumPy integers is unaffected,
so that ``np.array(-1).astype(np.uint8)`` continues to work
and use C integer overflow logic.
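
A minimal sketch of the behavior this note describes (assuming a NumPy build
that includes this change; the exact warning text may differ)::

    import warnings
    import numpy as np

    # Out-of-bound Python int -> NumPy integer: DeprecationWarning now, error later.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        np.uint8(-1)
        np.array([3000], dtype=np.int8)
    print([w.category.__name__ for w in caught])  # two DeprecationWarnings expected

    # The documented alternative: NumPy-to-NumPy casts keep C overflow semantics.
    print(np.array(-1).astype(np.uint8))  # 255 == np.iinfo(np.uint8).max
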
133 changes: 123 additions & 10 deletions numpy/core/src/multiarray/arraytypes.c.src
@@ -21,6 +21,7 @@
#include "npy_sort.h"
#include "common.h"
#include "ctors.h"
#include "convert_datatype.h"
#include "dtypemeta.h"
#include "lowlevel_strided_loops.h"
#include "usertypes.h"
@@ -174,6 +175,13 @@ MyPyLong_As@Type@ (PyObject *obj)
return ret;
}

static @type@
MyPyLong_As@Type@WithWrap(PyObject *obj, int *wraparound)
{
*wraparound = 0; /* Never happens within the function */
return MyPyLong_As@Type@(obj);
}

/**end repeat**/

/**begin repeat
@@ -182,9 +190,10 @@ MyPyLong_As@Type@ (PyObject *obj)
* #type = npy_ulong, npy_ulonglong#
*/
static @type@
MyPyLong_AsUnsigned@Type@ (PyObject *obj)
MyPyLong_AsUnsigned@Type@WithWrap(PyObject *obj, int *wraparound)
{
@type@ ret;
*wraparound = 0;
PyObject *num = PyNumber_Long(obj);

if (num == NULL) {
@@ -193,12 +202,21 @@ MyPyLong_AsUnsigned@Type@ (PyObject *obj)
ret = PyLong_AsUnsigned@Type@(num);
if (PyErr_Occurred()) {
PyErr_Clear();
*wraparound = 1; /* negative wrapped to positive */
ret = PyLong_As@Type@(num);
}
Py_DECREF(num);
return ret;
}

static @type@
MyPyLong_AsUnsigned@Type@(PyObject *obj)
{
int wraparound;
return MyPyLong_AsUnsigned@Type@WithWrap(obj, &wraparound);
}


/**end repeat**/

/*
@@ -217,6 +235,88 @@ MyPyLong_AsUnsigned@Type@ (PyObject *obj)
#endif


/**begin repeat
*
* #type = npy_byte, npy_short, npy_int, npy_long, npy_longlong,
* npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong#
* #TYPE = BYTE, SHORT, INT, LONG, LONGLONG,
* UBYTE, USHORT, UINT, ULONG, ULONGLONG#
* #STYPE = BYTE, SHORT, INT, LONG, LONGLONG,
* BYTE, SHORT, INT, LONG, LONGLONG#
* #conv_type = npy_long*4, npy_longlong, npy_ulong*4, npy_ulonglong#
* #CSTYPE = LONG*4, LONGLONG, LONG*4, LONGLONG#
* #func = MyPyLong_AsLong*4, MyPyLong_AsLongLong,
* MyPyLong_AsLong*2, MyPyLong_AsUnsignedLong*2,
* MyPyLong_AsUnsignedLongLong#
*/

/*
* Helper for conversion from Python integers. This uses the same conversion
* function as below for compatibility (which may seem strange).
* However, it adds more strict integer overflow checks to prevent mainly
* conversion of negative integers. These are considered deprecated, which is
* related to NEP 50 (but somewhat independent).
*/
static int
@TYPE@_safe_pyint_setitem(PyObject *obj, @type@ *result)
{
/* Input is guaranteed to be a Python integer */
assert(PyLong_Check(obj));
int wraparound;
@conv_type@ value = @func@WithWrap(obj, &wraparound);
if (value == (@conv_type@)-1 && PyErr_Occurred()) {
return -1;
}
*result = (@type@)value;

if (wraparound
#if NPY_SIZEOF_@STYPE@ < NPY_SIZEOF_@CSTYPE@
|| *result != value
#endif
) {
PyArray_Descr *descr = PyArray_DescrFromType(NPY_@TYPE@);

if (npy_promotion_state == NPY_USE_LEGACY_PROMOTION || (
npy_promotion_state == NPY_USE_WEAK_PROMOTION_AND_WARN
&& !npy_give_promotion_warnings())) {
/*
* This path will be taken both for the "promotion" case such as
* `uint8_arr + 123` as well as the assignment case.
* The "legacy" path should only ever be taken for assignment
* (legacy promotion will prevent overflows by promoting up)
* so a normal deprecation makes sense.
* When weak promotion is active, we use "future" behavior unless
* warnings were explicitly opted into.
*/
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"NumPy will stop allowing conversion of out-of-bound "
"Python integers to integer arrays. The conversion "
"of %.100R to %S will fail in the future.\n"
"For the old behavior, usually:\n"
" np.array(value).astype(dtype)`\n"
"will give the desired result (the cast overflows).",
obj, descr) < 0) {
Py_DECREF(descr);
return -1;
}
Py_DECREF(descr);
return 0;
}
else {
/* Live in the future, outright error: */
PyErr_Format(PyExc_OverflowError,
"Python int %R too large to convert to %S", obj, descr);
Py_DECREF(descr);
return -1;
}
assert(0);
}
return 0;
}

/**end repeat**/


/**begin repeat
*
* #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, LONG, UINT, ULONG,
@@ -235,7 +335,8 @@ MyPyLong_AsUnsigned@Type@ (PyObject *obj)
* npy_half, npy_float, npy_double#
* #kind = Bool, Byte, UByte, Short, UShort, Int, Long, UInt, ULong,
* LongLong, ULongLong, Half, Float, Double#
*/
* #is_int = 0,1*10,0*3#
*/
static PyObject *
@TYPE@_getitem(void *input, void *vap)
{
@@ -253,12 +354,26 @@ static PyObject *
}
}

static int
NPY_NO_EXPORT int
@TYPE@_setitem(PyObject *op, void *ov, void *vap)
{
PyArrayObject *ap = vap;
@type@ temp; /* ensures alignment */

#if @is_int@
if (PyLong_Check(op)) {
/*
* When weak promotion is enabled (using NEP 50) we also use more
* strict parsing of integers: All out-of-bound Python integer
* parsing fails.
*/
if (@TYPE@_safe_pyint_setitem(op, &temp) < 0) {
return -1;
}
}
else /* continue with if below */
#endif

if (PyArray_IsScalar(op, @kind@)) {
temp = PyArrayScalar_VAL(op, @kind@);
}
@@ -291,6 +406,7 @@ static int

/**end repeat**/


/**begin repeat
*
* #TYPE = CFLOAT, CDOUBLE#
@@ -328,13 +444,12 @@ static PyObject *
* #ftype = npy_float, npy_double, npy_longdouble#
* #kind = CFloat, CDouble, CLongDouble#
*/
static int
NPY_NO_EXPORT int
@NAME@_setitem(PyObject *op, void *ov, void *vap)
{
PyArrayObject *ap = vap;
Py_complex oop;
@type@ temp;
int rsize;

if (PyArray_IsZeroDim(op)) {
return convert_to_scalar_and_retry(op, ov, vap, @NAME@_setitem);
@@ -401,12 +516,10 @@ static int
#endif
}

memcpy(ov, &temp, PyArray_DESCR(ap)->elsize);
if (PyArray_ISBYTESWAPPED(ap)) {
memcpy(ov, &temp, NPY_SIZEOF_@NAME@);
if (ap != NULL && PyArray_ISBYTESWAPPED(ap)) {
byte_swap_vector(ov, 2, sizeof(@ftype@));
}
rsize = sizeof(@ftype@);
copy_and_swap(ov, &temp, rsize, 2, rsize, PyArray_ISBYTESWAPPED(ap));
return 0;
}

@@ -487,7 +600,7 @@ LONGDOUBLE_getitem(void *ip, void *ap)
return PyArray_Scalar(ip, PyArray_DESCR((PyArrayObject *)ap), NULL);
}

static int
NPY_NO_EXPORT int
LONGDOUBLE_setitem(PyObject *op, void *ov, void *vap)
{
PyArrayObject *ap = vap;
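
A rough Python-level check of the new safe_pyint_setitem path above (a sketch
assuming the default legacy-promotion state; not part of this diff): item
assignment of an out-of-range Python int now warns, while assigning an
already-converted NumPy integer still wraps via the C cast:

    import warnings
    import numpy as np

    arr = np.zeros(1, dtype=np.int8)

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        arr[0] = 3000  # Python int out of int8 range -> DeprecationWarning
    print([w.category.__name__ for w in caught])  # ['DeprecationWarning']

    arr[0] = np.array(3000).astype(np.int8)[()]  # NumPy-to-NumPy cast still wraps
    print(arr)  # [-72], i.e. C overflow behavior
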
17 changes: 17 additions & 0 deletions numpy/core/src/multiarray/arraytypes.h.src
@@ -28,6 +28,23 @@ small_correlate(const char * d_, npy_intp dstride,
npy_intp nk, enum NPY_TYPES ktype,
char * out_, npy_intp ostride);

/**begin repeat
* #TYPE = BYTE, UBYTE, SHORT, USHORT, INT, UINT,
* LONG, ULONG, LONGLONG, ULONGLONG,
* HALF, FLOAT, DOUBLE, LONGDOUBLE,
* CFLOAT, CDOUBLE, CLONGDOUBLE#
*/
/*
* The setitem functions are currently directly used in certain branches
* of the scalar-math code. (Yes, this would be nice to refactor...)
*/

NPY_NO_EXPORT int
@TYPE@_setitem(PyObject *obj, void *data_ptr, void *arr);

/**end repeat**/


#ifndef NPY_DISABLE_OPTIMIZATION
#include "argfunc.dispatch.h"
#endif
42 changes: 31 additions & 11 deletions numpy/core/src/umath/ufunc_object.c
@@ -2958,6 +2958,7 @@ PyUFunc_Reduce(PyUFuncObject *ufunc,
int iaxes, ndim;
npy_bool reorderable;
npy_bool axis_flags[NPY_MAXDIMS];
PyArrayObject *result = NULL;
PyObject *identity;
const char *ufunc_name = ufunc_get_name_cstr(ufunc);
/* These parameters come from a TLS global */
@@ -2983,11 +2984,21 @@ PyUFunc_Reduce(PyUFuncObject *ufunc,
return NULL;
}

/*
* Promote and fetch ufuncimpl (currently needed to fix up the identity).
*/
PyArray_Descr *descrs[3];
PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc,
arr, out, signature, NPY_FALSE, descrs, "reduce");
if (ufuncimpl == NULL) {
return NULL;
}

/* Get the identity */
/* TODO: Both of these should be provided by the ArrayMethod! */
identity = _get_identity(ufunc, &reorderable);
if (identity == NULL) {
return NULL;
goto finish;
}

/* Get the initial value */
@@ -3003,33 +3014,42 @@ PyUFunc_Reduce(PyUFuncObject *ufunc,
initial = Py_None;
Py_INCREF(initial);
}
else if (PyTypeNum_ISUNSIGNED(descrs[2]->type_num)
&& PyLong_CheckExact(initial)) {
/*
* This is a bit of a hack until we have truly loop specific
* identities. Python -1 cannot be cast to unsigned so convert
* it to a NumPy scalar, but we use -1 for bitwise functions to
* signal all 1s.
* (A builtin identity would not overflow here, although we may
* unnecessarily convert 0 and 1.)
*/
Py_SETREF(initial, PyObject_CallFunctionObjArgs(
(PyObject *)&PyLongArrType_Type, initial, NULL));
if (initial == NULL) {
goto finish;
}
}
} else {
Py_DECREF(identity);
Py_INCREF(initial); /* match the reference count in the if above */
}

PyArray_Descr *descrs[3];
PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc,
arr, out, signature, NPY_FALSE, descrs, "reduce");
if (ufuncimpl == NULL) {
Py_DECREF(initial);
return NULL;
}

PyArrayMethod_Context context = {
.caller = (PyObject *)ufunc,
.method = ufuncimpl,
.descriptors = descrs,
};

PyArrayObject *result = PyUFunc_ReduceWrapper(&context,
result = PyUFunc_ReduceWrapper(&context,
arr, out, wheremask, axis_flags, reorderable, keepdims,
initial, reduce_loop, ufunc, buffersize, ufunc_name, errormask);

finish:
for (int i = 0; i < 3; i++) {
Py_DECREF(descrs[i]);
}
Py_DECREF(initial);
Py_XDECREF(initial);
return result;
}

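
A quick illustration of why the identity fix-up above matters (a hedged sketch
of the expected downstream behavior, not taken from this diff): bitwise
reductions use -1 as the "all bits set" identity, which must still reach
unsigned output dtypes without tripping the new out-of-bound integer check:

    import numpy as np

    empty_u8 = np.array([], dtype=np.uint8)

    print(np.bitwise_and.identity)          # -1, meaning "all bits set"
    # Reducing an empty uint8 array falls back to the identity; the conversion
    # above turns the Python -1 into a NumPy scalar so the cast can wrap to 255.
    print(np.bitwise_and.reduce(empty_u8))  # 255
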
2 changes: 1 addition & 1 deletion numpy/core/tests/test_array_coercion.py
@@ -375,7 +375,7 @@ def test_default_dtype_instance(self, dtype_char):
@pytest.mark.parametrize("dtype", np.typecodes["Integer"])
@pytest.mark.parametrize(["scalar", "error"],
[(np.float64(np.nan), ValueError),
(np.ulonglong(-1), OverflowError)])
(np.array(-1).astype(np.ulonglong)[()], OverflowError)])
def test_scalar_to_int_coerce_does_not_cast(self, dtype, scalar, error):
"""
Signed integers are currently different in that they do not cast other
6 changes: 6 additions & 0 deletions numpy/core/tests/test_casting_unittests.py
@@ -169,6 +169,9 @@ def get_data(self, dtype1, dtype2):

for i, value in enumerate(values):
# Use item assignment to ensure this is not using casting:
if value < 0 and dtype1.kind == "u":
# Manually roll over unsigned integers (-1 -> int.max)
value = value + np.iinfo(dtype1).max + 1
arr1[i] = value

if dtype2 is None:
@@ -185,6 +188,9 @@ def get_data(self, dtype1, dtype2):

for i, value in enumerate(values):
# Use item assignment to ensure this is not using casting:
if value < 0 and dtype2.kind == "u":
# Manually roll over unsigned integers (-1 -> int.max)
value = value + np.iinfo(dtype2).max + 1
arr2[i] = value

return arr1, arr2, values
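
The manual rollover used in these tests mirrors the old implicit wrap-around;
a small sanity sketch of the arithmetic (illustrative, not part of the test
suite):

    import numpy as np

    for dtype in (np.uint8, np.uint16):
        info = np.iinfo(dtype)
        value = -1
        rolled = value + info.max + 1                 # explicit rollover, as in the test
        wrapped = int(np.array(value).astype(dtype))  # old implicit behavior via a NumPy cast
        assert rolled == wrapped == info.max
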