Merge pull request numpy#22385 from seberg/deprecate-out-of-bound-pyint-conversion

DEP: Deprecate conversion of out-of-bound Python integers
mattip authored Oct 11, 2022
2 parents 7036b16 + 5f11024 commit 226f9c5
Showing 20 changed files with 280 additions and 60 deletions.
17 changes: 17 additions & 0 deletions doc/release/upcoming_changes/22393.deprecation.rst
@@ -0,0 +1,17 @@
Conversion of out-of-bound Python integers
------------------------------------------
Attempting a conversion from a Python integer to a NumPy
value will now always check whether the result can be
represented by the target NumPy type. This means the following
examples will fail in the future and give a ``DeprecationWarning`` now::

np.uint8(-1)
np.array([3000], dtype=np.int8)

Many of these did succeed before. Such code was mainly
useful for unsigned integers with negative values such as
``np.uint8(-1)`` giving ``np.iinfo(np.uint8).max``.

Note that conversion between NumPy integers is unaffected,
so that ``np.array(-1).astype(np.uint8)`` continues to work
and use C integer overflow logic.
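
A minimal sketch of the behavior this note describes (assuming a NumPy build
that includes this change; the exact warning text may differ)::

    import warnings
    import numpy as np

    # Out-of-bound Python int -> NumPy integer: DeprecationWarning now, error later.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        np.uint8(-1)
        np.array([3000], dtype=np.int8)
    print([w.category.__name__ for w in caught])  # two DeprecationWarnings expected

    # The documented alternative: NumPy-to-NumPy casts keep C overflow semantics.
    print(np.array(-1).astype(np.uint8))  # 255 == np.iinfo(np.uint8).max
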
133 changes: 123 additions & 10 deletions numpy/core/src/multiarray/arraytypes.c.src
@@ -21,6 +21,7 @@
#include "npy_sort.h"
#include "common.h"
#include "ctors.h"
#include "convert_datatype.h"
#include "dtypemeta.h"
#include "lowlevel_strided_loops.h"
#include "usertypes.h"
@@ -174,6 +175,13 @@ MyPyLong_As@Type@ (PyObject *obj)
return ret;
}

static @type@
MyPyLong_As@Type@WithWrap(PyObject *obj, int *wraparound)
{
*wraparound = 0; /* Never happens within the function */
return MyPyLong_As@Type@(obj);
}

/**end repeat**/

/**begin repeat
@@ -182,9 +190,10 @@ MyPyLong_As@Type@ (PyObject *obj)
* #type = npy_ulong, npy_ulonglong#
*/
static @type@
MyPyLong_AsUnsigned@Type@ (PyObject *obj)
MyPyLong_AsUnsigned@Type@WithWrap(PyObject *obj, int *wraparound)
{
@type@ ret;
*wraparound = 0;
PyObject *num = PyNumber_Long(obj);

if (num == NULL) {
@@ -193,12 +202,21 @@ MyPyLong_AsUnsigned@Type@ (PyObject *obj)
ret = PyLong_AsUnsigned@Type@(num);
if (PyErr_Occurred()) {
PyErr_Clear();
*wraparound = 1; /* negative wrapped to positive */
ret = PyLong_As@Type@(num);
}
Py_DECREF(num);
return ret;
}

static @type@
MyPyLong_AsUnsigned@Type@(PyObject *obj)
{
int wraparound;
return MyPyLong_AsUnsigned@Type@WithWrap(obj, &wraparound);
}


/**end repeat**/

/*
@@ -217,6 +235,88 @@ MyPyLong_AsUnsigned@Type@ (PyObject *obj)
#endif


/**begin repeat
*
* #type = npy_byte, npy_short, npy_int, npy_long, npy_longlong,
* npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong#
* #TYPE = BYTE, SHORT, INT, LONG, LONGLONG,
* UBYTE, USHORT, UINT, ULONG, ULONGLONG#
* #STYPE = BYTE, SHORT, INT, LONG, LONGLONG,
* BYTE, SHORT, INT, LONG, LONGLONG#
* #conv_type = npy_long*4, npy_longlong, npy_ulong*4, npy_ulonglong#
* #CSTYPE = LONG*4, LONGLONG, LONG*4, LONGLONG#
* #func = MyPyLong_AsLong*4, MyPyLong_AsLongLong,
* MyPyLong_AsLong*2, MyPyLong_AsUnsignedLong*2,
* MyPyLong_AsUnsignedLongLong#
*/

/*
* Helper for conversion from Python integers. This uses the same conversion
* function as below for compatibility (which may seem strange).
* However, it adds more strict integer overflow checks to prevent mainly
* conversion of negative integers. These are considered deprecated, which is
* related to NEP 50 (but somewhat independent).
*/
static int
@TYPE@_safe_pyint_setitem(PyObject *obj, @type@ *result)
{
/* Input is guaranteed to be a Python integer */
assert(PyLong_Check(obj));
int wraparound;
@conv_type@ value = @func@WithWrap(obj, &wraparound);
if (value == (@conv_type@)-1 && PyErr_Occurred()) {
return -1;
}
*result = (@type@)value;

if (wraparound
#if NPY_SIZEOF_@STYPE@ < NPY_SIZEOF_@CSTYPE@
|| *result != value
#endif
) {
PyArray_Descr *descr = PyArray_DescrFromType(NPY_@TYPE@);

if (npy_promotion_state == NPY_USE_LEGACY_PROMOTION || (
npy_promotion_state == NPY_USE_WEAK_PROMOTION_AND_WARN
&& !npy_give_promotion_warnings())) {
/*
* This path will be taken both for the "promotion" case such as
* `uint8_arr + 123` as well as the assignment case.
* The "legacy" path should only ever be taken for assignment
* (legacy promotion will prevent overflows by promoting up)
* so a normal deprecation makes sense.
* When weak promotion is active, we use "future" behavior unless
* warnings were explicitly opted into.
*/
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"NumPy will stop allowing conversion of out-of-bound "
"Python integers to integer arrays. The conversion "
"of %.100R to %S will fail in the future.\n"
"For the old behavior, usually:\n"
" np.array(value).astype(dtype)`\n"
"will give the desired result (the cast overflows).",
obj, descr) < 0) {
Py_DECREF(descr);
return -1;
}
Py_DECREF(descr);
return 0;
}
else {
/* Live in the future, outright error: */
PyErr_Format(PyExc_OverflowError,
"Python int %R too large to convert to %S", obj, descr);
Py_DECREF(descr);
return -1;
}
assert(0);
}
return 0;
}

/**end repeat**/


/**begin repeat
*
* #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, LONG, UINT, ULONG,
@@ -235,7 +335,8 @@ MyPyLong_AsUnsigned@Type@ (PyObject *obj)
* npy_half, npy_float, npy_double#
* #kind = Bool, Byte, UByte, Short, UShort, Int, Long, UInt, ULong,
* LongLong, ULongLong, Half, Float, Double#
*/
* #is_int = 0,1*10,0*3#
*/
static PyObject *
@TYPE@_getitem(void *input, void *vap)
{
@@ -253,12 +354,26 @@ static PyObject *
}
}

static int
NPY_NO_EXPORT int
@TYPE@_setitem(PyObject *op, void *ov, void *vap)
{
PyArrayObject *ap = vap;
@type@ temp; /* ensures alignment */

#if @is_int@
if (PyLong_Check(op)) {
/*
* When weak promotion is enabled (using NEP 50) we also use more
* strict parsing of integers: All out-of-bound Python integer
* parsing fails.
*/
if (@TYPE@_safe_pyint_setitem(op, &temp) < 0) {
return -1;
}
}
else /* continue with if below */
#endif

if (PyArray_IsScalar(op, @kind@)) {
temp = PyArrayScalar_VAL(op, @kind@);
}
@@ -291,6 +406,7 @@ static int

/**end repeat**/


/**begin repeat
*
* #TYPE = CFLOAT, CDOUBLE#
@@ -328,13 +444,12 @@ static PyObject *
* #ftype = npy_float, npy_double, npy_longdouble#
* #kind = CFloat, CDouble, CLongDouble#
*/
static int
NPY_NO_EXPORT int
@NAME@_setitem(PyObject *op, void *ov, void *vap)
{
PyArrayObject *ap = vap;
Py_complex oop;
@type@ temp;
int rsize;

if (PyArray_IsZeroDim(op)) {
return convert_to_scalar_and_retry(op, ov, vap, @NAME@_setitem);
@@ -401,12 +516,10 @@ static int
#endif
}

memcpy(ov, &temp, PyArray_DESCR(ap)->elsize);
if (PyArray_ISBYTESWAPPED(ap)) {
memcpy(ov, &temp, NPY_SIZEOF_@NAME@);
if (ap != NULL && PyArray_ISBYTESWAPPED(ap)) {
byte_swap_vector(ov, 2, sizeof(@ftype@));
}
rsize = sizeof(@ftype@);
copy_and_swap(ov, &temp, rsize, 2, rsize, PyArray_ISBYTESWAPPED(ap));
return 0;
}

@@ -487,7 +600,7 @@ LONGDOUBLE_getitem(void *ip, void *ap)
return PyArray_Scalar(ip, PyArray_DESCR((PyArrayObject *)ap), NULL);
}

static int
NPY_NO_EXPORT int
LONGDOUBLE_setitem(PyObject *op, void *ov, void *vap)
{
PyArrayObject *ap = vap;
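
A rough Python-level check of the new safe_pyint_setitem path above (a sketch
assuming the default legacy-promotion state; not part of this diff): item
assignment of an out-of-range Python int now warns, while assigning an
already-converted NumPy integer still wraps via the C cast:

    import warnings
    import numpy as np

    arr = np.zeros(1, dtype=np.int8)

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        arr[0] = 3000  # Python int out of int8 range -> DeprecationWarning
    print([w.category.__name__ for w in caught])  # ['DeprecationWarning']

    arr[0] = np.array(3000).astype(np.int8)[()]  # NumPy-to-NumPy cast still wraps
    print(arr)  # [-72], i.e. C overflow behavior
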
17 changes: 17 additions & 0 deletions numpy/core/src/multiarray/arraytypes.h.src
@@ -28,6 +28,23 @@ small_correlate(const char * d_, npy_intp dstride,
npy_intp nk, enum NPY_TYPES ktype,
char * out_, npy_intp ostride);

/**begin repeat
* #TYPE = BYTE, UBYTE, SHORT, USHORT, INT, UINT,
* LONG, ULONG, LONGLONG, ULONGLONG,
* HALF, FLOAT, DOUBLE, LONGDOUBLE,
* CFLOAT, CDOUBLE, CLONGDOUBLE#
*/
/*
* The setitem functions are currently directly used in certain branches
* of the scalar-math code. (Yes, this would be nice to refactor...)
*/

NPY_NO_EXPORT int
@TYPE@_setitem(PyObject *obj, void *data_ptr, void *arr);

/**end repeat**/


#ifndef NPY_DISABLE_OPTIMIZATION
#include "argfunc.dispatch.h"
#endif
42 changes: 31 additions & 11 deletions numpy/core/src/umath/ufunc_object.c
@@ -2958,6 +2958,7 @@ PyUFunc_Reduce(PyUFuncObject *ufunc,
int iaxes, ndim;
npy_bool reorderable;
npy_bool axis_flags[NPY_MAXDIMS];
PyArrayObject *result = NULL;
PyObject *identity;
const char *ufunc_name = ufunc_get_name_cstr(ufunc);
/* These parameters come from a TLS global */
@@ -2983,11 +2984,21 @@ PyUFunc_Reduce(PyUFuncObject *ufunc,
return NULL;
}

/*
* Promote and fetch ufuncimpl (currently needed to fix up the identity).
*/
PyArray_Descr *descrs[3];
PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc,
arr, out, signature, NPY_FALSE, descrs, "reduce");
if (ufuncimpl == NULL) {
return NULL;
}

/* Get the identity */
/* TODO: Both of these should be provided by the ArrayMethod! */
identity = _get_identity(ufunc, &reorderable);
if (identity == NULL) {
return NULL;
goto finish;
}

/* Get the initial value */
@@ -3003,33 +3014,42 @@ PyUFunc_Reduce(PyUFuncObject *ufunc,
initial = Py_None;
Py_INCREF(initial);
}
else if (PyTypeNum_ISUNSIGNED(descrs[2]->type_num)
&& PyLong_CheckExact(initial)) {
/*
* This is a bit of a hack until we have truly loop specific
* identities. Python -1 cannot be cast to unsigned so convert
* it to a NumPy scalar, but we use -1 for bitwise functions to
* signal all 1s.
* (A builtin identity would not overflow here, although we may
* unnecessarily convert 0 and 1.)
*/
Py_SETREF(initial, PyObject_CallFunctionObjArgs(
(PyObject *)&PyLongArrType_Type, initial, NULL));
if (initial == NULL) {
goto finish;
}
}
} else {
Py_DECREF(identity);
Py_INCREF(initial); /* match the reference count in the if above */
}

PyArray_Descr *descrs[3];
PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc,
arr, out, signature, NPY_FALSE, descrs, "reduce");
if (ufuncimpl == NULL) {
Py_DECREF(initial);
return NULL;
}

PyArrayMethod_Context context = {
.caller = (PyObject *)ufunc,
.method = ufuncimpl,
.descriptors = descrs,
};

PyArrayObject *result = PyUFunc_ReduceWrapper(&context,
result = PyUFunc_ReduceWrapper(&context,
arr, out, wheremask, axis_flags, reorderable, keepdims,
initial, reduce_loop, ufunc, buffersize, ufunc_name, errormask);

finish:
for (int i = 0; i < 3; i++) {
Py_DECREF(descrs[i]);
}
Py_DECREF(initial);
Py_XDECREF(initial);
return result;
}

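
A quick illustration of why the identity fix-up above matters (a hedged sketch
of the expected downstream behavior, not taken from this diff): bitwise
reductions use -1 as the "all bits set" identity, which must still reach
unsigned output dtypes without tripping the new out-of-bound integer check:

    import numpy as np

    empty_u8 = np.array([], dtype=np.uint8)

    print(np.bitwise_and.identity)          # -1, meaning "all bits set"
    # Reducing an empty uint8 array falls back to the identity; the conversion
    # above turns the Python -1 into a NumPy scalar so the cast can wrap to 255.
    print(np.bitwise_and.reduce(empty_u8))  # 255
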
2 changes: 1 addition & 1 deletion numpy/core/tests/test_array_coercion.py
@@ -375,7 +375,7 @@ def test_default_dtype_instance(self, dtype_char):
@pytest.mark.parametrize("dtype", np.typecodes["Integer"])
@pytest.mark.parametrize(["scalar", "error"],
[(np.float64(np.nan), ValueError),
(np.ulonglong(-1), OverflowError)])
(np.array(-1).astype(np.ulonglong)[()], OverflowError)])
def test_scalar_to_int_coerce_does_not_cast(self, dtype, scalar, error):
"""
Signed integers are currently different in that they do not cast other
6 changes: 6 additions & 0 deletions numpy/core/tests/test_casting_unittests.py
@@ -169,6 +169,9 @@ def get_data(self, dtype1, dtype2):

for i, value in enumerate(values):
# Use item assignment to ensure this is not using casting:
if value < 0 and dtype1.kind == "u":
# Manually roll over unsigned integers (-1 -> int.max)
value = value + np.iinfo(dtype1).max + 1
arr1[i] = value

if dtype2 is None:
@@ -185,6 +188,9 @@ def get_data(self, dtype1, dtype2):

for i, value in enumerate(values):
# Use item assignment to ensure this is not using casting:
if value < 0 and dtype2.kind == "u":
# Manually roll over unsigned integers (-1 -> int.max)
value = value + np.iinfo(dtype2).max + 1
arr2[i] = value

return arr1, arr2, values
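
The manual rollover used in these tests mirrors the old implicit wrap-around;
a small sanity sketch of the arithmetic (illustrative, not part of the test
suite):

    import numpy as np

    for dtype in (np.uint8, np.uint16):
        info = np.iinfo(dtype)
        value = -1
        rolled = value + info.max + 1                 # explicit rollover, as in the test
        wrapped = int(np.array(value).astype(dtype))  # old implicit behavior via a NumPy cast
        assert rolled == wrapped == info.max
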