BUG: fix numpy#4381: precision loss on string -> longdouble conversion

Avoid going through Python floats when converting a string to longdouble. This makes it dramatically easier to produce full-precision long double numbers. This fixes the constructor (np.longdouble("1.01")), np.fromfile, np.fromstring, np.loadtxt, and np.genfromtxt (and functions based on it). It also fixes precision loss when using np.tofile, and fixes numpy#1481, poor handling of bad data in fromfile and fromstring. If the function strtold_l is not available, almost none of this will work, and many tests will fail.
miclark · Aug 28, 2015 · 6cbd724 · 6cbd724
1 parent b478ded
commit 6cbd724
Show file tree

Hide file tree

Showing 10 changed files with 494 additions and 22 deletions.
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
@@ -104,7 +104,7 @@ def check_api_version(apiversion, codegen_dir):
 OPTIONAL_STDFUNCS = ["expm1", "log1p", "acosh", "asinh", "atanh",
         "rint", "trunc", "exp2", "log2", "hypot", "atan2", "pow",
         "copysign", "nextafter", "ftello", "fseeko",
-        "strtoll", "strtoull", "cbrt"]
+        "strtoll", "strtoull", "cbrt", "strtold_l",]
 
 
 OPTIONAL_HEADERS = [

diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
@@ -33,7 +33,6 @@
 #include "npy_cblas.h"
 #include <limits.h>
 
-
 /*
  *****************************************************************************
  **                        PYTHON TYPES TO C TYPES                          **
@@ -309,6 +308,54 @@ static int
 
 /**end repeat**/
 
+static NPY_INLINE npy_longdouble  /* returns the parsed value; returns 0 after raising ValueError on a bad literal */
+string_to_long_double(PyObject*op)  /* parse op's text straight to long double; non-string op falls back to a Python double */
+{
+    char *s;
+    char *end;
+    npy_longdouble temp;
+    PyObject* b;
+
+    if (PyUnicode_Check(op)) {
+        b = PyUnicode_AsUTF8String(op);  /* b is released with Py_XDECREF on every path below */
+        if (!b) {
+            return 0;  /* encoding failed: nothing to release yet */
+        }
+    }
+    else {
+        b = op;
+        Py_XINCREF(b);  /* take a reference so both branches release b the same way */
+    }
+    s = PyBytes_AsString(b);
+    if (s) {
+        errno = 0;  /* cleared so a nonzero errno after the call comes from the conversion */
+        temp = NumPyOS_ascii_strtold(s, &end);
+        if (end==s || *end) {  /* nothing was consumed, or characters remain after the number */
+            PyErr_Format(PyExc_ValueError,
+                         "invalid literal for long double: %s",
+                         s);
+            Py_XDECREF(b);  /* released only after s has been formatted into the message */
+            return 0;
+        }
+        else if (errno) {  /* whole string consumed, but errno was set during the conversion */
+            PyErr_Format(PyExc_ValueError,
+                         "invalid literal for long double: %s (%s)",
+                         s,
+                         strerror(errno));
+            Py_XDECREF(b);
+            return 0;
+        }
+        Py_XDECREF(b);
+    }
+    else {
+        /* PyBytes_AsString failed, so op probably wasn't a string: drop that error and convert via a python double */
+        PyErr_Clear();
+        Py_XDECREF(b);
+        temp = (npy_longdouble) MyPyFloat_AsDouble(op);
+    }
+    return temp;
+}
+
 /*
  * These return array scalars which are different than other date-types.
  */
@@ -330,7 +377,11 @@ LONGDOUBLE_setitem(PyObject *op, void *ov, void *vap)
         temp = ((PyLongDoubleScalarObject *)op)->obval;
     }
     else {
-        temp = (npy_longdouble) MyPyFloat_AsDouble(op);
+        /* In case something funny happened in PyArray_IsScalar */
+        if (PyErr_Occurred()) {
+            return -1;
+        }
+        temp = string_to_long_double(op);
     }
     if (PyErr_Occurred()) {
         return -1;
@@ -1572,8 +1623,8 @@ static int
 /**end repeat**/
 
 /**begin repeat
- * #fname = FLOAT, DOUBLE, LONGDOUBLE#
- * #type = npy_float, npy_double, npy_longdouble#
+ * #fname = FLOAT, DOUBLE#
+ * #type = npy_float, npy_double#
  */
 static int
 @fname@_scan(FILE *fp, @type@ *ip, void *NPY_UNUSED(ignore),
@@ -1588,6 +1639,18 @@ static int
 }
 /**end repeat**/
 
+static int  /* returns whatever NumPyOS_ascii_ftoLf returns */
+LONGDOUBLE_scan(FILE *fp, npy_longdouble *ip, void *NPY_UNUSED(ignore),  /* read one long double from fp into *ip */
+        PyArray_Descr *NPY_UNUSED(ignored))
+{
+    long double result;  /* NOTE(review): not initialized here; confirm the helper always writes it, including on failure/EOF */
+    int ret;
+
+    ret = NumPyOS_ascii_ftoLf(fp, &result);
+    *ip = (npy_longdouble) result;  /* stored unconditionally, regardless of ret */
+    return ret;
+}
+
 static int
 HALF_scan(FILE *fp, npy_half *ip, void *NPY_UNUSED(ignore),
         PyArray_Descr *NPY_UNUSED(ignored))
@@ -1675,8 +1738,8 @@ static int
 
 /**begin repeat
  *
- * #fname = FLOAT, DOUBLE, LONGDOUBLE#
- * #type = npy_float, npy_double, npy_longdouble#
+ * #fname = FLOAT, DOUBLE#
+ * #type = npy_float, npy_double#
  */
 static int
 @fname@_fromstr(char *str, void *ip, char **endptr,
@@ -1690,6 +1753,17 @@ static int
 }
 /**end repeat**/
 
+static int  /* always returns 0 */
+LONGDOUBLE_fromstr(char *str, void *ip, char **endptr,  /* parse a long double from str and store it through ip */
+        PyArray_Descr *NPY_UNUSED(ignore))
+{
+    long double result;
+
+    result = NumPyOS_ascii_strtold(str, endptr);  /* endptr is passed straight through to the parser */
+    *(npy_longdouble *)ip = result;  /* ip is treated as pointing at an npy_longdouble */
+    return 0;  /* no failure is signalled through the return value */
+}
+
 static int
 HALF_fromstr(char *str, void *ip, char **endptr,
         PyArray_Descr *NPY_UNUSED(ignore))

diff --git a/numpy/core/src/multiarray/convert.c b/numpy/core/src/multiarray/convert.c
@@ -171,7 +171,7 @@ PyArray_ToFile(PyArrayObject *self, FILE *fp, char *sep, char *format)
                 /*
                  * standard writing
                  */
-                strobj = PyObject_Str(obj);
+                strobj = PyObject_Repr(obj);
                 Py_DECREF(obj);
                 if (strobj == NULL) {
                     Py_DECREF(it);

diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
@@ -45,7 +45,18 @@ static int
 fromstr_next_element(char **s, void *dptr, PyArray_Descr *dtype,
                      const char *end)
 {
-    int r = dtype->f->fromstr(*s, dptr, s, dtype);
+    char *e = *s;
+    int r = dtype->f->fromstr(*s, dptr, &e, dtype);
+    /*
+     * fromstr always returns 0 for basic dtypes
+     * s points to the end of the parsed string
+     * if an error occurs s is not changed
+     */
+    if (*s == e) {
+        /* Nothing read */
+        return -1;
+    }
+    *s = e;
     if (end != NULL && *s > end) {
         return -1;
     }
@@ -57,7 +68,14 @@ fromfile_next_element(FILE **fp, void *dptr, PyArray_Descr *dtype,
                       void *NPY_UNUSED(stream_data))
 {
     /* the NULL argument is for backwards-compatibility */
-    return dtype->f->scanfunc(*fp, dptr, NULL, dtype);
+    int r = dtype->f->scanfunc(*fp, dptr, NULL, dtype);
+    /* r can be EOF or the number of items read (0 or 1) */
+    if (r == 1) {
+        return 0;
+    }
+    else {
+        return -1;
+    }
 }
 
 /*
@@ -3279,6 +3297,7 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char *sep, size_t *nread,
     dptr = PyArray_DATA(r);
     for (i= 0; num < 0 || i < num; i++) {
         if (next(&stream, dptr, dtype, stream_data) < 0) {
+            /* EOF */
             break;
         }
         *nread += 1;