Merge pull request pandas-dev#5601 from jreback/na_values

TST/API: test the list of NA values in the csv parser. add N/A, #NA as independent default values (GH5521)
bingo · Nov 27, 2013 · 53e61c6 · 53e61c6
2 parents d057fc9 + 3989060
commit 53e61c6
Show file tree

Hide file tree

Showing 4 changed files with 30 additions and 3 deletions.
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -564,7 +564,7 @@ the corresponding equivalent values will also imply a missing value (in this cas
 ``[5.0,5]`` are recognized as ``NaN``.
 
 To completely override the default values that are recognized as missing, specify ``keep_default_na=False``.
-The default ``NaN`` recognized values are ``['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', 'NA',
+The default ``NaN`` recognized values are ``['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A', 'N/A', 'NA',
 '#NA', 'NULL', 'NaN', 'nan']``.
 
 .. code-block:: python

diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -126,7 +126,7 @@ Improvements to existing features
     (:issue:`4039`) with improved validation for all (:issue:`4039`,
     :issue:`4794`)
   - A Series of dtype ``timedelta64[ns]`` can now be divided/multiplied
-    by an integer series (:issue`4521`)
+    by an integer series (:issue:`4521`)
   - A Series of dtype ``timedelta64[ns]`` can now be divided by another
     ``timedelta64[ns]`` object to yield a ``float64`` dtyped Series. This
     is frequency conversion; astyping is also supported.
@@ -410,6 +410,8 @@ API Changes
 
   - raise/warn ``SettingWithCopyError/Warning`` exception/warning when setting of a
     copy thru chained assignment is detected, settable via option ``mode.chained_assignment``
+  - test the list of ``NA`` values in the csv parser. add ``#N/A`` and ``N/A`` as independent default
+    na values, replacing the single ``'#N/A N/A'`` entry (:issue:`5521`)
 
 Internal Refactoring
 ~~~~~~~~~~~~~~~~~~~~

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -438,7 +438,7 @@ def read_fwf(filepath_or_buffer, colspecs='infer', widths=None, **kwds):
 # no longer excluding inf representations
 # '1.#INF','-1.#INF', '1.#INF000000',
 _NA_VALUES = set(['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN',
-                 '#N/A N/A', 'NA', '#NA', 'NULL', 'NaN',
+                 '#N/A','N/A', 'NA', '#NA', 'NULL', 'NaN',
                  'nan', ''])
 
 

diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
@@ -683,6 +683,31 @@ def test_non_string_na_values(self):
             tm.assert_frame_equal(result6,good_compare)
             tm.assert_frame_equal(result7,good_compare)
 
+    def test_default_na_values(self):  # each default NA token must be read back as NaN (GH5521)
+        _NA_VALUES = set(['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN',  # local copy of the list in pandas/io/parsers.py
+                          '#N/A','N/A', 'NA', '#NA', 'NULL', 'NaN',
+                          'nan', ''])
+
+        nv = len(_NA_VALUES)  # the CSV built below is an nv x nv grid
+        def f(i, v):  # build CSV row i: token v in column i, every other field empty
+            if i == 0:
+                buf = ''
+            elif i > 0:
+                buf = ''.join([','] * i)  # i leading commas -> i empty fields before v
+
+            buf = "{0}{1}".format(buf,v)
+
+            if i < nv-1:
+                buf = "{0}{1}".format(buf,''.join([','] * (nv-i-1)))  # trailing commas pad the row to nv fields
+
+            return buf
+
+        data = StringIO('\n'.join([ f(i, v) for i, v in enumerate(_NA_VALUES) ]))  # set order is arbitrary; token i always lands on the diagonal
+
+        expected = DataFrame(np.nan,columns=range(nv),index=range(nv))  # every cell, token or empty, is expected to be NaN
+        df = self.read_csv(data, header=None)  # header=None: the first row is data, not column names
+        tm.assert_frame_equal(df, expected)
+
     def test_custom_na_values(self):
         data = """A,B,C
 ignore,this,row