DOC: Remove repeated words and wrong /it's/ usage (pandas-dev#38047)

ricoyudog · Nov 26, 2020 · e9c91f1 · e9c91f1
1 parent acdc94f
commit e9c91f1
Show file tree

Hide file tree

Showing 46 changed files with 62 additions and 64 deletions.
diff --git a/doc/source/development/policies.rst b/doc/source/development/policies.rst
@@ -35,7 +35,7 @@ We will not introduce new deprecations in patch releases.
 Deprecations will only be enforced in **major** releases. For example, if a
 behavior is deprecated in pandas 1.2.0, it will continue to work, with a
 warning, for all releases in the 1.x series. The behavior will change and the
-deprecation removed in the next next major release (2.0.0).
+deprecation removed in the next major release (2.0.0).
 
 .. note::
 

diff --git a/doc/source/user_guide/dsintro.rst b/doc/source/user_guide/dsintro.rst
@@ -439,7 +439,7 @@ Data Classes as introduced in `PEP557 <https://www.python.org/dev/peps/pep-0557>
 can be passed into the DataFrame constructor.
 Passing a list of dataclasses is equivalent to passing a list of dictionaries.
 
-Please be aware, that that all values in the list should be dataclasses, mixing
+Please be aware, that all values in the list should be dataclasses, mixing
 types in the list would result in a TypeError.
 
 .. ipython:: python

diff --git a/doc/source/user_guide/integer_na.rst b/doc/source/user_guide/integer_na.rst
@@ -117,7 +117,7 @@ dtype if needed.
    # coerce when needed
    s + 0.01
 
-These dtypes can operate as part of of ``DataFrame``.
+These dtypes can operate as part of ``DataFrame``.
 
 .. ipython:: python
 

diff --git a/doc/source/whatsnew/v0.12.0.rst b/doc/source/whatsnew/v0.12.0.rst
@@ -419,7 +419,7 @@ Bug fixes
 ~~~~~~~~~
 
   - Plotting functions now raise a ``TypeError`` before trying to plot anything
-    if the associated objects have have a dtype of ``object`` (:issue:`1818`,
+    if the associated objects have a dtype of ``object`` (:issue:`1818`,
     :issue:`3572`, :issue:`3911`, :issue:`3912`), but they will try to convert object arrays to
     numeric arrays if possible so that you can still plot, for example, an
     object array with floats. This happens before any drawing takes place which
@@ -430,8 +430,8 @@ Bug fixes
 
   - ``Series.str`` now supports iteration (:issue:`3638`). You can iterate over the
     individual elements of each string in the ``Series``. Each iteration yields
-    yields a ``Series`` with either a single character at each index of the
-    original ``Series`` or ``NaN``. For example,
+    a ``Series`` with either a single character at each index of the original
+    ``Series`` or ``NaN``. For example,
 
     .. ipython:: python
         :okwarning:

diff --git a/doc/source/whatsnew/v0.14.0.rst b/doc/source/whatsnew/v0.14.0.rst
@@ -923,7 +923,7 @@ Bug fixes
 - ``HDFStore.select_as_multiple`` handles start and stop the same way as ``select`` (:issue:`6177`)
 - ``HDFStore.select_as_coordinates`` and ``select_column`` works with a ``where`` clause that results in filters (:issue:`6177`)
 - Regression in join of non_unique_indexes (:issue:`6329`)
-- Issue with groupby ``agg`` with a single function and a a mixed-type frame (:issue:`6337`)
+- Issue with groupby ``agg`` with a single function and a mixed-type frame (:issue:`6337`)
 - Bug in ``DataFrame.replace()`` when passing a non- ``bool``
   ``to_replace`` argument (:issue:`6332`)
 - Raise when trying to align on different levels of a MultiIndex assignment (:issue:`3738`)

diff --git a/doc/source/whatsnew/v0.15.2.rst b/doc/source/whatsnew/v0.15.2.rst
@@ -136,7 +136,7 @@ Enhancements
 
 - Added ability to export Categorical data to Stata (:issue:`8633`).  See :ref:`here <io.stata-categorical>` for limitations of categorical variables exported to Stata data files.
 - Added flag ``order_categoricals`` to ``StataReader`` and ``read_stata`` to select whether to order imported categorical data (:issue:`8836`).  See :ref:`here <io.stata-categorical>` for more information on importing categorical variables from Stata data files.
-- Added ability to export Categorical data to to/from HDF5 (:issue:`7621`). Queries work the same as if it was an object array. However, the ``category`` dtyped data is stored in a more efficient manner. See :ref:`here <io.hdf5-categorical>` for an example and caveats w.r.t. prior versions of pandas.
+- Added ability to export Categorical data to/from HDF5 (:issue:`7621`). Queries work the same as if it was an object array. However, the ``category`` dtyped data is stored in a more efficient manner. See :ref:`here <io.hdf5-categorical>` for an example and caveats w.r.t. prior versions of pandas.
 - Added support for ``searchsorted()`` on ``Categorical`` class (:issue:`8420`).
 
 Other enhancements:

diff --git a/doc/source/whatsnew/v0.16.1.rst b/doc/source/whatsnew/v0.16.1.rst
@@ -6,7 +6,7 @@ Version 0.16.1 (May 11, 2015)
 {{ header }}
 
 
-This is a minor bug-fix release from 0.16.0 and includes a a large number of
+This is a minor bug-fix release from 0.16.0 and includes a large number of
 bug fixes along several new features, enhancements, and performance improvements.
 We recommend that all users upgrade to this version.
 
@@ -72,7 +72,7 @@ setting the index of a ``DataFrame/Series`` with a ``category`` dtype would conv
     Out[4]: Index(['c', 'a', 'b'], dtype='object')
 
 
-setting the index, will create create a ``CategoricalIndex``
+setting the index, will create a ``CategoricalIndex``
 
 .. code-block:: ipython
 

diff --git a/doc/source/whatsnew/v0.16.2.rst b/doc/source/whatsnew/v0.16.2.rst
@@ -6,7 +6,7 @@ Version 0.16.2 (June 12, 2015)
 {{ header }}
 
 
-This is a minor bug-fix release from 0.16.1 and includes a a large number of
+This is a minor bug-fix release from 0.16.1 and includes a large number of
 bug fixes along some new features (:meth:`~DataFrame.pipe` method), enhancements, and performance improvements.
 
 We recommend that all users upgrade to this version.

diff --git a/doc/source/whatsnew/v0.18.0.rst b/doc/source/whatsnew/v0.18.0.rst
@@ -610,7 +610,7 @@ Subtraction by ``Timedelta`` in a ``Series`` by a ``Timestamp`` works (:issue:`1
    pd.Timestamp('2012-01-01') - ser
 
 
-``NaT.isoformat()`` now returns ``'NaT'``. This change allows allows
+``NaT.isoformat()`` now returns ``'NaT'``. This change allows
 ``pd.Timestamp`` to rehydrate any timestamp like object from its isoformat
 (:issue:`12300`).
 

diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst
@@ -1167,7 +1167,7 @@ Other API changes
 - ``.loc`` has compat with ``.ix`` for accepting iterators, and NamedTuples (:issue:`15120`)
 - ``interpolate()`` and ``fillna()`` will raise a ``ValueError`` if the ``limit`` keyword argument is not greater than 0. (:issue:`9217`)
 - ``pd.read_csv()`` will now issue a ``ParserWarning`` whenever there are conflicting values provided by the ``dialect`` parameter and the user (:issue:`14898`)
-- ``pd.read_csv()`` will now raise a ``ValueError`` for the C engine if the quote character is larger than than one byte (:issue:`11592`)
+- ``pd.read_csv()`` will now raise a ``ValueError`` for the C engine if the quote character is larger than one byte (:issue:`11592`)
 - ``inplace`` arguments now require a boolean value, else a ``ValueError`` is thrown (:issue:`14189`)
 - ``pandas.api.types.is_datetime64_ns_dtype`` will now report ``True`` on a tz-aware dtype, similar to ``pandas.api.types.is_datetime64_any_dtype``
 - ``DataFrame.asof()`` will return a null filled ``Series`` instead the scalar ``NaN`` if a match is not found (:issue:`15118`)
@@ -1663,11 +1663,11 @@ Indexing
 - Bug in ``.reset_index()`` when an all ``NaN`` level of a ``MultiIndex`` would fail (:issue:`6322`)
 - Bug in ``.reset_index()`` when raising error for index name already present in ``MultiIndex`` columns (:issue:`16120`)
 - Bug in creating a ``MultiIndex`` with tuples and not passing a list of names; this will now raise ``ValueError`` (:issue:`15110`)
-- Bug in the HTML display with with a ``MultiIndex`` and truncation (:issue:`14882`)
+- Bug in the HTML display with a ``MultiIndex`` and truncation (:issue:`14882`)
 - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`)
 - Bug in ``pd.concat()`` where the names of ``MultiIndex`` of resulting ``DataFrame`` are not handled correctly when ``None`` is presented in the names of ``MultiIndex`` of input ``DataFrame`` (:issue:`15787`)
 - Bug in ``DataFrame.sort_index()`` and ``Series.sort_index()`` where ``na_position`` doesn't work with a ``MultiIndex`` (:issue:`14784`, :issue:`16604`)
-- Bug in in ``pd.concat()`` when combining objects with a ``CategoricalIndex`` (:issue:`16111`)
+- Bug in ``pd.concat()`` when combining objects with a ``CategoricalIndex`` (:issue:`16111`)
 - Bug in indexing with a scalar and a ``CategoricalIndex`` (:issue:`16123`)
 
 IO

diff --git a/doc/source/whatsnew/v0.21.0.rst b/doc/source/whatsnew/v0.21.0.rst
@@ -50,7 +50,7 @@ Parquet is designed to faithfully serialize and de-serialize ``DataFrame`` s, su
 dtypes, including extension dtypes such as datetime with timezones.
 
 This functionality depends on either the `pyarrow <http://arrow.apache.org/docs/python/>`__ or `fastparquet <https://fastparquet.readthedocs.io/en/latest/>`__ library.
-For more details, see see :ref:`the IO docs on Parquet <io.parquet>`.
+For more details, see :ref:`the IO docs on Parquet <io.parquet>`.
 
 
 .. _whatsnew_0210.enhancements.infer_objects:

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -1622,7 +1622,7 @@ Timedelta
 - Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`, :issue:`22163`)
 - Bug in adding a :class:`Index` with object dtype to a :class:`Series` with ``timedelta64[ns]`` dtype incorrectly raising (:issue:`22390`)
 - Bug in multiplying a :class:`Series` with numeric dtype against a ``timedelta`` object (:issue:`22390`)
-- Bug in :class:`Series` with numeric dtype when adding or subtracting an an array or ``Series`` with ``timedelta64`` dtype (:issue:`22390`)
+- Bug in :class:`Series` with numeric dtype when adding or subtracting an array or ``Series`` with ``timedelta64`` dtype (:issue:`22390`)
 - Bug in :class:`Index` with numeric dtype when multiplying or dividing an array with dtype ``timedelta64`` (:issue:`22390`)
 - Bug in :class:`TimedeltaIndex` incorrectly allowing indexing with ``Timestamp`` object (:issue:`20464`)
 - Fixed bug where subtracting :class:`Timedelta` from an object-dtyped array would raise ``TypeError`` (:issue:`21980`)
@@ -1868,7 +1868,7 @@ Reshaping
 - :func:`pandas.core.groupby.GroupBy.rank` now raises a ``ValueError`` when an invalid value is passed for argument ``na_option`` (:issue:`22124`)
 - Bug in :func:`get_dummies` with Unicode attributes in Python 2 (:issue:`22084`)
 - Bug in :meth:`DataFrame.replace` raises ``RecursionError`` when replacing empty lists (:issue:`22083`)
-- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when dict is used as the ``to_replace`` value and one key in the dict is is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`)
+- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when dict is used as the ``to_replace`` value and one key in the dict is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`)
 - Bug in :meth:`DataFrame.drop_duplicates` for empty ``DataFrame`` which incorrectly raises an error (:issue:`20516`)
 - Bug in :func:`pandas.wide_to_long` when a string is passed to the stubnames argument and a column name is a substring of that stubname (:issue:`22468`)
 - Bug in :func:`merge` when merging ``datetime64[ns, tz]`` data that contained a DST transition (:issue:`18885`)

diff --git a/doc/source/whatsnew/v0.6.0.rst b/doc/source/whatsnew/v0.6.0.rst
@@ -15,7 +15,7 @@ New features
 ~~~~~~~~~~~~
 - :ref:`Added <reshaping.melt>` ``melt`` function to ``pandas.core.reshape``
 - :ref:`Added <groupby.multiindex>` ``level`` parameter to group by level in Series and DataFrame descriptive statistics (:issue:`313`)
-- :ref:`Added <basics.head_tail>` ``head`` and ``tail`` methods to Series, analogous to to DataFrame (:issue:`296`)
+- :ref:`Added <basics.head_tail>` ``head`` and ``tail`` methods to Series, analogous to DataFrame (:issue:`296`)
 - :ref:`Added <indexing.boolean>` ``Series.isin`` function which checks if each value is contained in a passed sequence (:issue:`289`)
 - :ref:`Added <io.formatting>` ``float_format`` option to ``Series.to_string``
 - :ref:`Added <io.parse_dates>` ``skip_footer`` (:issue:`291`) and ``converters`` (:issue:`343`) options to ``read_csv`` and ``read_table``

diff --git a/doc/source/whatsnew/v0.8.0.rst b/doc/source/whatsnew/v0.8.0.rst
@@ -81,7 +81,7 @@ Time Series changes and improvements
   timestamps are stored as UTC; Timestamps from DatetimeIndex objects with time
   zone set will be localized to local time. Time zone conversions are therefore
   essentially free. User needs to know very little about pytz library now; only
-  time zone names as as strings are required. Time zone-aware timestamps are
+  time zone names as strings are required. Time zone-aware timestamps are
   equal if and only if their UTC timestamps match. Operations between time
   zone-aware time series with different time zones will result in a UTC-indexed
   time series.

diff --git a/pandas/_testing.py b/pandas/_testing.py
@@ -1768,7 +1768,7 @@ def box_expected(expected, box_cls, transpose=True):
     elif box_cls is pd.DataFrame:
         expected = pd.Series(expected).to_frame()
         if transpose:
-            # for vector operations, we we need a DataFrame to be a single-row,
+            # for vector operations, we need a DataFrame to be a single-row,
             #  not a single-column, in order to operate against non-DataFrame
             #  vectors of the same length.
             expected = expected.T

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -458,7 +458,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
     # Albeit hashmap has O(1) look-up (vs. O(logn) in sorted array),
     # in1d is faster for small sizes
     if len(comps) > 1_000_000 and len(values) <= 26 and not is_object_dtype(comps):
-        # If the the values include nan we need to check for nan explicitly
+        # If the values include nan we need to check for nan explicitly
         # since np.nan it not equal to np.nan
         if isna(values).any():
             f = lambda c, v: np.logical_or(np.in1d(c, v), np.isnan(c))
@@ -1563,7 +1563,7 @@ def take(arr, indices, axis: int = 0, allow_fill: bool = False, fill_value=None)
 
         * True: negative values in `indices` indicate
           missing values. These values are set to `fill_value`. Any other
-          other negative values raise a ``ValueError``.
+          negative values raise a ``ValueError``.
 
     fill_value : any, optional
         Fill value to use for NA-indices when `allow_fill` is True.

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -77,7 +77,7 @@ def func(self, other):
                     "Unordered Categoricals can only compare equality or not"
                 )
         if isinstance(other, Categorical):
-            # Two Categoricals can only be be compared if the categories are
+            # Two Categoricals can only be compared if the categories are
             # the same (maybe up to ordering, depending on ordered)
 
             msg = "Categoricals can only be compared if 'categories' are the same."

diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py
@@ -120,7 +120,7 @@ def coerce_to_array(
     -------
     tuple of (values, mask)
     """
-    # if values is floating numpy array, preserve it's dtype
+    # if values is floating numpy array, preserve its dtype
     if dtype is None and hasattr(values, "dtype"):
         if is_float_dtype(values.dtype):
             dtype = values.dtype

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -183,7 +183,7 @@ def coerce_to_array(
     -------
     tuple of (values, mask)
     """
-    # if values is integer numpy array, preserve it's dtype
+    # if values is integer numpy array, preserve its dtype
     if dtype is None and hasattr(values, "dtype"):
         if is_integer_dtype(values.dtype):
             dtype = values.dtype

diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
@@ -144,7 +144,7 @@ class PandasArray(
 
     # If you're wondering why pd.Series(cls) doesn't put the array in an
     # ExtensionBlock, search for `ABCPandasArray`. We check for
-    # that _typ to ensure that that users don't unnecessarily use EAs inside
+    # that _typ to ensure that users don't unnecessarily use EAs inside
     # pandas internals, which turns off things like block consolidation.
     _typ = "npy_extension"
     __array_priority__ = 1000

diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
@@ -99,9 +99,8 @@ def __eq__(self, other: Any) -> bool:
         By default, 'other' is considered equal if either
 
         * it's a string matching 'self.name'.
-        * it's an instance of this type and all of the
-          the attributes in ``self._metadata`` are equal between
-          `self` and `other`.
+        * it's an instance of this type and all of the attributes
+          in ``self._metadata`` are equal between `self` and `other`.
 
         Parameters
         ----------

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -391,7 +391,7 @@ def maybe_cast_to_extension_array(
     assertion_msg = f"must pass a subclass of ExtensionArray: {cls}"
     assert issubclass(cls, ABCExtensionArray), assertion_msg
 
-    # Everything can be be converted to StringArrays, but we may not want to convert
+    # Everything can be converted to StringArrays, but we may not want to convert
     if (
         issubclass(cls, (StringArray, ArrowStringArray))
         and lib.infer_dtype(obj) != "string"
@@ -1200,7 +1200,7 @@ def soft_convert_objects(
     elif conversion_count > 1 and coerce:
         raise ValueError(
             "Only one of 'datetime', 'numeric' or "
-            "'timedelta' can be True when when coerce=True."
+            "'timedelta' can be True when coerce=True."
         )
 
     if not is_object_dtype(values.dtype):

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
@@ -1727,7 +1727,7 @@ def _validate_date_like_dtype(dtype) -> None:
     ------
     TypeError : The dtype could not be casted to a date-like dtype.
     ValueError : The dtype is an illegal date-like dtype (e.g. the
-                 the frequency provided is too specific)
+                 frequency provided is too specific)
     """
     try:
         typ = np.datetime_data(dtype)[0]

diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
@@ -47,7 +47,7 @@ class PandasExtensionDtype(ExtensionDtype):
     type: Any
     kind: Any
     # The Any type annotations above are here only because mypy seems to have a
-    # problem dealing with with multiple inheritance from PandasExtensionDtype
+    # problem dealing with multiple inheritance from PandasExtensionDtype
     # and ExtensionDtype's @properties in the subclasses below. The kind and
     # type variables in those subclasses are explicitly typed below.
     subdtype = None

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -6532,7 +6532,7 @@ def update(
         1  b  e
         2  c  f
 
-        For Series, it's name attribute must be set.
+        For Series, its name attribute must be set.
 
         >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
         ...                    'B': ['x', 'y', 'z']})

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1114,7 +1114,7 @@ def rename_axis(self, mapper=lib.no_default, **kwargs):
         In this case, the parameter ``copy`` is ignored.
 
         The second calling convention will modify the names of the
-        the corresponding index if mapper is a list or a scalar.
+        corresponding index if mapper is a list or a scalar.
         However, if mapper is dict-like or a function, it will use the
         deprecated behavior of modifying the axis *labels*.
 
@@ -2722,7 +2722,7 @@ def to_sql(
         >>> engine.execute("SELECT * FROM users").fetchall()
         [(0, 'User 1'), (1, 'User 2'), (2, 'User 3')]
 
-        An `sqlalchemy.engine.Connection` can also be passed to to `con`:
+        An `sqlalchemy.engine.Connection` can also be passed to `con`:
 
         >>> with engine.begin() as connection:
         ...     df1 = pd.DataFrame({'name' : ['User 4', 'User 5']})
@@ -5490,7 +5490,7 @@ def __setattr__(self, name: str, value) -> None:
     def _dir_additions(self) -> Set[str]:
         """
         add the string-like attributes from the info_axis.
-        If info_axis is a MultiIndex, it's first level values are used.
+        If info_axis is a MultiIndex, its first level values are used.
         """
         additions = super()._dir_additions()
         if self._info_axis._can_hold_strings:

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -262,7 +262,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
                 return self._python_agg_general(func, *args, **kwargs)
             except (ValueError, KeyError):
                 # TODO: KeyError is raised in _python_agg_general,
-                #  see see test_groupby.test_basic
+                #  see test_groupby.test_basic
                 result = self._aggregate_named(func, *args, **kwargs)
 
             index = Index(sorted(result), name=self.grouper.names[0])
@@ -1390,8 +1390,7 @@ def _transform_fast(self, result: DataFrame) -> DataFrame:
         """
         obj = self._obj_with_exclusions
 
-        # for each col, reshape to to size of original frame
-        # by take operation
+        # for each col, reshape to size of original frame by take operation
         ids, _, ngroup = self.grouper.group_info
         result = result.reindex(self.grouper.result_index, copy=False)
         output = [