From 90b9145ac16af174216595f83c26904926f6b299 Mon Sep 17 00:00:00 2001
From: tailaiw <29800495+tailaiw@users.noreply.github.com>
Date: Tue, 18 Feb 2020 14:01:05 -0600
Subject: [PATCH] Optimized the logic applying univariate model to DataFrame
 (#67)

* optimized applying univariate model to DF

* updated version number and changelogs

* minor optimization

* Fixed a bug where a model trained with a Series could not be applied to a DataFrame due to a name matching error

* modified docstrings

* updated version number

* updated changelog
---
 docs/conf.py                           |  2 +-
 docs/releasehistory.rst                |  7 +++
 setup.cfg                              |  2 +-
 src/adtk/__init__.py                   |  2 +-
 src/adtk/_base.py                      | 79 +++++++++++++++++++++-----
 src/adtk/_detector_base.py             | 21 ++++---
 src/adtk/_transformer_base.py          | 16 ++++--
 src/adtk/detector/detector_1d.py       | 77 +------------------------
 src/adtk/transformer/transformer_1d.py | 60 +------------------
 tests/test_detector1d.py               | 21 +++++++
 tests/test_series_name.py              | 44 ++++++++++++--
 11 files changed, 164 insertions(+), 167 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index d6fabd9..51b976d 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -66,7 +66,7 @@
 # The short X.Y version.
 version = "0.5"
 # The full version, including alpha/beta/rc tags.
-release = "0.5.3"
+release = "0.5.4"
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/docs/releasehistory.rst b/docs/releasehistory.rst
index b86b061..f62eb85 100644
--- a/docs/releasehistory.rst
+++ b/docs/releasehistory.rst
@@ -2,6 +2,13 @@
 Release History
 ***************
 
+Version 0.5.4 (Feb 18, 2020)
+===================================
+- Optimized the workflow of how a univariate model is applied to a pandas DataFrame
+    - Added more informative error messages
+    - Fixed some bugs resulting in model-column matching errors due to inconsistency between output Series names and DataFrame columns
+    - Clarified the workflow in the documentation
+
 Version 0.5.3 (Feb 12, 2020)
 ===================================
 - Quick hotfix to avoid errors caused by statsmodels v0.11 by requiring statsmodels dependency <0.11
diff --git a/setup.cfg b/setup.cfg
index 2127f22..3ad827b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = adtk
-version = 0.5.3
+version = 0.5.4
 author = Arundo Analytics, Inc.
 maintainer = Tailai Wen
 maintainer_email = tailai.wen@arundo.com
diff --git a/src/adtk/__init__.py b/src/adtk/__init__.py
index ef749f2..19cf1b7 100644
--- a/src/adtk/__init__.py
+++ b/src/adtk/__init__.py
@@ -20,4 +20,4 @@
 
 """
 
-__version__ = "0.5.3"
+__version__ = "0.5.4"
diff --git a/src/adtk/_base.py b/src/adtk/_base.py
index cdb57ca..268663c 100644
--- a/src/adtk/_base.py
+++ b/src/adtk/_base.py
@@ -11,7 +11,9 @@ class _Model(ABC):
     def __init__(self, **kwargs):
         for key, value in kwargs.items():
             setattr(self, key, value)
-            self._fitted = False
+        self._fitted = (
+            0
+        )  # 0 for not fitted, 1 for fitted, 2 for univariate model fitted by DF
 
     @abstractmethod
     def _fit(self, ts):
@@ -91,21 +93,35 @@ def _fit(self, ts):
             s = ts.copy()
             self._fit_core(s)
             self._models = None
+            self._fitted = 1
         elif isinstance(ts, pd.DataFrame):
             df = ts.copy()
+            if df.columns.duplicated().any():
+                raise ValueError(
+                    "Input DataFrame must have unique column names."
+                )
             if self._need_fit:
                 self._update_models(df.columns)
                 # fit model for each column
                 for col in df.columns:
                     self._models[col].fit(df[col])
+                self._fitted = 2
+            else:
+                pass
         else:
             raise TypeError("Input must be a pandas Series or DataFrame.")
-        self._fitted = True
 
     def _predict(self, ts):
-        if self._need_fit and (not self._fitted):
+        if self._need_fit and (self._fitted == 0):
             raise RuntimeError("The model must be trained first.")
         if isinstance(ts, pd.Series):
+            if self._need_fit and (
+                self._fitted == 2
+            ):  # fitted by DF, to be applied to Series
+                raise RuntimeError(
+                    "The model was trained by a pandas DataFrame object, "
+                    "it can only be applied to a pandas DataFrame object."
+                )
             s = ts.copy()
             predicted = self._predict_core(s)
             # if a Series-to-Series operation, make sure Series name keeps
@@ -113,15 +129,42 @@ def _predict(self, ts):
                 predicted.name = ts.name
         elif isinstance(ts, pd.DataFrame):
             df = ts.copy()
-            # if the model doesn't neef fit, initialize or reset a model for
-            # each column
-            if not self._need_fit:
-                self._update_models(df.columns)
-            # predict for each column
-            predicted = pd.concat(
-                [self._models[col]._predict(df[col]) for col in df.columns],
-                axis=1,
-            )
+            if df.columns.duplicated().any():
+                raise ValueError(
+                    "Input DataFrame must have unique column names."
+                )
+            if (not self._need_fit) or (self._fitted == 1):
+                # apply the model to each column
+                predicted = []
+                for col in df.columns:
+                    predicted_this_col = self._predict(df[col])
+                    if isinstance(predicted_this_col, pd.DataFrame):
+                        predicted_this_col = predicted_this_col.rename(
+                            columns={
+                                col1: "{}_{}".format(col, col1)
+                                for col1 in predicted_this_col.columns
+                            }
+                        )
+                    predicted.append(predicted_this_col)
+                predicted = pd.concat(predicted, axis=1)
+            else:
+                # predict for each column
+                if not (set(self._models.keys()) >= set(df.columns)):
+                    raise ValueError(
+                        "The model was trained by a pandas DataFrame with "
+                        "columns {}, but the input DataFrame contains columns "
+                        "{} which are unknown to the model.".format(
+                            list(set(self._models.keys())),
+                            list(set(df.columns) - set(self._models.keys())),
+                        )
+                    )
+                predicted = pd.concat(
+                    [
+                        self._models[col]._predict(df[col])
+                        for col in df.columns
+                    ],
+                    axis=1,
+                )
         else:
             raise TypeError("Input must be a pandas Series or DataFrame.")
         # make sure index freq is the same (because pandas has a bug that some
@@ -153,16 +196,24 @@ def fit_predict(self, ts):
 class _ModelHD(_Model):
     def _fit(self, df):
         if isinstance(df, pd.DataFrame):
+            if df.columns.duplicated().any():
+                raise ValueError(
+                    "Input DataFrame must have unique column names."
+                )
             df_copy = df.copy()
             self._fit_core(df_copy)
         else:
             raise TypeError("Input must be a pandas DataFrame.")
-        self._fitted = True
+        self._fitted = 1
 
     def _predict(self, df):
-        if self._need_fit and (not self._fitted):
+        if self._need_fit and (self._fitted == 0):
             raise RuntimeError("The model must be trained first.")
         if isinstance(df, pd.DataFrame):
+            if df.columns.duplicated().any():
+                raise ValueError(
+                    "Input DataFrame must have unique column names."
+                )
             df_copy = df.copy()
             predicted = self._predict_core(df_copy)
         else:
diff --git a/src/adtk/_detector_base.py b/src/adtk/_detector_base.py
index d855290..e9eb4bf 100644
--- a/src/adtk/_detector_base.py
+++ b/src/adtk/_detector_base.py
@@ -25,9 +25,14 @@ def detect(self, ts, return_list=False):
         Parameters
         ----------
         ts: pandas.Series or pandas.DataFrame
-            Time series to detect anomalies from.
-            If a DataFrame with k columns, k univariate detectors will be
-            applied to them independently.
+            Time series to detect anomalies from. If a DataFrame with k
+            columns, it is treated as k independent univariate time series.
+
+            - If the detector was trained with a Series, the detector will be
+              applied to each univariate series independently;
+            - If the detector was trained with a DataFrame, i.e. the detector
+              is essentially k detectors, those detectors will be applied to
+              each univariate series respectively.
 
         return_list: bool, optional
             Whether to return a list of anomalous time stamps, or a binary
@@ -66,8 +71,9 @@ def fit_detect(self, ts, return_list=False):
         ----------
         ts: pandas.Series or pandas.DataFrame
             Time series to be used for training and be detected for anomalies.
-            If a DataFrame with k columns, k univariate detectors will be
-            trained and applied to them independently.
+            If a DataFrame with k columns, it is treated as k independent
+            univariate time series, and k univariate detectors will be trained
+            and applied to each series independently.
 
         return_list: bool, optional
             Whether to return a list of anomalous time stamps, or a binary
@@ -109,8 +115,9 @@ def score(self, ts, anomaly_true, scoring="recall", **kwargs):
         ----------
         ts: pandas Series or pandas.DataFrame
             Time series to detect anomalies from.
-            If a DataFrame with k columns, k univariate detectors will be
-            applied to them independently.
+            If a DataFrame with k columns, it is treated as k independent
+            univariate time series, and k univariate detectors will be
+            applied to each series independently.
 
         anomaly_true: pandas.Series, pandas.DataFrame, list, or dict
             True anomalies.
diff --git a/src/adtk/_transformer_base.py b/src/adtk/_transformer_base.py
index ec49b2c..823cebf 100644
--- a/src/adtk/_transformer_base.py
+++ b/src/adtk/_transformer_base.py
@@ -21,9 +21,14 @@ def transform(self, ts):
         Parameters
         ----------
         ts: pandas.Series or pandas.DataFrame
-            Time series to be transformed.
-            If a DataFrame with k columns, k univariate transformers will be
-            applied to them independently.
+            Time series to be transformed. If a DataFrame with k columns, it is
+            treated as k independent univariate time series.
+
+            - If the transformer was trained with a Series, the transformer
+              will be applied to each univariate series independently;
+            - If the transformer was trained with a DataFrame, i.e. the
+              transformer is essentially k transformers, those transformers
+              will be applied to each univariate series respectively.
 
         Returns
         -------
@@ -41,8 +46,9 @@ def fit_transform(self, ts):
         ----------
         ts: pandas.Series or pandas.DataFrame
             Time series to be used for training and be transformed.
-            If a DataFrame with k columns, k univariate transformers will be
-            applied to them independently.
+            If a DataFrame with k columns, it is treated as k independent
+            univariate time series, and k univariate transformers will be
+            trained and applied to each series independently.
 
         Returns
         -------
diff --git a/src/adtk/detector/detector_1d.py b/src/adtk/detector/detector_1d.py
index 48151ab..d795d46 100644
--- a/src/adtk/detector/detector_1d.py
+++ b/src/adtk/detector/detector_1d.py
@@ -39,13 +39,6 @@
 class CustomizedDetector1D(_Detector1D):
     """Detector derived from a user-given function and parameters.
 
-    This is an univariate detector. When it is applied to a multivariate time
-    series (i.e. pandas DataFrame), it will be applied to every series
-    independently. All parameters can be defined as a dict object where key-
-    value pairs are series names (i.e. column names of DataFrame) and the
-    model parameter for that series. If not, then the same parameter will be
-    applied to all series.
-
     Parameters
     ----------
     detect_func: function
@@ -133,13 +126,6 @@ class ThresholdAD(_Detector1D):
     This detector compares time series values with user-given thresholds, and
     identifies time points as anomalous when values are beyond the thresholds.
 
-    This is an univariate detector. When it is applied to a multivariate time
-    series (i.e. pandas DataFrame), it will be applied to every series
-    independently. All parameters can be defined as a dict object where key-
-    value pairs are series names (i.e. column names of DataFrame) and the
-    model parameter for that series. If not, then the same parameter will be
-    applied to all series.
-
     Parameters
     ----------
     low: float, optional
@@ -178,13 +164,6 @@ class QuantileAD(_Detector1D):
     of historical data, and identifies time points as anomalous when values
     are beyond the thresholds.
 
-    This is an univariate detector. When it is applied to a multivariate time
-    series (i.e. pandas DataFrame), it will be applied to every series
-    independently. All parameters can be defined as a dict object where key-
-    value pairs are series names (i.e. column names of DataFrame) and the
-    model parameter for that series. If not, then the same parameter will be
-    applied to all series.
-
     Parameters
     ----------
     low: float, optional
@@ -239,13 +218,6 @@ class InterQuartileRangeAD(_Detector1D):
     historical data, and identifies time points as anomalous when differences
     are beyond the inter-quartile range times a user-given factor c.
 
-    This is an univariate detector. When it is applied to a multivariate time
-    series (i.e. pandas DataFrame), it will be applied to every series
-    independently. All parameters can be defined as a dict object where key-
-    value pairs are series names (i.e. column names of DataFrame) and the
-    model parameter for that series. If not, then the same parameter will be
-    applied to all series.
-
     Parameters
     ----------
     c: float, or 2-tuple (float, float), optional
@@ -317,13 +289,6 @@ class GeneralizedESDTestAD(_Detector1D):
     follow an approximately normal distribution. Please only use this detector
     when this assumption holds.
 
-    This is an univariate detector. When it is applied to a multivariate time
-    series (i.e. pandas DataFrame), it will be applied to every series
-    independently. All parameters can be defined as a dict object where key-
-    value pairs are series names (i.e. column names of DataFrame) and the
-    model parameter for that series. If not, then the same parameter will be
-    applied to all series.
-
     [1] Rosner, Bernard (May 1983), Percentage Points for a Generalized ESD
     Many-Outlier Procedure,Technometrics, 25(2), pp. 165-172.
 
@@ -412,13 +377,6 @@ class PersistAD(_Detector1D):
     This detector is internally implemented as a `Pipenet` object. Advanced
     users may learn more details by checking attribute `pipe_`.
 
-    This is an univariate detector. When it is applied to a multivariate time
-    series (i.e. pandas DataFrame), it will be applied to every series
-    independently. All parameters can be defined as a dict object where key-
-    value pairs are series names (i.e. column names of DataFrame) and the
-    model parameter for that series. If not, then the same parameter will be
-    applied to all series.
-
     Parameters
     ----------
     window: int, optional
@@ -575,13 +533,6 @@ class LevelShiftAD(_Detector1D):
     This detector is internally implemented as a `Pipenet` object. Advanced
     users may learn more details by checking attribute `pipe_`.
 
-    This is an univariate detector. When it is applied to a multivariate time
-    series (i.e. pandas DataFrame), it will be applied to every series
-    independently. All parameters can be defined as a dict object where key-
-    value pairs are series names (i.e. column names of DataFrame) and the
-    model parameter for that series. If not, then the same parameter will be
-    applied to all series.
-
     Parameters
     ----------
     window: int, optional
@@ -723,13 +674,6 @@ class VolatilityShiftAD(_Detector1D):
     This detector is internally implemented as a `Pipenet` object. Advanced
     users may learn more details by checking attribute `pipe_`.
 
-    This is an univariate detector. When it is applied to a multivariate time
-    series (i.e. pandas DataFrame), it will be applied to every series
-    independently. All parameters can be defined as a dict object where key-
-    value pairs are series names (i.e. column names of DataFrame) and the
-    model parameter for that series. If not, then the same parameter will be
-    applied to all series.
-
     Parameters
     ----------
     window: int, optional
@@ -886,13 +830,6 @@ class AutoregressionAD(_Detector1D):
     This detector is internally implemented aattribute `pipe_`.nced
     users may learn more details by checking attribute `pipe_`.
 
-    This is an univariate detector. When it is applied to a multivariate time
-    series (i.e. pandas DataFrame), it will be applied to every series
-    independently. All parameters can be defined as a dict object where key-
-    value pairs are series names (i.e. column names of DataFrame) and the
-    model parameter for that series. If not, then the same parameter will be
-    applied to all series.
-
     Parameters
     ----------
     n_steps: int, optional
@@ -1042,13 +979,6 @@ class SeasonalAD(_Detector1D):
     This detector is internally implemented aattribute `pipe_`.nced
     users may learn more details by checking attribute `pipe_`.
 
-    This is an univariate detector. When it is applied to a multivariate time
-    series (i.e. pandas DataFrame), it will be applied to every series
-    independently. All parameters can be defined as a dict object where key-
-    value pairs are series names (i.e. column names of DataFrame) and the
-    model parameter for that series. If not, then the same parameter will be
-    applied to all series.
-
     Parameters
     ----------
     freq: int, optional
@@ -1084,12 +1014,7 @@ class SeasonalAD(_Detector1D):
 
     """
 
-    _default_params = {
-        "freq": None,
-        "side": "both",
-        "c": 3.0,
-        "trend": False,
-    }
+    _default_params = {"freq": None, "side": "both", "c": 3.0, "trend": False}
 
     def __init__(
         self,
diff --git a/src/adtk/transformer/transformer_1d.py b/src/adtk/transformer/transformer_1d.py
index ab1500c..4679282 100644
--- a/src/adtk/transformer/transformer_1d.py
+++ b/src/adtk/transformer/transformer_1d.py
@@ -49,14 +49,6 @@ class CustomizedTransformer1D(_Transformer1D):
     fit_func_params: dict, optional
         Parameters of fit_func. Default: None.
 
-
-    This is an univariate transformer. When it is applied to a multivariate
-    time series (i.e. pandas DataFrame), it will be applied to every series
-    independently. All parameters can be defined as a dict object where key-
-    value pairs are series names (i.e. column names of DataFrame) and the
-    model parameter for that series. If not, then the same parameter will be
-    applied to all series.
-
     """
 
     _need_fit = False
@@ -125,10 +117,6 @@ class StandardScale(_Transformer1D):
     """Transformer that scales time series such that mean is equal to 0 and
     standard deviation is equal to 1.
 
-    This is an univariate transformer. When it is applied to a multivariate
-    time series (i.e. pandas DataFrame), it will be applied to every series
-    independently.
-
     """
 
     _need_fit = False
@@ -153,13 +141,6 @@ class RollingAggregate(_Transformer1D):
     """Transformer that roll a sliding window along a time series, and
     aggregates using a user-selected operation.
 
-    This is an univariate transformer. When it is applied to a multivariate
-    time series (i.e. pandas DataFrame), it will be applied to every series
-    independently. All parameters can be defined as a dict object where key-
-    value pairs are series names (i.e. column names of DataFrame) and the
-    model parameter for that series. If not, then the same parameter will be
-    applied to all series.
-
     Parameters
     ----------
     agg: str or function
@@ -372,11 +353,7 @@ def agg_wrapped(x):
 
         if isinstance(s_rolling, pd.Series):
             s_rolling.name = s.name
-        else:
-            if s.name is not None:
-                s_rolling.columns = [
-                    "{}_{}".format(s.name, col) for col in s_rolling.columns
-                ]
+
         return s_rolling
 
 
@@ -385,13 +362,6 @@ class DoubleRollingAggregate(_Transformer1D):
     series, aggregates using a user-given operation, and calcuates the
     difference of aggregated metrics between two sliding windows.
 
-    This is an univariate transformer. When it is applied to a multivariate
-    time series (i.e. pandas DataFrame), it will be applied to every series
-    independently. All parameters can be defined as a dict object where key-
-    value pairs are series names (i.e. column names of DataFrame) and the
-    model parameter for that series. If not, then the same parameter will be
-    applied to all series.
-
     Parameters
     ----------
     agg: str, function, or tuple
@@ -698,14 +668,6 @@ class ClassicSeasonalDecomposition(_Transformer1D):
     seasonal_: pandas.Series
         Seasonal pattern extracted from training series.
 
-
-    This is an univariate transformer. When it is applied to a multivariate
-    time series (i.e. pandas DataFrame), it will be applied to every series
-    independently. All parameters can be defined as a dict object where key-
-    value pairs are series names (i.e. column names of DataFrame) and the
-    model parameter for that series. If not, then the same parameter will be
-    applied to all series.
-
     """
 
     _default_params = {"freq": None, "trend": False}
@@ -859,13 +821,6 @@ def _predict_core(self, s):
 def _identify_seasonal_period(s, low_autocorr=0.1, high_autocorr=0.3):
     """Identify seasonal period of a time series based on autocorrelation.
 
-    This is an univariate transformer. When it is applied to a multivariate
-    time series (i.e. pandas DataFrame), it will be applied to every series
-    independently. All parameters can be defined as a dict object where key-
-    value pairs are series names (i.e. column names of DataFrame) and the
-    model parameter for that series. If not, then the same parameter will be
-    applied to all series.
-
     Parameters
     ----------
     s: pandas Series or DataFrame
@@ -925,13 +880,6 @@ class Retrospect(_Transformer1D):
     u_[t-5], and a series y_t are needed to learn the relationship between
     control and outcome.
 
-    This is an univariate transformer. When it is applied to a multivariate
-    time series (i.e. pandas DataFrame), it will be applied to every series
-    independently. All parameters can be defined as a dict object where key-
-    value pairs are series names (i.e. column names of DataFrame) and the
-    model parameter for that series. If not, then the same parameter will be
-    applied to all series.
-
     Parameters
     ----------
     n_steps: int, optional
@@ -1003,11 +951,7 @@ def _predict_core(self, s):
         df = pd.DataFrame(index=s.index)
         df = df.assign(
             **{
-                (
-                    "t-{}".format(i)
-                    if s.name is None
-                    else "{}_t-{}".format(s.name, i)
-                ): s.shift(i)
+                ("t-{}".format(i)): s.shift(i)
                 for i in range(till, till + n_steps * step_size, step_size)
             }
         )
diff --git a/tests/test_detector1d.py b/tests/test_detector1d.py
index 4cec3dc..386ab5f 100644
--- a/tests/test_detector1d.py
+++ b/tests/test_detector1d.py
@@ -382,6 +382,27 @@ def test_dataframe(testCase):
         pd.testing.assert_frame_equal(a, a_true, check_dtype=False)
 
 
+@pytest.mark.parametrize("testCase", testCases)
+def test_fit_series_predict_dataframe(testCase):
+    """Fit the detector with a Series, then detect on a DataFrame."""
+    s = pd.Series(
+        testCase["s"],
+        pd.date_range(start="2017-1-1", periods=len(testCase["s"]), freq="D"),
+    )
+    df = pd.concat([s.rename("A"), s.rename("B")], axis=1)
+    model = testCase["model"](**testCase["params"])
+    a_true = pd.Series(testCase["a"], index=s.index)
+    a_true = pd.concat([a_true.rename("A"), a_true.rename("B")], axis=1)
+    if testCase["pandas_bug"] and (parse(pd.__version__) < parse("0.25")):
+        with pytest.raises(PandasBugError):
+            model.fit(s)
+            a = model.detect(df)
+    else:
+        model.fit(s)
+        a = model.detect(df)
+        pd.testing.assert_frame_equal(a, a_true, check_dtype=False)
+
+
 def test_autoregressive_ad_dataframe():
     """Make sure deepcopy works
     """
diff --git a/tests/test_series_name.py b/tests/test_series_name.py
index a92843b..dde3546 100644
--- a/tests/test_series_name.py
+++ b/tests/test_series_name.py
@@ -11,6 +11,12 @@
 from sklearn.cluster import KMeans
 from sklearn.linear_model import LinearRegression
 
+# We have 4 types of models
+#   - one-to-one: input a univariate series, output a univariate series
+#   - one-to-many: input a univariate series, output a multivariate series
+#   - many-to-one: input a multivariate series, output a univariate series
+#   - many-to-many: input a multivariate series, output a multivariate series
+
 one2one_models = [
     detector.ThresholdAD(),
     detector.QuantileAD(),
@@ -58,7 +64,11 @@
 
 
 @pytest.mark.parametrize("model", one2one_models)
-def test_one2one_s2s_wo_name(model):
+def test_one2one_s2s_w_name(model):
+    """
+    if a one-to-one model is applied to a Series, it should keep the Series
+    name unchanged
+    """
     s_name = pd.Series(
         np.arange(100),
         index=pd.date_range(start="2017-1-1", periods=100, freq="D"),
@@ -69,7 +79,11 @@ def test_one2one_s2s_wo_name(model):
 
 
 @pytest.mark.parametrize("model", one2one_models)
-def test_one2one_s2s_w_name(model):
+def test_one2one_s2s_wo_name(model):
+    """
+    if a one-to-one model is applied to a Series, it should keep the Series
+    name unchanged
+    """
     s_no_name = pd.Series(
         np.arange(100),
         index=pd.date_range(start="2017-1-1", periods=100, freq="D"),
@@ -80,6 +94,10 @@ def test_one2one_s2s_w_name(model):
 
 @pytest.mark.parametrize("model", one2one_models)
 def test_one2one_df2df(model):
+    """
+    if a one-to-one model is applied to a DataFrame, it should keep the column
+    names unchanged
+    """
     df = pd.DataFrame(
         np.arange(300).reshape(100, 3),
         index=pd.date_range(start="2017-1-1", periods=100, freq="D"),
@@ -91,6 +109,10 @@ def test_one2one_df2df(model):
 
 @pytest.mark.parametrize("model", one2one_models)
 def test_one2one_df2list(model):
+    """
+    if a one-to-one model (detector) is applied to a DataFrame and returns a
+    dict, the output dict keys should match the input column names
+    """
     if hasattr(model, "fit_detect"):
         df = pd.DataFrame(
             np.arange(300).reshape(100, 3),
@@ -106,18 +128,25 @@ def test_one2one_df2list(model):
 
 @pytest.mark.parametrize("model", one2many_models)
 def test_one2many_s2df_w_name(model):
+    """
+    if a one-to-many model is applied to a Series, the output should not have
+    prefix in column names, no matter whether the input Series has a name.
+    """
     s_name = pd.Series(
         np.arange(100),
         index=pd.date_range(start="2017-1-1", periods=100, freq="D"),
         name="A",
     )
     result = model.fit_predict(s_name)
-    assert all([col[:2] == "A_" for col in result.columns])
-    assert all([col[2:4] != "A_" for col in result.columns])
+    assert all([col[:2] != "A_" for col in result.columns])
 
 
 @pytest.mark.parametrize("model", one2many_models)
 def test_one2many_s2df_wo_name(model):
+    """
+    if a one-to-many model is applied to a Series, the output should not have
+    prefix in column names, no matter whether the input Series has a name.
+    """
     s_no_name = pd.Series(
         np.arange(100),
         index=pd.date_range(start="2017-1-1", periods=100, freq="D"),
@@ -128,6 +157,10 @@ def test_one2many_s2df_wo_name(model):
 
 @pytest.mark.parametrize("model", one2many_models)
 def test_one2many_df2df(model):
+    """
+    if a one-to-many model is applied to a DataFrame, the output should have
+    prefixes in column names to indicate the input columns they correspond to.
+    """
     df = pd.DataFrame(
         np.arange(300).reshape(100, 3),
         index=pd.date_range(start="2017-1-1", periods=100, freq="D"),
@@ -149,6 +182,9 @@ def test_one2many_df2df(model):
 
 @pytest.mark.parametrize("model", many2one_models)
 def test_many2one(model):
+    """
+    The output Series from a many-to-one model should NOT have name
+    """
     df = pd.DataFrame(
         np.arange(300).reshape(100, 3),
         index=pd.date_range(start="2017-1-1", periods=100, freq="D"),