From 8cd6a4ccf8eaaca68f014bbe25b85ed384a6ddcd Mon Sep 17 00:00:00 2001
From: Nils Braun
Date: Sat, 25 Mar 2017 14:31:10 +0100
Subject: [PATCH] Rolling timeseries (#170)

* Added a rolling parameter to the extract function and the normalize function, to roll out time series in time in both directions
* Added documentation in the code on the new rolling feature
* Fixed the normalize test and added a test case for rolling
* Increased coverage
* Added text documentation for the new feature
* Included a warning if the time is not uniformly sampled. For this I had to move the id check before the sort check
* Added some formulas to the docs
* Only enable the test when rolling is enabled
* Factored out the rolling into a new function
* Fixed the documentation for the new function
* Forgot to upload some changes
---
 docs/index.rst                              |   1 +
 docs/text/faq.rst                           |   8 +-
 docs/text/rolling.rst                       | 167 ++++++++++++++++++
 tests/utilities/test_dataframe_functions.py | 179 +++++++++++++++++++-
 tsfresh/feature_extraction/extraction.py    |   7 +-
 tsfresh/utilities/dataframe_functions.py    | 130 ++++++++++++--
 6 files changed, 476 insertions(+), 16 deletions(-)
 create mode 100644 docs/text/rolling.rst

diff --git a/docs/index.rst b/docs/index.rst
index 333755dbc..2f11ff1a1 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -27,6 +27,7 @@ The following chapters will explain the tsfresh package in detail:
     Feature Filtering
     How to write custom Feature Calculators
     Parallelization
+    How to handle rolling time series
     FAQ
     Authors
     License

diff --git a/docs/text/faq.rst b/docs/text/faq.rst
index 6acffaec6..41641d23b 100644
--- a/docs/text/faq.rst
+++ b/docs/text/faq.rst
@@ -1,8 +1,14 @@
 FAQ
-=================
+===
 
 1. *Does tsfresh support different time series lengths?*
 
    Yes, it supports different time series lengths. However, some feature calculators can demand a minimal length
    of the time series. If a shorter time series is passed to the calculator, normally a NaN is returned.
+
+
+2. *Is it possible to extract features from rolling/shifted time series?*
+
+   Yes, there is the option `rolling` for the :func:`tsfresh.feature_extraction.extract_features` function.
+   Set it to a non-zero value to enable rolling. At the moment, this simply rolls the input data out into
+   as many time series as there are time steps - there is no internal optimization for rolling calculations.
+   Please see :ref:`rolling-label` for more information.

diff --git a/docs/text/rolling.rst b/docs/text/rolling.rst
new file mode 100644
index 000000000..304d88abe
--- /dev/null
+++ b/docs/text/rolling.rst
@@ -0,0 +1,167 @@
+.. _rolling-label:
+
+How to handle rolling time series
+=================================
+
+In many real-world applications with time series, the "time" column
+(we will call it time in the following, although it can be anything)
+gives a certain sequential order to the data. We can exploit this sequence to generate
+more input data out of a single time series by *rolling* over the data.
+
+Imagine the following situation: you have EEG measurement data that
+you want to use to classify patients into healthy and not healthy (we oversimplify the problem here).
+You have e.g. 100 time steps of data, so you can extract features that may predict the health status
+of the patients. But what would happen if you only had the recorded measurements for 50 time steps?
+The patients would be just as healthy as with 100 time steps. So you can easily increase the amount of
+training data by reusing time series cut into smaller pieces.
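+
+The idea of reusing shorter pieces can be sketched directly in pandas. This is only a minimal sketch;
+the ``eeg`` frame, its columns and the cut length are made up for illustration and are not part of tsfresh:
+
+.. code:: python
+
+    import numpy as np
+    import pandas as pd
+
+    # a made-up EEG-like recording with 100 time steps for a single patient
+    eeg = pd.DataFrame({"time": np.arange(100), "voltage": np.random.randn(100)})
+
+    # reuse the same recording as two training samples:
+    # the first 50 time steps and the full 100 time steps
+    short_recording = eeg.iloc[:50]
+    full_recording = eeg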
+
+Another example is streaming data, e.g. in Industry 4.0 applications. Here you typically get one
+new data row at a time and use it, for example, to predict machine failures. To train your model,
+you could act as if you were streaming the data, by feeding your classifier the data after one time step,
+the data after the first two time steps, and so on.
+
+Both examples imply that you extract the features not only on the full data set, but also
+on all temporally coherent subsets of data, which is the process of *rolling*. You can do this easily
+by calling the function :func:`tsfresh.utilities.dataframe_functions.roll_time_series`.
+
+The rolling mechanism takes a time series :math:`x` with its data rows :math:`[x_1, x_2, x_3, ..., x_n]`
+and creates :math:`n` new time series :math:`\hat x^k`, each of them with a different consecutive part
+of :math:`x`:
+
+.. math::
+    \hat x^k = [x_1, x_2, x_3, ..., x_k]
+
+(for a negative rolling direction, the sub time series are taken from the end of :math:`x` instead).
+
+To see what this does in real-world applications, we look at the following example data frame (we show only one possible data format,
+but rolling works on all three data formats, see :ref:`data-formats-label`):
+
++----+------+----+----+
+| id | time | x  | y  |
++====+======+====+====+
+| 1  | t1   | 1  | 5  |
++----+------+----+----+
+| 1  | t2   | 2  | 6  |
++----+------+----+----+
+| 1  | t3   | 3  | 7  |
++----+------+----+----+
+| 1  | t4   | 4  | 8  |
++----+------+----+----+
+| 2  | t8   | 10 | 12 |
++----+------+----+----+
+| 2  | t9   | 11 | 13 |
++----+------+----+----+
+
+where you have measured two values (x and y) for two different entities (1 and 2) at 4 and 2 time steps, respectively.
+
+If you set `rolling` to 0, the feature extraction works on
+
++----+------+----+----+
+| id | time | x  | y  |
++====+======+====+====+
+| 1  | t1   | 1  | 5  |
++----+------+----+----+
+| 1  | t2   | 2  | 6  |
++----+------+----+----+
+| 1  | t3   | 3  | 7  |
++----+------+----+----+
+| 1  | t4   | 4  | 8  |
++----+------+----+----+
+
+and
+
++----+------+----+----+
+| id | time | x  | y  |
++====+======+====+====+
+| 2  | t8   | 10 | 12 |
++----+------+----+----+
+| 2  | t9   | 11 | 13 |
++----+------+----+----+
+
+So it extracts two sets of features.
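+
+The sub time series shown below can be produced directly with
+:func:`tsfresh.utilities.dataframe_functions.roll_time_series`. A minimal sketch, assuming numeric
+time stamps in place of the labels t1, ..., t9 (the variable names are only for illustration):
+
+.. code:: python
+
+    import pandas as pd
+    from tsfresh.utilities.dataframe_functions import roll_time_series
+
+    # the example frame from above, with numeric time stamps
+    df = pd.DataFrame({"id": [1, 1, 1, 1, 2, 2],
+                       "time": [1, 2, 3, 4, 8, 9],
+                       "x": [1, 2, 3, 4, 10, 11],
+                       "y": [5, 6, 7, 8, 12, 13]})
+
+    rolled = roll_time_series(df, column_id="id", column_sort="time",
+                              column_kind=None, rolling_direction=1)
+
+    # every rolled sub time series gets a new id of the form "id={id}, shift={shift}"
+    print(rolled["id"].unique())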
+ +If you set rolling to 1, the feature extraction works with all of the following time series: + ++----+------+----+----+ +| id | time | x | y | ++====+======+====+====+ +| 1 | t1 | 1 | 5 | ++----+------+----+----+ + ++----+------+----+----+ +| id | time | x | y | ++====+======+====+====+ +| 1 | t1 | 1 | 5 | ++----+------+----+----+ +| 1 | t2 | 2 | 6 | ++----+------+----+----+ + ++----+------+----+----+ +| id | time | x | y | ++====+======+====+====+ +| 1 | t1 | 1 | 5 | ++----+------+----+----+ +| 1 | t2 | 2 | 6 | ++----+------+----+----+ +| 1 | t3 | 3 | 7 | ++----+------+----+----+ +| 2 | t8 | 10 | 12 | ++----+------+----+----+ + ++----+------+----+----+ +| id | time | x | y | ++====+======+====+====+ +| 1 | t1 | 1 | 5 | ++----+------+----+----+ +| 1 | t2 | 2 | 6 | ++----+------+----+----+ +| 1 | t3 | 3 | 7 | ++----+------+----+----+ +| 1 | t4 | 4 | 8 | ++----+------+----+----+ +| 2 | t8 | 10 | 12 | ++----+------+----+----+ +| 2 | t9 | 11 | 13 | ++----+------+----+----+ + +If you set rolling to -1, you end up with features for the time series, rolled in the other direction + ++----+------+----+----+ +| id | time | x | y | ++====+======+====+====+ +| 1 | t4 | 4 | 8 | ++----+------+----+----+ + ++----+------+----+----+ +| id | time | x | y | ++====+======+====+====+ +| 1 | t3 | 3 | 7 | ++----+------+----+----+ +| 1 | t4 | 4 | 8 | ++----+------+----+----+ + ++----+------+----+----+ +| id | time | x | y | ++====+======+====+====+ +| 1 | t2 | 2 | 6 | ++----+------+----+----+ +| 1 | t3 | 3 | 7 | ++----+------+----+----+ +| 1 | t4 | 4 | 8 | ++----+------+----+----+ +| 2 | t9 | 11 | 13 | ++----+------+----+----+ + ++----+------+----+----+ +| id | time | x | y | ++====+======+====+====+ +| 1 | t1 | 1 | 5 | ++----+------+----+----+ +| 1 | t2 | 2 | 6 | ++----+------+----+----+ +| 1 | t3 | 3 | 7 | ++----+------+----+----+ +| 1 | t4 | 4 | 8 | ++----+------+----+----+ +| 2 | t8 | 10 | 12 | ++----+------+----+----+ +| 2 | t9 | 11 | 13 | ++----+------+----+----+ \ No newline at end of file diff --git a/tests/utilities/test_dataframe_functions.py b/tests/utilities/test_dataframe_functions.py index aa869c188..f2b487ac3 100644 --- a/tests/utilities/test_dataframe_functions.py +++ b/tests/utilities/test_dataframe_functions.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # This file as well as the whole tsfresh package are licenced under the MIT licence (see the LICENCE.txt) # Maximilian Christ (maximilianchrist.com), Blue Yonder Gmbh, 2016 - +import warnings from unittest import TestCase import pandas as pd @@ -157,6 +157,178 @@ def test_with_wrong_input(self): self.assertRaises(ValueError, dataframe_functions.normalize_input_to_internal_representation, test_df, "id", None, None, "value") + test_df = pd.DataFrame([{"id": 0, "value": np.NaN}]) + self.assertRaises(ValueError, dataframe_functions.normalize_input_to_internal_representation, test_df, + None, None, None, "value") + + +class RollingTestCase(TestCase): + def test_with_wrong_input(self): + test_df = pd.DataFrame([{"id": 0, "kind": "a", "value": 3, "sort": np.NaN}]) + self.assertRaises(ValueError, dataframe_functions.roll_time_series, + df_or_dict=test_df, column_id="id", + column_sort="sort", column_kind="kind", + rolling_direction=1) + + test_df = pd.DataFrame([{"id": 0, "kind": "a", "value": 3, "sort": 1}]) + self.assertRaises(AttributeError, dataframe_functions.roll_time_series, + df_or_dict=test_df, column_id="strange_id", + column_sort="sort", column_kind="kind", + rolling_direction=1) + + test_df = {"a": pd.DataFrame([{"id": 0}])} + 
self.assertRaises(ValueError, dataframe_functions.roll_time_series, + df_or_dict=test_df, column_id="id", + column_sort=None, column_kind="kind", + rolling_direction=1) + + self.assertRaises(ValueError, dataframe_functions.roll_time_series, + df_or_dict=test_df, column_id=None, + column_sort=None, column_kind="kind", + rolling_direction=1) + + self.assertRaises(ValueError, dataframe_functions.roll_time_series, + df_or_dict=test_df, column_id="id", + column_sort=None, column_kind=None, + rolling_direction=0) + + self.assertRaises(ValueError, dataframe_functions.roll_time_series, + df_or_dict=test_df, column_id=None, + column_sort=None, column_kind=None, + rolling_direction=0) + + def test_single_row(self): + test_df = pd.DataFrame([{"id": np.NaN, "kind": "a", "value": 3, "sort": 1}]) + dataframe_functions.roll_time_series( + df_or_dict=test_df, column_id="id", + column_sort="sort", column_kind="kind", + rolling_direction=1) + + def test_positive_rolling(self): + first_class = pd.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "time": range(4)}) + second_class = pd.DataFrame({"a": [10, 11], "b": [12, 13], "time": range(20, 22)}) + + first_class["id"] = 1 + second_class["id"] = 2 + + df_full = pd.concat([first_class, second_class], ignore_index=True) + + df = dataframe_functions.roll_time_series(df_full, column_id="id", column_sort="time", + column_kind=None, rolling_direction=1) + + correct_indices = (["id=1, shift=3"] * 1 + + ["id=1, shift=2"] * 2 + + ["id=1, shift=1"] * 3 + + ["id=2, shift=1"] * 1 + + ["id=1, shift=0"] * 4 + + ["id=2, shift=0"] * 2) + + self.assertListEqual(list(df["id"]), correct_indices) + + self.assertListEqual(list(df["a"].values), + [1, 1, 2, 1, 2, 3, 10, 1, 2, 3, 4, 10, 11]) + self.assertListEqual(list(df["b"].values), + [5, 5, 6, 5, 6, 7, 12, 5, 6, 7, 8, 12, 13]) + + def test_negative_rolling(self): + first_class = pd.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "time": range(4)}) + second_class = pd.DataFrame({"a": [10, 11], "b": [12, 13], "time": range(20, 22)}) + + first_class["id"] = 1 + second_class["id"] = 2 + + df_full = pd.concat([first_class, second_class], ignore_index=True) + + df = dataframe_functions.roll_time_series(df_full, column_id="id", column_sort="time", + column_kind=None, rolling_direction=-1) + + correct_indices = (["id=1, shift=-3"] * 1 + + ["id=1, shift=-2"] * 2 + + ["id=1, shift=-1"] * 3 + + ["id=2, shift=-1"] * 1 + + ["id=1, shift=0"] * 4 + + ["id=2, shift=0"] * 2) + + self.assertListEqual(list(df["id"].values), correct_indices) + + self.assertListEqual(list(df["a"].values), + [4, 3, 4, 2, 3, 4, 11, 1, 2, 3, 4, 10, 11]) + self.assertListEqual(list(df["b"].values), + [8, 7, 8, 6, 7, 8, 13, 5, 6, 7, 8, 12, 13]) + + def test_stacked_rolling(self): + first_class = pd.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "time": range(4)}) + second_class = pd.DataFrame({"a": [10, 11], "b": [12, 13], "time": range(20, 22)}) + + first_class["id"] = 1 + second_class["id"] = 2 + + df_full = pd.concat([first_class, second_class], ignore_index=True) + + df_stacked = pd.concat([df_full[["time", "id", "a"]].rename(columns={"a": "_value"}), + df_full[["time", "id", "b"]].rename(columns={"b": "_value"})], ignore_index=True) + df_stacked["kind"] = ["a"] * 6 + ["b"] * 6 + + df = dataframe_functions.roll_time_series(df_stacked, column_id="id", column_sort="time", + column_kind="kind", rolling_direction=-1) + + correct_indices = (["id=1, shift=-3"] * 2 + + ["id=1, shift=-2"] * 4 + + ["id=1, shift=-1"] * 6 + + ["id=2, shift=-1"] * 2 + + ["id=1, shift=0"] * 8 
+ + ["id=2, shift=0"] * 4) + + self.assertListEqual(list(df["id"].values), correct_indices) + + self.assertListEqual(list(df["kind"].values), ["a", "b"] * 13) + self.assertListEqual(list(df["_value"].values), + [4, 8, 3, 7, 4, 8, 2, 6, 3, 7, 4, 8, 11, 13, 1, 5, 2, 6, 3, 7, 4, 8, 10, 12, 11, 13]) + + def test_dict_rolling(self): + df_dict = { + "a": pd.DataFrame({"_value": [1, 2, 3, 4, 10, 11], "id": [1, 1, 1, 1, 2, 2]}), + "b": pd.DataFrame({"_value": [5, 6, 7, 8, 12, 13], "id": [1, 1, 1, 1, 2, 2]}) + } + + df = dataframe_functions.roll_time_series(df_dict, column_id="id", column_sort=None, + column_kind=None, rolling_direction=-1) + + correct_indices = (["id=1, shift=-3"] * 1 + + ["id=1, shift=-2"] * 2 + + ["id=1, shift=-1"] * 3 + + ["id=2, shift=-1"] * 1 + + ["id=1, shift=0"] * 4 + + ["id=2, shift=0"] * 2) + + self.assertListEqual(list(df["a"]["id"].values), correct_indices) + self.assertListEqual(list(df["b"]["id"].values), correct_indices) + + self.assertListEqual(list(df["a"]["_value"].values), + [4, 3, 4, 2, 3, 4, 11, 1, 2, 3, 4, 10, 11]) + self.assertListEqual(list(df["b"]["_value"].values), + [8, 7, 8, 6, 7, 8, 13, 5, 6, 7, 8, 12, 13]) + + + + def test_warning_on_non_uniform_time_steps(self): + with warnings.catch_warnings(record=True) as w: + first_class = pd.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "time": [1, 2, 4, 5]}) + second_class = pd.DataFrame({"a": [10, 11], "b": [12, 13], "time": range(20, 22)}) + + first_class["id"] = 1 + second_class["id"] = 2 + + df_full = pd.concat([first_class, second_class], ignore_index=True) + + dataframe_functions.roll_time_series(df_full, column_id="id", column_sort="time", + column_kind=None, rolling_direction=1) + + self.assertEqual(len(w), 1) + self.assertEqual(str(w[0].message), + "Your time stamps are not uniformly sampled, which makes rolling " + "nonsensical in some domains.") + class CheckForNanTestCase(TestCase): def test_all_columns(self): @@ -284,6 +456,11 @@ def test_restrict_dict(self): self.assertTrue(kind_to_df_restricted2['a'].equals(kind_to_df['a'])) self.assertTrue(kind_to_df_restricted2['b'].equals(kind_to_df['b'])) + def test_restrict_wrong(self): + other_type = np.array([1, 2, 3]) + + self.assertRaises(TypeError, dataframe_functions.restrict_input_to_index, other_type, "id", [1, 2, 3]) + class GetRangeValuesPerColumnTestCase(TestCase): def test_ignores_non_finite_values(self): diff --git a/tsfresh/feature_extraction/extraction.py b/tsfresh/feature_extraction/extraction.py index 3a59c6d4f..6bd4c343e 100644 --- a/tsfresh/feature_extraction/extraction.py +++ b/tsfresh/feature_extraction/extraction.py @@ -89,8 +89,11 @@ def extract_features(timeseries_container, feature_extraction_settings=None, # Always use the standardized way of storing the data. # See the function normalize_input_to_internal_representation for more information. kind_to_df_map, column_id, column_value = \ - dataframe_functions.normalize_input_to_internal_representation(timeseries_container, column_id, column_sort, - column_kind, column_value) + dataframe_functions.normalize_input_to_internal_representation(df_or_dict=timeseries_container, + column_id=column_id, + column_sort=column_sort, + column_kind=column_kind, + column_value=column_value) # Use the standard setting if the user did not supply ones himself. 
if feature_extraction_settings is None: diff --git a/tsfresh/utilities/dataframe_functions.py b/tsfresh/utilities/dataframe_functions.py index 01703e677..f16c6755d 100644 --- a/tsfresh/utilities/dataframe_functions.py +++ b/tsfresh/utilities/dataframe_functions.py @@ -5,6 +5,7 @@ Utility functions for handling the DataFrame conversions to the internal normalized format (see ``normalize_input_to_internal_representation``) or on how to handle ``NaN`` and ``inf`` in the DataFrames. """ +import warnings import numpy as np import pandas as pd @@ -231,12 +232,10 @@ def normalize_input_to_internal_representation(df_or_dict, column_id, column_sor :param df_or_dict: a pandas DataFrame or a dictionary. The required shape/form of the object depends on the rest of the passed arguments. :type df_or_dict: pandas.DataFrame or dict - :param column_id: if not None, it must be present in the pandas DataFrame or in all DataFrames in the dictionary. + :param column_id: it must be present in the pandas DataFrame or in all DataFrames in the dictionary. It is not allowed to have NaN values in this column. - If this column name is None, a new column will be added to the pandas DataFrame (or all pandas DataFrames in - the dictionary) and the same id for all entries is assumed. :type column_id: basestring or None - :param column_sort: if not None, sort the rows by this column. Then, the column is dropped. It is not allowed to + :param column_sort: if not None, sort the rows by this column. It is not allowed to have NaN values in this column. :type column_sort: basestring or None :param column_kind: It can only be used when passing a pandas DataFrame (the dictionary is already assumed to be @@ -274,18 +273,11 @@ def normalize_input_to_internal_representation(df_or_dict, column_id, column_sor kind_to_df_map = {key: df_or_dict[[key] + id_and_sort_column].copy().rename(columns={key: "_value"}) for key in df_or_dict.columns if key not in id_and_sort_column} - # todo: is this the right check? + # TODO: is this the right check? if len(kind_to_df_map) < 1: raise ValueError("You passed in a dataframe without a value column.") column_value = "_value" - if column_sort is not None: - for kind in kind_to_df_map: - # Require no Nans in column - if kind_to_df_map[kind][column_sort].isnull().any(): - raise ValueError("You have NaN values in your sort column.") - kind_to_df_map[kind] = kind_to_df_map[kind].sort_values(column_sort).drop(column_sort, axis=1) - if column_id is not None: for kind in kind_to_df_map: if column_id not in kind_to_df_map[kind].columns: @@ -295,6 +287,14 @@ def normalize_input_to_internal_representation(df_or_dict, column_id, column_sor else: raise ValueError("You have to set the column_id which contains the ids of the different time series") + if column_sort is not None: + for kind in kind_to_df_map: + # Require no Nans in column + if kind_to_df_map[kind][column_sort].isnull().any(): + raise ValueError("You have NaN values in your sort column.") + + kind_to_df_map[kind] = kind_to_df_map[kind].sort_values(column_sort).drop(column_sort, axis=1) + # Either the column for the value must be given... 
     if column_value is not None:
         for kind in kind_to_df_map:
@@ -323,3 +323,109 @@ def normalize_input_to_internal_representation(df_or_dict, column_id, column_sor
         raise ValueError("You have NaN values in your value column.")
 
     return kind_to_df_map, column_id, column_value
+
+
+def roll_time_series(df_or_dict, column_id, column_sort, column_kind, rolling_direction):
+    """
+    Roll the (sorted) data frames for each kind and each id separately in "time"
+    (here, time is the abstract sort order defined by the sort column). For each rolling step, a new id is
+    created with the name "id={id}, shift={shift}", where {id} is the former id of the time series and
+    {shift} is the number of "time" shifts. ATTENTION: This will (obviously) create new IDs! The sign of
+    rolling_direction defines the direction of time rolling.
+    For more information, please see :ref:`rolling-label`.
+
+    :param df_or_dict: a pandas DataFrame or a dictionary. The required shape/form of the object depends on the rest of
+        the passed arguments.
+    :type df_or_dict: pandas.DataFrame or dict
+    :param column_id: it must be present in the pandas DataFrame or in all DataFrames in the dictionary.
+        It is not allowed to have NaN values in this column.
+    :type column_id: basestring or None
+    :param column_sort: if not None, sort the rows by this column. It is not allowed to
+        have NaN values in this column.
+    :type column_sort: basestring or None
+    :param column_kind: It can only be used when passing a pandas DataFrame (the dictionary is already assumed to be
+        grouped by the kind). It must be present in the DataFrame and no NaN values are allowed.
+        If the kind column is not passed, it is assumed that each column in the pandas DataFrame (except the id or
+        sort column) is a possible kind.
+    :type column_kind: basestring or None
+    :param rolling_direction: The sign decides whether to roll backwards or forwards in "time".
+    :type rolling_direction: int
+
+    :return: The rolled data frame or dictionary of data frames
+    :rtype: the one from df_or_dict
+    """
+
+    if isinstance(df_or_dict, dict):
+        if column_kind is not None:
+            raise ValueError("You passed in a dictionary and gave a column name for the kind. "
+                             "Both are not possible.")
+
+        return {key: roll_time_series(df_or_dict=df_or_dict[key],
+                                      column_id=column_id,
+                                      column_sort=column_sort,
+                                      column_kind=column_kind,
+                                      rolling_direction=rolling_direction)
+                for key in df_or_dict}
+
+    # Now we know that this is a pandas data frame
+    df = df_or_dict
+
+    if column_id is not None:
+        if column_id not in df:
+            raise AttributeError("The given column for the id is not present in the data.")
+    else:
+        raise ValueError("You have to set the column_id which contains the ids of the different time series")
+
+    if column_kind is not None:
+        grouper = (column_kind, column_id)
+    else:
+        grouper = (column_id,)
+
+    if column_sort is not None:
+        # Require no NaNs in the sort column
+        if df[column_sort].isnull().any():
+            raise ValueError("You have NaN values in your sort column.")
+
+        df = df.sort_values(column_sort)
+
+        # If rolling is enabled, the data should be uniformly sampled in this column.
+        # Build the differences between consecutive time sort values.
+        differences = df.groupby(grouper)[column_sort].apply(
+            lambda x: x.values[:-1] - x.values[1:])
+        # Write all of them into one big list
+        differences = sum(map(list, differences), [])
+        # Test if all differences are the same
+        if differences and min(differences) != max(differences):
+            warnings.warn("Your time stamps are not uniformly sampled, which makes rolling "
+                          "nonsensical in some domains.")
+
+    # Roll the data frames if requested
+    rolling_direction = np.sign(rolling_direction)
+
+    if rolling_direction == 0:
+        raise ValueError("Rolling direction of 0 is not possible")
+
+    grouped_data = df.groupby(grouper)
+    maximum_number_of_timeshifts = grouped_data.count().max().max()
+
+    if np.isnan(maximum_number_of_timeshifts):
+        maximum_number_of_timeshifts = 0
+
+    if rolling_direction > 0:
+        range_of_shifts = range(maximum_number_of_timeshifts, -1, -1)
+    else:
+        range_of_shifts = range(-maximum_number_of_timeshifts, 1)
+
+    def roll_out_time_series(time_shift):
+        # Shift out only the first "time_shift" rows
+        df_temp = grouped_data.shift(time_shift)
+        df_temp[column_id] = "id=" + df[column_id].map(str) + ", shift={}".format(time_shift)
+        if column_kind:
+            df_temp[column_kind] = df[column_kind]
+        return df_temp.dropna()
+
+    return pd.concat([roll_out_time_series(time_shift) for time_shift in range_of_shifts],
+                     ignore_index=True)
+
+
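
A minimal usage sketch of the new ``roll_time_series`` function together with the existing feature
extraction; the DataFrame contents and variable names are made up for illustration::

    import pandas as pd

    from tsfresh import extract_features
    from tsfresh.utilities.dataframe_functions import roll_time_series

    # two short time series (kinds "x" and "y") for the ids 1 and 2
    df = pd.DataFrame({"id": [1, 1, 1, 1, 2, 2],
                       "time": [1, 2, 3, 4, 8, 9],
                       "x": [1, 2, 3, 4, 10, 11],
                       "y": [5, 6, 7, 8, 12, 13]})

    # roll forwards in "time": every prefix of each original id becomes its own new id
    rolled = roll_time_series(df, column_id="id", column_sort="time",
                              column_kind=None, rolling_direction=1)

    # the rolled frame can then be fed into the normal feature extraction
    features = extract_features(rolled, column_id="id", column_sort="time")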