From 8cd6a4ccf8eaaca68f014bbe25b85ed384a6ddcd Mon Sep 17 00:00:00 2001
From: Nils Braun
Date: Sat, 25 Mar 2017 14:31:10 +0100
Subject: [PATCH] Rolling timeseries (#170)

* Added a rolling parameter to the extract function and the normalize function, to roll out time series in time in both directions
* Added documentation in the code on the new rolling feature
* Fixed the normalize test and added a test case for rolling
* Increased coverage
* Added text documentation for the new feature
* Included a warning if the time is not uniformly sampled. For this I had to move the id check before the sort check
* Added some formulas to the docs
* Only enable the test when rolling is enabled
* Factored out the rolling into a new function
* Fixed the documentation for the new function
* Forgot to upload some changes
---
 docs/index.rst                              |   1 +
 docs/text/faq.rst                           |   8 +-
 docs/text/rolling.rst                       | 167 ++++++++++++++++++
 tests/utilities/test_dataframe_functions.py | 179 +++++++++++++++++++-
 tsfresh/feature_extraction/extraction.py    |   7 +-
 tsfresh/utilities/dataframe_functions.py    | 130 ++++++++++++--
 6 files changed, 476 insertions(+), 16 deletions(-)
 create mode 100644 docs/text/rolling.rst

diff --git a/docs/index.rst b/docs/index.rst
index 333755dbc..2f11ff1a1 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -27,6 +27,7 @@ The following chapters will explain the tsfresh package in detail:
     Feature Filtering
     How to write custom Feature Calculators
     Parallelization
+    How to handle rolling time series
     FAQ
     Authors
     License

diff --git a/docs/text/faq.rst b/docs/text/faq.rst
index 6acffaec6..41641d23b 100644
--- a/docs/text/faq.rst
+++ b/docs/text/faq.rst
@@ -1,8 +1,14 @@
 FAQ
-=================
+===
 
 1. *Does tsfresh support different time series lengths?*
 
    Yes, it supports different time series lengths. However, some feature calculators can demand a minimal length
    of the time series. If a shorter time series is passed to the calculator, normally a NaN is returned.
+
+
+2. *Is it possible to extract features from rolling/shifted time series?*
+
+   Yes, there is the option `rolling` for the :func:`tsfresh.feature_extraction.extract_features` function.
+   Set it to a non-zero value to enable rolling. At the moment, this simply rolls the input data out into
+   as many time series as there are time steps - there is no internal optimization for rolling calculations.
+   Please see :ref:`rolling-label` for more information.

diff --git a/docs/text/rolling.rst b/docs/text/rolling.rst
new file mode 100644
index 000000000..304d88abe
--- /dev/null
+++ b/docs/text/rolling.rst
@@ -0,0 +1,167 @@
+.. _rolling-label:
+
+How to handle rolling time series
+=================================
+
+In many real-world applications with time series, the "time" column
+(we will call it time in the following, although it can be anything)
+gives a certain sequential order to the data. We can exploit this sequence to generate
+more input data out of a single time series by *rolling* over the data.
+
+Imagine the following situation: you have EEG measurement data that
+you want to use to classify patients into healthy and not healthy (we oversimplify the problem here).
+You have e.g. 100 time steps of data, so you can extract features that may predict the health status
+of the patients. But what would happen if you only had the recorded measurements for 50 time steps?
+The patients would be just as healthy as with 100 time steps. So you can easily increase the amount of
+training data by reusing time series cut into smaller pieces.
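+
+The idea of reusing shorter pieces can be sketched directly in pandas. This is only a minimal sketch;
+the ``eeg`` frame, its columns and the cut length are made up for illustration and are not part of tsfresh:
+
+.. code:: python
+
+    import numpy as np
+    import pandas as pd
+
+    # a made-up EEG-like recording with 100 time steps for a single patient
+    eeg = pd.DataFrame({"time": np.arange(100), "voltage": np.random.randn(100)})
+
+    # reuse the same recording as two training samples:
+    # the first 50 time steps and the full 100 time steps
+    short_recording = eeg.iloc[:50]
+    full_recording = eeg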
+
+Another example is streaming data, e.g. in Industry 4.0 applications. Here you typically get one
+new data row at a time and use it, for example, to predict machine failures. To train your model,
+you could act as if you were streaming the data, by feeding your classifier the data after one time step,
+the data after the first two time steps, and so on.
+
+Both examples imply that you extract the features not only on the full data set, but also
+on all temporally coherent subsets of data, which is the process of *rolling*. You can do this easily
+by calling the function :func:`tsfresh.utilities.dataframe_functions.roll_time_series`.
+
+The rolling mechanism takes a time series :math:`x` with its data rows :math:`[x_1, x_2, x_3, ..., x_n]`
+and creates :math:`n` new time series :math:`\hat x^k`, each of them with a different consecutive part
+of :math:`x`:
+
+.. math::
+    \hat x^k = [x_1, x_2, x_3, ..., x_k]
+
+(for a negative rolling direction, the sub time series are taken from the end of :math:`x` instead).
+
+To see what this does in real-world applications, we look at the following example data frame (we show only one possible data format,
+but rolling works on all three data formats, see :ref:`data-formats-label`):
+
++----+------+----+----+
+| id | time | x  | y  |
++====+======+====+====+
+| 1  | t1   | 1  | 5  |
++----+------+----+----+
+| 1  | t2   | 2  | 6  |
++----+------+----+----+
+| 1  | t3   | 3  | 7  |
++----+------+----+----+
+| 1  | t4   | 4  | 8  |
++----+------+----+----+
+| 2  | t8   | 10 | 12 |
++----+------+----+----+
+| 2  | t9   | 11 | 13 |
++----+------+----+----+
+
+where you have measured two values (x and y) for two different entities (1 and 2) at 4 and 2 time steps, respectively.
+
+If you set `rolling` to 0, the feature extraction works on
+
++----+------+----+----+
+| id | time | x  | y  |
++====+======+====+====+
+| 1  | t1   | 1  | 5  |
++----+------+----+----+
+| 1  | t2   | 2  | 6  |
++----+------+----+----+
+| 1  | t3   | 3  | 7  |
++----+------+----+----+
+| 1  | t4   | 4  | 8  |
++----+------+----+----+
+
+and
+
++----+------+----+----+
+| id | time | x  | y  |
++====+======+====+====+
+| 2  | t8   | 10 | 12 |
++----+------+----+----+
+| 2  | t9   | 11 | 13 |
++----+------+----+----+
+
+So it extracts two sets of features.
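+
+The sub time series shown below can be produced directly with
+:func:`tsfresh.utilities.dataframe_functions.roll_time_series`. A minimal sketch, assuming numeric
+time stamps in place of the labels t1, ..., t9 (the variable names are only for illustration):
+
+.. code:: python
+
+    import pandas as pd
+    from tsfresh.utilities.dataframe_functions import roll_time_series
+
+    # the example frame from above, with numeric time stamps
+    df = pd.DataFrame({"id": [1, 1, 1, 1, 2, 2],
+                       "time": [1, 2, 3, 4, 8, 9],
+                       "x": [1, 2, 3, 4, 10, 11],
+                       "y": [5, 6, 7, 8, 12, 13]})
+
+    rolled = roll_time_series(df, column_id="id", column_sort="time",
+                              column_kind=None, rolling_direction=1)
+
+    # every rolled sub time series gets a new id of the form "id={id}, shift={shift}"
+    print(rolled["id"].unique())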
+ +If you set rolling to 1, the feature extraction works with all of the following time series: + ++----+------+----+----+ +| id | time | x | y | ++====+======+====+====+ +| 1 | t1 | 1 | 5 | ++----+------+----+----+ + ++----+------+----+----+ +| id | time | x | y | ++====+======+====+====+ +| 1 | t1 | 1 | 5 | ++----+------+----+----+ +| 1 | t2 | 2 | 6 | ++----+------+----+----+ + ++----+------+----+----+ +| id | time | x | y | ++====+======+====+====+ +| 1 | t1 | 1 | 5 | ++----+------+----+----+ +| 1 | t2 | 2 | 6 | ++----+------+----+----+ +| 1 | t3 | 3 | 7 | ++----+------+----+----+ +| 2 | t8 | 10 | 12 | ++----+------+----+----+ + ++----+------+----+----+ +| id | time | x | y | ++====+======+====+====+ +| 1 | t1 | 1 | 5 | ++----+------+----+----+ +| 1 | t2 | 2 | 6 | ++----+------+----+----+ +| 1 | t3 | 3 | 7 | ++----+------+----+----+ +| 1 | t4 | 4 | 8 | ++----+------+----+----+ +| 2 | t8 | 10 | 12 | ++----+------+----+----+ +| 2 | t9 | 11 | 13 | ++----+------+----+----+ + +If you set rolling to -1, you end up with features for the time series, rolled in the other direction + ++----+------+----+----+ +| id | time | x | y | ++====+======+====+====+ +| 1 | t4 | 4 | 8 | ++----+------+----+----+ + ++----+------+----+----+ +| id | time | x | y | ++====+======+====+====+ +| 1 | t3 | 3 | 7 | ++----+------+----+----+ +| 1 | t4 | 4 | 8 | ++----+------+----+----+ + ++----+------+----+----+ +| id | time | x | y | ++====+======+====+====+ +| 1 | t2 | 2 | 6 | ++----+------+----+----+ +| 1 | t3 | 3 | 7 | ++----+------+----+----+ +| 1 | t4 | 4 | 8 | ++----+------+----+----+ +| 2 | t9 | 11 | 13 | ++----+------+----+----+ + ++----+------+----+----+ +| id | time | x | y | ++====+======+====+====+ +| 1 | t1 | 1 | 5 | ++----+------+----+----+ +| 1 | t2 | 2 | 6 | ++----+------+----+----+ +| 1 | t3 | 3 | 7 | ++----+------+----+----+ +| 1 | t4 | 4 | 8 | ++----+------+----+----+ +| 2 | t8 | 10 | 12 | ++----+------+----+----+ +| 2 | t9 | 11 | 13 | ++----+------+----+----+ \ No newline at end of file diff --git a/tests/utilities/test_dataframe_functions.py b/tests/utilities/test_dataframe_functions.py index aa869c188..f2b487ac3 100644 --- a/tests/utilities/test_dataframe_functions.py +++ b/tests/utilities/test_dataframe_functions.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # This file as well as the whole tsfresh package are licenced under the MIT licence (see the LICENCE.txt) # Maximilian Christ (maximilianchrist.com), Blue Yonder Gmbh, 2016 - +import warnings from unittest import TestCase import pandas as pd @@ -157,6 +157,178 @@ def test_with_wrong_input(self): self.assertRaises(ValueError, dataframe_functions.normalize_input_to_internal_representation, test_df, "id", None, None, "value") + test_df = pd.DataFrame([{"id": 0, "value": np.NaN}]) + self.assertRaises(ValueError, dataframe_functions.normalize_input_to_internal_representation, test_df, + None, None, None, "value") + + +class RollingTestCase(TestCase): + def test_with_wrong_input(self): + test_df = pd.DataFrame([{"id": 0, "kind": "a", "value": 3, "sort": np.NaN}]) + self.assertRaises(ValueError, dataframe_functions.roll_time_series, + df_or_dict=test_df, column_id="id", + column_sort="sort", column_kind="kind", + rolling_direction=1) + + test_df = pd.DataFrame([{"id": 0, "kind": "a", "value": 3, "sort": 1}]) + self.assertRaises(AttributeError, dataframe_functions.roll_time_series, + df_or_dict=test_df, column_id="strange_id", + column_sort="sort", column_kind="kind", + rolling_direction=1) + + test_df = {"a": pd.DataFrame([{"id": 0}])} + 
self.assertRaises(ValueError, dataframe_functions.roll_time_series, + df_or_dict=test_df, column_id="id", + column_sort=None, column_kind="kind", + rolling_direction=1) + + self.assertRaises(ValueError, dataframe_functions.roll_time_series, + df_or_dict=test_df, column_id=None, + column_sort=None, column_kind="kind", + rolling_direction=1) + + self.assertRaises(ValueError, dataframe_functions.roll_time_series, + df_or_dict=test_df, column_id="id", + column_sort=None, column_kind=None, + rolling_direction=0) + + self.assertRaises(ValueError, dataframe_functions.roll_time_series, + df_or_dict=test_df, column_id=None, + column_sort=None, column_kind=None, + rolling_direction=0) + + def test_single_row(self): + test_df = pd.DataFrame([{"id": np.NaN, "kind": "a", "value": 3, "sort": 1}]) + dataframe_functions.roll_time_series( + df_or_dict=test_df, column_id="id", + column_sort="sort", column_kind="kind", + rolling_direction=1) + + def test_positive_rolling(self): + first_class = pd.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "time": range(4)}) + second_class = pd.DataFrame({"a": [10, 11], "b": [12, 13], "time": range(20, 22)}) + + first_class["id"] = 1 + second_class["id"] = 2 + + df_full = pd.concat([first_class, second_class], ignore_index=True) + + df = dataframe_functions.roll_time_series(df_full, column_id="id", column_sort="time", + column_kind=None, rolling_direction=1) + + correct_indices = (["id=1, shift=3"] * 1 + + ["id=1, shift=2"] * 2 + + ["id=1, shift=1"] * 3 + + ["id=2, shift=1"] * 1 + + ["id=1, shift=0"] * 4 + + ["id=2, shift=0"] * 2) + + self.assertListEqual(list(df["id"]), correct_indices) + + self.assertListEqual(list(df["a"].values), + [1, 1, 2, 1, 2, 3, 10, 1, 2, 3, 4, 10, 11]) + self.assertListEqual(list(df["b"].values), + [5, 5, 6, 5, 6, 7, 12, 5, 6, 7, 8, 12, 13]) + + def test_negative_rolling(self): + first_class = pd.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "time": range(4)}) + second_class = pd.DataFrame({"a": [10, 11], "b": [12, 13], "time": range(20, 22)}) + + first_class["id"] = 1 + second_class["id"] = 2 + + df_full = pd.concat([first_class, second_class], ignore_index=True) + + df = dataframe_functions.roll_time_series(df_full, column_id="id", column_sort="time", + column_kind=None, rolling_direction=-1) + + correct_indices = (["id=1, shift=-3"] * 1 + + ["id=1, shift=-2"] * 2 + + ["id=1, shift=-1"] * 3 + + ["id=2, shift=-1"] * 1 + + ["id=1, shift=0"] * 4 + + ["id=2, shift=0"] * 2) + + self.assertListEqual(list(df["id"].values), correct_indices) + + self.assertListEqual(list(df["a"].values), + [4, 3, 4, 2, 3, 4, 11, 1, 2, 3, 4, 10, 11]) + self.assertListEqual(list(df["b"].values), + [8, 7, 8, 6, 7, 8, 13, 5, 6, 7, 8, 12, 13]) + + def test_stacked_rolling(self): + first_class = pd.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "time": range(4)}) + second_class = pd.DataFrame({"a": [10, 11], "b": [12, 13], "time": range(20, 22)}) + + first_class["id"] = 1 + second_class["id"] = 2 + + df_full = pd.concat([first_class, second_class], ignore_index=True) + + df_stacked = pd.concat([df_full[["time", "id", "a"]].rename(columns={"a": "_value"}), + df_full[["time", "id", "b"]].rename(columns={"b": "_value"})], ignore_index=True) + df_stacked["kind"] = ["a"] * 6 + ["b"] * 6 + + df = dataframe_functions.roll_time_series(df_stacked, column_id="id", column_sort="time", + column_kind="kind", rolling_direction=-1) + + correct_indices = (["id=1, shift=-3"] * 2 + + ["id=1, shift=-2"] * 4 + + ["id=1, shift=-1"] * 6 + + ["id=2, shift=-1"] * 2 + + ["id=1, shift=0"] * 8 
+ + ["id=2, shift=0"] * 4) + + self.assertListEqual(list(df["id"].values), correct_indices) + + self.assertListEqual(list(df["kind"].values), ["a", "b"] * 13) + self.assertListEqual(list(df["_value"].values), + [4, 8, 3, 7, 4, 8, 2, 6, 3, 7, 4, 8, 11, 13, 1, 5, 2, 6, 3, 7, 4, 8, 10, 12, 11, 13]) + + def test_dict_rolling(self): + df_dict = { + "a": pd.DataFrame({"_value": [1, 2, 3, 4, 10, 11], "id": [1, 1, 1, 1, 2, 2]}), + "b": pd.DataFrame({"_value": [5, 6, 7, 8, 12, 13], "id": [1, 1, 1, 1, 2, 2]}) + } + + df = dataframe_functions.roll_time_series(df_dict, column_id="id", column_sort=None, + column_kind=None, rolling_direction=-1) + + correct_indices = (["id=1, shift=-3"] * 1 + + ["id=1, shift=-2"] * 2 + + ["id=1, shift=-1"] * 3 + + ["id=2, shift=-1"] * 1 + + ["id=1, shift=0"] * 4 + + ["id=2, shift=0"] * 2) + + self.assertListEqual(list(df["a"]["id"].values), correct_indices) + self.assertListEqual(list(df["b"]["id"].values), correct_indices) + + self.assertListEqual(list(df["a"]["_value"].values), + [4, 3, 4, 2, 3, 4, 11, 1, 2, 3, 4, 10, 11]) + self.assertListEqual(list(df["b"]["_value"].values), + [8, 7, 8, 6, 7, 8, 13, 5, 6, 7, 8, 12, 13]) + + + + def test_warning_on_non_uniform_time_steps(self): + with warnings.catch_warnings(record=True) as w: + first_class = pd.DataFrame({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "time": [1, 2, 4, 5]}) + second_class = pd.DataFrame({"a": [10, 11], "b": [12, 13], "time": range(20, 22)}) + + first_class["id"] = 1 + second_class["id"] = 2 + + df_full = pd.concat([first_class, second_class], ignore_index=True) + + dataframe_functions.roll_time_series(df_full, column_id="id", column_sort="time", + column_kind=None, rolling_direction=1) + + self.assertEqual(len(w), 1) + self.assertEqual(str(w[0].message), + "Your time stamps are not uniformly sampled, which makes rolling " + "nonsensical in some domains.") + class CheckForNanTestCase(TestCase): def test_all_columns(self): @@ -284,6 +456,11 @@ def test_restrict_dict(self): self.assertTrue(kind_to_df_restricted2['a'].equals(kind_to_df['a'])) self.assertTrue(kind_to_df_restricted2['b'].equals(kind_to_df['b'])) + def test_restrict_wrong(self): + other_type = np.array([1, 2, 3]) + + self.assertRaises(TypeError, dataframe_functions.restrict_input_to_index, other_type, "id", [1, 2, 3]) + class GetRangeValuesPerColumnTestCase(TestCase): def test_ignores_non_finite_values(self): diff --git a/tsfresh/feature_extraction/extraction.py b/tsfresh/feature_extraction/extraction.py index 3a59c6d4f..6bd4c343e 100644 --- a/tsfresh/feature_extraction/extraction.py +++ b/tsfresh/feature_extraction/extraction.py @@ -89,8 +89,11 @@ def extract_features(timeseries_container, feature_extraction_settings=None, # Always use the standardized way of storing the data. # See the function normalize_input_to_internal_representation for more information. kind_to_df_map, column_id, column_value = \ - dataframe_functions.normalize_input_to_internal_representation(timeseries_container, column_id, column_sort, - column_kind, column_value) + dataframe_functions.normalize_input_to_internal_representation(df_or_dict=timeseries_container, + column_id=column_id, + column_sort=column_sort, + column_kind=column_kind, + column_value=column_value) # Use the standard setting if the user did not supply ones himself. 
if feature_extraction_settings is None: diff --git a/tsfresh/utilities/dataframe_functions.py b/tsfresh/utilities/dataframe_functions.py index 01703e677..f16c6755d 100644 --- a/tsfresh/utilities/dataframe_functions.py +++ b/tsfresh/utilities/dataframe_functions.py @@ -5,6 +5,7 @@ Utility functions for handling the DataFrame conversions to the internal normalized format (see ``normalize_input_to_internal_representation``) or on how to handle ``NaN`` and ``inf`` in the DataFrames. """ +import warnings import numpy as np import pandas as pd @@ -231,12 +232,10 @@ def normalize_input_to_internal_representation(df_or_dict, column_id, column_sor :param df_or_dict: a pandas DataFrame or a dictionary. The required shape/form of the object depends on the rest of the passed arguments. :type df_or_dict: pandas.DataFrame or dict - :param column_id: if not None, it must be present in the pandas DataFrame or in all DataFrames in the dictionary. + :param column_id: it must be present in the pandas DataFrame or in all DataFrames in the dictionary. It is not allowed to have NaN values in this column. - If this column name is None, a new column will be added to the pandas DataFrame (or all pandas DataFrames in - the dictionary) and the same id for all entries is assumed. :type column_id: basestring or None - :param column_sort: if not None, sort the rows by this column. Then, the column is dropped. It is not allowed to + :param column_sort: if not None, sort the rows by this column. It is not allowed to have NaN values in this column. :type column_sort: basestring or None :param column_kind: It can only be used when passing a pandas DataFrame (the dictionary is already assumed to be @@ -274,18 +273,11 @@ def normalize_input_to_internal_representation(df_or_dict, column_id, column_sor kind_to_df_map = {key: df_or_dict[[key] + id_and_sort_column].copy().rename(columns={key: "_value"}) for key in df_or_dict.columns if key not in id_and_sort_column} - # todo: is this the right check? + # TODO: is this the right check? if len(kind_to_df_map) < 1: raise ValueError("You passed in a dataframe without a value column.") column_value = "_value" - if column_sort is not None: - for kind in kind_to_df_map: - # Require no Nans in column - if kind_to_df_map[kind][column_sort].isnull().any(): - raise ValueError("You have NaN values in your sort column.") - kind_to_df_map[kind] = kind_to_df_map[kind].sort_values(column_sort).drop(column_sort, axis=1) - if column_id is not None: for kind in kind_to_df_map: if column_id not in kind_to_df_map[kind].columns: @@ -295,6 +287,14 @@ def normalize_input_to_internal_representation(df_or_dict, column_id, column_sor else: raise ValueError("You have to set the column_id which contains the ids of the different time series") + if column_sort is not None: + for kind in kind_to_df_map: + # Require no Nans in column + if kind_to_df_map[kind][column_sort].isnull().any(): + raise ValueError("You have NaN values in your sort column.") + + kind_to_df_map[kind] = kind_to_df_map[kind].sort_values(column_sort).drop(column_sort, axis=1) + # Either the column for the value must be given... 
     if column_value is not None:
         for kind in kind_to_df_map:
@@ -323,3 +323,109 @@ def normalize_input_to_internal_representation(df_or_dict, column_id, column_sor
         raise ValueError("You have NaN values in your value column.")
 
     return kind_to_df_map, column_id, column_value
+
+
+def roll_time_series(df_or_dict, column_id, column_sort, column_kind, rolling_direction):
+    """
+    Roll the (sorted) data frames for each kind and each id separately in "time"
+    (here, time is the abstract sort order defined by the sort column). For each rolling step, a new id is
+    created with the name "id={id}, shift={shift}", where {id} is the former id of the time series and
+    {shift} is the number of "time" shifts. ATTENTION: This will (obviously) create new IDs! The sign of
+    rolling_direction defines the direction of time rolling.
+    For more information, please see :ref:`rolling-label`.
+
+    :param df_or_dict: a pandas DataFrame or a dictionary. The required shape/form of the object depends on the rest of
+        the passed arguments.
+    :type df_or_dict: pandas.DataFrame or dict
+    :param column_id: it must be present in the pandas DataFrame or in all DataFrames in the dictionary.
+        It is not allowed to have NaN values in this column.
+    :type column_id: basestring or None
+    :param column_sort: if not None, sort the rows by this column. It is not allowed to
+        have NaN values in this column.
+    :type column_sort: basestring or None
+    :param column_kind: It can only be used when passing a pandas DataFrame (the dictionary is already assumed to be
+        grouped by the kind). It must be present in the DataFrame and no NaN values are allowed.
+        If the kind column is not passed, it is assumed that each column in the pandas DataFrame (except the id or
+        sort column) is a possible kind.
+    :type column_kind: basestring or None
+    :param rolling_direction: The sign decides whether to roll backwards or forwards in "time".
+    :type rolling_direction: int
+
+    :return: The rolled data frame or dictionary of data frames
+    :rtype: the one from df_or_dict
+    """
+
+    if isinstance(df_or_dict, dict):
+        if column_kind is not None:
+            raise ValueError("You passed in a dictionary and gave a column name for the kind. "
+                             "Both are not possible.")
+
+        return {key: roll_time_series(df_or_dict=df_or_dict[key],
+                                      column_id=column_id,
+                                      column_sort=column_sort,
+                                      column_kind=column_kind,
+                                      rolling_direction=rolling_direction)
+                for key in df_or_dict}
+
+    # Now we know that this is a pandas data frame
+    df = df_or_dict
+
+    if column_id is not None:
+        if column_id not in df:
+            raise AttributeError("The given column for the id is not present in the data.")
+    else:
+        raise ValueError("You have to set the column_id which contains the ids of the different time series")
+
+    if column_kind is not None:
+        grouper = (column_kind, column_id)
+    else:
+        grouper = (column_id,)
+
+    if column_sort is not None:
+        # Require no NaNs in the sort column
+        if df[column_sort].isnull().any():
+            raise ValueError("You have NaN values in your sort column.")
+
+        df = df.sort_values(column_sort)
+
+        # If rolling is enabled, the data should be uniformly sampled in this column.
+        # Build the differences between consecutive time sort values.
+        differences = df.groupby(grouper)[column_sort].apply(
+            lambda x: x.values[:-1] - x.values[1:])
+        # Write all of them into one big list
+        differences = sum(map(list, differences), [])
+        # Test if all differences are the same
+        if differences and min(differences) != max(differences):
+            warnings.warn("Your time stamps are not uniformly sampled, which makes rolling "
+                          "nonsensical in some domains.")
+
+    # Roll the data frames if requested
+    rolling_direction = np.sign(rolling_direction)
+
+    if rolling_direction == 0:
+        raise ValueError("Rolling direction of 0 is not possible")
+
+    grouped_data = df.groupby(grouper)
+    maximum_number_of_timeshifts = grouped_data.count().max().max()
+
+    if np.isnan(maximum_number_of_timeshifts):
+        maximum_number_of_timeshifts = 0
+
+    if rolling_direction > 0:
+        range_of_shifts = range(maximum_number_of_timeshifts, -1, -1)
+    else:
+        range_of_shifts = range(-maximum_number_of_timeshifts, 1)
+
+    def roll_out_time_series(time_shift):
+        # Shift out only the first "time_shift" rows
+        df_temp = grouped_data.shift(time_shift)
+        df_temp[column_id] = "id=" + df[column_id].map(str) + ", shift={}".format(time_shift)
+        if column_kind:
+            df_temp[column_kind] = df[column_kind]
+        return df_temp.dropna()
+
+    return pd.concat([roll_out_time_series(time_shift) for time_shift in range_of_shifts],
+                     ignore_index=True)
+
+
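
A minimal usage sketch of the new ``roll_time_series`` function together with the existing feature
extraction; the DataFrame contents and variable names are made up for illustration::

    import pandas as pd

    from tsfresh import extract_features
    from tsfresh.utilities.dataframe_functions import roll_time_series

    # two short time series (kinds "x" and "y") for the ids 1 and 2
    df = pd.DataFrame({"id": [1, 1, 1, 1, 2, 2],
                       "time": [1, 2, 3, 4, 8, 9],
                       "x": [1, 2, 3, 4, 10, 11],
                       "y": [5, 6, 7, 8, 12, 13]})

    # roll forwards in "time": every prefix of each original id becomes its own new id
    rolled = roll_time_series(df, column_id="id", column_sort="time",
                              column_kind=None, rolling_direction=1)

    # the rolled frame can then be fed into the normal feature extraction
    features = extract_features(rolled, column_id="id", column_sort="time")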