Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/blue-yonder/tsfresh
Browse files Browse the repository at this point in the history
  • Loading branch information
MaxBenChrist committed Sep 19, 2018
2 parents fd4fea2 + a53fb6a commit 52e50bd
Show file tree
Hide file tree
Showing 9 changed files with 55 additions and 105 deletions.
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ Unreleased
- change chunking in energy_ratio_by_chunks to use all data points
- fix warning for spkt_welch_density
- adapt default settings for "value_count" and "range_count"
- added
    - maxlag parameter to agg_autocorrelation function

Version 0.11.1
==============
Expand Down
38 changes: 0 additions & 38 deletions docs/api/tests.integrations.rst

This file was deleted.

22 changes: 0 additions & 22 deletions docs/api/tests.rst

This file was deleted.

22 changes: 0 additions & 22 deletions docs/api/tests.units.rst

This file was deleted.

2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']
exclude_patterns = ['_build', 'api/tests*']

# The reST default role (used for this markup: `text`) to use for all documents.
# default_role = None
Expand Down
2 changes: 1 addition & 1 deletion rdocs-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Sphinx>=1.6.4
Sphinx==1.6.4
sphinx_rtd_theme>=0.2.4
-r requirements.txt
31 changes: 23 additions & 8 deletions tests/units/feature_extraction/test_feature_calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,36 +129,51 @@ def test_sum(self):
self.assertEqualOnAllArrayTypes(sum_values, [-1.2, -2, -3, -4], -10.2)
self.assertEqualOnAllArrayTypes(sum_values, [], 0)

def test_agg_autocorrelation(self):
def test_agg_autocorrelation_returns_correct_values(self):

param = [{"f_agg": "mean"}]
param = [{"f_agg": "mean", "maxlag": 10}]
x = [1, 1, 1, 1, 1, 1, 1]
expected_res = 0
res = dict(agg_autocorrelation(x, param=param))["f_agg_\"mean\""]
res = dict(agg_autocorrelation(x, param=param))["f_agg_\"mean\"__maxlag_10"]
self.assertAlmostEqual(res, expected_res, places=4)

x = [1, 2, -3]
expected_res = 1 / np.var(x) * (((1 * 2 + 2 * (-3)) / 2 + (1 * -3)) / 2)
res = dict(agg_autocorrelation(x, param=param))["f_agg_\"mean\""]
res = dict(agg_autocorrelation(x, param=param))["f_agg_\"mean\"__maxlag_10"]
self.assertAlmostEqual(res, expected_res, places=4)

np.random.seed(42)
x = np.random.normal(size=3000)
expected_res = 0
res = dict(agg_autocorrelation(x, param=param))["f_agg_\"mean\""]
res = dict(agg_autocorrelation(x, param=param))["f_agg_\"mean\"__maxlag_10"]
self.assertAlmostEqual(res, expected_res, places=2)

param=[{"f_agg": "median"}]
param = [{"f_agg": "median", "maxlag": 10}]
x = [1, 1, 1, 1, 1, 1, 1]
expected_res = 0
res = dict(agg_autocorrelation(x, param=param))["f_agg_\"median\""]
res = dict(agg_autocorrelation(x, param=param))["f_agg_\"median\"__maxlag_10"]
self.assertAlmostEqual(res, expected_res, places=4)

x = [1, 2, -3]
expected_res = 1 / np.var(x) * (((1 * 2 + 2 * (-3)) / 2 + (1 * -3)) / 2)
res = dict(agg_autocorrelation(x, param=param))["f_agg_\"median\""]
res = dict(agg_autocorrelation(x, param=param))["f_agg_\"median\"__maxlag_10"]
self.assertAlmostEqual(res, expected_res, places=4)

def test_agg_autocorrelation_returns_max_lag_does_not_affect_other_results(self):

param = [{"f_agg": "mean", "maxlag": 1},
{"f_agg": "mean", "maxlag": 10}]
x = range(10)
res1 = dict(agg_autocorrelation(x, param=param))["f_agg_\"mean\"__maxlag_1"]
res10 = dict(agg_autocorrelation(x, param=param))["f_agg_\"mean\"__maxlag_10"]
self.assertAlmostEqual(res1, 0.77777777, places=4)
self.assertAlmostEqual(res10, -0.64983164983165, places=4)

param = [{"f_agg": "mean", "maxlag": 1}]
x = range(10)
res1 = dict(agg_autocorrelation(x, param=param))["f_agg_\"mean\"__maxlag_1"]
self.assertAlmostEqual(res1, 0.77777777, places=4)

def test_partial_autocorrelation(self):

# Test for altering time series
Expand Down
39 changes: 27 additions & 12 deletions tsfresh/feature_extraction/feature_calculators.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,33 +276,48 @@ def sum_values(x):

@set_property("fctype", "combiner")
def agg_autocorrelation(x, param):
"""
Calculates the value of an aggregation function f_agg (e.g. var or mean) of the autocorrelation
(Compare to http://en.wikipedia.org/wiki/Autocorrelation#Estimation), taken over different all possible lags
(1 to length of x)
r"""
Calculates the value of an aggregation function :math:`f_{agg}` (e.g. the variance or the mean) over the
autocorrelation :math:`R(l)` for different lags. The autocorrelation :math:`R(l)` for lag :math:`l` is defined as
.. math::
\\frac{1}{n-1} \\sum_{l=1,\ldots, n} \\frac{1}{(n-l)\sigma^{2}} \\sum_{t=1}^{n-l}(X_{t}-\\mu )(X_{t+l}-\\mu)
R(l) = \frac{1}{(n-l)\sigma^{2}} \sum_{t=1}^{n-l}(X_{t}-\mu )(X_{t+l}-\mu)
where :math:`n` is the length of the time series :math:`X_i`, :math:`\sigma^2` its variance and :math:`\mu` its
mean.
where :math:`X_i` are the values of the time series, :math:`n` its length. Finally, :math:`\sigma^2` and
:math:`\mu` are estimators for its variance and mean
(See `Estimation of the Autocorrelation function <http://en.wikipedia.org/wiki/Autocorrelation#Estimation>`_).
The :math:`R(l)` for different lags :math:`l` form a vector. This feature calculator applies the aggregation
function :math:`f_{agg}` to this vector and returns
.. math::
f_{agg} \left( R(1), \ldots, R(m)\right) \quad \text{for} \quad m = \min(n - 1, maxlag).
Here :math:`maxlag` is the second parameter passed to this function.
:param x: the time series to calculate the feature of
:type x: pandas.Series
:param param: contains dictionaries {"attr": x} with x str, name of a numpy function (e.g. mean, var, std, median),
the name of the aggregator function that is applied to the autocorrelations
:param param: contains dictionaries {"f_agg": x, "maxlag": n} with x str, the name of a numpy function
(e.g. mean, var, std, median) that is applied as the aggregator function to the
autocorrelations. Further, n is an int and the maximal number of lags to consider.
:type param: list
:return: the value of this feature
:return type: float
"""
# if the time series is longer than the following threshold, we use fft to calculate the acf
THRESHOLD_TO_USE_FFT = 1250
var = np.var(x)
n = len(x)
max_maxlag = max([config["maxlag"] for config in param])

if np.abs(var) < 10**-10 or n == 1:
a = 0
a = [0] * len(x)
else:
a = acf(x, unbiased=True, fft=n > 1250)[1:]
return [("f_agg_\"{}\"".format(config["f_agg"]), getattr(np, config["f_agg"])(a)) for config in param]
a = acf(x, unbiased=True, fft=n > THRESHOLD_TO_USE_FFT, nlags=max_maxlag)[1:]
return [("f_agg_\"{}\"__maxlag_{}".format(config["f_agg"], config["maxlag"]),
getattr(np, config["f_agg"])(a[:int(config["maxlag"])])) for config in param]


@set_property("fctype", "combiner")
Expand Down
2 changes: 1 addition & 1 deletion tsfresh/feature_extraction/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def __init__(self):
"large_standard_deviation": [{"r": r * 0.05} for r in range(1, 20)],
"quantile": [{"q": q} for q in [.1, .2, .3, .4, .6, .7, .8, .9]],
"autocorrelation": [{"lag": lag} for lag in range(10)],
"agg_autocorrelation": [{"f_agg": s} for s in ["mean", "median", "var"]],
"agg_autocorrelation": [{"f_agg": s, "maxlag": 40} for s in ["mean", "median", "var"]],
"partial_autocorrelation": [{"lag": lag} for lag in range(10)],
"number_cwt_peaks": [{"n": n} for n in [1, 5]],
"number_peaks": [{"n": n} for n in [1, 3, 5, 10, 50]],
Expand Down

0 comments on commit 52e50bd

Please sign in to comment.