From 3394c46d67c982b3fc397dbe9456b4e13a867e82 Mon Sep 17 00:00:00 2001 From: Guillaume Thomas Date: Thu, 12 Nov 2015 17:11:31 +0100 Subject: [PATCH 1/9] Added setup.py --- setup.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 setup.py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..b2727f1 --- /dev/null +++ b/setup.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- + +from distutils.core import setup +from setuptools import find_packages + +setup( + name='chrony', + version='0.1.0', + author='Guillaume Thomas', + author_email='guillaume.thomas@optimdata.eu', + license='LICENSE', + description='Timeseries analysis tools with specific focus on timespans. Built on top of pandas.', + url='https://github.com/optimdata/chrony', + include_package_data=True, + packages=find_packages(), +) From c55e758945abfe25cf9bc170668a861b4fa97ca8 Mon Sep 17 00:00:00 2001 From: Guillaume Thomas Date: Fri, 13 Nov 2015 05:44:23 +0100 Subject: [PATCH 2/9] Moved function --- chrony/charting.py | 5 +---- chrony/core.py | 3 +++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/chrony/charting.py b/chrony/charting.py index f570ba9..d1e69f6 100644 --- a/chrony/charting.py +++ b/chrony/charting.py @@ -4,10 +4,7 @@ import matplotlib.pyplot as plt import numpy as np - - -def compute_category_index(categories): - return {category: index + 1 for index, category in enumerate(sorted(set(categories)))} +from .core import compute_category_index def plot_events(categories, xmin, xmax, labels=None, xlim=None, linewidth=10): diff --git a/chrony/core.py b/chrony/core.py index 366dac9..b0f2827 100644 --- a/chrony/core.py +++ b/chrony/core.py @@ -2,3 +2,6 @@ from __future__ import absolute_import, division, print_function, unicode_literals + +def compute_category_index(categories): + return {category: index + 1 for index, category in enumerate(sorted(set(categories)))} From 2e7918ca2b6fa150e9549525b8e52584dcdfc7c4 Mon Sep 17 00:00:00 2001 From: Guillaume Thomas Date: Fri, 13 Nov 2015 05:44:34 +0100 Subject: [PATCH 3/9] Use ufunc in audit timestapn --- chrony/tests.py | 2 +- chrony/timespans.py | 38 +++++++++++++++++++------------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/chrony/tests.py b/chrony/tests.py index ebc1d6e..d286f06 100644 --- a/chrony/tests.py +++ b/chrony/tests.py @@ -8,7 +8,7 @@ import pytz import unittest -from .charting import compute_category_index +from .core import compute_category_index from .exceptions import BadLengthsError, BegPosteriorToEndError, OverlapError, NotSortedError, HasTimezoneError, IntegrityError from .timespans import audit_timespan, describe_timespan, to_stamps, to_spans, compute_segments diff --git a/chrony/timespans.py b/chrony/timespans.py index 70df898..9228a72 100644 --- a/chrony/timespans.py +++ b/chrony/timespans.py @@ -15,11 +15,10 @@ def audit_timespan(begs, ends): for beg, end in zip(begs, ends): if beg > end: raise BegPosteriorToEndError - for i in range(len(begs) - 1): - if begs[i + 1] < begs[i]: - raise NotSortedError - if ends[i] > begs[i + 1]: - raise OverlapError('At row %s end %s is posterior to %s' % (i, ends[i], begs[i + 1])) + if (begs < begs.shift()).sum(): + raise NotSortedError + if (ends > begs.shift())[1:].sum(): + raise OverlapError def describe_timespan(begs, ends): @@ -121,20 +120,21 @@ def to_spans(df, state_columns, value_columns, beg_col='ts_beg', end_col='ts_end return pd.DataFrame(dict(list(df_beg.to_dict('series').items()) + list(df_end.to_dict('series').items()))) -def merge_spans(spans, stamps, columns_states): - for key in ('beg', 'end'): - spans['ts'] = spans['ts_%s' % key] - spans = pd.merge(stamps, spans, how='outer', on='ts') - spans.set_index('ts', inplace=True) - spans.sort_index(inplace=True) - for column in columns_states: - spans['%s_%s' % (column, key)] = spans.pop(column).interpolate(method='time') - spans['%s_%s' % (column, key)].fillna(method='ffill', inplace=True) - spans['%s_%s' % (column, key)].fillna(method='bfill', inplace=True) - spans.reset_index(inplace=True) - spans.pop('ts') - spans = spans[~pd.isnull(spans['ts_%s' % key])] - return spans +# def merge_spans(left, right): + + # for key in ('beg', 'end'): + # spans['ts'] = spans['ts_%s' % key] + # spans = pd.merge(stamps, spans, how='outer', on='ts') + # spans.set_index('ts', inplace=True) + # spans.sort_index(inplace=True) + # for column in columns_states: + # spans['%s_%s' % (column, key)] = spans.pop(column).interpolate(method='time') + # spans['%s_%s' % (column, key)].fillna(method='ffill', inplace=True) + # spans['%s_%s' % (column, key)].fillna(method='bfill', inplace=True) + # spans.reset_index(inplace=True) + # spans.pop('ts') + # spans = spans[~pd.isnull(spans['ts_%s' % key])] + # return spans def compute_segments(df, columns): From 2dbbe3f613aa28fa6fd3402d216a9cbd63dc2c37 Mon Sep 17 00:00:00 2001 From: Guillaume Thomas Date: Fri, 13 Nov 2015 05:59:15 +0100 Subject: [PATCH 4/9] Added weighted_interpolate func --- chrony/core.py | 17 +++++++++++++++++ chrony/tests.py | 10 ++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/chrony/core.py b/chrony/core.py index b0f2827..012e4ee 100644 --- a/chrony/core.py +++ b/chrony/core.py @@ -2,6 +2,23 @@ from __future__ import absolute_import, division, print_function, unicode_literals +import numpy as np +import pandas as pd + def compute_category_index(categories): return {category: index + 1 for index, category in enumerate(sorted(set(categories)))} + + +def weighted_interpolate(serie, weights): + sb = serie.fillna(method='ffill') + se = serie.fillna(method='bfill') + cw = weights.cumsum() + w2 = pd.Series(None, index=serie.index) + w2[~np.isnan(serie)] = cw[~np.isnan(serie)] + wb = w2.fillna(method='ffill') + we = w2.fillna(method='bfill') + cw = (cw - wb) / (we - wb) + r = sb + cw * (se - sb) + r.update(serie) + return r diff --git a/chrony/tests.py b/chrony/tests.py index d286f06..106380c 100644 --- a/chrony/tests.py +++ b/chrony/tests.py @@ -8,7 +8,7 @@ import pytz import unittest -from .core import compute_category_index +from .core import compute_category_index, weighted_interpolate from .exceptions import BadLengthsError, BegPosteriorToEndError, OverlapError, NotSortedError, HasTimezoneError, IntegrityError from .timespans import audit_timespan, describe_timespan, to_stamps, to_spans, compute_segments @@ -120,8 +120,14 @@ def test_compute_segments(self): ) -class ChartingCase(unittest.TestCase): +class CoreCase(unittest.TestCase): def test_all(self): self.assertTrue(compute_category_index([]) == {}) self.assertTrue(compute_category_index(['a']) == {'a': 1}) self.assertTrue(compute_category_index(['b', 'a', 'b']) == {'a': 1, 'b': 2}) + + def test_weighted_interpolate(self): + s = pd.Series([0, np.nan, np.nan, 1, np.nan, np.nan, np.nan, 2]) + w = pd.Series([0, 1, 0, 1, 1, 2, 0, 1]) + r = pd.Series([0, .5, .5, 1, 1.25, 1.75, 1.75, 2]) + pd.util.testing.assert_series_equal(weighted_interpolate(s, w), r) From e6d7850db465aaed0709e446f4c7ab9b559f5da7 Mon Sep 17 00:00:00 2001 From: Guillaume Thomas Date: Fri, 13 Nov 2015 06:22:47 +0100 Subject: [PATCH 5/9] Fixed bug in audit timespan --- chrony/timespans.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chrony/timespans.py b/chrony/timespans.py index 9228a72..aeaa0df 100644 --- a/chrony/timespans.py +++ b/chrony/timespans.py @@ -17,7 +17,7 @@ def audit_timespan(begs, ends): raise BegPosteriorToEndError if (begs < begs.shift()).sum(): raise NotSortedError - if (ends > begs.shift())[1:].sum(): + if (ends.shift() > begs)[1:].sum(): raise OverlapError From c9ec0f32514d19559aaaa4b4e3f97b3967ec7f68 Mon Sep 17 00:00:00 2001 From: Guillaume Thomas Date: Wed, 18 Nov 2015 16:02:45 +0100 Subject: [PATCH 6/9] Added method in timespans --- chrony/tests.py | 19 ++++++++++++++++++- chrony/timespans.py | 22 ++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/chrony/tests.py b/chrony/tests.py index 106380c..ae3bd81 100644 --- a/chrony/tests.py +++ b/chrony/tests.py @@ -10,7 +10,7 @@ from .core import compute_category_index, weighted_interpolate from .exceptions import BadLengthsError, BegPosteriorToEndError, OverlapError, NotSortedError, HasTimezoneError, IntegrityError -from .timespans import audit_timespan, describe_timespan, to_stamps, to_spans, compute_segments +from .timespans import audit_timespan, describe_timespan, to_stamps, to_spans, compute_segments, clean_overlap_timespan, fill_na_dataframe pd.set_option('display.width', 1000) @@ -45,6 +45,12 @@ def test_all(self): begs = pd.date_range('1970-1-1', freq='d', periods=2).to_series().reset_index(drop=True) ends = pd.date_range('1970-1-2', freq='d', periods=2).to_series().reset_index(drop=True) describe_timespan(begs, ends) + describe_timespan(pd.Series(), pd.Series()) + self.assertIsNone(audit_timespan(pd.Series(), pd.Series())) + begs = pd.date_range('1970-1-1', freq='d', periods=2).to_series().reset_index(drop=True) + ends = pd.date_range('1970-1-3', freq='d', periods=2).to_series().reset_index(drop=True) + ret = pd.to_datetime(['1970-1-2', '1970-1-4']).to_series().reset_index(drop=True) + pd.util.testing.assert_series_equal(ret, clean_overlap_timespan(begs, ends)) # self.assertTrue(pd.Series().equals(describe_timespan(begs, ends))) def test_merge(self): @@ -75,6 +81,15 @@ def test_merge(self): 'value_d': [10., 20., 30.], 'value_s': ['10', '20', '30'] }, columns=stamp_columns) + df2b = pd.DataFrame({ + 'ts': pd.to_datetime(['2015-1-1', '2015-1-2', '2015-1-3']), + 'beg_state_d': [1., 2., -1.], + 'end_state_d': [-1., 1., 2.], + 'beg_state_s': ['1', '2', 'UNDEFINED'], + 'end_state_s': ['UNDEFINED', '1', '2'], + 'value_d': [10., 20., 30.], + 'value_s': ['10', '20', '30'] + }, columns=stamp_columns) df3 = pd.DataFrame( to_stamps( df1, @@ -84,6 +99,8 @@ def test_merge(self): columns=stamp_columns ) pd.util.testing.assert_frame_equal(df3, df2) + fill_na_dataframe(df3) + pd.util.testing.assert_frame_equal(df3, df2b) df4 = pd.DataFrame( to_spans( df3, diff --git a/chrony/timespans.py b/chrony/timespans.py index aeaa0df..9342174 100644 --- a/chrony/timespans.py +++ b/chrony/timespans.py @@ -8,6 +8,8 @@ def audit_timespan(begs, ends): + if begs.empty and ends.empty: + return if begs.dt.tz or ends.dt.tz: raise HasTimezoneError if len(begs) != len(ends): @@ -22,6 +24,9 @@ def audit_timespan(begs, ends): def describe_timespan(begs, ends): + if begs.empty and ends.empty: + print('Empty series') + return contiguous_transitions = (begs == ends.shift()).sum() coverage = (ends - begs).sum().total_seconds() / (ends[len(ends) - 1] - begs[0]).total_seconds() metrics = ( @@ -36,6 +41,23 @@ def describe_timespan(begs, ends): return retval +def clean_overlap_timespan(begs, ends): + return pd.DataFrame({'ts_end': ends, 'ts_end_shifted': begs.shift(-1)}).min(axis=1) + + +def fill_na_series(series): + if series.dtype.char == 'O': + series.fillna('UNDEFINED', inplace=True) + else: + series.fillna(-1, inplace=True) + + +def fill_na_dataframe(df): + for column in df.columns: + if column.startswith('beg_') or column.startswith('end_'): + fill_na_series(df[column]) + + def to_stamps(df, state_columns, value_columns, beg_col='ts_beg', end_col='ts_end'): ''' Convert an frame representing periods (eg each row has a beg and end) to a frame representing change of periods. From b9238a8a44c01ea313339bf95486ccc7dee44399 Mon Sep 17 00:00:00 2001 From: Guillaume Thomas Date: Wed, 18 Nov 2015 16:02:51 +0100 Subject: [PATCH 7/9] Updated README --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 1af3de1..2e14e47 100644 --- a/README.md +++ b/README.md @@ -82,3 +82,8 @@ Check out tests for examples. A **timespan** is a row of a `pandas.DataFrame` which represents a period of time between two fixed points. These are represented using a beg and a end column. +### Development + +#### Tests + + nosetests chrony --with-coverage --cover-package chrony From ac74a03bed8777c650b4b75649cf3817ce8ed221 Mon Sep 17 00:00:00 2001 From: Guillaume Thomas Date: Wed, 18 Nov 2015 16:06:12 +0100 Subject: [PATCH 8/9] Added travis.yml --- .travis.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..aa5422e --- /dev/null +++ b/.travis.yml @@ -0,0 +1,15 @@ +language: python + +python: + - 3.4 + +env: + +install: + - pip install -r requirements.txt + - pip install coveralls + +script: + - nosetests --with-cover --cover-package chrony chrony + +after_success: coveralls \ No newline at end of file From e5866bb662c4d8dae11b456213d8bfe04c03f503 Mon Sep 17 00:00:00 2001 From: Guillaume Thomas Date: Wed, 18 Nov 2015 16:35:54 +0100 Subject: [PATCH 9/9] Updated README --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 2e14e47..56e358c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,8 @@ # chrony +[![Build Status](https://travis-ci.org/optimdata/chrony.svg?branch=master)](https://travis-ci.org/optimdata/chrony) +[![Coverage Status](https://coveralls.io/repos/optimdata/chrony/badge.svg?branch=master&service=github)](https://coveralls.io/github/optimdata/chrony?branch=master) + Timeseries analysis tools with specific focus on timespans. Built on top of pandas. ## tldr