Skip to content
This repository has been archived by the owner on Sep 19, 2018. It is now read-only.

Commit

Permalink
Merge branch 'master' of github.com:optimdata/chrony
Browse files Browse the repository at this point in the history
  • Loading branch information
alexandrecaze committed Dec 3, 2015
2 parents 8ffd58a + e5866bb commit 5b16267
Show file tree
Hide file tree
Showing 7 changed files with 127 additions and 26 deletions.
15 changes: 15 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Travis CI configuration for the chrony test suite.
language: python

# Interpreter version(s) the build runs against.
python:
- 3.4

# No environment variables are defined.
env:

# Install the project requirements plus the Coveralls client.
install:
- pip install -r requirements.txt
- pip install coveralls

# Run the tests with nose, measuring coverage of the chrony package.
script:
- nosetests --with-cover --cover-package chrony chrony

# Run the coveralls command once the build has succeeded.
after_success: coveralls
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# chrony

[![Build Status](https://travis-ci.org/optimdata/chrony.svg?branch=master)](https://travis-ci.org/optimdata/chrony)
[![Coverage Status](https://coveralls.io/repos/optimdata/chrony/badge.svg?branch=master&service=github)](https://coveralls.io/github/optimdata/chrony?branch=master)

Timeseries analysis tools with specific focus on timespans. Built on top of pandas.

## tldr
Expand Down Expand Up @@ -82,3 +85,8 @@ Check out tests for examples.
A **timespan** is a row of a `pandas.DataFrame` which represents a period of time between two fixed points. These are represented using a `beg` and an `end` column.


### Development

#### Tests

nosetests chrony --with-coverage --cover-package chrony
5 changes: 1 addition & 4 deletions chrony/charting.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@

import matplotlib.pyplot as plt
import numpy as np


def compute_category_index(categories):
    """Map every distinct category to its 1-based rank in sorted order."""
    ordered = sorted(set(categories))
    return dict(zip(ordered, range(1, len(ordered) + 1)))
from .core import compute_category_index


def plot_events(categories, xmin, xmax, labels=None, xlim=None, linewidth=10):
Expand Down
20 changes: 20 additions & 0 deletions chrony/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,23 @@

from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd


def compute_category_index(categories):
    """Map every distinct category to its 1-based rank in sorted order."""
    ordered = sorted(set(categories))
    return dict(zip(ordered, range(1, len(ordered) + 1)))


def weighted_interpolate(serie, weights):
    """Fill the gaps of ``serie`` by interpolating along cumulative weights.

    Between two consecutive known values, each missing point moves from the
    previous known value towards the next one in proportion to the share of
    weight accumulated since the previous known point.

    :param serie: ``pandas.Series`` of numbers whose NaN entries are the gaps.
    :param weights: ``pandas.Series`` of weights sharing the index of ``serie``.
    :returns: a new ``pandas.Series``; ``serie`` itself is left untouched.
        Known values are kept as is. Gaps located before the first or after
        the last known value stay NaN, and so do gaps whose surrounding known
        points are separated by a total weight of zero.
    """
    known = serie.notna()
    prev_value = serie.ffill()
    next_value = serie.bfill()
    cum_weight = weights.cumsum()
    # Cumulative weight at the known points only, propagated both ways so
    # that every row sees the cumulative weight of its two surrounding
    # known points.
    anchor_weight = cum_weight.where(known)
    prev_weight = anchor_weight.ffill()
    next_weight = anchor_weight.bfill()
    fraction = (cum_weight - prev_weight) / (next_weight - prev_weight)
    interpolated = prev_value + fraction * (next_value - prev_value)
    # At known points the fraction is 0/0 (NaN): restore the original values.
    return interpolated.where(~known, serie)
29 changes: 26 additions & 3 deletions chrony/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
import pytz
import unittest

from .charting import compute_category_index
from .core import compute_category_index, weighted_interpolate
from .exceptions import BadLengthsError, BegPosteriorToEndError, OverlapError, NotSortedError, HasTimezoneError, IntegrityError
from .timespans import audit_timespan, describe_timespan, to_stamps, to_spans, compute_segments
from .timespans import audit_timespan, describe_timespan, to_stamps, to_spans, compute_segments, clean_overlap_timespan, fill_na_dataframe

pd.set_option('display.width', 1000)

Expand Down Expand Up @@ -45,6 +45,12 @@ def test_all(self):
begs = pd.date_range('1970-1-1', freq='d', periods=2).to_series().reset_index(drop=True)
ends = pd.date_range('1970-1-2', freq='d', periods=2).to_series().reset_index(drop=True)
describe_timespan(begs, ends)
describe_timespan(pd.Series(), pd.Series())
self.assertIsNone(audit_timespan(pd.Series(), pd.Series()))
begs = pd.date_range('1970-1-1', freq='d', periods=2).to_series().reset_index(drop=True)
ends = pd.date_range('1970-1-3', freq='d', periods=2).to_series().reset_index(drop=True)
ret = pd.to_datetime(['1970-1-2', '1970-1-4']).to_series().reset_index(drop=True)
pd.util.testing.assert_series_equal(ret, clean_overlap_timespan(begs, ends))
# self.assertTrue(pd.Series().equals(describe_timespan(begs, ends)))

def test_merge(self):
Expand Down Expand Up @@ -75,6 +81,15 @@ def test_merge(self):
'value_d': [10., 20., 30.],
'value_s': ['10', '20', '30']
}, columns=stamp_columns)
df2b = pd.DataFrame({
'ts': pd.to_datetime(['2015-1-1', '2015-1-2', '2015-1-3']),
'beg_state_d': [1., 2., -1.],
'end_state_d': [-1., 1., 2.],
'beg_state_s': ['1', '2', 'UNDEFINED'],
'end_state_s': ['UNDEFINED', '1', '2'],
'value_d': [10., 20., 30.],
'value_s': ['10', '20', '30']
}, columns=stamp_columns)
df3 = pd.DataFrame(
to_stamps(
df1,
Expand All @@ -84,6 +99,8 @@ def test_merge(self):
columns=stamp_columns
)
pd.util.testing.assert_frame_equal(df3, df2)
fill_na_dataframe(df3)
pd.util.testing.assert_frame_equal(df3, df2b)
df4 = pd.DataFrame(
to_spans(
df3,
Expand Down Expand Up @@ -120,8 +137,14 @@ def test_compute_segments(self):
)


class ChartingCase(unittest.TestCase):
class CoreCase(unittest.TestCase):
    # Unit tests for the helpers exposed by chrony.core.

    def test_all(self):
        # Distinct categories are ranked from 1, following their sorted order.
        self.assertEqual(compute_category_index([]), {})
        self.assertEqual(compute_category_index(['a']), {'a': 1})
        self.assertEqual(compute_category_index(['b', 'a', 'b']), {'a': 1, 'b': 2})

    def test_weighted_interpolate(self):
        # Gaps are filled in proportion to the weight accumulated between
        # the surrounding known points.
        s = pd.Series([0, np.nan, np.nan, 1, np.nan, np.nan, np.nan, 2])
        w = pd.Series([0, 1, 0, 1, 1, 2, 0, 1])
        r = pd.Series([0, .5, .5, 1, 1.25, 1.75, 1.75, 2])
        # pd.testing is the public home of the assertion helpers;
        # pd.util.testing was removed in pandas 2.0.
        pd.testing.assert_series_equal(weighted_interpolate(s, w), r)
60 changes: 41 additions & 19 deletions chrony/timespans.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,19 @@


def audit_timespan(begs, ends):
    """Check that ``begs`` and ``ends`` describe well-formed timespans.

    Rows are paired by position. The spans must be timezone-naive, satisfy
    ``beg <= end``, be sorted by ``beg`` and must not overlap (each ``end``
    is at or before the following ``beg``).

    :param begs: datetime ``pandas.Series`` holding the start of each span.
    :param ends: datetime ``pandas.Series`` holding the end of each span.
    :returns: ``None`` when every check passes; two empty series are valid.
    :raises BadLengthsError: if the two series do not have the same length.
    :raises HasTimezoneError: if either series is timezone-aware.
    :raises BegPosteriorToEndError: if a span starts after it ends.
    :raises NotSortedError: if a ``beg`` is earlier than the previous one.
    :raises OverlapError: if a span ends after the next one begins.
    """
    # Compared first so that an empty series paired with a non-empty one is
    # reported as a length mismatch instead of failing on the ``.dt`` access.
    if len(begs) != len(ends):
        raise BadLengthsError
    if begs.empty:
        return
    if begs.dt.tz or ends.dt.tz:
        raise HasTimezoneError
    # Work on the underlying arrays: the checks are positional and must not
    # depend on the index labels of the two series.
    beg_values = begs.values
    end_values = ends.values
    if (beg_values > end_values).any():
        raise BegPosteriorToEndError
    if (beg_values[1:] < beg_values[:-1]).any():
        raise NotSortedError
    overruns = end_values[:-1] > beg_values[1:]
    if overruns.any():
        # Report the first span whose end runs past the next beginning.
        row = int(overruns.argmax())
        raise OverlapError('At row %s end %s is posterior to %s' % (row, ends.iloc[row], begs.iloc[row + 1]))


def audit_timespan_print(begs, ends):
Expand All @@ -42,6 +43,9 @@ def audit_timespan_print(begs, ends):


def describe_timespan(begs, ends):
if begs.empty and ends.empty:
print('Empty series')
return
contiguous_transitions = (begs == ends.shift()).sum()
coverage = (ends - begs).sum().total_seconds() / (ends[len(ends) - 1] - begs[0]).total_seconds()
metrics = (
Expand All @@ -56,6 +60,23 @@ def describe_timespan(begs, ends):
return retval


def clean_overlap_timespan(begs, ends):
    """Return, for each row, the earlier of its end and the next row's beg.

    The last row has no following beg, so its own end is returned.
    """
    next_begs = begs.shift(-1)
    candidates = pd.DataFrame({'ts_end': ends, 'ts_end_shifted': next_begs})
    return candidates.min(axis=1)


def _na_fill_value(series):
    """Return the placeholder for missing values: text for object/string data, -1 otherwise."""
    dtype = series.dtype
    if pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype):
        return 'UNDEFINED'
    return -1


def fill_na_series(series):
    """Replace the missing values of ``series`` in place.

    Object/string series receive ``'UNDEFINED'``; every other dtype
    receives ``-1``.

    :param series: the ``pandas.Series`` to modify.
    :returns: ``None``; the series is modified in place.
    """
    series.fillna(_na_fill_value(series), inplace=True)


def fill_na_dataframe(df):
    """Replace the missing values of the ``beg_*`` and ``end_*`` columns of ``df``.

    Other columns are left untouched.

    :param df: the ``pandas.DataFrame`` to modify.
    :returns: ``None``; the frame is modified in place.
    """
    for column in df.columns:
        if column.startswith(('beg_', 'end_')):
            # Assign the filled column back: an in-place fill of ``df[column]``
            # is not guaranteed to reach the frame (pandas copy-on-write).
            df[column] = df[column].fillna(_na_fill_value(df[column]))


def to_stamps(df, state_columns, value_columns, beg_col='ts_beg', end_col='ts_end'):
'''
Convert an frame representing periods (eg each row has a beg and end) to a frame representing change of periods.
Expand Down Expand Up @@ -140,20 +161,21 @@ def to_spans(df, state_columns, value_columns, beg_col='ts_beg', end_col='ts_end
return pd.DataFrame(dict(list(df_beg.to_dict('series').items()) + list(df_end.to_dict('series').items())))


# Attach values taken from `stamps` to both boundaries of every span.
#
# For each key in ('beg', 'end'): the 'ts_<key>' column is copied into a
# temporary 'ts' column, `stamps` and `spans` are outer-merged on 'ts', and the
# result is indexed and sorted by 'ts'. Each column listed in `columns_states`
# is then popped, interpolated with method='time', stored as '<column>_<key>'
# and forward/backward filled. Finally the 'ts' helper is dropped and only the
# rows having a 'ts_<key>' value are kept. The resulting frame is returned.
#
# NOTE(review): indentation is lost in this view, so the loop nesting described
# above is inferred from the statements — confirm against the real file.
# NOTE(review): presumably `stamps` carries a 'ts' column plus the columns named
# in `columns_states`, and `spans` carries 'ts_beg'/'ts_end' — verify with callers.
def merge_spans(spans, stamps, columns_states):
for key in ('beg', 'end'):
spans['ts'] = spans['ts_%s' % key]
spans = pd.merge(stamps, spans, how='outer', on='ts')
spans.set_index('ts', inplace=True)
spans.sort_index(inplace=True)
for column in columns_states:
spans['%s_%s' % (column, key)] = spans.pop(column).interpolate(method='time')
spans['%s_%s' % (column, key)].fillna(method='ffill', inplace=True)
spans['%s_%s' % (column, key)].fillna(method='bfill', inplace=True)
spans.reset_index(inplace=True)
spans.pop('ts')
spans = spans[~pd.isnull(spans['ts_%s' % key])]
return spans
# def merge_spans(left, right):

# for key in ('beg', 'end'):
# spans['ts'] = spans['ts_%s' % key]
# spans = pd.merge(stamps, spans, how='outer', on='ts')
# spans.set_index('ts', inplace=True)
# spans.sort_index(inplace=True)
# for column in columns_states:
# spans['%s_%s' % (column, key)] = spans.pop(column).interpolate(method='time')
# spans['%s_%s' % (column, key)].fillna(method='ffill', inplace=True)
# spans['%s_%s' % (column, key)].fillna(method='bfill', inplace=True)
# spans.reset_index(inplace=True)
# spans.pop('ts')
# spans = spans[~pd.isnull(spans['ts_%s' % key])]
# return spans


def compute_segments(df, columns):
Expand Down
16 changes: 16 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-
"""Packaging script for the chrony distribution."""

# setup() comes from setuptools rather than distutils: distutils was removed
# from the standard library in Python 3.12 and does not honour setuptools-only
# options such as include_package_data.
from setuptools import find_packages, setup

setup(
    name='chrony',
    version='0.1.0',
    author='Guillaume Thomas',
    author_email='[email protected]',
    license='LICENSE',
    description='Timeseries analysis tools with specific focus on timespans. Built on top of pandas.',
    url='https://github.com/optimdata/chrony',
    include_package_data=True,
    packages=find_packages(),
)

0 comments on commit 5b16267

Please sign in to comment.