From 3394c46d67c982b3fc397dbe9456b4e13a867e82 Mon Sep 17 00:00:00 2001
From: Guillaume Thomas <guillaume.thomas642@gmail.com>
Date: Thu, 12 Nov 2015 17:11:31 +0100
Subject: [PATCH 1/9] Added setup.py

---
 setup.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 setup.py

diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..b2727f1
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,16 @@
+# -*- coding: utf-8 -*-
+
+from distutils.core import setup
+from setuptools import find_packages
+
+setup(
+    name='chrony',
+    version='0.1.0',
+    author='Guillaume Thomas',
+    author_email='guillaume.thomas@optimdata.eu',
+    license='LICENSE',
+    description='Timeseries analysis tools with specific focus on timespans. Built on top of pandas.',
+    url='https://github.com/optimdata/chrony',
+    include_package_data=True,
+    packages=find_packages(),
+)

From c55e758945abfe25cf9bc170668a861b4fa97ca8 Mon Sep 17 00:00:00 2001
From: Guillaume Thomas <guillaume.thomas642@gmail.com>
Date: Fri, 13 Nov 2015 05:44:23 +0100
Subject: [PATCH 2/9] Moved function

---
 chrony/charting.py | 5 +----
 chrony/core.py     | 3 +++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/chrony/charting.py b/chrony/charting.py
index f570ba9..d1e69f6 100644
--- a/chrony/charting.py
+++ b/chrony/charting.py
@@ -4,10 +4,7 @@
 
 import matplotlib.pyplot as plt
 import numpy as np
-
-
-def compute_category_index(categories):
-    return {category: index + 1 for index, category in enumerate(sorted(set(categories)))}
+from .core import compute_category_index
 
 
 def plot_events(categories, xmin, xmax, labels=None, xlim=None, linewidth=10):
diff --git a/chrony/core.py b/chrony/core.py
index 366dac9..b0f2827 100644
--- a/chrony/core.py
+++ b/chrony/core.py
@@ -2,3 +2,6 @@
 
 from __future__ import absolute_import, division, print_function, unicode_literals
 
+
+def compute_category_index(categories):
+    return {category: index + 1 for index, category in enumerate(sorted(set(categories)))}

From 2e7918ca2b6fa150e9549525b8e52584dcdfc7c4 Mon Sep 17 00:00:00 2001
From: Guillaume Thomas <guillaume.thomas642@gmail.com>
Date: Fri, 13 Nov 2015 05:44:34 +0100
Subject: [PATCH 3/9] Use ufunc in audit timespan

---
 chrony/tests.py     |  2 +-
 chrony/timespans.py | 38 +++++++++++++++++++-------------------
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/chrony/tests.py b/chrony/tests.py
index ebc1d6e..d286f06 100644
--- a/chrony/tests.py
+++ b/chrony/tests.py
@@ -8,7 +8,7 @@
 import pytz
 import unittest
 
-from .charting import compute_category_index
+from .core import compute_category_index
 from .exceptions import BadLengthsError, BegPosteriorToEndError, OverlapError, NotSortedError, HasTimezoneError, IntegrityError
 from .timespans import audit_timespan, describe_timespan, to_stamps, to_spans, compute_segments
 
diff --git a/chrony/timespans.py b/chrony/timespans.py
index 70df898..9228a72 100644
--- a/chrony/timespans.py
+++ b/chrony/timespans.py
@@ -15,11 +15,10 @@ def audit_timespan(begs, ends):
     for beg, end in zip(begs, ends):
         if beg > end:
             raise BegPosteriorToEndError
-    for i in range(len(begs) - 1):
-        if begs[i + 1] < begs[i]:
-            raise NotSortedError
-        if ends[i] > begs[i + 1]:
-            raise OverlapError('At row %s end %s is posterior to %s' % (i, ends[i], begs[i + 1]))
+    if (begs < begs.shift()).sum():
+        raise NotSortedError
+    if (ends > begs.shift())[1:].sum():
+        raise OverlapError
 
 
 def describe_timespan(begs, ends):
@@ -121,20 +120,21 @@ def to_spans(df, state_columns, value_columns, beg_col='ts_beg', end_col='ts_end
     return pd.DataFrame(dict(list(df_beg.to_dict('series').items()) + list(df_end.to_dict('series').items())))
 
 
-def merge_spans(spans, stamps, columns_states):
-    for key in ('beg', 'end'):
-        spans['ts'] = spans['ts_%s' % key]
-        spans = pd.merge(stamps, spans, how='outer', on='ts')
-        spans.set_index('ts', inplace=True)
-        spans.sort_index(inplace=True)
-        for column in columns_states:
-            spans['%s_%s' % (column, key)] = spans.pop(column).interpolate(method='time')
-            spans['%s_%s' % (column, key)].fillna(method='ffill', inplace=True)
-            spans['%s_%s' % (column, key)].fillna(method='bfill', inplace=True)
-        spans.reset_index(inplace=True)
-        spans.pop('ts')
-        spans = spans[~pd.isnull(spans['ts_%s' % key])]
-    return spans
+# def merge_spans(left, right):
+    
+    # for key in ('beg', 'end'):
+    #     spans['ts'] = spans['ts_%s' % key]
+    #     spans = pd.merge(stamps, spans, how='outer', on='ts')
+    #     spans.set_index('ts', inplace=True)
+    #     spans.sort_index(inplace=True)
+    #     for column in columns_states:
+    #         spans['%s_%s' % (column, key)] = spans.pop(column).interpolate(method='time')
+    #         spans['%s_%s' % (column, key)].fillna(method='ffill', inplace=True)
+    #         spans['%s_%s' % (column, key)].fillna(method='bfill', inplace=True)
+    #     spans.reset_index(inplace=True)
+    #     spans.pop('ts')
+    #     spans = spans[~pd.isnull(spans['ts_%s' % key])]
+    # return spans
 
 
 def compute_segments(df, columns):

From 2dbbe3f613aa28fa6fd3402d216a9cbd63dc2c37 Mon Sep 17 00:00:00 2001
From: Guillaume Thomas <guillaume.thomas642@gmail.com>
Date: Fri, 13 Nov 2015 05:59:15 +0100
Subject: [PATCH 4/9] Added weighted_interpolate func

---
 chrony/core.py  | 17 +++++++++++++++++
 chrony/tests.py | 10 ++++++++--
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/chrony/core.py b/chrony/core.py
index b0f2827..012e4ee 100644
--- a/chrony/core.py
+++ b/chrony/core.py
@@ -2,6 +2,23 @@
 
 from __future__ import absolute_import, division, print_function, unicode_literals
 
+import numpy as np
+import pandas as pd
+
 
 def compute_category_index(categories):
     return {category: index + 1 for index, category in enumerate(sorted(set(categories)))}
+
+
+def weighted_interpolate(serie, weights):
+    sb = serie.fillna(method='ffill')
+    se = serie.fillna(method='bfill')
+    cw = weights.cumsum()
+    w2 = pd.Series(None, index=serie.index)
+    w2[~np.isnan(serie)] = cw[~np.isnan(serie)]
+    wb = w2.fillna(method='ffill')
+    we = w2.fillna(method='bfill')
+    cw = (cw - wb) / (we - wb)
+    r = sb + cw * (se - sb)
+    r.update(serie)
+    return r
diff --git a/chrony/tests.py b/chrony/tests.py
index d286f06..106380c 100644
--- a/chrony/tests.py
+++ b/chrony/tests.py
@@ -8,7 +8,7 @@
 import pytz
 import unittest
 
-from .core import compute_category_index
+from .core import compute_category_index, weighted_interpolate
 from .exceptions import BadLengthsError, BegPosteriorToEndError, OverlapError, NotSortedError, HasTimezoneError, IntegrityError
 from .timespans import audit_timespan, describe_timespan, to_stamps, to_spans, compute_segments
 
@@ -120,8 +120,14 @@ def test_compute_segments(self):
         )
 
 
-class ChartingCase(unittest.TestCase):
+class CoreCase(unittest.TestCase):
     def test_all(self):
         self.assertTrue(compute_category_index([]) == {})
         self.assertTrue(compute_category_index(['a']) == {'a': 1})
         self.assertTrue(compute_category_index(['b', 'a', 'b']) == {'a': 1, 'b': 2})
+
+    def test_weighted_interpolate(self):
+        s = pd.Series([0, np.nan, np.nan, 1, np.nan, np.nan, np.nan, 2])
+        w = pd.Series([0, 1, 0, 1, 1, 2, 0, 1])
+        r = pd.Series([0, .5, .5, 1, 1.25, 1.75, 1.75, 2])
+        pd.util.testing.assert_series_equal(weighted_interpolate(s, w), r)

From e6d7850db465aaed0709e446f4c7ab9b559f5da7 Mon Sep 17 00:00:00 2001
From: Guillaume Thomas <guillaume.thomas642@gmail.com>
Date: Fri, 13 Nov 2015 06:22:47 +0100
Subject: [PATCH 5/9] Fixed bug in audit timespan

---
 chrony/timespans.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chrony/timespans.py b/chrony/timespans.py
index 9228a72..aeaa0df 100644
--- a/chrony/timespans.py
+++ b/chrony/timespans.py
@@ -17,7 +17,7 @@ def audit_timespan(begs, ends):
             raise BegPosteriorToEndError
     if (begs < begs.shift()).sum():
         raise NotSortedError
-    if (ends > begs.shift())[1:].sum():
+    if (ends.shift() > begs)[1:].sum():
         raise OverlapError
 
 

From c9ec0f32514d19559aaaa4b4e3f97b3967ec7f68 Mon Sep 17 00:00:00 2001
From: Guillaume Thomas <guillaume.thomas642@gmail.com>
Date: Wed, 18 Nov 2015 16:02:45 +0100
Subject: [PATCH 6/9] Added method in timespans

---
 chrony/tests.py     | 19 ++++++++++++++++++-
 chrony/timespans.py | 22 ++++++++++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/chrony/tests.py b/chrony/tests.py
index 106380c..ae3bd81 100644
--- a/chrony/tests.py
+++ b/chrony/tests.py
@@ -10,7 +10,7 @@
 
 from .core import compute_category_index, weighted_interpolate
 from .exceptions import BadLengthsError, BegPosteriorToEndError, OverlapError, NotSortedError, HasTimezoneError, IntegrityError
-from .timespans import audit_timespan, describe_timespan, to_stamps, to_spans, compute_segments
+from .timespans import audit_timespan, describe_timespan, to_stamps, to_spans, compute_segments, clean_overlap_timespan, fill_na_dataframe
 
 pd.set_option('display.width', 1000)
 
@@ -45,6 +45,12 @@ def test_all(self):
         begs = pd.date_range('1970-1-1', freq='d', periods=2).to_series().reset_index(drop=True)
         ends = pd.date_range('1970-1-2', freq='d', periods=2).to_series().reset_index(drop=True)
         describe_timespan(begs, ends)
+        describe_timespan(pd.Series(), pd.Series())
+        self.assertIsNone(audit_timespan(pd.Series(), pd.Series()))
+        begs = pd.date_range('1970-1-1', freq='d', periods=2).to_series().reset_index(drop=True)
+        ends = pd.date_range('1970-1-3', freq='d', periods=2).to_series().reset_index(drop=True)
+        ret = pd.to_datetime(['1970-1-2', '1970-1-4']).to_series().reset_index(drop=True)
+        pd.util.testing.assert_series_equal(ret, clean_overlap_timespan(begs, ends))
         # self.assertTrue(pd.Series().equals(describe_timespan(begs, ends)))
 
     def test_merge(self):
@@ -75,6 +81,15 @@ def test_merge(self):
             'value_d': [10., 20., 30.],
             'value_s': ['10', '20', '30']
         }, columns=stamp_columns)
+        df2b = pd.DataFrame({
+            'ts': pd.to_datetime(['2015-1-1', '2015-1-2', '2015-1-3']),
+            'beg_state_d': [1., 2., -1.],
+            'end_state_d': [-1., 1., 2.],
+            'beg_state_s': ['1', '2', 'UNDEFINED'],
+            'end_state_s': ['UNDEFINED', '1', '2'],
+            'value_d': [10., 20., 30.],
+            'value_s': ['10', '20', '30']
+        }, columns=stamp_columns)
         df3 = pd.DataFrame(
             to_stamps(
                 df1,
@@ -84,6 +99,8 @@ def test_merge(self):
             columns=stamp_columns
         )
         pd.util.testing.assert_frame_equal(df3, df2)
+        fill_na_dataframe(df3)
+        pd.util.testing.assert_frame_equal(df3, df2b)
         df4 = pd.DataFrame(
             to_spans(
                 df3,
diff --git a/chrony/timespans.py b/chrony/timespans.py
index aeaa0df..9342174 100644
--- a/chrony/timespans.py
+++ b/chrony/timespans.py
@@ -8,6 +8,8 @@
 
 
 def audit_timespan(begs, ends):
+    if begs.empty and ends.empty:
+        return
     if begs.dt.tz or ends.dt.tz:
         raise HasTimezoneError
     if len(begs) != len(ends):
@@ -22,6 +24,9 @@ def audit_timespan(begs, ends):
 
 
 def describe_timespan(begs, ends):
+    if begs.empty and ends.empty:
+        print('Empty series')
+        return
     contiguous_transitions = (begs == ends.shift()).sum()
     coverage = (ends - begs).sum().total_seconds() / (ends[len(ends) - 1] - begs[0]).total_seconds()
     metrics = (
@@ -36,6 +41,23 @@ def describe_timespan(begs, ends):
     return retval
 
 
+def clean_overlap_timespan(begs, ends):
+    return pd.DataFrame({'ts_end': ends, 'ts_end_shifted': begs.shift(-1)}).min(axis=1)
+
+
+def fill_na_series(series):
+    if series.dtype.char == 'O':
+        series.fillna('UNDEFINED', inplace=True)
+    else:
+        series.fillna(-1, inplace=True)
+
+
+def fill_na_dataframe(df):
+    for column in df.columns:
+        if column.startswith('beg_') or column.startswith('end_'):
+            fill_na_series(df[column])
+
+
 def to_stamps(df, state_columns, value_columns, beg_col='ts_beg', end_col='ts_end'):
     '''
         Convert an frame representing periods (eg each row has a beg and end) to a frame representing change of periods.

From b9238a8a44c01ea313339bf95486ccc7dee44399 Mon Sep 17 00:00:00 2001
From: Guillaume Thomas <guillaume.thomas642@gmail.com>
Date: Wed, 18 Nov 2015 16:02:51 +0100
Subject: [PATCH 7/9] Updated README

---
 README.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/README.md b/README.md
index 1af3de1..2e14e47 100644
--- a/README.md
+++ b/README.md
@@ -82,3 +82,8 @@ Check out tests for examples.
 A **timespan** is a row of a `pandas.DataFrame` which represents a period of time between two fixed points. These are represented using a beg and a end column.
 
 
+### Development
+
+#### Tests
+
+    nosetests chrony --with-coverage --cover-package chrony

From ac74a03bed8777c650b4b75649cf3817ce8ed221 Mon Sep 17 00:00:00 2001
From: Guillaume Thomas <guillaume.thomas642@gmail.com>
Date: Wed, 18 Nov 2015 16:06:12 +0100
Subject: [PATCH 8/9] Added .travis.yml

---
 .travis.yml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 .travis.yml

diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..aa5422e
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,15 @@
+language: python
+
+python:
+  - 3.4
+
+env:
+
+install:
+  - pip install -r requirements.txt
+  - pip install coveralls
+
+script:
+  - nosetests --with-cover --cover-package chrony chrony
+
+after_success: coveralls
\ No newline at end of file

From e5866bb662c4d8dae11b456213d8bfe04c03f503 Mon Sep 17 00:00:00 2001
From: Guillaume Thomas <guillaume.thomas642@gmail.com>
Date: Wed, 18 Nov 2015 16:35:54 +0100
Subject: [PATCH 9/9] Updated README

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index 2e14e47..56e358c 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,8 @@
 # chrony
 
+[![Build Status](https://travis-ci.org/optimdata/chrony.svg?branch=master)](https://travis-ci.org/optimdata/chrony)
+[![Coverage Status](https://coveralls.io/repos/optimdata/chrony/badge.svg?branch=master&service=github)](https://coveralls.io/github/optimdata/chrony?branch=master)
+
 Timeseries analysis tools with specific focus on timespans. Built on top of pandas.
 
 ## tldr