Skip to content

Commit

Permalink
Merge pull request quantopian#1302 from quantopian/point-in-time-asset-db
Browse files Browse the repository at this point in the history

Point in time asset db
  • Loading branch information
llllllllll authored Jul 26, 2016
2 parents f445671 + b7bb6ca commit d6e3da9
Show file tree
Hide file tree
Showing 19 changed files with 1,327 additions and 619 deletions.
1 change: 1 addition & 0 deletions tests/data/test_us_equity_pricing.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
index=arange(1, 7),
columns=['start_date', 'end_date'],
).astype(datetime64)
EQUITY_INFO['symbol'] = [chr(ord('A') + n) for n in range(len(EQUITY_INFO))]

TEST_QUERY_ASSETS = EQUITY_INFO.index

Expand Down
1 change: 1 addition & 0 deletions tests/pipeline/test_us_equity_pricing_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@
index=arange(1, 7),
columns=['start_date', 'end_date'],
).astype(datetime64)
EQUITY_INFO['symbol'] = [chr(ord('A') + n) for n in range(len(EQUITY_INFO))]

TEST_QUERY_ASSETS = EQUITY_INFO.index

Expand Down
Binary file modified tests/resources/example_data.tar.gz
Binary file not shown.
10 changes: 9 additions & 1 deletion tests/test_algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,10 @@ class TestTransformAlgorithm(WithLogger,

@classmethod
def make_futures_info(cls):
return pd.DataFrame.from_dict({3: {'multiplier': 10}}, 'index')
return pd.DataFrame.from_dict(
{3: {'multiplier': 10, 'symbol': 'F'}},
orient='index',
)

@classmethod
def make_equity_daily_bar_data(cls):
Expand Down Expand Up @@ -985,6 +988,7 @@ def test_minute_data(self, algo_class):
'start_date': start_session,
'end_date': period_end + timedelta(days=1)
}] * 2)
equities['symbol'] = ['A', 'B']
with TempDirectory() as tempdir, \
tmp_trading_env(equities=equities) as env:
sim_params = SimulationParameters(
Expand Down Expand Up @@ -2813,6 +2817,7 @@ def test_set_max_order_count(self):
metadata = pd.DataFrame.from_dict(
{
1: {
'symbol': 'SYM',
'start_date': start,
'end_date': start + timedelta(days=6)
},
Expand Down Expand Up @@ -2940,6 +2945,7 @@ def handle_data(algo, data):

def test_asset_date_bounds(self):
metadata = pd.DataFrame([{
'symbol': 'SYM',
'start_date': self.sim_params.start_session,
'end_date': '2020-01-01',
}])
Expand All @@ -2959,6 +2965,7 @@ def test_asset_date_bounds(self):
algo.run(data_portal)

metadata = pd.DataFrame([{
'symbol': 'SYM',
'start_date': '1989-01-01',
'end_date': '1990-01-01',
}])
Expand All @@ -2979,6 +2986,7 @@ def test_asset_date_bounds(self):
algo.run(data_portal)

metadata = pd.DataFrame([{
'symbol': 'SYM',
'start_date': '2020-01-01',
'end_date': '2021-01-01',
}])
Expand Down
181 changes: 122 additions & 59 deletions tests/test_assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@
"""
from contextlib import contextmanager
from datetime import datetime, timedelta
from functools import partial
import pickle
import sys
from types import GetSetDescriptorType
from unittest import TestCase
import uuid
import warnings

from nose.tools import raises
from nose_parameterized import parameterized
from numpy import full, int32, int64
import pandas as pd
Expand All @@ -39,7 +39,6 @@
Future,
AssetDBWriter,
AssetFinder,
AssetFinderCachedEquities,
)
from zipline.assets.synthetic import (
make_commodity_future_info,
Expand Down Expand Up @@ -341,7 +340,6 @@ def test_repr(self):
self.assertIn("tick_size=0.01", reprd)
self.assertIn("multiplier=500", reprd)

@raises(AssertionError)
def test_reduce(self):
assert_equal(
pickle.loads(pickle.dumps(self.future)).to_dict(),
Expand Down Expand Up @@ -485,6 +483,97 @@ def test_lookup_symbol_fuzzy(self):
self.assertEqual(2, finder.lookup_symbol('BRK_A', None, fuzzy=True))
self.assertEqual(2, finder.lookup_symbol('BRK_A', dt, fuzzy=True))

def test_lookup_symbol_change_ticker(self):
    """A ticker symbol may be reassigned to a different sid over time.

    Writes two sids whose symbol histories interleave: sid 0 holds 'A'
    then 'B'; sid 1 holds 'C' then claims 'A' once sid 0 has released
    it.  Verifies that ``lookup_symbol`` resolves each symbol to the sid
    that owned it *as of* the query date, raises ``SymbolNotFound``
    before any owner exists, and forward-fills to the last owner after
    the final ``end_date``.
    """
    # Shorthand for building tz-aware timestamps.
    T = partial(pd.Timestamp, tz='utc')
    metadata = pd.DataFrame.from_records(
        [
            # sid 0
            {
                'symbol': 'A',
                'start_date': T('2014-01-01'),
                'end_date': T('2014-01-05'),
            },
            {
                'symbol': 'B',
                'start_date': T('2014-01-06'),
                'end_date': T('2014-01-10'),
            },

            # sid 1
            {
                'symbol': 'C',
                'start_date': T('2014-01-01'),
                'end_date': T('2014-01-05'),
            },
            {
                'symbol': 'A',  # claiming the unused symbol 'A'
                'start_date': T('2014-01-06'),
                'end_date': T('2014-01-10'),
            },
        ],
        # Row index supplies the sid for each ownership record; repeated
        # values give one sid multiple symbol-ownership periods.
        index=[0, 0, 1, 1],
    )
    self.write_assets(equities=metadata)
    finder = self.asset_finder

    # note: these assertions walk forward in time, starting at assertions
    # about ownership before the start_date and ending with assertions
    # after the end_date; new assertions should be inserted in the correct
    # locations

    # no one held 'A' before 01
    with self.assertRaises(SymbolNotFound):
        finder.lookup_symbol('A', T('2013-12-31'))

    # no one held 'C' before 01
    with self.assertRaises(SymbolNotFound):
        finder.lookup_symbol('C', T('2013-12-31'))

    for asof in pd.date_range('2014-01-01', '2014-01-05', tz='utc'):
        # from 01 through 05 sid 0 held 'A'
        assert_equal(
            finder.lookup_symbol('A', asof),
            finder.retrieve_asset(0),
            msg=str(asof),
        )

        # from 01 through 05 sid 1 held 'C'
        assert_equal(
            finder.lookup_symbol('C', asof),
            finder.retrieve_asset(1),
            msg=str(asof),
        )

    # no one held 'B' before 06
    with self.assertRaises(SymbolNotFound):
        finder.lookup_symbol('B', T('2014-01-05'))

    # no one held 'C' after 06, however, no one has claimed it yet
    # so it still maps to sid 1
    assert_equal(
        finder.lookup_symbol('C', T('2014-01-07')),
        finder.retrieve_asset(1),
    )

    for asof in pd.date_range('2014-01-06', '2014-01-11', tz='utc'):
        # from 06 through 10 sid 0 held 'B'
        # we test through the 11th because sid 1 is the last to hold 'B'
        # so it should ffill
        assert_equal(
            finder.lookup_symbol('B', asof),
            finder.retrieve_asset(0),
            msg=str(asof),
        )

        # from 06 through 10 sid 1 held 'A'
        # we test through the 11th because sid 1 is the last to hold 'A'
        # so it should ffill
        assert_equal(
            finder.lookup_symbol('A', asof),
            finder.retrieve_asset(1),
            msg=str(asof),
        )

def test_lookup_symbol(self):

# Incrementing by two so that start and end dates for each
Expand Down Expand Up @@ -519,27 +608,7 @@ def test_lookup_symbol(self):
self.assertEqual(result.symbol, 'EXISTING')
self.assertEqual(result.sid, i)

def test_lookup_symbol_from_multiple_valid(self):
# This test asserts that we resolve conflicts in accordance with the
# following rules when we have multiple assets holding the same symbol
# at the same time:

# If multiple SIDs exist for symbol S at time T, return the candidate
# SID whose start_date is highest. (200 cases)

# If multiple SIDs exist for symbol S at time T, the best candidate
# SIDs share the highest start_date, return the SID with the highest
# end_date. (34 cases)

# It is the opinion of the author (ssanderson) that we should consider
# this malformed input and fail here. But this is the current indended
# behavior of the code, and I accidentally broke it while refactoring.
# These will serve as regression tests until the time comes that we
# decide to enforce this as an error.

# See https://github.com/quantopian/zipline/issues/837 for more
# details.

def test_fail_to_write_overlapping_data(self):
df = pd.DataFrame.from_records(
[
{
Expand Down Expand Up @@ -568,22 +637,16 @@ def test_lookup_symbol_from_multiple_valid(self):
]
)

self.write_assets(equities=df)

def check(expected_sid, date):
result = self.asset_finder.lookup_symbol(
'MULTIPLE', date,
)
self.assertEqual(result.symbol, 'MULTIPLE')
self.assertEqual(result.sid, expected_sid)
with self.assertRaises(ValueError) as e:
self.write_assets(equities=df)

# Sids 1 and 2 are eligible here. We should get asset 2 because it
# has the later end_date.
check(2, pd.Timestamp('2010-12-31'))

# Sids 1, 2, and 3 are eligible here. We should get sid 3 because
# it has a later start_date
check(3, pd.Timestamp('2011-01-01'))
self.assertEqual(
str(e.exception),
"Ambiguous ownership of 'MULTIPLE', multiple companies held this"
" ticker over the following ranges:\n"
"[('2010-01-01 00:00:00', '2012-01-01 00:00:00'),"
" ('2011-01-01 00:00:00', '2012-01-01 00:00:00')]",
)

def test_lookup_generic(self):
"""
Expand Down Expand Up @@ -1000,14 +1063,6 @@ def test_error_message_plurality(self,
)


class AssetFinderCachedEquitiesTestCase(AssetFinderTestCase):
asset_finder_type = AssetFinderCachedEquities

def write_assets(self, **kwargs):
super(AssetFinderCachedEquitiesTestCase, self).write_assets(**kwargs)
self.asset_finder.rehash_equities()


class TestFutureChain(WithAssetFinder, ZiplineTestCase):
@classmethod
def make_futures_info(cls):
Expand Down Expand Up @@ -1259,15 +1314,23 @@ def test_check_version(self):
version_table = self.metadata.tables['version_info']

# This should not raise an error
check_version_info(version_table, ASSET_DB_VERSION)
check_version_info(self.engine, version_table, ASSET_DB_VERSION)

# This should fail because the version is too low
with self.assertRaises(AssetDBVersionError):
check_version_info(version_table, ASSET_DB_VERSION - 1)
check_version_info(
self.engine,
version_table,
ASSET_DB_VERSION - 1,
)

# This should fail because the version is too high
with self.assertRaises(AssetDBVersionError):
check_version_info(version_table, ASSET_DB_VERSION + 1)
check_version_info(
self.engine,
version_table,
ASSET_DB_VERSION + 1,
)

def test_write_version(self):
version_table = self.metadata.tables['version_info']
Expand All @@ -1279,33 +1342,33 @@ def test_write_version(self):
# This should fail because the table has no version info and is,
# therefore, consdered v0
with self.assertRaises(AssetDBVersionError):
check_version_info(version_table, -2)
check_version_info(self.engine, version_table, -2)

# This should not raise an error because the version has been written
write_version_info(version_table, -2)
check_version_info(version_table, -2)
write_version_info(self.engine, version_table, -2)
check_version_info(self.engine, version_table, -2)

# Assert that the version is in the table and correct
self.assertEqual(sa.select((version_table.c.version,)).scalar(), -2)

# Assert that trying to overwrite the version fails
with self.assertRaises(sa.exc.IntegrityError):
write_version_info(version_table, -3)
write_version_info(self.engine, version_table, -3)

def test_finder_checks_version(self):
version_table = self.metadata.tables['version_info']
version_table.delete().execute()
write_version_info(version_table, -2)
check_version_info(version_table, -2)
write_version_info(self.engine, version_table, -2)
check_version_info(self.engine, version_table, -2)

# Assert that trying to build a finder with a bad db raises an error
with self.assertRaises(AssetDBVersionError):
AssetFinder(engine=self.engine)

# Change the version number of the db to the correct version
version_table.delete().execute()
write_version_info(version_table, ASSET_DB_VERSION)
check_version_info(version_table, ASSET_DB_VERSION)
write_version_info(self.engine, version_table, ASSET_DB_VERSION)
check_version_info(self.engine, version_table, ASSET_DB_VERSION)

# Now that the versions match, this Finder should succeed
AssetFinder(engine=self.engine)
Expand All @@ -1319,7 +1382,7 @@ def test_downgrade(self):
metadata = sa.MetaData(conn)
metadata.reflect(bind=self.engine)
version_table = metadata.tables['version_info']
check_version_info(version_table, 0)
check_version_info(self.engine, version_table, 0)

# Check some of the v1-to-v0 downgrades
self.assertTrue('futures_contracts' in metadata.tables)
Expand Down
20 changes: 12 additions & 8 deletions tests/test_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,24 @@ def make_equity_info(cls):
return pd.DataFrame.from_dict(
{
1: {
"start_date": cls.START_DATE,
"end_date": cls.END_DATE + pd.Timedelta(days=1)
'symbol': 'A',
'start_date': cls.START_DATE,
'end_date': cls.END_DATE + pd.Timedelta(days=1)
},
2: {
"start_date": cls.START_DATE,
"end_date": cls.END_DATE + pd.Timedelta(days=1)
'symbol': 'B',
'start_date': cls.START_DATE,
'end_date': cls.END_DATE + pd.Timedelta(days=1)
},
3: {
"start_date": pd.Timestamp('2006-05-26', tz='utc'),
"end_date": pd.Timestamp('2006-08-09', tz='utc')
'symbol': 'C',
'start_date': pd.Timestamp('2006-05-26', tz='utc'),
'end_date': pd.Timestamp('2006-08-09', tz='utc')
},
4: {
"start_date": cls.START_DATE,
"end_date": cls.END_DATE + pd.Timedelta(days=1)
'symbol': 'D',
'start_date': cls.START_DATE,
'end_date': cls.END_DATE + pd.Timedelta(days=1)
},
},
orient='index',
Expand Down
3 changes: 3 additions & 0 deletions zipline/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,9 @@ def bundles():
"""List all of the available data bundles.
"""
for bundle in sorted(bundles_module.bundles.keys()):
if bundle.startswith('.'):
# hide the test data
continue
try:
ingestions = sorted(
(str(bundles_module.from_bundle_ingest_dirname(ing))
Expand Down
Loading

0 comments on commit d6e3da9

Please sign in to comment.